author     Jonathan Gray <jsg@cvs.openbsd.org>   2019-01-29 11:52:33 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>   2019-01-29 11:52:33 +0000
commit     37bbf6a1792773f11c15a4da1588a7520ee2fb4e (patch)
tree       64944d4aa665a1e479cfc004e446593062254550 /lib/mesa/src/gallium/drivers/radeon
parent     6b139c2063623e9310025247cd966490b9aa57ea (diff)
Merge Mesa 18.3.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/radeon')
24 files changed, 1000 insertions, 10947 deletions
diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.am b/lib/mesa/src/gallium/drivers/radeon/Makefile.am deleted file mode 100644 index 7f64b7615..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/Makefile.am +++ /dev/null @@ -1,28 +0,0 @@ -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - - -AM_CFLAGS = \ - $(GALLIUM_DRIVER_CFLAGS) \ - $(RADEON_CFLAGS) \ - -Wstrict-overflow=0 -# ^^ disable warnings about overflows (os_time_timeout) - -noinst_LTLIBRARIES = libradeon.la - -libradeon_la_SOURCES = \ - $(C_SOURCES) - -if HAVE_GALLIUM_LLVM - -AM_CFLAGS += \ - $(LLVM_CFLAGS) - -libradeon_la_LIBADD = \ - $(CLOCK_LIB) \ - $(LLVM_LIBS) - -libradeon_la_LDFLAGS = \ - $(LLVM_LDFLAGS) - -endif diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.in b/lib/mesa/src/gallium/drivers/radeon/Makefile.in deleted file mode 100644 index fa9d3a7b7..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/Makefile.in +++ /dev/null @@ -1,870 +0,0 @@ -# Makefile.in generated by automake 1.12.6 from Makefile.am. -# @configure_input@ - -# Copyright (C) 1994-2012 Free Software Foundation, Inc. - -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -VPATH = @srcdir@ -am__make_dryrun = \ - { \ - am__dry=no; \ - case $$MAKEFLAGS in \ - *\\[\ \ ]*) \ - echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ - | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ - *) \ - for am__flg in $$MAKEFLAGS; do \ - case $$am__flg in \ - *=*|--*) ;; \ - *n*) am__dry=yes; break;; \ - esac; \ - done;; \ - esac; \ - test $$am__dry = yes; \ - } -pkgdatadir = $(datadir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkglibexecdir = $(libexecdir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -target_triplet = @target@ -DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ - $(srcdir)/Makefile.sources $(top_srcdir)/bin/depcomp \ - $(top_srcdir)/src/gallium/Automake.inc -@HAVE_LIBDRM_TRUE@am__append_1 = \ -@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS) - -@HAVE_DRISW_TRUE@am__append_2 = \ -@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la - -@HAVE_DRISW_KMS_TRUE@am__append_3 = \ -@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ -@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS) - -@HAVE_GALLIUM_LLVM_TRUE@am__append_4 = \ -@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_CFLAGS) - -subdir = src/gallium/drivers/radeon -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \ - $(top_srcdir)/m4/ax_check_gnu_make.m4 \ - $(top_srcdir)/m4/ax_check_python_mako_module.m4 \ - $(top_srcdir)/m4/ax_gcc_builtin.m4 \ - $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ - $(top_srcdir)/m4/ax_prog_bison.m4 \ - $(top_srcdir)/m4/ax_prog_flex.m4 \ - 
$(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ - $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ - $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ - $(top_srcdir)/VERSION $(top_srcdir)/configure.ac -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -mkinstalldirs = $(install_sh) -d -CONFIG_CLEAN_FILES = -CONFIG_CLEAN_VPATH_FILES = -LTLIBRARIES = $(noinst_LTLIBRARIES) -am__DEPENDENCIES_1 = -@HAVE_GALLIUM_LLVM_TRUE@libradeon_la_DEPENDENCIES = \ -@HAVE_GALLIUM_LLVM_TRUE@ $(am__DEPENDENCIES_1) \ -@HAVE_GALLIUM_LLVM_TRUE@ $(am__DEPENDENCIES_1) -am__objects_1 = r600_buffer_common.lo r600_gpu_load.lo \ - r600_perfcounter.lo r600_pipe_common.lo r600_query.lo \ - r600_test_dma.lo r600_texture.lo radeon_uvd.lo \ - radeon_vcn_dec.lo radeon_vce_40_2_2.lo radeon_vce_50.lo \ - radeon_vce_52.lo radeon_vce.lo radeon_video.lo -am_libradeon_la_OBJECTS = $(am__objects_1) -libradeon_la_OBJECTS = $(am_libradeon_la_OBJECTS) -AM_V_lt = $(am__v_lt_@AM_V@) -am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) -am__v_lt_0 = --silent -am__v_lt_1 = -libradeon_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(libradeon_la_LDFLAGS) $(LDFLAGS) -o $@ -AM_V_P = $(am__v_P_@AM_V@) -am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) -am__v_P_0 = false -am__v_P_1 = : -AM_V_GEN = $(am__v_GEN_@AM_V@) -am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) -am__v_GEN_0 = @echo " GEN " $@; -am__v_GEN_1 = -AM_V_at = $(am__v_at_@AM_V@) -am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) -am__v_at_0 = @ -am__v_at_1 = -DEFAULT_INCLUDES = -I.@am__isrc@ -depcomp = $(SHELL) $(top_srcdir)/bin/depcomp -am__depfiles_maybe = depfiles -am__mv = mv -f -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ - $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ - $(AM_CFLAGS) $(CFLAGS) -AM_V_CC = $(am__v_CC_@AM_V@) -am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) -am__v_CC_0 = @echo " CC " $@; -am__v_CC_1 = -CCLD = $(CC) -LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(AM_LDFLAGS) $(LDFLAGS) -o $@ -AM_V_CCLD = $(am__v_CCLD_@AM_V@) -am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) -am__v_CCLD_0 = @echo " CCLD " $@; -am__v_CCLD_1 = -SOURCES = $(libradeon_la_SOURCES) -DIST_SOURCES = $(libradeon_la_SOURCES) -am__can_run_installinfo = \ - case $$AM_UPDATE_INFO_DIR in \ - n|no|NO) false;; \ - *) (install-info --version) >/dev/null 2>&1;; \ - esac -ETAGS = etags -CTAGS = ctags -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ -AMDGPU_LIBS = @AMDGPU_LIBS@ -AMTAR = @AMTAR@ -AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ -ANDROID_CFLAGS = @ANDROID_CFLAGS@ -ANDROID_LIBS = @ANDROID_LIBS@ -AR = @AR@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -BSYMBOLIC = @BSYMBOLIC@ -CC = @CC@ -CCAS = @CCAS@ -CCASDEPMODE = @CCASDEPMODE@ -CCASFLAGS = @CCASFLAGS@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ -CLOCK_LIB = @CLOCK_LIB@ -CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@ -CPP = @CPP@ -CPPFLAGS = @CPPFLAGS@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CYGPATH_W = @CYGPATH_W@ -D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@ -DEFINES = 
@DEFINES@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -DLLTOOL = @DLLTOOL@ -DLOPEN_LIBS = @DLOPEN_LIBS@ -DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ -DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ -DRIGL_CFLAGS = @DRIGL_CFLAGS@ -DRIGL_LIBS = @DRIGL_LIBS@ -DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ -DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ -DRI_LIB_DEPS = @DRI_LIB_DEPS@ -DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ -DSYMUTIL = @DSYMUTIL@ -DUMPBIN = @DUMPBIN@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGL_CFLAGS = @EGL_CFLAGS@ -EGL_LIB_DEPS = @EGL_LIB_DEPS@ -EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ -EGREP = @EGREP@ -ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@ -ETNAVIV_LIBS = @ETNAVIV_LIBS@ -EXEEXT = @EXEEXT@ -EXPAT_CFLAGS = @EXPAT_CFLAGS@ -EXPAT_LIBS = @EXPAT_LIBS@ -FGREP = @FGREP@ -FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ -FREEDRENO_LIBS = @FREEDRENO_LIBS@ -GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ -GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ -GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ -GC_SECTIONS = @GC_SECTIONS@ -GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ -GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ -GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ -GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ -GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ -GLPROTO_LIBS = @GLPROTO_LIBS@ -GLVND_CFLAGS = @GLVND_CFLAGS@ -GLVND_LIBS = @GLVND_LIBS@ -GLX_TLS = @GLX_TLS@ -GL_LIB = @GL_LIB@ -GL_LIB_DEPS = @GL_LIB_DEPS@ -GL_PC_CFLAGS = @GL_PC_CFLAGS@ -GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ -GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ -GREP = @GREP@ -HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ -I915_CFLAGS = @I915_CFLAGS@ -I915_LIBS = @I915_LIBS@ -INDENT = @INDENT@ -INDENT_FLAGS = @INDENT_FLAGS@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LD = @LD@ -LDFLAGS = @LDFLAGS@ -LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ -LEX = @LEX@ -LEXLIB = @LEXLIB@ -LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ -LIBATOMIC_LIBS = @LIBATOMIC_LIBS@ -LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ -LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ -LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ -LIBDRM_LIBS = @LIBDRM_LIBS@ -LIBELF_CFLAGS = @LIBELF_CFLAGS@ -LIBELF_LIBS = @LIBELF_LIBS@ -LIBGLVND_DATADIR = @LIBGLVND_DATADIR@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LIBSENSORS_LIBS = @LIBSENSORS_LIBS@ -LIBTOOL = @LIBTOOL@ -LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ -LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ -LIB_DIR = @LIB_DIR@ -LIB_EXT = @LIB_EXT@ -LIPO = @LIPO@ -LLVM_CFLAGS = @LLVM_CFLAGS@ -LLVM_CONFIG = @LLVM_CONFIG@ -LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ -LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ -LLVM_LDFLAGS = @LLVM_LDFLAGS@ -LLVM_LIBS = @LLVM_LIBS@ -LN_S = @LN_S@ -LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MANIFEST_TOOL = @MANIFEST_TOOL@ -MKDIR_P = @MKDIR_P@ -MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ -MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ -NINE_MAJOR = @NINE_MAJOR@ -NINE_MINOR = @NINE_MINOR@ -NINE_TINY = @NINE_TINY@ -NINE_VERSION = @NINE_VERSION@ -NM = @NM@ -NMEDIT = @NMEDIT@ -NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ -NOUVEAU_LIBS = @NOUVEAU_LIBS@ -NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@ -NVVIEUX_LIBS = @NVVIEUX_LIBS@ -OBJDUMP = @OBJDUMP@ -OBJEXT = @OBJEXT@ -OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@ -OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@ -OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@ -OPENCL_LIBNAME = @OPENCL_LIBNAME@ -OPENCL_VERSION = @OPENCL_VERSION@ -OSMESA_LIB = @OSMESA_LIB@ -OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ -OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ -OSMESA_PC_REQ = @OSMESA_PC_REQ@ 
-OSMESA_VERSION = @OSMESA_VERSION@ -OTOOL = @OTOOL@ -OTOOL64 = @OTOOL64@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_URL = @PACKAGE_URL@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PKG_CONFIG = @PKG_CONFIG@ -PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ -PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ -POSIX_SHELL = @POSIX_SHELL@ -PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@ -PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -PWR8_CFLAGS = @PWR8_CFLAGS@ -PYTHON2 = @PYTHON2@ -RADEON_CFLAGS = @RADEON_CFLAGS@ -RADEON_LIBS = @RADEON_LIBS@ -RANLIB = @RANLIB@ -RM = @RM@ -SED = @SED@ -SELINUX_CFLAGS = @SELINUX_CFLAGS@ -SELINUX_LIBS = @SELINUX_LIBS@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@ -SIMPENROSE_LIBS = @SIMPENROSE_LIBS@ -SSE41_CFLAGS = @SSE41_CFLAGS@ -STRIP = @STRIP@ -SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@ -SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@ -SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@ -SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@ -SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@ -VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ -VALGRIND_LIBS = @VALGRIND_LIBS@ -VA_CFLAGS = @VA_CFLAGS@ -VA_LIBS = @VA_LIBS@ -VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ -VA_MAJOR = @VA_MAJOR@ -VA_MINOR = @VA_MINOR@ -VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@ -VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@ -VDPAU_CFLAGS = @VDPAU_CFLAGS@ -VDPAU_LIBS = @VDPAU_LIBS@ -VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ -VDPAU_MAJOR = @VDPAU_MAJOR@ -VDPAU_MINOR = @VDPAU_MINOR@ -VERSION = @VERSION@ -VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ -VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ -VL_CFLAGS = @VL_CFLAGS@ -VL_LIBS = @VL_LIBS@ -VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@ -WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@ -WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@ -WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@ -WAYLAND_SCANNER = @WAYLAND_SCANNER@ -WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ -WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ -WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@ -WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@ -WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@ -X11_INCLUDES = @X11_INCLUDES@ -XA_MAJOR = @XA_MAJOR@ -XA_MINOR = @XA_MINOR@ -XA_TINY = @XA_TINY@ -XA_VERSION = @XA_VERSION@ -XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ -XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ -XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@ -XCB_DRI3_LIBS = @XCB_DRI3_LIBS@ -XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ -XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ -XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ -XLIBGL_LIBS = @XLIBGL_LIBS@ -XVMC_CFLAGS = @XVMC_CFLAGS@ -XVMC_LIBS = @XVMC_LIBS@ -XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ -XVMC_MAJOR = @XVMC_MAJOR@ -XVMC_MINOR = @XVMC_MINOR@ -YACC = @YACC@ -YFLAGS = @YFLAGS@ -ZLIB_CFLAGS = @ZLIB_CFLAGS@ -ZLIB_LIBS = @ZLIB_LIBS@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_ct_AR = @ac_ct_AR@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -acv_mako_found = @acv_mako_found@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -ax_pthread_config = @ax_pthread_config@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = 
@build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -ifGNUmake = @ifGNUmake@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target = @target@ -target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ -top_build_prefix = @top_build_prefix@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ -C_SOURCES := \ - r600_buffer_common.c \ - r600_cs.h \ - r600_gpu_load.c \ - r600_perfcounter.c \ - r600_pipe_common.c \ - r600_pipe_common.h \ - r600_query.c \ - r600_query.h \ - r600_test_dma.c \ - r600_texture.c \ - radeon_uvd.c \ - radeon_uvd.h \ - radeon_vcn_dec.c \ - radeon_vcn_dec.h \ - radeon_vce_40_2_2.c \ - radeon_vce_50.c \ - radeon_vce_52.c \ - radeon_vce.c \ - radeon_vce.h \ - radeon_video.c \ - radeon_video.h \ - radeon_winsys.h - -GALLIUM_CFLAGS = \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/gallium/auxiliary \ - $(DEFINES) - - -# src/gallium/auxiliary must appear before src/gallium/drivers -# because there are stupidly two rbug_context.h files in -# different directories, and which one is included by the -# preprocessor is determined by the ordering of the -I flags. 
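
The comment above is the whole reason for the -I ordering: when two headers of the same name exist in the tree, the preprocessor takes the first match in -I search order. A minimal standalone illustration of that behaviour (the compile commands are hypothetical; the two header paths are the ones the comment refers to):

/* foo.c
 * Both src/gallium/auxiliary/rbug/rbug_context.h and
 * src/gallium/drivers/rbug/rbug_context.h exist, so -I order decides
 * which copy is found:
 *
 *   cc -Isrc/gallium/auxiliary -Isrc/gallium/drivers -c foo.c  -> auxiliary copy
 *   cc -Isrc/gallium/drivers -Isrc/gallium/auxiliary -c foo.c  -> drivers copy
 */
#include "rbug/rbug_context.h"
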
-GALLIUM_DRIVER_CFLAGS = \ - -I$(srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - $(DEFINES) \ - $(VISIBILITY_CFLAGS) - -GALLIUM_DRIVER_CXXFLAGS = \ - -I$(srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - $(DEFINES) \ - $(VISIBILITY_CXXFLAGS) - -GALLIUM_TARGET_CFLAGS = \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src/loader \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - -I$(top_builddir)/src/util/ \ - -I$(top_builddir)/src/gallium/drivers/ \ - $(DEFINES) \ - $(PTHREAD_CFLAGS) \ - $(LIBDRM_CFLAGS) \ - $(VISIBILITY_CFLAGS) - -GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \ - $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) -GALLIUM_WINSYS_CFLAGS = \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/gallium/auxiliary \ - $(DEFINES) \ - $(VISIBILITY_CFLAGS) - -GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ - $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ - $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ - $(am__append_2) $(am__append_3) -AM_CFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(RADEON_CFLAGS) \ - -Wstrict-overflow=0 $(am__append_4) -# ^^ disable warnings about overflows (os_time_timeout) -noinst_LTLIBRARIES = libradeon.la -libradeon_la_SOURCES = \ - $(C_SOURCES) - -@HAVE_GALLIUM_LLVM_TRUE@libradeon_la_LIBADD = \ -@HAVE_GALLIUM_LLVM_TRUE@ $(CLOCK_LIB) \ -@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_LIBS) - -@HAVE_GALLIUM_LLVM_TRUE@libradeon_la_LDFLAGS = \ -@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_LDFLAGS) - -all: all-am - -.SUFFIXES: -.SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ - && { if test -f $@; then exit 0; else break; fi; }; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile'; \ - $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' 
in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; -$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc: - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(am__aclocal_m4_deps): - -clean-noinstLTLIBRARIES: - -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) - @list='$(noinst_LTLIBRARIES)'; \ - locs=`for p in $$list; do echo $$p; done | \ - sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ - sort -u`; \ - test -z "$$locs" || { \ - echo rm -f $${locs}; \ - rm -f $${locs}; \ - } -libradeon.la: $(libradeon_la_OBJECTS) $(libradeon_la_DEPENDENCIES) $(EXTRA_libradeon_la_DEPENDENCIES) - $(AM_V_CCLD)$(libradeon_la_LINK) $(libradeon_la_OBJECTS) $(libradeon_la_LIBADD) $(LIBS) - -mostlyclean-compile: - -rm -f *.$(OBJEXT) - -distclean-compile: - -rm -f *.tab.c - -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_buffer_common.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_gpu_load.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_perfcounter.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe_common.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_query.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_test_dma.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_texture.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_uvd.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_40_2_2.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_50.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_52.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vcn_dec.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_video.Plo@am__quote@ - -.c.o: -@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ -@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< - -.c.obj: -@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ -@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` - -.c.lo: 
-@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ -@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ -@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< - -mostlyclean-libtool: - -rm -f *.lo - -clean-libtool: - -rm -rf .libs _libs - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - set x; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - shift; \ - if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - if test $$# -gt 0; then \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - "$$@" $$unique; \ - else \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$unique; \ - fi; \ - fi -ctags: CTAGS -CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - test -z "$(CTAGS_ARGS)$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && $(am__cd) $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) "$$here" - -cscopelist: $(HEADERS) $(SOURCES) $(LISP) - list='$(SOURCES) $(HEADERS) $(LISP)'; \ - case "$(srcdir)" in \ - [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ - *) sdir=$(subdir)/$(srcdir) ;; \ - esac; \ - for i in $$list; do \ - if test -f "$$i"; then \ - echo "$(subdir)/$$i"; \ - else \ - echo "$$sdir/$$i"; \ - fi; \ - done >> $(top_builddir)/cscope.files - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d "$(distdir)/$$file"; then \ - find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ - else \ - test -f "$(distdir)/$$file" \ - || cp -p $$d/$$file "$(distdir)/$$file" \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-am -all-am: Makefile $(LTLIBRARIES) -installdirs: -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-am -install-strip: - if test -z '$(STRIP)'; then \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - install; \ - else \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ - fi -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." -clean: clean-am - -clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ - mostlyclean-am - -distclean: distclean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -distclean-am: clean-am distclean-compile distclean-generic \ - distclean-tags - -dvi: dvi-am - -dvi-am: - -html: html-am - -html-am: - -info: info-am - -info-am: - -install-data-am: - -install-dvi: install-dvi-am - -install-dvi-am: - -install-exec-am: - -install-html: install-html-am - -install-html-am: - -install-info: install-info-am - -install-info-am: - -install-man: - -install-pdf: install-pdf-am - -install-pdf-am: - -install-ps: install-ps-am - -install-ps-am: - -installcheck-am: - -maintainer-clean: maintainer-clean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-am - -mostlyclean-am: mostlyclean-compile mostlyclean-generic \ - mostlyclean-libtool - -pdf: pdf-am - -pdf-am: - -ps: ps-am - -ps-am: - -uninstall-am: - -.MAKE: install-am install-strip - -.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ - clean-libtool clean-noinstLTLIBRARIES cscopelist ctags \ - distclean distclean-compile distclean-generic \ - distclean-libtool distclean-tags distdir dvi dvi-am html \ - html-am info info-am install install-am install-data \ - install-data-am install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am install-info \ - install-info-am install-man install-pdf install-pdf-am \ - install-ps install-ps-am install-strip installcheck \ - installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.sources b/lib/mesa/src/gallium/drivers/radeon/Makefile.sources deleted file mode 100644 index 22de12973..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/Makefile.sources +++ /dev/null @@ -1,23 +0,0 @@ -C_SOURCES := \ - r600_buffer_common.c \ - r600_cs.h \ - r600_gpu_load.c \ - r600_perfcounter.c \ - r600_pipe_common.c \ - r600_pipe_common.h \ - r600_query.c \ - r600_query.h \ - r600_test_dma.c \ - r600_texture.c \ - radeon_uvd.c \ - radeon_uvd.h \ - radeon_vcn_dec.c \ - radeon_vcn_dec.h \ - radeon_vce_40_2_2.c \ - radeon_vce_50.c \ - radeon_vce_52.c \ - radeon_vce.c \ - radeon_vce.h \ - radeon_video.c \ - radeon_video.h \ - radeon_winsys.h diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c deleted file mode 100644 index 366581d45..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c +++ /dev/null @@ -1,681 +0,0 @@ -/* - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Marek Olšák - */ - -#include "r600_cs.h" -#include "util/u_memory.h" -#include "util/u_upload_mgr.h" -#include <inttypes.h> -#include <stdio.h> - -bool si_rings_is_buffer_referenced(struct r600_common_context *ctx, - struct pb_buffer *buf, - enum radeon_bo_usage usage) -{ - if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) { - return true; - } - if (radeon_emitted(ctx->dma.cs, 0) && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) { - return true; - } - return false; -} - -void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx, - struct r600_resource *resource, - unsigned usage) -{ - enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE; - bool busy = false; - - assert(!(resource->flags & RADEON_FLAG_SPARSE)); - - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { - return ctx->ws->buffer_map(resource->buf, NULL, usage); - } - - if (!(usage & PIPE_TRANSFER_WRITE)) { - /* have to wait for the last write */ - rusage = RADEON_USAGE_WRITE; - } - - if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, - resource->buf, rusage)) { - if (usage & PIPE_TRANSFER_DONTBLOCK) { - ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); - return NULL; - } else { - ctx->gfx.flush(ctx, 0, NULL); - busy = true; - } - } - if (radeon_emitted(ctx->dma.cs, 0) && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, - resource->buf, rusage)) { - if (usage & PIPE_TRANSFER_DONTBLOCK) { - ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); - return NULL; - } else { - ctx->dma.flush(ctx, 0, NULL); - busy = true; - } - } - - if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) { - if (usage & PIPE_TRANSFER_DONTBLOCK) { - return NULL; - } else { - /* We will be wait for the GPU. Wait for any offloaded - * CS flush to complete to avoid busy-waiting in the winsys. */ - ctx->ws->cs_sync_flush(ctx->gfx.cs); - if (ctx->dma.cs) - ctx->ws->cs_sync_flush(ctx->dma.cs); - } - } - - /* Setting the CS to NULL will prevent doing checks we have done already. */ - return ctx->ws->buffer_map(resource->buf, NULL, usage); -} - -void si_init_resource_fields(struct r600_common_screen *rscreen, - struct r600_resource *res, - uint64_t size, unsigned alignment) -{ - struct r600_texture *rtex = (struct r600_texture*)res; - - res->bo_size = size; - res->bo_alignment = alignment; - res->flags = 0; - res->texture_handle_allocated = false; - res->image_handle_allocated = false; - - switch (res->b.b.usage) { - case PIPE_USAGE_STREAM: - res->flags = RADEON_FLAG_GTT_WC; - /* fall through */ - case PIPE_USAGE_STAGING: - /* Transfers are likely to occur more often with these - * resources. */ - res->domains = RADEON_DOMAIN_GTT; - break; - case PIPE_USAGE_DYNAMIC: - /* Older kernels didn't always flush the HDP cache before - * CS execution - */ - if (rscreen->info.drm_major == 2 && - rscreen->info.drm_minor < 40) { - res->domains = RADEON_DOMAIN_GTT; - res->flags |= RADEON_FLAG_GTT_WC; - break; - } - /* fall through */ - case PIPE_USAGE_DEFAULT: - case PIPE_USAGE_IMMUTABLE: - default: - /* Not listing GTT here improves performance in some - * apps. */ - res->domains = RADEON_DOMAIN_VRAM; - res->flags |= RADEON_FLAG_GTT_WC; - break; - } - - if (res->b.b.target == PIPE_BUFFER && - res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | - PIPE_RESOURCE_FLAG_MAP_COHERENT)) { - /* Use GTT for all persistent mappings with older - * kernels, because they didn't always flush the HDP - * cache before CS execution. 
- * - * Write-combined CPU mappings are fine, the kernel - * ensures all CPU writes finish before the GPU - * executes a command stream. - */ - if (rscreen->info.drm_major == 2 && - rscreen->info.drm_minor < 40) - res->domains = RADEON_DOMAIN_GTT; - } - - /* Tiled textures are unmappable. Always put them in VRAM. */ - if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) || - res->b.b.flags & R600_RESOURCE_FLAG_UNMAPPABLE) { - res->domains = RADEON_DOMAIN_VRAM; - res->flags |= RADEON_FLAG_NO_CPU_ACCESS | - RADEON_FLAG_GTT_WC; - } - - /* Displayable and shareable surfaces are not suballocated. */ - if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) - res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */ - else - res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; - - /* If VRAM is just stolen system memory, allow both VRAM and - * GTT, whichever has free space. If a buffer is evicted from - * VRAM to GTT, it will stay there. - * - * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only - * placements even with a low amount of stolen VRAM. - */ - if (!rscreen->info.has_dedicated_vram && - (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) && - res->domains == RADEON_DOMAIN_VRAM) { - res->domains = RADEON_DOMAIN_VRAM_GTT; - res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */ - } - - if (rscreen->debug_flags & DBG(NO_WC)) - res->flags &= ~RADEON_FLAG_GTT_WC; - - /* Set expected VRAM and GART usage for the buffer. */ - res->vram_usage = 0; - res->gart_usage = 0; - - if (res->domains & RADEON_DOMAIN_VRAM) - res->vram_usage = size; - else if (res->domains & RADEON_DOMAIN_GTT) - res->gart_usage = size; -} - -bool si_alloc_resource(struct r600_common_screen *rscreen, - struct r600_resource *res) -{ - struct pb_buffer *old_buf, *new_buf; - - /* Allocate a new resource. */ - new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size, - res->bo_alignment, - res->domains, res->flags); - if (!new_buf) { - return false; - } - - /* Replace the pointer such that if res->buf wasn't NULL, it won't be - * NULL. This should prevent crashes with multiple contexts using - * the same buffer where one of the contexts invalidates it while - * the others are using it. */ - old_buf = res->buf; - res->buf = new_buf; /* should be atomic */ - - if (rscreen->info.has_virtual_memory) - res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf); - else - res->gpu_address = 0; - - pb_reference(&old_buf, NULL); - - util_range_set_empty(&res->valid_buffer_range); - res->TC_L2_dirty = false; - - /* Print debug information. */ - if (rscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) { - fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n", - res->gpu_address, res->gpu_address + res->buf->size, - res->buf->size); - } - return true; -} - -static void r600_buffer_destroy(struct pipe_screen *screen, - struct pipe_resource *buf) -{ - struct r600_resource *rbuffer = r600_resource(buf); - - threaded_resource_deinit(buf); - util_range_destroy(&rbuffer->valid_buffer_range); - pb_reference(&rbuffer->buf, NULL); - FREE(rbuffer); -} - -static bool -r600_invalidate_buffer(struct r600_common_context *rctx, - struct r600_resource *rbuffer) -{ - /* Shared buffers can't be reallocated. */ - if (rbuffer->b.is_shared) - return false; - - /* Sparse buffers can't be reallocated. 
*/ - if (rbuffer->flags & RADEON_FLAG_SPARSE) - return false; - - /* In AMD_pinned_memory, the user pointer association only gets - * broken when the buffer is explicitly re-allocated. - */ - if (rbuffer->b.is_user_ptr) - return false; - - /* Check if mapping this buffer would cause waiting for the GPU. */ - if (si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) || - !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { - rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b); - } else { - util_range_set_empty(&rbuffer->valid_buffer_range); - } - - return true; -} - -/* Replace the storage of dst with src. */ -void si_replace_buffer_storage(struct pipe_context *ctx, - struct pipe_resource *dst, - struct pipe_resource *src) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_resource *rdst = r600_resource(dst); - struct r600_resource *rsrc = r600_resource(src); - uint64_t old_gpu_address = rdst->gpu_address; - - pb_reference(&rdst->buf, rsrc->buf); - rdst->gpu_address = rsrc->gpu_address; - rdst->b.b.bind = rsrc->b.b.bind; - rdst->flags = rsrc->flags; - - assert(rdst->vram_usage == rsrc->vram_usage); - assert(rdst->gart_usage == rsrc->gart_usage); - assert(rdst->bo_size == rsrc->bo_size); - assert(rdst->bo_alignment == rsrc->bo_alignment); - assert(rdst->domains == rsrc->domains); - - rctx->rebind_buffer(ctx, dst, old_gpu_address); -} - -void si_invalidate_resource(struct pipe_context *ctx, - struct pipe_resource *resource) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct r600_resource *rbuffer = r600_resource(resource); - - /* We currently only do anyting here for buffers */ - if (resource->target == PIPE_BUFFER) - (void)r600_invalidate_buffer(rctx, rbuffer); -} - -static void *r600_buffer_get_transfer(struct pipe_context *ctx, - struct pipe_resource *resource, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **ptransfer, - void *data, struct r600_resource *staging, - unsigned offset) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct r600_transfer *transfer; - - if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) - transfer = slab_alloc(&rctx->pool_transfers_unsync); - else - transfer = slab_alloc(&rctx->pool_transfers); - - transfer->b.b.resource = NULL; - pipe_resource_reference(&transfer->b.b.resource, resource); - transfer->b.b.level = 0; - transfer->b.b.usage = usage; - transfer->b.b.box = *box; - transfer->b.b.stride = 0; - transfer->b.b.layer_stride = 0; - transfer->b.staging = NULL; - transfer->offset = offset; - transfer->staging = staging; - *ptransfer = &transfer->b.b; - return data; -} - -static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, - unsigned dstx, unsigned srcx, unsigned size) -{ - bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); - - return rctx->screen->has_cp_dma || - (dword_aligned && (rctx->dma.cs || - rctx->screen->has_streamout)); - -} - -static void *r600_buffer_transfer_map(struct pipe_context *ctx, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **ptransfer) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; - struct r600_resource *rbuffer = r600_resource(resource); - uint8_t *data; - - assert(box->x + box->width <= resource->width0); - - /* From GL_AMD_pinned_memory issues: - * - * 4) Is glMapBuffer on a 
shared buffer guaranteed to return the - * same system address which was specified at creation time? - * - * RESOLVED: NO. The GL implementation might return a different - * virtual mapping of that memory, although the same physical - * page will be used. - * - * So don't ever use staging buffers. - */ - if (rbuffer->b.is_user_ptr) - usage |= PIPE_TRANSFER_PERSISTENT; - - /* See if the buffer range being mapped has never been initialized, - * in which case it can be mapped unsynchronized. */ - if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | - TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) && - usage & PIPE_TRANSFER_WRITE && - !rbuffer->b.is_shared && - !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { - usage |= PIPE_TRANSFER_UNSYNCHRONIZED; - } - - /* If discarding the entire range, discard the whole resource instead. */ - if (usage & PIPE_TRANSFER_DISCARD_RANGE && - box->x == 0 && box->width == resource->width0) { - usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; - } - - if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && - !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | - TC_TRANSFER_MAP_NO_INVALIDATE))) { - assert(usage & PIPE_TRANSFER_WRITE); - - if (r600_invalidate_buffer(rctx, rbuffer)) { - /* At this point, the buffer is always idle. */ - usage |= PIPE_TRANSFER_UNSYNCHRONIZED; - } else { - /* Fall back to a temporary buffer. */ - usage |= PIPE_TRANSFER_DISCARD_RANGE; - } - } - - if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && - !(rscreen->debug_flags & DBG(NO_DISCARD_RANGE)) && - ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_PERSISTENT)) && - r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) || - (rbuffer->flags & RADEON_FLAG_SPARSE))) { - assert(usage & PIPE_TRANSFER_WRITE); - - /* Check if mapping this buffer would cause waiting for the GPU. - */ - if (rbuffer->flags & RADEON_FLAG_SPARSE || - si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) || - !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { - /* Do a wait-free write-only transfer using a temporary buffer. */ - unsigned offset; - struct r600_resource *staging = NULL; - - u_upload_alloc(ctx->stream_uploader, 0, - box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT), - rctx->screen->info.tcc_cache_line_size, - &offset, (struct pipe_resource**)&staging, - (void**)&data); - - if (staging) { - data += box->x % R600_MAP_BUFFER_ALIGNMENT; - return r600_buffer_get_transfer(ctx, resource, usage, box, - ptransfer, data, staging, offset); - } else if (rbuffer->flags & RADEON_FLAG_SPARSE) { - return NULL; - } - } else { - /* At this point, the buffer is always idle (we checked it above). */ - usage |= PIPE_TRANSFER_UNSYNCHRONIZED; - } - } - /* Use a staging buffer in cached GTT for reads. */ - else if (((usage & PIPE_TRANSFER_READ) && - !(usage & PIPE_TRANSFER_PERSISTENT) && - (rbuffer->domains & RADEON_DOMAIN_VRAM || - rbuffer->flags & RADEON_FLAG_GTT_WC) && - r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) || - (rbuffer->flags & RADEON_FLAG_SPARSE)) { - struct r600_resource *staging; - - assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)); - staging = (struct r600_resource*) pipe_buffer_create( - ctx->screen, 0, PIPE_USAGE_STAGING, - box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT)); - if (staging) { - /* Copy the VRAM buffer to the staging buffer. 
*/ - rctx->dma_copy(ctx, &staging->b.b, 0, - box->x % R600_MAP_BUFFER_ALIGNMENT, - 0, 0, resource, 0, box); - - data = si_buffer_map_sync_with_rings(rctx, staging, - usage & ~PIPE_TRANSFER_UNSYNCHRONIZED); - if (!data) { - r600_resource_reference(&staging, NULL); - return NULL; - } - data += box->x % R600_MAP_BUFFER_ALIGNMENT; - - return r600_buffer_get_transfer(ctx, resource, usage, box, - ptransfer, data, staging, 0); - } else if (rbuffer->flags & RADEON_FLAG_SPARSE) { - return NULL; - } - } - - data = si_buffer_map_sync_with_rings(rctx, rbuffer, usage); - if (!data) { - return NULL; - } - data += box->x; - - return r600_buffer_get_transfer(ctx, resource, usage, box, - ptransfer, data, NULL, 0); -} - -static void r600_buffer_do_flush_region(struct pipe_context *ctx, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_resource *rbuffer = r600_resource(transfer->resource); - - if (rtransfer->staging) { - struct pipe_resource *dst, *src; - unsigned soffset; - struct pipe_box dma_box; - - dst = transfer->resource; - src = &rtransfer->staging->b.b; - soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT; - - u_box_1d(soffset, box->width, &dma_box); - - /* Copy the staging buffer into the original one. */ - ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box); - } - - util_range_add(&rbuffer->valid_buffer_range, box->x, - box->x + box->width); -} - -static void r600_buffer_flush_region(struct pipe_context *ctx, - struct pipe_transfer *transfer, - const struct pipe_box *rel_box) -{ - unsigned required_usage = PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_FLUSH_EXPLICIT; - - if ((transfer->usage & required_usage) == required_usage) { - struct pipe_box box; - - u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box); - r600_buffer_do_flush_region(ctx, transfer, &box); - } -} - -static void r600_buffer_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer *transfer) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - - if (transfer->usage & PIPE_TRANSFER_WRITE && - !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) - r600_buffer_do_flush_region(ctx, transfer, &transfer->box); - - r600_resource_reference(&rtransfer->staging, NULL); - assert(rtransfer->b.staging == NULL); /* for threaded context only */ - pipe_resource_reference(&transfer->resource, NULL); - - /* Don't use pool_transfers_unsync. We are always in the driver - * thread. 
*/ - slab_free(&rctx->pool_transfers, transfer); -} - -void si_buffer_subdata(struct pipe_context *ctx, - struct pipe_resource *buffer, - unsigned usage, unsigned offset, - unsigned size, const void *data) -{ - struct pipe_transfer *transfer = NULL; - struct pipe_box box; - uint8_t *map = NULL; - - u_box_1d(offset, size, &box); - map = r600_buffer_transfer_map(ctx, buffer, 0, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_DISCARD_RANGE | - usage, - &box, &transfer); - if (!map) - return; - - memcpy(map, data, size); - r600_buffer_transfer_unmap(ctx, transfer); -} - -static const struct u_resource_vtbl r600_buffer_vtbl = -{ - NULL, /* get_handle */ - r600_buffer_destroy, /* resource_destroy */ - r600_buffer_transfer_map, /* transfer_map */ - r600_buffer_flush_region, /* transfer_flush_region */ - r600_buffer_transfer_unmap, /* transfer_unmap */ -}; - -static struct r600_resource * -r600_alloc_buffer_struct(struct pipe_screen *screen, - const struct pipe_resource *templ) -{ - struct r600_resource *rbuffer; - - rbuffer = MALLOC_STRUCT(r600_resource); - - rbuffer->b.b = *templ; - rbuffer->b.b.next = NULL; - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.screen = screen; - - rbuffer->b.vtbl = &r600_buffer_vtbl; - threaded_resource_init(&rbuffer->b.b); - - rbuffer->buf = NULL; - rbuffer->bind_history = 0; - rbuffer->TC_L2_dirty = false; - util_range_init(&rbuffer->valid_buffer_range); - return rbuffer; -} - -struct pipe_resource *si_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ, - unsigned alignment) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); - - si_init_resource_fields(rscreen, rbuffer, templ->width0, alignment); - - if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) - rbuffer->flags |= RADEON_FLAG_SPARSE; - - if (!si_alloc_resource(rscreen, rbuffer)) { - FREE(rbuffer); - return NULL; - } - return &rbuffer->b.b; -} - -struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen, - unsigned flags, - unsigned usage, - unsigned size, - unsigned alignment) -{ - struct pipe_resource buffer; - - memset(&buffer, 0, sizeof buffer); - buffer.target = PIPE_BUFFER; - buffer.format = PIPE_FORMAT_R8_UNORM; - buffer.bind = 0; - buffer.usage = usage; - buffer.flags = flags; - buffer.width0 = size; - buffer.height0 = 1; - buffer.depth0 = 1; - buffer.array_size = 1; - return si_buffer_create(screen, &buffer, alignment); -} - -struct pipe_resource * -si_buffer_from_user_memory(struct pipe_screen *screen, - const struct pipe_resource *templ, - void *user_memory) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_winsys *ws = rscreen->ws; - struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); - - rbuffer->domains = RADEON_DOMAIN_GTT; - rbuffer->flags = 0; - rbuffer->b.is_user_ptr = true; - util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0); - util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0); - - /* Convert a user pointer to a buffer. 
*/ - rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); - if (!rbuffer->buf) { - FREE(rbuffer); - return NULL; - } - - if (rscreen->info.has_virtual_memory) - rbuffer->gpu_address = - ws->buffer_get_virtual_address(rbuffer->buf); - else - rbuffer->gpu_address = 0; - - rbuffer->vram_usage = 0; - rbuffer->gart_usage = templ->width0; - - return &rbuffer->b.b; -} diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h b/lib/mesa/src/gallium/drivers/radeon/r600_cs.h deleted file mode 100644 index 03a04b754..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Marek Olšák <maraeo@gmail.com> - */ - -/** - * This file contains helpers for writing commands to commands streams. - */ - -#ifndef R600_CS_H -#define R600_CS_H - -#include "r600_pipe_common.h" -#include "r600d_common.h" - -static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx, - struct r600_ring *ring, - struct r600_resource *rbo, - enum radeon_bo_usage usage, - enum radeon_bo_priority priority) -{ - assert(usage); - - /* Make sure that all previous rings are flushed so that everything - * looks serialized from the driver point of view. 
- */ - if (!ring->flushing) { - if (ring == &rctx->rings.gfx) { - if (rctx->rings.dma.cs) { - /* flush dma ring */ - rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL); - } - } else { - /* flush gfx ring */ - rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL); - } - } - return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, - rbo->domains, priority) * 4; -} - -static inline void r600_emit_reloc(struct r600_common_context *rctx, - struct r600_ring *ring, struct r600_resource *rbo, - enum radeon_bo_usage usage, - enum radeon_bo_priority priority) -{ - struct radeon_winsys_cs *cs = ring->cs; - bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address; - unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority); - - if (!has_vm) { - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, reloc); - } -} - -static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) -{ - assert(reg < R600_CONTEXT_REG_OFFSET); - assert(cs->cdw+2+num <= cs->max_dw); - radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); - radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2); -} - -static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) -{ - r600_write_config_reg_seq(cs, reg, 1); - radeon_emit(cs, value); -} - -static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) -{ - assert(reg >= R600_CONTEXT_REG_OFFSET); - assert(cs->cdw+2+num <= cs->max_dw); - radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); - radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2); -} - -static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) -{ - r600_write_context_reg_seq(cs, reg, 1); - radeon_emit(cs, value); -} - -static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) -{ - assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); - assert(cs->cdw+2+num <= cs->max_dw); - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); - radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); -} - -static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) -{ - si_write_sh_reg_seq(cs, reg, 1); - radeon_emit(cs, value); -} - -static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) -{ - assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw+2+num <= cs->max_dw); - radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); - radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); -} - -static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) -{ - cik_write_uconfig_reg_seq(cs, reg, 1); - radeon_emit(cs, value); -} - -#endif diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c b/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c deleted file mode 100644 index 625370b8e..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: Marek Olšák <maraeo@gmail.com> - * - */ - -/* The GPU load is measured as follows. - * - * There is a thread which samples the GRBM_STATUS register at a certain - * frequency and the "busy" or "idle" counter is incremented based on - * whether the GUI_ACTIVE bit is set or not. - * - * Then, the user can sample the counters twice and calculate the average - * GPU load between the two samples. - */ - -#include "r600_pipe_common.h" -#include "r600_query.h" -#include "os/os_time.h" - -/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher - * fps (there are too few samples per frame). */ -#define SAMPLES_PER_SEC 10000 - -#define GRBM_STATUS 0x8010 -#define TA_BUSY(x) (((x) >> 14) & 0x1) -#define GDS_BUSY(x) (((x) >> 15) & 0x1) -#define VGT_BUSY(x) (((x) >> 17) & 0x1) -#define IA_BUSY(x) (((x) >> 19) & 0x1) -#define SX_BUSY(x) (((x) >> 20) & 0x1) -#define WD_BUSY(x) (((x) >> 21) & 0x1) -#define SPI_BUSY(x) (((x) >> 22) & 0x1) -#define BCI_BUSY(x) (((x) >> 23) & 0x1) -#define SC_BUSY(x) (((x) >> 24) & 0x1) -#define PA_BUSY(x) (((x) >> 25) & 0x1) -#define DB_BUSY(x) (((x) >> 26) & 0x1) -#define CP_BUSY(x) (((x) >> 29) & 0x1) -#define CB_BUSY(x) (((x) >> 30) & 0x1) -#define GUI_ACTIVE(x) (((x) >> 31) & 0x1) - -#define SRBM_STATUS2 0x0e4c -#define SDMA_BUSY(x) (((x) >> 5) & 0x1) - -#define CP_STAT 0x8680 -#define PFP_BUSY(x) (((x) >> 15) & 0x1) -#define MEQ_BUSY(x) (((x) >> 16) & 0x1) -#define ME_BUSY(x) (((x) >> 17) & 0x1) -#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1) -#define DMA_BUSY(x) (((x) >> 22) & 0x1) -#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1) - -#define IDENTITY(x) x - -#define UPDATE_COUNTER(field, mask) \ - do { \ - if (mask(value)) \ - p_atomic_inc(&counters->named.field.busy); \ - else \ - p_atomic_inc(&counters->named.field.idle); \ - } while (0) - -static void r600_update_mmio_counters(struct r600_common_screen *rscreen, - union r600_mmio_counters *counters) -{ - uint32_t value = 0; - bool gui_busy, sdma_busy = false; - - /* GRBM_STATUS */ - rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value); - - UPDATE_COUNTER(ta, TA_BUSY); - UPDATE_COUNTER(gds, GDS_BUSY); - UPDATE_COUNTER(vgt, VGT_BUSY); - UPDATE_COUNTER(ia, IA_BUSY); - UPDATE_COUNTER(sx, SX_BUSY); - UPDATE_COUNTER(wd, WD_BUSY); - UPDATE_COUNTER(spi, SPI_BUSY); - UPDATE_COUNTER(bci, BCI_BUSY); - UPDATE_COUNTER(sc, SC_BUSY); - UPDATE_COUNTER(pa, PA_BUSY); - 
UPDATE_COUNTER(db, DB_BUSY); - UPDATE_COUNTER(cp, CP_BUSY); - UPDATE_COUNTER(cb, CB_BUSY); - UPDATE_COUNTER(gui, GUI_ACTIVE); - gui_busy = GUI_ACTIVE(value); - - if (rscreen->chip_class == CIK || rscreen->chip_class == VI) { - /* SRBM_STATUS2 */ - rscreen->ws->read_registers(rscreen->ws, SRBM_STATUS2, 1, &value); - - UPDATE_COUNTER(sdma, SDMA_BUSY); - sdma_busy = SDMA_BUSY(value); - } - - if (rscreen->chip_class >= VI) { - /* CP_STAT */ - rscreen->ws->read_registers(rscreen->ws, CP_STAT, 1, &value); - - UPDATE_COUNTER(pfp, PFP_BUSY); - UPDATE_COUNTER(meq, MEQ_BUSY); - UPDATE_COUNTER(me, ME_BUSY); - UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY); - UPDATE_COUNTER(cp_dma, DMA_BUSY); - UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY); - } - - value = gui_busy || sdma_busy; - UPDATE_COUNTER(gpu, IDENTITY); -} - -#undef UPDATE_COUNTER - -static int -r600_gpu_load_thread(void *param) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)param; - const int period_us = 1000000 / SAMPLES_PER_SEC; - int sleep_us = period_us; - int64_t cur_time, last_time = os_time_get(); - - while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) { - if (sleep_us) - os_time_sleep(sleep_us); - - /* Make sure we sleep the ideal amount of time to match - * the expected frequency. */ - cur_time = os_time_get(); - - if (os_time_timeout(last_time, last_time + period_us, - cur_time)) - sleep_us = MAX2(sleep_us - 1, 1); - else - sleep_us += 1; - - /*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/ - last_time = cur_time; - - /* Update the counters. */ - r600_update_mmio_counters(rscreen, &rscreen->mmio_counters); - } - p_atomic_dec(&rscreen->gpu_load_stop_thread); - return 0; -} - -void si_gpu_load_kill_thread(struct r600_common_screen *rscreen) -{ - if (!rscreen->gpu_load_thread) - return; - - p_atomic_inc(&rscreen->gpu_load_stop_thread); - thrd_join(rscreen->gpu_load_thread, NULL); - rscreen->gpu_load_thread = 0; -} - -static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen, - unsigned busy_index) -{ - /* Start the thread if needed. */ - if (!rscreen->gpu_load_thread) { - mtx_lock(&rscreen->gpu_load_mutex); - /* Check again inside the mutex. */ - if (!rscreen->gpu_load_thread) - rscreen->gpu_load_thread = - u_thread_create(r600_gpu_load_thread, rscreen); - mtx_unlock(&rscreen->gpu_load_mutex); - } - - unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]); - unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]); - - return busy | ((uint64_t)idle << 32); -} - -static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen, - uint64_t begin, unsigned busy_index) -{ - uint64_t end = r600_read_mmio_counter(rscreen, busy_index); - unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff); - unsigned idle = (end >> 32) - (begin >> 32); - - /* Calculate the % of time the busy counter was being incremented. - * - * If no counters were incremented, return the current counter status. - * It's for the case when the load is queried faster than - * the counters are updated. - */ - if (idle || busy) { - return busy*100 / (busy + idle); - } else { - union r600_mmio_counters counters; - - memset(&counters, 0, sizeof(counters)); - r600_update_mmio_counters(rscreen, &counters); - return counters.array[busy_index] ? 
100 : 0; - } -} - -#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \ - rscreen->mmio_counters.array) - -static unsigned busy_index_from_type(struct r600_common_screen *rscreen, - unsigned type) -{ - switch (type) { - case R600_QUERY_GPU_LOAD: - return BUSY_INDEX(rscreen, gpu); - case R600_QUERY_GPU_SHADERS_BUSY: - return BUSY_INDEX(rscreen, spi); - case R600_QUERY_GPU_TA_BUSY: - return BUSY_INDEX(rscreen, ta); - case R600_QUERY_GPU_GDS_BUSY: - return BUSY_INDEX(rscreen, gds); - case R600_QUERY_GPU_VGT_BUSY: - return BUSY_INDEX(rscreen, vgt); - case R600_QUERY_GPU_IA_BUSY: - return BUSY_INDEX(rscreen, ia); - case R600_QUERY_GPU_SX_BUSY: - return BUSY_INDEX(rscreen, sx); - case R600_QUERY_GPU_WD_BUSY: - return BUSY_INDEX(rscreen, wd); - case R600_QUERY_GPU_BCI_BUSY: - return BUSY_INDEX(rscreen, bci); - case R600_QUERY_GPU_SC_BUSY: - return BUSY_INDEX(rscreen, sc); - case R600_QUERY_GPU_PA_BUSY: - return BUSY_INDEX(rscreen, pa); - case R600_QUERY_GPU_DB_BUSY: - return BUSY_INDEX(rscreen, db); - case R600_QUERY_GPU_CP_BUSY: - return BUSY_INDEX(rscreen, cp); - case R600_QUERY_GPU_CB_BUSY: - return BUSY_INDEX(rscreen, cb); - case R600_QUERY_GPU_SDMA_BUSY: - return BUSY_INDEX(rscreen, sdma); - case R600_QUERY_GPU_PFP_BUSY: - return BUSY_INDEX(rscreen, pfp); - case R600_QUERY_GPU_MEQ_BUSY: - return BUSY_INDEX(rscreen, meq); - case R600_QUERY_GPU_ME_BUSY: - return BUSY_INDEX(rscreen, me); - case R600_QUERY_GPU_SURF_SYNC_BUSY: - return BUSY_INDEX(rscreen, surf_sync); - case R600_QUERY_GPU_CP_DMA_BUSY: - return BUSY_INDEX(rscreen, cp_dma); - case R600_QUERY_GPU_SCRATCH_RAM_BUSY: - return BUSY_INDEX(rscreen, scratch_ram); - default: - unreachable("invalid query type"); - } -} - -uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type) -{ - unsigned busy_index = busy_index_from_type(rscreen, type); - return r600_read_mmio_counter(rscreen, busy_index); -} - -unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type, - uint64_t begin) -{ - unsigned busy_index = busy_index_from_type(rscreen, type); - return r600_end_mmio_counter(rscreen, begin, busy_index); -} diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c b/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c index 6c68dc469..57c324689 100644 --- a/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c +++ b/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c @@ -1,5 +1,6 @@ /* * Copyright 2015 Advanced Micro Devices, Inc. + * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,25 +20,21 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
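si_begin_counter and si_end_counter above operate on one packed uint64_t: busy samples in the low 32 bits, idle samples in the high 32 bits, so the two snapshots can be diffed field-wise with ordinary unsigned (wraparound-safe) arithmetic. A worked sketch of the same math with made-up sample counts:

    #include <assert.h>
    #include <stdint.h>

    static unsigned load_percent(uint64_t begin, uint64_t end)
    {
            unsigned busy = (uint32_t)end - (uint32_t)begin;
            unsigned idle = (uint32_t)(end >> 32) - (uint32_t)(begin >> 32);

            return (busy || idle) ? busy * 100 / (busy + idle) : 0;
    }

    int main(void)
    {
            /* 7000 busy and 3000 idle samples between snapshots -> 70% load. */
            uint64_t begin = 1000u | ((uint64_t)500 << 32);
            uint64_t end   = 8000u | ((uint64_t)3500 << 32);

            assert(load_percent(begin, end) == 70);
            return 0;
    }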
- * - * Authors: - * Nicolai Hähnle <nicolai.haehnle@amd.com> - * */ #include "util/u_memory.h" -#include "r600_query.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_query.h" +#include "radeonsi/si_pipe.h" #include "amd/common/sid.h" /* Max counters per HW block */ -#define R600_QUERY_MAX_COUNTERS 16 +#define SI_QUERY_MAX_COUNTERS 16 -static struct r600_perfcounter_block * -lookup_counter(struct r600_perfcounters *pc, unsigned index, +static struct si_perfcounter_block * +lookup_counter(struct si_perfcounters *pc, unsigned index, unsigned *base_gid, unsigned *sub_index) { - struct r600_perfcounter_block *block = pc->blocks; + struct si_perfcounter_block *block = pc->blocks; unsigned bid; *base_gid = 0; @@ -56,11 +53,11 @@ lookup_counter(struct r600_perfcounters *pc, unsigned index, return NULL; } -static struct r600_perfcounter_block * -lookup_group(struct r600_perfcounters *pc, unsigned *index) +static struct si_perfcounter_block * +lookup_group(struct si_perfcounters *pc, unsigned *index) { unsigned bid; - struct r600_perfcounter_block *block = pc->blocks; + struct si_perfcounter_block *block = pc->blocks; for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { if (*index < block->num_groups) @@ -71,113 +68,113 @@ lookup_group(struct r600_perfcounters *pc, unsigned *index) return NULL; } -struct r600_pc_group { - struct r600_pc_group *next; - struct r600_perfcounter_block *block; +struct si_pc_group { + struct si_pc_group *next; + struct si_perfcounter_block *block; unsigned sub_gid; /* only used during init */ unsigned result_base; /* only used during init */ int se; int instance; unsigned num_counters; - unsigned selectors[R600_QUERY_MAX_COUNTERS]; + unsigned selectors[SI_QUERY_MAX_COUNTERS]; }; -struct r600_pc_counter { +struct si_pc_counter { unsigned base; unsigned qwords; unsigned stride; /* in uint64s */ }; -#define R600_PC_SHADERS_WINDOWING (1 << 31) +#define SI_PC_SHADERS_WINDOWING (1 << 31) -struct r600_query_pc { - struct r600_query_hw b; +struct si_query_pc { + struct si_query_hw b; unsigned shaders; unsigned num_counters; - struct r600_pc_counter *counters; - struct r600_pc_group *groups; + struct si_pc_counter *counters; + struct si_pc_group *groups; }; -static void r600_pc_query_destroy(struct r600_common_screen *rscreen, - struct r600_query *rquery) +static void si_pc_query_destroy(struct si_screen *sscreen, + struct si_query *rquery) { - struct r600_query_pc *query = (struct r600_query_pc *)rquery; + struct si_query_pc *query = (struct si_query_pc *)rquery; while (query->groups) { - struct r600_pc_group *group = query->groups; + struct si_pc_group *group = query->groups; query->groups = group->next; FREE(group); } FREE(query->counters); - si_query_hw_destroy(rscreen, rquery); + si_query_hw_destroy(sscreen, rquery); } -static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen, - struct r600_query_hw *hwquery, - struct r600_resource *buffer) +static bool si_pc_query_prepare_buffer(struct si_screen *screen, + struct si_query_hw *hwquery, + struct r600_resource *buffer) { /* no-op */ return true; } -static void r600_pc_query_emit_start(struct r600_common_context *ctx, - struct r600_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) +static void si_pc_query_emit_start(struct si_context *sctx, + struct si_query_hw *hwquery, + struct r600_resource *buffer, uint64_t va) { - struct r600_perfcounters *pc = ctx->screen->perfcounters; - struct r600_query_pc *query = (struct r600_query_pc *)hwquery; - struct r600_pc_group *group; + struct 
si_perfcounters *pc = sctx->screen->perfcounters; + struct si_query_pc *query = (struct si_query_pc *)hwquery; + struct si_pc_group *group; int current_se = -1; int current_instance = -1; if (query->shaders) - pc->emit_shaders(ctx, query->shaders); + pc->emit_shaders(sctx, query->shaders); for (group = query->groups; group; group = group->next) { - struct r600_perfcounter_block *block = group->block; + struct si_perfcounter_block *block = group->block; if (group->se != current_se || group->instance != current_instance) { current_se = group->se; current_instance = group->instance; - pc->emit_instance(ctx, group->se, group->instance); + pc->emit_instance(sctx, group->se, group->instance); } - pc->emit_select(ctx, block, group->num_counters, group->selectors); + pc->emit_select(sctx, block, group->num_counters, group->selectors); } if (current_se != -1 || current_instance != -1) - pc->emit_instance(ctx, -1, -1); + pc->emit_instance(sctx, -1, -1); - pc->emit_start(ctx, buffer, va); + pc->emit_start(sctx, buffer, va); } -static void r600_pc_query_emit_stop(struct r600_common_context *ctx, - struct r600_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) +static void si_pc_query_emit_stop(struct si_context *sctx, + struct si_query_hw *hwquery, + struct r600_resource *buffer, uint64_t va) { - struct r600_perfcounters *pc = ctx->screen->perfcounters; - struct r600_query_pc *query = (struct r600_query_pc *)hwquery; - struct r600_pc_group *group; + struct si_perfcounters *pc = sctx->screen->perfcounters; + struct si_query_pc *query = (struct si_query_pc *)hwquery; + struct si_pc_group *group; - pc->emit_stop(ctx, buffer, va); + pc->emit_stop(sctx, buffer, va); for (group = query->groups; group; group = group->next) { - struct r600_perfcounter_block *block = group->block; + struct si_perfcounter_block *block = group->block; unsigned se = group->se >= 0 ? group->se : 0; unsigned se_end = se + 1; - if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0)) - se_end = ctx->screen->info.max_se; + if ((block->flags & SI_PC_BLOCK_SE) && (group->se < 0)) + se_end = sctx->screen->info.max_se; do { unsigned instance = group->instance >= 0 ? 
group->instance : 0; do { - pc->emit_instance(ctx, se, instance); - pc->emit_read(ctx, block, + pc->emit_instance(sctx, se, instance); + pc->emit_read(sctx, block, group->num_counters, group->selectors, buffer, va); va += sizeof(uint64_t) * group->num_counters; @@ -185,28 +182,28 @@ static void r600_pc_query_emit_stop(struct r600_common_context *ctx, } while (++se < se_end); } - pc->emit_instance(ctx, -1, -1); + pc->emit_instance(sctx, -1, -1); } -static void r600_pc_query_clear_result(struct r600_query_hw *hwquery, - union pipe_query_result *result) +static void si_pc_query_clear_result(struct si_query_hw *hwquery, + union pipe_query_result *result) { - struct r600_query_pc *query = (struct r600_query_pc *)hwquery; + struct si_query_pc *query = (struct si_query_pc *)hwquery; memset(result, 0, sizeof(result->batch[0]) * query->num_counters); } -static void r600_pc_query_add_result(struct r600_common_screen *rscreen, - struct r600_query_hw *hwquery, - void *buffer, - union pipe_query_result *result) +static void si_pc_query_add_result(struct si_screen *sscreen, + struct si_query_hw *hwquery, + void *buffer, + union pipe_query_result *result) { - struct r600_query_pc *query = (struct r600_query_pc *)hwquery; + struct si_query_pc *query = (struct si_query_pc *)hwquery; uint64_t *results = buffer; unsigned i, j; for (i = 0; i < query->num_counters; ++i) { - struct r600_pc_counter *counter = &query->counters[i]; + struct si_pc_counter *counter = &query->counters[i]; for (j = 0; j < counter->qwords; ++j) { uint32_t value = results[counter->base + j * counter->stride]; @@ -215,27 +212,27 @@ static void r600_pc_query_add_result(struct r600_common_screen *rscreen, } } -static struct r600_query_ops batch_query_ops = { - .destroy = r600_pc_query_destroy, +static struct si_query_ops batch_query_ops = { + .destroy = si_pc_query_destroy, .begin = si_query_hw_begin, .end = si_query_hw_end, .get_result = si_query_hw_get_result }; -static struct r600_query_hw_ops batch_query_hw_ops = { - .prepare_buffer = r600_pc_query_prepare_buffer, - .emit_start = r600_pc_query_emit_start, - .emit_stop = r600_pc_query_emit_stop, - .clear_result = r600_pc_query_clear_result, - .add_result = r600_pc_query_add_result, +static struct si_query_hw_ops batch_query_hw_ops = { + .prepare_buffer = si_pc_query_prepare_buffer, + .emit_start = si_pc_query_emit_start, + .emit_stop = si_pc_query_emit_stop, + .clear_result = si_pc_query_clear_result, + .add_result = si_pc_query_add_result, }; -static struct r600_pc_group *get_group_state(struct r600_common_screen *screen, - struct r600_query_pc *query, - struct r600_perfcounter_block *block, +static struct si_pc_group *get_group_state(struct si_screen *screen, + struct si_query_pc *query, + struct si_perfcounter_block *block, unsigned sub_gid) { - struct r600_pc_group *group = query->groups; + struct si_pc_group *group = query->groups; while (group) { if (group->block == block && group->sub_gid == sub_gid) @@ -243,49 +240,49 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen, group = group->next; } - group = CALLOC_STRUCT(r600_pc_group); + group = CALLOC_STRUCT(si_pc_group); if (!group) return NULL; group->block = block; group->sub_gid = sub_gid; - if (block->flags & R600_PC_BLOCK_SHADER) { + if (block->flags & SI_PC_BLOCK_SHADER) { unsigned sub_gids = block->num_instances; unsigned shader_id; unsigned shaders; unsigned query_shaders; - if (block->flags & R600_PC_BLOCK_SE_GROUPS) + if (block->flags & SI_PC_BLOCK_SE_GROUPS) sub_gids = sub_gids * 
screen->info.max_se; shader_id = sub_gid / sub_gids; sub_gid = sub_gid % sub_gids; shaders = screen->perfcounters->shader_type_bits[shader_id]; - query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING; + query_shaders = query->shaders & ~SI_PC_SHADERS_WINDOWING; if (query_shaders && query_shaders != shaders) { - fprintf(stderr, "r600_perfcounter: incompatible shader groups\n"); + fprintf(stderr, "si_perfcounter: incompatible shader groups\n"); FREE(group); return NULL; } query->shaders = shaders; } - if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { + if (block->flags & SI_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { // A non-zero value in query->shaders ensures that the shader // masking is reset unless the user explicitly requests one. - query->shaders = R600_PC_SHADERS_WINDOWING; + query->shaders = SI_PC_SHADERS_WINDOWING; } - if (block->flags & R600_PC_BLOCK_SE_GROUPS) { + if (block->flags & SI_PC_BLOCK_SE_GROUPS) { group->se = sub_gid / block->num_instances; sub_gid = sub_gid % block->num_instances; } else { group->se = -1; } - if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { + if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { group->instance = sub_gid; } else { group->instance = -1; @@ -301,19 +298,19 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries, unsigned *query_types) { - struct r600_common_screen *screen = - (struct r600_common_screen *)ctx->screen; - struct r600_perfcounters *pc = screen->perfcounters; - struct r600_perfcounter_block *block; - struct r600_pc_group *group; - struct r600_query_pc *query; + struct si_screen *screen = + (struct si_screen *)ctx->screen; + struct si_perfcounters *pc = screen->perfcounters; + struct si_perfcounter_block *block; + struct si_pc_group *group; + struct si_query_pc *query; unsigned base_gid, sub_gid, sub_index; unsigned i, j; if (!pc) return NULL; - query = CALLOC_STRUCT(r600_query_pc); + query = CALLOC_STRUCT(si_query_pc); if (!query) return NULL; @@ -326,10 +323,10 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, for (i = 0; i < num_queries; ++i) { unsigned sub_gid; - if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER) + if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER) goto error; - block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, + block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index); if (!block) goto error; @@ -352,19 +349,16 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, } /* Compute result bases and CS size per group */ - query->b.num_cs_dw_begin = pc->num_start_cs_dwords; query->b.num_cs_dw_end = pc->num_stop_cs_dwords; - - query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ query->b.num_cs_dw_end += pc->num_instance_cs_dwords; i = 0; for (group = query->groups; group; group = group->next) { - struct r600_perfcounter_block *block = group->block; - unsigned select_dw, read_dw; + struct si_perfcounter_block *block = group->block; + unsigned read_dw; unsigned instances = 1; - if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) + if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0) instances = screen->info.max_se; if (group->instance < 0) instances *= block->num_instances; @@ -373,27 +367,23 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; i += instances * group->num_counters; - pc->get_size(block, group->num_counters, 
group->selectors, - &select_dw, &read_dw); - query->b.num_cs_dw_begin += select_dw; + read_dw = 6 * group->num_counters; query->b.num_cs_dw_end += instances * read_dw; - query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords; } if (query->shaders) { - if (query->shaders == R600_PC_SHADERS_WINDOWING) + if (query->shaders == SI_PC_SHADERS_WINDOWING) query->shaders = 0xffffffff; - query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords; } /* Map user-supplied query array to result indices */ query->counters = CALLOC(num_queries, sizeof(*query->counters)); for (i = 0; i < num_queries; ++i) { - struct r600_pc_counter *counter = &query->counters[i]; - struct r600_perfcounter_block *block; + struct si_pc_counter *counter = &query->counters[i]; + struct si_perfcounter_block *block; - block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, + block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index); sub_gid = sub_index / block->num_selectors; @@ -411,7 +401,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, counter->stride = group->num_counters; counter->qwords = 1; - if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) + if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0) counter->qwords = screen->info.max_se; if (group->instance < 0) counter->qwords *= block->num_instances; @@ -423,12 +413,12 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, return (struct pipe_query *)query; error: - r600_pc_query_destroy(screen, &query->b.b); + si_pc_query_destroy(screen, &query->b.b); return NULL; } -static bool r600_init_block_names(struct r600_common_screen *screen, - struct r600_perfcounter_block *block) +static bool si_init_block_names(struct si_screen *screen, + struct si_perfcounter_block *block) { unsigned i, j, k; unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; @@ -436,25 +426,25 @@ static bool r600_init_block_names(struct r600_common_screen *screen, char *groupname; char *p; - if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) groups_instance = block->num_instances; - if (block->flags & R600_PC_BLOCK_SE_GROUPS) + if (block->flags & SI_PC_BLOCK_SE_GROUPS) groups_se = screen->info.max_se; - if (block->flags & R600_PC_BLOCK_SHADER) + if (block->flags & SI_PC_BLOCK_SHADER) groups_shader = screen->perfcounters->num_shader_types; namelen = strlen(block->basename); block->group_name_stride = namelen + 1; - if (block->flags & R600_PC_BLOCK_SHADER) + if (block->flags & SI_PC_BLOCK_SHADER) block->group_name_stride += 3; - if (block->flags & R600_PC_BLOCK_SE_GROUPS) { + if (block->flags & SI_PC_BLOCK_SE_GROUPS) { assert(groups_se <= 10); block->group_name_stride += 1; - if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) block->group_name_stride += 1; } - if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { + if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { assert(groups_instance <= 100); block->group_name_stride += 2; } @@ -472,18 +462,18 @@ static bool r600_init_block_names(struct r600_common_screen *screen, strcpy(groupname, block->basename); p = groupname + namelen; - if (block->flags & R600_PC_BLOCK_SHADER) { + if (block->flags & SI_PC_BLOCK_SHADER) { strcpy(p, shader_suffix); p += shaderlen; } - if (block->flags & R600_PC_BLOCK_SE_GROUPS) { + if (block->flags & SI_PC_BLOCK_SE_GROUPS) { p += sprintf(p, "%d", j); - 
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) *p++ = '_'; } - if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) p += sprintf(p, "%d", k); groupname += block->group_name_stride; @@ -511,12 +501,12 @@ static bool r600_init_block_names(struct r600_common_screen *screen, return true; } -int si_get_perfcounter_info(struct r600_common_screen *screen, +int si_get_perfcounter_info(struct si_screen *screen, unsigned index, struct pipe_driver_query_info *info) { - struct r600_perfcounters *pc = screen->perfcounters; - struct r600_perfcounter_block *block; + struct si_perfcounters *pc = screen->perfcounters; + struct si_perfcounter_block *block; unsigned base_gid, sub; if (!pc) @@ -538,11 +528,11 @@ int si_get_perfcounter_info(struct r600_common_screen *screen, return 0; if (!block->selector_names) { - if (!r600_init_block_names(screen, block)) + if (!si_init_block_names(screen, block)) return 0; } info->name = block->selector_names + sub * block->selector_name_stride; - info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index; + info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index; info->max_value.u64 = 0; info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; @@ -553,12 +543,12 @@ int si_get_perfcounter_info(struct r600_common_screen *screen, return 1; } -int si_get_perfcounter_group_info(struct r600_common_screen *screen, +int si_get_perfcounter_group_info(struct si_screen *screen, unsigned index, struct pipe_driver_query_group_info *info) { - struct r600_perfcounters *pc = screen->perfcounters; - struct r600_perfcounter_block *block; + struct si_perfcounters *pc = screen->perfcounters; + struct si_perfcounter_block *block; if (!pc) return 0; @@ -571,7 +561,7 @@ int si_get_perfcounter_group_info(struct r600_common_screen *screen, return 0; if (!block->group_names) { - if (!r600_init_block_names(screen, block)) + if (!si_init_block_names(screen, block)) return 0; } info->name = block->group_names + index * block->group_name_stride; @@ -580,16 +570,16 @@ int si_get_perfcounter_group_info(struct r600_common_screen *screen, return 1; } -void si_perfcounters_destroy(struct r600_common_screen *rscreen) +void si_perfcounters_destroy(struct si_screen *sscreen) { - if (rscreen->perfcounters) - rscreen->perfcounters->cleanup(rscreen); + if (sscreen->perfcounters) + sscreen->perfcounters->cleanup(sscreen); } -bool si_perfcounters_init(struct r600_perfcounters *pc, +bool si_perfcounters_init(struct si_perfcounters *pc, unsigned num_blocks) { - pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block)); + pc->blocks = CALLOC(num_blocks, sizeof(struct si_perfcounter_block)); if (!pc->blocks) return false; @@ -599,15 +589,15 @@ bool si_perfcounters_init(struct r600_perfcounters *pc, return true; } -void si_perfcounters_add_block(struct r600_common_screen *rscreen, - struct r600_perfcounters *pc, +void si_perfcounters_add_block(struct si_screen *sscreen, + struct si_perfcounters *pc, const char *name, unsigned flags, unsigned counters, unsigned selectors, unsigned instances, void *data) { - struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks]; + struct si_perfcounter_block *block = &pc->blocks[pc->num_blocks]; - assert(counters <= R600_QUERY_MAX_COUNTERS); + assert(counters <= SI_QUERY_MAX_COUNTERS); block->basename = name; block->flags = flags; @@ -616,27 +606,27 @@ void si_perfcounters_add_block(struct r600_common_screen *rscreen, 
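si_init_block_names above lays every group name out in one flat allocation at a fixed stride, so a name lookup is just base + index * stride with no per-name pointers. A small sketch of that strided-table pattern, assuming a made-up block with two shader engines and two instances:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            const char *basename = "TA";
            unsigned groups_se = 2, groups_instance = 2;
            /* basename + '_' + SE digit + '_' + instance digits + NUL. */
            unsigned stride = (unsigned)strlen(basename) + 8;
            char *names = calloc((size_t)groups_se * groups_instance, stride);

            if (!names)
                    return 1;
            for (unsigned j = 0; j < groups_se; j++)
                    for (unsigned k = 0; k < groups_instance; k++)
                            snprintf(names + (j * groups_instance + k) * stride,
                                     stride, "%s_%u_%u", basename, j, k);

            printf("%s\n", names + 3 * stride);   /* constant-time lookup: "TA_1_1" */
            free(names);
            return 0;
    }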
block->num_instances = MAX2(instances, 1); block->data = data; - if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE)) - block->flags |= R600_PC_BLOCK_SE_GROUPS; + if (pc->separate_se && (block->flags & SI_PC_BLOCK_SE)) + block->flags |= SI_PC_BLOCK_SE_GROUPS; if (pc->separate_instance && block->num_instances > 1) - block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS; + block->flags |= SI_PC_BLOCK_INSTANCE_GROUPS; - if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { + if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { block->num_groups = block->num_instances; } else { block->num_groups = 1; } - if (block->flags & R600_PC_BLOCK_SE_GROUPS) - block->num_groups *= rscreen->info.max_se; - if (block->flags & R600_PC_BLOCK_SHADER) + if (block->flags & SI_PC_BLOCK_SE_GROUPS) + block->num_groups *= sscreen->info.max_se; + if (block->flags & SI_PC_BLOCK_SHADER) block->num_groups *= pc->num_shader_types; ++pc->num_blocks; pc->num_groups += block->num_groups; } -void si_perfcounters_do_destroy(struct r600_perfcounters *pc) +void si_perfcounters_do_destroy(struct si_perfcounters *pc) { unsigned i; diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c deleted file mode 100644 index e5a31bbba..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c +++ /dev/null @@ -1,1498 +0,0 @@ -/* - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: Marek Olšák <maraeo@gmail.com> - * - */ - -#include "r600_pipe_common.h" -#include "r600_cs.h" -#include "tgsi/tgsi_parse.h" -#include "util/list.h" -#include "util/u_draw_quad.h" -#include "util/u_memory.h" -#include "util/u_format_s3tc.h" -#include "util/u_upload_mgr.h" -#include "os/os_time.h" -#include "vl/vl_decoder.h" -#include "vl/vl_video_buffer.h" -#include "radeon/radeon_video.h" -#include "amd/common/sid.h" -#include <inttypes.h> -#include <sys/utsname.h> -#include <libsync.h> - -#include <llvm-c/TargetMachine.h> - - -struct r600_multi_fence { - struct pipe_reference reference; - struct pipe_fence_handle *gfx; - struct pipe_fence_handle *sdma; - - /* If the context wasn't flushed at fence creation, this is non-NULL. */ - struct { - struct r600_common_context *ctx; - unsigned ib_index; - } gfx_unflushed; -}; - -/* - * shader binary helpers. 
- */ -void si_radeon_shader_binary_init(struct ac_shader_binary *b) -{ - memset(b, 0, sizeof(*b)); -} - -void si_radeon_shader_binary_clean(struct ac_shader_binary *b) -{ - if (!b) - return; - FREE(b->code); - FREE(b->config); - FREE(b->rodata); - FREE(b->global_symbol_offsets); - FREE(b->relocs); - FREE(b->disasm_string); - FREE(b->llvm_ir_string); -} - -/* - * pipe_context - */ - -/** - * Write an EOP event. - * - * \param event EVENT_TYPE_* - * \param event_flags Optional cache flush flags (TC) - * \param data_sel 1 = fence, 3 = timestamp - * \param buf Buffer - * \param va GPU address - * \param old_value Previous fence value (for a bug workaround) - * \param new_value Fence value to write for this event. - */ -void si_gfx_write_event_eop(struct r600_common_context *ctx, - unsigned event, unsigned event_flags, - unsigned data_sel, - struct r600_resource *buf, uint64_t va, - uint32_t new_fence, unsigned query_type) -{ - struct radeon_winsys_cs *cs = ctx->gfx.cs; - unsigned op = EVENT_TYPE(event) | - EVENT_INDEX(5) | - event_flags; - unsigned sel = EOP_DATA_SEL(data_sel); - - /* Wait for write confirmation before writing data, but don't send - * an interrupt. */ - if (data_sel != EOP_DATA_SEL_DISCARD) - sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM); - - if (ctx->chip_class >= GFX9) { - /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion - * counters) must immediately precede every timestamp event to - * prevent a GPU hang on GFX9. - * - * Occlusion queries don't need to do it here, because they - * always do ZPASS_DONE before the timestamp. - */ - if (ctx->chip_class == GFX9 && - query_type != PIPE_QUERY_OCCLUSION_COUNTER && - query_type != PIPE_QUERY_OCCLUSION_PREDICATE && - query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - struct r600_resource *scratch = ctx->eop_bug_scratch; - - assert(16 * ctx->screen->info.num_render_backends <= - scratch->b.b.width0); - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); - radeon_emit(cs, scratch->gpu_address); - radeon_emit(cs, scratch->gpu_address >> 32); - - radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - } - - radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0)); - radeon_emit(cs, op); - radeon_emit(cs, sel); - radeon_emit(cs, va); /* address lo */ - radeon_emit(cs, va >> 32); /* address hi */ - radeon_emit(cs, new_fence); /* immediate data lo */ - radeon_emit(cs, 0); /* immediate data hi */ - radeon_emit(cs, 0); /* unused */ - } else { - if (ctx->chip_class == CIK || - ctx->chip_class == VI) { - struct r600_resource *scratch = ctx->eop_bug_scratch; - uint64_t va = scratch->gpu_address; - - /* Two EOP events are required to make all engines go idle - * (and optional cache flushes executed) before the timestamp - * is written. 
- */ - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, 0); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - - radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - } - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, new_fence); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - } - - if (buf) { - radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, - RADEON_PRIO_QUERY); - } -} - -unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen) -{ - unsigned dwords = 6; - - if (screen->chip_class == CIK || - screen->chip_class == VI) - dwords *= 2; - - if (!screen->info.has_virtual_memory) - dwords += 2; - - return dwords; -} - -void si_gfx_wait_fence(struct r600_common_context *ctx, - uint64_t va, uint32_t ref, uint32_t mask) -{ - struct radeon_winsys_cs *cs = ctx->gfx.cs; - - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, ref); /* reference value */ - radeon_emit(cs, mask); /* mask */ - radeon_emit(cs, 4); /* poll interval */ -} - -static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) -{ - struct radeon_winsys_cs *cs = rctx->dma.cs; - - /* NOP waits for idle on Evergreen and later. */ - if (rctx->chip_class >= CIK) - radeon_emit(cs, 0x00000000); /* NOP */ - else - radeon_emit(cs, 0xf0000000); /* NOP */ -} - -void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, - struct r600_resource *dst, struct r600_resource *src) -{ - uint64_t vram = ctx->dma.cs->used_vram; - uint64_t gtt = ctx->dma.cs->used_gart; - - if (dst) { - vram += dst->vram_usage; - gtt += dst->gart_usage; - } - if (src) { - vram += src->vram_usage; - gtt += src->gart_usage; - } - - /* Flush the GFX IB if DMA depends on it. */ - if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && - ((dst && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf, - RADEON_USAGE_READWRITE)) || - (src && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf, - RADEON_USAGE_WRITE)))) - ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); - - /* Flush if there's not enough space, or if the memory usage per IB - * is too large. - * - * IBs using too little memory are limited by the IB submission overhead. - * IBs using too much memory are limited by the kernel/TTM overhead. - * Too long IBs create CPU-GPU pipeline bubbles and add latency. - * - * This heuristic makes sure that DMA requests are executed - * very soon after the call is made and lowers memory usage. - * It improves texture upload performance by keeping the DMA - * engine busy while uploads are being submitted. - */ - num_dw++; /* for emit_wait_idle below */ - if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) || - ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 || - !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) { - ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); - assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw); - } - - /* Wait for idle if either buffer has been used in the IB before to - * prevent read-after-write hazards. 
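si_gfx_write_fence_dwords above makes the command-stream budget for one fence explicit: a 6-dword EVENT_WRITE_EOP, doubled on CIK/VI where the idle workaround writes a second EOP event first, plus 2 dwords for the NOP relocation (see r600_emit_reloc earlier) when the kernel has no virtual memory. A worked check of that arithmetic:

    #include <assert.h>
    #include <stdbool.h>

    enum chip_class { SI, CIK, VI, GFX9 };

    static unsigned fence_dwords(enum chip_class chip, bool has_virtual_memory)
    {
            unsigned dwords = 6;              /* one EVENT_WRITE_EOP packet */

            if (chip == CIK || chip == VI)
                    dwords *= 2;              /* extra workaround EOP event */
            if (!has_virtual_memory)
                    dwords += 2;              /* PKT3_NOP + relocation index */
            return dwords;
    }

    int main(void)
    {
            assert(fence_dwords(SI, true) == 6);
            assert(fence_dwords(VI, true) == 12);
            assert(fence_dwords(CIK, false) == 14);
            return 0;
    }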
- */ - if ((dst && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf, - RADEON_USAGE_READWRITE)) || - (src && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf, - RADEON_USAGE_WRITE))) - r600_dma_emit_wait_idle(ctx); - - /* If GPUVM is not supported, the CS checker needs 2 entries - * in the buffer list per packet, which has to be done manually. - */ - if (ctx->screen->info.has_virtual_memory) { - if (dst) - radeon_add_to_buffer_list(ctx, &ctx->dma, dst, - RADEON_USAGE_WRITE, - RADEON_PRIO_SDMA_BUFFER); - if (src) - radeon_add_to_buffer_list(ctx, &ctx->dma, src, - RADEON_USAGE_READ, - RADEON_PRIO_SDMA_BUFFER); - } - - /* this function is called before all DMA calls, so increment this. */ - ctx->num_dma_calls++; -} - -static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) -{ -} - -void si_preflush_suspend_features(struct r600_common_context *ctx) -{ - /* suspend queries */ - if (!LIST_IS_EMPTY(&ctx->active_queries)) - si_suspend_queries(ctx); -} - -void si_postflush_resume_features(struct r600_common_context *ctx) -{ - /* resume queries */ - if (!LIST_IS_EMPTY(&ctx->active_queries)) - si_resume_queries(ctx); -} - -static void r600_add_fence_dependency(struct r600_common_context *rctx, - struct pipe_fence_handle *fence) -{ - struct radeon_winsys *ws = rctx->ws; - - if (rctx->dma.cs) - ws->cs_add_fence_dependency(rctx->dma.cs, fence); - ws->cs_add_fence_dependency(rctx->gfx.cs, fence); -} - -static void r600_fence_server_sync(struct pipe_context *ctx, - struct pipe_fence_handle *fence) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; - - /* Only amdgpu needs to handle fence dependencies (for fence imports). - * radeon synchronizes all rings by default and will not implement - * fence imports. - */ - if (rctx->screen->info.drm_major == 2) - return; - - /* Only imported fences need to be handled by fence_server_sync, - * because the winsys handles synchronizations automatically for BOs - * within the process. - * - * Simply skip unflushed fences here, and the winsys will drop no-op - * dependencies (i.e. dependencies within the same ring). - */ - if (rfence->gfx_unflushed.ctx) - return; - - /* All unflushed commands will not start execution before - * this fence dependency is signalled. - * - * Should we flush the context to allow more GPU parallelism? 
- */ - if (rfence->sdma) - r600_add_fence_dependency(rctx, rfence->sdma); - if (rfence->gfx) - r600_add_fence_dependency(rctx, rfence->gfx); -} - -static void r600_create_fence_fd(struct pipe_context *ctx, - struct pipe_fence_handle **pfence, int fd) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; - struct radeon_winsys *ws = rscreen->ws; - struct r600_multi_fence *rfence; - - *pfence = NULL; - - if (!rscreen->info.has_sync_file) - return; - - rfence = CALLOC_STRUCT(r600_multi_fence); - if (!rfence) - return; - - pipe_reference_init(&rfence->reference, 1); - rfence->gfx = ws->fence_import_sync_file(ws, fd); - if (!rfence->gfx) { - FREE(rfence); - return; - } - - *pfence = (struct pipe_fence_handle*)rfence; -} - -static int r600_fence_get_fd(struct pipe_screen *screen, - struct pipe_fence_handle *fence) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_winsys *ws = rscreen->ws; - struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; - int gfx_fd = -1, sdma_fd = -1; - - if (!rscreen->info.has_sync_file) - return -1; - - /* Deferred fences aren't supported. */ - assert(!rfence->gfx_unflushed.ctx); - if (rfence->gfx_unflushed.ctx) - return -1; - - if (rfence->sdma) { - sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma); - if (sdma_fd == -1) - return -1; - } - if (rfence->gfx) { - gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx); - if (gfx_fd == -1) { - if (sdma_fd != -1) - close(sdma_fd); - return -1; - } - } - - /* If we don't have FDs at this point, it means we don't have fences - * either. */ - if (sdma_fd == -1 && gfx_fd == -1) - return ws->export_signalled_sync_file(ws); - if (sdma_fd == -1) - return gfx_fd; - if (gfx_fd == -1) - return sdma_fd; - - /* Get a fence that will be a combination of both fences. */ - sync_accumulate("radeonsi", &gfx_fd, sdma_fd); - close(sdma_fd); - return gfx_fd; -} - -static void r600_flush_from_st(struct pipe_context *ctx, - struct pipe_fence_handle **fence, - unsigned flags) -{ - struct pipe_screen *screen = ctx->screen; - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct radeon_winsys *ws = rctx->ws; - struct pipe_fence_handle *gfx_fence = NULL; - struct pipe_fence_handle *sdma_fence = NULL; - bool deferred_fence = false; - unsigned rflags = RADEON_FLUSH_ASYNC; - - if (flags & PIPE_FLUSH_END_OF_FRAME) - rflags |= RADEON_FLUSH_END_OF_FRAME; - - /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ - if (rctx->dma.cs) - rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); - - if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { - if (fence) - ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); - if (!(flags & PIPE_FLUSH_DEFERRED)) - ws->cs_sync_flush(rctx->gfx.cs); - } else { - /* Instead of flushing, create a deferred fence. Constraints: - * - The state tracker must allow a deferred flush. - * - The state tracker must request a fence. - * - fence_get_fd is not allowed. - * Thread safety in fence_finish must be ensured by the state tracker. - */ - if (flags & PIPE_FLUSH_DEFERRED && - !(flags & PIPE_FLUSH_FENCE_FD) && - fence) { - gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); - deferred_fence = true; - } else { - rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); - } - } - - /* Both engines can signal out of order, so we need to keep both fences. 
*/ - if (fence) { - struct r600_multi_fence *multi_fence = - CALLOC_STRUCT(r600_multi_fence); - if (!multi_fence) { - ws->fence_reference(&sdma_fence, NULL); - ws->fence_reference(&gfx_fence, NULL); - goto finish; - } - - multi_fence->reference.count = 1; - /* If both fences are NULL, fence_finish will always return true. */ - multi_fence->gfx = gfx_fence; - multi_fence->sdma = sdma_fence; - - if (deferred_fence) { - multi_fence->gfx_unflushed.ctx = rctx; - multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; - } - - screen->fence_reference(screen, fence, NULL); - *fence = (struct pipe_fence_handle*)multi_fence; - } -finish: - if (!(flags & PIPE_FLUSH_DEFERRED)) { - if (rctx->dma.cs) - ws->cs_sync_flush(rctx->dma.cs); - ws->cs_sync_flush(rctx->gfx.cs); - } -} - -static void r600_flush_dma_ring(void *ctx, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct radeon_winsys_cs *cs = rctx->dma.cs; - struct radeon_saved_cs saved; - bool check_vm = - (rctx->screen->debug_flags & DBG(CHECK_VM)) && - rctx->check_vm_faults; - - if (!radeon_emitted(cs, 0)) { - if (fence) - rctx->ws->fence_reference(fence, rctx->last_sdma_fence); - return; - } - - if (check_vm) - si_save_cs(rctx->ws, cs, &saved, true); - - rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence); - if (fence) - rctx->ws->fence_reference(fence, rctx->last_sdma_fence); - - if (check_vm) { - /* Use conservative timeout 800ms, after which we won't wait any - * longer and assume the GPU is hung. - */ - rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000); - - rctx->check_vm_faults(rctx, &saved, RING_DMA); - si_clear_saved_cs(&saved); - } -} - -/** - * Store a linearized copy of all chunks of \p cs together with the buffer - * list in \p saved. - */ -void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, - struct radeon_saved_cs *saved, bool get_buffer_list) -{ - uint32_t *buf; - unsigned i; - - /* Save the IB chunks. */ - saved->num_dw = cs->prev_dw + cs->current.cdw; - saved->ib = MALLOC(4 * saved->num_dw); - if (!saved->ib) - goto oom; - - buf = saved->ib; - for (i = 0; i < cs->num_prev; ++i) { - memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); - buf += cs->prev[i].cdw; - } - memcpy(buf, cs->current.buf, cs->current.cdw * 4); - - if (!get_buffer_list) - return; - - /* Save the buffer list. 
*/ - saved->bo_count = ws->cs_get_buffer_list(cs, NULL); - saved->bo_list = CALLOC(saved->bo_count, - sizeof(saved->bo_list[0])); - if (!saved->bo_list) { - FREE(saved->ib); - goto oom; - } - ws->cs_get_buffer_list(cs, saved->bo_list); - - return; - -oom: - fprintf(stderr, "%s: out of memory\n", __func__); - memset(saved, 0, sizeof(*saved)); -} - -void si_clear_saved_cs(struct radeon_saved_cs *saved) -{ - FREE(saved->ib); - FREE(saved->bo_list); - - memset(saved, 0, sizeof(*saved)); -} - -static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - unsigned latest = rctx->ws->query_value(rctx->ws, - RADEON_GPU_RESET_COUNTER); - - if (rctx->gpu_reset_counter == latest) - return PIPE_NO_RESET; - - rctx->gpu_reset_counter = latest; - return PIPE_UNKNOWN_CONTEXT_RESET; -} - -static void r600_set_debug_callback(struct pipe_context *ctx, - const struct pipe_debug_callback *cb) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - - if (cb) - rctx->debug = *cb; - else - memset(&rctx->debug, 0, sizeof(rctx->debug)); -} - -static void r600_set_device_reset_callback(struct pipe_context *ctx, - const struct pipe_device_reset_callback *cb) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - - if (cb) - rctx->device_reset_callback = *cb; - else - memset(&rctx->device_reset_callback, 0, - sizeof(rctx->device_reset_callback)); -} - -bool si_check_device_reset(struct r600_common_context *rctx) -{ - enum pipe_reset_status status; - - if (!rctx->device_reset_callback.reset) - return false; - - if (!rctx->b.get_device_reset_status) - return false; - - status = rctx->b.get_device_reset_status(&rctx->b); - if (status == PIPE_NO_RESET) - return false; - - rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status); - return true; -} - -static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx, - struct pipe_resource *dst, - uint64_t offset, uint64_t size, - unsigned value) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - - rctx->clear_buffer(ctx, dst, offset, size, value, R600_COHERENCY_NONE); -} - -static bool r600_resource_commit(struct pipe_context *pctx, - struct pipe_resource *resource, - unsigned level, struct pipe_box *box, - bool commit) -{ - struct r600_common_context *ctx = (struct r600_common_context *)pctx; - struct r600_resource *res = r600_resource(resource); - - /* - * Since buffer commitment changes cannot be pipelined, we need to - * (a) flush any pending commands that refer to the buffer we're about - * to change, and - * (b) wait for threaded submit to finish, including those that were - * triggered by some other, earlier operation. 
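r600_get_reset_status above is a monotonic-counter handshake: cache the kernel's reset counter at context creation, then report a reset exactly once whenever the queried value has advanced. A minimal sketch of the pattern with the winsys query stubbed out:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t fake_kernel_reset_counter;   /* stands in for the winsys query */

    struct ctx { uint64_t gpu_reset_counter; };

    enum reset_status { NO_RESET, UNKNOWN_CONTEXT_RESET };

    static enum reset_status get_reset_status(struct ctx *c)
    {
            uint64_t latest = fake_kernel_reset_counter;

            if (c->gpu_reset_counter == latest)
                    return NO_RESET;
            c->gpu_reset_counter = latest;       /* report each reset only once */
            return UNKNOWN_CONTEXT_RESET;
    }

    int main(void)
    {
            struct ctx c = { .gpu_reset_counter = 0 };

            assert(get_reset_status(&c) == NO_RESET);
            fake_kernel_reset_counter++;         /* simulated GPU reset */
            assert(get_reset_status(&c) == UNKNOWN_CONTEXT_RESET);
            assert(get_reset_status(&c) == NO_RESET);
            return 0;
    }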
- */ - if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, - res->buf, RADEON_USAGE_READWRITE)) { - ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); - } - if (radeon_emitted(ctx->dma.cs, 0) && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, - res->buf, RADEON_USAGE_READWRITE)) { - ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); - } - - ctx->ws->cs_sync_flush(ctx->dma.cs); - ctx->ws->cs_sync_flush(ctx->gfx.cs); - - assert(resource->target == PIPE_BUFFER); - - return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit); -} - -bool si_common_context_init(struct r600_common_context *rctx, - struct r600_common_screen *rscreen, - unsigned context_flags) -{ - slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers); - slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers); - - rctx->screen = rscreen; - rctx->ws = rscreen->ws; - rctx->family = rscreen->family; - rctx->chip_class = rscreen->chip_class; - - rctx->b.invalidate_resource = si_invalidate_resource; - rctx->b.resource_commit = r600_resource_commit; - rctx->b.transfer_map = u_transfer_map_vtbl; - rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl; - rctx->b.transfer_unmap = u_transfer_unmap_vtbl; - rctx->b.texture_subdata = u_default_texture_subdata; - rctx->b.memory_barrier = r600_memory_barrier; - rctx->b.flush = r600_flush_from_st; - rctx->b.set_debug_callback = r600_set_debug_callback; - rctx->b.create_fence_fd = r600_create_fence_fd; - rctx->b.fence_server_sync = r600_fence_server_sync; - rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback; - rctx->b.buffer_subdata = si_buffer_subdata; - - if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) { - rctx->b.get_device_reset_status = r600_get_reset_status; - rctx->gpu_reset_counter = - rctx->ws->query_value(rctx->ws, - RADEON_GPU_RESET_COUNTER); - } - - rctx->b.set_device_reset_callback = r600_set_device_reset_callback; - - si_init_context_texture_functions(rctx); - si_init_query_functions(rctx); - - if (rctx->chip_class == CIK || - rctx->chip_class == VI || - rctx->chip_class == GFX9) { - rctx->eop_bug_scratch = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, - 16 * rscreen->info.num_render_backends); - if (!rctx->eop_bug_scratch) - return false; - } - - rctx->allocator_zeroed_memory = - u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, - 0, PIPE_USAGE_DEFAULT, 0, true); - if (!rctx->allocator_zeroed_memory) - return false; - - rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024, - 0, PIPE_USAGE_STREAM); - if (!rctx->b.stream_uploader) - return false; - - rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024, - 0, PIPE_USAGE_DEFAULT); - if (!rctx->b.const_uploader) - return false; - - rctx->ctx = rctx->ws->ctx_create(rctx->ws); - if (!rctx->ctx) - return false; - - if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) { - rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, - r600_flush_dma_ring, - rctx); - rctx->dma.flush = r600_flush_dma_ring; - } - - return true; -} - -void si_common_context_cleanup(struct r600_common_context *rctx) -{ - unsigned i,j; - - /* Release DCC stats. 
*/ - for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) { - assert(!rctx->dcc_stats[i].query_active); - - for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++) - if (rctx->dcc_stats[i].ps_stats[j]) - rctx->b.destroy_query(&rctx->b, - rctx->dcc_stats[i].ps_stats[j]); - - r600_texture_reference(&rctx->dcc_stats[i].tex, NULL); - } - - if (rctx->query_result_shader) - rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader); - - if (rctx->gfx.cs) - rctx->ws->cs_destroy(rctx->gfx.cs); - if (rctx->dma.cs) - rctx->ws->cs_destroy(rctx->dma.cs); - if (rctx->ctx) - rctx->ws->ctx_destroy(rctx->ctx); - - if (rctx->b.stream_uploader) - u_upload_destroy(rctx->b.stream_uploader); - if (rctx->b.const_uploader) - u_upload_destroy(rctx->b.const_uploader); - - slab_destroy_child(&rctx->pool_transfers); - slab_destroy_child(&rctx->pool_transfers_unsync); - - if (rctx->allocator_zeroed_memory) { - u_suballocator_destroy(rctx->allocator_zeroed_memory); - } - rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL); - rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL); - r600_resource_reference(&rctx->eop_bug_scratch, NULL); -} - -/* - * pipe_screen - */ - -static const struct debug_named_value common_debug_options[] = { - /* logging */ - { "tex", DBG(TEX), "Print texture info" }, - { "nir", DBG(NIR), "Enable experimental NIR shaders" }, - { "compute", DBG(COMPUTE), "Print compute info" }, - { "vm", DBG(VM), "Print virtual addresses when creating resources" }, - { "info", DBG(INFO), "Print driver information" }, - - /* shaders */ - { "vs", DBG(VS), "Print vertex shaders" }, - { "gs", DBG(GS), "Print geometry shaders" }, - { "ps", DBG(PS), "Print pixel shaders" }, - { "cs", DBG(CS), "Print compute shaders" }, - { "tcs", DBG(TCS), "Print tessellation control shaders" }, - { "tes", DBG(TES), "Print tessellation evaluation shaders" }, - { "noir", DBG(NO_IR), "Don't print the LLVM IR"}, - { "notgsi", DBG(NO_TGSI), "Don't print the TGSI"}, - { "noasm", DBG(NO_ASM), "Don't print disassembled shaders"}, - { "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" }, - { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" }, - { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." }, - - { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." }, - { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." }, - { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." }, - { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." }, - - /* features */ - { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" }, - { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" }, - /* GL uses the word INVALIDATE, gallium uses the word DISCARD */ - { "noinvalrange", DBG(NO_DISCARD_RANGE), "Disable handling of INVALIDATE_RANGE map flags" }, - { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" }, - { "notiling", DBG(NO_TILING), "Disable tiling" }, - { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." }, - { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." }, - { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." }, - { "nowc", DBG(NO_WC), "Disable GTT write combining" }, - { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." }, - { "nodcc", DBG(NO_DCC), "Disable DCC." }, - { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." 
}, - { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." }, - { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." }, - { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" }, - { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" }, - { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" }, - { "nodpbb", DBG(NO_DPBB), "Disable DPBB." }, - { "nodfsm", DBG(NO_DFSM), "Disable DFSM." }, - { "dpbb", DBG(DPBB), "Enable DPBB." }, - { "dfsm", DBG(DFSM), "Enable DFSM." }, - { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" }, - - DEBUG_NAMED_VALUE_END /* must be last */ -}; - -static const char* r600_get_vendor(struct pipe_screen* pscreen) -{ - return "X.Org"; -} - -static const char* r600_get_device_vendor(struct pipe_screen* pscreen) -{ - return "AMD"; -} - -static const char *r600_get_marketing_name(struct radeon_winsys *ws) -{ - if (!ws->get_chip_name) - return NULL; - return ws->get_chip_name(ws); -} - -static const char *r600_get_family_name(const struct r600_common_screen *rscreen) -{ - switch (rscreen->info.family) { - case CHIP_TAHITI: return "AMD TAHITI"; - case CHIP_PITCAIRN: return "AMD PITCAIRN"; - case CHIP_VERDE: return "AMD CAPE VERDE"; - case CHIP_OLAND: return "AMD OLAND"; - case CHIP_HAINAN: return "AMD HAINAN"; - case CHIP_BONAIRE: return "AMD BONAIRE"; - case CHIP_KAVERI: return "AMD KAVERI"; - case CHIP_KABINI: return "AMD KABINI"; - case CHIP_HAWAII: return "AMD HAWAII"; - case CHIP_MULLINS: return "AMD MULLINS"; - case CHIP_TONGA: return "AMD TONGA"; - case CHIP_ICELAND: return "AMD ICELAND"; - case CHIP_CARRIZO: return "AMD CARRIZO"; - case CHIP_FIJI: return "AMD FIJI"; - case CHIP_POLARIS10: return "AMD POLARIS10"; - case CHIP_POLARIS11: return "AMD POLARIS11"; - case CHIP_POLARIS12: return "AMD POLARIS12"; - case CHIP_STONEY: return "AMD STONEY"; - case CHIP_VEGA10: return "AMD VEGA10"; - case CHIP_RAVEN: return "AMD RAVEN"; - default: return "AMD unknown"; - } -} - -static void r600_disk_cache_create(struct r600_common_screen *rscreen) -{ - /* Don't use the cache if shader dumping is enabled. */ - if (rscreen->debug_flags & DBG_ALL_SHADERS) - return; - - uint32_t mesa_timestamp; - if (disk_cache_get_function_timestamp(r600_disk_cache_create, - &mesa_timestamp)) { - char *timestamp_str; - int res = -1; - uint32_t llvm_timestamp; - - if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, - &llvm_timestamp)) { - res = asprintf(&timestamp_str, "%u_%u", - mesa_timestamp, llvm_timestamp); - } - - if (res != -1) { - /* These flags affect shader compilation.
*/ - uint64_t shader_debug_flags = - rscreen->debug_flags & - (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | - DBG(SI_SCHED) | - DBG(UNSAFE_MATH)); - - rscreen->disk_shader_cache = - disk_cache_create(r600_get_family_name(rscreen), - timestamp_str, - shader_debug_flags); - free(timestamp_str); - } - } -} - -static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; - return rscreen->disk_shader_cache; -} - -static const char* r600_get_name(struct pipe_screen* pscreen) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; - - return rscreen->renderer_string; -} - -static float r600_get_paramf(struct pipe_screen* pscreen, - enum pipe_capf param) -{ - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - case PIPE_CAPF_MAX_POINT_WIDTH: - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - case PIPE_CAPF_GUARD_BAND_LEFT: - case PIPE_CAPF_GUARD_BAND_TOP: - case PIPE_CAPF_GUARD_BAND_RIGHT: - case PIPE_CAPF_GUARD_BAND_BOTTOM: - return 0.0f; - } - return 0.0f; -} - -static int r600_get_video_param(struct pipe_screen *screen, - enum pipe_video_profile profile, - enum pipe_video_entrypoint entrypoint, - enum pipe_video_cap param) -{ - switch (param) { - case PIPE_VIDEO_CAP_SUPPORTED: - return vl_profile_supported(screen, profile, entrypoint); - case PIPE_VIDEO_CAP_NPOT_TEXTURES: - return 1; - case PIPE_VIDEO_CAP_MAX_WIDTH: - case PIPE_VIDEO_CAP_MAX_HEIGHT: - return vl_video_buffer_max_size(screen); - case PIPE_VIDEO_CAP_PREFERED_FORMAT: - return PIPE_FORMAT_NV12; - case PIPE_VIDEO_CAP_PREFERS_INTERLACED: - return false; - case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: - return false; - case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: - return true; - case PIPE_VIDEO_CAP_MAX_LEVEL: - return vl_level_supported(screen, profile); - default: - return 0; - } -} - -const char *si_get_llvm_processor_name(enum radeon_family family) -{ - switch (family) { - case CHIP_TAHITI: return "tahiti"; - case CHIP_PITCAIRN: return "pitcairn"; - case CHIP_VERDE: return "verde"; - case CHIP_OLAND: return "oland"; - case CHIP_HAINAN: return "hainan"; - case CHIP_BONAIRE: return "bonaire"; - case CHIP_KABINI: return "kabini"; - case CHIP_KAVERI: return "kaveri"; - case CHIP_HAWAII: return "hawaii"; - case CHIP_MULLINS: - return "mullins"; - case CHIP_TONGA: return "tonga"; - case CHIP_ICELAND: return "iceland"; - case CHIP_CARRIZO: return "carrizo"; - case CHIP_FIJI: - return "fiji"; - case CHIP_STONEY: - return "stoney"; - case CHIP_POLARIS10: - return "polaris10"; - case CHIP_POLARIS11: - case CHIP_POLARIS12: /* same as polaris11 */ - return "polaris11"; - case CHIP_VEGA10: - case CHIP_RAVEN: - return "gfx900"; - default: - return ""; - } -} - -static unsigned get_max_threads_per_block(struct r600_common_screen *screen, - enum pipe_shader_ir ir_type) -{ - if (ir_type != PIPE_SHADER_IR_TGSI) - return 256; - - /* Only 16 waves per thread-group on gfx9. */ - if (screen->chip_class >= GFX9) - return 1024; - - /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice - * round number. 
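The thread-group limits above follow directly from the GCN wave size of 64 lanes: 16 waves per group on gfx9 gives 16 × 64 = 1024 threads, while older GCN parts allow up to 40 waves (2560 threads) and the driver exposes 2048 as the round cap. A quick check of that arithmetic:

    #include <assert.h>

    int main(void)
    {
            const unsigned wave_size = 64;    /* GCN lanes per wave */

            assert(16 * wave_size == 1024);   /* gfx9 thread-group limit */
            assert(40 * wave_size == 2560);   /* pre-gfx9 hardware limit */
            assert(2048 <= 40 * wave_size);   /* so 2048 is a safe round cap */
            return 0;
    }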
- */ - return 2048; -} - -static int r600_get_compute_param(struct pipe_screen *screen, - enum pipe_shader_ir ir_type, - enum pipe_compute_cap param, - void *ret) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; - - //TODO: select these params by asic - switch (param) { - case PIPE_COMPUTE_CAP_IR_TARGET: { - const char *gpu; - const char *triple; - - if (HAVE_LLVM < 0x0400) - triple = "amdgcn--"; - else - triple = "amdgcn-mesa-mesa3d"; - - gpu = si_get_llvm_processor_name(rscreen->family); - if (ret) { - sprintf(ret, "%s-%s", gpu, triple); - } - /* +2 for dash and terminating NIL byte */ - return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); - } - case PIPE_COMPUTE_CAP_GRID_DIMENSION: - if (ret) { - uint64_t *grid_dimension = ret; - grid_dimension[0] = 3; - } - return 1 * sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - if (ret) { - uint64_t *grid_size = ret; - grid_size[0] = 65535; - grid_size[1] = 65535; - grid_size[2] = 65535; - } - return 3 * sizeof(uint64_t) ; - - case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - if (ret) { - uint64_t *block_size = ret; - unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type); - block_size[0] = threads_per_block; - block_size[1] = threads_per_block; - block_size[2] = threads_per_block; - } - return 3 * sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - if (ret) { - uint64_t *max_threads_per_block = ret; - *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type); - } - return sizeof(uint64_t); - case PIPE_COMPUTE_CAP_ADDRESS_BITS: - if (ret) { - uint32_t *address_bits = ret; - address_bits[0] = 64; - } - return 1 * sizeof(uint32_t); - - case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: - if (ret) { - uint64_t *max_global_size = ret; - uint64_t max_mem_alloc_size; - - r600_get_compute_param(screen, ir_type, - PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, - &max_mem_alloc_size); - - /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least - * 1/4 of the MAX_GLOBAL_SIZE. Since the - * MAX_MEM_ALLOC_SIZE is fixed for older kernels, - * make sure we never report more than - * 4 * MAX_MEM_ALLOC_SIZE. - */ - *max_global_size = MIN2(4 * max_mem_alloc_size, - MAX2(rscreen->info.gart_size, - rscreen->info.vram_size)); - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: - if (ret) { - uint64_t *max_local_size = ret; - /* Value reported by the closed source driver. */ - *max_local_size = 32768; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: - if (ret) { - uint64_t *max_input_size = ret; - /* Value reported by the closed source driver. 
*/ - *max_input_size = 1024; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: - if (ret) { - uint64_t *max_mem_alloc_size = ret; - - *max_mem_alloc_size = rscreen->info.max_alloc_size; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: - if (ret) { - uint32_t *max_clock_frequency = ret; - *max_clock_frequency = rscreen->info.max_shader_clock; - } - return sizeof(uint32_t); - - case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: - if (ret) { - uint32_t *max_compute_units = ret; - *max_compute_units = rscreen->info.num_good_compute_units; - } - return sizeof(uint32_t); - - case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: - if (ret) { - uint32_t *images_supported = ret; - *images_supported = 0; - } - return sizeof(uint32_t); - case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: - break; /* unused */ - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - if (ret) { - uint32_t *subgroup_size = ret; - *subgroup_size = 64; - } - return sizeof(uint32_t); - case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: - if (ret) { - uint64_t *max_variable_threads_per_block = ret; - if (ir_type == PIPE_SHADER_IR_TGSI) - *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; - else - *max_variable_threads_per_block = 0; - } - return sizeof(uint64_t); - } - - fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); - return 0; -} - -static uint64_t r600_get_timestamp(struct pipe_screen *screen) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - - return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / - rscreen->info.clock_crystal_freq; -} - -static void r600_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **dst, - struct pipe_fence_handle *src) -{ - struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; - struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst; - struct r600_multi_fence *rsrc = (struct r600_multi_fence *)src; - - if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { - ws->fence_reference(&(*rdst)->gfx, NULL); - ws->fence_reference(&(*rdst)->sdma, NULL); - FREE(*rdst); - } - *rdst = rsrc; -} - -static boolean r600_fence_finish(struct pipe_screen *screen, - struct pipe_context *ctx, - struct pipe_fence_handle *fence, - uint64_t timeout) -{ - struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; - struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; - struct r600_common_context *rctx; - int64_t abs_timeout = os_time_get_absolute_timeout(timeout); - - ctx = threaded_context_unwrap_sync(ctx); - rctx = ctx ? (struct r600_common_context*)ctx : NULL; - - if (rfence->sdma) { - if (!rws->fence_wait(rws, rfence->sdma, timeout)) - return false; - - /* Recompute the timeout after waiting. */ - if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { - int64_t time = os_time_get_nano(); - timeout = abs_timeout > time ? abs_timeout - time : 0; - } - } - - if (!rfence->gfx) - return true; - - /* Flush the gfx IB if it hasn't been flushed yet. */ - if (rctx && - rfence->gfx_unflushed.ctx == rctx && - rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { - rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); - rfence->gfx_unflushed.ctx = NULL; - - if (!timeout) - return false; - - /* Recompute the timeout after all that. */ - if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { - int64_t time = os_time_get_nano(); - timeout = abs_timeout > time ? 
abs_timeout - time : 0; - } - } - - return rws->fence_wait(rws, rfence->gfx, timeout); -} - -static void r600_query_memory_info(struct pipe_screen *screen, - struct pipe_memory_info *info) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_winsys *ws = rscreen->ws; - unsigned vram_usage, gtt_usage; - - info->total_device_memory = rscreen->info.vram_size / 1024; - info->total_staging_memory = rscreen->info.gart_size / 1024; - - /* The real TTM memory usage is somewhat random, because: - * - * 1) TTM delays freeing memory, because it can only free it after - * fences expire. - * - * 2) The memory usage can be really low if big VRAM evictions are - * taking place, but the real usage is well above the size of VRAM. - * - * Instead, return statistics of this process. - */ - vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; - gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; - - info->avail_device_memory = - vram_usage <= info->total_device_memory ? - info->total_device_memory - vram_usage : 0; - info->avail_staging_memory = - gtt_usage <= info->total_staging_memory ? - info->total_staging_memory - gtt_usage : 0; - - info->device_memory_evicted = - ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; - - if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) - info->nr_device_memory_evictions = - ws->query_value(ws, RADEON_NUM_EVICTIONS); - else - /* Just return the number of evicted 64KB pages. */ - info->nr_device_memory_evictions = info->device_memory_evicted / 64; -} - -struct pipe_resource *si_resource_create_common(struct pipe_screen *screen, - const struct pipe_resource *templ) -{ - if (templ->target == PIPE_BUFFER) { - return si_buffer_create(screen, templ, 256); - } else { - return si_texture_create(screen, templ); - } -} - -bool si_common_screen_init(struct r600_common_screen *rscreen, - struct radeon_winsys *ws) -{ - char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {}; - struct utsname uname_data; - const char *chip_name; - - ws->query_info(ws, &rscreen->info); - rscreen->ws = ws; - - if ((chip_name = r600_get_marketing_name(ws))) - snprintf(family_name, sizeof(family_name), "%s / ", - r600_get_family_name(rscreen) + 4); - else - chip_name = r600_get_family_name(rscreen); - - if (uname(&uname_data) == 0) - snprintf(kernel_version, sizeof(kernel_version), - " / %s", uname_data.release); - - if (HAVE_LLVM > 0) { - snprintf(llvm_string, sizeof(llvm_string), - ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, - HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); - } - - snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), - "%s (%sDRM %i.%i.%i%s%s)", - chip_name, family_name, rscreen->info.drm_major, - rscreen->info.drm_minor, rscreen->info.drm_patchlevel, - kernel_version, llvm_string); - - rscreen->b.get_name = r600_get_name; - rscreen->b.get_vendor = r600_get_vendor; - rscreen->b.get_device_vendor = r600_get_device_vendor; - rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache; - rscreen->b.get_compute_param = r600_get_compute_param; - rscreen->b.get_paramf = r600_get_paramf; - rscreen->b.get_timestamp = r600_get_timestamp; - rscreen->b.fence_finish = r600_fence_finish; - rscreen->b.fence_reference = r600_fence_reference; - rscreen->b.resource_destroy = u_resource_destroy_vtbl; - rscreen->b.resource_from_user_memory = si_buffer_from_user_memory; - rscreen->b.query_memory_info = r600_query_memory_info; - rscreen->b.fence_get_fd = r600_fence_get_fd; - - if 
(rscreen->info.has_hw_decode) { - rscreen->b.get_video_param = si_vid_get_video_param; - rscreen->b.is_video_format_supported = si_vid_is_format_supported; - } else { - rscreen->b.get_video_param = r600_get_video_param; - rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; - } - - si_init_screen_texture_functions(rscreen); - si_init_screen_query_functions(rscreen); - - rscreen->family = rscreen->info.family; - rscreen->chip_class = rscreen->info.chip_class; - rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0); - rscreen->has_rbplus = false; - rscreen->rbplus_allowed = false; - - r600_disk_cache_create(rscreen); - - slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64); - - rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); - if (rscreen->force_aniso >= 0) { - printf("radeon: Forcing anisotropy filter to %ix\n", - /* round down to a power of two */ - 1 << util_logbase2(rscreen->force_aniso)); - } - - (void) mtx_init(&rscreen->aux_context_lock, mtx_plain); - (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain); - - if (rscreen->debug_flags & DBG(INFO)) { - printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", - rscreen->info.pci_domain, rscreen->info.pci_bus, - rscreen->info.pci_dev, rscreen->info.pci_func); - printf("pci_id = 0x%x\n", rscreen->info.pci_id); - printf("family = %i (%s)\n", rscreen->info.family, - r600_get_family_name(rscreen)); - printf("chip_class = %i\n", rscreen->info.chip_class); - printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size); - printf("gart_page_size = %u\n", rscreen->info.gart_page_size); - printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); - printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); - printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024)); - printf("max_alloc_size = %i MB\n", - (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); - printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size); - printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram); - printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); - printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); - printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode); - printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings); - printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings); - printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version); - printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version); - printf("me_fw_version = %i\n", rscreen->info.me_fw_version); - printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature); - printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version); - printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature); - printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version); - printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature); - printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config); - printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq); - printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size); - printf("drm = %i.%i.%i\n", rscreen->info.drm_major, - rscreen->info.drm_minor, rscreen->info.drm_patchlevel); - printf("has_userptr = %i\n", rscreen->info.has_userptr); - printf("has_syncobj = %u\n", rscreen->info.has_syncobj); - printf("has_sync_file = %u\n", 
rscreen->info.has_sync_file); - - printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); - printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); - printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); - printf("max_se = %i\n", rscreen->info.max_se); - printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); - - printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map); - printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid); - printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); - printf("num_render_backends = %i\n", rscreen->info.num_render_backends); - printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); - printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); - printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask); - printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment); - } - return true; -} - -void si_destroy_common_screen(struct r600_common_screen *rscreen) -{ - si_perfcounters_destroy(rscreen); - si_gpu_load_kill_thread(rscreen); - - mtx_destroy(&rscreen->gpu_load_mutex); - mtx_destroy(&rscreen->aux_context_lock); - rscreen->aux_context->destroy(rscreen->aux_context); - - slab_destroy_parent(&rscreen->pool_transfers); - - disk_cache_destroy(rscreen->disk_shader_cache); - rscreen->ws->destroy(rscreen->ws); - FREE(rscreen); -} - -bool si_can_dump_shader(struct r600_common_screen *rscreen, - unsigned processor) -{ - return rscreen->debug_flags & (1 << processor); -} - -bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor) -{ - return (rscreen->debug_flags & DBG(CHECK_IR)) || - si_can_dump_shader(rscreen, processor); -} - -void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value) -{ - struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; - - mtx_lock(&rscreen->aux_context_lock); - rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value); - rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); - mtx_unlock(&rscreen->aux_context_lock); -} diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h deleted file mode 100644 index a7c91cb8a..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h +++ /dev/null @@ -1,913 +0,0 @@ -/* - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: Marek Olšák <maraeo@gmail.com> - * - */ - -/** - * This file contains common screen and context structures and functions - * for r600g and radeonsi. - */ - -#ifndef R600_PIPE_COMMON_H -#define R600_PIPE_COMMON_H - -#include <stdio.h> - -#include "amd/common/ac_binary.h" - -#include "radeon/radeon_winsys.h" - -#include "util/disk_cache.h" -#include "util/u_blitter.h" -#include "util/list.h" -#include "util/u_range.h" -#include "util/slab.h" -#include "util/u_suballoc.h" -#include "util/u_transfer.h" -#include "util/u_threaded_context.h" - -struct u_log_context; - -#define ATI_VENDOR_ID 0x1002 - -#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) -#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) -#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) -#define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3) -#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) - -#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) -/* Pipeline & streamout query controls. */ -#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1) -#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2) -#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3) -#define R600_CONTEXT_PRIVATE_FLAG (1u << 4) - -/* special primitive types */ -#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX - -#define R600_NOT_QUERY 0xffffffff - -/* Debug flags. */ -enum { - /* Shader logging options: */ - DBG_VS = PIPE_SHADER_VERTEX, - DBG_PS = PIPE_SHADER_FRAGMENT, - DBG_GS = PIPE_SHADER_GEOMETRY, - DBG_TCS = PIPE_SHADER_TESS_CTRL, - DBG_TES = PIPE_SHADER_TESS_EVAL, - DBG_CS = PIPE_SHADER_COMPUTE, - DBG_NO_IR, - DBG_NO_TGSI, - DBG_NO_ASM, - DBG_PREOPT_IR, - - /* Shader compiler options the shader cache should be aware of: */ - DBG_FS_CORRECT_DERIVS_AFTER_KILL, - DBG_UNSAFE_MATH, - DBG_SI_SCHED, - - /* Shader compiler options (with no effect on the shader cache): */ - DBG_CHECK_IR, - DBG_PRECOMPILE, - DBG_NIR, - DBG_MONOLITHIC_SHADERS, - DBG_NO_OPT_VARIANT, - - /* Information logging options: */ - DBG_INFO, - DBG_TEX, - DBG_COMPUTE, - DBG_VM, - - /* Driver options: */ - DBG_FORCE_DMA, - DBG_NO_ASYNC_DMA, - DBG_NO_DISCARD_RANGE, - DBG_NO_WC, - DBG_CHECK_VM, - - /* 3D engine options: */ - DBG_SWITCH_ON_EOP, - DBG_NO_OUT_OF_ORDER, - DBG_NO_DPBB, - DBG_NO_DFSM, - DBG_DPBB, - DBG_DFSM, - DBG_NO_HYPERZ, - DBG_NO_RB_PLUS, - DBG_NO_2D_TILING, - DBG_NO_TILING, - DBG_NO_DCC, - DBG_NO_DCC_CLEAR, - DBG_NO_DCC_FB, - - /* Tests: */ - DBG_TEST_DMA, - DBG_TEST_VMFAULT_CP, - DBG_TEST_VMFAULT_SDMA, - DBG_TEST_VMFAULT_SHADER, -}; - -#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1)) -#define DBG(name) (1ull << DBG_##name) - -#define R600_MAP_BUFFER_ALIGNMENT 64 - -#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024 - -enum r600_coherency { - R600_COHERENCY_NONE, /* no cache flushes needed */ - R600_COHERENCY_SHADER, - R600_COHERENCY_CB_META, -}; - -#ifdef PIPE_ARCH_BIG_ENDIAN -#define R600_BIG_ENDIAN 1 -#else -#define R600_BIG_ENDIAN 0 -#endif - -struct r600_common_context; -struct r600_perfcounters; -struct tgsi_shader_info; -struct r600_qbo_state; - -void si_radeon_shader_binary_init(struct ac_shader_binary *b); -void si_radeon_shader_binary_clean(struct ac_shader_binary *b); - -/* Only 32-bit 
buffer allocations are supported, gallium doesn't support more - * at the moment. - */ -struct r600_resource { - struct threaded_resource b; - - /* Winsys objects. */ - struct pb_buffer *buf; - uint64_t gpu_address; - /* Memory usage if the buffer placement is optimal. */ - uint64_t vram_usage; - uint64_t gart_usage; - - /* Resource properties. */ - uint64_t bo_size; - unsigned bo_alignment; - enum radeon_bo_domain domains; - enum radeon_bo_flag flags; - unsigned bind_history; - - /* The buffer range which is initialized (with a write transfer, - * streamout, DMA, or as a random access target). The rest of - * the buffer is considered invalid and can be mapped unsynchronized. - * - * This allows unsychronized mapping of a buffer range which hasn't - * been used yet. It's for applications which forget to use - * the unsynchronized map flag and expect the driver to figure it out. - */ - struct util_range valid_buffer_range; - - /* For buffers only. This indicates that a write operation has been - * performed by TC L2, but the cache hasn't been flushed. - * Any hw block which doesn't use or bypasses TC L2 should check this - * flag and flush the cache before using the buffer. - * - * For example, TC L2 must be flushed if a buffer which has been - * modified by a shader store instruction is about to be used as - * an index buffer. The reason is that VGT DMA index fetching doesn't - * use TC L2. - */ - bool TC_L2_dirty; - - /* Whether the resource has been exported via resource_get_handle. */ - unsigned external_usage; /* PIPE_HANDLE_USAGE_* */ - - /* Whether this resource is referenced by bindless handles. */ - bool texture_handle_allocated; - bool image_handle_allocated; -}; - -struct r600_transfer { - struct threaded_transfer b; - struct r600_resource *staging; - unsigned offset; -}; - -struct r600_fmask_info { - uint64_t offset; - uint64_t size; - unsigned alignment; - unsigned pitch_in_pixels; - unsigned bank_height; - unsigned slice_tile_max; - unsigned tile_mode_index; - unsigned tile_swizzle; -}; - -struct r600_cmask_info { - uint64_t offset; - uint64_t size; - unsigned alignment; - unsigned slice_tile_max; - uint64_t base_address_reg; -}; - -struct r600_texture { - struct r600_resource resource; - - uint64_t size; - unsigned num_level0_transfers; - enum pipe_format db_render_format; - bool is_depth; - bool db_compatible; - bool can_sample_z; - bool can_sample_s; - unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */ - unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */ - struct r600_texture *flushed_depth_texture; - struct radeon_surf surface; - - /* Colorbuffer compression and fast clear. */ - struct r600_fmask_info fmask; - struct r600_cmask_info cmask; - struct r600_resource *cmask_buffer; - uint64_t dcc_offset; /* 0 = disabled */ - unsigned cb_color_info; /* fast clear enable bit */ - unsigned color_clear_value[2]; - unsigned last_msaa_resolve_target_micro_mode; - - /* Depth buffer compression and fast clear. */ - uint64_t htile_offset; - bool tc_compatible_htile; - bool depth_cleared; /* if it was cleared at least once */ - float depth_clear_value; - bool stencil_cleared; /* if it was cleared at least once */ - uint8_t stencil_clear_value; - bool upgraded_depth; /* upgraded from unorm to Z32_FLOAT */ - - bool non_disp_tiling; /* R600-Cayman only */ - - /* Whether the texture is a displayable back buffer and needs DCC - * decompression, which is expensive. 
Therefore, it's enabled only - * if statistics suggest that it will pay off and it's allocated - * separately. It can't be bound as a sampler by apps. Limited to - * target == 2D and last_level == 0. If enabled, dcc_offset contains - * the absolute GPUVM address, not the relative one. - */ - struct r600_resource *dcc_separate_buffer; - /* When DCC is temporarily disabled, the separate buffer is here. */ - struct r600_resource *last_dcc_separate_buffer; - /* We need to track DCC dirtiness, because st/dri usually calls - * flush_resource twice per frame (not a bug) and we don't wanna - * decompress DCC twice. Also, the dirty tracking must be done even - * if DCC isn't used, because it's required by the DCC usage analysis - * for a possible future enablement. - */ - bool separate_dcc_dirty; - /* Statistics gathering for the DCC enablement heuristic. */ - bool dcc_gather_statistics; - /* Estimate of how much this color buffer is written to in units of - * full-screen draws: ps_invocations / (width * height) - * Shader kills, late Z, and blending with trivial discards make it - * inaccurate (we need to count CB updates, not PS invocations). - */ - unsigned ps_draw_ratio; - /* The number of clears since the last DCC usage analysis. */ - unsigned num_slow_clears; - - /* Counter that should be non-zero if the texture is bound to a - * framebuffer. Implemented in radeonsi only. - */ - uint32_t framebuffers_bound; -}; - -struct r600_surface { - struct pipe_surface base; - - /* These can vary with block-compressed textures. */ - unsigned width0; - unsigned height0; - - bool color_initialized; - bool depth_initialized; - - /* Misc. color flags. */ - bool alphatest_bypass; - bool export_16bpc; - bool color_is_int8; - bool color_is_int10; - bool dcc_incompatible; - - /* Color registers. */ - unsigned cb_color_info; - unsigned cb_color_base; - unsigned cb_color_view; - unsigned cb_color_size; /* R600 only */ - unsigned cb_color_dim; /* EG only */ - unsigned cb_color_pitch; /* EG and later */ - unsigned cb_color_slice; /* EG and later */ - unsigned cb_color_attrib; /* EG and later */ - unsigned cb_color_attrib2; /* GFX9 and later */ - unsigned cb_dcc_control; /* VI and later */ - unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ - unsigned cb_color_fmask_slice; /* EG and later */ - unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ - unsigned cb_color_mask; /* R600 only */ - unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */ - unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */ - unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */ - unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */ - struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ - struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */ - - /* DB registers. 
*/ - uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */ - uint64_t db_stencil_base; /* EG and later */ - uint64_t db_htile_data_base; - unsigned db_depth_info; /* R600 only, then SI and later */ - unsigned db_z_info; /* EG and later */ - unsigned db_z_info2; /* GFX9+ */ - unsigned db_depth_view; - unsigned db_depth_size; - unsigned db_depth_slice; /* EG and later */ - unsigned db_stencil_info; /* EG and later */ - unsigned db_stencil_info2; /* GFX9+ */ - unsigned db_prefetch_limit; /* R600 only */ - unsigned db_htile_surface; - unsigned db_preload_control; /* EG and later */ -}; - -struct r600_mmio_counter { - unsigned busy; - unsigned idle; -}; - -union r600_mmio_counters { - struct { - /* For global GPU load including SDMA. */ - struct r600_mmio_counter gpu; - - /* GRBM_STATUS */ - struct r600_mmio_counter spi; - struct r600_mmio_counter gui; - struct r600_mmio_counter ta; - struct r600_mmio_counter gds; - struct r600_mmio_counter vgt; - struct r600_mmio_counter ia; - struct r600_mmio_counter sx; - struct r600_mmio_counter wd; - struct r600_mmio_counter bci; - struct r600_mmio_counter sc; - struct r600_mmio_counter pa; - struct r600_mmio_counter db; - struct r600_mmio_counter cp; - struct r600_mmio_counter cb; - - /* SRBM_STATUS2 */ - struct r600_mmio_counter sdma; - - /* CP_STAT */ - struct r600_mmio_counter pfp; - struct r600_mmio_counter meq; - struct r600_mmio_counter me; - struct r600_mmio_counter surf_sync; - struct r600_mmio_counter cp_dma; - struct r600_mmio_counter scratch_ram; - } named; - unsigned array[0]; -}; - -struct r600_memory_object { - struct pipe_memory_object b; - struct pb_buffer *buf; - uint32_t stride; - uint32_t offset; -}; - -struct r600_common_screen { - struct pipe_screen b; - struct radeon_winsys *ws; - enum radeon_family family; - enum chip_class chip_class; - struct radeon_info info; - uint64_t debug_flags; - bool has_cp_dma; - bool has_streamout; - bool has_rbplus; /* if RB+ registers exist */ - bool rbplus_allowed; /* if RB+ is allowed */ - - struct disk_cache *disk_shader_cache; - - struct slab_parent_pool pool_transfers; - - /* Texture filter settings. */ - int force_aniso; /* -1 = disabled */ - - /* Auxiliary context. Mainly used to initialize resources. - * It must be locked prior to using and flushed before unlocking. */ - struct pipe_context *aux_context; - mtx_t aux_context_lock; - - /* This must be in the screen, because UE4 uses one context for - * compilation and another one for rendering. - */ - unsigned num_compilations; - /* Along with ST_DEBUG=precompile, this should show if applications - * are loading shaders on demand. This is a monotonic counter. - */ - unsigned num_shaders_created; - unsigned num_shader_cache_hits; - - /* GPU load thread. */ - mtx_t gpu_load_mutex; - thrd_t gpu_load_thread; - union r600_mmio_counters mmio_counters; - volatile unsigned gpu_load_stop_thread; /* bool */ - - char renderer_string[100]; - - /* Performance counters. */ - struct r600_perfcounters *perfcounters; - - /* If pipe_screen wants to recompute and re-emit the framebuffer, - * sampler, and image states of all contexts, it should atomically - * increment this. - * - * Each context will compare this with its own last known value of - * the counter before drawing and re-emit the states accordingly. - */ - unsigned dirty_tex_counter; - - /* Atomically increment this counter when an existing texture's - * metadata is enabled or disabled in a way that requires changing - * contexts' compressed texture binding masks. 
- */ - unsigned compressed_colortex_counter; - - struct { - /* Context flags to set so that all writes from earlier jobs - * in the CP are seen by L2 clients. - */ - unsigned cp_to_L2; - - /* Context flags to set so that all writes from earlier jobs - * that end in L2 are seen by CP. - */ - unsigned L2_to_cp; - - /* Context flags to set so that all writes from earlier - * compute jobs are seen by L2 clients. - */ - unsigned compute_to_L2; - } barrier_flags; - - void (*query_opaque_metadata)(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - struct radeon_bo_metadata *md); - - void (*apply_opaque_metadata)(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - struct radeon_bo_metadata *md); -}; - -/* This encapsulates a state or an operation which can emitted into the GPU - * command stream. */ -struct r600_atom { - void (*emit)(struct r600_common_context *ctx, struct r600_atom *state); - unsigned short id; -}; - -struct r600_ring { - struct radeon_winsys_cs *cs; - void (*flush)(void *ctx, unsigned flags, - struct pipe_fence_handle **fence); -}; - -/* Saved CS data for debugging features. */ -struct radeon_saved_cs { - uint32_t *ib; - unsigned num_dw; - - struct radeon_bo_list_item *bo_list; - unsigned bo_count; -}; - -struct r600_common_context { - struct pipe_context b; /* base class */ - - struct r600_common_screen *screen; - struct radeon_winsys *ws; - struct radeon_winsys_ctx *ctx; - enum radeon_family family; - enum chip_class chip_class; - struct r600_ring gfx; - struct r600_ring dma; - struct pipe_fence_handle *last_gfx_fence; - struct pipe_fence_handle *last_sdma_fence; - struct r600_resource *eop_bug_scratch; - unsigned num_gfx_cs_flushes; - unsigned initial_gfx_cs_size; - unsigned gpu_reset_counter; - unsigned last_dirty_tex_counter; - unsigned last_compressed_colortex_counter; - unsigned last_num_draw_calls; - - struct threaded_context *tc; - struct u_suballocator *allocator_zeroed_memory; - struct slab_child_pool pool_transfers; - struct slab_child_pool pool_transfers_unsync; /* for threaded_context */ - - /* Current unaccounted memory usage. */ - uint64_t vram; - uint64_t gtt; - - /* Additional context states. */ - unsigned flags; /* flush flags */ - - /* Queries. */ - /* Maintain the list of active queries for pausing between IBs. */ - int num_occlusion_queries; - int num_perfect_occlusion_queries; - struct list_head active_queries; - unsigned num_cs_dw_queries_suspend; - /* Misc stats. */ - unsigned num_draw_calls; - unsigned num_decompress_calls; - unsigned num_mrt_draw_calls; - unsigned num_prim_restart_calls; - unsigned num_spill_draw_calls; - unsigned num_compute_calls; - unsigned num_spill_compute_calls; - unsigned num_dma_calls; - unsigned num_cp_dma_calls; - unsigned num_vs_flushes; - unsigned num_ps_flushes; - unsigned num_cs_flushes; - unsigned num_cb_cache_flushes; - unsigned num_db_cache_flushes; - unsigned num_L2_invalidates; - unsigned num_L2_writebacks; - unsigned num_resident_handles; - uint64_t num_alloc_tex_transfer_bytes; - unsigned last_tex_ps_draw_ratio; /* for query */ - - /* Render condition. */ - struct r600_atom render_cond_atom; - struct pipe_query *render_cond; - unsigned render_cond_mode; - bool render_cond_invert; - bool render_cond_force_off; /* for u_blitter */ - - /* Statistics gathering for the DCC enablement heuristic. It can't be - * in r600_texture because r600_texture can be shared by multiple - * contexts. This is for back buffers only. We shouldn't get too many - * of those. 
- * - * X11 DRI3 rotates among a finite set of back buffers. They should - * all fit in this array. If they don't, separate DCC might never be - * enabled by DCC stat gathering. - */ - struct { - struct r600_texture *tex; - /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */ - struct pipe_query *ps_stats[3]; - /* If all slots are used and another slot is needed, - * the least recently used slot is evicted based on this. */ - int64_t last_use_timestamp; - bool query_active; - } dcc_stats[5]; - - struct pipe_debug_callback debug; - struct pipe_device_reset_callback device_reset_callback; - struct u_log_context *log; - - void *query_result_shader; - - /* Copy one resource to another using async DMA. */ - void (*dma_copy)(struct pipe_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dst_x, unsigned dst_y, unsigned dst_z, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box); - - void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value); - - void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value, - enum r600_coherency coher); - - void (*blit_decompress_depth)(struct pipe_context *ctx, - struct r600_texture *texture, - struct r600_texture *staging, - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer, - unsigned first_sample, unsigned last_sample); - - void (*decompress_dcc)(struct pipe_context *ctx, - struct r600_texture *rtex); - - /* Reallocate the buffer and update all resource bindings where - * the buffer is bound, including all resource descriptors. */ - void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf); - - /* Update all resource bindings where the buffer is bound, including - * all resource descriptors. This is invalidate_buffer without - * the invalidation. */ - void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf, - uint64_t old_gpu_address); - - /* Enable or disable occlusion queries. */ - void (*set_occlusion_query_state)(struct pipe_context *ctx, - bool old_enable, - bool old_perfect_enable); - - void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st); - - /* This ensures there is enough space in the command stream. 
*/ - void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw, - bool include_draw_vbo); - - void (*set_atom_dirty)(struct r600_common_context *ctx, - struct r600_atom *atom, bool dirty); - - void (*check_vm_faults)(struct r600_common_context *ctx, - struct radeon_saved_cs *saved, - enum ring_type ring); -}; - -/* r600_buffer_common.c */ -bool si_rings_is_buffer_referenced(struct r600_common_context *ctx, - struct pb_buffer *buf, - enum radeon_bo_usage usage); -void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx, - struct r600_resource *resource, - unsigned usage); -void si_buffer_subdata(struct pipe_context *ctx, - struct pipe_resource *buffer, - unsigned usage, unsigned offset, - unsigned size, const void *data); -void si_init_resource_fields(struct r600_common_screen *rscreen, - struct r600_resource *res, - uint64_t size, unsigned alignment); -bool si_alloc_resource(struct r600_common_screen *rscreen, - struct r600_resource *res); -struct pipe_resource *si_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ, - unsigned alignment); -struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen, - unsigned flags, - unsigned usage, - unsigned size, - unsigned alignment); -struct pipe_resource * -si_buffer_from_user_memory(struct pipe_screen *screen, - const struct pipe_resource *templ, - void *user_memory); -void si_invalidate_resource(struct pipe_context *ctx, - struct pipe_resource *resource); -void si_replace_buffer_storage(struct pipe_context *ctx, - struct pipe_resource *dst, - struct pipe_resource *src); - -/* r600_common_pipe.c */ -void si_gfx_write_event_eop(struct r600_common_context *ctx, - unsigned event, unsigned event_flags, - unsigned data_sel, - struct r600_resource *buf, uint64_t va, - uint32_t new_fence, unsigned query_type); -unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen); -void si_gfx_wait_fence(struct r600_common_context *ctx, - uint64_t va, uint32_t ref, uint32_t mask); -bool si_common_screen_init(struct r600_common_screen *rscreen, - struct radeon_winsys *ws); -void si_destroy_common_screen(struct r600_common_screen *rscreen); -void si_preflush_suspend_features(struct r600_common_context *ctx); -void si_postflush_resume_features(struct r600_common_context *ctx); -bool si_common_context_init(struct r600_common_context *rctx, - struct r600_common_screen *rscreen, - unsigned context_flags); -void si_common_context_cleanup(struct r600_common_context *rctx); -bool si_can_dump_shader(struct r600_common_screen *rscreen, - unsigned processor); -bool si_extra_shader_checks(struct r600_common_screen *rscreen, - unsigned processor); -void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value); -struct pipe_resource *si_resource_create_common(struct pipe_screen *screen, - const struct pipe_resource *templ); -const char *si_get_llvm_processor_name(enum radeon_family family); -void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, - struct r600_resource *dst, struct r600_resource *src); -void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, - struct radeon_saved_cs *saved, bool get_buffer_list); -void si_clear_saved_cs(struct radeon_saved_cs *saved); -bool si_check_device_reset(struct r600_common_context *rctx); - -/* r600_gpu_load.c */ -void si_gpu_load_kill_thread(struct r600_common_screen *rscreen); -uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type); 
-unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type, - uint64_t begin); - -/* r600_perfcounters.c */ -void si_perfcounters_destroy(struct r600_common_screen *rscreen); - -/* r600_query.c */ -void si_init_screen_query_functions(struct r600_common_screen *rscreen); -void si_init_query_functions(struct r600_common_context *rctx); -void si_suspend_queries(struct r600_common_context *ctx); -void si_resume_queries(struct r600_common_context *ctx); - -/* r600_test_dma.c */ -void si_test_dma(struct r600_common_screen *rscreen); - -/* r600_texture.c */ -bool si_prepare_for_dma_blit(struct r600_common_context *rctx, - struct r600_texture *rdst, - unsigned dst_level, unsigned dstx, - unsigned dsty, unsigned dstz, - struct r600_texture *rsrc, - unsigned src_level, - const struct pipe_box *src_box); -void si_texture_get_fmask_info(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - unsigned nr_samples, - struct r600_fmask_info *out); -bool si_init_flushed_depth_texture(struct pipe_context *ctx, - struct pipe_resource *texture, - struct r600_texture **staging); -void si_print_texture_info(struct r600_common_screen *rscreen, - struct r600_texture *rtex, struct u_log_context *log); -struct pipe_resource *si_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ); -bool vi_dcc_formats_compatible(enum pipe_format format1, - enum pipe_format format2); -bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, - unsigned level, - enum pipe_format view_format); -void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx, - struct pipe_resource *tex, - unsigned level, - enum pipe_format view_format); -struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_surface *templ, - unsigned width0, unsigned height0, - unsigned width, unsigned height); -unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap); -void vi_separate_dcc_start_query(struct pipe_context *ctx, - struct r600_texture *tex); -void vi_separate_dcc_stop_query(struct pipe_context *ctx, - struct r600_texture *tex); -void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, - struct r600_texture *tex); -void vi_dcc_clear_level(struct r600_common_context *rctx, - struct r600_texture *rtex, - unsigned level, unsigned clear_value); -void si_do_fast_color_clear(struct r600_common_context *rctx, - struct pipe_framebuffer_state *fb, - struct r600_atom *fb_state, - unsigned *buffers, ubyte *dirty_cbufs, - const union pipe_color_union *color); -bool si_texture_disable_dcc(struct r600_common_context *rctx, - struct r600_texture *rtex); -void si_init_screen_texture_functions(struct r600_common_screen *rscreen); -void si_init_context_texture_functions(struct r600_common_context *rctx); - - -/* Inline helpers. 
*/ - -static inline struct r600_resource *r600_resource(struct pipe_resource *r) -{ - return (struct r600_resource*)r; -} - -static inline void -r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res) -{ - pipe_resource_reference((struct pipe_resource **)ptr, - (struct pipe_resource *)res); -} - -static inline void -r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res) -{ - pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b); -} - -static inline void -r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_resource *res = (struct r600_resource *)r; - - if (res) { - /* Add memory usage for need_gfx_cs_space */ - rctx->vram += res->vram_usage; - rctx->gtt += res->gart_usage; - } -} - -#define SQ_TEX_XY_FILTER_POINT 0x00 -#define SQ_TEX_XY_FILTER_BILINEAR 0x01 -#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02 -#define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03 - -static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso) -{ - if (filter == PIPE_TEX_FILTER_LINEAR) - return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR - : SQ_TEX_XY_FILTER_BILINEAR; - else - return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT - : SQ_TEX_XY_FILTER_POINT; -} - -static inline unsigned r600_tex_aniso_filter(unsigned filter) -{ - if (filter < 2) - return 0; - if (filter < 4) - return 1; - if (filter < 8) - return 2; - if (filter < 16) - return 3; - return 4; -} - -static inline enum radeon_bo_priority -r600_get_sampler_view_priority(struct r600_resource *res) -{ - if (res->b.b.target == PIPE_BUFFER) - return RADEON_PRIO_SAMPLER_BUFFER; - - if (res->b.b.nr_samples > 1) - return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; - - return RADEON_PRIO_SAMPLER_TEXTURE; -} - -static inline bool -r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler) -{ - return (stencil_sampler && tex->can_sample_s) || - (!stencil_sampler && tex->can_sample_z); -} - -static inline bool -vi_dcc_enabled(struct r600_texture *tex, unsigned level) -{ - return tex->dcc_offset && level < tex->surface.num_dcc_levels; -} - -static inline bool -r600_htile_enabled(struct r600_texture *tex, unsigned level) -{ - return tex->htile_offset && level == 0; -} - -static inline bool -vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level) -{ - assert(!tex->tc_compatible_htile || tex->htile_offset); - return tex->tc_compatible_htile && level == 0; -} - -#define COMPUTE_DBG(rscreen, fmt, args...) \ - do { \ - if ((rscreen->b.debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \ - } while (0); - -#define R600_ERR(fmt, args...) 
\ - fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args) - -static inline int S_FIXED(float value, unsigned frac_bits) -{ - return value * (1 << frac_bits); -} - -#endif diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_query.c b/lib/mesa/src/gallium/drivers/radeon/r600_query.c deleted file mode 100644 index aedf950ff..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_query.c +++ /dev/null @@ -1,2101 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * Copyright 2014 Marek Olšák <marek.olsak@amd.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "r600_query.h" -#include "r600_cs.h" -#include "util/u_memory.h" -#include "util/u_upload_mgr.h" -#include "os/os_time.h" -#include "tgsi/tgsi_text.h" -#include "amd/common/sid.h" - -/* TODO: remove this: */ -void si_update_prims_generated_query_state(struct r600_common_context *rctx, - unsigned type, int diff); - -#define R600_MAX_STREAMS 4 - -struct r600_hw_query_params { - unsigned start_offset; - unsigned end_offset; - unsigned fence_offset; - unsigned pair_stride; - unsigned pair_count; -}; - -/* Queries without buffer handling or suspend/resume. */ -struct r600_query_sw { - struct r600_query b; - - uint64_t begin_result; - uint64_t end_result; - - uint64_t begin_time; - uint64_t end_time; - - /* Fence for GPU_FINISHED. 
*/ - struct pipe_fence_handle *fence; -}; - -static void r600_query_sw_destroy(struct r600_common_screen *rscreen, - struct r600_query *rquery) -{ - struct r600_query_sw *query = (struct r600_query_sw *)rquery; - - rscreen->b.fence_reference(&rscreen->b, &query->fence, NULL); - FREE(query); -} - -static enum radeon_value_id winsys_id_from_type(unsigned type) -{ - switch (type) { - case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY; - case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY; - case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM; - case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT; - case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS; - case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS; - case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS; - case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS; - case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER; - case R600_QUERY_GFX_IB_SIZE: return RADEON_GFX_IB_SIZE_COUNTER; - case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED; - case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS; - case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS; - case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE; - case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE; - case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE; - case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE; - case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK; - case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK; - case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME; - default: unreachable("query type does not correspond to winsys id"); - } -} - -static bool r600_query_sw_begin(struct r600_common_context *rctx, - struct r600_query *rquery) -{ - struct r600_query_sw *query = (struct r600_query_sw *)rquery; - enum radeon_value_id ws_id; - - switch(query->b.type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - case PIPE_QUERY_GPU_FINISHED: - break; - case R600_QUERY_DRAW_CALLS: - query->begin_result = rctx->num_draw_calls; - break; - case R600_QUERY_DECOMPRESS_CALLS: - query->begin_result = rctx->num_decompress_calls; - break; - case R600_QUERY_MRT_DRAW_CALLS: - query->begin_result = rctx->num_mrt_draw_calls; - break; - case R600_QUERY_PRIM_RESTART_CALLS: - query->begin_result = rctx->num_prim_restart_calls; - break; - case R600_QUERY_SPILL_DRAW_CALLS: - query->begin_result = rctx->num_spill_draw_calls; - break; - case R600_QUERY_COMPUTE_CALLS: - query->begin_result = rctx->num_compute_calls; - break; - case R600_QUERY_SPILL_COMPUTE_CALLS: - query->begin_result = rctx->num_spill_compute_calls; - break; - case R600_QUERY_DMA_CALLS: - query->begin_result = rctx->num_dma_calls; - break; - case R600_QUERY_CP_DMA_CALLS: - query->begin_result = rctx->num_cp_dma_calls; - break; - case R600_QUERY_NUM_VS_FLUSHES: - query->begin_result = rctx->num_vs_flushes; - break; - case R600_QUERY_NUM_PS_FLUSHES: - query->begin_result = rctx->num_ps_flushes; - break; - case R600_QUERY_NUM_CS_FLUSHES: - query->begin_result = rctx->num_cs_flushes; - break; - case R600_QUERY_NUM_CB_CACHE_FLUSHES: - query->begin_result = rctx->num_cb_cache_flushes; - break; - case R600_QUERY_NUM_DB_CACHE_FLUSHES: - query->begin_result = rctx->num_db_cache_flushes; - break; - case R600_QUERY_NUM_L2_INVALIDATES: - query->begin_result = rctx->num_L2_invalidates; - break; - case R600_QUERY_NUM_L2_WRITEBACKS: - query->begin_result = rctx->num_L2_writebacks; - 
break; - case R600_QUERY_NUM_RESIDENT_HANDLES: - query->begin_result = rctx->num_resident_handles; - break; - case R600_QUERY_TC_OFFLOADED_SLOTS: - query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; - break; - case R600_QUERY_TC_DIRECT_SLOTS: - query->begin_result = rctx->tc ? rctx->tc->num_direct_slots : 0; - break; - case R600_QUERY_TC_NUM_SYNCS: - query->begin_result = rctx->tc ? rctx->tc->num_syncs : 0; - break; - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_MAPPED_VRAM: - case R600_QUERY_MAPPED_GTT: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_VRAM_VIS_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO: - case R600_QUERY_NUM_MAPPED_BUFFERS: - query->begin_result = 0; - break; - case R600_QUERY_BUFFER_WAIT_TIME: - case R600_QUERY_GFX_IB_SIZE: - case R600_QUERY_NUM_GFX_IBS: - case R600_QUERY_NUM_SDMA_IBS: - case R600_QUERY_NUM_BYTES_MOVED: - case R600_QUERY_NUM_EVICTIONS: - case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: { - enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); - query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); - break; - } - case R600_QUERY_GFX_BO_LIST_SIZE: - ws_id = winsys_id_from_type(query->b.type); - query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); - query->begin_time = rctx->ws->query_value(rctx->ws, - RADEON_NUM_GFX_IBS); - break; - case R600_QUERY_CS_THREAD_BUSY: - ws_id = winsys_id_from_type(query->b.type); - query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); - query->begin_time = os_time_get_nano(); - break; - case R600_QUERY_GALLIUM_THREAD_BUSY: - query->begin_result = - rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0; - query->begin_time = os_time_get_nano(); - break; - case R600_QUERY_GPU_LOAD: - case R600_QUERY_GPU_SHADERS_BUSY: - case R600_QUERY_GPU_TA_BUSY: - case R600_QUERY_GPU_GDS_BUSY: - case R600_QUERY_GPU_VGT_BUSY: - case R600_QUERY_GPU_IA_BUSY: - case R600_QUERY_GPU_SX_BUSY: - case R600_QUERY_GPU_WD_BUSY: - case R600_QUERY_GPU_BCI_BUSY: - case R600_QUERY_GPU_SC_BUSY: - case R600_QUERY_GPU_PA_BUSY: - case R600_QUERY_GPU_DB_BUSY: - case R600_QUERY_GPU_CP_BUSY: - case R600_QUERY_GPU_CB_BUSY: - case R600_QUERY_GPU_SDMA_BUSY: - case R600_QUERY_GPU_PFP_BUSY: - case R600_QUERY_GPU_MEQ_BUSY: - case R600_QUERY_GPU_ME_BUSY: - case R600_QUERY_GPU_SURF_SYNC_BUSY: - case R600_QUERY_GPU_CP_DMA_BUSY: - case R600_QUERY_GPU_SCRATCH_RAM_BUSY: - query->begin_result = si_begin_counter(rctx->screen, - query->b.type); - break; - case R600_QUERY_NUM_COMPILATIONS: - query->begin_result = p_atomic_read(&rctx->screen->num_compilations); - break; - case R600_QUERY_NUM_SHADERS_CREATED: - query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); - break; - case R600_QUERY_NUM_SHADER_CACHE_HITS: - query->begin_result = - p_atomic_read(&rctx->screen->num_shader_cache_hits); - break; - case R600_QUERY_GPIN_ASIC_ID: - case R600_QUERY_GPIN_NUM_SIMD: - case R600_QUERY_GPIN_NUM_RB: - case R600_QUERY_GPIN_NUM_SPI: - case R600_QUERY_GPIN_NUM_SE: - break; - default: - unreachable("r600_query_sw_begin: bad query type"); - } - - return true; -} - -static bool r600_query_sw_end(struct r600_common_context *rctx, - struct r600_query *rquery) -{ - struct r600_query_sw *query = (struct r600_query_sw *)rquery; - enum radeon_value_id ws_id; - - switch(query->b.type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - break; - case 
PIPE_QUERY_GPU_FINISHED: - rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED); - break; - case R600_QUERY_DRAW_CALLS: - query->end_result = rctx->num_draw_calls; - break; - case R600_QUERY_DECOMPRESS_CALLS: - query->end_result = rctx->num_decompress_calls; - break; - case R600_QUERY_MRT_DRAW_CALLS: - query->end_result = rctx->num_mrt_draw_calls; - break; - case R600_QUERY_PRIM_RESTART_CALLS: - query->end_result = rctx->num_prim_restart_calls; - break; - case R600_QUERY_SPILL_DRAW_CALLS: - query->end_result = rctx->num_spill_draw_calls; - break; - case R600_QUERY_COMPUTE_CALLS: - query->end_result = rctx->num_compute_calls; - break; - case R600_QUERY_SPILL_COMPUTE_CALLS: - query->end_result = rctx->num_spill_compute_calls; - break; - case R600_QUERY_DMA_CALLS: - query->end_result = rctx->num_dma_calls; - break; - case R600_QUERY_CP_DMA_CALLS: - query->end_result = rctx->num_cp_dma_calls; - break; - case R600_QUERY_NUM_VS_FLUSHES: - query->end_result = rctx->num_vs_flushes; - break; - case R600_QUERY_NUM_PS_FLUSHES: - query->end_result = rctx->num_ps_flushes; - break; - case R600_QUERY_NUM_CS_FLUSHES: - query->end_result = rctx->num_cs_flushes; - break; - case R600_QUERY_NUM_CB_CACHE_FLUSHES: - query->end_result = rctx->num_cb_cache_flushes; - break; - case R600_QUERY_NUM_DB_CACHE_FLUSHES: - query->end_result = rctx->num_db_cache_flushes; - break; - case R600_QUERY_NUM_L2_INVALIDATES: - query->end_result = rctx->num_L2_invalidates; - break; - case R600_QUERY_NUM_L2_WRITEBACKS: - query->end_result = rctx->num_L2_writebacks; - break; - case R600_QUERY_NUM_RESIDENT_HANDLES: - query->end_result = rctx->num_resident_handles; - break; - case R600_QUERY_TC_OFFLOADED_SLOTS: - query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; - break; - case R600_QUERY_TC_DIRECT_SLOTS: - query->end_result = rctx->tc ? rctx->tc->num_direct_slots : 0; - break; - case R600_QUERY_TC_NUM_SYNCS: - query->end_result = rctx->tc ? rctx->tc->num_syncs : 0; - break; - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_MAPPED_VRAM: - case R600_QUERY_MAPPED_GTT: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_VRAM_VIS_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - case R600_QUERY_BUFFER_WAIT_TIME: - case R600_QUERY_GFX_IB_SIZE: - case R600_QUERY_NUM_MAPPED_BUFFERS: - case R600_QUERY_NUM_GFX_IBS: - case R600_QUERY_NUM_SDMA_IBS: - case R600_QUERY_NUM_BYTES_MOVED: - case R600_QUERY_NUM_EVICTIONS: - case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: { - enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); - query->end_result = rctx->ws->query_value(rctx->ws, ws_id); - break; - } - case R600_QUERY_GFX_BO_LIST_SIZE: - ws_id = winsys_id_from_type(query->b.type); - query->end_result = rctx->ws->query_value(rctx->ws, ws_id); - query->end_time = rctx->ws->query_value(rctx->ws, - RADEON_NUM_GFX_IBS); - break; - case R600_QUERY_CS_THREAD_BUSY: - ws_id = winsys_id_from_type(query->b.type); - query->end_result = rctx->ws->query_value(rctx->ws, ws_id); - query->end_time = os_time_get_nano(); - break; - case R600_QUERY_GALLIUM_THREAD_BUSY: - query->end_result = - rctx->tc ? 
util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0; - query->end_time = os_time_get_nano(); - break; - case R600_QUERY_GPU_LOAD: - case R600_QUERY_GPU_SHADERS_BUSY: - case R600_QUERY_GPU_TA_BUSY: - case R600_QUERY_GPU_GDS_BUSY: - case R600_QUERY_GPU_VGT_BUSY: - case R600_QUERY_GPU_IA_BUSY: - case R600_QUERY_GPU_SX_BUSY: - case R600_QUERY_GPU_WD_BUSY: - case R600_QUERY_GPU_BCI_BUSY: - case R600_QUERY_GPU_SC_BUSY: - case R600_QUERY_GPU_PA_BUSY: - case R600_QUERY_GPU_DB_BUSY: - case R600_QUERY_GPU_CP_BUSY: - case R600_QUERY_GPU_CB_BUSY: - case R600_QUERY_GPU_SDMA_BUSY: - case R600_QUERY_GPU_PFP_BUSY: - case R600_QUERY_GPU_MEQ_BUSY: - case R600_QUERY_GPU_ME_BUSY: - case R600_QUERY_GPU_SURF_SYNC_BUSY: - case R600_QUERY_GPU_CP_DMA_BUSY: - case R600_QUERY_GPU_SCRATCH_RAM_BUSY: - query->end_result = si_end_counter(rctx->screen, - query->b.type, - query->begin_result); - query->begin_result = 0; - break; - case R600_QUERY_NUM_COMPILATIONS: - query->end_result = p_atomic_read(&rctx->screen->num_compilations); - break; - case R600_QUERY_NUM_SHADERS_CREATED: - query->end_result = p_atomic_read(&rctx->screen->num_shaders_created); - break; - case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO: - query->end_result = rctx->last_tex_ps_draw_ratio; - break; - case R600_QUERY_NUM_SHADER_CACHE_HITS: - query->end_result = - p_atomic_read(&rctx->screen->num_shader_cache_hits); - break; - case R600_QUERY_GPIN_ASIC_ID: - case R600_QUERY_GPIN_NUM_SIMD: - case R600_QUERY_GPIN_NUM_RB: - case R600_QUERY_GPIN_NUM_SPI: - case R600_QUERY_GPIN_NUM_SE: - break; - default: - unreachable("r600_query_sw_end: bad query type"); - } - - return true; -} - -static bool r600_query_sw_get_result(struct r600_common_context *rctx, - struct r600_query *rquery, - bool wait, - union pipe_query_result *result) -{ - struct r600_query_sw *query = (struct r600_query_sw *)rquery; - - switch (query->b.type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - /* Convert from cycles per millisecond to cycles per second (Hz). */ - result->timestamp_disjoint.frequency = - (uint64_t)rctx->screen->info.clock_crystal_freq * 1000; - result->timestamp_disjoint.disjoint = false; - return true; - case PIPE_QUERY_GPU_FINISHED: { - struct pipe_screen *screen = rctx->b.screen; - struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b; - - result->b = screen->fence_finish(screen, ctx, query->fence, - wait ? 
PIPE_TIMEOUT_INFINITE : 0); - return result->b; - } - - case R600_QUERY_GFX_BO_LIST_SIZE: - result->u64 = (query->end_result - query->begin_result) / - (query->end_time - query->begin_time); - return true; - case R600_QUERY_CS_THREAD_BUSY: - case R600_QUERY_GALLIUM_THREAD_BUSY: - result->u64 = (query->end_result - query->begin_result) * 100 / - (query->end_time - query->begin_time); - return true; - case R600_QUERY_GPIN_ASIC_ID: - result->u32 = 0; - return true; - case R600_QUERY_GPIN_NUM_SIMD: - result->u32 = rctx->screen->info.num_good_compute_units; - return true; - case R600_QUERY_GPIN_NUM_RB: - result->u32 = rctx->screen->info.num_render_backends; - return true; - case R600_QUERY_GPIN_NUM_SPI: - result->u32 = 1; /* all supported chips have one SPI per SE */ - return true; - case R600_QUERY_GPIN_NUM_SE: - result->u32 = rctx->screen->info.max_se; - return true; - } - - result->u64 = query->end_result - query->begin_result; - - switch (query->b.type) { - case R600_QUERY_BUFFER_WAIT_TIME: - case R600_QUERY_GPU_TEMPERATURE: - result->u64 /= 1000; - break; - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - result->u64 *= 1000000; - break; - } - - return true; -} - - -static struct r600_query_ops sw_query_ops = { - .destroy = r600_query_sw_destroy, - .begin = r600_query_sw_begin, - .end = r600_query_sw_end, - .get_result = r600_query_sw_get_result, - .get_result_resource = NULL -}; - -static struct pipe_query *r600_query_sw_create(unsigned query_type) -{ - struct r600_query_sw *query; - - query = CALLOC_STRUCT(r600_query_sw); - if (!query) - return NULL; - - query->b.type = query_type; - query->b.ops = &sw_query_ops; - - return (struct pipe_query *)query; -} - -void si_query_hw_destroy(struct r600_common_screen *rscreen, - struct r600_query *rquery) -{ - struct r600_query_hw *query = (struct r600_query_hw *)rquery; - struct r600_query_buffer *prev = query->buffer.previous; - - /* Release all query buffers. */ - while (prev) { - struct r600_query_buffer *qbuf = prev; - prev = prev->previous; - r600_resource_reference(&qbuf->buf, NULL); - FREE(qbuf); - } - - r600_resource_reference(&query->buffer.buf, NULL); - r600_resource_reference(&query->workaround_buf, NULL); - FREE(rquery); -} - -static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen, - struct r600_query_hw *query) -{ - unsigned buf_size = MAX2(query->result_size, - rscreen->info.min_alloc_size); - - /* Queries are normally read by the CPU after - * being written by the gpu, hence staging is probably a good - * usage pattern. - */ - struct r600_resource *buf = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, 0, - PIPE_USAGE_STAGING, buf_size); - if (!buf) - return NULL; - - if (!query->ops->prepare_buffer(rscreen, query, buf)) { - r600_resource_reference(&buf, NULL); - return NULL; - } - - return buf; -} - -static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen, - struct r600_query_hw *query, - struct r600_resource *buffer) -{ - /* Callers ensure that the buffer is currently unused by the GPU. 
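- * That is also why the map below may use PIPE_TRANSFER_WRITE together with - * PIPE_TRANSFER_UNSYNCHRONIZED: no fence check or stall is needed before - * the results are cleared.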
*/ - uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED); - if (!results) - return false; - - memset(results, 0, buffer->b.b.width0); - - if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || - query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || - query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - unsigned max_rbs = rscreen->info.num_render_backends; - unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask; - unsigned num_results; - unsigned i, j; - - /* Set top bits for unused backends. */ - num_results = buffer->b.b.width0 / query->result_size; - for (j = 0; j < num_results; j++) { - for (i = 0; i < max_rbs; i++) { - if (!(enabled_rb_mask & (1<<i))) { - results[(i * 4)+1] = 0x80000000; - results[(i * 4)+3] = 0x80000000; - } - } - results += 4 * max_rbs; - } - } - - return true; -} - -static void r600_query_hw_get_result_resource(struct r600_common_context *rctx, - struct r600_query *rquery, - bool wait, - enum pipe_query_value_type result_type, - int index, - struct pipe_resource *resource, - unsigned offset); - -static struct r600_query_ops query_hw_ops = { - .destroy = si_query_hw_destroy, - .begin = si_query_hw_begin, - .end = si_query_hw_end, - .get_result = si_query_hw_get_result, - .get_result_resource = r600_query_hw_get_result_resource, -}; - -static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, - struct r600_query_hw *query, - struct r600_resource *buffer, - uint64_t va); -static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, - struct r600_query_hw *query, - struct r600_resource *buffer, - uint64_t va); -static void r600_query_hw_add_result(struct r600_common_screen *rscreen, - struct r600_query_hw *, void *buffer, - union pipe_query_result *result); -static void r600_query_hw_clear_result(struct r600_query_hw *, - union pipe_query_result *); - -static struct r600_query_hw_ops query_hw_default_hw_ops = { - .prepare_buffer = r600_query_hw_prepare_buffer, - .emit_start = r600_query_hw_do_emit_start, - .emit_stop = r600_query_hw_do_emit_stop, - .clear_result = r600_query_hw_clear_result, - .add_result = r600_query_hw_add_result, -}; - -bool si_query_hw_init(struct r600_common_screen *rscreen, - struct r600_query_hw *query) -{ - query->buffer.buf = r600_new_query_buffer(rscreen, query); - if (!query->buffer.buf) - return false; - - return true; -} - -static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen, - unsigned query_type, - unsigned index) -{ - struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw); - if (!query) - return NULL; - - query->b.type = query_type; - query->b.ops = &query_hw_ops; - query->ops = &query_hw_default_hw_ops; - - switch (query_type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - query->result_size = 16 * rscreen->info.num_render_backends; - query->result_size += 16; /* for the fence + alignment */ - query->num_cs_dw_begin = 6; - query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen); - break; - case PIPE_QUERY_TIME_ELAPSED: - query->result_size = 24; - query->num_cs_dw_begin = 8; - query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen); - break; - case PIPE_QUERY_TIMESTAMP: - query->result_size = 16; - query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen); - query->flags = R600_QUERY_HW_FLAG_NO_START; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case 
PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ - query->result_size = 32; - query->num_cs_dw_begin = 6; - query->num_cs_dw_end = 6; - query->stream = index; - break; - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ - query->result_size = 32 * R600_MAX_STREAMS; - query->num_cs_dw_begin = 6 * R600_MAX_STREAMS; - query->num_cs_dw_end = 6 * R600_MAX_STREAMS; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - /* 11 values on GCN. */ - query->result_size = 11 * 16; - query->result_size += 8; /* for the fence + alignment */ - query->num_cs_dw_begin = 6; - query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen); - break; - default: - assert(0); - FREE(query); - return NULL; - } - - if (!si_query_hw_init(rscreen, query)) { - FREE(query); - return NULL; - } - - return (struct pipe_query *)query; -} - -static void r600_update_occlusion_query_state(struct r600_common_context *rctx, - unsigned type, int diff) -{ - if (type == PIPE_QUERY_OCCLUSION_COUNTER || - type == PIPE_QUERY_OCCLUSION_PREDICATE || - type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - bool old_enable = rctx->num_occlusion_queries != 0; - bool old_perfect_enable = - rctx->num_perfect_occlusion_queries != 0; - bool enable, perfect_enable; - - rctx->num_occlusion_queries += diff; - assert(rctx->num_occlusion_queries >= 0); - - if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - rctx->num_perfect_occlusion_queries += diff; - assert(rctx->num_perfect_occlusion_queries >= 0); - } - - enable = rctx->num_occlusion_queries != 0; - perfect_enable = rctx->num_perfect_occlusion_queries != 0; - - if (enable != old_enable || perfect_enable != old_perfect_enable) { - rctx->set_occlusion_query_state(&rctx->b, old_enable, - old_perfect_enable); - } - } -} - -static unsigned event_type_for_stream(unsigned stream) -{ - switch (stream) { - default: - case 0: return V_028A90_SAMPLE_STREAMOUTSTATS; - case 1: return V_028A90_SAMPLE_STREAMOUTSTATS1; - case 2: return V_028A90_SAMPLE_STREAMOUTSTATS2; - case 3: return V_028A90_SAMPLE_STREAMOUTSTATS3; - } -} - -static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va, - unsigned stream) -{ - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); -} - -static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, - struct r600_query_hw *query, - struct r600_resource *buffer, - uint64_t va) -{ - struct radeon_winsys_cs *cs = ctx->gfx.cs; - - switch (query->b.type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - emit_sample_streamout(cs, va, query->stream); - break; - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) - emit_sample_streamout(cs, va + 32 * stream, stream); - break; - case PIPE_QUERY_TIME_ELAPSED: - /* Write the timestamp from the CP not waiting for - * outstanding draws (top-of-pipe). 
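- * The end-of-query counterpart uses a bottom-of-pipe EOP event instead, so - * the pair of timestamps brackets exactly the draws submitted in between.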
- */ - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_COUNT_SEL | - COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - break; - default: - assert(0); - } - radeon_add_to_buffer_list(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, - RADEON_PRIO_QUERY); -} - -static void r600_query_hw_emit_start(struct r600_common_context *ctx, - struct r600_query_hw *query) -{ - uint64_t va; - - if (!query->buffer.buf) - return; // previous buffer allocation failure - - r600_update_occlusion_query_state(ctx, query->b.type, 1); - si_update_prims_generated_query_state(ctx, query->b.type, 1); - - ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end, - true); - - /* Get a new query buffer if needed. */ - if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { - struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); - *qbuf = query->buffer; - query->buffer.results_end = 0; - query->buffer.previous = qbuf; - query->buffer.buf = r600_new_query_buffer(ctx->screen, query); - if (!query->buffer.buf) - return; - } - - /* emit begin query */ - va = query->buffer.buf->gpu_address + query->buffer.results_end; - - query->ops->emit_start(ctx, query, query->buffer.buf, va); - - ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end; -} - -static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, - struct r600_query_hw *query, - struct r600_resource *buffer, - uint64_t va) -{ - struct radeon_winsys_cs *cs = ctx->gfx.cs; - uint64_t fence_va = 0; - - switch (query->b.type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - va += 8; - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - - fence_va = va + ctx->screen->info.num_render_backends * 16 - 8; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - va += 16; - emit_sample_streamout(cs, va, query->stream); - break; - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - va += 16; - for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) - emit_sample_streamout(cs, va + 32 * stream, stream); - break; - case PIPE_QUERY_TIME_ELAPSED: - va += 8; - /* fall through */ - case PIPE_QUERY_TIMESTAMP: - si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DATA_SEL_TIMESTAMP, NULL, va, - 0, query->b.type); - fence_va = va + 8; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: { - unsigned sample_size = (query->result_size - 8) / 2; - - va += sample_size; - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - - fence_va = va + sample_size; - break; - } - default: - assert(0); - } - radeon_add_to_buffer_list(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, - RADEON_PRIO_QUERY); - - if (fence_va) - si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DATA_SEL_VALUE_32BIT, - 
query->buffer.buf, fence_va, 0x80000000, - query->b.type); -} - -static void r600_query_hw_emit_stop(struct r600_common_context *ctx, - struct r600_query_hw *query) -{ - uint64_t va; - - if (!query->buffer.buf) - return; // previous buffer allocation failure - - /* The queries which need begin already called this in begin_query. */ - if (query->flags & R600_QUERY_HW_FLAG_NO_START) { - ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false); - } - - /* emit end query */ - va = query->buffer.buf->gpu_address + query->buffer.results_end; - - query->ops->emit_stop(ctx, query, query->buffer.buf, va); - - query->buffer.results_end += query->result_size; - - if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) - ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end; - - r600_update_occlusion_query_state(ctx, query->b.type, -1); - si_update_prims_generated_query_state(ctx, query->b.type, -1); -} - -static void emit_set_predicate(struct r600_common_context *ctx, - struct r600_resource *buf, uint64_t va, - uint32_t op) -{ - struct radeon_winsys_cs *cs = ctx->gfx.cs; - - if (ctx->chip_class >= GFX9) { - radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - } else { - radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); - radeon_emit(cs, va); - radeon_emit(cs, op | ((va >> 32) & 0xFF)); - } - radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_READ, - RADEON_PRIO_QUERY); -} - -static void r600_emit_query_predication(struct r600_common_context *ctx, - struct r600_atom *atom) -{ - struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond; - struct r600_query_buffer *qbuf; - uint32_t op; - bool flag_wait, invert; - - if (!query) - return; - - invert = ctx->render_cond_invert; - flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT || - ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT; - - if (query->workaround_buf) { - op = PRED_OP(PREDICATION_OP_BOOL64); - } else { - switch (query->b.type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - op = PRED_OP(PREDICATION_OP_ZPASS); - break; - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - op = PRED_OP(PREDICATION_OP_PRIMCOUNT); - invert = !invert; - break; - default: - assert(0); - return; - } - } - - /* if true then invert, see GL_ARB_conditional_render_inverted */ - if (invert) - op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */ - else - op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */ - - /* Use the value written by compute shader as a workaround. Note that - * the wait flag does not apply in this predication mode. - * - * The shader outputs the result value to L2. Workarounds only affect VI - * and later, where the CP reads data from L2, so we don't need an - * additional flush. - */ - if (query->workaround_buf) { - uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset; - emit_set_predicate(ctx, query->workaround_buf, va, op); - return; - } - - op |= flag_wait ? 
PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW; - - /* emit predicate packets for all data blocks */ - for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { - unsigned results_base = 0; - uint64_t va_base = qbuf->buf->gpu_address; - - while (results_base < qbuf->results_end) { - uint64_t va = va_base + results_base; - - if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { - for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) { - emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op); - - /* set CONTINUE bit for all packets except the first */ - op |= PREDICATION_CONTINUE; - } - } else { - emit_set_predicate(ctx, qbuf->buf, va, op); - op |= PREDICATION_CONTINUE; - } - - results_base += query->result_size; - } - } -} - -static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) -{ - struct r600_common_screen *rscreen = - (struct r600_common_screen *)ctx->screen; - - if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT || - query_type == PIPE_QUERY_GPU_FINISHED || - query_type >= PIPE_QUERY_DRIVER_SPECIFIC) - return r600_query_sw_create(query_type); - - return r600_query_hw_create(rscreen, query_type, index); -} - -static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - rquery->ops->destroy(rctx->screen, rquery); -} - -static boolean r600_begin_query(struct pipe_context *ctx, - struct pipe_query *query) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - return rquery->ops->begin(rctx, rquery); -} - -void si_query_hw_reset_buffers(struct r600_common_context *rctx, - struct r600_query_hw *query) -{ - struct r600_query_buffer *prev = query->buffer.previous; - - /* Discard the old query buffers. */ - while (prev) { - struct r600_query_buffer *qbuf = prev; - prev = prev->previous; - r600_resource_reference(&qbuf->buf, NULL); - FREE(qbuf); - } - - query->buffer.results_end = 0; - query->buffer.previous = NULL; - - /* Obtain a new buffer if the current one can't be mapped without a stall. 
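- * The old results were already discarded above, so allocating a fresh - * buffer is cheaper than stalling until the GPU stops using this one.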
*/ - if (si_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) || - !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { - r600_resource_reference(&query->buffer.buf, NULL); - query->buffer.buf = r600_new_query_buffer(rctx->screen, query); - } else { - if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf)) - r600_resource_reference(&query->buffer.buf, NULL); - } -} - -bool si_query_hw_begin(struct r600_common_context *rctx, - struct r600_query *rquery) -{ - struct r600_query_hw *query = (struct r600_query_hw *)rquery; - - if (query->flags & R600_QUERY_HW_FLAG_NO_START) { - assert(0); - return false; - } - - if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES)) - si_query_hw_reset_buffers(rctx, query); - - r600_resource_reference(&query->workaround_buf, NULL); - - r600_query_hw_emit_start(rctx, query); - if (!query->buffer.buf) - return false; - - LIST_ADDTAIL(&query->list, &rctx->active_queries); - return true; -} - -static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - return rquery->ops->end(rctx, rquery); -} - -bool si_query_hw_end(struct r600_common_context *rctx, - struct r600_query *rquery) -{ - struct r600_query_hw *query = (struct r600_query_hw *)rquery; - - if (query->flags & R600_QUERY_HW_FLAG_NO_START) - si_query_hw_reset_buffers(rctx, query); - - r600_query_hw_emit_stop(rctx, query); - - if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) - LIST_DELINIT(&query->list); - - if (!query->buffer.buf) - return false; - - return true; -} - -static void r600_get_hw_query_params(struct r600_common_context *rctx, - struct r600_query_hw *rquery, int index, - struct r600_hw_query_params *params) -{ - unsigned max_rbs = rctx->screen->info.num_render_backends; - - params->pair_stride = 0; - params->pair_count = 1; - - switch (rquery->b.type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - params->start_offset = 0; - params->end_offset = 8; - params->fence_offset = max_rbs * 16; - params->pair_stride = 16; - params->pair_count = max_rbs; - break; - case PIPE_QUERY_TIME_ELAPSED: - params->start_offset = 0; - params->end_offset = 8; - params->fence_offset = 16; - break; - case PIPE_QUERY_TIMESTAMP: - params->start_offset = 0; - params->end_offset = 0; - params->fence_offset = 8; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - params->start_offset = 8; - params->end_offset = 24; - params->fence_offset = params->end_offset + 4; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - params->start_offset = 0; - params->end_offset = 16; - params->fence_offset = params->end_offset + 4; - break; - case PIPE_QUERY_SO_STATISTICS: - params->start_offset = 8 - index * 8; - params->end_offset = 24 - index * 8; - params->fence_offset = params->end_offset + 4; - break; - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - params->pair_count = R600_MAX_STREAMS; - params->pair_stride = 32; - /* fall through */ - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - params->start_offset = 0; - params->end_offset = 16; - - /* We can re-use the high dword of the last 64-bit value as a - * fence: it is initialized as 0, and the high bit is set by - * the write of the streamout stats event. 
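- * The result shader then only has to test bit 31 of the dword at - * fence_offset to decide whether this sample has landed.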
- */ - params->fence_offset = rquery->result_size - 4; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - { - /* Offsets apply to EG+ */ - static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80}; - params->start_offset = offsets[index]; - params->end_offset = 88 + offsets[index]; - params->fence_offset = 2 * 88; - break; - } - default: - unreachable("r600_get_hw_query_params unsupported"); - } -} - -static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index, - bool test_status_bit) -{ - uint32_t *current_result = (uint32_t*)map; - uint64_t start, end; - - start = (uint64_t)current_result[start_index] | - (uint64_t)current_result[start_index+1] << 32; - end = (uint64_t)current_result[end_index] | - (uint64_t)current_result[end_index+1] << 32; - - if (!test_status_bit || - ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { - return end - start; - } - return 0; -} - -static void r600_query_hw_add_result(struct r600_common_screen *rscreen, - struct r600_query_hw *query, - void *buffer, - union pipe_query_result *result) -{ - unsigned max_rbs = rscreen->info.num_render_backends; - - switch (query->b.type) { - case PIPE_QUERY_OCCLUSION_COUNTER: { - for (unsigned i = 0; i < max_rbs; ++i) { - unsigned results_base = i * 16; - result->u64 += - r600_query_read_result(buffer + results_base, 0, 2, true); - } - break; - } - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { - for (unsigned i = 0; i < max_rbs; ++i) { - unsigned results_base = i * 16; - result->b = result->b || - r600_query_read_result(buffer + results_base, 0, 2, true) != 0; - } - break; - } - case PIPE_QUERY_TIME_ELAPSED: - result->u64 += r600_query_read_result(buffer, 0, 2, false); - break; - case PIPE_QUERY_TIMESTAMP: - result->u64 = *(uint64_t*)buffer; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - /* SAMPLE_STREAMOUTSTATS stores this structure: - * { - * u64 NumPrimitivesWritten; - * u64 PrimitiveStorageNeeded; - * } - * We only need NumPrimitivesWritten here. */ - result->u64 += r600_query_read_result(buffer, 2, 6, true); - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - /* Here we read PrimitiveStorageNeeded. 
*/ - result->u64 += r600_query_read_result(buffer, 0, 4, true); - break; - case PIPE_QUERY_SO_STATISTICS: - result->so_statistics.num_primitives_written += - r600_query_read_result(buffer, 2, 6, true); - result->so_statistics.primitives_storage_needed += - r600_query_read_result(buffer, 0, 4, true); - break; - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - result->b = result->b || - r600_query_read_result(buffer, 2, 6, true) != - r600_query_read_result(buffer, 0, 4, true); - break; - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) { - result->b = result->b || - r600_query_read_result(buffer, 2, 6, true) != - r600_query_read_result(buffer, 0, 4, true); - buffer = (char *)buffer + 32; - } - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - result->pipeline_statistics.ps_invocations += - r600_query_read_result(buffer, 0, 22, false); - result->pipeline_statistics.c_primitives += - r600_query_read_result(buffer, 2, 24, false); - result->pipeline_statistics.c_invocations += - r600_query_read_result(buffer, 4, 26, false); - result->pipeline_statistics.vs_invocations += - r600_query_read_result(buffer, 6, 28, false); - result->pipeline_statistics.gs_invocations += - r600_query_read_result(buffer, 8, 30, false); - result->pipeline_statistics.gs_primitives += - r600_query_read_result(buffer, 10, 32, false); - result->pipeline_statistics.ia_primitives += - r600_query_read_result(buffer, 12, 34, false); - result->pipeline_statistics.ia_vertices += - r600_query_read_result(buffer, 14, 36, false); - result->pipeline_statistics.hs_invocations += - r600_query_read_result(buffer, 16, 38, false); - result->pipeline_statistics.ds_invocations += - r600_query_read_result(buffer, 18, 40, false); - result->pipeline_statistics.cs_invocations += - r600_query_read_result(buffer, 20, 42, false); -#if 0 /* for testing */ - printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, " - "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, " - "Clipper prims=%llu, PS=%llu, CS=%llu\n", - result->pipeline_statistics.ia_vertices, - result->pipeline_statistics.ia_primitives, - result->pipeline_statistics.vs_invocations, - result->pipeline_statistics.hs_invocations, - result->pipeline_statistics.ds_invocations, - result->pipeline_statistics.gs_invocations, - result->pipeline_statistics.gs_primitives, - result->pipeline_statistics.c_invocations, - result->pipeline_statistics.c_primitives, - result->pipeline_statistics.ps_invocations, - result->pipeline_statistics.cs_invocations); -#endif - break; - default: - assert(0); - } -} - -static boolean r600_get_query_result(struct pipe_context *ctx, - struct pipe_query *query, boolean wait, - union pipe_query_result *result) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - return rquery->ops->get_result(rctx, rquery, wait, result); -} - -static void r600_get_query_result_resource(struct pipe_context *ctx, - struct pipe_query *query, - boolean wait, - enum pipe_query_value_type result_type, - int index, - struct pipe_resource *resource, - unsigned offset) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - rquery->ops->get_result_resource(rctx, rquery, wait, result_type, index, - resource, offset); -} - -static void r600_query_hw_clear_result(struct r600_query_hw *query, - union pipe_query_result *result) -{ - util_query_clear_result(result, 
query->b.type); -} - -bool si_query_hw_get_result(struct r600_common_context *rctx, - struct r600_query *rquery, - bool wait, union pipe_query_result *result) -{ - struct r600_common_screen *rscreen = rctx->screen; - struct r600_query_hw *query = (struct r600_query_hw *)rquery; - struct r600_query_buffer *qbuf; - - query->ops->clear_result(query, result); - - for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { - unsigned usage = PIPE_TRANSFER_READ | - (wait ? 0 : PIPE_TRANSFER_DONTBLOCK); - unsigned results_base = 0; - void *map; - - if (rquery->b.flushed) - map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); - else - map = si_buffer_map_sync_with_rings(rctx, qbuf->buf, usage); - - if (!map) - return false; - - while (results_base != qbuf->results_end) { - query->ops->add_result(rscreen, query, map + results_base, - result); - results_base += query->result_size; - } - } - - /* Convert the time to expected units. */ - if (rquery->type == PIPE_QUERY_TIME_ELAPSED || - rquery->type == PIPE_QUERY_TIMESTAMP) { - result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq; - } - return true; -} - -/* Create the compute shader that is used to collect the results. - * - * One compute grid with a single thread is launched for every query result - * buffer. The thread (optionally) reads a previous summary buffer, then - * accumulates data from the query result buffer, and writes the result either - * to a summary buffer to be consumed by the next grid invocation or to the - * user-supplied buffer. - * - * Data layout: - * - * CONST - * 0.x = end_offset - * 0.y = result_stride - * 0.z = result_count - * 0.w = bit field: - * 1: read previously accumulated values - * 2: write accumulated values for chaining - * 4: write result available - * 8: convert result to boolean (0/1) - * 16: only read one dword and use that as result - * 32: apply timestamp conversion - * 64: store full 64 bits result - * 128: store signed 32 bits result - * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs - * 1.x = fence_offset - * 1.y = pair_stride - * 1.z = pair_count - * - * BUFFER[0] = query result buffer - * BUFFER[1] = previous summary buffer - * BUFFER[2] = next summary buffer or user-supplied buffer - */ -static void r600_create_query_result_shader(struct r600_common_context *rctx) -{ - /* TEMP[0].xy = accumulated result so far - * TEMP[0].z = result not available - * - * TEMP[1].x = current result index - * TEMP[1].y = current pair index - */ - static const char text_tmpl[] = - "COMP\n" - "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n" - "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n" - "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" - "DCL BUFFER[0]\n" - "DCL BUFFER[1]\n" - "DCL BUFFER[2]\n" - "DCL CONST[0][0..1]\n" - "DCL TEMP[0..5]\n" - "IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n" - "IMM[1] UINT32 {1, 2, 4, 8}\n" - "IMM[2] UINT32 {16, 32, 64, 128}\n" - "IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */ - "IMM[4] UINT32 {256, 0, 0, 0}\n" - - "AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n" - "UIF TEMP[5]\n" - /* Check result availability. */ - "LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n" - "ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n" - "MOV TEMP[1], TEMP[0].zzzz\n" - "NOT TEMP[0].z, TEMP[0].zzzz\n" - - /* Load result if available. */ - "UIF TEMP[1]\n" - "LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n" - "ENDIF\n" - "ELSE\n" - /* Load previously accumulated result if requested. 
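- * (config bit 1; r600_query_hw_get_result_resource sets it for every - * buffer of a chain except the first grid it dispatches)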
*/ - "MOV TEMP[0], IMM[0].xxxx\n" - "AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n" - "UIF TEMP[4]\n" - "LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n" - "ENDIF\n" - - "MOV TEMP[1].x, IMM[0].xxxx\n" - "BGNLOOP\n" - /* Break if accumulated result so far is not available. */ - "UIF TEMP[0].zzzz\n" - "BRK\n" - "ENDIF\n" - - /* Break if result_index >= result_count. */ - "USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n" - "UIF TEMP[5]\n" - "BRK\n" - "ENDIF\n" - - /* Load fence and check result availability */ - "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n" - "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n" - "ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n" - "NOT TEMP[0].z, TEMP[0].zzzz\n" - "UIF TEMP[0].zzzz\n" - "BRK\n" - "ENDIF\n" - - "MOV TEMP[1].y, IMM[0].xxxx\n" - "BGNLOOP\n" - /* Load start and end. */ - "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n" - "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n" - "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n" - - "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n" - "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n" - - "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n" - - "AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n" - "UIF TEMP[5].zzzz\n" - /* Load second start/end half-pair and - * take the difference - */ - "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n" - "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n" - "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n" - - "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n" - "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n" - "ENDIF\n" - - "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n" - - /* Increment pair index */ - "UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n" - "USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n" - "UIF TEMP[5]\n" - "BRK\n" - "ENDIF\n" - "ENDLOOP\n" - - /* Increment result index */ - "UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n" - "ENDLOOP\n" - "ENDIF\n" - - "AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n" - "UIF TEMP[4]\n" - /* Store accumulated data for chaining. */ - "STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n" - "ELSE\n" - "AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n" - "UIF TEMP[4]\n" - /* Store result availability. */ - "NOT TEMP[0].z, TEMP[0]\n" - "AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n" - "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n" - - "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n" - "UIF TEMP[4]\n" - "STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n" - "ENDIF\n" - "ELSE\n" - /* Store result if it is available. 
*/ - "NOT TEMP[4], TEMP[0].zzzz\n" - "UIF TEMP[4]\n" - /* Apply timestamp conversion */ - "AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n" - "UIF TEMP[4]\n" - "U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n" - "U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n" - "ENDIF\n" - - /* Convert to boolean */ - "AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n" - "UIF TEMP[4]\n" - "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n" - "AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n" - "MOV TEMP[0].y, IMM[0].xxxx\n" - "ENDIF\n" - - "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n" - "UIF TEMP[4]\n" - "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n" - "ELSE\n" - /* Clamping */ - "UIF TEMP[0].yyyy\n" - "MOV TEMP[0].x, IMM[0].wwww\n" - "ENDIF\n" - - "AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n" - "UIF TEMP[4]\n" - "UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n" - "ENDIF\n" - - "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n" - "ENDIF\n" - "ENDIF\n" - "ENDIF\n" - "ENDIF\n" - - "END\n"; - - char text[sizeof(text_tmpl) + 32]; - struct tgsi_token tokens[1024]; - struct pipe_compute_state state = {}; - - /* Hard code the frequency into the shader so that the backend can - * use the full range of optimizations for divide-by-constant. - */ - snprintf(text, sizeof(text), text_tmpl, - rctx->screen->info.clock_crystal_freq); - - if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { - assert(false); - return; - } - - state.ir_type = PIPE_SHADER_IR_TGSI; - state.prog = tokens; - - rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state); -} - -static void r600_restore_qbo_state(struct r600_common_context *rctx, - struct r600_qbo_state *st) -{ - rctx->b.bind_compute_state(&rctx->b, st->saved_compute); - - rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); - pipe_resource_reference(&st->saved_const0.buffer, NULL); - - rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); - for (unsigned i = 0; i < 3; ++i) - pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL); -} - -static void r600_query_hw_get_result_resource(struct r600_common_context *rctx, - struct r600_query *rquery, - bool wait, - enum pipe_query_value_type result_type, - int index, - struct pipe_resource *resource, - unsigned offset) -{ - struct r600_query_hw *query = (struct r600_query_hw *)rquery; - struct r600_query_buffer *qbuf; - struct r600_query_buffer *qbuf_prev; - struct pipe_resource *tmp_buffer = NULL; - unsigned tmp_buffer_offset = 0; - struct r600_qbo_state saved_state = {}; - struct pipe_grid_info grid = {}; - struct pipe_constant_buffer constant_buffer = {}; - struct pipe_shader_buffer ssbo[3]; - struct r600_hw_query_params params; - struct { - uint32_t end_offset; - uint32_t result_stride; - uint32_t result_count; - uint32_t config; - uint32_t fence_offset; - uint32_t pair_stride; - uint32_t pair_count; - } consts; - - if (!rctx->query_result_shader) { - r600_create_query_result_shader(rctx); - if (!rctx->query_result_shader) - return; - } - - if (query->buffer.previous) { - u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16, - &tmp_buffer_offset, &tmp_buffer); - if (!tmp_buffer) - return; - } - - rctx->save_qbo_state(&rctx->b, &saved_state); - - r600_get_hw_query_params(rctx, query, index >= 0 ? 
index : 0, &params); - consts.end_offset = params.end_offset - params.start_offset; - consts.fence_offset = params.fence_offset - params.start_offset; - consts.result_stride = query->result_size; - consts.pair_stride = params.pair_stride; - consts.pair_count = params.pair_count; - - constant_buffer.buffer_size = sizeof(consts); - constant_buffer.user_buffer = &consts; - - ssbo[1].buffer = tmp_buffer; - ssbo[1].buffer_offset = tmp_buffer_offset; - ssbo[1].buffer_size = 16; - - ssbo[2] = ssbo[1]; - - rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader); - - grid.block[0] = 1; - grid.block[1] = 1; - grid.block[2] = 1; - grid.grid[0] = 1; - grid.grid[1] = 1; - grid.grid[2] = 1; - - consts.config = 0; - if (index < 0) - consts.config |= 4; - if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || - query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) - consts.config |= 8; - else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || - query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) - consts.config |= 8 | 256; - else if (query->b.type == PIPE_QUERY_TIMESTAMP || - query->b.type == PIPE_QUERY_TIME_ELAPSED) - consts.config |= 32; - - switch (result_type) { - case PIPE_QUERY_TYPE_U64: - case PIPE_QUERY_TYPE_I64: - consts.config |= 64; - break; - case PIPE_QUERY_TYPE_I32: - consts.config |= 128; - break; - case PIPE_QUERY_TYPE_U32: - break; - } - - rctx->flags |= rctx->screen->barrier_flags.cp_to_L2; - - for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) { - if (query->b.type != PIPE_QUERY_TIMESTAMP) { - qbuf_prev = qbuf->previous; - consts.result_count = qbuf->results_end / query->result_size; - consts.config &= ~3; - if (qbuf != &query->buffer) - consts.config |= 1; - if (qbuf->previous) - consts.config |= 2; - } else { - /* Only read the last timestamp. */ - qbuf_prev = NULL; - consts.result_count = 0; - consts.config |= 16; - params.start_offset += qbuf->results_end - query->result_size; - } - - rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer); - - ssbo[0].buffer = &qbuf->buf->b.b; - ssbo[0].buffer_offset = params.start_offset; - ssbo[0].buffer_size = qbuf->results_end - params.start_offset; - - if (!qbuf->previous) { - ssbo[2].buffer = resource; - ssbo[2].buffer_offset = offset; - ssbo[2].buffer_size = 8; - - ((struct r600_resource *)resource)->TC_L2_dirty = true; - } - - rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo); - - if (wait && qbuf == &query->buffer) { - uint64_t va; - - /* Wait for result availability. Wait only for readiness - * of the last entry, since the fence writes should be - * serialized in the CP. 
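- * Bit 31 of that dword is the availability flag written when the sample - * completes, hence the 0x80000000 mask/value pair passed to - * si_gfx_wait_fence below.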
- */ - va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size; - va += params.fence_offset; - - si_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000); - } - - rctx->b.launch_grid(&rctx->b, &grid); - rctx->flags |= rctx->screen->barrier_flags.compute_to_L2; - } - - r600_restore_qbo_state(rctx, &saved_state); - pipe_resource_reference(&tmp_buffer, NULL); -} - -static void r600_render_condition(struct pipe_context *ctx, - struct pipe_query *query, - boolean condition, - enum pipe_render_cond_flag mode) -{ - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query_hw *rquery = (struct r600_query_hw *)query; - struct r600_atom *atom = &rctx->render_cond_atom; - - if (query) { - bool needs_workaround = false; - - /* There was a firmware regression in VI which causes successive - * SET_PREDICATION packets to give the wrong answer for - * non-inverted stream overflow predication. - */ - if (((rctx->chip_class == VI && rctx->screen->info.pfp_fw_feature < 49) || - (rctx->chip_class == GFX9 && rctx->screen->info.pfp_fw_feature < 38)) && - !condition && - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE && - (rquery->buffer.previous || - rquery->buffer.results_end > rquery->result_size)))) { - needs_workaround = true; - } - - if (needs_workaround && !rquery->workaround_buf) { - bool old_force_off = rctx->render_cond_force_off; - rctx->render_cond_force_off = true; - - u_suballocator_alloc( - rctx->allocator_zeroed_memory, 8, 8, - &rquery->workaround_offset, - (struct pipe_resource **)&rquery->workaround_buf); - - /* Reset to NULL to avoid a redundant SET_PREDICATION - * from launching the compute grid. - */ - rctx->render_cond = NULL; - - ctx->get_query_result_resource( - ctx, query, true, PIPE_QUERY_TYPE_U64, 0, - &rquery->workaround_buf->b.b, rquery->workaround_offset); - - /* Setting this in the render cond atom is too late, - * so set it here. */ - rctx->flags |= rctx->screen->barrier_flags.L2_to_cp | - R600_CONTEXT_FLUSH_FOR_RENDER_COND; - - rctx->render_cond_force_off = old_force_off; - } - } - - rctx->render_cond = query; - rctx->render_cond_invert = condition; - rctx->render_cond_mode = mode; - - rctx->set_atom_dirty(rctx, atom, query != NULL); -} - -void si_suspend_queries(struct r600_common_context *ctx) -{ - struct r600_query_hw *query; - - LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) { - r600_query_hw_emit_stop(ctx, query); - } - assert(ctx->num_cs_dw_queries_suspend == 0); -} - -static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx, - struct list_head *query_list) -{ - struct r600_query_hw *query; - unsigned num_dw = 0; - - LIST_FOR_EACH_ENTRY(query, query_list, list) { - /* begin + end */ - num_dw += query->num_cs_dw_begin + query->num_cs_dw_end; - - /* Workaround for the fact that - * num_cs_dw_nontimer_queries_suspend is incremented for every - * resumed query, which raises the bar in need_cs_space for - * queries about to be resumed. - */ - num_dw += query->num_cs_dw_end; - } - /* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */ - num_dw += 13; - - return num_dw; -} - -void si_resume_queries(struct r600_common_context *ctx) -{ - struct r600_query_hw *query; - unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries); - - assert(ctx->num_cs_dw_queries_suspend == 0); - - /* Check CS space here. Resuming must not be interrupted by flushes. 
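- * (A flush in the middle would try to suspend queries whose begin events - * had not all been emitted yet.)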
*/ - ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true); - - LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) { - r600_query_hw_emit_start(ctx, query); - } -} - -#define XFULL(name_, query_type_, type_, result_type_, group_id_) \ - { \ - .name = name_, \ - .query_type = R600_QUERY_##query_type_, \ - .type = PIPE_DRIVER_QUERY_TYPE_##type_, \ - .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \ - .group_id = group_id_ \ - } - -#define X(name_, query_type_, type_, result_type_) \ - XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0) - -#define XG(group_, name_, query_type_, type_, result_type_) \ - XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_) - -static struct pipe_driver_query_info r600_driver_query_list[] = { - X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE), - X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE), - X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE), - X("draw-calls", DRAW_CALLS, UINT64, AVERAGE), - X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE), - X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE), - X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE), - X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE), - X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE), - X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE), - X("dma-calls", DMA_CALLS, UINT64, AVERAGE), - X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE), - X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE), - X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE), - X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE), - X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE), - X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE), - X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE), - X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE), - X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE), - X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE), - X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE), - X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE), - X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE), - X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE), - X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE), - X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE), - X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE), - X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE), - X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE), - X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE), - X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE), - X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE), - X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE), - X("GFX-IB-size", GFX_IB_SIZE, UINT64, AVERAGE), - X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE), - X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE), - X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE), - X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE), - X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE), - X("GTT-usage", GTT_USAGE, BYTES, AVERAGE), - X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE), - - /* GPIN queries are for the benefit of old versions of GPUPerfStudio, - * which use it as a fallback path to detect the GPU type. - * - * Note: The names of these queries are significant for GPUPerfStudio - * (and possibly their order as well). 
*/ - XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE), - XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE), - XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE), - XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE), - XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE), - - X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE), - X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE), - X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE), - - /* The following queries must be at the end of the list because their - * availability is adjusted dynamically based on the DRM version. */ - X("GPU-load", GPU_LOAD, UINT64, AVERAGE), - X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE), - X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE), - X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE), - X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE), - X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE), - X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE), - X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE), - X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE), - X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE), - X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE), - X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE), - X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE), - X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE), - X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE), - X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE), - X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE), - X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE), - X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE), - X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE), - X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE), -}; - -#undef X -#undef XG -#undef XFULL - -static unsigned r600_get_num_queries(struct r600_common_screen *rscreen) -{ - if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) - return ARRAY_SIZE(r600_driver_query_list); - else if (rscreen->info.drm_major == 3) { - if (rscreen->chip_class >= VI) - return ARRAY_SIZE(r600_driver_query_list); - else - return ARRAY_SIZE(r600_driver_query_list) - 7; - } - else - return ARRAY_SIZE(r600_driver_query_list) - 25; -} - -static int r600_get_driver_query_info(struct pipe_screen *screen, - unsigned index, - struct pipe_driver_query_info *info) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - unsigned num_queries = r600_get_num_queries(rscreen); - - if (!info) { - unsigned num_perfcounters = - si_get_perfcounter_info(rscreen, 0, NULL); - - return num_queries + num_perfcounters; - } - - if (index >= num_queries) - return si_get_perfcounter_info(rscreen, index - num_queries, info); - - *info = r600_driver_query_list[index]; - - switch (info->query_type) { - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_MAPPED_VRAM: - info->max_value.u64 = rscreen->info.vram_size; - break; - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_MAPPED_GTT: - info->max_value.u64 = rscreen->info.gart_size; - break; - case R600_QUERY_GPU_TEMPERATURE: - info->max_value.u64 = 125; - break; - case R600_QUERY_VRAM_VIS_USAGE: - info->max_value.u64 = rscreen->info.vram_vis_size; - break; - } - - if (info->group_id != ~(unsigned)0 && rscreen->perfcounters) - info->group_id += rscreen->perfcounters->num_groups; - - return 1; -} - -/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware - * performance counter groups, so be careful when changing this and related - * functions. 
- */ -static int r600_get_driver_query_group_info(struct pipe_screen *screen, - unsigned index, - struct pipe_driver_query_group_info *info) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; - unsigned num_pc_groups = 0; - - if (rscreen->perfcounters) - num_pc_groups = rscreen->perfcounters->num_groups; - - if (!info) - return num_pc_groups + R600_NUM_SW_QUERY_GROUPS; - - if (index < num_pc_groups) - return si_get_perfcounter_group_info(rscreen, index, info); - - index -= num_pc_groups; - if (index >= R600_NUM_SW_QUERY_GROUPS) - return 0; - - info->name = "GPIN"; - info->max_active_queries = 5; - info->num_queries = 5; - return 1; -} - -void si_init_query_functions(struct r600_common_context *rctx) -{ - rctx->b.create_query = r600_create_query; - rctx->b.create_batch_query = si_create_batch_query; - rctx->b.destroy_query = r600_destroy_query; - rctx->b.begin_query = r600_begin_query; - rctx->b.end_query = r600_end_query; - rctx->b.get_query_result = r600_get_query_result; - rctx->b.get_query_result_resource = r600_get_query_result_resource; - rctx->render_cond_atom.emit = r600_emit_query_predication; - - if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0) - rctx->b.render_condition = r600_render_condition; - - LIST_INITHEAD(&rctx->active_queries); -} - -void si_init_screen_query_functions(struct r600_common_screen *rscreen) -{ - rscreen->b.get_driver_query_info = r600_get_driver_query_info; - rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info; -} diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_query.h b/lib/mesa/src/gallium/drivers/radeon/r600_query.h deleted file mode 100644 index 04943da36..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_query.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Nicolai Hähnle <nicolai.haehnle@amd.com> - * - */ - -#ifndef R600_QUERY_H -#define R600_QUERY_H - -#include "util/u_threaded_context.h" - -struct pipe_context; -struct pipe_query; -struct pipe_resource; - -struct r600_common_context; -struct r600_common_screen; -struct r600_query; -struct r600_query_hw; -struct r600_resource; - -enum { - R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, - R600_QUERY_DECOMPRESS_CALLS, - R600_QUERY_MRT_DRAW_CALLS, - R600_QUERY_PRIM_RESTART_CALLS, - R600_QUERY_SPILL_DRAW_CALLS, - R600_QUERY_COMPUTE_CALLS, - R600_QUERY_SPILL_COMPUTE_CALLS, - R600_QUERY_DMA_CALLS, - R600_QUERY_CP_DMA_CALLS, - R600_QUERY_NUM_VS_FLUSHES, - R600_QUERY_NUM_PS_FLUSHES, - R600_QUERY_NUM_CS_FLUSHES, - R600_QUERY_NUM_CB_CACHE_FLUSHES, - R600_QUERY_NUM_DB_CACHE_FLUSHES, - R600_QUERY_NUM_L2_INVALIDATES, - R600_QUERY_NUM_L2_WRITEBACKS, - R600_QUERY_NUM_RESIDENT_HANDLES, - R600_QUERY_TC_OFFLOADED_SLOTS, - R600_QUERY_TC_DIRECT_SLOTS, - R600_QUERY_TC_NUM_SYNCS, - R600_QUERY_CS_THREAD_BUSY, - R600_QUERY_GALLIUM_THREAD_BUSY, - R600_QUERY_REQUESTED_VRAM, - R600_QUERY_REQUESTED_GTT, - R600_QUERY_MAPPED_VRAM, - R600_QUERY_MAPPED_GTT, - R600_QUERY_BUFFER_WAIT_TIME, - R600_QUERY_NUM_MAPPED_BUFFERS, - R600_QUERY_NUM_GFX_IBS, - R600_QUERY_NUM_SDMA_IBS, - R600_QUERY_GFX_BO_LIST_SIZE, - R600_QUERY_GFX_IB_SIZE, - R600_QUERY_NUM_BYTES_MOVED, - R600_QUERY_NUM_EVICTIONS, - R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS, - R600_QUERY_VRAM_USAGE, - R600_QUERY_VRAM_VIS_USAGE, - R600_QUERY_GTT_USAGE, - R600_QUERY_GPU_TEMPERATURE, - R600_QUERY_CURRENT_GPU_SCLK, - R600_QUERY_CURRENT_GPU_MCLK, - R600_QUERY_GPU_LOAD, - R600_QUERY_GPU_SHADERS_BUSY, - R600_QUERY_GPU_TA_BUSY, - R600_QUERY_GPU_GDS_BUSY, - R600_QUERY_GPU_VGT_BUSY, - R600_QUERY_GPU_IA_BUSY, - R600_QUERY_GPU_SX_BUSY, - R600_QUERY_GPU_WD_BUSY, - R600_QUERY_GPU_BCI_BUSY, - R600_QUERY_GPU_SC_BUSY, - R600_QUERY_GPU_PA_BUSY, - R600_QUERY_GPU_DB_BUSY, - R600_QUERY_GPU_CP_BUSY, - R600_QUERY_GPU_CB_BUSY, - R600_QUERY_GPU_SDMA_BUSY, - R600_QUERY_GPU_PFP_BUSY, - R600_QUERY_GPU_MEQ_BUSY, - R600_QUERY_GPU_ME_BUSY, - R600_QUERY_GPU_SURF_SYNC_BUSY, - R600_QUERY_GPU_CP_DMA_BUSY, - R600_QUERY_GPU_SCRATCH_RAM_BUSY, - R600_QUERY_NUM_COMPILATIONS, - R600_QUERY_NUM_SHADERS_CREATED, - R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO, - R600_QUERY_NUM_SHADER_CACHE_HITS, - R600_QUERY_GPIN_ASIC_ID, - R600_QUERY_GPIN_NUM_SIMD, - R600_QUERY_GPIN_NUM_RB, - R600_QUERY_GPIN_NUM_SPI, - R600_QUERY_GPIN_NUM_SE, - - R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, -}; - -enum { - R600_QUERY_GROUP_GPIN = 0, - R600_NUM_SW_QUERY_GROUPS -}; - -struct r600_query_ops { - void (*destroy)(struct r600_common_screen *, struct r600_query *); - bool (*begin)(struct r600_common_context *, struct r600_query *); - bool (*end)(struct r600_common_context *, struct r600_query *); - bool (*get_result)(struct r600_common_context *, - struct r600_query *, bool wait, - union pipe_query_result *result); - void (*get_result_resource)(struct r600_common_context *, - struct r600_query *, bool wait, - enum pipe_query_value_type result_type, - int index, - struct pipe_resource *resource, - unsigned offset); -}; - -struct r600_query { - struct threaded_query b; - struct r600_query_ops *ops; - - /* The type of query */ - unsigned type; -}; - -enum { - R600_QUERY_HW_FLAG_NO_START = (1 << 0), - /* gap */ - /* whether begin_query doesn't clear the result */ - R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), -}; - -struct r600_query_hw_ops { - bool (*prepare_buffer)(struct r600_common_screen *, - struct 
r600_query_hw *, - struct r600_resource *); - void (*emit_start)(struct r600_common_context *, - struct r600_query_hw *, - struct r600_resource *buffer, uint64_t va); - void (*emit_stop)(struct r600_common_context *, - struct r600_query_hw *, - struct r600_resource *buffer, uint64_t va); - void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); - void (*add_result)(struct r600_common_screen *screen, - struct r600_query_hw *, void *buffer, - union pipe_query_result *result); -}; - -struct r600_query_buffer { - /* The buffer where query results are stored. */ - struct r600_resource *buf; - /* Offset of the next free result after current query data */ - unsigned results_end; - /* If a query buffer is full, a new buffer is created and the old one - * is put in here. When we calculate the result, we sum up the samples - * from all buffers. */ - struct r600_query_buffer *previous; -}; - -struct r600_query_hw { - struct r600_query b; - struct r600_query_hw_ops *ops; - unsigned flags; - - /* The query buffer and how many results are in it. */ - struct r600_query_buffer buffer; - /* Size of the result in memory for both begin_query and end_query, - * this can be one or two numbers, or it could even be the size of a structure. */ - unsigned result_size; - /* The number of dwords for begin_query or end_query. */ - unsigned num_cs_dw_begin; - unsigned num_cs_dw_end; - /* Linked list of queries */ - struct list_head list; - /* For transform feedback: which stream the query is for */ - unsigned stream; - - /* Workaround via compute shader */ - struct r600_resource *workaround_buf; - unsigned workaround_offset; -}; - -bool si_query_hw_init(struct r600_common_screen *rscreen, - struct r600_query_hw *query); -void si_query_hw_destroy(struct r600_common_screen *rscreen, - struct r600_query *rquery); -bool si_query_hw_begin(struct r600_common_context *rctx, - struct r600_query *rquery); -bool si_query_hw_end(struct r600_common_context *rctx, - struct r600_query *rquery); -bool si_query_hw_get_result(struct r600_common_context *rctx, - struct r600_query *rquery, - bool wait, - union pipe_query_result *result); - -/* Performance counters */ -enum { - /* This block is part of the shader engine */ - R600_PC_BLOCK_SE = (1 << 0), - - /* Expose per-instance groups instead of summing all instances (within - * an SE). */ - R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), - - /* Expose per-SE groups instead of summing instances across SEs. */ - R600_PC_BLOCK_SE_GROUPS = (1 << 2), - - /* Shader block */ - R600_PC_BLOCK_SHADER = (1 << 3), - - /* Non-shader block with perfcounters windowed by shaders. */ - R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), -}; - -/* Describes a hardware block with performance counters. Multiple instances of - * each block, possibly per-SE, may exist on the chip. Depending on the block - * and on the user's configuration, we either - * (a) expose every instance as a performance counter group, - * (b) expose a single performance counter group that reports the sum over all - * instances, or - * (c) expose one performance counter group per instance, but summed over all - * shader engines. 
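- * For example (illustrative numbers, not taken from this header): a block - * with two instances per SE on a four-SE chip would expose eight groups - * under (a), a single group under (b), and two groups under (c).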
- */ -struct r600_perfcounter_block { - const char *basename; - unsigned flags; - unsigned num_counters; - unsigned num_selectors; - unsigned num_instances; - - unsigned num_groups; - char *group_names; - unsigned group_name_stride; - - char *selector_names; - unsigned selector_name_stride; - - void *data; -}; - -struct r600_perfcounters { - unsigned num_groups; - unsigned num_blocks; - struct r600_perfcounter_block *blocks; - - unsigned num_start_cs_dwords; - unsigned num_stop_cs_dwords; - unsigned num_instance_cs_dwords; - unsigned num_shaders_cs_dwords; - - unsigned num_shader_types; - const char * const *shader_type_suffixes; - const unsigned *shader_type_bits; - - void (*get_size)(struct r600_perfcounter_block *, - unsigned count, unsigned *selectors, - unsigned *num_select_dw, unsigned *num_read_dw); - - void (*emit_instance)(struct r600_common_context *, - int se, int instance); - void (*emit_shaders)(struct r600_common_context *, unsigned shaders); - void (*emit_select)(struct r600_common_context *, - struct r600_perfcounter_block *, - unsigned count, unsigned *selectors); - void (*emit_start)(struct r600_common_context *, - struct r600_resource *buffer, uint64_t va); - void (*emit_stop)(struct r600_common_context *, - struct r600_resource *buffer, uint64_t va); - void (*emit_read)(struct r600_common_context *, - struct r600_perfcounter_block *, - unsigned count, unsigned *selectors, - struct r600_resource *buffer, uint64_t va); - - void (*cleanup)(struct r600_common_screen *); - - bool separate_se; - bool separate_instance; -}; - -struct pipe_query *si_create_batch_query(struct pipe_context *ctx, - unsigned num_queries, - unsigned *query_types); - -int si_get_perfcounter_info(struct r600_common_screen *, - unsigned index, - struct pipe_driver_query_info *info); -int si_get_perfcounter_group_info(struct r600_common_screen *, - unsigned index, - struct pipe_driver_query_group_info *info); - -bool si_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); -void si_perfcounters_add_block(struct r600_common_screen *, - struct r600_perfcounters *, - const char *name, unsigned flags, - unsigned counters, unsigned selectors, - unsigned instances, void *data); -void si_perfcounters_do_destroy(struct r600_perfcounters *); -void si_query_hw_reset_buffers(struct r600_common_context *rctx, - struct r600_query_hw *query); - -struct r600_qbo_state { - void *saved_compute; - struct pipe_constant_buffer saved_const0; - struct pipe_shader_buffer saved_ssbo[3]; -}; - -#endif /* R600_QUERY_H */ diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c b/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c deleted file mode 100644 index f7002bc39..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* This file implements randomized SDMA texture blit tests. */ - -#include "r600_pipe_common.h" -#include "util/u_surface.h" -#include "util/rand_xor.h" - -static uint64_t seed_xorshift128plus[2]; - -#define RAND_NUM_SIZE 8 - -/* The GPU blits are emulated on the CPU using these CPU textures. */ - -struct cpu_texture { - uint8_t *ptr; - uint64_t size; - uint64_t layer_stride; - unsigned stride; -}; - -static void alloc_cpu_texture(struct cpu_texture *tex, - struct pipe_resource *templ, int bpp) -{ - tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE); - tex->layer_stride = (uint64_t)tex->stride * templ->height0; - tex->size = tex->layer_stride * templ->array_size; - tex->ptr = malloc(tex->size); - assert(tex->ptr); -} - -static void set_random_pixels(struct pipe_context *ctx, - struct pipe_resource *tex, - struct cpu_texture *cpu) -{ - struct pipe_transfer *t; - uint8_t *map; - int x,y,z; - - map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE, - 0, 0, 0, tex->width0, tex->height0, - tex->array_size, &t); - assert(map); - - for (z = 0; z < tex->array_size; z++) { - for (y = 0; y < tex->height0; y++) { - uint64_t *ptr = (uint64_t*) - (map + t->layer_stride*z + t->stride*y); - uint64_t *ptr_cpu = (uint64_t*) - (cpu->ptr + cpu->layer_stride*z + cpu->stride*y); - unsigned size = cpu->stride / RAND_NUM_SIZE; - - assert(t->stride % RAND_NUM_SIZE == 0); - assert(cpu->stride % RAND_NUM_SIZE == 0); - - for (x = 0; x < size; x++) { - *ptr++ = *ptr_cpu++ = - rand_xorshift128plus(seed_xorshift128plus); - } - } - } - - pipe_transfer_unmap(ctx, t); -} - -static bool compare_textures(struct pipe_context *ctx, - struct pipe_resource *tex, - struct cpu_texture *cpu, int bpp) -{ - struct pipe_transfer *t; - uint8_t *map; - int y,z; - bool pass = true; - - map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ, - 0, 0, 0, tex->width0, tex->height0, - tex->array_size, &t); - assert(map); - - for (z = 0; z < tex->array_size; z++) { - for (y = 0; y < tex->height0; y++) { - uint8_t *ptr = map + t->layer_stride*z + t->stride*y; - uint8_t *cpu_ptr = cpu->ptr + - cpu->layer_stride*z + cpu->stride*y; - - if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) { - pass = false; - goto done; - } - } - } -done: - pipe_transfer_unmap(ctx, t); - return pass; -} - -static enum pipe_format get_format_from_bpp(int bpp) -{ - switch (bpp) { - case 1: - return PIPE_FORMAT_R8_UINT; - case 2: - return PIPE_FORMAT_R16_UINT; - case 4: - return PIPE_FORMAT_R32_UINT; - case 8: - return PIPE_FORMAT_R32G32_UINT; - case 16: - return PIPE_FORMAT_R32G32B32A32_UINT; - default: - assert(0); - return PIPE_FORMAT_NONE; - } -} - -static const char *array_mode_to_string(struct r600_common_screen *rscreen, - struct radeon_surf *surf) -{ - if (rscreen->chip_class >= GFX9) { - /* TODO */ - return " UNKNOWN"; - } else { - switch (surf->u.legacy.level[0].mode) { - case RADEON_SURF_MODE_LINEAR_ALIGNED: - return "LINEAR_ALIGNED"; - case RADEON_SURF_MODE_1D: - return "1D_TILED_THIN1"; - case RADEON_SURF_MODE_2D: - return "2D_TILED_THIN1"; - default: - 
assert(0); - return " UNKNOWN"; - } - } -} - -static unsigned generate_max_tex_side(unsigned max_tex_side) -{ - switch (rand() % 4) { - case 0: - /* Try to hit large sizes in 1/4 of the cases. */ - return max_tex_side; - case 1: - /* Try to hit 1D tiling in 1/4 of the cases. */ - return 128; - default: - /* Try to hit common sizes in 2/4 of the cases. */ - return 2048; - } -} - -void si_test_dma(struct r600_common_screen *rscreen) -{ - struct pipe_screen *screen = &rscreen->b; - struct pipe_context *ctx = screen->context_create(screen, NULL, 0); - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - uint64_t max_alloc_size; - unsigned i, iterations, num_partial_copies, max_levels, max_tex_side; - unsigned num_pass = 0, num_fail = 0; - - max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); - max_tex_side = 1 << (max_levels - 1); - - /* Max 128 MB allowed for both textures. */ - max_alloc_size = 128 * 1024 * 1024; - - /* the seed for random test parameters */ - srand(0x9b47d95b); - /* the seed for random pixel data */ - s_rand_xorshift128plus(seed_xorshift128plus, false); - - iterations = 1000000000; /* just kill it when you are bored */ - num_partial_copies = 30; - - /* These parameters are randomly generated per test: - * - whether to do one whole-surface copy or N partial copies per test - * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D) - * - which texture dimensions to use - * - whether to use VRAM (all tiling modes) and GTT (staging, linear - * only) allocations - * - random initial pixels in src - * - generate random subrectangle copies for partial blits - */ - for (i = 0; i < iterations; i++) { - struct pipe_resource tsrc = {}, tdst = {}, *src, *dst; - struct r600_texture *rdst; - struct r600_texture *rsrc; - struct cpu_texture src_cpu, dst_cpu; - unsigned bpp, max_width, max_height, max_depth, j, num; - unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen; - unsigned max_tex_layers; - bool pass; - bool do_partial_copies = rand() & 1; - - /* generate a random test case */ - tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY; - tsrc.depth0 = tdst.depth0 = 1; - - bpp = 1 << (rand() % 5); - tsrc.format = tdst.format = get_format_from_bpp(bpp); - - max_tex_side_gen = generate_max_tex_side(max_tex_side); - max_tex_layers = rand() % 4 ? 1 : 5; - - tsrc.width0 = (rand() % max_tex_side_gen) + 1; - tsrc.height0 = (rand() % max_tex_side_gen) + 1; - tsrc.array_size = (rand() % max_tex_layers) + 1; - - /* Have a 1/4 chance of getting power-of-two dimensions. */ - if (rand() % 4 == 0) { - tsrc.width0 = util_next_power_of_two(tsrc.width0); - tsrc.height0 = util_next_power_of_two(tsrc.height0); - } - - if (!do_partial_copies) { - /* whole-surface copies only, same dimensions */ - tdst = tsrc; - } else { - max_tex_side_gen = generate_max_tex_side(max_tex_side); - max_tex_layers = rand() % 4 ? 1 : 5; - - /* many partial copies, dimensions can be different */ - tdst.width0 = (rand() % max_tex_side_gen) + 1; - tdst.height0 = (rand() % max_tex_side_gen) + 1; - tdst.array_size = (rand() % max_tex_layers) + 1; - - /* Have a 1/4 chance of getting power-of-two dimensions. 
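- * For example, util_next_power_of_two turns a randomly drawn 1000 - * into 1024.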
*/ - if (rand() % 4 == 0) { - tdst.width0 = util_next_power_of_two(tdst.width0); - tdst.height0 = util_next_power_of_two(tdst.height0); - } - } - - /* check texture sizes */ - if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp + - (uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp > - max_alloc_size) { - /* too large, try again */ - i--; - continue; - } - - /* VRAM + the tiling mode depends on dimensions (3/4 of cases), - * or GTT + linear only (1/4 of cases) - */ - tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING; - tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING; - - /* Allocate textures (both the GPU and CPU copies). - * The CPU will emulate what the GPU should be doing. - */ - src = screen->resource_create(screen, &tsrc); - dst = screen->resource_create(screen, &tdst); - assert(src); - assert(dst); - rdst = (struct r600_texture*)dst; - rsrc = (struct r600_texture*)src; - alloc_cpu_texture(&src_cpu, &tsrc, bpp); - alloc_cpu_texture(&dst_cpu, &tdst, bpp); - - printf("%4u: dst = (%5u x %5u x %u, %s), " - " src = (%5u x %5u x %u, %s), bpp = %2u, ", - i, tdst.width0, tdst.height0, tdst.array_size, - array_mode_to_string(rscreen, &rdst->surface), - tsrc.width0, tsrc.height0, tsrc.array_size, - array_mode_to_string(rscreen, &rsrc->surface), bpp); - fflush(stdout); - - /* set src pixels */ - set_random_pixels(ctx, src, &src_cpu); - - /* clear dst pixels */ - rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true); - memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size); - - /* preparation */ - max_width = MIN2(tsrc.width0, tdst.width0); - max_height = MIN2(tsrc.height0, tdst.height0); - max_depth = MIN2(tsrc.array_size, tdst.array_size); - - num = do_partial_copies ? num_partial_copies : 1; - for (j = 0; j < num; j++) { - int width, height, depth; - int srcx, srcy, srcz, dstx, dsty, dstz; - struct pipe_box box; - unsigned old_num_draw_calls = rctx->num_draw_calls; - unsigned old_num_dma_calls = rctx->num_dma_calls; - - if (!do_partial_copies) { - /* copy whole src to dst */ - width = max_width; - height = max_height; - depth = max_depth; - - srcx = srcy = srcz = dstx = dsty = dstz = 0; - } else { - /* random sub-rectangle copies from src to dst */ - depth = (rand() % max_depth) + 1; - srcz = rand() % (tsrc.array_size - depth + 1); - dstz = rand() % (tdst.array_size - depth + 1); - - /* special code path to hit the tiled partial copies */ - if (!rsrc->surface.is_linear && - !rdst->surface.is_linear && - rand() & 1) { - if (max_width < 8 || max_height < 8) - continue; - width = ((rand() % (max_width / 8)) + 1) * 8; - height = ((rand() % (max_height / 8)) + 1) * 8; - - srcx = rand() % (tsrc.width0 - width + 1) & ~0x7; - srcy = rand() % (tsrc.height0 - height + 1) & ~0x7; - - dstx = rand() % (tdst.width0 - width + 1) & ~0x7; - dsty = rand() % (tdst.height0 - height + 1) & ~0x7; - } else { - /* just make sure that it doesn't divide by zero */ - assert(max_width > 0 && max_height > 0); - - width = (rand() % max_width) + 1; - height = (rand() % max_height) + 1; - - srcx = rand() % (tsrc.width0 - width + 1); - srcy = rand() % (tsrc.height0 - height + 1); - - dstx = rand() % (tdst.width0 - width + 1); - dsty = rand() % (tdst.height0 - height + 1); - } - - /* special code path to hit out-of-bounds reads in L2T */ - if (rsrc->surface.is_linear && - !rdst->surface.is_linear && - rand() % 4 == 0) { - srcx = 0; - srcy = 0; - srcz = 0; - } - } - - /* GPU copy */ - u_box_3d(srcx, srcy, srcz, width, height, depth, &box); - 
rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box); - - /* See which engine was used. */ - gfx_blits += rctx->num_draw_calls > old_num_draw_calls; - dma_blits += rctx->num_dma_calls > old_num_dma_calls; - - /* CPU copy */ - util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride, - dst_cpu.layer_stride, - dstx, dsty, dstz, width, height, depth, - src_cpu.ptr, src_cpu.stride, - src_cpu.layer_stride, - srcx, srcy, srcz); - } - - pass = compare_textures(ctx, dst, &dst_cpu, bpp); - if (pass) - num_pass++; - else - num_fail++; - - printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n", - gfx_blits, dma_blits, pass ? "pass" : "fail", - num_pass, num_pass+num_fail); - - /* cleanup */ - pipe_resource_reference(&src, NULL); - pipe_resource_reference(&dst, NULL); - free(src_cpu.ptr); - free(dst_cpu.ptr); - } - - ctx->destroy(ctx); - exit(0); -} diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_texture.c b/lib/mesa/src/gallium/drivers/radeon/r600_texture.c deleted file mode 100644 index 3d623c251..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/r600_texture.c +++ /dev/null @@ -1,2933 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ -#include "r600_pipe_common.h" -#include "r600_cs.h" -#include "r600_query.h" -#include "util/u_format.h" -#include "util/u_log.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" -#include "util/u_surface.h" -#include "os/os_time.h" -#include <errno.h> -#include <inttypes.h> -#include "state_tracker/drm_driver.h" -#include "amd/common/sid.h" - -static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, - struct r600_texture *rtex); -static enum radeon_surf_mode -r600_choose_tiling(struct r600_common_screen *rscreen, - const struct pipe_resource *templ); - - -bool si_prepare_for_dma_blit(struct r600_common_context *rctx, - struct r600_texture *rdst, - unsigned dst_level, unsigned dstx, - unsigned dsty, unsigned dstz, - struct r600_texture *rsrc, - unsigned src_level, - const struct pipe_box *src_box) -{ - if (!rctx->dma.cs) - return false; - - if (rdst->surface.bpe != rsrc->surface.bpe) - return false; - - /* MSAA: Blits don't exist in the real world. */ - if (rsrc->resource.b.b.nr_samples > 1 || - rdst->resource.b.b.nr_samples > 1) - return false; - - /* Depth-stencil surfaces: - * When dst is linear, the DB->CB copy preserves HTILE. 
- * When dst is tiled, the 3D path must be used to update HTILE. - */ - if (rsrc->is_depth || rdst->is_depth) - return false; - - /* DCC as: - * src: Use the 3D path. DCC decompression is expensive. - * dst: Use the 3D path to compress the pixels with DCC. - */ - if (vi_dcc_enabled(rsrc, src_level) || - vi_dcc_enabled(rdst, dst_level)) - return false; - - /* CMASK as: - * src: Both texture and SDMA paths need decompression. Use SDMA. - * dst: If overwriting the whole texture, discard CMASK and use - * SDMA. Otherwise, use the 3D path. - */ - if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) { - /* The CMASK clear is only enabled for the first level. */ - assert(dst_level == 0); - if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level, - dstx, dsty, dstz, src_box->width, - src_box->height, src_box->depth)) - return false; - - r600_texture_discard_cmask(rctx->screen, rdst); - } - - /* All requirements are met. Prepare textures for SDMA. */ - if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level)) - rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b); - - assert(!(rsrc->dirty_level_mask & (1 << src_level))); - assert(!(rdst->dirty_level_mask & (1 << dst_level))); - - return true; -} - -/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */ -static void r600_copy_region_with_blit(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - struct pipe_blit_info blit; - - memset(&blit, 0, sizeof(blit)); - blit.src.resource = src; - blit.src.format = src->format; - blit.src.level = src_level; - blit.src.box = *src_box; - blit.dst.resource = dst; - blit.dst.format = dst->format; - blit.dst.level = dst_level; - blit.dst.box.x = dstx; - blit.dst.box.y = dsty; - blit.dst.box.z = dstz; - blit.dst.box.width = src_box->width; - blit.dst.box.height = src_box->height; - blit.dst.box.depth = src_box->depth; - blit.mask = util_format_get_mask(src->format) & - util_format_get_mask(dst->format); - blit.filter = PIPE_TEX_FILTER_NEAREST; - - if (blit.mask) { - pipe->blit(pipe, &blit); - } -} - -/* Copy from a full GPU texture to a transfer's staging one. */ -static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; - struct pipe_resource *dst = &rtransfer->staging->b.b; - struct pipe_resource *src = transfer->resource; - - if (src->nr_samples > 1) { - r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, - src, transfer->level, &transfer->box); - return; - } - - rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, - &transfer->box); -} - -/* Copy from a transfer's staging texture to a full GPU one. 
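- * This is the inverse of r600_copy_to_staging_texture: the same blit or - * dma_copy path with src and dst swapped, and the source box anchored at - * the origin of the staging texture.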
*/ -static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; - struct pipe_resource *dst = transfer->resource; - struct pipe_resource *src = &rtransfer->staging->b.b; - struct pipe_box sbox; - - u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox); - - if (dst->nr_samples > 1) { - r600_copy_region_with_blit(ctx, dst, transfer->level, - transfer->box.x, transfer->box.y, transfer->box.z, - src, 0, &sbox); - return; - } - - rctx->dma_copy(ctx, dst, transfer->level, - transfer->box.x, transfer->box.y, transfer->box.z, - src, 0, &sbox); -} - -static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen, - struct r600_texture *rtex, unsigned level, - const struct pipe_box *box, - unsigned *stride, - unsigned *layer_stride) -{ - if (rscreen->chip_class >= GFX9) { - *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe; - *layer_stride = rtex->surface.u.gfx9.surf_slice_size; - - if (!box) - return 0; - - /* Each texture is an array of slices. Each slice is an array - * of mipmap levels. */ - return box->z * rtex->surface.u.gfx9.surf_slice_size + - rtex->surface.u.gfx9.offset[level] + - (box->y / rtex->surface.blk_h * - rtex->surface.u.gfx9.surf_pitch + - box->x / rtex->surface.blk_w) * rtex->surface.bpe; - } else { - *stride = rtex->surface.u.legacy.level[level].nblk_x * - rtex->surface.bpe; - *layer_stride = rtex->surface.u.legacy.level[level].slice_size; - - if (!box) - return rtex->surface.u.legacy.level[level].offset; - - /* Each texture is an array of mipmap levels. Each level is - * an array of slices. */ - return rtex->surface.u.legacy.level[level].offset + - box->z * rtex->surface.u.legacy.level[level].slice_size + - (box->y / rtex->surface.blk_h * - rtex->surface.u.legacy.level[level].nblk_x + - box->x / rtex->surface.blk_w) * rtex->surface.bpe; - } -} - -static int r600_init_surface(struct r600_common_screen *rscreen, - struct radeon_surf *surface, - const struct pipe_resource *ptex, - enum radeon_surf_mode array_mode, - unsigned pitch_in_bytes_override, - unsigned offset, - bool is_imported, - bool is_scanout, - bool is_flushed_depth, - bool tc_compatible_htile) -{ - const struct util_format_description *desc = - util_format_description(ptex->format); - bool is_depth, is_stencil; - int r; - unsigned i, bpe, flags = 0; - - is_depth = util_format_has_depth(desc); - is_stencil = util_format_has_stencil(desc); - - if (!is_flushed_depth && - ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { - bpe = 4; /* stencil is allocated separately on evergreen */ - } else { - bpe = util_format_get_blocksize(ptex->format); - assert(util_is_power_of_two(bpe)); - } - - if (!is_flushed_depth && is_depth) { - flags |= RADEON_SURF_ZBUFFER; - - if (tc_compatible_htile && - (rscreen->chip_class >= GFX9 || - array_mode == RADEON_SURF_MODE_2D)) { - /* TC-compatible HTILE only supports Z32_FLOAT. - * GFX9 also supports Z16_UNORM. - * On VI, promote Z16 to Z32. DB->CB copies will convert - * the format for transfers. 
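- * (That promotion is why bpe is forced to 4 for VI right below.)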
- */ - if (rscreen->chip_class == VI) - bpe = 4; - - flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; - } - - if (is_stencil) - flags |= RADEON_SURF_SBUFFER; - } - - if (rscreen->chip_class >= VI && - (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC || - ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)) - flags |= RADEON_SURF_DISABLE_DCC; - - if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) { - /* This should catch bugs in gallium users setting incorrect flags. */ - assert(ptex->nr_samples <= 1 && - ptex->array_size == 1 && - ptex->depth0 == 1 && - ptex->last_level == 0 && - !(flags & RADEON_SURF_Z_OR_SBUFFER)); - - flags |= RADEON_SURF_SCANOUT; - } - - if (ptex->bind & PIPE_BIND_SHARED) - flags |= RADEON_SURF_SHAREABLE; - if (is_imported) - flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE; - if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING)) - flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; - - r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe, - array_mode, surface); - if (r) { - return r; - } - - unsigned pitch = pitch_in_bytes_override / bpe; - - if (rscreen->chip_class >= GFX9) { - if (pitch) { - surface->u.gfx9.surf_pitch = pitch; - surface->u.gfx9.surf_slice_size = - (uint64_t)pitch * surface->u.gfx9.surf_height * bpe; - } - surface->u.gfx9.surf_offset = offset; - } else { - if (pitch) { - surface->u.legacy.level[0].nblk_x = pitch; - surface->u.legacy.level[0].slice_size = - ((uint64_t)pitch * surface->u.legacy.level[0].nblk_y * bpe); - } - - if (offset) { - for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i) - surface->u.legacy.level[i].offset += offset; - } - } - return 0; -} - -static void r600_texture_init_metadata(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - struct radeon_bo_metadata *metadata) -{ - struct radeon_surf *surface = &rtex->surface; - - memset(metadata, 0, sizeof(*metadata)); - - if (rscreen->chip_class >= GFX9) { - metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode; - } else { - metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? - RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; - metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? 
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; - metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; - metadata->u.legacy.bankw = surface->u.legacy.bankw; - metadata->u.legacy.bankh = surface->u.legacy.bankh; - metadata->u.legacy.tile_split = surface->u.legacy.tile_split; - metadata->u.legacy.mtilea = surface->u.legacy.mtilea; - metadata->u.legacy.num_banks = surface->u.legacy.num_banks; - metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; - metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; - } -} - -static void r600_surface_import_metadata(struct r600_common_screen *rscreen, - struct radeon_surf *surf, - struct radeon_bo_metadata *metadata, - enum radeon_surf_mode *array_mode, - bool *is_scanout) -{ - if (rscreen->chip_class >= GFX9) { - if (metadata->u.gfx9.swizzle_mode > 0) - *array_mode = RADEON_SURF_MODE_2D; - else - *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - - *is_scanout = metadata->u.gfx9.swizzle_mode == 0 || - metadata->u.gfx9.swizzle_mode % 4 == 2; - - surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode; - } else { - surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; - surf->u.legacy.bankw = metadata->u.legacy.bankw; - surf->u.legacy.bankh = metadata->u.legacy.bankh; - surf->u.legacy.tile_split = metadata->u.legacy.tile_split; - surf->u.legacy.mtilea = metadata->u.legacy.mtilea; - surf->u.legacy.num_banks = metadata->u.legacy.num_banks; - - if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) - *array_mode = RADEON_SURF_MODE_2D; - else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED) - *array_mode = RADEON_SURF_MODE_1D; - else - *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - - *is_scanout = metadata->u.legacy.scanout; - } -} - -static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx, - struct r600_texture *rtex) -{ - struct r600_common_screen *rscreen = rctx->screen; - struct pipe_context *ctx = &rctx->b; - - if (ctx == rscreen->aux_context) - mtx_lock(&rscreen->aux_context_lock); - - ctx->flush_resource(ctx, &rtex->resource.b.b); - ctx->flush(ctx, NULL, 0); - - if (ctx == rscreen->aux_context) - mtx_unlock(&rscreen->aux_context_lock); -} - -static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - if (!rtex->cmask.size) - return; - - assert(rtex->resource.b.b.nr_samples <= 1); - - /* Disable CMASK. */ - memset(&rtex->cmask, 0, sizeof(rtex->cmask)); - rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8; - rtex->dirty_level_mask = 0; - - rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1); - - if (rtex->cmask_buffer != &rtex->resource) - r600_resource_reference(&rtex->cmask_buffer, NULL); - - /* Notify all contexts about the change. */ - p_atomic_inc(&rscreen->dirty_tex_counter); - p_atomic_inc(&rscreen->compressed_colortex_counter); -} - -static bool r600_can_disable_dcc(struct r600_texture *rtex) -{ - /* We can't disable DCC if it can be written by another process. */ - return rtex->dcc_offset && - (!rtex->resource.b.is_shared || - !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE)); -} - -static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - if (!r600_can_disable_dcc(rtex)) - return false; - - assert(rtex->dcc_separate_buffer == NULL); - - /* Disable DCC. */ - rtex->dcc_offset = 0; - - /* Notify all contexts about the change. */ - p_atomic_inc(&rscreen->dirty_tex_counter); - return true; -} - -/** - * Disable DCC for the texture. 
(first decompress, then discard metadata). - * - * There is an unresolved multi-context synchronization issue between - * screen::aux_context and the current context. If applications do this with - * multiple contexts, it's already undefined behavior for them and we don't - * have to worry about that. The scenario is: - * - * If context 1 disables DCC and context 2 has queued commands that write - * to the texture via CB with DCC enabled, and the order of operations is - * as follows: - * context 2 queues draw calls rendering to the texture, but doesn't flush - * context 1 disables DCC and flushes - * context 1 & 2 reset descriptors and FB state - * context 2 flushes (new compressed tiles written by the draw calls) - * context 1 & 2 read garbage, because DCC is disabled, yet there are - * compressed tiles - * - * \param rctx the current context if you have one, or rscreen->aux_context - * if you don't. - */ -bool si_texture_disable_dcc(struct r600_common_context *rctx, - struct r600_texture *rtex) -{ - struct r600_common_screen *rscreen = rctx->screen; - - if (!r600_can_disable_dcc(rtex)) - return false; - - if (&rctx->b == rscreen->aux_context) - mtx_lock(&rscreen->aux_context_lock); - - /* Decompress DCC. */ - rctx->decompress_dcc(&rctx->b, rtex); - rctx->b.flush(&rctx->b, NULL, 0); - - if (&rctx->b == rscreen->aux_context) - mtx_unlock(&rscreen->aux_context_lock); - - return r600_texture_discard_dcc(rscreen, rtex); -} - -static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, - struct r600_texture *rtex, - unsigned new_bind_flag, - bool invalidate_storage) -{ - struct pipe_screen *screen = rctx->b.screen; - struct r600_texture *new_tex; - struct pipe_resource templ = rtex->resource.b.b; - unsigned i; - - templ.bind |= new_bind_flag; - - if (rtex->resource.b.is_shared) - return; - - if (new_bind_flag == PIPE_BIND_LINEAR) { - if (rtex->surface.is_linear) - return; - - /* This fails with MSAA, depth, and compressed textures. */ - if (r600_choose_tiling(rctx->screen, &templ) != - RADEON_SURF_MODE_LINEAR_ALIGNED) - return; - } - - new_tex = (struct r600_texture*)screen->resource_create(screen, &templ); - if (!new_tex) - return; - - /* Copy the pixels to the new texture. */ - if (!invalidate_storage) { - for (i = 0; i <= templ.last_level; i++) { - struct pipe_box box; - - u_box_3d(0, 0, 0, - u_minify(templ.width0, i), u_minify(templ.height0, i), - util_max_layer(&templ, i) + 1, &box); - - rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0, - &rtex->resource.b.b, i, &box); - } - } - - if (new_bind_flag == PIPE_BIND_LINEAR) { - r600_texture_discard_cmask(rctx->screen, rtex); - r600_texture_discard_dcc(rctx->screen, rtex); - } - - /* Replace the structure fields of rtex. 
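- * The backing buffer is swapped and every derived field is copied, so - * existing pipe_resource pointers to rtex (e.g. from bound views) stay - * valid without any reallocation on the caller's side.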
*/ - rtex->resource.b.b.bind = templ.bind; - pb_reference(&rtex->resource.buf, new_tex->resource.buf); - rtex->resource.gpu_address = new_tex->resource.gpu_address; - rtex->resource.vram_usage = new_tex->resource.vram_usage; - rtex->resource.gart_usage = new_tex->resource.gart_usage; - rtex->resource.bo_size = new_tex->resource.bo_size; - rtex->resource.bo_alignment = new_tex->resource.bo_alignment; - rtex->resource.domains = new_tex->resource.domains; - rtex->resource.flags = new_tex->resource.flags; - rtex->size = new_tex->size; - rtex->db_render_format = new_tex->db_render_format; - rtex->db_compatible = new_tex->db_compatible; - rtex->can_sample_z = new_tex->can_sample_z; - rtex->can_sample_s = new_tex->can_sample_s; - rtex->surface = new_tex->surface; - rtex->fmask = new_tex->fmask; - rtex->cmask = new_tex->cmask; - rtex->cb_color_info = new_tex->cb_color_info; - rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode; - rtex->htile_offset = new_tex->htile_offset; - rtex->tc_compatible_htile = new_tex->tc_compatible_htile; - rtex->depth_cleared = new_tex->depth_cleared; - rtex->stencil_cleared = new_tex->stencil_cleared; - rtex->non_disp_tiling = new_tex->non_disp_tiling; - rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics; - rtex->framebuffers_bound = new_tex->framebuffers_bound; - - if (new_bind_flag == PIPE_BIND_LINEAR) { - assert(!rtex->htile_offset); - assert(!rtex->cmask.size); - assert(!rtex->fmask.size); - assert(!rtex->dcc_offset); - assert(!rtex->is_depth); - } - - r600_texture_reference(&new_tex, NULL); - - p_atomic_inc(&rctx->screen->dirty_tex_counter); -} - -static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_context *ctx, - struct pipe_resource *resource, - struct winsys_handle *whandle, - unsigned usage) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct r600_common_context *rctx; - struct r600_resource *res = (struct r600_resource*)resource; - struct r600_texture *rtex = (struct r600_texture*)resource; - struct radeon_bo_metadata metadata; - bool update_metadata = false; - unsigned stride, offset, slice_size; - bool flush = false; - - ctx = threaded_context_unwrap_sync(ctx); - rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context); - - if (resource->target != PIPE_BUFFER) { - /* This is not supported now, but it might be required for OpenCL - * interop in the future. - */ - if (resource->nr_samples > 1 || rtex->is_depth) - return false; - - /* Move a suballocated texture into a non-suballocated allocation. */ - if (rscreen->ws->buffer_is_suballocated(res->buf) || - rtex->surface.tile_swizzle || - (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && - whandle->type != DRM_API_HANDLE_TYPE_KMS)) { - assert(!res->b.is_shared); - r600_reallocate_texture_inplace(rctx, rtex, - PIPE_BIND_SHARED, false); - flush = true; - assert(res->b.b.bind & PIPE_BIND_SHARED); - assert(res->flags & RADEON_FLAG_NO_SUBALLOC); - assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)); - assert(rtex->surface.tile_swizzle == 0); - } - - /* Since shader image stores don't support DCC on VI, - * disable it for external clients that want write - * access. 
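- * Read-only sharing keeps DCC enabled; the layout is then described to - * the importing process through the opaque metadata set further below.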
- */ - if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) { - if (si_texture_disable_dcc(rctx, rtex)) { - update_metadata = true; - /* si_texture_disable_dcc flushes the context */ - flush = false; - } - } - - if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && - (rtex->cmask.size || rtex->dcc_offset)) { - /* Eliminate fast clear (both CMASK and DCC) */ - r600_eliminate_fast_color_clear(rctx, rtex); - /* eliminate_fast_color_clear flushes the context */ - flush = false; - - /* Disable CMASK if flush_resource isn't going - * to be called. - */ - if (rtex->cmask.size) - r600_texture_discard_cmask(rscreen, rtex); - } - - /* Set metadata. */ - if (!res->b.is_shared || update_metadata) { - r600_texture_init_metadata(rscreen, rtex, &metadata); - if (rscreen->query_opaque_metadata) - rscreen->query_opaque_metadata(rscreen, rtex, - &metadata); - - rscreen->ws->buffer_set_metadata(res->buf, &metadata); - } - - if (rscreen->chip_class >= GFX9) { - offset = rtex->surface.u.gfx9.surf_offset; - stride = rtex->surface.u.gfx9.surf_pitch * - rtex->surface.bpe; - slice_size = rtex->surface.u.gfx9.surf_slice_size; - } else { - offset = rtex->surface.u.legacy.level[0].offset; - stride = rtex->surface.u.legacy.level[0].nblk_x * - rtex->surface.bpe; - slice_size = rtex->surface.u.legacy.level[0].slice_size; - } - } else { - /* Buffer exports are for the OpenCL interop. */ - /* Move a suballocated buffer into a non-suballocated allocation. */ - if (rscreen->ws->buffer_is_suballocated(res->buf) || - /* A DMABUF export always fails if the BO is local. */ - rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) { - assert(!res->b.is_shared); - - /* Allocate a new buffer with PIPE_BIND_SHARED. */ - struct pipe_resource templ = res->b.b; - templ.bind |= PIPE_BIND_SHARED; - - struct pipe_resource *newb = - screen->resource_create(screen, &templ); - if (!newb) - return false; - - /* Copy the old buffer contents to the new one. */ - struct pipe_box box; - u_box_1d(0, newb->width0, &box); - rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0, - &res->b.b, 0, &box); - flush = true; - /* Move the new buffer storage to the old pipe_resource. */ - si_replace_buffer_storage(&rctx->b, &res->b.b, newb); - pipe_resource_reference(&newb, NULL); - - assert(res->b.b.bind & PIPE_BIND_SHARED); - assert(res->flags & RADEON_FLAG_NO_SUBALLOC); - } - - /* Buffers */ - offset = 0; - stride = 0; - slice_size = 0; - } - - if (flush) - rctx->b.flush(&rctx->b, NULL, 0); - - if (res->b.is_shared) { - /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user - * doesn't set it. 
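- * In effect, the other usage bits are OR'ed across all users, while - * EXPLICIT_FLUSH is AND'ed: one user that doesn't promise explicit - * flushes clears the flag for everyone.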
- */ - res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; - if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) - res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; - } else { - res->b.is_shared = true; - res->external_usage = usage; - } - - return rscreen->ws->buffer_get_handle(res->buf, stride, offset, - slice_size, whandle); -} - -static void r600_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *ptex) -{ - struct r600_texture *rtex = (struct r600_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - - r600_texture_reference(&rtex->flushed_depth_texture, NULL); - - if (rtex->cmask_buffer != &rtex->resource) { - r600_resource_reference(&rtex->cmask_buffer, NULL); - } - pb_reference(&resource->buf, NULL); - r600_resource_reference(&rtex->dcc_separate_buffer, NULL); - r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL); - FREE(rtex); -} - -static const struct u_resource_vtbl r600_texture_vtbl; - -/* The number of samples can be specified independently of the texture. */ -void si_texture_get_fmask_info(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - unsigned nr_samples, - struct r600_fmask_info *out) -{ - /* FMASK is allocated like an ordinary texture. */ - struct pipe_resource templ = rtex->resource.b.b; - struct radeon_surf fmask = {}; - unsigned flags, bpe; - - memset(out, 0, sizeof(*out)); - - if (rscreen->chip_class >= GFX9) { - out->alignment = rtex->surface.u.gfx9.fmask_alignment; - out->size = rtex->surface.u.gfx9.fmask_size; - return; - } - - templ.nr_samples = 1; - flags = rtex->surface.flags | RADEON_SURF_FMASK; - - switch (nr_samples) { - case 2: - case 4: - bpe = 1; - break; - case 8: - bpe = 4; - break; - default: - R600_ERR("Invalid sample count for FMASK allocation.\n"); - return; - } - - if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe, - RADEON_SURF_MODE_2D, &fmask)) { - R600_ERR("Got error in surface_init while allocating FMASK.\n"); - return; - } - - assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); - - out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64; - if (out->slice_tile_max) - out->slice_tile_max -= 1; - - out->tile_mode_index = fmask.u.legacy.tiling_index[0]; - out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x; - out->bank_height = fmask.u.legacy.bankh; - out->tile_swizzle = fmask.tile_swizzle; - out->alignment = MAX2(256, fmask.surf_alignment); - out->size = fmask.surf_size; -} - -static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - si_texture_get_fmask_info(rscreen, rtex, - rtex->resource.b.b.nr_samples, &rtex->fmask); - - rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment); - rtex->size = rtex->fmask.offset + rtex->fmask.size; -} - -static void si_texture_get_cmask_info(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - struct r600_cmask_info *out) -{ - unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; - unsigned num_pipes = rscreen->info.num_tile_pipes; - unsigned cl_width, cl_height; - - if (rscreen->chip_class >= GFX9) { - out->alignment = rtex->surface.u.gfx9.cmask_alignment; - out->size = rtex->surface.u.gfx9.cmask_size; - return; - } - - switch (num_pipes) { - case 2: - cl_width = 32; - cl_height = 16; - break; - case 4: - cl_width = 32; - cl_height = 32; - break; - case 8: - cl_width = 64; - cl_height = 32; - break; - case 16: /* Hawaii */ - cl_width = 64; - cl_height = 64; - break; - default: - 
assert(0); - return; - } - - unsigned base_align = num_pipes * pipe_interleave_bytes; - - unsigned width = align(rtex->resource.b.b.width0, cl_width*8); - unsigned height = align(rtex->resource.b.b.height0, cl_height*8); - unsigned slice_elements = (width * height) / (8*8); - - /* Each element of CMASK is a nibble. */ - unsigned slice_bytes = slice_elements / 2; - - out->slice_tile_max = (width * height) / (128*128); - if (out->slice_tile_max) - out->slice_tile_max -= 1; - - out->alignment = MAX2(256, base_align); - out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) * - align(slice_bytes, base_align); -} - -static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); - - rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment); - rtex->size = rtex->cmask.offset + rtex->cmask.size; - - rtex->cb_color_info |= S_028C70_FAST_CLEAR(1); -} - -static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - if (rtex->cmask_buffer) - return; - - assert(rtex->cmask.size == 0); - - si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); - - rtex->cmask_buffer = (struct r600_resource *) - si_aligned_buffer_create(&rscreen->b, - R600_RESOURCE_FLAG_UNMAPPABLE, - PIPE_USAGE_DEFAULT, - rtex->cmask.size, - rtex->cmask.alignment); - if (rtex->cmask_buffer == NULL) { - rtex->cmask.size = 0; - return; - } - - /* update colorbuffer state bits */ - rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8; - - rtex->cb_color_info |= S_028C70_FAST_CLEAR(1); - - p_atomic_inc(&rscreen->compressed_colortex_counter); -} - -static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - unsigned cl_width, cl_height, width, height; - unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; - unsigned num_pipes = rscreen->info.num_tile_pipes; - - assert(rscreen->chip_class <= VI); - - rtex->surface.htile_size = 0; - - /* HTILE is broken with 1D tiling on old kernels and CIK. */ - if (rscreen->chip_class >= CIK && - rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38) - return; - - /* Overalign HTILE on P2 configs to work around GPU hangs in - * piglit/depthstencil-render-miplevels 585. - * - * This has been confirmed to help Kabini & Stoney, where the hangs - * are always reproducible. I think I have seen the test hang - * on Carrizo too, though it was very rare there. 
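- * The workaround simply pretends there are at least four pipes when - * computing the HTILE cache-line dimensions and base alignment below.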
- */ - if (rscreen->chip_class >= CIK && num_pipes < 4) - num_pipes = 4; - - switch (num_pipes) { - case 1: - cl_width = 32; - cl_height = 16; - break; - case 2: - cl_width = 32; - cl_height = 32; - break; - case 4: - cl_width = 64; - cl_height = 32; - break; - case 8: - cl_width = 64; - cl_height = 64; - break; - case 16: - cl_width = 128; - cl_height = 64; - break; - default: - assert(0); - return; - } - - width = align(rtex->resource.b.b.width0, cl_width * 8); - height = align(rtex->resource.b.b.height0, cl_height * 8); - - slice_elements = (width * height) / (8 * 8); - slice_bytes = slice_elements * 4; - - pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; - base_align = num_pipes * pipe_interleave_bytes; - - rtex->surface.htile_alignment = base_align; - rtex->surface.htile_size = - (util_max_layer(&rtex->resource.b.b, 0) + 1) * - align(slice_bytes, base_align); -} - -static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile) - r600_texture_get_htile_size(rscreen, rtex); - - if (!rtex->surface.htile_size) - return; - - rtex->htile_offset = align(rtex->size, rtex->surface.htile_alignment); - rtex->size = rtex->htile_offset + rtex->surface.htile_size; -} - -void si_print_texture_info(struct r600_common_screen *rscreen, - struct r600_texture *rtex, struct u_log_context *log) -{ - int i; - - /* Common parameters. */ - u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, " - "blk_h=%u, array_size=%u, last_level=%u, " - "bpe=%u, nsamples=%u, flags=0x%x, %s\n", - rtex->resource.b.b.width0, rtex->resource.b.b.height0, - rtex->resource.b.b.depth0, rtex->surface.blk_w, - rtex->surface.blk_h, - rtex->resource.b.b.array_size, rtex->resource.b.b.last_level, - rtex->surface.bpe, rtex->resource.b.b.nr_samples, - rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format)); - - if (rscreen->chip_class >= GFX9) { - u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", " - "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n", - rtex->surface.surf_size, - rtex->surface.u.gfx9.surf_slice_size, - rtex->surface.surf_alignment, - rtex->surface.u.gfx9.surf.swizzle_mode, - rtex->surface.u.gfx9.surf.epitch, - rtex->surface.u.gfx9.surf_pitch); - - if (rtex->fmask.size) { - u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", " - "alignment=%u, swmode=%u, epitch=%u\n", - rtex->fmask.offset, - rtex->surface.u.gfx9.fmask_size, - rtex->surface.u.gfx9.fmask_alignment, - rtex->surface.u.gfx9.fmask.swizzle_mode, - rtex->surface.u.gfx9.fmask.epitch); - } - - if (rtex->cmask.size) { - u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", " - "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n", - rtex->cmask.offset, - rtex->surface.u.gfx9.cmask_size, - rtex->surface.u.gfx9.cmask_alignment, - rtex->surface.u.gfx9.cmask.rb_aligned, - rtex->surface.u.gfx9.cmask.pipe_aligned); - } - - if (rtex->htile_offset) { - u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, " - "rb_aligned=%u, pipe_aligned=%u\n", - rtex->htile_offset, - rtex->surface.htile_size, - rtex->surface.htile_alignment, - rtex->surface.u.gfx9.htile.rb_aligned, - rtex->surface.u.gfx9.htile.pipe_aligned); - } - - if (rtex->dcc_offset) { - u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", " - "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", - rtex->dcc_offset, rtex->surface.dcc_size, - rtex->surface.dcc_alignment, - rtex->surface.u.gfx9.dcc_pitch_max, - 
rtex->surface.num_dcc_levels); - } - - if (rtex->surface.u.gfx9.stencil_offset) { - u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n", - rtex->surface.u.gfx9.stencil_offset, - rtex->surface.u.gfx9.stencil.swizzle_mode, - rtex->surface.u.gfx9.stencil.epitch); - } - return; - } - - u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, " - "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n", - rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw, - rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea, - rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config, - (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0); - - if (rtex->fmask.size) - u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, " - "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n", - rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment, - rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height, - rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index); - - if (rtex->cmask.size) - u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, " - "slice_tile_max=%u\n", - rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment, - rtex->cmask.slice_tile_max); - - if (rtex->htile_offset) - u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", " - "alignment=%u, TC_compatible = %u\n", - rtex->htile_offset, rtex->surface.htile_size, - rtex->surface.htile_alignment, - rtex->tc_compatible_htile); - - if (rtex->dcc_offset) { - u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n", - rtex->dcc_offset, rtex->surface.dcc_size, - rtex->surface.dcc_alignment); - for (i = 0; i <= rtex->resource.b.b.last_level; i++) - u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", " - "fast_clear_size=%"PRIu64"\n", - i, i < rtex->surface.num_dcc_levels, - rtex->surface.u.legacy.level[i].dcc_offset, - rtex->surface.u.legacy.level[i].dcc_fast_clear_size); - } - - for (i = 0; i <= rtex->resource.b.b.last_level; i++) - u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", " - "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " - "mode=%u, tiling_index = %u\n", - i, rtex->surface.u.legacy.level[i].offset, - rtex->surface.u.legacy.level[i].slice_size, - u_minify(rtex->resource.b.b.width0, i), - u_minify(rtex->resource.b.b.height0, i), - u_minify(rtex->resource.b.b.depth0, i), - rtex->surface.u.legacy.level[i].nblk_x, - rtex->surface.u.legacy.level[i].nblk_y, - rtex->surface.u.legacy.level[i].mode, - rtex->surface.u.legacy.tiling_index[i]); - - if (rtex->surface.has_stencil) { - u_log_printf(log, " StencilLayout: tilesplit=%u\n", - rtex->surface.u.legacy.stencil_tile_split); - for (i = 0; i <= rtex->resource.b.b.last_level; i++) { - u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", " - "slice_size=%"PRIu64", npix_x=%u, " - "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " - "mode=%u, tiling_index = %u\n", - i, rtex->surface.u.legacy.stencil_level[i].offset, - rtex->surface.u.legacy.stencil_level[i].slice_size, - u_minify(rtex->resource.b.b.width0, i), - u_minify(rtex->resource.b.b.height0, i), - u_minify(rtex->resource.b.b.depth0, i), - rtex->surface.u.legacy.stencil_level[i].nblk_x, - rtex->surface.u.legacy.stencil_level[i].nblk_y, - rtex->surface.u.legacy.stencil_level[i].mode, - rtex->surface.u.legacy.stencil_tiling_index[i]); - } - } -} - -/* Common processing for r600_texture_create and 
r600_texture_from_handle */ -static struct r600_texture * -r600_texture_create_object(struct pipe_screen *screen, - const struct pipe_resource *base, - struct pb_buffer *buf, - struct radeon_surf *surface) -{ - struct r600_texture *rtex; - struct r600_resource *resource; - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - - rtex = CALLOC_STRUCT(r600_texture); - if (!rtex) - return NULL; - - resource = &rtex->resource; - resource->b.b = *base; - resource->b.b.next = NULL; - resource->b.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->b.b.reference, 1); - resource->b.b.screen = screen; - - /* don't include stencil-only formats which we don't support for rendering */ - rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format)); - - rtex->surface = *surface; - rtex->size = rtex->surface.surf_size; - - rtex->tc_compatible_htile = rtex->surface.htile_size != 0 && - (rtex->surface.flags & - RADEON_SURF_TC_COMPATIBLE_HTILE); - - /* TC-compatible HTILE: - * - VI only supports Z32_FLOAT. - * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ - if (rtex->tc_compatible_htile) { - if (rscreen->chip_class >= GFX9 && - base->format == PIPE_FORMAT_Z16_UNORM) - rtex->db_render_format = base->format; - else { - rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; - rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT && - base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT; - } - } else { - rtex->db_render_format = base->format; - } - - /* Tiled depth textures utilize the non-displayable tile order. - * This must be done after r600_setup_surface. - * Applies to R600-Cayman. */ - rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D; - /* Applies to GCN. */ - rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode; - - /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers - * between frames, so the only thing that can enable separate DCC - * with DRI2 is multiple slow clears within a frame. - */ - rtex->ps_draw_ratio = 0; - - if (rtex->is_depth) { - if (rscreen->chip_class >= GFX9) { - rtex->can_sample_z = true; - rtex->can_sample_s = true; - } else { - rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted; - rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted; - } - - if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER | - R600_RESOURCE_FLAG_FLUSHED_DEPTH))) { - rtex->db_compatible = true; - - if (!(rscreen->debug_flags & DBG(NO_HYPERZ))) - r600_texture_allocate_htile(rscreen, rtex); - } - } else { - if (base->nr_samples > 1) { - if (!buf) { - r600_texture_allocate_fmask(rscreen, rtex); - r600_texture_allocate_cmask(rscreen, rtex); - rtex->cmask_buffer = &rtex->resource; - } - if (!rtex->fmask.size || !rtex->cmask.size) { - FREE(rtex); - return NULL; - } - } - - /* Shared textures must always set up DCC here. - * If it's not present, it will be disabled by - * apply_opaque_metadata later. - */ - if (rtex->surface.dcc_size && - (buf || !(rscreen->debug_flags & DBG(NO_DCC))) && - !(rtex->surface.flags & RADEON_SURF_SCANOUT)) { - /* Reserve space for the DCC buffer. */ - rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment); - rtex->size = rtex->dcc_offset + rtex->surface.dcc_size; - } - } - - /* Now create the backing buffer. 
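- * At this point rtex->size already includes any FMASK, CMASK, HTILE and - * DCC regions appended above, so a single buffer backs the pixels and - * all of the metadata.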
*/ - if (!buf) { - si_init_resource_fields(rscreen, resource, rtex->size, - rtex->surface.surf_alignment); - - if (!si_alloc_resource(rscreen, resource)) { - FREE(rtex); - return NULL; - } - } else { - resource->buf = buf; - resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf); - resource->bo_size = buf->size; - resource->bo_alignment = buf->alignment; - resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf); - if (resource->domains & RADEON_DOMAIN_VRAM) - resource->vram_usage = buf->size; - else if (resource->domains & RADEON_DOMAIN_GTT) - resource->gart_usage = buf->size; - } - - if (rtex->cmask.size) { - /* Initialize the cmask to 0xCC (= compressed state). */ - si_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b, - rtex->cmask.offset, rtex->cmask.size, - 0xCCCCCCCC); - } - if (rtex->htile_offset) { - uint32_t clear_value = 0; - - if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) - clear_value = 0x0000030F; - - si_screen_clear_buffer(rscreen, &rtex->resource.b.b, - rtex->htile_offset, - rtex->surface.htile_size, - clear_value); - } - - /* Initialize DCC only if the texture is not being imported. */ - if (!buf && rtex->dcc_offset) { - si_screen_clear_buffer(rscreen, &rtex->resource.b.b, - rtex->dcc_offset, - rtex->surface.dcc_size, - 0xFFFFFFFF); - } - - /* Initialize the CMASK base register value. */ - rtex->cmask.base_address_reg = - (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; - - if (rscreen->debug_flags & DBG(VM)) { - fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n", - rtex->resource.gpu_address, - rtex->resource.gpu_address + rtex->resource.buf->size, - base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1, - base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format)); - } - - if (rscreen->debug_flags & DBG(TEX)) { - puts("Texture:"); - struct u_log_context log; - u_log_context_init(&log); - si_print_texture_info(rscreen, rtex, &log); - u_log_new_page_print(&log, stdout); - fflush(stdout); - u_log_context_destroy(&log); - } - - return rtex; -} - -static enum radeon_surf_mode -r600_choose_tiling(struct r600_common_screen *rscreen, - const struct pipe_resource *templ) -{ - const struct util_format_description *desc = util_format_description(templ->format); - bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING; - bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) && - !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); - - /* MSAA resources must be 2D tiled. */ - if (templ->nr_samples > 1) - return RADEON_SURF_MODE_2D; - - /* Transfer resources should be linear. */ - if (templ->flags & R600_RESOURCE_FLAG_TRANSFER) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - - /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI, - * which requires 2D tiling. - */ - if (rscreen->chip_class == VI && - is_depth_stencil && - (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY)) - return RADEON_SURF_MODE_2D; - - /* Handle common candidates for the linear mode. - * Compressed textures and DB surfaces must always be tiled. - */ - if (!force_tiling && - !is_depth_stencil && - !util_format_is_compressed(templ->format)) { - if (rscreen->debug_flags & DBG(NO_TILING)) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - - /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. 
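The CMASK base register value computed above drops the low 8 bits of the virtual address. That works because the hardware's base-address registers hold addresses in 256-byte units, and the metadata offsets are aligned accordingly; a sketch:

    /* base-address registers store the VA in 256-byte units,
     * so the low 8 bits must already be zero */
    uint64_t va = rtex->resource.gpu_address + rtex->cmask.offset;
    assert((va & 0xffull) == 0);
    uint32_t base_address_reg = (uint32_t)(va >> 8);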
*/ - if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - - /* Cursors are linear on SI. - * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */ - if (templ->bind & PIPE_BIND_CURSOR) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - - if (templ->bind & PIPE_BIND_LINEAR) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - - /* Textures with a very small height are recommended to be linear. */ - if (templ->target == PIPE_TEXTURE_1D || - templ->target == PIPE_TEXTURE_1D_ARRAY || - /* Only very thin and long 2D textures should benefit from - * linear_aligned. */ - (templ->width0 > 8 && templ->height0 <= 2)) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - - /* Textures likely to be mapped often. */ - if (templ->usage == PIPE_USAGE_STAGING || - templ->usage == PIPE_USAGE_STREAM) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - } - - /* Make small textures 1D tiled. */ - if (templ->width0 <= 16 || templ->height0 <= 16 || - (rscreen->debug_flags & DBG(NO_2D_TILING))) - return RADEON_SURF_MODE_1D; - - /* The allocator will switch to 1D if needed. */ - return RADEON_SURF_MODE_2D; -} - -struct pipe_resource *si_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_surf surface = {0}; - bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; - bool tc_compatible_htile = - rscreen->chip_class >= VI && - (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) && - !(rscreen->debug_flags & DBG(NO_HYPERZ)) && - !is_flushed_depth && - templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */ - util_format_is_depth_or_stencil(templ->format); - - int r; - - r = r600_init_surface(rscreen, &surface, templ, - r600_choose_tiling(rscreen, templ), 0, 0, - false, false, is_flushed_depth, - tc_compatible_htile); - if (r) { - return NULL; - } - - return (struct pipe_resource *) - r600_texture_create_object(screen, templ, NULL, &surface); -} - -static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle, - unsigned usage) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct pb_buffer *buf = NULL; - unsigned stride = 0, offset = 0; - enum radeon_surf_mode array_mode; - struct radeon_surf surface = {}; - int r; - struct radeon_bo_metadata metadata = {}; - struct r600_texture *rtex; - bool is_scanout; - - /* Support only 2D textures without mipmaps */ - if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || - templ->depth0 != 1 || templ->last_level != 0) - return NULL; - - buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset); - if (!buf) - return NULL; - - rscreen->ws->buffer_get_metadata(buf, &metadata); - r600_surface_import_metadata(rscreen, &surface, &metadata, - &array_mode, &is_scanout); - - r = r600_init_surface(rscreen, &surface, templ, array_mode, stride, - offset, true, is_scanout, false, false); - if (r) { - return NULL; - } - - rtex = r600_texture_create_object(screen, templ, buf, &surface); - if (!rtex) - return NULL; - - rtex->resource.b.is_shared = true; - rtex->resource.external_usage = usage; - - if (rscreen->apply_opaque_metadata) - rscreen->apply_opaque_metadata(rscreen, rtex, &metadata); - - assert(rtex->surface.tile_swizzle == 0); - return &rtex->resource.b.b; -} - -bool si_init_flushed_depth_texture(struct pipe_context *ctx, - struct pipe_resource 
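r600_choose_tiling's checks are easiest to read as a strict precedence list; an illustrative restatement of the order above, not new policy:

    /* 1. nr_samples > 1                                   -> 2D tiled
     * 2. R600_RESOURCE_FLAG_TRANSFER                      -> linear
     * 3. VI + depth/stencil + TEXTURING_MORE_LIKELY       -> 2D (TC-compat HTILE)
     * 4. uncompressed, non-DB candidates: DBG(NO_TILING),
     *    422 subsampled formats, cursors, PIPE_BIND_LINEAR,
     *    1D targets or thin 2D (width > 8, height <= 2),
     *    STAGING/STREAM usage                             -> linear
     * 5. width <= 16 or height <= 16, or DBG(NO_2D_TILING) -> 1D tiled
     * 6. everything else                                  -> 2D tiled */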
*texture, - struct r600_texture **staging) -{ - struct r600_texture *rtex = (struct r600_texture*)texture; - struct pipe_resource resource; - struct r600_texture **flushed_depth_texture = staging ? - staging : &rtex->flushed_depth_texture; - enum pipe_format pipe_format = texture->format; - - if (!staging) { - if (rtex->flushed_depth_texture) - return true; /* it's ready */ - - if (!rtex->can_sample_z && rtex->can_sample_s) { - switch (pipe_format) { - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - /* Save memory by not allocating the S plane. */ - pipe_format = PIPE_FORMAT_Z32_FLOAT; - break; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_S8_UINT_Z24_UNORM: - /* Save memory bandwidth by not copying the - * stencil part during flush. - * - * This potentially increases memory bandwidth - * if an application uses both Z and S texturing - * simultaneously (a flushed Z24S8 texture - * would be stored compactly), but how often - * does that really happen? - */ - pipe_format = PIPE_FORMAT_Z24X8_UNORM; - break; - default:; - } - } else if (!rtex->can_sample_s && rtex->can_sample_z) { - assert(util_format_has_stencil(util_format_description(pipe_format))); - - /* DB->CB copies to an 8bpp surface don't work. */ - pipe_format = PIPE_FORMAT_X24S8_UINT; - } - } - - memset(&resource, 0, sizeof(resource)); - resource.target = texture->target; - resource.format = pipe_format; - resource.width0 = texture->width0; - resource.height0 = texture->height0; - resource.depth0 = texture->depth0; - resource.array_size = texture->array_size; - resource.last_level = texture->last_level; - resource.nr_samples = texture->nr_samples; - resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; - resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL; - resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH; - - if (staging) - resource.flags |= R600_RESOURCE_FLAG_TRANSFER; - - *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource); - if (*flushed_depth_texture == NULL) { - R600_ERR("failed to create temporary texture to hold flushed depth\n"); - return false; - } - - (*flushed_depth_texture)->non_disp_tiling = false; - return true; -} - -/** - * Initialize the pipe_resource descriptor to be of the same size as the box, - * which is supposed to hold a subregion of the texture "orig" at the given - * mipmap level. - */ -static void r600_init_temp_resource_from_box(struct pipe_resource *res, - struct pipe_resource *orig, - const struct pipe_box *box, - unsigned level, unsigned flags) -{ - memset(res, 0, sizeof(*res)); - res->format = orig->format; - res->width0 = box->width; - res->height0 = box->height; - res->depth0 = 1; - res->array_size = 1; - res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; - res->flags = flags; - - /* We must set the correct texture target and dimensions for a 3D box. 
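The staging-format demotions above all follow one rule: the flushed copy only needs the plane that cannot be sampled directly. A compact restatement:

    /* Z not sampleable, S sampleable:
     *   Z32_FLOAT_S8X24_UINT -> Z32_FLOAT    (don't allocate the S plane)
     *   Z24_UNORM_S8_UINT    -> Z24X8_UNORM  (don't copy S during flush)
     * S not sampleable, Z sampleable:
     *   anything with stencil -> X24S8_UINT  (8bpp DB->CB copies don't work) */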
*/ - if (box->depth > 1 && util_max_layer(orig, level) > 0) { - res->target = PIPE_TEXTURE_2D_ARRAY; - res->array_size = box->depth; - } else { - res->target = PIPE_TEXTURE_2D; - } -} - -static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - unsigned transfer_usage, - const struct pipe_box *box) -{ - return !rtex->resource.b.is_shared && - !(transfer_usage & PIPE_TRANSFER_READ) && - rtex->resource.b.b.last_level == 0 && - util_texrange_covers_whole_level(&rtex->resource.b.b, 0, - box->x, box->y, box->z, - box->width, box->height, - box->depth); -} - -static void r600_texture_invalidate_storage(struct r600_common_context *rctx, - struct r600_texture *rtex) -{ - struct r600_common_screen *rscreen = rctx->screen; - - /* There is no point in discarding depth and tiled buffers. */ - assert(!rtex->is_depth); - assert(rtex->surface.is_linear); - - /* Reallocate the buffer in the same pipe_resource. */ - si_alloc_resource(rscreen, &rtex->resource); - - /* Initialize the CMASK base address (needed even without CMASK). */ - rtex->cmask.base_address_reg = - (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; - - p_atomic_inc(&rscreen->dirty_tex_counter); - - rctx->num_alloc_tex_transfer_bytes += rtex->size; -} - -static void *r600_texture_transfer_map(struct pipe_context *ctx, - struct pipe_resource *texture, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **ptransfer) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct r600_texture *rtex = (struct r600_texture*)texture; - struct r600_transfer *trans; - struct r600_resource *buf; - unsigned offset = 0; - char *map; - bool use_staging_texture = false; - - assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER)); - assert(box->width && box->height && box->depth); - - /* Depth textures use staging unconditionally. */ - if (!rtex->is_depth) { - /* Degrade the tile mode if we get too many transfers on APUs. - * On dGPUs, the staging texture is always faster. - * Only count uploads that are at least 4x4 pixels large. - */ - if (!rctx->screen->info.has_dedicated_vram && - level == 0 && - box->width >= 4 && box->height >= 4 && - p_atomic_inc_return(&rtex->num_level0_transfers) == 10) { - bool can_invalidate = - r600_can_invalidate_texture(rctx->screen, rtex, - usage, box); - - r600_reallocate_texture_inplace(rctx, rtex, - PIPE_BIND_LINEAR, - can_invalidate); - } - - /* Tiled textures need to be converted into a linear texture for CPU - * access. The staging texture is always linear and is placed in GART. - * - * Reading from VRAM or GTT WC is slow, always use the staging - * texture in this case. - * - * Use the staging texture for uploads if the underlying BO - * is busy. - */ - if (!rtex->surface.is_linear) - use_staging_texture = true; - else if (usage & PIPE_TRANSFER_READ) - use_staging_texture = - rtex->resource.domains & RADEON_DOMAIN_VRAM || - rtex->resource.flags & RADEON_FLAG_GTT_WC; - /* Write & linear only: */ - else if (si_rings_is_buffer_referenced(rctx, rtex->resource.buf, - RADEON_USAGE_READWRITE) || - !rctx->ws->buffer_wait(rtex->resource.buf, 0, - RADEON_USAGE_READWRITE)) { - /* It's busy. 
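The busy-buffer handling above is the classic invalidate-or-stage pattern: a write-only map that covers the whole resource can simply orphan the storage, while anything else goes through a staging copy. A sketch with hypothetical helper names:

    /* write-only mapping of a busy BO (helper names illustrative) */
    if (bo_is_busy(tex->resource.buf)) {
        if (can_invalidate(tex, usage, box))
            reallocate_storage(tex);    /* fresh idle BO, same pipe_resource */
        else
            use_staging_texture = true; /* copy through a linear GTT texture */
    }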
*/ - if (r600_can_invalidate_texture(rctx->screen, rtex, - usage, box)) - r600_texture_invalidate_storage(rctx, rtex); - else - use_staging_texture = true; - } - } - - trans = CALLOC_STRUCT(r600_transfer); - if (!trans) - return NULL; - pipe_resource_reference(&trans->b.b.resource, texture); - trans->b.b.level = level; - trans->b.b.usage = usage; - trans->b.b.box = *box; - - if (rtex->is_depth) { - struct r600_texture *staging_depth; - - if (rtex->resource.b.b.nr_samples > 1) { - /* MSAA depth buffers need to be converted to single sample buffers. - * - * Mapping MSAA depth buffers can occur if ReadPixels is called - * with a multisample GLX visual. - * - * First downsample the depth buffer to a temporary texture, - * then decompress the temporary one to staging. - * - * Only the region being mapped is transfered. - */ - struct pipe_resource resource; - - r600_init_temp_resource_from_box(&resource, texture, box, level, 0); - - if (!si_init_flushed_depth_texture(ctx, &resource, &staging_depth)) { - R600_ERR("failed to create temporary texture to hold untiled copy\n"); - FREE(trans); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource); - if (!temp) { - R600_ERR("failed to create a temporary depth texture\n"); - FREE(trans); - return NULL; - } - - r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box); - rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth, - 0, 0, 0, box->depth, 0, 0); - pipe_resource_reference(&temp, NULL); - } - - /* Just get the strides. */ - r600_texture_get_offset(rctx->screen, staging_depth, level, NULL, - &trans->b.b.stride, - &trans->b.b.layer_stride); - } else { - /* XXX: only readback the rectangle which is being mapped? */ - /* XXX: when discard is true, no need to read back from depth texture */ - if (!si_init_flushed_depth_texture(ctx, texture, &staging_depth)) { - R600_ERR("failed to create temporary texture to hold untiled copy\n"); - FREE(trans); - return NULL; - } - - rctx->blit_decompress_depth(ctx, rtex, staging_depth, - level, level, - box->z, box->z + box->depth - 1, - 0, 0); - - offset = r600_texture_get_offset(rctx->screen, staging_depth, - level, box, - &trans->b.b.stride, - &trans->b.b.layer_stride); - } - - trans->staging = (struct r600_resource*)staging_depth; - buf = trans->staging; - } else if (use_staging_texture) { - struct pipe_resource resource; - struct r600_texture *staging; - - r600_init_temp_resource_from_box(&resource, texture, box, level, - R600_RESOURCE_FLAG_TRANSFER); - resource.usage = (usage & PIPE_TRANSFER_READ) ? - PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; - - /* Create the temporary texture. */ - staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource); - if (!staging) { - R600_ERR("failed to create temporary texture to hold untiled copy\n"); - FREE(trans); - return NULL; - } - trans->staging = &staging->resource; - - /* Just get the strides. 
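The multisample depth path above needs two blits before the CPU can see anything; a summary of the data flow, using the helper names from the code:

    /* msaa depth buffer
     *   --r600_copy_region_with_blit--> temp       (single-sample resolve)
     *   --blit_decompress_depth--> staging_depth   (flushed, CPU-readable)
     *   --buffer map--> user pointer
     * only the mapped box is copied at each step */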
*/ - r600_texture_get_offset(rctx->screen, staging, 0, NULL, - &trans->b.b.stride, - &trans->b.b.layer_stride); - - if (usage & PIPE_TRANSFER_READ) - r600_copy_to_staging_texture(ctx, trans); - else - usage |= PIPE_TRANSFER_UNSYNCHRONIZED; - - buf = trans->staging; - } else { - /* the resource is mapped directly */ - offset = r600_texture_get_offset(rctx->screen, rtex, level, box, - &trans->b.b.stride, - &trans->b.b.layer_stride); - buf = &rtex->resource; - } - - if (!(map = si_buffer_map_sync_with_rings(rctx, buf, usage))) { - r600_resource_reference(&trans->staging, NULL); - FREE(trans); - return NULL; - } - - *ptransfer = &trans->b.b; - return map + offset; -} - -static void r600_texture_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer* transfer) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct pipe_resource *texture = transfer->resource; - struct r600_texture *rtex = (struct r600_texture*)texture; - - if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) { - if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) { - ctx->resource_copy_region(ctx, texture, transfer->level, - transfer->box.x, transfer->box.y, transfer->box.z, - &rtransfer->staging->b.b, transfer->level, - &transfer->box); - } else { - r600_copy_from_staging_texture(ctx, rtransfer); - } - } - - if (rtransfer->staging) { - rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size; - r600_resource_reference(&rtransfer->staging, NULL); - } - - /* Heuristic for {upload, draw, upload, draw, ..}: - * - * Flush the gfx IB if we've allocated too much texture storage. - * - * The idea is that we don't want to build IBs that use too much - * memory and put pressure on the kernel memory manager and we also - * want to make temporary and invalidated buffers go idle ASAP to - * decrease the total memory usage or make them reusable. The memory - * usage will be slightly higher than given here because of the buffer - * cache in the winsys. - * - * The result is that the kernel memory manager is never a bottleneck. - */ - if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) { - rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL); - rctx->num_alloc_tex_transfer_bytes = 0; - } - - pipe_resource_reference(&transfer->resource, NULL); - FREE(transfer); -} - -static const struct u_resource_vtbl r600_texture_vtbl = -{ - NULL, /* get_handle */ - r600_texture_destroy, /* resource_destroy */ - r600_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region, /* transfer_flush_region */ - r600_texture_transfer_unmap, /* transfer_unmap */ -}; - -/* DCC channel type categories within which formats can be reinterpreted - * while keeping the same DCC encoding. The swizzle must also match. */ -enum dcc_channel_type { - dcc_channel_float32, - dcc_channel_uint32, - dcc_channel_sint32, - dcc_channel_float16, - dcc_channel_uint16, - dcc_channel_sint16, - dcc_channel_uint_10_10_10_2, - dcc_channel_uint8, - dcc_channel_sint8, - dcc_channel_incompatible, -}; - -/* Return the type of DCC encoding. */ -static enum dcc_channel_type -vi_get_dcc_channel_type(const struct util_format_description *desc) -{ - int i; - - /* Find the first non-void channel. 
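vi_get_dcc_channel_type starts by skipping void channels because padding bits do not affect the DCC encoding. For example, in PIPE_FORMAT_X24S8_UINT:

    /* channel[0] is 24 void (padding) bits, so the loop above lands on
     * channel[1], the 8-bit unsigned stencil, and the switch classifies
     * the format as dcc_channel_uint8 */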
*/ - for (i = 0; i < desc->nr_channels; i++) - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) - break; - if (i == desc->nr_channels) - return dcc_channel_incompatible; - - switch (desc->channel[i].size) { - case 32: - if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) - return dcc_channel_float32; - if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) - return dcc_channel_uint32; - return dcc_channel_sint32; - case 16: - if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) - return dcc_channel_float16; - if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) - return dcc_channel_uint16; - return dcc_channel_sint16; - case 10: - return dcc_channel_uint_10_10_10_2; - case 8: - if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) - return dcc_channel_uint8; - return dcc_channel_sint8; - default: - return dcc_channel_incompatible; - } -} - -/* Return if it's allowed to reinterpret one format as another with DCC enabled. */ -bool vi_dcc_formats_compatible(enum pipe_format format1, - enum pipe_format format2) -{ - const struct util_format_description *desc1, *desc2; - enum dcc_channel_type type1, type2; - int i; - - if (format1 == format2) - return true; - - desc1 = util_format_description(format1); - desc2 = util_format_description(format2); - - if (desc1->nr_channels != desc2->nr_channels) - return false; - - /* Swizzles must be the same. */ - for (i = 0; i < desc1->nr_channels; i++) - if (desc1->swizzle[i] <= PIPE_SWIZZLE_W && - desc2->swizzle[i] <= PIPE_SWIZZLE_W && - desc1->swizzle[i] != desc2->swizzle[i]) - return false; - - type1 = vi_get_dcc_channel_type(desc1); - type2 = vi_get_dcc_channel_type(desc2); - - return type1 != dcc_channel_incompatible && - type2 != dcc_channel_incompatible && - type1 == type2; -} - -bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, - unsigned level, - enum pipe_format view_format) -{ - struct r600_texture *rtex = (struct r600_texture *)tex; - - return vi_dcc_enabled(rtex, level) && - !vi_dcc_formats_compatible(tex->format, view_format); -} - -/* This can't be merged with the above function, because - * vi_dcc_formats_compatible should be called only when DCC is enabled. 
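Two consequences of the compatibility rules above, stated as examples:

    /* same channel layout and encoding class -> DCC survives the view */
    assert(vi_dcc_formats_compatible(PIPE_FORMAT_R8G8B8A8_UNORM,
                                     PIPE_FORMAT_R8G8B8A8_SRGB));
    /* signedness changes the encoding class -> not compatible */
    assert(!vi_dcc_formats_compatible(PIPE_FORMAT_R8G8B8A8_UNORM,
                                      PIPE_FORMAT_R8G8B8A8_SNORM));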
*/ -void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx, - struct pipe_resource *tex, - unsigned level, - enum pipe_format view_format) -{ - struct r600_texture *rtex = (struct r600_texture *)tex; - - if (vi_dcc_formats_are_incompatible(tex, level, view_format)) - if (!si_texture_disable_dcc(rctx, (struct r600_texture*)tex)) - rctx->decompress_dcc(&rctx->b, rtex); -} - -struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_surface *templ, - unsigned width0, unsigned height0, - unsigned width, unsigned height) -{ - struct r600_surface *surface = CALLOC_STRUCT(r600_surface); - - if (!surface) - return NULL; - - assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level)); - assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level)); - - pipe_reference_init(&surface->base.reference, 1); - pipe_resource_reference(&surface->base.texture, texture); - surface->base.context = pipe; - surface->base.format = templ->format; - surface->base.width = width; - surface->base.height = height; - surface->base.u = templ->u; - - surface->width0 = width0; - surface->height0 = height0; - - surface->dcc_incompatible = - texture->target != PIPE_BUFFER && - vi_dcc_formats_are_incompatible(texture, templ->u.tex.level, - templ->format); - return &surface->base; -} - -static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, - struct pipe_resource *tex, - const struct pipe_surface *templ) -{ - unsigned level = templ->u.tex.level; - unsigned width = u_minify(tex->width0, level); - unsigned height = u_minify(tex->height0, level); - unsigned width0 = tex->width0; - unsigned height0 = tex->height0; - - if (tex->target != PIPE_BUFFER && templ->format != tex->format) { - const struct util_format_description *tex_desc - = util_format_description(tex->format); - const struct util_format_description *templ_desc - = util_format_description(templ->format); - - assert(tex_desc->block.bits == templ_desc->block.bits); - - /* Adjust size of surface if and only if the block width or - * height is changed. 
*/ - if (tex_desc->block.width != templ_desc->block.width || - tex_desc->block.height != templ_desc->block.height) { - unsigned nblks_x = util_format_get_nblocksx(tex->format, width); - unsigned nblks_y = util_format_get_nblocksy(tex->format, height); - - width = nblks_x * templ_desc->block.width; - height = nblks_y * templ_desc->block.height; - - width0 = util_format_get_nblocksx(tex->format, width0); - height0 = util_format_get_nblocksy(tex->format, height0); - } - } - - return si_create_surface_custom(pipe, tex, templ, - width0, height0, - width, height); -} - -static void r600_surface_destroy(struct pipe_context *pipe, - struct pipe_surface *surface) -{ - struct r600_surface *surf = (struct r600_surface*)surface; - r600_resource_reference(&surf->cb_buffer_fmask, NULL); - r600_resource_reference(&surf->cb_buffer_cmask, NULL); - pipe_resource_reference(&surface->texture, NULL); - FREE(surface); -} - -static void r600_clear_texture(struct pipe_context *pipe, - struct pipe_resource *tex, - unsigned level, - const struct pipe_box *box, - const void *data) -{ - struct pipe_screen *screen = pipe->screen; - struct r600_texture *rtex = (struct r600_texture*)tex; - struct pipe_surface tmpl = {{0}}; - struct pipe_surface *sf; - const struct util_format_description *desc = - util_format_description(tex->format); - - tmpl.format = tex->format; - tmpl.u.tex.first_layer = box->z; - tmpl.u.tex.last_layer = box->z + box->depth - 1; - tmpl.u.tex.level = level; - sf = pipe->create_surface(pipe, tex, &tmpl); - if (!sf) - return; - - if (rtex->is_depth) { - unsigned clear; - float depth; - uint8_t stencil = 0; - - /* Depth is always present. */ - clear = PIPE_CLEAR_DEPTH; - desc->unpack_z_float(&depth, 0, data, 0, 1, 1); - - if (rtex->surface.has_stencil) { - clear |= PIPE_CLEAR_STENCIL; - desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1); - } - - pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil, - box->x, box->y, - box->width, box->height, false); - } else { - union pipe_color_union color; - - /* pipe_color_union requires the full vec4 representation. 
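A worked example of the block-size adjustment above: viewing a 128x128 PIPE_FORMAT_DXT1_RGBA texture (4x4 blocks of 64 bits) as PIPE_FORMAT_R32G32_UINT (1x1 blocks of 64 bits) passes the block.bits assert, and the view surface is shrunk to the block grid:

    /* nblks_x = 128 / 4 = 32, nblks_y = 128 / 4 = 32
     * -> the R32G32_UINT view is created as 32 x 32 */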
*/ - if (util_format_is_pure_uint(tex->format)) - desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1); - else if (util_format_is_pure_sint(tex->format)) - desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1); - else - desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1); - - if (screen->is_format_supported(screen, tex->format, - tex->target, 0, - PIPE_BIND_RENDER_TARGET)) { - pipe->clear_render_target(pipe, sf, &color, - box->x, box->y, - box->width, box->height, false); - } else { - /* Software fallback - just for R9G9B9E5_FLOAT */ - util_clear_render_target(pipe, sf, &color, - box->x, box->y, - box->width, box->height); - } - } - pipe_surface_reference(&sf, NULL); -} - -unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap) -{ - const struct util_format_description *desc = util_format_description(format); - -#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz) - - if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ - return V_028C70_SWAP_STD; - - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) - return ~0U; - - switch (desc->nr_channels) { - case 1: - if (HAS_SWIZZLE(0,X)) - return V_028C70_SWAP_STD; /* X___ */ - else if (HAS_SWIZZLE(3,X)) - return V_028C70_SWAP_ALT_REV; /* ___X */ - break; - case 2: - if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) || - (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) || - (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y))) - return V_028C70_SWAP_STD; /* XY__ */ - else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) || - (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) || - (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X))) - /* YX__ */ - return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV); - else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y)) - return V_028C70_SWAP_ALT; /* X__Y */ - else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X)) - return V_028C70_SWAP_ALT_REV; /* Y__X */ - break; - case 3: - if (HAS_SWIZZLE(0,X)) - return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD); - else if (HAS_SWIZZLE(0,Z)) - return V_028C70_SWAP_STD_REV; /* ZYX */ - break; - case 4: - /* check the middle channels, the 1st and 4th channel can be NONE */ - if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) { - return V_028C70_SWAP_STD; /* XYZW */ - } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) { - return V_028C70_SWAP_STD_REV; /* WZYX */ - } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) { - return V_028C70_SWAP_ALT; /* ZYXW */ - } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) { - /* YZWX */ - if (desc->is_array) - return V_028C70_SWAP_ALT_REV; - else - return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV); - } - break; - } - return ~0U; -} - -/* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */ - -static void vi_dcc_clean_up_context_slot(struct r600_common_context *rctx, - int slot) -{ - int i; - - if (rctx->dcc_stats[slot].query_active) - vi_separate_dcc_stop_query(&rctx->b, - rctx->dcc_stats[slot].tex); - - for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats[slot].ps_stats); i++) - if (rctx->dcc_stats[slot].ps_stats[i]) { - rctx->b.destroy_query(&rctx->b, - rctx->dcc_stats[slot].ps_stats[i]); - rctx->dcc_stats[slot].ps_stats[i] = NULL; - } - - r600_texture_reference(&rctx->dcc_stats[slot].tex, NULL); -} - -/** - * Return the per-context slot where DCC statistics queries for the texture live. - */ -static unsigned vi_get_context_dcc_stats_index(struct r600_common_context *rctx, - struct r600_texture *tex) -{ - int i, empty_slot = -1; - - /* Remove zombie textures (textures kept alive by this array only). 
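Plugging two common formats into si_translate_colorswap, per the 4-channel swizzle checks above:

    /* R8G8B8A8_UNORM: swizzle XYZW, swizzle[1]=Y, swizzle[2]=Z
     *                 -> V_028C70_SWAP_STD
     * B8G8R8A8_UNORM: B is stored first, so its swizzle is ZYXW,
     *                 swizzle[1]=Y, swizzle[2]=X -> V_028C70_SWAP_ALT */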
*/ - for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) - if (rctx->dcc_stats[i].tex && - rctx->dcc_stats[i].tex->resource.b.b.reference.count == 1) - vi_dcc_clean_up_context_slot(rctx, i); - - /* Find the texture. */ - for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) { - /* Return if found. */ - if (rctx->dcc_stats[i].tex == tex) { - rctx->dcc_stats[i].last_use_timestamp = os_time_get(); - return i; - } - - /* Record the first seen empty slot. */ - if (empty_slot == -1 && !rctx->dcc_stats[i].tex) - empty_slot = i; - } - - /* Not found. Remove the oldest member to make space in the array. */ - if (empty_slot == -1) { - int oldest_slot = 0; - - /* Find the oldest slot. */ - for (i = 1; i < ARRAY_SIZE(rctx->dcc_stats); i++) - if (rctx->dcc_stats[oldest_slot].last_use_timestamp > - rctx->dcc_stats[i].last_use_timestamp) - oldest_slot = i; - - /* Clean up the oldest slot. */ - vi_dcc_clean_up_context_slot(rctx, oldest_slot); - empty_slot = oldest_slot; - } - - /* Add the texture to the new slot. */ - r600_texture_reference(&rctx->dcc_stats[empty_slot].tex, tex); - rctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get(); - return empty_slot; -} - -static struct pipe_query * -vi_create_resuming_pipestats_query(struct pipe_context *ctx) -{ - struct r600_query_hw *query = (struct r600_query_hw*) - ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0); - - query->flags |= R600_QUERY_HW_FLAG_BEGIN_RESUMES; - return (struct pipe_query*)query; -} - -/** - * Called when binding a color buffer. - */ -void vi_separate_dcc_start_query(struct pipe_context *ctx, - struct r600_texture *tex) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - unsigned i = vi_get_context_dcc_stats_index(rctx, tex); - - assert(!rctx->dcc_stats[i].query_active); - - if (!rctx->dcc_stats[i].ps_stats[0]) - rctx->dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(ctx); - - /* begin or resume the query */ - ctx->begin_query(ctx, rctx->dcc_stats[i].ps_stats[0]); - rctx->dcc_stats[i].query_active = true; -} - -/** - * Called when unbinding a color buffer. - */ -void vi_separate_dcc_stop_query(struct pipe_context *ctx, - struct r600_texture *tex) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - unsigned i = vi_get_context_dcc_stats_index(rctx, tex); - - assert(rctx->dcc_stats[i].query_active); - assert(rctx->dcc_stats[i].ps_stats[0]); - - /* pause or end the query */ - ctx->end_query(ctx, rctx->dcc_stats[i].ps_stats[0]); - rctx->dcc_stats[i].query_active = false; -} - -static bool vi_should_enable_separate_dcc(struct r600_texture *tex) -{ - /* The minimum number of fullscreen draws per frame that is required - * to enable DCC. */ - return tex->ps_draw_ratio + tex->num_slow_clears >= 5; -} - -/* Called by fast clear. */ -static void vi_separate_dcc_try_enable(struct r600_common_context *rctx, - struct r600_texture *tex) -{ - /* The intent is to use this with shared displayable back buffers, - * but it's not strictly limited only to them. - */ - if (!tex->resource.b.is_shared || - !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) || - tex->resource.b.b.target != PIPE_TEXTURE_2D || - tex->resource.b.b.last_level > 0 || - !tex->surface.dcc_size) - return; - - if (tex->dcc_offset) - return; /* already enabled */ - - /* Enable the DCC stat gathering. 
*/ - if (!tex->dcc_gather_statistics) { - tex->dcc_gather_statistics = true; - vi_separate_dcc_start_query(&rctx->b, tex); - } - - if (!vi_should_enable_separate_dcc(tex)) - return; /* stats show that DCC decompression is too expensive */ - - assert(tex->surface.num_dcc_levels); - assert(!tex->dcc_separate_buffer); - - r600_texture_discard_cmask(rctx->screen, tex); - - /* Get a DCC buffer. */ - if (tex->last_dcc_separate_buffer) { - assert(tex->dcc_gather_statistics); - assert(!tex->dcc_separate_buffer); - tex->dcc_separate_buffer = tex->last_dcc_separate_buffer; - tex->last_dcc_separate_buffer = NULL; - } else { - tex->dcc_separate_buffer = (struct r600_resource*) - si_aligned_buffer_create(rctx->b.screen, - R600_RESOURCE_FLAG_UNMAPPABLE, - PIPE_USAGE_DEFAULT, - tex->surface.dcc_size, - tex->surface.dcc_alignment); - if (!tex->dcc_separate_buffer) - return; - } - - /* dcc_offset is the absolute GPUVM address. */ - tex->dcc_offset = tex->dcc_separate_buffer->gpu_address; - - /* no need to flag anything since this is called by fast clear that - * flags framebuffer state - */ -} - -/** - * Called by pipe_context::flush_resource, the place where DCC decompression - * takes place. - */ -void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, - struct r600_texture *tex) -{ - struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct pipe_query *tmp; - unsigned i = vi_get_context_dcc_stats_index(rctx, tex); - bool query_active = rctx->dcc_stats[i].query_active; - bool disable = false; - - if (rctx->dcc_stats[i].ps_stats[2]) { - union pipe_query_result result; - - /* Read the results. */ - ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2], - true, &result); - si_query_hw_reset_buffers(rctx, - (struct r600_query_hw*) - rctx->dcc_stats[i].ps_stats[2]); - - /* Compute the approximate number of fullscreen draws. */ - tex->ps_draw_ratio = - result.pipeline_statistics.ps_invocations / - (tex->resource.b.b.width0 * tex->resource.b.b.height0); - rctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio; - - disable = tex->dcc_separate_buffer && - !vi_should_enable_separate_dcc(tex); - } - - tex->num_slow_clears = 0; - - /* stop the statistics query for ps_stats[0] */ - if (query_active) - vi_separate_dcc_stop_query(ctx, tex); - - /* Move the queries in the queue by one. 
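ps_draw_ratio above is the pixel-shader invocation count normalized by the surface area, i.e. an approximate number of fullscreen-equivalent draws. A worked example against the >= 5 threshold used by vi_should_enable_separate_dcc:

    /* 1920x1080 target: area = 2,073,600 pixels
     * ps_invocations = 10,368,000 -> ps_draw_ratio = 5
     * 5 + num_slow_clears >= 5 -> separate DCC may be enabled */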
*/ - tmp = rctx->dcc_stats[i].ps_stats[2]; - rctx->dcc_stats[i].ps_stats[2] = rctx->dcc_stats[i].ps_stats[1]; - rctx->dcc_stats[i].ps_stats[1] = rctx->dcc_stats[i].ps_stats[0]; - rctx->dcc_stats[i].ps_stats[0] = tmp; - - /* create and start a new query as ps_stats[0] */ - if (query_active) - vi_separate_dcc_start_query(ctx, tex); - - if (disable) { - assert(!tex->last_dcc_separate_buffer); - tex->last_dcc_separate_buffer = tex->dcc_separate_buffer; - tex->dcc_separate_buffer = NULL; - tex->dcc_offset = 0; - /* no need to flag anything since this is called after - * decompression that re-sets framebuffer state - */ - } -} - -/* FAST COLOR CLEAR */ - -static void evergreen_set_clear_color(struct r600_texture *rtex, - enum pipe_format surface_format, - const union pipe_color_union *color) -{ - union util_color uc; - - memset(&uc, 0, sizeof(uc)); - - if (rtex->surface.bpe == 16) { - /* DCC fast clear only: - * CLEAR_WORD0 = R = G = B - * CLEAR_WORD1 = A - */ - assert(color->ui[0] == color->ui[1] && - color->ui[0] == color->ui[2]); - uc.ui[0] = color->ui[0]; - uc.ui[1] = color->ui[3]; - } else if (util_format_is_pure_uint(surface_format)) { - util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1); - } else if (util_format_is_pure_sint(surface_format)) { - util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1); - } else { - util_pack_color(color->f, surface_format, &uc); - } - - memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); -} - -static bool vi_get_fast_clear_parameters(enum pipe_format surface_format, - const union pipe_color_union *color, - uint32_t* reset_value, - bool* clear_words_needed) -{ - bool values[4] = {}; - int i; - bool main_value = false; - bool extra_value = false; - int extra_channel; - - /* This is needed to get the correct DCC clear value for luminance formats. - * 1) Get the linear format (because the next step can't handle L8_SRGB). - * 2) Convert luminance to red. (the real hw format for luminance) - */ - surface_format = util_format_linear(surface_format); - surface_format = util_format_luminance_to_red(surface_format); - - const struct util_format_description *desc = util_format_description(surface_format); - - if (desc->block.bits == 128 && - (color->ui[0] != color->ui[1] || - color->ui[0] != color->ui[2])) - return false; - - *clear_words_needed = true; - *reset_value = 0x20202020U; - - /* If we want to clear without needing a fast clear eliminate step, we - * can set each channel to 0 or 1 (or 0/max for integer formats). We - * have two sets of flags, one for the last or first channel(extra) and - * one for the other channels(main). - */ - - if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT || - surface_format == PIPE_FORMAT_B5G6R5_UNORM || - surface_format == PIPE_FORMAT_B5G6R5_SRGB || - util_format_is_alpha(surface_format)) { - extra_channel = -1; - } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { - if(si_translate_colorswap(surface_format, false) <= 1) - extra_channel = desc->nr_channels - 1; - else - extra_channel = 0; - } else - return true; - - for (i = 0; i < 4; ++i) { - int index = desc->swizzle[i] - PIPE_SWIZZLE_X; - - if (desc->swizzle[i] < PIPE_SWIZZLE_X || - desc->swizzle[i] > PIPE_SWIZZLE_W) - continue; - - if (desc->channel[i].pure_integer && - desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - /* Use the maximum value for clamping the clear color. 
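The rotation above keeps a three-deep pipeline of pipeline-statistics queries, so the blocking read two flush_resource calls later usually finds the result already available. A sketch of the same rotation (array name illustrative):

    /* q[0] = currently recording, q[1] = in flight, q[2] = old enough to read */
    struct pipe_query *tmp = q[2];
    q[2] = q[1];
    q[1] = q[0];
    q[0] = tmp;   /* reuse the drained query as the new recording slot */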
*/ - int max = u_bit_consecutive(0, desc->channel[i].size - 1); - - values[i] = color->i[i] != 0; - if (color->i[i] != 0 && MIN2(color->i[i], max) != max) - return true; - } else if (desc->channel[i].pure_integer && - desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { - /* Use the maximum value for clamping the clear color. */ - unsigned max = u_bit_consecutive(0, desc->channel[i].size); - - values[i] = color->ui[i] != 0U; - if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max) - return true; - } else { - values[i] = color->f[i] != 0.0F; - if (color->f[i] != 0.0F && color->f[i] != 1.0F) - return true; - } - - if (index == extra_channel) - extra_value = values[i]; - else - main_value = values[i]; - } - - for (int i = 0; i < 4; ++i) - if (values[i] != main_value && - desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel && - desc->swizzle[i] >= PIPE_SWIZZLE_X && - desc->swizzle[i] <= PIPE_SWIZZLE_W) - return true; - - *clear_words_needed = false; - if (main_value) - *reset_value |= 0x80808080U; - - if (extra_value) - *reset_value |= 0x40404040U; - return true; -} - -void vi_dcc_clear_level(struct r600_common_context *rctx, - struct r600_texture *rtex, - unsigned level, unsigned clear_value) -{ - struct pipe_resource *dcc_buffer; - uint64_t dcc_offset, clear_size; - - assert(vi_dcc_enabled(rtex, level)); - - if (rtex->dcc_separate_buffer) { - dcc_buffer = &rtex->dcc_separate_buffer->b.b; - dcc_offset = 0; - } else { - dcc_buffer = &rtex->resource.b.b; - dcc_offset = rtex->dcc_offset; - } - - if (rctx->chip_class >= GFX9) { - /* Mipmap level clears aren't implemented. */ - assert(rtex->resource.b.b.last_level == 0); - /* MSAA needs a different clear size. */ - assert(rtex->resource.b.b.nr_samples <= 1); - clear_size = rtex->surface.dcc_size; - } else { - unsigned num_layers = util_max_layer(&rtex->resource.b.b, level) + 1; - - dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset; - clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size * - num_layers; - } - - rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size, - clear_value, R600_COHERENCY_CB_META); -} - -/* Set the same micro tile mode as the destination of the last MSAA resolve. - * This allows hitting the MSAA resolve fast path, which requires that both - * src and dst micro tile modes match. - */ -static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - if (rtex->resource.b.is_shared || - rtex->resource.b.b.nr_samples <= 1 || - rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode) - return; - - assert(rscreen->chip_class >= GFX9 || - rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); - assert(rtex->resource.b.b.last_level == 0); - - if (rscreen->chip_class >= GFX9) { - /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. 
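The DCC clear word is assembled from three masks in the code above, so the possible outcomes are easy to enumerate:

    /* base                        0x20202020
     * | main channels set     ->  0x80808080
     * | extra channel set     ->  0x40404040
     * e.g. clear to (0,0,0,0): 0x20202020, clear_words_needed = false
     *      clear to (1,1,1,1): 0x20202020|0x80808080|0x40404040
     *                          = 0xE0E0E0E0, clear_words_needed = false */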
*/ - assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4); - - /* If you do swizzle_mode % 4, you'll get: - * 0 = Depth - * 1 = Standard, - * 2 = Displayable - * 3 = Rotated - * - * Depth-sample order isn't allowed: - */ - assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0); - - switch (rtex->last_msaa_resolve_target_micro_mode) { - case RADEON_MICRO_MODE_DISPLAY: - rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; - rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */ - break; - case RADEON_MICRO_MODE_THIN: - rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; - rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */ - break; - case RADEON_MICRO_MODE_ROTATED: - rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; - rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */ - break; - default: /* depth */ - assert(!"unexpected micro mode"); - return; - } - } else if (rscreen->chip_class >= CIK) { - /* These magic numbers were copied from addrlib. It doesn't use - * any definitions for them either. They are all 2D_TILED_THIN1 - * modes with different bpp and micro tile mode. - */ - switch (rtex->last_msaa_resolve_target_micro_mode) { - case RADEON_MICRO_MODE_DISPLAY: - rtex->surface.u.legacy.tiling_index[0] = 10; - break; - case RADEON_MICRO_MODE_THIN: - rtex->surface.u.legacy.tiling_index[0] = 14; - break; - case RADEON_MICRO_MODE_ROTATED: - rtex->surface.u.legacy.tiling_index[0] = 28; - break; - default: /* depth, thick */ - assert(!"unexpected micro mode"); - return; - } - } else { /* SI */ - switch (rtex->last_msaa_resolve_target_micro_mode) { - case RADEON_MICRO_MODE_DISPLAY: - switch (rtex->surface.bpe) { - case 1: - rtex->surface.u.legacy.tiling_index[0] = 10; - break; - case 2: - rtex->surface.u.legacy.tiling_index[0] = 11; - break; - default: /* 4, 8 */ - rtex->surface.u.legacy.tiling_index[0] = 12; - break; - } - break; - case RADEON_MICRO_MODE_THIN: - switch (rtex->surface.bpe) { - case 1: - rtex->surface.u.legacy.tiling_index[0] = 14; - break; - case 2: - rtex->surface.u.legacy.tiling_index[0] = 15; - break; - case 4: - rtex->surface.u.legacy.tiling_index[0] = 16; - break; - default: /* 8, 16 */ - rtex->surface.u.legacy.tiling_index[0] = 17; - break; - } - break; - default: /* depth, thick */ - assert(!"unexpected micro mode"); - return; - } - } - - rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode; - - p_atomic_inc(&rscreen->dirty_tex_counter); -} - -void si_do_fast_color_clear(struct r600_common_context *rctx, - struct pipe_framebuffer_state *fb, - struct r600_atom *fb_state, - unsigned *buffers, ubyte *dirty_cbufs, - const union pipe_color_union *color) -{ - int i; - - /* This function is broken in BE, so just disable this path for now */ -#ifdef PIPE_ARCH_BIG_ENDIAN - return; -#endif - - if (rctx->render_cond) - return; - - for (i = 0; i < fb->nr_cbufs; i++) { - struct r600_texture *tex; - unsigned clear_bit = PIPE_CLEAR_COLOR0 << i; - - if (!fb->cbufs[i]) - continue; - - /* if this colorbuffer is not being cleared */ - if (!(*buffers & clear_bit)) - continue; - - tex = (struct r600_texture *)fb->cbufs[i]->texture; - - /* the clear is allowed if all layers are bound */ - if (fb->cbufs[i]->u.tex.first_layer != 0 || - fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) { - continue; - } - - /* cannot clear mipmapped textures */ - if (fb->cbufs[i]->texture->last_level != 0) { - continue; - } - - /* only supported on tiled surfaces */ - if (tex->surface.is_linear) { - continue; - } - - /* shared textures can't use fast clear without an explicit flush, 
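On GFX9 the low two bits of swizzle_mode select the micro tile order within the same block size, which is why the remap above only rewrites mode % 4. For instance, converting a standard ('S', mode % 4 == 1) mode m to the matching displayable ('D') variant:

    /* same block size, different micro order */
    mode = (mode & ~0x3) + 2;   /* e.g. m = 25 ('S') -> 26 ('D'), i.e. m + 1 */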
- * because there is no way to communicate the clear color among - * all clients - */ - if (tex->resource.b.is_shared && - !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) - continue; - - /* fast color clear with 1D tiling doesn't work on old kernels and CIK */ - if (rctx->chip_class == CIK && - tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - rctx->screen->info.drm_major == 2 && - rctx->screen->info.drm_minor < 38) { - continue; - } - - /* Fast clear is the most appropriate place to enable DCC for - * displayable surfaces. - */ - if (rctx->chip_class >= VI && - !(rctx->screen->debug_flags & DBG(NO_DCC_FB))) { - vi_separate_dcc_try_enable(rctx, tex); - - /* RB+ isn't supported with a CMASK clear only on Stoney, - * so all clears are considered to be hypothetically slow - * clears, which is weighed when determining whether to - * enable separate DCC. - */ - if (tex->dcc_gather_statistics && - rctx->family == CHIP_STONEY) - tex->num_slow_clears++; - } - - /* Try to clear DCC first, otherwise try CMASK. */ - if (vi_dcc_enabled(tex, 0)) { - uint32_t reset_value; - bool clear_words_needed; - - if (rctx->screen->debug_flags & DBG(NO_DCC_CLEAR)) - continue; - - if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format, - color, &reset_value, - &clear_words_needed)) - continue; - - vi_dcc_clear_level(rctx, tex, 0, reset_value); - - unsigned level_bit = 1 << fb->cbufs[i]->u.tex.level; - if (clear_words_needed) { - bool need_compressed_update = !tex->dirty_level_mask; - - tex->dirty_level_mask |= level_bit; - - if (need_compressed_update) - p_atomic_inc(&rctx->screen->compressed_colortex_counter); - } - tex->separate_dcc_dirty = true; - } else { - /* 128-bit formats are unusupported */ - if (tex->surface.bpe > 8) { - continue; - } - - /* RB+ doesn't work with CMASK fast clear on Stoney. */ - if (rctx->family == CHIP_STONEY) - continue; - - /* ensure CMASK is enabled */ - r600_texture_alloc_cmask_separate(rctx->screen, tex); - if (tex->cmask.size == 0) { - continue; - } - - /* Do the fast clear. */ - rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, - tex->cmask.offset, tex->cmask.size, 0, - R600_COHERENCY_CB_META); - - bool need_compressed_update = !tex->dirty_level_mask; - - tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; - - if (need_compressed_update) - p_atomic_inc(&rctx->screen->compressed_colortex_counter); - } - - /* We can change the micro tile mode before a full clear. 
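Collecting the early-out tests above, a colorbuffer qualifies for the fast clear path only if all of the following hold (condensed checklist, same order as the code):

    /*  - not running under conditional rendering
     *  - the colorbuffer is bound and selected in *buffers
     *  - all layers are bound (first_layer == 0, last_layer == max)
     *  - no mipmaps (last_level == 0) and not a linear surface
     *  - not shared, unless PIPE_HANDLE_USAGE_EXPLICIT_FLUSH is set
     *  - not CIK with 1D tiling on DRM 2.x older than 2.38
     * then a DCC clear is tried first, falling back to a CMASK clear
     * (requires bpe <= 8, not Stoney, and an allocatable CMASK) */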
*/ - si_set_optimal_micro_tile_mode(rctx->screen, tex); - - evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); - - if (dirty_cbufs) - *dirty_cbufs |= 1 << i; - rctx->set_atom_dirty(rctx, fb_state, true); - *buffers &= ~clear_bit; - } -} - -static struct pipe_memory_object * -r600_memobj_from_handle(struct pipe_screen *screen, - struct winsys_handle *whandle, - bool dedicated) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object); - struct pb_buffer *buf = NULL; - uint32_t stride, offset; - - if (!memobj) - return NULL; - - buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, - &stride, &offset); - if (!buf) { - free(memobj); - return NULL; - } - - memobj->b.dedicated = dedicated; - memobj->buf = buf; - memobj->stride = stride; - memobj->offset = offset; - - return (struct pipe_memory_object *)memobj; - -} - -static void -r600_memobj_destroy(struct pipe_screen *screen, - struct pipe_memory_object *_memobj) -{ - struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj; - - pb_reference(&memobj->buf, NULL); - free(memobj); -} - -static struct pipe_resource * -r600_texture_from_memobj(struct pipe_screen *screen, - const struct pipe_resource *templ, - struct pipe_memory_object *_memobj, - uint64_t offset) -{ - int r; - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj; - struct r600_texture *rtex; - struct radeon_surf surface = {}; - struct radeon_bo_metadata metadata = {}; - enum radeon_surf_mode array_mode; - bool is_scanout; - struct pb_buffer *buf = NULL; - - if (memobj->b.dedicated) { - rscreen->ws->buffer_get_metadata(memobj->buf, &metadata); - r600_surface_import_metadata(rscreen, &surface, &metadata, - &array_mode, &is_scanout); - } else { - /** - * The bo metadata is unset for un-dedicated images. So we fall - * back to linear. See answer to question 5 of the - * VK_KHX_external_memory spec for some details. - * - * It is possible that this case isn't going to work if the - * surface pitch isn't correctly aligned by default. - * - * In order to support it correctly we require multi-image - * metadata to be syncrhonized between radv and radeonsi. The - * semantics of associating multiple image metadata to a memory - * object on the vulkan export side are not concretely defined - * either. - * - * All the use cases we are aware of at the moment for memory - * objects use dedicated allocations. So lets keep the initial - * implementation simple. - * - * A possible alternative is to attempt to reconstruct the - * tiling information when the TexParameter TEXTURE_TILING_EXT - * is set. - */ - array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - is_scanout = false; - - } - - r = r600_init_surface(rscreen, &surface, templ, - array_mode, memobj->stride, - offset, true, is_scanout, - false, false); - if (r) - return NULL; - - rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface); - if (!rtex) - return NULL; - - /* r600_texture_create_object doesn't increment refcount of - * memobj->buf, so increment it here. 
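The refcount fixup mentioned above uses pipebuffer's pb_reference, which swaps a reference: it takes a reference on the new buffer and drops one on whatever the destination pointed at before. Since buf starts out NULL here, the net effect is one extra reference on memobj->buf, owned by the new texture:

    struct pb_buffer *buf = NULL;
    pb_reference(&buf, memobj->buf);   /* +1 on memobj->buf, nothing released */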
- */ - pb_reference(&buf, memobj->buf); - - rtex->resource.b.is_shared = true; - rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE; - - if (rscreen->apply_opaque_metadata) - rscreen->apply_opaque_metadata(rscreen, rtex, &metadata); - - return &rtex->resource.b.b; -} - -static bool si_check_resource_capability(struct pipe_screen *screen, - struct pipe_resource *resource, - unsigned bind) -{ - struct r600_texture *tex = (struct r600_texture*)resource; - - /* Buffers only support the linear flag. */ - if (resource->target == PIPE_BUFFER) - return (bind & ~PIPE_BIND_LINEAR) == 0; - - if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear) - return false; - - if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable) - return false; - - /* TODO: PIPE_BIND_CURSOR - do we care? */ - return true; -} - -void si_init_screen_texture_functions(struct r600_common_screen *rscreen) -{ - rscreen->b.resource_from_handle = r600_texture_from_handle; - rscreen->b.resource_get_handle = r600_texture_get_handle; - rscreen->b.resource_from_memobj = r600_texture_from_memobj; - rscreen->b.memobj_create_from_handle = r600_memobj_from_handle; - rscreen->b.memobj_destroy = r600_memobj_destroy; - rscreen->b.check_resource_capability = si_check_resource_capability; -} - -void si_init_context_texture_functions(struct r600_common_context *rctx) -{ - rctx->b.create_surface = r600_create_surface; - rctx->b.surface_destroy = r600_surface_destroy; - rctx->b.clear_texture = r600_clear_texture; -} diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c index e91cb2155..0f3b43de8 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c @@ -25,12 +25,6 @@ * **************************************************************************/ -/* - * Authors: - * Christian König <christian.koenig@amd.com> - * - */ - #include <sys/types.h> #include <assert.h> #include <errno.h> @@ -45,7 +39,7 @@ #include "vl/vl_defines.h" #include "vl/vl_mpeg12_decoder.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_uvd.h" @@ -73,7 +67,7 @@ struct ruvd_decoder { struct pipe_screen *screen; struct radeon_winsys* ws; - struct radeon_winsys_cs* cs; + struct radeon_cmdbuf* cs; unsigned cur_buffer; @@ -122,8 +116,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, int reloc_idx; reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, - RADEON_PRIO_UVD); + domain, 0); if (!dec->use_legacy) { uint64_t addr; addr = dec->ws->buffer_get_virtual_address(buf); @@ -337,7 +330,7 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_ static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec) { - if (((struct r600_common_screen*)dec->screen)->family < CHIP_VEGA10) + if (((struct si_screen*)dec->screen)->info.family < CHIP_VEGA10) return 16; else return 32; @@ -402,7 +395,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec) max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); dpb_size = image_size * max_references; if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); dpb_size += align(width_in_mb * height_in_mb * 32, alignment); } @@ -412,7 +405,7 @@ 
static unsigned calc_dpb_size(struct ruvd_decoder *dec) // reference picture buffer dpb_size = image_size * max_references; if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { // macroblock context buffer dpb_size += width_in_mb * height_in_mb * max_references * 192; // IT surface buffer @@ -612,7 +605,7 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) + if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; if (pic->UseRefPicList == true) result.sps_info_flags |= 1 << 10; @@ -971,139 +964,6 @@ static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, return result; } -static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic) -{ - int size = 0, saved_size, len_pos, i; - uint16_t *bs; - uint8_t *buf = dec->bs_ptr; - - /* SOI */ - buf[size++] = 0xff; - buf[size++] = 0xd8; - - /* DQT */ - buf[size++] = 0xff; - buf[size++] = 0xdb; - - len_pos = size++; - size++; - - for (i = 0; i < 4; ++i) { - if (pic->quantization_table.load_quantiser_table[i] == 0) - continue; - - buf[size++] = i; - memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64); - size += 64; - } - - bs = (uint16_t*)&buf[len_pos]; - *bs = util_bswap16(size - 4); - - saved_size = size; - - /* DHT */ - buf[size++] = 0xff; - buf[size++] = 0xc4; - - len_pos = size++; - size++; - - for (i = 0; i < 2; ++i) { - if (pic->huffman_table.load_huffman_table[i] == 0) - continue; - - buf[size++] = 0x00 | i; - memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16); - size += 16; - memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12); - size += 12; - } - - for (i = 0; i < 2; ++i) { - if (pic->huffman_table.load_huffman_table[i] == 0) - continue; - - buf[size++] = 0x10 | i; - memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16); - size += 16; - memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162); - size += 162; - } - - bs = (uint16_t*)&buf[len_pos]; - *bs = util_bswap16(size - saved_size - 2); - - saved_size = size; - - /* DRI */ - if (pic->slice_parameter.restart_interval) { - buf[size++] = 0xff; - buf[size++] = 0xdd; - buf[size++] = 0x00; - buf[size++] = 0x04; - bs = (uint16_t*)&buf[size++]; - *bs = util_bswap16(pic->slice_parameter.restart_interval); - saved_size = ++size; - } - - /* SOF */ - buf[size++] = 0xff; - buf[size++] = 0xc0; - - len_pos = size++; - size++; - - buf[size++] = 0x08; - - bs = (uint16_t*)&buf[size++]; - *bs = util_bswap16(pic->picture_parameter.picture_height); - size++; - - bs = (uint16_t*)&buf[size++]; - *bs = util_bswap16(pic->picture_parameter.picture_width); - size++; - - buf[size++] = pic->picture_parameter.num_components; - - for (i = 0; i < pic->picture_parameter.num_components; ++i) { - buf[size++] = pic->picture_parameter.components[i].component_id; - buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 | - pic->picture_parameter.components[i].v_sampling_factor; - buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector; - } - - bs = 
(uint16_t*)&buf[len_pos]; - *bs = util_bswap16(size - saved_size - 2); - - saved_size = size; - - /* SOS */ - buf[size++] = 0xff; - buf[size++] = 0xda; - - len_pos = size++; - size++; - - buf[size++] = pic->slice_parameter.num_components; - - for (i = 0; i < pic->slice_parameter.num_components; ++i) { - buf[size++] = pic->slice_parameter.components[i].component_selector; - buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 | - pic->slice_parameter.components[i].ac_table_selector; - } - - buf[size++] = 0x00; - buf[size++] = 0x3f; - buf[size++] = 0x00; - - bs = (uint16_t*)&buf[len_pos]; - *bs = util_bswap16(size - saved_size - 2); - - dec->bs_ptr += size; - dec->bs_size += size; -} - /** * destroy this video decoder */ @@ -1182,7 +1042,6 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, const unsigned *sizes) { struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - enum pipe_video_format format = u_reduce_video_profile(picture->profile); unsigned i; assert(decoder); @@ -1190,16 +1049,10 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, if (!dec->bs_ptr) return; - if (format == PIPE_VIDEO_FORMAT_JPEG) - get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture); - for (i = 0; i < num_buffers; ++i) { struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; unsigned new_size = dec->bs_size + sizes[i]; - if (format == PIPE_VIDEO_FORMAT_JPEG) - new_size += 2; /* save for EOI */ - if (new_size > buf->res->buf->size) { dec->ws->buffer_unmap(buf->res->buf); if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { @@ -1219,13 +1072,6 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, dec->bs_size += sizes[i]; dec->bs_ptr += sizes[i]; } - - if (format == PIPE_VIDEO_FORMAT_JPEG) { - ((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */ - ((uint8_t *)dec->bs_ptr)[1] = 0xd9; - dec->bs_size += 2; - dec->bs_ptr += 2; - } } /** @@ -1275,11 +1121,11 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder, dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); if (dec->stream_type == RUVD_CODEC_H264_PERF && - ((struct r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10) + ((struct si_screen*)dec->screen)->info.family >= CHIP_POLARIS10) dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); - if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY) + if (((struct si_screen*)dec->screen)->info.family >= CHIP_STONEY) dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; switch (u_reduce_video_profile(picture->profile)) { @@ -1351,7 +1197,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder, FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); set_reg(dec, dec->reg.cntl, 1); - flush(dec, RADEON_FLUSH_ASYNC); + flush(dec, PIPE_FLUSH_ASYNC); next_buffer(dec); } @@ -1369,17 +1215,14 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte const struct pipe_video_codec *templ, ruvd_set_dtb set_dtb) { - struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws; - struct r600_common_context *rctx = (struct r600_common_context*)context; + struct si_context *sctx = (struct si_context*)context; + struct radeon_winsys *ws = sctx->ws; unsigned dpb_size; unsigned width = templ->width, height = templ->height; unsigned bs_buf_size; - struct radeon_info info; struct ruvd_decoder *dec; int r, i; - 
ws->query_info(ws, &info); - switch(u_reduce_video_profile(templ->profile)) { case PIPE_VIDEO_FORMAT_MPEG12: if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) @@ -1405,7 +1248,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte if (!dec) return NULL; - if (info.drm_major < 3) + if (sctx->screen->info.drm_major < 3) dec->use_legacy = true; dec->base = *templ; @@ -1420,12 +1263,12 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte dec->base.end_frame = ruvd_end_frame; dec->base.flush = ruvd_flush; - dec->stream_type = profile2stream_type(dec, info.family); + dec->stream_type = profile2stream_type(dec, sctx->family); dec->set_dtb = set_dtb; dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL); + dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; @@ -1433,7 +1276,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte for (i = 0; i < 16; i++) dec->render_pic_list[i] = NULL; - dec->fb_size = (info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : + dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; bs_buf_size = width * height * (512 / (16 * 16)); for (i = 0; i < NUM_BUFFERS; ++i) { @@ -1466,7 +1309,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte si_vid_clear_buffer(context, &dec->dpb); } - if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) { + if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) { unsigned ctx_size = calc_ctx_size_h264_perf(dec); if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { RVID_ERR("Can't allocated context buffer.\n"); @@ -1475,7 +1318,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte si_vid_clear_buffer(context, &dec->ctx); } - if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) { + if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) { if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, UVD_SESSION_CONTEXT_SIZE, PIPE_USAGE_DEFAULT)) { @@ -1485,7 +1328,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte si_vid_clear_buffer(context, &dec->sessionctx); } - if (info.family >= CHIP_VEGA10) { + if (sctx->family >= CHIP_VEGA10) { dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15; dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15; dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15; @@ -1539,7 +1382,7 @@ static unsigned texture_offset(struct radeon_surf *surface, unsigned layer, default: case RUVD_SURFACE_TYPE_LEGACY: return surface->u.legacy.level[0].offset + - layer * surface->u.legacy.level[0].slice_size; + layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; break; case RUVD_SURFACE_TYPE_GFX9: return surface->u.gfx9.surf_offset + diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h index 2bb2ce21d..583b4d5e4 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h @@ -25,12 +25,6 @@ * **************************************************************************/ -/* - * Authors: - * Christian König <christian.koenig@amd.com> - * - */ - #ifndef RADEON_UVD_H #define RADEON_UVD_H diff --git 
a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c index 9e98741fb..8972253c7 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c @@ -25,12 +25,6 @@ * **************************************************************************/ -/* - * Authors: - * Christian König <christian.koenig@amd.com> - * - */ - #include <stdio.h> #include "pipe/p_video_codec.h" @@ -40,7 +34,7 @@ #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" @@ -59,7 +53,7 @@ */ static void flush(struct rvce_encoder *enc) { - enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL); + enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); enc->task_info_idx = 0; enc->bs_idx = 0; } @@ -225,10 +219,10 @@ struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc) void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset, signed *chroma_offset) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; unsigned pitch, vpitch, fsize; - if (rscreen->chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); } else { @@ -394,18 +388,18 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, struct radeon_winsys* ws, rvce_get_buffer get_buffer) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen; - struct r600_common_context *rctx = (struct r600_common_context*)context; + struct si_screen *sscreen = (struct si_screen *)context->screen; + struct si_context *sctx = (struct si_context*)context; struct rvce_encoder *enc; struct pipe_video_buffer *tmp_buf, templat = {}; struct radeon_surf *tmp_surf; unsigned cpb_size; - if (!rscreen->info.vce_fw_version) { + if (!sscreen->info.vce_fw_version) { RVID_ERR("Kernel doesn't supports VCE!\n"); return NULL; - } else if (!si_vce_is_fw_version_supported(rscreen)) { + } else if (!si_vce_is_fw_version_supported(sscreen)) { RVID_ERR("Unsupported VCE fw version loaded!\n"); return NULL; } @@ -414,20 +408,21 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, if (!enc) return NULL; - if (rscreen->info.drm_major == 3) + if (sscreen->info.drm_major == 3) enc->use_vm = true; - if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) || - rscreen->info.drm_major == 3) + if ((sscreen->info.drm_major == 2 && sscreen->info.drm_minor >= 42) || + sscreen->info.drm_major == 3) enc->use_vui = true; - if (rscreen->info.family >= CHIP_TONGA && - rscreen->info.family != CHIP_STONEY && - rscreen->info.family != CHIP_POLARIS11 && - rscreen->info.family != CHIP_POLARIS12) + if (sscreen->info.family >= CHIP_TONGA && + sscreen->info.family != CHIP_STONEY && + sscreen->info.family != CHIP_POLARIS11 && + sscreen->info.family != CHIP_POLARIS12 && + sscreen->info.family != CHIP_VEGAM) enc->dual_pipe = true; /* TODO enable B frame with dual instance */ - if ((rscreen->info.family >= CHIP_TONGA) && + if ((sscreen->info.family >= CHIP_TONGA) && (templ->max_references == 1) && - (rscreen->info.vce_harvest_config == 0)) + (sscreen->info.vce_harvest_config == 0)) enc->dual_inst = true; enc->base = *templ; @@ -443,7 +438,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct 
pipe_context *context, enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc); + enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; @@ -465,7 +460,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - cpb_size = (rscreen->chip_class < GFX9) ? + cpb_size = (sscreen->info.chip_class < GFX9) ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * align(tmp_surf->u.legacy.level[0].nblk_y, 32) : @@ -489,7 +484,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, reset_cpb(enc); - switch (rscreen->info.vce_fw_version) { + switch (sscreen->info.vce_fw_version) { case FW_40_2_2: si_vce_40_2_2_init(enc); si_get_pic_param = si_vce_40_2_2_get_param; @@ -511,7 +506,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, break; default: - if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) { + if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) { si_vce_52_init(enc); si_get_pic_param = si_vce_52_get_param; } else @@ -534,9 +529,9 @@ error: /** * check if kernel has the right fw version loaded */ -bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen) +bool si_vce_is_fw_version_supported(struct si_screen *sscreen) { - switch (rscreen->info.vce_fw_version) { + switch (sscreen->info.vce_fw_version) { case FW_40_2_2: case FW_50_0_1: case FW_50_1_2: @@ -547,7 +542,7 @@ bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen) case FW_52_8_3: return true; default: - if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) + if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) return true; else return false; @@ -564,7 +559,7 @@ void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, int reloc_idx; reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, RADEON_PRIO_VCE); + domain, 0); if (enc->use_vm) { uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c index 3be38bca1..e17468c90 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c @@ -25,12 +25,6 @@ * **************************************************************************/ -/* - * Authors: - * Christian König <christian.koenig@amd.com> - * - */ - #include <stdio.h> #include "pipe/p_video_codec.h" @@ -40,12 +34,10 @@ #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" -static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 }; - static void session(struct rvce_encoder *enc) { RVCE_BEGIN(0x00000001); // session cmd @@ -88,8 +80,7 @@ static void create(struct rvce_encoder *enc) RVCE_BEGIN(0x01000001); // create cmd RVCE_CS(0x00000000); // encUseCircularBuffer - RVCE_CS(profiles[enc->base.profile - - PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile + RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile RVCE_CS(enc->base.level); // encLevel RVCE_CS(0x00000000); // encPicStructRestriction RVCE_CS(enc->base.width); // encImageWidth diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c 
b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c index 96bb557eb..f4cbc9bb8 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c @@ -25,12 +25,6 @@ * **************************************************************************/ -/* - * Authors: - * Christian König <christian.koenig@amd.com> - * - */ - #include <stdio.h> #include "pipe/p_video_codec.h" @@ -40,7 +34,7 @@ #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c index 09fe424fd..fc7ddc62a 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c @@ -34,12 +34,10 @@ #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" -static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 }; - static void get_rate_control_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method; @@ -162,24 +160,23 @@ void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_ enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x00000201; else enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x01000201; - enc->enc_pic.is_idr = pic->is_idr; + enc->enc_pic.is_idr = (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); } static void create(struct rvce_encoder *enc) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; enc->task_info(enc, 0x00000000, 0, 0, 0); RVCE_BEGIN(0x01000001); // create cmd RVCE_CS(enc->enc_pic.ec.enc_use_circular_buffer); - RVCE_CS(profiles[enc->base.profile - - PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile + RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile RVCE_CS(enc->base.level); // encLevel RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction); RVCE_CS(enc->base.width); // encImageWidth RVCE_CS(enc->base.height); // encImageHeight - if (rscreen->chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw @@ -200,7 +197,7 @@ static void create(struct rvce_encoder *enc) static void encode(struct rvce_encoder *enc) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; signed luma_offset, chroma_offset, bs_offset; unsigned dep, bs_idx = enc->bs_idx++; int i; @@ -250,7 +247,7 @@ static void encode(struct rvce_encoder *enc) RVCE_CS(enc->enc_pic.eo.end_of_sequence); RVCE_CS(enc->enc_pic.eo.end_of_stream); - if (rscreen->chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c index 59724869b..75ef4a5d4 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c 
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -35,13 +35,15 @@ #include "vl/vl_mpeg12_decoder.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vcn_dec.h" +#include "vl/vl_probs_table.h" #define FB_BUFFER_OFFSET 0x1000 #define FB_BUFFER_SIZE 2048 #define IT_SCALING_TABLE_SIZE 992 +#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256) #define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) #define RDECODE_GPCOM_VCPU_CMD 0x2070c @@ -49,37 +51,10 @@ #define RDECODE_GPCOM_VCPU_DATA1 0x20714 #define RDECODE_ENGINE_CNTL 0x20718 -#define NUM_BUFFERS 4 #define NUM_MPEG2_REFS 6 #define NUM_H264_REFS 17 #define NUM_VC1_REFS 5 - -struct radeon_decoder { - struct pipe_video_codec base; - - unsigned stream_handle; - unsigned stream_type; - unsigned frame_number; - - struct pipe_screen *screen; - struct radeon_winsys *ws; - struct radeon_winsys_cs *cs; - - void *msg; - uint32_t *fb; - uint8_t *it; - void *bs_ptr; - - struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; - struct rvid_buffer bs_buffers[NUM_BUFFERS]; - struct rvid_buffer dpb; - struct rvid_buffer ctx; - struct rvid_buffer sessionctx; - - unsigned bs_size; - unsigned cur_buffer; - void *render_pic_list[16]; -}; +#define NUM_VP9_REFS 8 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec, struct pipe_h264_picture_desc *pic) @@ -200,7 +175,7 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec, result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) + if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; if (pic->UseRefPicList == true) result.sps_info_flags |= 1 << 10; @@ -358,6 +333,205 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec, return result; } +static void fill_probs_table(void *ptr) +{ + rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr; + + memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4)); + memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8)); + memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16)); + memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32)); + memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs)); + memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs)); + memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p)); + memcpy(probs->switchable_interp_prob, default_switchable_interp_prob, sizeof(default_switchable_interp_prob)); + memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs)); + memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs)); + memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs)); + memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p)); + memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p)); + memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p)); + memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32)); + memcpy(probs->tx_probs_16x16, 
default_tx_probs_16x16, sizeof(default_tx_probs_16x16)); + memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8)); + memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints)); + memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components)); + memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t)); +} + +static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec, + struct pipe_video_buffer *target, + struct pipe_vp9_picture_desc *pic) +{ + rvcn_dec_message_vp9_t result; + unsigned i; + + memset(&result, 0, sizeof(result)); + + /* segment table */ + rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs); + + if (pic->picture_parameter.pic_fields.segmentation_enabled) { + for (i = 0; i < 8; ++i) { + prbs->seg.feature_data[i] = + (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) | + ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) | + ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24); + prbs->seg.feature_mask[i] = + (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) | + (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) | + (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) | + (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3); + } + + for (i = 0; i < 7; ++i) + prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i]; + + for (i = 0; i < 3; ++i) + prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i]; + + prbs->seg.abs_delta = 0; + } else + memset(&prbs->seg, 0, 256); + + result.frame_header_flags = + (pic->picture_parameter.pic_fields.frame_type << + RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.error_resilient_mode << + RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.intra_only << + RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.allow_high_precision_mv << + RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode << + RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.refresh_frame_context << + RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.segmentation_enabled << + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.segmentation_update_map << + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.pic_fields.segmentation_temporal_update << + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK; + + 
result.frame_header_flags |= + (pic->picture_parameter.mode_ref_delta_enabled << + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK; + + result.frame_header_flags |= + (pic->picture_parameter.mode_ref_delta_update << + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK; + + result.frame_header_flags |= ((dec->show_frame && + !pic->picture_parameter.pic_fields.error_resilient_mode) + << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK; + dec->show_frame = pic->picture_parameter.pic_fields.show_frame; + + result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type; + + result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx; + result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context; + + result.filter_level = pic->picture_parameter.filter_level; + result.sharpness_level = pic->picture_parameter.sharpness_level; + + for (i = 0; i < 8; ++i) + memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2); + + if (pic->picture_parameter.pic_fields.lossless_flag) { + result.base_qindex = 0; + result.y_dc_delta_q = 0; + result.uv_ac_delta_q = 0; + result.uv_dc_delta_q = 0; + } else { + result.base_qindex = pic->picture_parameter.base_qindex; + result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q; + result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q; + result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q; + } + + result.log2_tile_cols = pic->picture_parameter.log2_tile_columns; + result.log2_tile_rows = pic->picture_parameter.log2_tile_rows; + result.chroma_format = 1; + result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 + = (pic->picture_parameter.bit_depth - 8); + + result.vp9_frame_size = align(dec->bs_size, 128); + result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes; + result.compressed_header_size = pic->picture_parameter.first_partition_size; + + assert(dec->base.max_references + 1 <= 16); + + for (i = 0 ; i < dec->base.max_references + 1 ; ++i) { + if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) { + result.curr_pic_idx = + (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base); + break; + } else if (!dec->render_pic_list[i]) { + dec->render_pic_list[i] = target; + result.curr_pic_idx = dec->ref_idx; + vl_video_buffer_set_associated_data(target, &dec->base, + (void *)(uintptr_t)dec->ref_idx++, + &radeon_dec_destroy_associated_data); + break; + } + } + + for (i = 0 ; i < 8; i++) { + result.ref_frame_map[i] = (pic->ref[i]) ? 
+ (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : + 0x7f; + } + + result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame]; + result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias; + result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame]; + result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias; + result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame]; + result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias; + + if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) { + if (target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.p010_mode = 0; + result.luma_10to8 = 1; + result.chroma_10to8 = 1; + } + } + + return result; +} + static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec) { unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); @@ -554,15 +728,15 @@ static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec result.vop_time_increment_resolution = pic->vop_time_increment_resolution; - result.short_video_header |= pic->short_video_header << 0; - result.interlaced |= pic->interlaced << 2; - result.load_intra_quant_mat |= 1 << 3; - result.load_nonintra_quant_mat |= 1 << 4; - result.quarter_sample |= pic->quarter_sample << 5; - result.complexity_estimation_disable |= 1 << 6; - result.resync_marker_disable |= pic->resync_marker_disable << 7; - result.newpred_enable |= 0 << 10; // - result.reduced_resolution_vop_enable |= 0 << 11; + result.short_video_header = pic->short_video_header; + result.interlaced = pic->interlaced; + result.load_intra_quant_mat = 1; + result.load_nonintra_quant_mat = 1; + result.quarter_sample = pic->quarter_sample; + result.complexity_estimation_disable = 1; + result.resync_marker_disable = pic->resync_marker_disable; + result.newpred_enable = 0; + result.reduced_resolution_vop_enable = 0; result.quant_type = pic->quant_type; @@ -603,10 +777,10 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, struct pipe_video_buffer *target, struct pipe_picture_desc *picture) { - struct r600_texture *luma = (struct r600_texture *) - ((struct vl_video_buffer *)target)->resources[0]; - struct r600_texture *chroma = (struct r600_texture *) - ((struct vl_video_buffer *)target)->resources[1]; + struct si_texture *luma = (struct si_texture *) + ((struct vl_video_buffer *)target)->resources[0]; + struct si_texture *chroma = (struct si_texture *) + ((struct vl_video_buffer *)target)->resources[1]; rvcn_dec_message_header_t *header; rvcn_dec_message_index_t *index; rvcn_dec_message_decode_t *decode; @@ -640,16 +814,16 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, index->size = sizeof(rvcn_dec_message_avc_t); index->filled = 0; - decode->stream_type = dec->stream_type;; + decode->stream_type = dec->stream_type; decode->decode_flags = 0x1; - decode->width_in_samples = dec->base.width;; - decode->height_in_samples = dec->base.height;; + decode->width_in_samples = dec->base.width; + decode->height_in_samples = dec->base.height; decode->bsd_size = align(dec->bs_size, 128); decode->dpb_size = dec->dpb.res->buf->size; decode->dt_size = - ((struct r600_resource *)((struct vl_video_buffer *)target)->resources[0])->buf->size + - ((struct r600_resource *)((struct vl_video_buffer 
*)target)->resources[1])->buf->size; + r600_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size + + r600_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; decode->sct_size = 0; decode->sc_coeff_size = 0; @@ -736,6 +910,43 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, index->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD; break; } + case PIPE_VIDEO_FORMAT_VP9: { + rvcn_dec_message_vp9_t vp9 = + get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc*)picture); + + memcpy(codec, (void*)&vp9, sizeof(rvcn_dec_message_vp9_t)); + index->message_id = RDECODE_MESSAGE_VP9; + + if (dec->ctx.res == NULL) { + unsigned ctx_size; + uint8_t *ptr; + + /* default probability + probability data */ + ctx_size = 2304 * 5; + + /* SRE collocated context data */ + ctx_size += 32 * 2 * 64 * 64; + + /* SMP collocated context data */ + ctx_size += 9 * 64 * 2 * 64 * 64; + + /* SDB left tile pixel */ + ctx_size += 8 * 2 * 4096; + + if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) + ctx_size += 8 * 2 * 4096; + + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) + RVID_ERR("Can't allocated context buffer.\n"); + si_vid_clear_buffer(dec->base.context, &dec->ctx); + + /* ctx needs probs table */ + ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE); + fill_probs_table(ptr); + dec->ws->buffer_unmap(dec->ctx.res->buf); + } + break; + } default: assert(0); return NULL; @@ -744,7 +955,7 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, if (dec->ctx.res) decode->hw_ctxt_size = dec->ctx.res->buf->size; - return luma->resource.buf; + return luma->buffer.buf; } static void rvcn_dec_message_destroy(struct radeon_decoder *dec) @@ -791,7 +1002,7 @@ static void send_cmd(struct radeon_decoder *dec, unsigned cmd, uint64_t addr; dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, RADEON_PRIO_UVD); + domain, 0); addr = dec->ws->buffer_get_virtual_address(buf); addr = addr + off; @@ -807,14 +1018,20 @@ static bool have_it(struct radeon_decoder *dec) dec->stream_type == RDECODE_CODEC_H265; } +/* do the codec needs an probs buffer? 
*/ +static bool have_probs(struct radeon_decoder *dec) +{ + return dec->stream_type == RDECODE_CODEC_VP9; +} + /* map the next available message/feedback/itscaling buffer */ -static void map_msg_fb_it_buf(struct radeon_decoder *dec) +static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec) { struct rvid_buffer* buf; uint8_t *ptr; /* grab the current message/feedback buffer */ - buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; /* and map it for CPU access */ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); @@ -825,6 +1042,8 @@ static void map_msg_fb_it_buf(struct radeon_decoder *dec) dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); if (have_it(dec)) dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); + else if (have_probs(dec)) + dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); } /* unmap and send a message command to the VCPU */ @@ -837,13 +1056,14 @@ static void send_msg_buf(struct radeon_decoder *dec) return; /* grab the current message buffer */ - buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; /* unmap the buffer */ dec->ws->buffer_unmap(buf->res->buf); dec->msg = NULL; dec->fb = NULL; dec->it = NULL; + dec->probs = NULL; if (dec->sessionctx.res) send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, @@ -1019,6 +1239,18 @@ static unsigned calc_dpb_size(struct radeon_decoder *dec) dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); break; + case PIPE_VIDEO_FORMAT_VP9: + max_references = MAX2(max_references, 9); + + dpb_size = (4096 * 3000 * 3 / 2) * max_references; + if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) + dpb_size *= (3 / 2); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + dpb_size = 0; + break; + default: // something is missing here assert(0); @@ -1040,7 +1272,7 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder) assert(decoder); - map_msg_fb_it_buf(dec); + map_msg_fb_it_probs_buf(dec); rvcn_dec_message_destroy(dec); send_msg_buf(dec); @@ -1049,7 +1281,7 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder) dec->ws->cs_destroy(dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); si_vid_destroy_buffer(&dec->bs_buffers[i]); } @@ -1073,7 +1305,8 @@ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder, assert(decoder); frame = ++dec->frame_number; - vl_video_buffer_set_associated_data(target, decoder, (void *)frame, + if (dec->stream_type != RDECODE_CODEC_VP9) + vl_video_buffer_set_associated_data(target, decoder, (void *)frame, &radeon_dec_destroy_associated_data); dec->bs_size = 0; @@ -1139,28 +1372,22 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder, } /** - * end decoding of the current frame + * send cmd for vcn dec */ -static void radeon_dec_end_frame(struct pipe_video_codec *decoder, +void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target, struct pipe_picture_desc *picture) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; struct pb_buffer *dt; - struct rvid_buffer *msg_fb_it_buf, *bs_buf; - - assert(decoder); + struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf; - if (!dec->bs_ptr) - return; - - msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; bs_buf = &dec->bs_buffers[dec->cur_buffer]; memset(dec->bs_ptr, 0, 
align(dec->bs_size, 128) - dec->bs_size); dec->ws->buffer_unmap(bs_buf->res->buf); - map_msg_fb_it_buf(dec); + map_msg_fb_it_probs_buf(dec); dt = rvcn_dec_message_decode(dec, target, picture); rvcn_dec_message_feedback(dec); send_msg_buf(dec); @@ -1174,14 +1401,34 @@ static void radeon_dec_end_frame(struct pipe_video_codec *decoder, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, + send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); if (have_it(dec)) - send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, + send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf, + FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + else if (have_probs(dec)) + send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf, FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); set_reg(dec, RDECODE_ENGINE_CNTL, 1); +} + +/** + * end decoding of the current frame + */ +static void radeon_dec_end_frame(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + struct radeon_decoder *dec = (struct radeon_decoder*)decoder; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + dec->send_cmd(dec, target, picture); - flush(dec, RADEON_FLUSH_ASYNC); + flush(dec, PIPE_FLUSH_ASYNC); next_buffer(dec); } @@ -1198,10 +1445,10 @@ static void radeon_dec_flush(struct pipe_video_codec *decoder) struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templ) { - struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws; - struct r600_common_context *rctx = (struct r600_common_context*)context; + struct si_context *sctx = (struct si_context*)context; + struct radeon_winsys *ws = sctx->ws; unsigned width = templ->width, height = templ->height; - unsigned dpb_size, bs_buf_size, stream_type = 0; + unsigned dpb_size, bs_buf_size, stream_type = 0, ring = RING_VCN_DEC; struct radeon_decoder *dec; int r, i; @@ -1227,6 +1474,13 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, case PIPE_VIDEO_FORMAT_HEVC: stream_type = RDECODE_CODEC_H265; break; + case PIPE_VIDEO_FORMAT_VP9: + stream_type = RDECODE_CODEC_VP9; + break; + case PIPE_VIDEO_FORMAT_JPEG: + stream_type = RDECODE_CODEC_JPEG; + ring = RING_VCN_JPEG; + break; default: assert(0); break; @@ -1253,7 +1507,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL); + dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; @@ -1263,12 +1517,14 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, dec->render_pic_list[i] = NULL; bs_buf_size = width * height * (512 / (16 * 16)); for (i = 0; i < NUM_BUFFERS; ++i) { - unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; + unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; if (have_it(dec)) - msg_fb_it_size += IT_SCALING_TABLE_SIZE; + msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE; + else if (have_probs(dec)) + msg_fb_it_probs_size += 
VP9_PROBS_TABLE_SIZE; /* use vram to improve performance, workaround an unknown bug */ - if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], - msg_fb_it_size, PIPE_USAGE_DEFAULT)) { + if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], + msg_fb_it_probs_size, PIPE_USAGE_DEFAULT)) { RVID_ERR("Can't allocated message buffers.\n"); goto error; } @@ -1279,18 +1535,29 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, goto error; } - si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); + si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]); si_vid_clear_buffer(context, &dec->bs_buffers[i]); - } - dpb_size = calc_dpb_size(dec); + if (have_probs(dec)) { + struct rvid_buffer* buf; + void *ptr; - if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated dpb.\n"); - goto error; + buf = &dec->msg_fb_it_probs_buffers[i]; + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; + fill_probs_table(ptr); + dec->ws->buffer_unmap(buf->res->buf); + } } - si_vid_clear_buffer(context, &dec->dpb); + dpb_size = calc_dpb_size(dec); + if (dpb_size) { + if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated dpb.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->dpb); + } if (dec->stream_type == RDECODE_CODEC_H264_PERF) { unsigned ctx_size = calc_ctx_size_h264_perf(dec); @@ -1309,7 +1576,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, } si_vid_clear_buffer(context, &dec->sessionctx); - map_msg_fb_it_buf(dec); + map_msg_fb_it_probs_buf(dec); rvcn_dec_message_create(dec); send_msg_buf(dec); r = flush(dec, 0); @@ -1318,13 +1585,18 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, next_buffer(dec); + if (stream_type == RDECODE_CODEC_JPEG) + dec->send_cmd = send_cmd_jpeg; + else + dec->send_cmd = send_cmd_dec; + return &dec->base; error: if (dec->cs) dec->ws->cs_destroy(dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); si_vid_destroy_buffer(&dec->bs_buffers[i]); } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h index accffef6d..a6a726f46 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h @@ -43,10 +43,20 @@ #define RDECODE_PKT2() (RDECODE_PKT_TYPE_S(2)) +#define RDECODE_PKT_REG_J(x) ((unsigned)(x) & 0x3FFFF) +#define RDECODE_PKT_RES_J(x) (((unsigned)(x) & 0x3F) << 18) +#define RDECODE_PKT_COND_J(x) (((unsigned)(x) & 0xF) << 24) +#define RDECODE_PKT_TYPE_J(x) (((unsigned)(x) & 0xF) << 28) +#define RDECODE_PKTJ(reg, cond, type) (RDECODE_PKT_REG_J(reg) | \ + RDECODE_PKT_RES_J(0) | \ + RDECODE_PKT_COND_J(cond) | \ + RDECODE_PKT_TYPE_J(type)) + #define RDECODE_CMD_MSG_BUFFER 0x00000000 #define RDECODE_CMD_DPB_BUFFER 0x00000001 #define RDECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 #define RDECODE_CMD_FEEDBACK_BUFFER 0x00000003 +#define RDECODE_CMD_PROB_TBL_BUFFER 0x00000004 #define RDECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 #define RDECODE_CMD_BITSTREAM_BUFFER 0x00000100 #define RDECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 @@ -61,7 +71,9 @@ #define RDECODE_CODEC_MPEG2_VLD 0x00000003 #define RDECODE_CODEC_MPEG4 0x00000004 #define RDECODE_CODEC_H264_PERF 
0x00000007 +#define RDECODE_CODEC_JPEG 0x00000008 #define RDECODE_CODEC_H265 0x00000010 +#define RDECODE_CODEC_VP9 0x00000011 #define RDECODE_ARRAY_MODE_LINEAR 0x00000000 #define RDECODE_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 @@ -100,11 +112,118 @@ #define RDECODE_MESSAGE_MPEG2_VLD 0x0000000A #define RDECODE_MESSAGE_MPEG4_ASP_VLD 0x0000000B #define RDECODE_MESSAGE_HEVC 0x0000000D +#define RDECODE_MESSAGE_VP9 0x0000000E #define RDECODE_FEEDBACK_PROFILING 0x00000001 #define RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT 7 +#define NUM_BUFFERS 4 + +#define RDECODE_VP9_PROBS_DATA_SIZE 2304 + +#define mmUVD_JPEG_CNTL 0x0200 +#define mmUVD_JPEG_CNTL_BASE_IDX 1 +#define mmUVD_JPEG_RB_BASE 0x0201 +#define mmUVD_JPEG_RB_BASE_BASE_IDX 1 +#define mmUVD_JPEG_RB_WPTR 0x0202 +#define mmUVD_JPEG_RB_WPTR_BASE_IDX 1 +#define mmUVD_JPEG_RB_RPTR 0x0203 +#define mmUVD_JPEG_RB_RPTR_BASE_IDX 1 +#define mmUVD_JPEG_RB_SIZE 0x0204 +#define mmUVD_JPEG_RB_SIZE_BASE_IDX 1 +#define mmUVD_JPEG_TIER_CNTL2 0x021a +#define mmUVD_JPEG_TIER_CNTL2_BASE_IDX 1 +#define mmUVD_JPEG_UV_TILING_CTRL 0x021c +#define mmUVD_JPEG_UV_TILING_CTRL_BASE_IDX 1 +#define mmUVD_JPEG_TILING_CTRL 0x021e +#define mmUVD_JPEG_TILING_CTRL_BASE_IDX 1 +#define mmUVD_JPEG_OUTBUF_RPTR 0x0220 +#define mmUVD_JPEG_OUTBUF_RPTR_BASE_IDX 1 +#define mmUVD_JPEG_OUTBUF_WPTR 0x0221 +#define mmUVD_JPEG_OUTBUF_WPTR_BASE_IDX 1 +#define mmUVD_JPEG_PITCH 0x0222 +#define mmUVD_JPEG_PITCH_BASE_IDX 1 +#define mmUVD_JPEG_INT_EN 0x0229 +#define mmUVD_JPEG_INT_EN_BASE_IDX 1 +#define mmUVD_JPEG_UV_PITCH 0x022b +#define mmUVD_JPEG_UV_PITCH_BASE_IDX 1 +#define mmUVD_JPEG_INDEX 0x023e +#define mmUVD_JPEG_INDEX_BASE_IDX 1 +#define mmUVD_JPEG_DATA 0x023f +#define mmUVD_JPEG_DATA_BASE_IDX 1 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x0438 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x0439 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x045a +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x045b +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_CTX_INDEX 0x0528 +#define mmUVD_CTX_INDEX_BASE_IDX 1 +#define mmUVD_CTX_DATA 0x0529 +#define mmUVD_CTX_DATA_BASE_IDX 1 +#define mmUVD_SOFT_RESET 0x05a0 +#define mmUVD_SOFT_RESET_BASE_IDX 1 + +#define UVD_BASE_INST0_SEG0 0x00007800 +#define UVD_BASE_INST0_SEG1 0x00007E00 +#define UVD_BASE_INST0_SEG2 0 +#define UVD_BASE_INST0_SEG3 0 +#define UVD_BASE_INST0_SEG4 0 + +#define SOC15_REG_ADDR(reg) (UVD_BASE_INST0_SEG1 + reg) + +#define COND0 0 +#define COND1 1 +#define COND2 2 +#define COND3 3 +#define COND4 4 +#define COND5 5 +#define COND6 6 +#define COND7 7 + +#define TYPE0 0 +#define TYPE1 1 +#define TYPE2 2 +#define TYPE3 3 +#define TYPE4 4 +#define TYPE5 5 +#define TYPE6 6 +#define TYPE7 7 + +/* VP9 Frame header flags */ +#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT (13) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT (12) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT (11) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT (10) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT (9) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT (8) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT (7) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT (6) +#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT (5) 
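/* Usage sketch (illustrative, not part of this header): each VP9 frame
 * header bit is positioned with its _SHIFT constant and clamped with the
 * matching _MASK, exactly as get_vp9_msg() does in radeon_vcn_dec.c above:
 *
 *   result.frame_header_flags |=
 *       (pic->picture_parameter.pic_fields.intra_only <<
 *        RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) &
 *       RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK;
 */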
+#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT (4) +#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT (3) +#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT (2) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT (1) +#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_SHIFT (0) + +#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK (0x00002000) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK (0x00001000) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK (0x00000800) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK (0x00000400) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK (0x00000200) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK (0x00000100) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK (0x00000080) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK (0x00000040) +#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK (0x00000020) +#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK (0x00000010) +#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK (0x00000008) +#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK (0x00000004) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK (0x00000002) +#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_MASK (0x00000001) + typedef struct rvcn_dec_message_index_s { unsigned int message_id; unsigned int offset; @@ -443,6 +562,47 @@ typedef struct rvcn_dec_message_hevc_s { unsigned char direct_reflist[2][15]; } rvcn_dec_message_hevc_t; +typedef struct rvcn_dec_message_vp9_s { + unsigned int frame_header_flags; + + unsigned char frame_context_idx; + unsigned char reset_frame_context; + + unsigned char curr_pic_idx; + unsigned char interp_filter; + + unsigned char filter_level; + unsigned char sharpness_level; + unsigned char lf_adj_level[8][4][2]; + unsigned char base_qindex; + signed char y_dc_delta_q; + signed char uv_ac_delta_q; + signed char uv_dc_delta_q; + + unsigned char log2_tile_cols; + unsigned char log2_tile_rows; + unsigned char tx_mode; + unsigned char reference_mode; + unsigned char chroma_format; + + unsigned char ref_frame_map[8]; + + unsigned char frame_refs[3]; + unsigned char ref_frame_sign_bias[3]; + unsigned char frame_to_show; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + + unsigned char p010_mode; + unsigned char msb_mode; + unsigned char luma_10to8; + unsigned char chroma_10to8; + + unsigned int vp9_frame_size; + unsigned int compressed_header_size; + unsigned int uncompressed_header_size; +} rvcn_dec_message_vp9_t; + typedef struct rvcn_dec_feature_index_s { unsigned int feature_id; unsigned int offset; @@ -500,6 +660,118 @@ typedef struct rvcn_dec_feedback_profiling_s { unsigned int dmaHwCrc32Value2; } rvcn_dec_feedback_profiling_t; +typedef struct rvcn_dec_vp9_nmv_ctx_mask_s { + unsigned short classes_mask[2]; + unsigned short bits_mask[2]; + unsigned char joints_mask; + unsigned char sign_mask[2]; + unsigned char class0_mask[2]; + unsigned char class0_fp_mask[2]; + unsigned char fp_mask[2]; + unsigned char class0_hp_mask[2]; + unsigned char hp_mask[2]; + unsigned char reserve[11]; +} rvcn_dec_vp9_nmv_ctx_mask_t; + +typedef struct rvcn_dec_vp9_nmv_component_s{ + unsigned char sign; + unsigned char classes[10]; + unsigned char class0[1]; + unsigned char bits[10]; + unsigned char class0_fp[2][3]; + unsigned char fp[3]; + unsigned char class0_hp; + unsigned char hp; +} 
rvcn_dec_vp9_nmv_component_t; + +typedef struct rvcn_dec_vp9_probs_s { + rvcn_dec_vp9_nmv_ctx_mask_t nmvc_mask; + unsigned char coef_probs[4][2][2][6][6][3]; + unsigned char y_mode_prob[4][9]; + unsigned char uv_mode_prob[10][9]; + unsigned char single_ref_prob[5][2]; + unsigned char switchable_interp_prob[4][2]; + unsigned char partition_prob[16][3]; + unsigned char inter_mode_probs[7][3]; + unsigned char mbskip_probs[3]; + unsigned char intra_inter_prob[4]; + unsigned char comp_inter_prob[5]; + unsigned char comp_ref_prob[5]; + unsigned char tx_probs_32x32[2][3]; + unsigned char tx_probs_16x16[2][2]; + unsigned char tx_probs_8x8[2][1]; + unsigned char mv_joints[3]; + rvcn_dec_vp9_nmv_component_t mv_comps[2]; +} rvcn_dec_vp9_probs_t; + +typedef struct rvcn_dec_vp9_probs_segment_s { + union { + rvcn_dec_vp9_probs_t probs; + unsigned char probs_data[RDECODE_VP9_PROBS_DATA_SIZE]; + }; + + union { + struct { + unsigned int feature_data[8]; + unsigned char tree_probs[7]; + unsigned char pred_probs[3]; + unsigned char abs_delta; + unsigned char feature_mask[8]; + } seg; + unsigned char segment_data[256]; + }; +} rvcn_dec_vp9_probs_segment_t; + +struct jpeg_params { + unsigned bsd_size; + unsigned dt_pitch; + unsigned dt_uv_pitch; + unsigned dt_luma_top_offset; + unsigned dt_chroma_top_offset; +}; + +struct radeon_decoder { + struct pipe_video_codec base; + + unsigned stream_handle; + unsigned stream_type; + unsigned frame_number; + + struct pipe_screen *screen; + struct radeon_winsys *ws; + struct radeon_cmdbuf *cs; + + void *msg; + uint32_t *fb; + uint8_t *it; + uint8_t *probs; + void *bs_ptr; + + struct rvid_buffer msg_fb_it_probs_buffers[NUM_BUFFERS]; + struct rvid_buffer bs_buffers[NUM_BUFFERS]; + struct rvid_buffer dpb; + struct rvid_buffer ctx; + struct rvid_buffer sessionctx; + + unsigned bs_size; + unsigned cur_buffer; + void *render_pic_list[16]; + bool show_frame; + unsigned ref_idx; + struct jpeg_params jpg; + void (*send_cmd)(struct radeon_decoder *dec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); +}; + +void send_cmd_dec(struct radeon_decoder *dec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); + +void send_cmd_jpeg(struct radeon_decoder *dec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); + struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templat); diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c index 2a7ad187b..a39ce4cc7 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c @@ -25,12 +25,6 @@ * **************************************************************************/ -/* - * Authors: - * Christian König <christian.koenig@amd.com> - * - */ - #include <unistd.h> #include "util/u_memory.h" @@ -39,12 +33,10 @@ #include "vl/vl_defines.h" #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" -#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) - /* generate an stream handle */ unsigned si_vid_alloc_stream_handle() { @@ -71,9 +63,8 @@ bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer * able to move buffers around individually, so request a * non-sub-allocated buffer. 
*/ - buffer->res = (struct r600_resource *) - pipe_buffer_create(screen, PIPE_BIND_SHARED, - usage, size); + buffer->res = r600_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, + usage, size)); return buffer->res != NULL; } @@ -85,11 +76,11 @@ void si_vid_destroy_buffer(struct rvid_buffer *buffer) } /* reallocate a buffer, preserving its content */ -bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs, +bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs, struct rvid_buffer *new_buf, unsigned new_size) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; - struct radeon_winsys* ws = rscreen->ws; + struct si_screen *sscreen = (struct si_screen *)screen; + struct radeon_winsys* ws = sscreen->ws; unsigned bytes = MIN2(new_buf->res->buf->size, new_size); struct rvid_buffer old_buf = *new_buf; void *src = NULL, *dst = NULL; @@ -127,10 +118,9 @@ error: /* clear the buffer with zeros */ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) { - struct r600_common_context *rctx = (struct r600_common_context*)context; + struct si_context *sctx = (struct si_context*)context; - rctx->dma_clear_buffer(context, &buffer->res->b.b, 0, - buffer->res->buf->size, 0); + si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->buf->size, 0); context->flush(context, NULL, 0); } @@ -138,25 +128,23 @@ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffe * join surfaces into the same buffer with identical tiling params * sumup their sizes and replace the backend buffers with a single bo */ -void si_vid_join_surfaces(struct r600_common_context *rctx, +void si_vid_join_surfaces(struct si_context *sctx, struct pb_buffer** buffers[VL_NUM_COMPONENTS], struct radeon_surf *surfaces[VL_NUM_COMPONENTS]) { - struct radeon_winsys* ws; + struct radeon_winsys *ws = sctx->ws;; unsigned best_tiling, best_wh, off; unsigned size, alignment; struct pb_buffer *pb; unsigned i, j; - ws = rctx->ws; - for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) { unsigned wh; if (!surfaces[i]) continue; - if (rctx->chip_class < GFX9) { + if (sctx->chip_class < GFX9) { /* choose the smallest bank w/h for now */ wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh; if (wh < best_wh) { @@ -173,7 +161,7 @@ void si_vid_join_surfaces(struct r600_common_context *rctx, /* adjust the texture layer offsets */ off = align(off, surfaces[i]->surf_alignment); - if (rctx->chip_class < GFX9) { + if (sctx->chip_class < GFX9) { /* copy the tiling parameters */ surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw; surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh; @@ -220,149 +208,3 @@ void si_vid_join_surfaces(struct r600_common_context *rctx, pb_reference(&pb, NULL); } - -int si_vid_get_video_param(struct pipe_screen *screen, - enum pipe_video_profile profile, - enum pipe_video_entrypoint entrypoint, - enum pipe_video_cap param) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; - enum pipe_video_format codec = u_reduce_video_profile(profile); - struct radeon_info info; - - rscreen->ws->query_info(rscreen->ws, &info); - - if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { - switch (param) { - case PIPE_VIDEO_CAP_SUPPORTED: - return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && - si_vce_is_fw_version_supported(rscreen); - case PIPE_VIDEO_CAP_NPOT_TEXTURES: - return 1; - case PIPE_VIDEO_CAP_MAX_WIDTH: - return (rscreen->family < 
-
-int si_vid_get_video_param(struct pipe_screen *screen,
-			   enum pipe_video_profile profile,
-			   enum pipe_video_entrypoint entrypoint,
-			   enum pipe_video_cap param)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
-	enum pipe_video_format codec = u_reduce_video_profile(profile);
-	struct radeon_info info;
-
-	rscreen->ws->query_info(rscreen->ws, &info);
-
-	if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
-		switch (param) {
-		case PIPE_VIDEO_CAP_SUPPORTED:
-			return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
-				si_vce_is_fw_version_supported(rscreen);
-		case PIPE_VIDEO_CAP_NPOT_TEXTURES:
-			return 1;
-		case PIPE_VIDEO_CAP_MAX_WIDTH:
-			return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
-		case PIPE_VIDEO_CAP_MAX_HEIGHT:
-			return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
-		case PIPE_VIDEO_CAP_PREFERED_FORMAT:
-			return PIPE_FORMAT_NV12;
-		case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
-			return false;
-		case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
-			return false;
-		case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
-			return true;
-		case PIPE_VIDEO_CAP_STACKED_FRAMES:
-			return (rscreen->family < CHIP_TONGA) ? 1 : 2;
-		default:
-			return 0;
-		}
-	}
-
-	switch (param) {
-	case PIPE_VIDEO_CAP_SUPPORTED:
-		switch (codec) {
-		case PIPE_VIDEO_FORMAT_MPEG12:
-			return profile != PIPE_VIDEO_PROFILE_MPEG1;
-		case PIPE_VIDEO_FORMAT_MPEG4:
-			return 1;
-		case PIPE_VIDEO_FORMAT_MPEG4_AVC:
-			if ((rscreen->family == CHIP_POLARIS10 ||
-			     rscreen->family == CHIP_POLARIS11) &&
-			    info.uvd_fw_version < UVD_FW_1_66_16 ) {
-				RVID_ERR("POLARIS10/11 firmware version needs to be updated.\n");
-				return false;
-			}
-			return true;
-		case PIPE_VIDEO_FORMAT_VC1:
-			return true;
-		case PIPE_VIDEO_FORMAT_HEVC:
-			/* Carrizo only supports HEVC Main */
-			if (rscreen->family >= CHIP_STONEY)
-				return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
-					profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
-			else if (rscreen->family >= CHIP_CARRIZO)
-				return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
-			return false;
-		case PIPE_VIDEO_FORMAT_JPEG:
-			if (rscreen->family < CHIP_CARRIZO || rscreen->family >= CHIP_VEGA10)
-				return false;
-			if (!(rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 19)) {
-				RVID_ERR("No MJPEG support for the kernel version\n");
-				return false;
-			}
-			return true;
-		default:
-			return false;
-		}
-	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
-		return 1;
-	case PIPE_VIDEO_CAP_MAX_WIDTH:
-		return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
-	case PIPE_VIDEO_CAP_MAX_HEIGHT:
-		return (rscreen->family < CHIP_TONGA) ? 1152 : 4096;
-	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
-		if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
-			return PIPE_FORMAT_P016;
-		else
-			return PIPE_FORMAT_NV12;
-
-	case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
-	case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: {
-		enum pipe_video_format format = u_reduce_video_profile(profile);
-
-		if (format == PIPE_VIDEO_FORMAT_HEVC)
-			return false; // The firmware doesn't support interlaced HEVC.
-		else if (format == PIPE_VIDEO_FORMAT_JPEG)
-			return false;
-		return true;
-	}
-	case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
-		return true;
-	case PIPE_VIDEO_CAP_MAX_LEVEL:
-		switch (profile) {
-		case PIPE_VIDEO_PROFILE_MPEG1:
-			return 0;
-		case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
-		case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
-			return 3;
-		case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
-			return 3;
-		case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
-			return 5;
-		case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
-			return 1;
-		case PIPE_VIDEO_PROFILE_VC1_MAIN:
-			return 2;
-		case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
-			return 4;
-		case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
-		case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
-		case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
-			return (rscreen->family < CHIP_TONGA) ? 41 : 52;
-		case PIPE_VIDEO_PROFILE_HEVC_MAIN:
-		case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
-			return 186;
-		default:
-			return 0;
-		}
-	default:
-		return 0;
-	}
-}
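
The removed function compares info.uvd_fw_version against UVD_FW_1_66_16, which packs a major.minor.patch firmware version into one word (major in bits 24-31, minor in 16-23, patch in 8-15), so a plain integer comparison orders versions correctly. A small helper showing the same packing, inferred from the macro itself:

/* Version packing as implied by ((1 << 24) | (66 << 16) | (16 << 8)):
 * each component occupies one byte, so packed values compare like
 * versions do. */
static unsigned uvd_fw_version(unsigned major, unsigned minor, unsigned patch)
{
	return (major << 24) | (minor << 16) | (patch << 8);
}

/* uvd_fw_version(1, 66, 16) == UVD_FW_1_66_16, while
 * uvd_fw_version(1, 66, 15) <  UVD_FW_1_66_16, as expected. */
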
-
-boolean si_vid_is_format_supported(struct pipe_screen *screen,
-				   enum pipe_format format,
-				   enum pipe_video_profile profile,
-				   enum pipe_video_entrypoint entrypoint)
-{
-	/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
-	if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
-		return (format == PIPE_FORMAT_NV12) ||
-			(format == PIPE_FORMAT_P016);
-
-	/* we can only handle this one with UVD */
-	if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
-		return format == PIPE_FORMAT_NV12;
-
-	return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
-}
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
index 7e70be98b..71904b313 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
@@ -25,12 +25,6 @@
  *
  **************************************************************************/
 
-/*
- * Authors:
- *	Christian König <christian.koenig@amd.com>
- *
- */
-
 #ifndef RADEON_VIDEO_H
 #define RADEON_VIDEO_H
 
@@ -40,6 +34,8 @@
 #define RVID_ERR(fmt, args...) \
 	fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
 
+#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+
 /* video buffer representation */
 struct rvid_buffer
 {
@@ -58,7 +54,7 @@ bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer
 void si_vid_destroy_buffer(struct rvid_buffer *buffer);
 
 /* reallocate a buffer, preserving its content */
-bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
 			  struct rvid_buffer *new_buf, unsigned new_size);
 
 /* clear the buffer with zeros */
@@ -66,20 +62,8 @@ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffe
 /* join surfaces into the same buffer with identical tiling params
    sum up their sizes and replace the backend buffers with a single bo */
-void si_vid_join_surfaces(struct r600_common_context *rctx,
+void si_vid_join_surfaces(struct si_context *sctx,
 			  struct pb_buffer** buffers[VL_NUM_COMPONENTS],
 			  struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
 
-/* returns supported codecs and other parameters */
-int si_vid_get_video_param(struct pipe_screen *screen,
-			   enum pipe_video_profile profile,
-			   enum pipe_video_entrypoint entrypoint,
-			   enum pipe_video_cap param);
-
-/* the hardware only supports NV12 */
-boolean si_vid_is_format_supported(struct pipe_screen *screen,
-				   enum pipe_format format,
-				   enum pipe_video_profile profile,
-				   enum pipe_video_entrypoint entrypoint);
-
 #endif // RADEON_VIDEO_H
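
The RVID_ERR macro retained above prefixes each message with file, line and function so UVD errors can be traced to their origin. A compile-and-run illustration of the same pattern, using a renamed macro and the GNU-style `args...` variadic form seen in the header:

/* Self-contained illustration of an RVID_ERR-style macro: file, line
 * and function name are prepended to every message. Requires GCC or
 * Clang for the named-variadic "args..." extension. */
#include <stdio.h>

#define EXAMPLE_ERR(fmt, args...) \
	fprintf(stderr, "EE %s:%d %s UVD - " fmt, __FILE__, __LINE__, __func__, ##args)

int main(void)
{
	EXAMPLE_ERR("firmware too old (have %u.%u)\n", 1u, 37u);
	return 0;
}
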
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
index 206c299ac..c6800808c 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
@@ -1,6 +1,8 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
  * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -26,14 +28,18 @@
 
 /* The public winsys interface header for the radeon driver. */
 
+/* Whether the next IB can start immediately and not wait for draws and
+ * dispatches from the current IB to finish. */
+#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31)
+
+#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \
+    (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW)
+
 #include "pipebuffer/pb_buffer.h"
 
 #include "amd/common/ac_gpu_info.h"
 #include "amd/common/ac_surface.h"
 
-#define RADEON_FLUSH_ASYNC (1 << 0)
-#define RADEON_FLUSH_END_OF_FRAME (1 << 1)
-
 /* Tiling flags. */
 enum radeon_bo_layout {
     RADEON_LAYOUT_LINEAR = 0,
@@ -55,6 +61,8 @@ enum radeon_bo_flag { /* bitfield */
     RADEON_FLAG_NO_SUBALLOC = (1 << 2),
     RADEON_FLAG_SPARSE = (1 << 3),
     RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
+    RADEON_FLAG_READ_ONLY = (1 << 5),
+    RADEON_FLAG_32BIT = (1 << 6),
 };
 
 enum radeon_bo_usage { /* bitfield */
@@ -78,6 +86,8 @@ enum ring_type {
     RING_VCE,
     RING_UVD_ENC,
     RING_VCN_DEC,
+    RING_VCN_ENC,
+    RING_VCN_JPEG,
     RING_LAST,
 };
 
@@ -106,71 +116,65 @@ enum radeon_value_id {
     RADEON_CS_THREAD_TIME,
 };
 
-/* Each group of four has the same priority. */
 enum radeon_bo_priority {
+    /* Each group of two has the same priority. */
     RADEON_PRIO_FENCE = 0,
     RADEON_PRIO_TRACE,
-    RADEON_PRIO_SO_FILLED_SIZE,
+
+    RADEON_PRIO_SO_FILLED_SIZE = 2,
     RADEON_PRIO_QUERY,
 
     RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
     RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
-    RADEON_PRIO_DRAW_INDIRECT,
-    RADEON_PRIO_INDEX_BUFFER,
-
-    RADEON_PRIO_VCE = 8,
-    RADEON_PRIO_UVD,
-    RADEON_PRIO_SDMA_BUFFER,
-    RADEON_PRIO_SDMA_TEXTURE,
+
+    RADEON_PRIO_DRAW_INDIRECT = 6,
+    RADEON_PRIO_INDEX_BUFFER,
 
-    RADEON_PRIO_CP_DMA = 12,
+    RADEON_PRIO_CP_DMA = 8,
+    RADEON_PRIO_BORDER_COLORS,
 
-    RADEON_PRIO_CONST_BUFFER = 16,
+    RADEON_PRIO_CONST_BUFFER = 10,
     RADEON_PRIO_DESCRIPTORS,
-    RADEON_PRIO_BORDER_COLORS,
 
-    RADEON_PRIO_SAMPLER_BUFFER = 20,
+    RADEON_PRIO_SAMPLER_BUFFER = 12,
     RADEON_PRIO_VERTEX_BUFFER,
 
-    RADEON_PRIO_SHADER_RW_BUFFER = 24,
+    RADEON_PRIO_SHADER_RW_BUFFER = 14,
    RADEON_PRIO_COMPUTE_GLOBAL,
 
-    RADEON_PRIO_SAMPLER_TEXTURE = 28,
+    RADEON_PRIO_SAMPLER_TEXTURE = 16,
     RADEON_PRIO_SHADER_RW_IMAGE,
 
-    RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 32,
+    RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18,
+    RADEON_PRIO_COLOR_BUFFER,
 
-    RADEON_PRIO_COLOR_BUFFER = 36,
+    RADEON_PRIO_DEPTH_BUFFER = 20,
 
-    RADEON_PRIO_DEPTH_BUFFER = 40,
+    RADEON_PRIO_COLOR_BUFFER_MSAA = 22,
 
-    RADEON_PRIO_COLOR_BUFFER_MSAA = 44,
+    RADEON_PRIO_DEPTH_BUFFER_MSAA = 24,
 
-    RADEON_PRIO_DEPTH_BUFFER_MSAA = 48,
-
-    RADEON_PRIO_CMASK = 52,
-    RADEON_PRIO_DCC,
-    RADEON_PRIO_HTILE,
+    RADEON_PRIO_SEPARATE_META = 26,
     RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */
 
-    RADEON_PRIO_SHADER_RINGS = 56,
+    RADEON_PRIO_SHADER_RINGS = 28,
 
-    RADEON_PRIO_SCRATCH_BUFFER = 60,
-    /* 63 is the maximum value */
+    RADEON_PRIO_SCRATCH_BUFFER = 30,
+    /* 31 is the maximum value */
 };
 
 struct winsys_handle;
 struct radeon_winsys_ctx;
 
-struct radeon_winsys_cs_chunk {
+struct radeon_cmdbuf_chunk {
     unsigned cdw;    /* Number of used dwords. */
     unsigned max_dw; /* Maximum number of dwords. */
     uint32_t *buf;   /* The base pointer of the chunk. */
 };
 
-struct radeon_winsys_cs {
-    struct radeon_winsys_cs_chunk current;
-    struct radeon_winsys_cs_chunk *prev;
+struct radeon_cmdbuf {
+    struct radeon_cmdbuf_chunk current;
+    struct radeon_cmdbuf_chunk *prev;
     unsigned num_prev; /* Number of previous chunks. */
     unsigned max_prev; /* Space in array pointed to by prev. */
     unsigned prev_dw; /* Total number of dwords in previous chunks. */
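
The renamed radeon_cmdbuf keeps an IB as a list of chunks: dwords are written into `current`, and filled chunks are accounted for in `prev`/`prev_dw`, so the total emitted size is prev_dw + current.cdw (exactly what the radeon_emitted() helper further down tests). A self-contained sketch of that bookkeeping:

/* Standalone sketch of chunked command-buffer bookkeeping, mirroring
 * the radeon_cmdbuf idea: total dwords = prev_dw + current.cdw. */
#include <stdint.h>
#include <stdio.h>

struct chunk { unsigned cdw, max_dw; uint32_t buf[256]; };

struct cmdbuf {
	struct chunk current;
	unsigned prev_dw; /* dwords held in already-filled chunks */
};

static void emit(struct cmdbuf *cs, uint32_t value)
{
	if (cs->current.cdw == cs->current.max_dw) {
		/* A real winsys would park the full chunk in cs->prev and
		 * allocate a fresh one; here we only keep the count. */
		cs->prev_dw += cs->current.cdw;
		cs->current.cdw = 0;
	}
	cs->current.buf[cs->current.cdw++] = value;
}

int main(void)
{
	struct cmdbuf cs = { .current = { .cdw = 0, .max_dw = 256 } };

	for (int i = 0; i < 300; ++i)
		emit(&cs, 0xdeadbeef);
	printf("total dwords: %u\n", cs.prev_dw + cs.current.cdw); /* 300 */
	return 0;
}
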
@@ -221,7 +225,7 @@ enum radeon_feature_id {
 struct radeon_bo_list_item {
     uint64_t bo_size;
     uint64_t vm_address;
-    uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
+    uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
 };
 
 struct radeon_winsys {
@@ -254,6 +258,14 @@ struct radeon_winsys {
     void (*query_info)(struct radeon_winsys *ws,
                        struct radeon_info *info);
 
+    /**
+     * A hint for the winsys that it should pin its execution threads to
+     * a group of cores sharing a specific L3 cache if the CPU has multiple
+     * L3 caches. This is needed for good multithreading performance on
+     * AMD Zen CPUs.
+     */
+    void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);
+
     /**************************************************************************
      * Buffer management. Buffer attributes are mostly fixed over a buffer's lifetime.
      *
@@ -288,7 +300,7 @@ struct radeon_winsys {
      * \return The pointer at the beginning of the buffer.
      */
     void *(*buffer_map)(struct pb_buffer *buf,
-                        struct radeon_winsys_cs *cs,
+                        struct radeon_cmdbuf *cs,
                         enum pipe_transfer_usage usage);
 
     /**
@@ -451,7 +463,7 @@ struct radeon_winsys {
      * \param flush Flush callback function associated with the command stream.
      * \param user User pointer that will be passed to the flush callback.
      */
-    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx,
+    struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx,
                                           enum ring_type ring_type,
                                           void (*flush)(void *ctx, unsigned flags,
                                                         struct pipe_fence_handle **fence),
@@ -462,7 +474,7 @@ struct radeon_winsys {
      *
      * \param cs A command stream to destroy.
      */
-    void (*cs_destroy)(struct radeon_winsys_cs *cs);
+    void (*cs_destroy)(struct radeon_cmdbuf *cs);
 
     /**
      * Add a buffer. Each buffer used by a CS must be added using this function.
@@ -475,7 +487,7 @@ struct radeon_winsys {
      *              placed in the requested domain. 15 is the maximum.
      * \return Buffer index.
      */
-    unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs,
+    unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs,
                               struct pb_buffer *buf,
                               enum radeon_bo_usage usage,
                               enum radeon_bo_domain domain,
@@ -491,7 +503,7 @@ struct radeon_winsys {
      * \param buf Buffer
      * \return The buffer index, or -1 if the buffer has not been added.
      */
-    int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs,
+    int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs,
                             struct pb_buffer *buf);
 
     /**
@@ -502,7 +514,7 @@ struct radeon_winsys {
      *
      * \param cs A command stream to validate.
      */
-    bool (*cs_validate)(struct radeon_winsys_cs *cs);
+    bool (*cs_validate)(struct radeon_cmdbuf *cs);
 
     /**
      * Check whether the given number of dwords is available in the IB.
@@ -511,7 +523,7 @@ struct radeon_winsys {
      * \param cs A command stream.
      * \param dw Number of CS dwords requested by the caller.
      */
-    bool (*cs_check_space)(struct radeon_winsys_cs *cs, unsigned dw);
+    bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw);
 
     /**
      * Return the buffer list.
@@ -523,29 +535,31 @@ struct radeon_winsys {
      * \param list Returned buffer list. Set to NULL to query the count only.
      * \return The buffer count.
      */
-    unsigned (*cs_get_buffer_list)(struct radeon_winsys_cs *cs,
+    unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs,
                                    struct radeon_bo_list_item *list);
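
The callbacks above are used in a fixed rhythm by the driver: reserve space, register every referenced BO, then write packet dwords. A schematic sketch, not taken from Mesa; the RADEON_USAGE_WRITE usage bit and the domain/priority choices below are placeholder assumptions:

/* Schematic driver-side usage of the CS hooks above; "ws", "cs" and
 * "bo" are assumed to come from the winsys. Not a Mesa excerpt. */
static bool emit_two_dwords_example(struct radeon_winsys *ws,
                                    struct radeon_cmdbuf *cs,
                                    struct pb_buffer *bo)
{
	/* 1) Ensure the IB can hold the packet. */
	if (!ws->cs_check_space(cs, 2))
		return false; /* a real driver would flush and retry */

	/* 2) Every BO a packet references must be in the buffer list. */
	ws->cs_add_buffer(cs, bo, RADEON_USAGE_WRITE,
	                  RADEON_DOMAIN_VRAM, RADEON_PRIO_CP_DMA);

	/* 3) Write the dwords; radeon_emit() appends to the current chunk. */
	radeon_emit(cs, 0xC0DE0000); /* placeholder packet header */
	radeon_emit(cs, 0);          /* placeholder payload */
	return true;
}
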
 
     /**
      * Flush a command stream.
      *
      * \param cs A command stream to flush.
-     * \param flags, RADEON_FLUSH_ASYNC or 0.
+     * \param flags PIPE_FLUSH_* flags.
      * \param fence Pointer to a fence. If non-NULL, a fence is inserted
      *              after the CS and is returned through this parameter.
      * \return Negative POSIX error code or 0 for success.
      *         Asynchronous submissions never return an error.
      */
-    int (*cs_flush)(struct radeon_winsys_cs *cs,
+    int (*cs_flush)(struct radeon_cmdbuf *cs,
                     unsigned flags,
                     struct pipe_fence_handle **fence);
 
     /**
      * Create a fence before the CS is flushed.
      * The user must flush manually to complete the initialization of the fence.
-     * The fence must not be used before the flush.
+     *
+     * The fence must not be used for anything except \ref cs_add_fence_dependency
+     * before the flush.
      */
-    struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_winsys_cs *cs);
+    struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs);
 
     /**
      * Return true if a buffer is referenced by a command stream.
@@ -553,7 +567,7 @@ struct radeon_winsys {
      * \param cs A command stream.
      * \param buf A winsys buffer.
      */
-    bool (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
+    bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs,
                                     struct pb_buffer *buf,
                                     enum radeon_bo_usage usage);
 
     /**
@@ -564,7 +578,7 @@ struct radeon_winsys {
      * \param fid Feature ID, one of RADEON_FID_*
      * \param enable Whether to enable or disable the feature.
      */
-    bool (*cs_request_feature)(struct radeon_winsys_cs *cs,
+    bool (*cs_request_feature)(struct radeon_cmdbuf *cs,
                                enum radeon_feature_id fid,
                                bool enable);
 
     /**
@@ -572,16 +586,22 @@ struct radeon_winsys {
      *
      * \param cs A command stream.
      */
-    void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
+    void (*cs_sync_flush)(struct radeon_cmdbuf *cs);
 
     /**
      * Add a fence dependency to the CS, so that the CS will wait for
      * the fence before execution.
      */
-    void (*cs_add_fence_dependency)(struct radeon_winsys_cs *cs,
+    void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs,
                                     struct pipe_fence_handle *fence);
 
     /**
+     * Signal a syncobj when the CS finishes execution.
+     */
+    void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs,
+                                  struct pipe_fence_handle *fence);
+
+    /**
      * Wait for the fence and return true if the fence has been signalled.
      * The timeout of 0 will only return the status.
      * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
@@ -598,6 +618,12 @@ struct radeon_winsys {
                        struct pipe_fence_handle *src);
 
     /**
+     * Create a new fence object corresponding to the given syncobj fd.
+     */
+    struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws,
+                                                      int fd);
+
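
Together, cs_add_fence_dependency, the new cs_add_syncobj_signal and cs_flush let one queue wait on and signal others. A schematic fragment only (hypothetical variables, not Mesa code):

/* Schematic: chaining two queues with the hooks documented above.
 * "prev_fence" is a fence from an earlier CS; "export_fence" wraps a
 * syncobj that an external consumer (e.g. a compositor) waits on. */
ws->cs_add_fence_dependency(cs, prev_fence);   /* wait before running */
/* ... emit the actual work here ... */
ws->cs_add_syncobj_signal(cs, export_fence);   /* signal on completion */
ws->cs_flush(cs, PIPE_FLUSH_ASYNC, NULL);
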
+    /**
      * Create a new fence object corresponding to the given sync_file.
      */
     struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws,
@@ -639,17 +665,17 @@ struct radeon_winsys {
     const char* (*get_chip_name)(struct radeon_winsys *ws);
 };
 
-static inline bool radeon_emitted(struct radeon_winsys_cs *cs, unsigned num_dw)
+static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
 {
     return cs && (cs->prev_dw + cs->current.cdw > num_dw);
 }
 
-static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
 {
     cs->current.buf[cs->current.cdw++] = value;
 }
 
-static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
+static inline void radeon_emit_array(struct radeon_cmdbuf *cs,
                                      const uint32_t *values, unsigned count)
 {
     memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
@@ -658,9 +684,14 @@ static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
 
 enum radeon_heap {
     RADEON_HEAP_VRAM_NO_CPU_ACCESS,
+    RADEON_HEAP_VRAM_READ_ONLY,
+    RADEON_HEAP_VRAM_READ_ONLY_32BIT,
+    RADEON_HEAP_VRAM_32BIT,
     RADEON_HEAP_VRAM,
-    RADEON_HEAP_VRAM_GTT, /* combined heaps */
     RADEON_HEAP_GTT_WC,
+    RADEON_HEAP_GTT_WC_READ_ONLY,
+    RADEON_HEAP_GTT_WC_READ_ONLY_32BIT,
+    RADEON_HEAP_GTT_WC_32BIT,
     RADEON_HEAP_GTT,
     RADEON_MAX_SLAB_HEAPS,
     RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
@@ -670,11 +701,15 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
 {
     switch (heap) {
     case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
+    case RADEON_HEAP_VRAM_READ_ONLY:
+    case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+    case RADEON_HEAP_VRAM_32BIT:
     case RADEON_HEAP_VRAM:
         return RADEON_DOMAIN_VRAM;
-    case RADEON_HEAP_VRAM_GTT:
-        return RADEON_DOMAIN_VRAM_GTT;
     case RADEON_HEAP_GTT_WC:
+    case RADEON_HEAP_GTT_WC_READ_ONLY:
+    case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
+    case RADEON_HEAP_GTT_WC_32BIT:
     case RADEON_HEAP_GTT:
         return RADEON_DOMAIN_GTT;
     default:
@@ -685,40 +720,35 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
 
 static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
 {
+    unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                     (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0);
+
     switch (heap) {
     case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
-        return RADEON_FLAG_GTT_WC |
-               RADEON_FLAG_NO_CPU_ACCESS |
-               RADEON_FLAG_NO_INTERPROCESS_SHARING;
+        return flags |
+               RADEON_FLAG_NO_CPU_ACCESS;
 
-    case RADEON_HEAP_VRAM:
-    case RADEON_HEAP_VRAM_GTT:
-    case RADEON_HEAP_GTT_WC:
-        return RADEON_FLAG_GTT_WC |
-               RADEON_FLAG_NO_INTERPROCESS_SHARING;
+    case RADEON_HEAP_VRAM_READ_ONLY:
+    case RADEON_HEAP_GTT_WC_READ_ONLY:
+        return flags |
+               RADEON_FLAG_READ_ONLY;
 
-    case RADEON_HEAP_GTT:
-    default:
-        return RADEON_FLAG_NO_INTERPROCESS_SHARING;
-    }
-}
+    case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+    case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
+        return flags |
+               RADEON_FLAG_READ_ONLY |
+               RADEON_FLAG_32BIT;
+
+    case RADEON_HEAP_VRAM_32BIT:
+    case RADEON_HEAP_GTT_WC_32BIT:
+        return flags |
+               RADEON_FLAG_32BIT;
-/* The pb cache bucket is chosen to minimize pb_cache misses.
- * It must be between 0 and 3 inclusive.
- */
-static inline unsigned radeon_get_pb_cache_bucket_index(enum radeon_heap heap)
-{
-    switch (heap) {
-    case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
-        return 0;
     case RADEON_HEAP_VRAM:
-    case RADEON_HEAP_VRAM_GTT:
-        return 1;
     case RADEON_HEAP_GTT_WC:
-        return 2;
     case RADEON_HEAP_GTT:
     default:
-        return 3;
+        return flags;
     }
 }
 
@@ -738,22 +768,60 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain,
     /* Unsupported flags: NO_SUBALLOC, SPARSE. */
     if (flags & ~(RADEON_FLAG_GTT_WC |
                   RADEON_FLAG_NO_CPU_ACCESS |
-                  RADEON_FLAG_NO_INTERPROCESS_SHARING))
+                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                  RADEON_FLAG_READ_ONLY |
+                  RADEON_FLAG_32BIT))
         return -1;
 
     switch (domain) {
     case RADEON_DOMAIN_VRAM:
-        if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+        switch (flags & (RADEON_FLAG_NO_CPU_ACCESS |
+                         RADEON_FLAG_READ_ONLY |
+                         RADEON_FLAG_32BIT)) {
+        case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+        case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY:
+            assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense");
+            return -1;
+        case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT:
+            assert(!"NO_CPU_ACCESS with 32BIT is disallowed");
+            return -1;
+        case RADEON_FLAG_NO_CPU_ACCESS:
             return RADEON_HEAP_VRAM_NO_CPU_ACCESS;
-        else
+        case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+            return RADEON_HEAP_VRAM_READ_ONLY_32BIT;
+        case RADEON_FLAG_READ_ONLY:
+            return RADEON_HEAP_VRAM_READ_ONLY;
+        case RADEON_FLAG_32BIT:
+            return RADEON_HEAP_VRAM_32BIT;
+        case 0:
            return RADEON_HEAP_VRAM;
-    case RADEON_DOMAIN_VRAM_GTT:
-        return RADEON_HEAP_VRAM_GTT;
+        }
+        break;
     case RADEON_DOMAIN_GTT:
-        if (flags & RADEON_FLAG_GTT_WC)
+        switch (flags & (RADEON_FLAG_GTT_WC |
+                         RADEON_FLAG_READ_ONLY |
+                         RADEON_FLAG_32BIT)) {
+        case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+            return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
+        case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY:
+            return RADEON_HEAP_GTT_WC_READ_ONLY;
+        case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT:
+            return RADEON_HEAP_GTT_WC_32BIT;
+        case RADEON_FLAG_GTT_WC:
            return RADEON_HEAP_GTT_WC;
-        else
+        case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
+        case RADEON_FLAG_READ_ONLY:
+            assert(!"READ_ONLY without WC is disallowed");
+            return -1;
+        case RADEON_FLAG_32BIT:
+            assert(!"32BIT without WC is disallowed");
+            return -1;
+        case 0:
            return RADEON_HEAP_GTT;
+        }
+        break;
+    default:
+        break;
     }
     return -1;
 }
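
The domain-plus-flags to heap mapping above can be sanity-checked in isolation. A hedged, self-contained mock with local stand-in enums that keeps only the shape of the real function, including the READ_ONLY-without-write-combining rejection:

/* Simplified mock of the heap-selection idea; local enums stand in
 * for the real RADEON_* values, and only two flags are modeled. */
#include <assert.h>
#include <stdio.h>

enum domain { DOM_VRAM, DOM_GTT };
enum flag   { F_WC = 1, F_RO = 2 };
enum heap   { HEAP_VRAM, HEAP_VRAM_RO, HEAP_GTT_WC, HEAP_GTT, HEAP_INVALID = -1 };

static int heap_index(enum domain d, unsigned f)
{
	if (d == DOM_VRAM)
		return (f & F_RO) ? HEAP_VRAM_RO : HEAP_VRAM;
	/* READ_ONLY without write-combining is rejected, as upstream. */
	if ((f & F_RO) && !(f & F_WC))
		return HEAP_INVALID;
	return (f & F_WC) ? HEAP_GTT_WC : HEAP_GTT;
}

int main(void)
{
	assert(heap_index(DOM_VRAM, 0) == HEAP_VRAM);
	assert(heap_index(DOM_GTT, F_RO) == HEAP_INVALID);
	assert(heap_index(DOM_GTT, F_WC | F_RO) == HEAP_GTT_WC);
	printf("mapping behaves as expected\n");
	return 0;
}
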