summary refs log tree commit diff
path: root/lib/mesa/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author Jonathan Gray <jsg@cvs.openbsd.org> 2019-01-29 11:52:33 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org> 2019-01-29 11:52:33 +0000
commit 37bbf6a1792773f11c15a4da1588a7520ee2fb4e (patch)
tree 64944d4aa665a1e479cfc004e446593062254550 /lib/mesa/src/gallium/drivers/vc4
parent 6b139c2063623e9310025247cd966490b9aa57ea (diff)
Merge Mesa 18.3.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/vc4')
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/Makefile.am 5
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/Makefile.in 71
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_blit.c 229
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c 141
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h 35
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_cl.c 14
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_cl.h 15
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_cl_dump.c 7
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_context.c 37
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_context.h 36
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_draw.c 46
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_job.c 44
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_program.c 137
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_qir.c 62
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_qir.h 3
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_qir_live_variables.c 2
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c 1
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_resource.c 266
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_resource.h 3
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_screen.c 261
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_screen.h 7
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c 5
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_state.c 49
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_tiling.c 13
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c 237
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_uniforms.c 65
26 files changed, 1165 insertions, 626 deletions
diff --git a/lib/mesa/src/gallium/drivers/vc4/Makefile.am b/lib/mesa/src/gallium/drivers/vc4/Makefile.am
index c3e49af97..4c7dd843d 100644
--- a/lib/mesa/src/gallium/drivers/vc4/Makefile.am
+++ b/lib/mesa/src/gallium/drivers/vc4/Makefile.am
@@ -30,7 +30,8 @@ AM_CFLAGS = \
-I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/include/drm-uapi \
-I$(top_builddir)/src \
- -I$(top_srcdir)/src/broadcom/cle \
+ -I$(top_srcdir)/src/broadcom \
+ -I$(top_builddir)/src/broadcom \
$(LIBDRM_CFLAGS) \
$(GALLIUM_DRIVER_CFLAGS) \
$(SIM_CFLAGS) \
@@ -54,4 +55,4 @@ endif
libvc4_la_LDFLAGS = $(SIM_LDFLAGS)
-EXTRA_DIST = kernel/README
+EXTRA_DIST = kernel/README meson.build
diff --git a/lib/mesa/src/gallium/drivers/vc4/Makefile.in b/lib/mesa/src/gallium/drivers/vc4/Makefile.in
index 195f7e2c1..f55b61922 100644
--- a/lib/mesa/src/gallium/drivers/vc4/Makefile.in
+++ b/lib/mesa/src/gallium/drivers/vc4/Makefile.in
@@ -78,15 +78,19 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
@HAVE_LIBDRM_TRUE@am__append_1 = \
@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS)
-@HAVE_DRISW_TRUE@am__append_2 = \
+@HAVE_PLATFORM_ANDROID_TRUE@am__append_2 = \
+@HAVE_PLATFORM_ANDROID_TRUE@ $(ANDROID_LIBS) \
+@HAVE_PLATFORM_ANDROID_TRUE@ $(BACKTRACE_LIBS)
+
+@HAVE_DRISW_TRUE@am__append_3 = \
@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
-@HAVE_DRISW_KMS_TRUE@am__append_3 = \
+@HAVE_DRISW_KMS_TRUE@am__append_4 = \
@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
-@HAVE_ARM_ASM_TRUE@am__append_4 = libvc4_neon.la
@HAVE_ARM_ASM_TRUE@am__append_5 = libvc4_neon.la
+@HAVE_ARM_ASM_TRUE@am__append_6 = libvc4_neon.la
subdir = src/gallium/drivers/vc4
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
@@ -106,7 +110,7 @@ mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
-libvc4_la_DEPENDENCIES = $(am__append_5)
+libvc4_la_DEPENDENCIES = $(am__append_6)
am__dirstamp = $(am__leading_dot)dirstamp
am__objects_1 = kernel/vc4_gem.lo kernel/vc4_render_cl.lo \
kernel/vc4_validate.lo kernel/vc4_validate_shaders.lo \
@@ -201,6 +205,8 @@ AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
+BACKTRACE_CFLAGS = @BACKTRACE_CFLAGS@
+BACKTRACE_LIBS = @BACKTRACE_LIBS@
BSYMBOLIC = @BSYMBOLIC@
CC = @CC@
CCAS = @CCAS@
@@ -214,6 +220,7 @@ CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
+CXX11_CXXFLAGS = @CXX11_CXXFLAGS@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
@@ -247,8 +254,6 @@ EXEEXT = @EXEEXT@
EXPAT_CFLAGS = @EXPAT_CFLAGS@
EXPAT_LIBS = @EXPAT_LIBS@
FGREP = @FGREP@
-FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
-FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
@@ -267,8 +272,8 @@ GL_LIB_DEPS = @GL_LIB_DEPS@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
+GL_PKGCONF_LIB = @GL_PKGCONF_LIB@
GREP = @GREP@
-HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
I915_CFLAGS = @I915_CFLAGS@
I915_LIBS = @I915_LIBS@
INDENT = @INDENT@
@@ -280,6 +285,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LD_BUILD_ID = @LD_BUILD_ID@
LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
LEX = @LEX@
LEXLIB = @LEXLIB@
@@ -317,7 +323,7 @@ MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
NINE_MAJOR = @NINE_MAJOR@
NINE_MINOR = @NINE_MINOR@
-NINE_TINY = @NINE_TINY@
+NINE_PATCH = @NINE_PATCH@
NINE_VERSION = @NINE_VERSION@
NM = @NM@
NMEDIT = @NMEDIT@
@@ -330,6 +336,9 @@ OBJEXT = @OBJEXT@
OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
+OMX_TIZONIA_CFLAGS = @OMX_TIZONIA_CFLAGS@
+OMX_TIZONIA_LIBS = @OMX_TIZONIA_LIBS@
+OMX_TIZONIA_LIB_INSTALL_DIR = @OMX_TIZONIA_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -357,11 +366,16 @@ PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PWR8_CFLAGS = @PWR8_CFLAGS@
-PYTHON2 = @PYTHON2@
+PYTHON = @PYTHON@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
RM = @RM@
+SCANNER_ARG = @SCANNER_ARG@
SED = @SED@
SELINUX_CFLAGS = @SELINUX_CFLAGS@
SELINUX_LIBS = @SELINUX_LIBS@
@@ -373,9 +387,10 @@ SSE41_CFLAGS = @SSE41_CFLAGS@
STRIP = @STRIP@
SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
-SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@
SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@
+V3D_SIMULATOR_CFLAGS = @V3D_SIMULATOR_CFLAGS@
+V3D_SIMULATOR_LIBS = @V3D_SIMULATOR_LIBS@
VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
VALGRIND_LIBS = @VALGRIND_LIBS@
VA_CFLAGS = @VA_CFLAGS@
@@ -383,8 +398,8 @@ VA_LIBS = @VA_LIBS@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
-VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
-VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
+VC4_CFLAGS = @VC4_CFLAGS@
+VC4_LIBS = @VC4_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -398,7 +413,11 @@ VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
+WAYLAND_EGL_CFLAGS = @WAYLAND_EGL_CFLAGS@
+WAYLAND_EGL_LIBS = @WAYLAND_EGL_LIBS@
+WAYLAND_PROTOCOLS_CFLAGS = @WAYLAND_PROTOCOLS_CFLAGS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
+WAYLAND_PROTOCOLS_LIBS = @WAYLAND_PROTOCOLS_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
@@ -408,16 +427,20 @@ WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
-XA_TINY = @XA_TINY@
+XA_PATCH = @XA_PATCH@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@
XCB_DRI3_LIBS = @XCB_DRI3_LIBS@
-XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
-XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
+XCB_DRI3_MODIFIERS_CFLAGS = @XCB_DRI3_MODIFIERS_CFLAGS@
+XCB_DRI3_MODIFIERS_LIBS = @XCB_DRI3_MODIFIERS_LIBS@
+XCB_RANDR_CFLAGS = @XCB_RANDR_CFLAGS@
+XCB_RANDR_LIBS = @XCB_RANDR_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
+XLIB_RANDR_CFLAGS = @XLIB_RANDR_CFLAGS@
+XLIB_RANDR_LIBS = @XLIB_RANDR_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
@@ -472,9 +495,13 @@ mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
@@ -601,7 +628,8 @@ GALLIUM_TARGET_CFLAGS = \
$(VISIBILITY_CFLAGS)
GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
- $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
+ $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) \
+ $(am__append_2)
GALLIUM_WINSYS_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
@@ -613,26 +641,27 @@ GALLIUM_WINSYS_CFLAGS = \
GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
$(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
- $(am__append_2) $(am__append_3)
+ $(am__append_3) $(am__append_4)
@USE_VC4_SIMULATOR_TRUE@SIM_LDFLAGS = -lsimpenrose
AM_CFLAGS = \
-I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/include/drm-uapi \
-I$(top_builddir)/src \
- -I$(top_srcdir)/src/broadcom/cle \
+ -I$(top_srcdir)/src/broadcom \
+ -I$(top_builddir)/src/broadcom \
$(LIBDRM_CFLAGS) \
$(GALLIUM_DRIVER_CFLAGS) \
$(SIM_CFLAGS) \
$(VALGRIND_CFLAGS) \
$()
-noinst_LTLIBRARIES = libvc4.la $(am__append_4)
+noinst_LTLIBRARIES = libvc4.la $(am__append_5)
libvc4_la_SOURCES = $(C_SOURCES)
-libvc4_la_LIBADD = $(SIM_LIB) $() $(am__append_5)
+libvc4_la_LIBADD = $(SIM_LIB) $() $(am__append_6)
@HAVE_ARM_ASM_TRUE@libvc4_neon_la_SOURCES = $(NEON_C_SOURCES)
@HAVE_ARM_ASM_TRUE@libvc4_neon_la_CFLAGS = $(AM_CFLAGS) -mfpu=neon
libvc4_la_LDFLAGS = $(SIM_LDFLAGS)
-EXTRA_DIST = kernel/README
+EXTRA_DIST = kernel/README meson.build
all: all-am
.SUFFIXES:
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_blit.c b/lib/mesa/src/gallium/drivers/vc4/vc4_blit.c
index 7f4c76968..d3cc5152a 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_blit.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_blit.c
@@ -24,6 +24,7 @@
#include "util/u_format.h"
#include "util/u_surface.h"
#include "util/u_blitter.h"
+#include "compiler/nir/nir_builder.h"
#include "vc4_context.h"
static struct pipe_surface *
@@ -183,6 +184,231 @@ vc4_blitter_save(struct vc4_context *vc4)
vc4->fragtex.num_textures, vc4->fragtex.textures);
}
+static void *vc4_get_yuv_vs(struct pipe_context *pctx)
+{
+ struct vc4_context *vc4 = vc4_context(pctx);
+ struct pipe_screen *pscreen = pctx->screen;
+
+ if (vc4->yuv_linear_blit_vs)
+ return vc4->yuv_linear_blit_vs;
+
+ const struct nir_shader_compiler_options *options =
+ pscreen->get_compiler_options(pscreen,
+ PIPE_SHADER_IR_NIR,
+ PIPE_SHADER_VERTEX);
+
+ nir_builder b;
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
+ b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs");
+
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec4, "pos");
+
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
+
+ nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
+
+ struct pipe_shader_state shader_tmpl = {
+ .type = PIPE_SHADER_IR_NIR,
+ .ir.nir = b.shader,
+ };
+
+ vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);
+
+ return vc4->yuv_linear_blit_vs;
+}
+
+static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
+{
+ struct vc4_context *vc4 = vc4_context(pctx);
+ struct pipe_screen *pscreen = pctx->screen;
+ struct pipe_shader_state **cached_shader;
+ const char *name;
+
+ if (cpp == 1) {
+ cached_shader = &vc4->yuv_linear_blit_fs_8bit;
+ name = "linear_blit_8bit_fs";
+ } else {
+ cached_shader = &vc4->yuv_linear_blit_fs_16bit;
+ name = "linear_blit_16bit_fs";
+ }
+
+ if (*cached_shader)
+ return *cached_shader;
+
+ const struct nir_shader_compiler_options *options =
+ pscreen->get_compiler_options(pscreen,
+ PIPE_SHADER_IR_NIR,
+ PIPE_SHADER_FRAGMENT);
+
+ nir_builder b;
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
+ b.shader->info.name = ralloc_strdup(b.shader, name);
+
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *glsl_int = glsl_int_type();
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_COLOR;
+
+ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec4, "pos");
+ pos_in->data.location = VARYING_SLOT_POS;
+ nir_ssa_def *pos = nir_load_var(&b, pos_in);
+
+ nir_ssa_def *one = nir_imm_int(&b, 1);
+ nir_ssa_def *two = nir_imm_int(&b, 2);
+
+ nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
+ nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
+
+ nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
+ glsl_int, "stride");
+ nir_ssa_def *stride = nir_load_var(&b, stride_in);
+
+ nir_ssa_def *x_offset;
+ nir_ssa_def *y_offset;
+ if (cpp == 1) {
+ nir_ssa_def *intra_utile_x_offset =
+ nir_ishl(&b, nir_iand(&b, x, one), two);
+ nir_ssa_def *inter_utile_x_offset =
+ nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);
+
+ x_offset = nir_iadd(&b,
+ intra_utile_x_offset,
+ inter_utile_x_offset);
+ y_offset = nir_imul(&b,
+ nir_iadd(&b,
+ nir_ishl(&b, y, one),
+ nir_ushr(&b, nir_iand(&b, x, two), one)),
+ stride);
+ } else {
+ x_offset = nir_ishl(&b, x, two);
+ y_offset = nir_imul(&b, y, stride);
+ }
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
+ load->num_components = 1;
+ nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
+ load->src[0] = nir_src_for_ssa(one);
+ load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset));
+ nir_builder_instr_insert(&b, &load->instr);
+
+ nir_store_var(&b, color_out,
+ nir_unpack_unorm_4x8(&b, &load->dest.ssa),
+ 0xf);
+
+ struct pipe_shader_state shader_tmpl = {
+ .type = PIPE_SHADER_IR_NIR,
+ .ir.nir = b.shader,
+ };
+
+ *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);
+
+ return *cached_shader;
+}
+
+static bool
+vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+ struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_resource *src = vc4_resource(info->src.resource);
+ struct vc4_resource *dst = vc4_resource(info->dst.resource);
+ bool ok;
+
+ if (src->tiled)
+ return false;
+ if (src->base.format != PIPE_FORMAT_R8_UNORM &&
+ src->base.format != PIPE_FORMAT_R8G8_UNORM)
+ return false;
+
+ /* YUV blits always turn raster-order to tiled */
+ assert(dst->base.format == src->base.format);
+ assert(dst->tiled);
+
+ /* Always 1:1 and at the origin */
+ assert(info->src.box.x == 0 && info->dst.box.x == 0);
+ assert(info->src.box.y == 0 && info->dst.box.y == 0);
+ assert(info->src.box.width == info->dst.box.width);
+ assert(info->src.box.height == info->dst.box.height);
+
+ if ((src->slices[info->src.level].offset & 3) ||
+ (src->slices[info->src.level].stride & 3)) {
+ perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
+ src->slices[info->src.level].offset,
+ src->slices[info->src.level].stride);
+ goto fallback;
+ }
+
+ vc4_blitter_save(vc4);
+
+ /* Create a renderable surface mapping the T-tiled shadow buffer.
+ */
+ struct pipe_surface dst_tmpl;
+ util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
+ info->dst.level, info->dst.box.z);
+ dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
+ struct pipe_surface *dst_surf =
+ pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
+ if (!dst_surf) {
+ fprintf(stderr, "Failed to create YUV dst surface\n");
+ util_blitter_unset_running_flag(vc4->blitter);
+ return false;
+ }
+ dst_surf->width /= 2;
+ if (dst->cpp == 1)
+ dst_surf->height /= 2;
+
+ /* Set the constant buffer. */
+ uint32_t stride = src->slices[info->src.level].stride;
+ struct pipe_constant_buffer cb_uniforms = {
+ .user_buffer = &stride,
+ .buffer_size = sizeof(stride),
+ };
+ pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms);
+ struct pipe_constant_buffer cb_src = {
+ .buffer = info->src.resource,
+ .buffer_offset = src->slices[info->src.level].offset,
+ .buffer_size = (src->bo->size -
+ src->slices[info->src.level].offset),
+ };
+ pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src);
+
+ /* Unbind the textures, to make sure we don't try to recurse into the
+ * shadow blit.
+ */
+ pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
+ pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
+
+ util_blitter_custom_shader(vc4->blitter, dst_surf,
+ vc4_get_yuv_vs(pctx),
+ vc4_get_yuv_fs(pctx, src->cpp));
+
+ util_blitter_restore_textures(vc4->blitter);
+ util_blitter_restore_constant_buffer_state(vc4->blitter);
+ /* Restore cb1 (util_blitter doesn't handle this one). */
+ struct pipe_constant_buffer cb_disabled = { 0 };
+ pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled);
+
+ pipe_surface_reference(&dst_surf, NULL);
+
+ return true;
+
+fallback:
+ /* Do an immediate SW fallback, since the render blit path
+ * would just recurse.
+ */
+ ok = util_try_blit_via_copy_region(pctx, info);
+ assert(ok); (void)ok;
+
+ return true;
+}
+
static bool
vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
@@ -218,6 +444,9 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
struct pipe_blit_info info = *blit_info;
+ if (vc4_yuv_blit(pctx, blit_info))
+ return;
+
if (vc4_tile_blit(pctx, blit_info))
return;
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c
index d06d55f86..54f9d9c26 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -30,6 +30,7 @@
#include "util/u_hash_table.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "util/ralloc.h"
#include "vc4_context.h"
@@ -49,6 +50,13 @@ static void
vc4_bo_cache_free_all(struct vc4_bo_cache *cache);
void
+vc4_bo_debug_describe(char* buf, const struct vc4_bo *ptr)
+{
+ util_sprintf(buf, "vc4_bo<%s,%u,%u>", ptr->name ? ptr->name : "?",
+ ptr->handle, ptr->size);
+}
+
+void
vc4_bo_label(struct vc4_screen *screen, struct vc4_bo *bo, const char *fmt, ...)
{
/* Perform BO labeling by default on debug builds (so that you get
@@ -113,35 +121,105 @@ vc4_bo_remove_from_cache(struct vc4_bo_cache *cache, struct vc4_bo *bo)
cache->bo_size -= bo->size;
}
+static void vc4_bo_purgeable(struct vc4_bo *bo)
+{
+ struct drm_vc4_gem_madvise arg = {
+ .handle = bo->handle,
+ .madv = VC4_MADV_DONTNEED,
+ };
+
+ if (bo->screen->has_madvise)
+ vc4_ioctl(bo->screen->fd, DRM_IOCTL_VC4_GEM_MADVISE, &arg);
+}
+
+static bool vc4_bo_unpurgeable(struct vc4_bo *bo)
+{
+ struct drm_vc4_gem_madvise arg = {
+ .handle = bo->handle,
+ .madv = VC4_MADV_WILLNEED,
+ };
+
+ if (!bo->screen->has_madvise)
+ return true;
+
+ if (vc4_ioctl(bo->screen->fd, DRM_IOCTL_VC4_GEM_MADVISE, &arg))
+ return false;
+
+ return arg.retained;
+}
+
+static void
+vc4_bo_free(struct vc4_bo *bo)
+{
+ struct vc4_screen *screen = bo->screen;
+
+ if (bo->map) {
+ if (using_vc4_simulator && bo->name &&
+ strcmp(bo->name, "winsys") == 0) {
+ free(bo->map);
+ } else {
+ munmap(bo->map, bo->size);
+ VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
+ }
+ }
+
+ struct drm_gem_close c;
+ memset(&c, 0, sizeof(c));
+ c.handle = bo->handle;
+ int ret = vc4_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
+ if (ret != 0)
+ fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
+
+ screen->bo_count--;
+ screen->bo_size -= bo->size;
+
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s%s%dkb:\n",
+ bo->name ? bo->name : "",
+ bo->name ? " " : "",
+ bo->size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
+
+ free(bo);
+}
+
static struct vc4_bo *
vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
{
struct vc4_bo_cache *cache = &screen->bo_cache;
uint32_t page_index = size / 4096 - 1;
+ struct vc4_bo *iter, *tmp, *bo = NULL;
if (cache->size_list_size <= page_index)
return NULL;
- struct vc4_bo *bo = NULL;
mtx_lock(&cache->lock);
- if (!list_empty(&cache->size_list[page_index])) {
- bo = LIST_ENTRY(struct vc4_bo, cache->size_list[page_index].next,
- size_list);
-
- /* Check that the BO has gone idle. If not, then we want to
- * allocate something new instead, since we assume that the
- * user will proceed to CPU map it and fill it with stuff.
+ LIST_FOR_EACH_ENTRY_SAFE(iter, tmp, &cache->size_list[page_index],
+ size_list) {
+ /* Check that the BO has gone idle. If not, then none of the
+ * other BOs (pushed to the list after later rendering) are
+ * likely to be idle, either.
*/
- if (!vc4_bo_wait(bo, 0, NULL)) {
- mtx_unlock(&cache->lock);
- return NULL;
- }
+ if (!vc4_bo_wait(iter, 0, NULL))
+ break;
+
+ if (!vc4_bo_unpurgeable(iter)) {
+ /* The BO has been purged. Free it and try to find
+ * another one in the cache.
+ */
+ vc4_bo_remove_from_cache(cache, iter);
+ vc4_bo_free(iter);
+ continue;
+ }
+ bo = iter;
pipe_reference_init(&bo->reference, 1);
vc4_bo_remove_from_cache(cache, bo);
vc4_bo_label(screen, bo, "%s", name);
bo->name = name;
+ break;
}
mtx_unlock(&cache->lock);
return bo;
@@ -221,42 +299,6 @@ vc4_bo_last_unreference(struct vc4_bo *bo)
}
static void
-vc4_bo_free(struct vc4_bo *bo)
-{
- struct vc4_screen *screen = bo->screen;
-
- if (bo->map) {
- if (using_vc4_simulator && bo->name &&
- strcmp(bo->name, "winsys") == 0) {
- free(bo->map);
- } else {
- munmap(bo->map, bo->size);
- VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
- }
- }
-
- struct drm_gem_close c;
- memset(&c, 0, sizeof(c));
- c.handle = bo->handle;
- int ret = vc4_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
- if (ret != 0)
- fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
-
- screen->bo_count--;
- screen->bo_size -= bo->size;
-
- if (dump_stats) {
- fprintf(stderr, "Freed %s%s%dkb:\n",
- bo->name ? bo->name : "",
- bo->name ? " " : "",
- bo->size / 1024);
- vc4_bo_dump_stats(screen);
- }
-
- free(bo);
-}
-
-static void
free_stale_bos(struct vc4_screen *screen, time_t time)
{
struct vc4_bo_cache *cache = &screen->bo_cache;
@@ -325,6 +367,7 @@ vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
cache->size_list_size = page_index + 1;
}
+ vc4_bo_purgeable(bo);
bo->free_time = time;
list_addtail(&bo->size_list, &cache->size_list[page_index]);
list_addtail(&bo->time_list, &cache->time_list);
@@ -354,7 +397,7 @@ vc4_bo_open_handle(struct vc4_screen *screen,
bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
if (bo) {
- pipe_reference(NULL, &bo->reference);
+ vc4_bo_reference(bo);
goto done;
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h
index 4e7b23e08..9fa477442 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -39,6 +39,14 @@ struct vc4_bo {
uint32_t handle;
uint32_t size;
+ /* This will be read/written by multiple threads without a lock -- you
+ * should take a snapshot and use it to see if you happen to be in the
+ * CL's handles at this position, to make most lookups O(1). It's
+ * volatile to make sure that the compiler doesn't emit multiple loads
+ * from the address, which would make the lookup racy.
+ */
+ volatile uint32_t last_hindex;
+
/** Entry in the linked list of buffers freed, by age. */
struct list_head time_list;
/** Entry in the per-page-count linked list of buffers freed (by age). */
@@ -65,18 +73,13 @@ struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd,
bool vc4_bo_flink(struct vc4_bo *bo, uint32_t *name);
int vc4_bo_get_dmabuf(struct vc4_bo *bo);
-static inline void
-vc4_bo_set_reference(struct vc4_bo **old_bo, struct vc4_bo *new_bo)
-{
- if (pipe_reference(&(*old_bo)->reference, &new_bo->reference))
- vc4_bo_last_unreference(*old_bo);
- *old_bo = new_bo;
-}
-
+void vc4_bo_debug_describe(char* buf, const struct vc4_bo *ptr);
static inline struct vc4_bo *
vc4_bo_reference(struct vc4_bo *bo)
{
- pipe_reference(NULL, &bo->reference);
+ pipe_reference_described(NULL, &bo->reference,
+ (debug_reference_descriptor)
+ vc4_bo_debug_describe);
return bo;
}
@@ -89,13 +92,18 @@ vc4_bo_unreference(struct vc4_bo **bo)
if ((*bo)->private) {
/* Avoid the mutex for private BOs */
- if (pipe_reference(&(*bo)->reference, NULL))
+ if (pipe_reference_described(&(*bo)->reference, NULL,
+ (debug_reference_descriptor)
+ vc4_bo_debug_describe)) {
vc4_bo_last_unreference(*bo);
+ }
} else {
screen = (*bo)->screen;
mtx_lock(&screen->bo_handles_mutex);
- if (pipe_reference(&(*bo)->reference, NULL)) {
+ if (pipe_reference_described(&(*bo)->reference, NULL,
+ (debug_reference_descriptor)
+ vc4_bo_debug_describe)) {
util_hash_table_remove(screen->bo_handles,
(void *)(uintptr_t)(*bo)->handle);
vc4_bo_last_unreference(*bo);
@@ -113,8 +121,11 @@ vc4_bo_unreference_locked_timed(struct vc4_bo **bo, time_t time)
if (!*bo)
return;
- if (pipe_reference(&(*bo)->reference, NULL))
+ if (pipe_reference_described(&(*bo)->reference, NULL,
+ (debug_reference_descriptor)
+ vc4_bo_debug_describe)) {
vc4_bo_last_unreference_locked_timed(*bo, time);
+ }
*bo = NULL;
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_cl.c b/lib/mesa/src/gallium/drivers/vc4/vc4_cl.c
index 508281a27..7ae092ebc 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_cl.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_cl.c
@@ -61,10 +61,19 @@ vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo)
{
uint32_t hindex;
uint32_t *current_handles = job->bo_handles.base;
+ uint32_t cl_hindex_count = cl_offset(&job->bo_handles) / 4;
+ uint32_t last_hindex = bo->last_hindex; /* volatile read! */
- for (hindex = 0; hindex < cl_offset(&job->bo_handles) / 4; hindex++) {
- if (current_handles[hindex] == bo->handle)
+ if (last_hindex < cl_hindex_count &&
+ current_handles[last_hindex] == bo->handle) {
+ return last_hindex;
+ }
+
+ for (hindex = 0; hindex < cl_hindex_count; hindex++) {
+ if (current_handles[hindex] == bo->handle) {
+ bo->last_hindex = hindex;
return hindex;
+ }
}
struct vc4_cl_out *out;
@@ -79,5 +88,6 @@ vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo)
job->bo_space += bo->size;
+ bo->last_hindex = hindex;
return hindex;
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_cl.h b/lib/mesa/src/gallium/drivers/vc4/vc4_cl.h
index 8df9dbfe6..39d1d347b 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_cl.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_cl.h
@@ -159,21 +159,6 @@ cl_aligned_f(struct vc4_cl_out **cl, float f)
cl_aligned_u32(cl, fui(f));
}
-static inline void
-cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
-{
- assert(n == 1 || n == 2);
- assert(cl->reloc_count == 0);
-#ifndef NDEBUG
- cl->reloc_count = n;
-#endif
-
- cl_u8(out, VC4_PACKET_GEM_HANDLES);
- cl->reloc_next = *out;
- cl_u32(out, 0); /* Space where hindex will be written. */
- cl_u32(out, 0); /* Space where hindex will be written. */
-}
-
static inline struct vc4_cl_out *
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
{
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_cl_dump.c b/lib/mesa/src/gallium/drivers/vc4/vc4_cl_dump.c
index ca1b9a315..a6ae0cf80 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -28,6 +28,7 @@
#include "kernel/vc4_packet.h"
#include "broadcom/cle/v3d_decoder.h"
+#include "broadcom/clif/clif_dump.h"
void
vc4_dump_cl(void *cl, uint32_t size, bool is_render)
@@ -41,6 +42,8 @@ vc4_dump_cl(void *cl, uint32_t size, bool is_render)
};
struct v3d_spec *spec = v3d_spec_load(&devinfo);
+ struct clif_dump *clif = clif_dump_init(&devinfo, stderr, true);
+
uint32_t offset = 0, hw_offset = 0;
uint8_t *p = cl;
@@ -60,7 +63,7 @@ vc4_dump_cl(void *cl, uint32_t size, bool is_render)
fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s\n",
offset, hw_offset, header, v3d_group_get_name(inst));
- v3d_print_group(stderr, inst, offset, p, "");
+ v3d_print_group(clif, inst, offset, p);
switch (header) {
case VC4_PACKET_HALT:
@@ -75,5 +78,7 @@ vc4_dump_cl(void *cl, uint32_t size, bool is_render)
hw_offset += length;
p += length;
}
+
+ clif_dump_destroy(clif);
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_context.c b/lib/mesa/src/gallium/drivers/vc4/vc4_context.c
index a9e7ff91f..ffd7d4c85 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_context.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_context.c
@@ -42,7 +42,6 @@ vc4_flush(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
- struct hash_entry *entry;
hash_table_foreach(vc4->jobs, entry) {
struct vc4_job *job = entry->data;
vc4_job_submit(vc4, job);
@@ -59,8 +58,17 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
if (fence) {
struct pipe_screen *screen = pctx->screen;
+ int fd = -1;
+
+ if (flags & PIPE_FLUSH_FENCE_FD) {
+ /* The vc4_fence takes ownership of the returned fd. */
+ drmSyncobjExportSyncFile(vc4->fd, vc4->job_syncobj,
+ &fd);
+ }
+
struct vc4_fence *f = vc4_fence_create(vc4->screen,
- vc4->last_emit_seqno);
+ vc4->last_emit_seqno,
+ fd);
screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle *)f;
}
@@ -115,8 +123,22 @@ vc4_context_destroy(struct pipe_context *pctx)
pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
+ if (vc4->yuv_linear_blit_vs)
+ pctx->delete_vs_state(pctx, vc4->yuv_linear_blit_vs);
+ if (vc4->yuv_linear_blit_fs_8bit)
+ pctx->delete_fs_state(pctx, vc4->yuv_linear_blit_fs_8bit);
+ if (vc4->yuv_linear_blit_fs_16bit)
+ pctx->delete_fs_state(pctx, vc4->yuv_linear_blit_fs_16bit);
+
vc4_program_fini(pctx);
+ if (vc4->screen->has_syncobj) {
+ drmSyncobjDestroy(vc4->fd, vc4->job_syncobj);
+ drmSyncobjDestroy(vc4->fd, vc4->in_syncobj);
+ }
+ if (vc4->in_fence_fd >= 0)
+ close(vc4->in_fence_fd);
+
ralloc_free(vc4);
}
@@ -125,6 +147,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
struct vc4_screen *screen = vc4_screen(pscreen);
struct vc4_context *vc4;
+ int err;
/* Prevent dumping of the shaders built during context setup. */
uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
@@ -150,10 +173,16 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
vc4_query_init(pctx);
vc4_resource_context_init(pctx);
- vc4_job_init(vc4);
-
vc4->fd = screen->fd;
+ err = vc4_job_init(vc4);
+ if (err)
+ goto fail;
+
+ err = vc4_fence_context_init(vc4);
+ if (err)
+ goto fail;
+
slab_create_child(&vc4->transfer_pool, &screen->transfer_pool);
vc4->uploader = u_upload_create_default(&vc4->base);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_context.h b/lib/mesa/src/gallium/drivers/vc4/vc4_context.h
index 4a1e4093f..ce8bcffac 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_context.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_context.h
@@ -78,6 +78,7 @@
#define VC4_DIRTY_COMPILED_VS (1 << 24)
#define VC4_DIRTY_COMPILED_FS (1 << 25)
#define VC4_DIRTY_FS_INPUTS (1 << 26)
+#define VC4_DIRTY_UBO_1_SIZE (1 << 27)
struct vc4_sampler_view {
struct pipe_sampler_view base;
@@ -219,6 +220,13 @@ struct vc4_job_key {
struct pipe_surface *zsbuf;
};
+struct vc4_hwperfmon {
+ uint32_t id;
+ uint64_t last_seqno;
+ uint8_t events[DRM_VC4_MAX_PERF_COUNTERS];
+ uint64_t counters[DRM_VC4_MAX_PERF_COUNTERS];
+};
+
/**
* A complete bin/render job.
*
@@ -243,6 +251,9 @@ struct vc4_job {
*/
uint32_t bo_space;
+ /* Last BO hindex referenced from VC4_PACKET_GEM_HANDLES. */
+ uint32_t last_gem_handle_hindex;
+
/** @{ Surfaces to submit rendering for. */
struct pipe_surface *color_read;
struct pipe_surface *color_write;
@@ -306,6 +317,9 @@ struct vc4_job {
/** Any flags to be passed in drm_vc4_submit_cl.flags. */
uint32_t flags;
+ /* Performance monitor attached to this job. */
+ struct vc4_hwperfmon *perfmon;
+
struct vc4_job_key key;
};
@@ -363,6 +377,10 @@ struct vc4_context {
struct u_upload_mgr *uploader;
+ struct pipe_shader_state *yuv_linear_blit_vs;
+ struct pipe_shader_state *yuv_linear_blit_fs_8bit;
+ struct pipe_shader_state *yuv_linear_blit_fs_16bit;
+
/** @{ Current pipeline state objects */
struct pipe_scissor_state scissor;
struct pipe_blend_state *blend;
@@ -387,7 +405,16 @@ struct vc4_context {
struct pipe_viewport_state viewport;
struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
struct vc4_vertexbuf_stateobj vertexbuf;
+
+ struct vc4_hwperfmon *perfmon;
/** @} */
+
+ /** Handle of syncobj containing the last submitted job fence. */
+ uint32_t job_syncobj;
+
+ int in_fence_fd;
+ /** Handle of the syncobj that holds in_fence_fd for submission. */
+ uint32_t in_syncobj;
};
struct vc4_rasterizer_state {
@@ -444,6 +471,12 @@ vc4_sampler_state(struct pipe_sampler_state *psampler)
return (struct vc4_sampler_state *)psampler;
}
+int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
+ unsigned index,
+ struct pipe_driver_query_group_info *info);
+int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info);
+
struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
void *priv, unsigned flags);
void vc4_draw_init(struct pipe_context *pctx);
@@ -476,7 +509,8 @@ void vc4_write_uniforms(struct vc4_context *vc4,
struct vc4_texture_stateobj *texstate);
void vc4_flush(struct pipe_context *pctx);
-void vc4_job_init(struct vc4_context *vc4);
+int vc4_job_init(struct vc4_context *vc4);
+int vc4_fence_context_init(struct vc4_context *vc4);
struct vc4_job *vc4_get_job(struct vc4_context *vc4,
struct pipe_surface *cbuf,
struct pipe_surface *zsbuf);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_draw.c b/lib/mesa/src/gallium/drivers/vc4/vc4_draw.c
index 556855420..06785516c 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_draw.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_draw.c
@@ -40,7 +40,7 @@ vc4_get_draw_cl_space(struct vc4_job *job, int vert_count)
/* The SW-5891 workaround may cause us to emit multiple shader recs
* and draw packets.
*/
- int num_draws = DIV_ROUND_UP(vert_count, 65535) + 1;
+ int num_draws = DIV_ROUND_UP(vert_count, 65535 - 2) + 1;
/* Binner gets our packet state -- vc4_emit.c contents,
* and the primitive itself.
@@ -222,6 +222,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
attr.coordinate_shader_vpm_offset = 0;
attr.vertex_shader_vpm_offset = 0;
}
+
+ vc4_bo_unreference(&bo);
}
cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
@@ -286,6 +288,7 @@ static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct pipe_draw_info local_info;
if (!info->count_from_stream_output && !info->indirect &&
!info->primitive_restart &&
@@ -293,11 +296,19 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
return;
if (info->mode >= PIPE_PRIM_QUADS) {
- util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
- util_primconvert_draw_vbo(vc4->primconvert, info);
- perf_debug("Fallback conversion for %d %s vertices\n",
- info->count, u_prim_name(info->mode));
- return;
+ if (info->mode == PIPE_PRIM_QUADS &&
+ info->count == 4 &&
+ !vc4->rasterizer->base.flatshade) {
+ local_info = *info;
+ local_info.mode = PIPE_PRIM_TRIANGLE_FAN;
+ info = &local_info;
+ } else {
+ util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
+ util_primconvert_draw_vbo(vc4->primconvert, info);
+ perf_debug("Fallback conversion for %d %s vertices\n",
+ info->count, u_prim_name(info->mode));
+ return;
+ }
}
/* Before setting up the draw, do any fixup blits necessary. */
@@ -377,7 +388,25 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
struct vc4_resource *rsc = vc4_resource(prsc);
struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_start_reloc(&job->bcl, &bcl, 1);
+
+ /* The original design for the VC4 kernel UABI had multiple
+ * packets that used relocations in the BCL (some of which
+ * needed two BOs), but later modifications eliminated all but
+ * this one usage. We have an arbitrary 32-bit offset value,
+ * and need to also supply an arbitrary 32-bit index buffer
+ * GEM handle, so we have this fake packet we emit in our BCL
+ * to be validated, which the kernel uses at validation time
+ * to perform the relocation in the IB packet (without
+ * emitting to the actual HW).
+ */
+ uint32_t hindex = vc4_gem_hindex(job, rsc->bo);
+ if (job->last_gem_handle_hindex != hindex) {
+ cl_u8(&bcl, VC4_PACKET_GEM_HANDLES);
+ cl_u32(&bcl, hindex);
+ cl_u32(&bcl, 0);
+ job->last_gem_handle_hindex = hindex;
+ }
+
cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
cl_u8(&bcl,
info->mode |
@@ -385,8 +414,9 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
VC4_INDEX_BUFFER_U16:
VC4_INDEX_BUFFER_U8));
cl_u32(&bcl, info->count);
- cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
+ cl_u32(&bcl, offset);
cl_u32(&bcl, vc4->max_index);
+
cl_end(&job->bcl, bcl);
job->draw_calls_queued++;
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_job.c b/lib/mesa/src/gallium/drivers/vc4/vc4_job.c
index 7fe20c16b..f38c46475 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_job.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_job.c
@@ -90,6 +90,11 @@ vc4_job_create(struct vc4_context *vc4)
job->draw_max_x = 0;
job->draw_max_y = 0;
+ job->last_gem_handle_hindex = ~0;
+
+ if (vc4->perfmon)
+ job->perfmon = vc4->perfmon;
+
return job;
}
@@ -113,7 +118,6 @@ vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
vc4_flush_jobs_writing_resource(vc4, prsc);
- struct hash_entry *entry;
hash_table_foreach(vc4->jobs, entry) {
struct vc4_job *job = entry->data;
@@ -453,6 +457,8 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
submit.shader_rec_count = job->shader_rec_count;
submit.uniforms = (uintptr_t)job->uniforms.base;
submit.uniforms_size = cl_offset(&job->uniforms);
+ if (job->perfmon)
+ submit.perfmonid = job->perfmon->id;
assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
submit.min_x_tile = job->draw_min_x / job->tile_width;
@@ -470,6 +476,19 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
}
submit.flags |= job->flags;
+ if (vc4->screen->has_syncobj) {
+ submit.out_sync = vc4->job_syncobj;
+
+ if (vc4->in_fence_fd >= 0) {
+ /* This replaces the fence in the syncobj. */
+ drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,
+ vc4->in_fence_fd);
+ submit.in_sync = vc4->in_syncobj;
+ close(vc4->in_fence_fd);
+ vc4->in_fence_fd = -1;
+ }
+ }
+
if (!(vc4_debug & VC4_DEBUG_NORAST)) {
int ret;
@@ -485,6 +504,8 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
warned = true;
} else if (!ret) {
vc4->last_emit_seqno = submit.seqno;
+ if (job->perfmon)
+ job->perfmon->last_seqno = submit.seqno;
}
}
@@ -521,7 +542,7 @@ vc4_job_hash(const void *key)
return _mesa_hash_data(key, sizeof(struct vc4_job_key));
}
-void
+int
vc4_job_init(struct vc4_context *vc4)
{
vc4->jobs = _mesa_hash_table_create(vc4,
@@ -530,5 +551,24 @@ vc4_job_init(struct vc4_context *vc4)
vc4->write_jobs = _mesa_hash_table_create(vc4,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
+
+ if (vc4->screen->has_syncobj) {
+ /* Create the syncobj as signaled since with no job executed
+ * there is nothing to wait on.
+ */
+ int ret = drmSyncobjCreate(vc4->fd,
+ DRM_SYNCOBJ_CREATE_SIGNALED,
+ &vc4->job_syncobj);
+ if (ret) {
+ /* If the screen indicated syncobj support, we should
+ * be able to create a signaled syncobj.
+ * At this point it is too late to pretend the screen
+ * has no syncobj support.
+ */
+ return ret;
+ }
+ }
+
+ return 0;
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
index 98cdfdf33..bc9bd76ae 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
@@ -38,6 +38,7 @@
#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
+#include "mesa/state_tracker/st_glsl_types.h"
static struct qreg
ntq_get_src(struct vc4_compile *c, nir_src src, int i);
@@ -50,6 +51,12 @@ type_size(const struct glsl_type *type)
return glsl_count_attribute_slots(type, false);
}
+static int
+uniforms_type_size(const struct glsl_type *type)
+{
+ return st_glsl_storage_type_size(type, false);
+}
+
static void
resize_qreg_array(struct vc4_compile *c,
struct qreg **regs,
@@ -137,6 +144,32 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
return qir_TEX_RESULT(c);
}
+static struct qreg
+vc4_ubo_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
+{
+ nir_const_value *buffer_index =
+ nir_src_as_const_value(intr->src[0]);
+ assert(buffer_index->u32[0] == 1);
+ assert(c->stage == QSTAGE_FRAG);
+
+ struct qreg offset = ntq_get_src(c, intr->src[1], 0);
+
+ /* Clamp to [0, array size). Note that MIN/MAX are signed. */
+ offset = qir_MAX(c, offset, qir_uniform_ui(c, 0));
+ offset = qir_MIN_NOIMM(c, offset,
+ qir_uniform_ui(c, c->fs_key->ubo_1_size - 4));
+
+ qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
+ offset,
+ qir_uniform(c, QUNIFORM_UBO_ADDR, buffer_index->u32[0]));
+
+ c->num_texture_samples++;
+
+ ntq_emit_thrsw(c);
+
+ return qir_TEX_RESULT(c);
+}
+
nir_ssa_def *
vc4_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
{
@@ -287,7 +320,7 @@ static struct qreg
ntq_get_alu_src(struct vc4_compile *c, nir_alu_instr *instr,
unsigned src)
{
- assert(util_is_power_of_two(instr->dest.write_mask));
+ assert(util_is_power_of_two_or_zero(instr->dest.write_mask));
unsigned chan = ffs(instr->dest.write_mask) - 1;
struct qreg r = ntq_get_src(c, instr->src[src].src,
instr->src[src].swizzle[chan]);
@@ -654,24 +687,44 @@ ntq_fceil(struct vc4_compile *c, struct qreg src)
}
static struct qreg
+ntq_shrink_sincos_input_range(struct vc4_compile *c, struct qreg x)
+{
+ /* Since we're using a Taylor approximation, we want to have a small
+ * number of coefficients and take advantage of sin/cos repeating
+ * every 2pi. We keep our x as close to 0 as we can, since the series
+ * will be less accurate as |x| increases. (Also, be careful of
+ * shifting the input x value to be tricky with sin/cos relations,
+ * because getting accurate values for x==0 is very important for SDL
+ * rendering)
+ */
+ struct qreg scaled_x =
+ qir_FMUL(c, x,
+ qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
+ /* Note: FTOI truncates toward 0. */
+ struct qreg x_frac = qir_FSUB(c, scaled_x,
+ qir_ITOF(c, qir_FTOI(c, scaled_x)));
+ /* Map [0.5, 1] to [-0.5, 0] */
+ qir_SF(c, qir_FSUB(c, x_frac, qir_uniform_f(c, 0.5)));
+ qir_FSUB_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NC;
+ /* Map [-1, -0.5] to [0, 0.5] */
+ qir_SF(c, qir_FADD(c, x_frac, qir_uniform_f(c, 0.5)));
+ qir_FADD_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS;
+
+ return x_frac;
+}
+
+static struct qreg
ntq_fsin(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
- -2.0 * M_PI,
- pow(2.0 * M_PI, 3) / (3 * 2 * 1),
- -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
- pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
- -pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+ 2.0 * M_PI,
+ -pow(2.0 * M_PI, 3) / (3 * 2 * 1),
+ pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
+ -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
+ pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
- struct qreg scaled_x =
- qir_FMUL(c,
- src,
- qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
-
- struct qreg x = qir_FADD(c,
- ntq_ffract(c, scaled_x),
- qir_uniform_f(c, -0.5));
+ struct qreg x = ntq_shrink_sincos_input_range(c, src);
struct qreg x2 = qir_FMUL(c, x, x);
struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
@@ -689,21 +742,15 @@ static struct qreg
ntq_fcos(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
- -1.0f,
- pow(2.0 * M_PI, 2) / (2 * 1),
- -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
- pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
- -pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
- pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+ 1.0f,
+ -pow(2.0 * M_PI, 2) / (2 * 1),
+ pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
+ -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
+ pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+ -pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
- struct qreg scaled_x =
- qir_FMUL(c, src,
- qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
- struct qreg x_frac = qir_FADD(c,
- ntq_ffract(c, scaled_x),
- qir_uniform_f(c, -0.5));
-
+ struct qreg x_frac = ntq_shrink_sincos_input_range(c, src);
struct qreg sum = qir_uniform_f(c, coeff[0]);
struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
struct qreg x = x2; /* Current x^2, x^4, or x^6 */
@@ -711,13 +758,10 @@ ntq_fcos(struct vc4_compile *c, struct qreg src)
if (i != 1)
x = qir_FMUL(c, x, x2);
- struct qreg mul = qir_FMUL(c,
+ sum = qir_FADD(c, qir_FMUL(c,
x,
- qir_uniform_f(c, coeff[i]));
- if (i == 0)
- sum = mul;
- else
- sum = qir_FADD(c, sum, mul);
+ qir_uniform_f(c, coeff[i])),
+ sum);
}
return sum;
}
@@ -1337,7 +1381,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
/* We have a scalar result, so the instruction should only have a
* single channel written to.
*/
- assert(util_is_power_of_two(instr->dest.write_mask));
+ assert(util_is_power_of_two_or_zero(instr->dest.write_mask));
ntq_store_dest(c, &instr->dest.dest,
ffs(instr->dest.write_mask) - 1, result);
}
@@ -1659,7 +1703,7 @@ static void
ntq_setup_uniforms(struct vc4_compile *c)
{
nir_foreach_variable(var, &c->s->uniforms) {
- uint32_t vec4_count = type_size(var->type);
+ uint32_t vec4_count = uniforms_type_size(var->type);
unsigned vec4_size = 4 * sizeof(float);
declare_uniform_range(c, var->data.driver_location * vec4_size,
@@ -1775,6 +1819,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
}
break;
+ case nir_intrinsic_load_ubo:
+ assert(instr->num_components == 1);
+ ntq_store_dest(c, &instr->dest, 0, vc4_ubo_load(c, instr));
+ break;
+
case nir_intrinsic_load_user_clip_plane:
for (int i = 0; i < instr->num_components; i++) {
ntq_store_dest(c, &instr->dest, i,
@@ -2180,13 +2229,16 @@ nir_to_qir(struct vc4_compile *c)
}
static const nir_shader_compiler_options nir_options = {
+ .lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_fdiv = true,
.lower_ffma = true,
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fsat = true,
.lower_fsqrt = true,
+ .lower_ldexp = true,
.lower_negate = true,
.native_integers = true,
.max_unroll_iterations = 32,
@@ -2435,9 +2487,10 @@ vc4_shader_state_create(struct pipe_context *pctx,
*/
s = cso->ir.nir;
- NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size,
+ NIR_PASS_V(s, nir_lower_io, nir_var_uniform,
+ uniforms_type_size,
(nir_lower_io_options)0);
- } else {
+ } else {
assert(cso->type == PIPE_SHADER_IR_TGSI);
if (vc4_debug & VC4_DEBUG_TGSI) {
@@ -2449,6 +2502,10 @@ vc4_shader_state_create(struct pipe_context *pctx,
s = tgsi_to_nir(cso->tokens, &nir_options);
}
+ NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform,
+ type_size,
+ (nir_lower_io_options)0);
+
NIR_PASS_V(s, nir_opt_global_to_local);
NIR_PASS_V(s, nir_lower_regs_to_ssa);
NIR_PASS_V(s, nir_normalize_cubemap_coords);
@@ -2724,7 +2781,8 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
VC4_DIRTY_RASTERIZER |
VC4_DIRTY_SAMPLE_MASK |
VC4_DIRTY_FRAGTEX |
- VC4_DIRTY_UNCOMPILED_FS))) {
+ VC4_DIRTY_UNCOMPILED_FS |
+ VC4_DIRTY_UBO_1_SIZE))) {
return;
}
@@ -2768,6 +2826,7 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
PIPE_SPRITE_COORD_UPPER_LEFT);
}
+ key->ubo_1_size = vc4->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer_size;
key->light_twoside = vc4->rasterizer->base.light_twoside;
struct vc4_compiled_shader *old_fs = vc4->prog.fs;
@@ -2916,7 +2975,6 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
struct vc4_context *vc4 = vc4_context(pctx);
struct vc4_uncompiled_shader *so = hwcso;
- struct hash_entry *entry;
hash_table_foreach(vc4->fs_cache, entry) {
delete_from_cache_if_matches(vc4->fs_cache, &vc4->prog.fs,
entry, so);
@@ -2973,7 +3031,6 @@ vc4_program_fini(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
- struct hash_entry *entry;
hash_table_foreach(vc4->fs_cache, entry) {
struct vc4_compiled_shader *shader = entry->data;
vc4_bo_unreference(&shader->bo);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c
index c829e7f93..71f06aebf 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c
@@ -343,13 +343,57 @@ qir_channels_written(struct qinst *inst)
unreachable("Bad pack field");
}
+char *
+qir_describe_uniform(enum quniform_contents contents, uint32_t data,
+ const uint32_t *uniforms)
+{
+ static const char *quniform_names[] = {
+ [QUNIFORM_VIEWPORT_X_SCALE] = "vp_x_scale",
+ [QUNIFORM_VIEWPORT_Y_SCALE] = "vp_y_scale",
+ [QUNIFORM_VIEWPORT_Z_OFFSET] = "vp_z_offset",
+ [QUNIFORM_VIEWPORT_Z_SCALE] = "vp_z_scale",
+ [QUNIFORM_TEXTURE_CONFIG_P0] = "tex_p0",
+ [QUNIFORM_TEXTURE_CONFIG_P1] = "tex_p1",
+ [QUNIFORM_TEXTURE_CONFIG_P2] = "tex_p2",
+ [QUNIFORM_TEXTURE_FIRST_LEVEL] = "tex_first_level",
+ };
+
+ switch (contents) {
+ case QUNIFORM_CONSTANT:
+ return ralloc_asprintf(NULL, "0x%08x / %f", data, uif(data));
+ case QUNIFORM_UNIFORM:
+ if (uniforms) {
+ uint32_t unif = uniforms[data];
+ return ralloc_asprintf(NULL, "unif[%d] = 0x%08x / %f",
+ data, unif, uif(unif));
+ } else {
+ return ralloc_asprintf(NULL, "unif[%d]", data);
+ }
+
+ case QUNIFORM_TEXTURE_CONFIG_P0:
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ case QUNIFORM_TEXTURE_CONFIG_P2:
+ case QUNIFORM_TEXTURE_FIRST_LEVEL:
+ return ralloc_asprintf(NULL, "%s[%d]",
+ quniform_names[contents], data);
+
+ default:
+ if (contents < ARRAY_SIZE(quniform_names) &&
+ quniform_names[contents]) {
+ return ralloc_asprintf(NULL, "%s",
+ quniform_names[contents]);
+ } else {
+ return ralloc_asprintf(NULL, "??? %d", contents);
+ }
+ }
+}
+
static void
qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
{
static const char *files[] = {
[QFILE_TEMP] = "t",
[QFILE_VARY] = "v",
- [QFILE_UNIF] = "u",
[QFILE_TLB_COLOR_WRITE] = "tlb_c",
[QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms",
[QFILE_TLB_Z_WRITE] = "tlb_z",
@@ -403,16 +447,18 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
fprintf(stderr, "%s", files[reg.file]);
break;
- default:
- fprintf(stderr, "%s%d", files[reg.file], reg.index);
+ case QFILE_UNIF: {
+ char *desc = qir_describe_uniform(c->uniform_contents[reg.index],
+ c->uniform_data[reg.index],
+ NULL);
+ fprintf(stderr, "u%d (%s)", reg.index, desc);
+ ralloc_free(desc);
break;
}
- if (reg.file == QFILE_UNIF &&
- c->uniform_contents[reg.index] == QUNIFORM_CONSTANT) {
- fprintf(stderr, " (0x%08x / %f)",
- c->uniform_data[reg.index],
- uif(c->uniform_data[reg.index]));
+ default:
+ fprintf(stderr, "%s%d", files[reg.file], reg.index);
+ break;
}
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h
index 90acaef28..1aa5f652f 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h
@@ -363,6 +363,7 @@ struct vc4_fs_key {
uint8_t alpha_test_func;
uint8_t logicop_func;
uint32_t point_sprite_mask;
+ uint32_t ubo_1_size;
struct pipe_rt_blend_state blend;
};
@@ -591,6 +592,8 @@ uint8_t qir_channels_written(struct qinst *inst);
void qir_dump(struct vc4_compile *c);
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
+char *qir_describe_uniform(enum quniform_contents contents, uint32_t data,
+ const uint32_t *uniforms);
const char *qir_get_stage_name(enum qstage stage);
void qir_validate(struct vc4_compile *c);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir_live_variables.c b/lib/mesa/src/gallium/drivers/vc4/vc4_qir_live_variables.c
index 7108b3ee9..5629ce044 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir_live_variables.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir_live_variables.c
@@ -173,8 +173,6 @@ qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
static void
sf_state_clear(struct hash_table *partial_update_ht)
{
- struct hash_entry *entry;
-
hash_table_foreach(partial_update_ht, entry) {
struct partial_update_state *state = entry->data;
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/lib/mesa/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index ad19f06d3..d7c22e75c 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -136,7 +136,6 @@ qir_lower_uniforms(struct vc4_compile *c)
*/
uint32_t max_count = 0;
uint32_t max_index = 0;
- struct hash_entry *entry;
hash_table_foreach(ht, entry) {
uint32_t count = (uintptr_t)entry->data;
uint32_t index = (uintptr_t)entry->key - 1;
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c b/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c
index cdcbcc917..41e6ec5c1 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c
@@ -22,11 +22,13 @@
* IN THE SOFTWARE.
*/
+#include "pipe/p_defines.h"
#include "util/u_blit.h"
#include "util/u_memory.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
+#include "util/u_transfer_helper.h"
#include "util/u_upload_mgr.h"
#include "drm_fourcc.h"
@@ -36,10 +38,6 @@
#include "vc4_resource.h"
#include "vc4_tiling.h"
-#ifndef DRM_FORMAT_MOD_INVALID
-#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
-#endif
-
static bool
vc4_resource_bo_alloc(struct vc4_resource *rsc)
{
@@ -79,15 +77,8 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx,
struct vc4_transfer *trans = vc4_transfer(ptrans);
if (trans->map) {
- struct vc4_resource *rsc;
- struct vc4_resource_slice *slice;
- if (trans->ss_resource) {
- rsc = vc4_resource(trans->ss_resource);
- slice = &rsc->slices[0];
- } else {
- rsc = vc4_resource(ptrans->resource);
- slice = &rsc->slices[ptrans->level];
- }
+ struct vc4_resource *rsc = vc4_resource(ptrans->resource);
+ struct vc4_resource_slice *slice = &rsc->slices[ptrans->level];
if (ptrans->usage & PIPE_TRANSFER_WRITE) {
vc4_store_tiled_image(rsc->bo->map + slice->offset +
@@ -100,51 +91,10 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx,
free(trans->map);
}
- if (trans->ss_resource && (ptrans->usage & PIPE_TRANSFER_WRITE)) {
- struct pipe_blit_info blit;
- memset(&blit, 0, sizeof(blit));
-
- blit.src.resource = trans->ss_resource;
- blit.src.format = trans->ss_resource->format;
- blit.src.box.width = trans->ss_box.width;
- blit.src.box.height = trans->ss_box.height;
- blit.src.box.depth = 1;
-
- blit.dst.resource = ptrans->resource;
- blit.dst.format = ptrans->resource->format;
- blit.dst.level = ptrans->level;
- blit.dst.box = trans->ss_box;
-
- blit.mask = util_format_get_mask(ptrans->resource->format);
- blit.filter = PIPE_TEX_FILTER_NEAREST;
-
- pctx->blit(pctx, &blit);
-
- pipe_resource_reference(&trans->ss_resource, NULL);
- }
-
pipe_resource_reference(&ptrans->resource, NULL);
slab_free(&vc4->transfer_pool, ptrans);
}
-static struct pipe_resource *
-vc4_get_temp_resource(struct pipe_context *pctx,
- struct pipe_resource *prsc,
- const struct pipe_box *box)
-{
- struct pipe_resource temp_setup;
-
- memset(&temp_setup, 0, sizeof(temp_setup));
- temp_setup.target = prsc->target;
- temp_setup.format = prsc->format;
- temp_setup.width0 = box->width;
- temp_setup.height0 = box->height;
- temp_setup.depth0 = 1;
- temp_setup.array_size = 1;
-
- return pctx->screen->resource_create(pctx->screen, &temp_setup);
-}
-
static void *
vc4_resource_transfer_map(struct pipe_context *pctx,
struct pipe_resource *prsc,
@@ -164,7 +114,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
*/
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
- !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) &&
+ !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
prsc->last_level == 0 &&
prsc->width0 == box->width &&
prsc->height0 == box->height &&
@@ -218,50 +168,6 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
ptrans->usage = usage;
ptrans->box = *box;
- /* If the resource is multisampled, we need to resolve to single
- * sample. This seems like it should be handled at a higher layer.
- */
- if (prsc->nr_samples > 1) {
- trans->ss_resource = vc4_get_temp_resource(pctx, prsc, box);
- if (!trans->ss_resource)
- goto fail;
- assert(!trans->ss_resource->nr_samples);
-
- /* The ptrans->box gets modified for tile alignment, so save
- * the original box for unmap time.
- */
- trans->ss_box = *box;
-
- if (usage & PIPE_TRANSFER_READ) {
- struct pipe_blit_info blit;
- memset(&blit, 0, sizeof(blit));
-
- blit.src.resource = ptrans->resource;
- blit.src.format = ptrans->resource->format;
- blit.src.level = ptrans->level;
- blit.src.box = trans->ss_box;
-
- blit.dst.resource = trans->ss_resource;
- blit.dst.format = trans->ss_resource->format;
- blit.dst.box.width = trans->ss_box.width;
- blit.dst.box.height = trans->ss_box.height;
- blit.dst.box.depth = 1;
-
- blit.mask = util_format_get_mask(prsc->format);
- blit.filter = PIPE_TEX_FILTER_NEAREST;
-
- pctx->blit(pctx, &blit);
- vc4_flush_jobs_writing_resource(vc4, blit.dst.resource);
- }
-
- /* The rest of the mapping process should use our temporary. */
- prsc = trans->ss_resource;
- rsc = vc4_resource(prsc);
- ptrans->box.x = 0;
- ptrans->box.y = 0;
- ptrans->box.z = 0;
- }
-
if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
buf = vc4_bo_map_unsynchronized(rsc->bo);
else
@@ -275,9 +181,6 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
struct vc4_resource_slice *slice = &rsc->slices[level];
if (rsc->tiled) {
- uint32_t utile_w = vc4_utile_width(rsc->cpp);
- uint32_t utile_h = vc4_utile_height(rsc->cpp);
-
/* No direct mappings of tiled, since we need to manually
* tile/untile.
*/
@@ -298,49 +201,12 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
ptrans->box.height = (ptrans->box.height + 3) >> 2;
}
- /* We need to align the box to utile boundaries, since that's
- * what load/store operates on. This may cause us to need to
- * read out the original contents in that border area. Right
- * now we just read out the entire contents, including the
- * middle area that will just get overwritten.
- */
- uint32_t box_start_x = ptrans->box.x & (utile_w - 1);
- uint32_t box_start_y = ptrans->box.y & (utile_h - 1);
- bool needs_load = (usage & PIPE_TRANSFER_READ) != 0;
-
- if (box_start_x) {
- ptrans->box.width += box_start_x;
- ptrans->box.x -= box_start_x;
- needs_load = true;
- }
- if (box_start_y) {
- ptrans->box.height += box_start_y;
- ptrans->box.y -= box_start_y;
- needs_load = true;
- }
- if (ptrans->box.width & (utile_w - 1)) {
- /* We only need to force a load if our border region
- * we're extending into is actually part of the
- * texture.
- */
- uint32_t slice_width = u_minify(prsc->width0, level);
- if (ptrans->box.x + ptrans->box.width != slice_width)
- needs_load = true;
- ptrans->box.width = align(ptrans->box.width, utile_w);
- }
- if (ptrans->box.height & (utile_h - 1)) {
- uint32_t slice_height = u_minify(prsc->height0, level);
- if (ptrans->box.y + ptrans->box.height != slice_height)
- needs_load = true;
- ptrans->box.height = align(ptrans->box.height, utile_h);
- }
-
ptrans->stride = ptrans->box.width * rsc->cpp;
ptrans->layer_stride = ptrans->stride * ptrans->box.height;
trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
- if (needs_load) {
+ if (usage & PIPE_TRANSFER_READ) {
vc4_load_tiled_image(trans->map, ptrans->stride,
buf + slice->offset +
ptrans->box.z * rsc->cube_map_stride,
@@ -348,9 +214,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
slice->tiling, rsc->cpp,
&ptrans->box);
}
- return (trans->map +
- box_start_x * rsc->cpp +
- box_start_y * ptrans->stride);
+ return trans->map;
} else {
ptrans->stride = slice->stride;
ptrans->layer_stride = ptrans->stride;
@@ -368,6 +232,44 @@ fail:
}
static void
+vc4_texture_subdata(struct pipe_context *pctx,
+ struct pipe_resource *prsc,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
+{
+ struct vc4_resource *rsc = vc4_resource(prsc);
+ struct vc4_resource_slice *slice = &rsc->slices[level];
+
+ /* For a direct mapping, we can just take the u_transfer path. */
+ if (!rsc->tiled ||
+ box->depth != 1 ||
+ (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
+ return u_default_texture_subdata(pctx, prsc, level, usage, box,
+ data, stride, layer_stride);
+ }
+
+ /* Otherwise, map and store the texture data directly into the tiled
+ * texture.
+ */
+ void *buf;
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+ buf = vc4_bo_map_unsynchronized(rsc->bo);
+ else
+ buf = vc4_bo_map(rsc->bo);
+
+ vc4_store_tiled_image(buf + slice->offset +
+ box->z * rsc->cube_map_stride,
+ slice->stride,
+ (void *)data, stride,
+ slice->tiling, rsc->cpp,
+ box);
+}
+
+static void
vc4_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *prsc)
{
@@ -406,7 +308,7 @@ vc4_resource_get_handle(struct pipe_screen *pscreen,
whandle->modifier = DRM_FORMAT_MOD_LINEAR;
switch (whandle->type) {
- case DRM_API_HANDLE_TYPE_SHARED:
+ case WINSYS_HANDLE_TYPE_SHARED:
if (screen->ro) {
/* This could probably be supported, assuming that a
* control node was used for pl111.
@@ -416,12 +318,12 @@ vc4_resource_get_handle(struct pipe_screen *pscreen,
}
return vc4_bo_flink(rsc->bo, &whandle->handle);
- case DRM_API_HANDLE_TYPE_KMS:
+ case WINSYS_HANDLE_TYPE_KMS:
if (screen->ro && renderonly_get_handle(rsc->scanout, whandle))
return TRUE;
whandle->handle = rsc->bo->handle;
return TRUE;
- case DRM_API_HANDLE_TYPE_FD:
+ case WINSYS_HANDLE_TYPE_FD:
/* FDs are cross-device, so we can export directly from vc4.
*/
whandle->handle = vc4_bo_get_dmabuf(rsc->bo);
@@ -564,8 +466,10 @@ get_resource_texture_format(struct pipe_resource *prsc)
if (prsc->nr_samples > 1) {
return ~0;
} else {
- assert(format == VC4_TEXTURE_TYPE_RGBA8888);
- return VC4_TEXTURE_TYPE_RGBA32R;
+ if (format == VC4_TEXTURE_TYPE_RGBA8888)
+ return VC4_TEXTURE_TYPE_RGBA32R;
+ else
+ return ~0;
}
}
@@ -668,7 +572,15 @@ vc4_resource_create_with_modifiers(struct pipe_screen *pscreen,
goto fail;
}
- if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) {
+ /* Set up the "scanout resource" (the dmabuf export of our buffer to
+ * the KMS handle) if the buffer might ever have
+ * resource_get_handle(WINSYS_HANDLE_TYPE_KMS) called on it.
+ * create_with_modifiers() doesn't give us usage flags, so we have to
+ * assume that all calls with modifiers are scanout-possible.
+ */
+ if (screen->ro &&
+ ((tmpl->bind & PIPE_BIND_SCANOUT) ||
+ !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
rsc->scanout =
renderonly_scanout_for_resource(prsc, screen->ro, NULL);
if (!rsc->scanout)
@@ -708,19 +620,12 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
if (!rsc)
return NULL;
- if (whandle->offset != 0) {
- fprintf(stderr,
- "Attempt to import unsupported winsys offset %u\n",
- whandle->offset);
- return NULL;
- }
-
switch (whandle->type) {
- case DRM_API_HANDLE_TYPE_SHARED:
+ case WINSYS_HANDLE_TYPE_SHARED:
rsc->bo = vc4_bo_open_name(screen,
whandle->handle, whandle->stride);
break;
- case DRM_API_HANDLE_TYPE_FD:
+ case WINSYS_HANDLE_TYPE_FD:
rsc->bo = vc4_bo_open_dmabuf(screen,
whandle->handle, whandle->stride);
break;
@@ -766,6 +671,28 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
rsc->vc4_format = get_resource_texture_format(prsc);
vc4_setup_slices(rsc, "import");
+        if (whandle->offset != 0) {
+                if (rsc->tiled) {
+                        fprintf(stderr,
+                                "Attempt to import unsupported "
+                                "winsys offset %u\n",
+                                whandle->offset);
+                        goto fail;
+                }
+
+                /* Validate before touching the slice, and sum in 64 bits:
+                 * adding in the fields' own (presumably 32-bit) width could
+                 * wrap for a huge offset and slip past the bounds check.
+                 */
+                if ((uint64_t)rsc->slices[0].offset + whandle->offset +
+                    rsc->slices[0].size > rsc->bo->size) {
+                        fprintf(stderr, "Attempt to import "
+                                "with overflowing offset (%u + %u > %u)\n",
+                                whandle->offset,
+                                rsc->slices[0].size,
+                                rsc->bo->size);
+                        goto fail;
+                }
+
+                rsc->slices[0].offset += whandle->offset;
+        }
+
if (screen->ro) {
/* Make sure that renderonly has a handle to our buffer in the
* display's fd, so that a later renderonly_get_handle()
@@ -779,7 +706,7 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
goto fail;
}
- if (whandle->stride != slice->stride) {
+ if (rsc->tiled && whandle->stride != slice->stride) {
static bool warned = false;
if (!warned) {
warned = true;
@@ -792,6 +719,8 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
slice->stride);
}
goto fail;
+ } else if (!rsc->tiled) {
+ slice->stride = whandle->stride;
}
return prsc;
@@ -1187,6 +1116,14 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
return shadow_rsc;
}
+/* Driver entry points handed to u_transfer_helper_create() by
+ * vc4_resource_screen_init().
+ */
+static const struct u_transfer_vtbl transfer_vtbl = {
+ .resource_create = vc4_resource_create,
+ .resource_destroy = vc4_resource_destroy,
+ .transfer_map = vc4_resource_transfer_map,
+ .transfer_unmap = vc4_resource_transfer_unmap,
+ .transfer_flush_region = u_default_transfer_flush_region,
+};
+
void
vc4_resource_screen_init(struct pipe_screen *pscreen)
{
@@ -1199,6 +1136,9 @@ vc4_resource_screen_init(struct pipe_screen *pscreen)
pscreen->resource_destroy = u_resource_destroy_vtbl;
pscreen->resource_get_handle = vc4_resource_get_handle;
pscreen->resource_destroy = vc4_resource_destroy;
+ pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
+ false, false,
+ false, true);
/* Test if the kernel has GET_TILING; it will return -EINVAL if the
* ioctl does not exist, but -ENOENT if we pass an impossible handle.
@@ -1215,11 +1155,11 @@ vc4_resource_screen_init(struct pipe_screen *pscreen)
void
vc4_resource_context_init(struct pipe_context *pctx)
{
- pctx->transfer_map = vc4_resource_transfer_map;
- pctx->transfer_flush_region = u_default_transfer_flush_region;
- pctx->transfer_unmap = vc4_resource_transfer_unmap;
+ pctx->transfer_map = u_transfer_helper_transfer_map;
+ pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
+ pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
pctx->buffer_subdata = u_default_buffer_subdata;
- pctx->texture_subdata = u_default_texture_subdata;
+ pctx->texture_subdata = vc4_texture_subdata;
pctx->create_surface = vc4_create_surface;
pctx->surface_destroy = vc4_surface_destroy;
pctx->resource_copy_region = util_resource_copy_region;
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_resource.h b/lib/mesa/src/gallium/drivers/vc4/vc4_resource.h
index d4c491e50..8c0aadbcc 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_resource.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_resource.h
@@ -32,9 +32,6 @@
struct vc4_transfer {
struct pipe_transfer base;
void *map;
-
- struct pipe_resource *ss_resource;
- struct pipe_box ss_box;
};
struct vc4_resource_slice {
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c b/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c
index 9879a4db1..e7f7c82c2 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
-#include "os/os_misc.h"
+#include "util/os_misc.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
@@ -32,6 +32,8 @@
#include "util/u_memory.h"
#include "util/u_format.h"
#include "util/u_hash_table.h"
+#include "util/u_screen.h"
+#include "util/u_transfer_helper.h"
#include "util/ralloc.h"
#include <xf86drm.h>
@@ -64,7 +66,7 @@ static const struct debug_named_value debug_options[] = {
"Flush after each draw call" },
{ "always_sync", VC4_DEBUG_ALWAYS_SYNC,
"Wait for finish after each flush" },
-#if USE_VC4_SIMULATOR
+#ifdef USE_VC4_SIMULATOR
{ "dump", VC4_DEBUG_DUMP,
"Write a GPU command stream trace file" },
#endif
@@ -105,10 +107,12 @@ vc4_screen_destroy(struct pipe_screen *pscreen)
slab_destroy_parent(&screen->transfer_pool);
free(screen->ro);
-#if USE_VC4_SIMULATOR
+#ifdef USE_VC4_SIMULATOR
vc4_simulator_destroy(screen);
#endif
+ u_transfer_helper_destroy(pscreen->transfer_helper);
+
close(screen->fd);
ralloc_free(pscreen);
}
@@ -140,17 +144,15 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_SHAREABLE_SHADERS:
- case PIPE_CAP_USER_CONSTANT_BUFFERS:
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_TWO_SIDED_STENCIL:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
- case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_TEXTURE_BARRIER:
return 1;
+ case PIPE_CAP_NATIVE_FENCE_FD:
+ return screen->has_syncobj;
+
case PIPE_CAP_TILE_RASTER_ORDER:
return vc4_has_feature(screen,
DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER);
@@ -160,15 +162,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_POINT_SPRITE:
return 1;
- case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
- return 256;
-
- case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return 120;
-
- case PIPE_CAP_MAX_VIEWPORTS:
- return 1;
-
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
@@ -177,130 +170,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
return 1;
- /* Unsupported features. */
- case PIPE_CAP_ANISOTROPIC_FILTER:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
- case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
- case PIPE_CAP_CUBE_MAP_ARRAY:
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
- case PIPE_CAP_SEAMLESS_CUBE_MAP:
- case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
- case PIPE_CAP_TGSI_INSTANCEID:
- case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_COMPUTE:
- case PIPE_CAP_START_INSTANCE:
- case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- case PIPE_CAP_SHADER_STENCIL_EXPORT:
- case PIPE_CAP_TGSI_TEXCOORD:
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- case PIPE_CAP_CONDITIONAL_RENDER:
- case PIPE_CAP_PRIMITIVE_RESTART:
- case PIPE_CAP_SM3:
- case PIPE_CAP_INDEP_BLEND_ENABLE:
- case PIPE_CAP_INDEP_BLEND_FUNC:
- case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
- case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
- case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
- case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
- case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
- case PIPE_CAP_TEXTURE_GATHER_SM5:
- case PIPE_CAP_FAKE_SW_MSAA:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
- case PIPE_CAP_SAMPLE_SHADING:
- case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
- case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_MAX_TEXEL_OFFSET:
- case PIPE_CAP_MAX_VERTEX_STREAMS:
- case PIPE_CAP_DRAW_INDIRECT:
- case PIPE_CAP_MULTI_DRAW_INDIRECT:
- case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
- case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
- case PIPE_CAP_CLIP_HALFZ:
- case PIPE_CAP_VERTEXID_NOBASE:
- case PIPE_CAP_POLYGON_OFFSET_CLAMP:
- case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
- case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
- case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
- case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
- case PIPE_CAP_DEPTH_BOUNDS_TEST:
- case PIPE_CAP_TGSI_TXQS:
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
- case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
- case PIPE_CAP_CLEAR_TEXTURE:
- case PIPE_CAP_DRAW_PARAMETERS:
- case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
- case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
- case PIPE_CAP_INVALIDATE_BUFFER:
- case PIPE_CAP_GENERATE_MIPMAP:
- case PIPE_CAP_STRING_MARKER:
- case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
- case PIPE_CAP_QUERY_BUFFER_OBJECT:
- case PIPE_CAP_QUERY_MEMORY_INFO:
- case PIPE_CAP_PCI_GROUP:
- case PIPE_CAP_PCI_BUS:
- case PIPE_CAP_PCI_DEVICE:
- case PIPE_CAP_PCI_FUNCTION:
- case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
- case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
- case PIPE_CAP_CULL_DISTANCE:
- case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
- case PIPE_CAP_TGSI_VOTE:
- case PIPE_CAP_MAX_WINDOW_RECTANGLES:
- case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
- case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
- case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
- case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
- case PIPE_CAP_NATIVE_FENCE_FD:
- case PIPE_CAP_TGSI_FS_FBFETCH:
- case PIPE_CAP_TGSI_MUL_ZERO_WINS:
- case PIPE_CAP_DOUBLES:
- case PIPE_CAP_INT64:
- case PIPE_CAP_INT64_DIVMOD:
- case PIPE_CAP_TGSI_TEX_TXF_LZ:
- case PIPE_CAP_TGSI_CLOCK:
- case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
- case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
- case PIPE_CAP_TGSI_BALLOT:
- case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
- case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
- case PIPE_CAP_POST_DEPTH_COVERAGE:
- case PIPE_CAP_BINDLESS_TEXTURE:
- case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
- case PIPE_CAP_QUERY_SO_OVERFLOW:
- case PIPE_CAP_MEMOBJ:
- case PIPE_CAP_LOAD_CONSTBUF:
- case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
- return 0;
-
- /* Stream output. */
- case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
- case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
- case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
- case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
- return 0;
-
- /* Geometry shader output, unsupported. */
- case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
- case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
- return 0;
-
/* Texturing. */
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
@@ -308,35 +177,9 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
/* Note: Not supported in hardware, just faking it. */
return 5;
- case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return 0;
-
- /* Render targets. */
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return 1;
-
- /* Queries. */
- case PIPE_CAP_QUERY_TIME_ELAPSED:
- case PIPE_CAP_QUERY_TIMESTAMP:
- return 0;
-
- case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
- case PIPE_CAP_MIN_TEXEL_OFFSET:
- return 0;
-
- case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
- return 2048;
-
- case PIPE_CAP_ENDIANNESS:
- return PIPE_ENDIAN_LITTLE;
-
- case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
- return 64;
case PIPE_CAP_VENDOR_ID:
return 0x14E4;
- case PIPE_CAP_DEVICE_ID:
- return 0xFFFFFFFF;
case PIPE_CAP_ACCELERATED:
return 1;
case PIPE_CAP_VIDEO_MEMORY: {
@@ -351,8 +194,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 1;
default:
- fprintf(stderr, "unknown param %d\n", param);
- return 0;
+ return u_pipe_screen_get_param_defaults(pscreen, param);
}
}
@@ -372,10 +214,10 @@ vc4_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
return 0.0f;
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
return 0.0f;
- case PIPE_CAPF_GUARD_BAND_LEFT:
- case PIPE_CAPF_GUARD_BAND_TOP:
- case PIPE_CAPF_GUARD_BAND_RIGHT:
- case PIPE_CAPF_GUARD_BAND_BOTTOM:
+
+ case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
+ case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
+ case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
return 0.0f;
default:
fprintf(stderr, "unknown paramf %d\n", param);
@@ -443,13 +285,17 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen,
return PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 0;
- case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
- return 32;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
- case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+ case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
return 0;
+ case PIPE_SHADER_CAP_SCALAR_ISA:
+ return 1;
default:
fprintf(stderr, "unknown shader param %d\n", param);
return 0;
@@ -462,16 +308,18 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
+ unsigned storage_sample_count,
unsigned usage)
{
struct vc4_screen *screen = vc4_screen(pscreen);
- unsigned retval = 0;
+
+ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+ return false;
if (sample_count > 1 && sample_count != VC4_MAX_SAMPLES)
return FALSE;
- if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
- !util_format_is_supported(format, usage)) {
+ if (target >= PIPE_MAX_TEXTURE_TYPES) {
return FALSE;
}
@@ -521,46 +369,36 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
case PIPE_FORMAT_R8G8B8_SSCALED:
case PIPE_FORMAT_R8G8_SSCALED:
case PIPE_FORMAT_R8_SSCALED:
- retval |= PIPE_BIND_VERTEX_BUFFER;
break;
default:
- break;
+ return FALSE;
}
}
if ((usage & PIPE_BIND_RENDER_TARGET) &&
- vc4_rt_format_supported(format)) {
- retval |= PIPE_BIND_RENDER_TARGET;
+ !vc4_rt_format_supported(format)) {
+ return FALSE;
}
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- vc4_tex_format_supported(format) &&
- (format != PIPE_FORMAT_ETC1_RGB8 || screen->has_etc1)) {
- retval |= PIPE_BIND_SAMPLER_VIEW;
+ (!vc4_tex_format_supported(format) ||
+ (format == PIPE_FORMAT_ETC1_RGB8 && !screen->has_etc1))) {
+ return FALSE;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
- (format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
- format == PIPE_FORMAT_X8Z24_UNORM)) {
- retval |= PIPE_BIND_DEPTH_STENCIL;
+ format != PIPE_FORMAT_S8_UINT_Z24_UNORM &&
+ format != PIPE_FORMAT_X8Z24_UNORM) {
+ return FALSE;
}
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
- (format == PIPE_FORMAT_I8_UINT ||
- format == PIPE_FORMAT_I16_UINT)) {
- retval |= PIPE_BIND_INDEX_BUFFER;
- }
-
-#if 0
- if (retval != usage) {
- fprintf(stderr,
- "not supported: format=%s, target=%d, sample_count=%d, "
- "usage=0x%x, retval=0x%x\n", util_format_name(format),
- target, sample_count, usage, retval);
+ format != PIPE_FORMAT_I8_UINT &&
+ format != PIPE_FORMAT_I16_UINT) {
+ return FALSE;
}
-#endif
- return retval == usage;
+ return TRUE;
}
static void
@@ -659,7 +497,9 @@ struct pipe_screen *
vc4_screen_create(int fd, struct renderonly *ro)
{
struct vc4_screen *screen = rzalloc(NULL, struct vc4_screen);
+ uint64_t syncobj_cap = 0;
struct pipe_screen *pscreen;
+ int err;
pscreen = &screen->base;
@@ -690,6 +530,14 @@ vc4_screen_create(int fd, struct renderonly *ro)
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_ETC1);
screen->has_threaded_fs =
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
+ screen->has_madvise =
+ vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_MADVISE);
+ screen->has_perfmon_ioctl =
+ vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_PERFMON);
+
+ err = drmGetCap(fd, DRM_CAP_SYNCOBJ, &syncobj_cap);
+ if (err == 0 && syncobj_cap)
+ screen->has_syncobj = true;
if (!vc4_get_chip_info(screen))
goto fail;
@@ -698,13 +546,13 @@ vc4_screen_create(int fd, struct renderonly *ro)
slab_create_parent(&screen->transfer_pool, sizeof(struct vc4_transfer), 16);
- vc4_fence_init(screen);
+ vc4_fence_screen_init(screen);
vc4_debug = debug_get_option_vc4_debug();
if (vc4_debug & VC4_DEBUG_SHADERDB)
vc4_debug |= VC4_DEBUG_NORAST;
-#if USE_VC4_SIMULATOR
+#ifdef USE_VC4_SIMULATOR
vc4_simulator_init(screen);
#endif
@@ -716,6 +564,11 @@ vc4_screen_create(int fd, struct renderonly *ro)
pscreen->get_compiler_options = vc4_screen_get_compiler_options;
pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers;
+ if (screen->has_perfmon_ioctl) {
+ pscreen->get_driver_query_group_info = vc4_get_driver_query_group_info;
+ pscreen->get_driver_query_info = vc4_get_driver_query_info;
+ }
+
return pscreen;
fail:
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_screen.h b/lib/mesa/src/gallium/drivers/vc4/vc4_screen.h
index 85108219e..f4550d1c2 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_screen.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_screen.h
@@ -95,7 +95,10 @@ struct vc4_screen {
bool has_control_flow;
bool has_etc1;
bool has_threaded_fs;
+ bool has_madvise;
bool has_tiling_ioctl;
+ bool has_perfmon_ioctl;
+ bool has_syncobj;
struct vc4_simulator_file *sim_file;
};
@@ -116,9 +119,9 @@ vc4_screen_get_compiler_options(struct pipe_screen *pscreen,
extern uint32_t vc4_debug;
void
-vc4_fence_init(struct vc4_screen *screen);
+vc4_fence_screen_init(struct vc4_screen *screen);
struct vc4_fence *
-vc4_fence_create(struct vc4_screen *screen, uint64_t seqno);
+vc4_fence_create(struct vc4_screen *screen, uint64_t seqno, int fd);
#endif /* VC4_SCREEN_H */
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c b/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c
index a73e40969..37c098a04 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c
@@ -619,6 +619,11 @@ vc4_simulator_get_param_ioctl(int fd, struct drm_vc4_get_param *args)
args->value = true;
return 0;
+        case DRM_VC4_PARAM_SUPPORTS_MADVISE:
+        case DRM_VC4_PARAM_SUPPORTS_PERFMON:
+                /* errno holds positive error numbers; the failure itself is
+                 * signalled by the -1 return value.
+                 */
+                errno = EINVAL;
+                return -1;
+
case DRM_VC4_PARAM_V3D_IDENT0:
args->value = 0x02000000;
return 0;
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_state.c b/lib/mesa/src/gallium/drivers/vc4/vc4_state.c
index ed8d404a4..1e4657a79 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_state.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_state.c
@@ -23,6 +23,7 @@
*/
#include "pipe/p_state.h"
+#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -386,8 +387,6 @@ vc4_set_constant_buffer(struct pipe_context *pctx,
struct vc4_context *vc4 = vc4_context(pctx);
struct vc4_constbuf_stateobj *so = &vc4->constbuf[shader];
- assert(index == 0);
-
/* Note that the state tracker can unbind constant buffers by
* passing NULL here.
*/
@@ -397,7 +396,10 @@ vc4_set_constant_buffer(struct pipe_context *pctx,
return;
}
- assert(!cb->buffer);
+ if (index == 1 && so->cb[index].buffer_size != cb->buffer_size)
+ vc4->dirty |= VC4_DIRTY_UBO_1_SIZE;
+
+ pipe_resource_reference(&so->cb[index].buffer, cb->buffer);
so->cb[index].buffer_offset = cb->buffer_offset;
so->cb[index].buffer_size = cb->buffer_size;
so->cb[index].user_buffer = cb->user_buffer;
@@ -413,21 +415,10 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
{
struct vc4_context *vc4 = vc4_context(pctx);
struct pipe_framebuffer_state *cso = &vc4->framebuffer;
- unsigned i;
vc4->job = NULL;
- for (i = 0; i < framebuffer->nr_cbufs; i++)
- pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
- for (; i < vc4->framebuffer.nr_cbufs; i++)
- pipe_surface_reference(&cso->cbufs[i], NULL);
-
- cso->nr_cbufs = framebuffer->nr_cbufs;
-
- pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);
-
- cso->width = framebuffer->width;
- cso->height = framebuffer->height;
+ util_copy_framebuffer_state(cso, framebuffer);
/* Nonzero texture mipmap levels are laid out as if they were in
* power-of-two-sized spaces. The renderbuffer config infers its
@@ -567,8 +558,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
so->base = *cso;
- pipe_reference(NULL, &prsc->reference);
- so->base.texture = prsc;
+ so->base.texture = NULL;
+ pipe_resource_reference(&so->base.texture, prsc);
so->base.reference.count = 1;
so->base.context = pctx;
@@ -581,14 +572,20 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
*/
if ((cso->u.tex.first_level &&
(cso->u.tex.first_level != cso->u.tex.last_level)) ||
- rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
+ rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R ||
+ rsc->vc4_format == ~0) {
struct vc4_resource *shadow_parent = rsc;
- struct pipe_resource tmpl = *prsc;
-
- tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
- tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level);
- tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level);
- tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
+ struct pipe_resource tmpl = {
+ .target = prsc->target,
+ .format = prsc->format,
+ .width0 = u_minify(prsc->width0,
+ cso->u.tex.first_level),
+ .height0 = u_minify(prsc->height0,
+ cso->u.tex.first_level),
+ .bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
+ .last_level = cso->u.tex.last_level - cso->u.tex.first_level,
+ .nr_samples = prsc->nr_samples,
+ };
/* Create the shadow texture. The rest of the texture
* parameter setup will use the shadow.
@@ -617,7 +614,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
}
so->texture_p0 =
- (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
+ (VC4_SET_FIELD((rsc->slices[0].offset +
+ cso->u.tex.first_layer *
+ rsc->cube_map_stride) >> 12, VC4_TEX_P0_OFFSET) |
VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
VC4_SET_FIELD(so->force_first_level ?
cso->u.tex.last_level :
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling.c b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling.c
index 07e1c9c5f..2da520eb4 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling.c
@@ -63,15 +63,6 @@ vc4_size_is_lt(uint32_t width, uint32_t height, int cpp)
height <= 4 * vc4_utile_height(cpp));
}
-static void
-check_box_utile_alignment(const struct pipe_box *box, int cpp)
-{
- assert(!(box->x & (vc4_utile_width(cpp) - 1)));
- assert(!(box->y & (vc4_utile_height(cpp) - 1)));
- assert(!(box->width & (vc4_utile_width(cpp) - 1)));
- assert(!(box->height & (vc4_utile_height(cpp) - 1)));
-}
-
/**
* Takes a utile x and y (and the number of utiles of width of the image) and
* returns the offset to the utile within a VC4_TILING_FORMAT_TF image.
@@ -216,8 +207,6 @@ vc4_load_tiled_image(void *dst, uint32_t dst_stride,
uint8_t tiling_format, int cpp,
const struct pipe_box *box)
{
- check_box_utile_alignment(box, cpp);
-
if (tiling_format == VC4_TILING_FORMAT_LT) {
vc4_load_lt_image(dst, dst_stride,
src, src_stride,
@@ -240,8 +229,6 @@ vc4_store_tiled_image(void *dst, uint32_t dst_stride,
uint8_t tiling_format, int cpp,
const struct pipe_box *box)
{
- check_box_utile_alignment(box, cpp);
-
if (tiling_format == VC4_TILING_FORMAT_LT) {
vc4_store_lt_image(dst, dst_stride,
src, src_stride,
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c
index 4a76c0ff7..ec42a3dc2 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c
@@ -41,6 +41,12 @@
#define NEON_TAG(x) x ## _base
#endif
+/** Rounds val down to a multiple of align; align must be a power of two. */
+static inline uint32_t
+align_down(uint32_t val, uint32_t align)
+{
+        const uint32_t low_bits = align - 1;
+
+        return val & ~low_bits;
+}
+
/** Returns the stride in bytes of a 64-byte microtile. */
static uint32_t
vc4_utile_stride(int cpp)
@@ -252,11 +258,78 @@ vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp)
#endif
}
+/**
+ * Returns the X value into the address bits for LT tiling.
+ *
+ * The LT tile load/stores rely on the X bits not intersecting with the Y
+ * bits. Because of this, we have to choose to put the utile index within the
+ * LT tile into one of the two values, and we do so in swizzle_lt_x() to make
+ * NPOT handling easier.
+ */
+static uint32_t
+swizzle_lt_x(int x, int cpp)
+{
+        /* Do the shifts in unsigned arithmetic: this is also called with
+         * ~0 to build the x address mask, and left-shifting a negative int
+         * is undefined behavior in C.
+         */
+        const uint32_t ux = (uint32_t)x;
+
+        switch (cpp) {
+        case 1:
+                /* 8x8 inside of 4x4 */
+                return ((ux & 0x7u) << (0 - 0) |
+                        (ux & ~0x7u) << (6 - 3));
+        case 2:
+                /* 8x4 inside of 4x4 */
+                return ((ux & 0x7u) << (1 - 0) |
+                        (ux & ~0x7u) << (6 - 3));
+        case 4:
+                /* 4x4 inside of 4x4 */
+                return ((ux & 0x3u) << (2 - 0) |
+                        (ux & ~0x3u) << (6 - 2));
+        case 8:
+                /* 2x4 inside of 4x4 */
+                return ((ux & 0x1u) << (3 - 0) |
+                        (ux & ~0x1u) << (6 - 1));
+        default:
+                unreachable("bad cpp");
+        }
+}
-void
-NEON_TAG(vc4_load_lt_image)(void *dst, uint32_t dst_stride,
- void *src, uint32_t src_stride,
- int cpp, const struct pipe_box *box)
+/**
+ * Returns the Y value into the address bits for LT tiling.
+ *
+ * The LT tile load/stores rely on the X bits not intersecting with the Y
+ * bits.
+ */
+static uint32_t
+swizzle_lt_y(int y, int cpp)
+{
+        switch (cpp) {
+        case 1:
+                /* 8x8 inside of 4x4: three row bits, placed at bit 3. */
+                return (y & 0x7) << 3;
+        case 2:
+        case 4:
+        case 8:
+                /* 8x4, 4x4 and 2x4 inside of 4x4 all share the same
+                 * layout for y: two row bits, placed at bit 4.
+                 */
+                return (y & 0x3) << 4;
+        default:
+                unreachable("bad cpp");
+        }
+}
+
+/**
+ * Helper for loading or storing to an LT image, where the box is aligned
+ * to utiles.
+ *
+ * This just breaks the box down into calls to the fast
+ * vc4_load_utile/vc4_store_utile helpers.
+ */
+static inline void
+vc4_lt_image_aligned(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, const struct pipe_box *box, bool to_cpu)
{
uint32_t utile_w = vc4_utile_width(cpp);
uint32_t utile_h = vc4_utile_height(cpp);
@@ -264,33 +337,149 @@ NEON_TAG(vc4_load_lt_image)(void *dst, uint32_t dst_stride,
uint32_t ystart = box->y;
for (uint32_t y = 0; y < box->height; y += utile_h) {
- for (int x = 0; x < box->width; x += utile_w) {
- vc4_load_utile(dst + (dst_stride * y +
- x * cpp),
- src + ((ystart + y) * src_stride +
- (xstart + x) * 64 / utile_w),
- dst_stride, cpp);
+ for (uint32_t x = 0; x < box->width; x += utile_w) {
+ void *gpu_tile = gpu + ((ystart + y) * gpu_stride +
+ (xstart + x) * 64 / utile_w);
+ if (to_cpu) {
+ vc4_load_utile(cpu + (cpu_stride * y +
+ x * cpp),
+ gpu_tile,
+ cpu_stride, cpp);
+ } else {
+ vc4_store_utile(gpu_tile,
+ cpu + (cpu_stride * y +
+ x * cpp),
+ cpu_stride, cpp);
+ }
+ }
+ }
+}
+
+/**
+ * Helper for loading or storing to an LT image, where the box is not aligned
+ * to utiles.
+ *
+ * This walks through the raster-order data, copying to/from the corresponding
+ * tiled pixel. This means we don't get write-combining on stores, but the
+ * loop is very few CPU instructions since the memcpy will be inlined.
+ *
+ * gpu is the tiled image that box->x/box->y are applied to, and gpu_stride
+ * is its stride. cpu points at the raster-order data for the first row of
+ * the box and is advanced by cpu_stride after each row. cpp is the number
+ * of bytes moved per pixel. to_cpu selects the direction: true copies from
+ * gpu to cpu, false copies from cpu to gpu.
+ */
+static inline void
+vc4_lt_image_unaligned(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, const struct pipe_box *box, bool to_cpu)
+{
+
+ /* These are the address bits for the start of the box, split out into
+ * x/y so that they can be incremented separately in their loops.
+ */
+ uint32_t offs_x0 = swizzle_lt_x(box->x, cpp);
+ uint32_t offs_y = swizzle_lt_y(box->y, cpp);
+ /* The *_mask values are "what bits of the address are from x or y" */
+ uint32_t x_mask = swizzle_lt_x(~0, cpp);
+ uint32_t y_mask = swizzle_lt_y(~0, cpp);
+ /* Address step added to offs_x0 for each completed row of utiles. */
+ uint32_t incr_y = swizzle_lt_x(gpu_stride / cpp, cpp);
+
+ assert(!(x_mask & y_mask));
+
+ /* Skip the whole rows of utiles that lie above the box. */
+ offs_x0 += incr_y * (box->y / vc4_utile_height(cpp));
+
+ for (uint32_t y = 0; y < box->height; y++) {
+ void *gpu_row = gpu + offs_y;
+
+ uint32_t offs_x = offs_x0;
+
+ for (uint32_t x = 0; x < box->width; x++) {
+ /* Use a memcpy here to move a pixel's worth of data.
+ * We're relying on this function to be inlined, so
+ * this will get expanded into the appropriate 1, 2,
+ * 4, or 8-byte move.
+ */
+ if (to_cpu) {
+ memcpy(cpu + x * cpp, gpu_row + offs_x, cpp);
+ } else {
+ memcpy(gpu_row + offs_x, cpu + x * cpp, cpp);
+ }
+
+ /* This math trick with x_mask increments offs_x by 1
+ * in x: subtracting the mask carries through the
+ * bits that are not part of x, and the AND then
+ * clears those bits again.
+ */
+ offs_x = (offs_x - x_mask) & x_mask;
}
+
+ offs_y = (offs_y - y_mask) & y_mask;
+ /* When offs_y wraps (we hit the end of the utile), we
+ * increment offs_x0 by effectively the utile stride.
+ */
+ if (!offs_y)
+ offs_x0 += incr_y;
+
+ cpu += cpu_stride;
+ }
+}
+
+/**
+ * General LT image load/store helper.
+ */
+static inline void
+vc4_lt_image_helper(void *gpu, uint32_t gpu_stride,
+                    void *cpu, uint32_t cpu_stride,
+                    int cpp, const struct pipe_box *box, bool to_cpu)
+{
+        /* Low bits that must all be clear for a utile-aligned value. */
+        const uint32_t w_mask = vc4_utile_width(cpp) - 1;
+        const uint32_t h_mask = vc4_utile_height(cpp) - 1;
+
+        /* The box is aligned only if its origin and its size are both
+         * multiples of the utile dimensions.
+         */
+        const bool aligned = !((box->x | box->width) & w_mask) &&
+                             !((box->y | box->height) & h_mask);
+
+        if (aligned) {
+                vc4_lt_image_aligned(gpu, gpu_stride,
+                                     cpu, cpu_stride,
+                                     cpp, box, to_cpu);
+        } else {
+                vc4_lt_image_unaligned(gpu, gpu_stride,
+                                       cpu, cpu_stride,
+                                       cpp, box, to_cpu);
+        }
+}
+
+/**
+ * Dispatches to vc4_lt_image_helper() with cpp passed as a literal constant
+ * in each case -- presumably so that the inlined helpers can be specialized
+ * per pixel size (the unaligned path's per-pixel memcpy relies on inlining).
+ *
+ * Any cpp other than 1, 2, 4 or 8 hits unreachable().
+ */
+static inline void
+vc4_lt_image_cpp_helper(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, const struct pipe_box *box, bool to_cpu)
+{
+ switch (cpp) {
+ case 1:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 1, box,
+ to_cpu);
+ break;
+ case 2:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 2, box,
+ to_cpu);
+ break;
+ case 4:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 4, box,
+ to_cpu);
+ break;
+ case 8:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 8, box,
+ to_cpu);
+ break;
+ default:
+ unreachable("bad cpp");
}
}
+/**
+ * Loads the pixels of box from the LT-tiled image at src into raster-order
+ * memory at dst.
+ *
+ * The shared helper takes its arguments in (gpu, cpu) order, so src/dst are
+ * passed swapped here and to_cpu is true.
+ */
void
+NEON_TAG(vc4_load_lt_image)(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ int cpp, const struct pipe_box *box)
+{
+ vc4_lt_image_cpp_helper(src, src_stride, dst, dst_stride, cpp, box,
+ true);
+}
+
+/**
+ * Stores raster-order pixels from src into the box of the LT-tiled image at
+ * dst. Here dst is the helper's gpu side and src its cpu side, with to_cpu
+ * false.
+ */
+void
NEON_TAG(vc4_store_lt_image)(void *dst, uint32_t dst_stride,
void *src, uint32_t src_stride,
int cpp, const struct pipe_box *box)
{
- uint32_t utile_w = vc4_utile_width(cpp);
- uint32_t utile_h = vc4_utile_height(cpp);
- uint32_t xstart = box->x;
- uint32_t ystart = box->y;
-
- for (uint32_t y = 0; y < box->height; y += utile_h) {
- for (int x = 0; x < box->width; x += utile_w) {
- vc4_store_utile(dst + ((ystart + y) * dst_stride +
- (xstart + x) * 64 / utile_w),
- src + (src_stride * y +
- x * cpp),
- src_stride, cpp);
- }
- }
+ vc4_lt_image_cpp_helper(dst, dst_stride, src, src_stride, cpp, box,
+ false);
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_uniforms.c b/lib/mesa/src/gallium/drivers/vc4/vc4_uniforms.c
index 12e6504bb..3801fbc8f 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -224,14 +224,16 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
uinfo->num_texture_samples);
for (int i = 0; i < uinfo->count; i++) {
+ enum quniform_contents contents = uinfo->contents[i];
+ uint32_t data = uinfo->data[i];
- switch (uinfo->contents[i]) {
+ switch (contents) {
case QUNIFORM_CONSTANT:
- cl_aligned_u32(&uniforms, uinfo->data[i]);
+ cl_aligned_u32(&uniforms, data);
break;
case QUNIFORM_UNIFORM:
cl_aligned_u32(&uniforms,
- gallium_uniforms[uinfo->data[i]]);
+ gallium_uniforms[data]);
break;
case QUNIFORM_VIEWPORT_X_SCALE:
cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
@@ -249,41 +251,49 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
case QUNIFORM_USER_CLIP_PLANE:
cl_aligned_f(&uniforms,
- vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+ vc4->clip.ucp[data / 4][data % 4]);
break;
case QUNIFORM_TEXTURE_CONFIG_P0:
- write_texture_p0(job, &uniforms, texstate,
- uinfo->data[i]);
+ write_texture_p0(job, &uniforms, texstate, data);
break;
case QUNIFORM_TEXTURE_CONFIG_P1:
- write_texture_p1(job, &uniforms, texstate,
- uinfo->data[i]);
+ write_texture_p1(job, &uniforms, texstate, data);
break;
case QUNIFORM_TEXTURE_CONFIG_P2:
- write_texture_p2(job, &uniforms, texstate,
- uinfo->data[i]);
+ write_texture_p2(job, &uniforms, texstate, data);
break;
case QUNIFORM_TEXTURE_FIRST_LEVEL:
write_texture_first_level(job, &uniforms, texstate,
- uinfo->data[i]);
+ data);
break;
case QUNIFORM_UBO_ADDR:
- cl_aligned_reloc(job, &job->uniforms, &uniforms, ubo, 0);
+ if (data == 0) {
+ cl_aligned_reloc(job, &job->uniforms,
+ &uniforms, ubo, 0);
+ } else {
+ struct pipe_constant_buffer *c =
+ &cb->cb[data];
+ struct vc4_resource *rsc =
+ vc4_resource(c->buffer);
+
+ cl_aligned_reloc(job, &job->uniforms,
+ &uniforms,
+ rsc->bo, c->buffer_offset);
+ }
break;
case QUNIFORM_TEXTURE_MSAA_ADDR:
- write_texture_msaa_addr(job, &uniforms,
- texstate, uinfo->data[i]);
+ write_texture_msaa_addr(job, &uniforms, texstate, data);
break;
case QUNIFORM_TEXTURE_BORDER_COLOR:
write_texture_border_color(job, &uniforms,
- texstate, uinfo->data[i]);
+ texstate, data);
break;
case QUNIFORM_TEXRECT_SCALE_X:
@@ -291,7 +301,7 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
cl_aligned_u32(&uniforms,
get_texrect_scale(texstate,
uinfo->contents[i],
- uinfo->data[i]));
+ data));
break;
case QUNIFORM_BLEND_CONST_COLOR_X:
@@ -330,9 +340,9 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
case QUNIFORM_STENCIL:
cl_aligned_u32(&uniforms,
- vc4->zsa->stencil_uniforms[uinfo->data[i]] |
- (uinfo->data[i] <= 1 ?
- (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+ vc4->zsa->stencil_uniforms[data] |
+ (data <= 1 ?
+ (vc4->stencil_ref.ref_value[data] << 8) :
0));
break;
@@ -350,11 +360,18 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
cl_aligned_u32(&uniforms, 0xd0d0d0d0);
break;
}
-#if 0
- uint32_t written_val = *((uint32_t *)uniforms - 1);
- fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
- shader, i, written_val, uif(written_val));
-#endif
+
+ if (false) {
+ uint32_t written_val = *((uint32_t *)uniforms - 1);
+ char *desc = qir_describe_uniform(uinfo->contents[i],
+ uinfo->data[i],
+ gallium_uniforms);
+
+ fprintf(stderr, "%p/%d: 0x%08x %s\n",
+ shader, i, written_val, desc);
+
+ ralloc_free(desc);
+ }
}
cl_end(&job->uniforms, uniforms);