summaryrefslogtreecommitdiff
path: root/dist
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2013-09-05 13:11:20 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2013-09-05 13:11:20 +0000
commita0d72f02a5edb8dd5aa760de1e891e5e8b5a379c (patch)
tree240b6a20bd3b74779bf2eb59fb6a5879c415a730 /dist
parentd8ca9df6585400b918b964c60d84c35599edb86a (diff)
Import Mesa 9.2.0
Diffstat (limited to 'dist')
-rw-r--r--dist/Mesa/src/gallium/drivers/Makefile.am43
-rw-r--r--dist/Mesa/src/gallium/drivers/Makefile.in488
-rw-r--r--dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c301
-rw-r--r--dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c2545
-rw-r--r--dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h9
-rw-r--r--dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h215
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c132
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h4
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c64
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h119
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c29
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c933
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h19
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c697
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h26
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c3454
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h2971
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c1567
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h1551
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h6
-rw-r--r--dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h4
21 files changed, 7318 insertions, 7859 deletions
diff --git a/dist/Mesa/src/gallium/drivers/Makefile.am b/dist/Mesa/src/gallium/drivers/Makefile.am
index f8baa3cf9..22f54b7ad 100644
--- a/dist/Mesa/src/gallium/drivers/Makefile.am
+++ b/dist/Mesa/src/gallium/drivers/Makefile.am
@@ -1,7 +1,46 @@
AUTOMAKE_OPTIONS = subdir-objects
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ $(DEFINES)
-SUBDIRS = . galahad identity noop trace rbug
+AM_CFLAGS = $(VISIBILITY_CFLAGS)
+
+noinst_LTLIBRARIES =
+
+SUBDIRS = . trace rbug
+
+################################################################################
+
+noinst_LTLIBRARIES += galahad/libgalahad.la
+
+galahad_libgalahad_la_SOURCES = \
+ galahad/glhd_objects.c \
+ galahad/glhd_context.c \
+ galahad/glhd_screen.c
+
+################################################################################
+
+noinst_LTLIBRARIES += identity/libidentity.la
+
+identity_libidentity_la_SOURCES = \
+ identity/id_objects.c \
+ identity/id_context.c \
+ identity/id_screen.c
+
+################################################################################
+
+# Meta-driver which combines whichever software rasterizers have been
+# built into a single convenience library.
+
+noinst_LTLIBRARIES += noop/libnoop.la
+
+noop_libnoop_la_SOURCES = \
+ noop/noop_pipe.c \
+ noop/noop_state.c
################################################################################
@@ -47,7 +86,7 @@ endif
if HAVE_GALLIUM_NOUVEAU
-SUBDIRS += nouveau
+SUBDIRS += nouveau nv30 nv50 nvc0
endif
diff --git a/dist/Mesa/src/gallium/drivers/Makefile.in b/dist/Mesa/src/gallium/drivers/Makefile.in
index b4cb7b57c..654fd7dba 100644
--- a/dist/Mesa/src/gallium/drivers/Makefile.in
+++ b/dist/Mesa/src/gallium/drivers/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# Makefile.in generated by automake 1.12.2 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -13,52 +13,25 @@
# PARTICULAR PURPOSE.
@SET_MAKE@
+
VPATH = @srcdir@
-am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
-am__make_running_with_option = \
- case $${target_option-} in \
- ?) ;; \
- *) echo "am__make_running_with_option: internal error: invalid" \
- "target option '$${target_option-}' specified" >&2; \
- exit 1;; \
- esac; \
- has_opt=no; \
- sane_makeflags=$$MAKEFLAGS; \
- if $(am__is_gnu_make); then \
- sane_makeflags=$$MFLAGS; \
- else \
+am__make_dryrun = \
+ { \
+ am__dry=no; \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
- bs=\\; \
- sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
- | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
- esac; \
- fi; \
- skip_next=no; \
- strip_trailopt () \
- { \
- flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
- }; \
- for flg in $$sane_makeflags; do \
- test $$skip_next = yes && { skip_next=no; continue; }; \
- case $$flg in \
- *=*|--*) continue;; \
- -*I) strip_trailopt 'I'; skip_next=yes;; \
- -*I?*) strip_trailopt 'I';; \
- -*O) strip_trailopt 'O'; skip_next=yes;; \
- -*O?*) strip_trailopt 'O';; \
- -*l) strip_trailopt 'l'; skip_next=yes;; \
- -*l?*) strip_trailopt 'l';; \
- -[dEDm]) skip_next=yes;; \
- -[JT]) skip_next=yes;; \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
esac; \
- case $$flg in \
- *$$target_option*) has_opt=yes; break;; \
- esac; \
- done; \
- test $$has_opt = yes
-am__make_dryrun = (target_option=n; $(am__make_running_with_option))
-am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@@ -93,7 +66,7 @@ target_triplet = @target@
@HAVE_GALLIUM_ILO_TRUE@am__append_5 = ilo
################################################################################
-@HAVE_GALLIUM_NOUVEAU_TRUE@am__append_6 = nouveau
+@HAVE_GALLIUM_NOUVEAU_TRUE@am__append_6 = nouveau nv30 nv50 nvc0
################################################################################
@HAVE_GALLIUM_SVGA_TRUE@am__append_7 = svga
@@ -113,21 +86,41 @@ target_triplet = @target@
################################################################################
@NEED_GALLIUM_LLVMPIPE_DRIVER_TRUE@am__append_12 = llvmpipe
subdir = src/gallium/drivers
-DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+ $(top_srcdir)/bin/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
- $(top_srcdir)/m4/ax_gcc_builtin.m4 \
- $(top_srcdir)/m4/ax_prog_bison.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
+ $(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
+ $(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
- $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
- $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
- $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
- $(top_srcdir)/VERSION $(top_srcdir)/configure.ac
+ $(top_srcdir)/m4/ax_pthread.m4 \
+ $(top_srcdir)/m4/ax_python_module.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+galahad_libgalahad_la_LIBADD =
+am__dirstamp = $(am__leading_dot)dirstamp
+am_galahad_libgalahad_la_OBJECTS = galahad/glhd_objects.lo \
+ galahad/glhd_context.lo galahad/glhd_screen.lo
+galahad_libgalahad_la_OBJECTS = $(am_galahad_libgalahad_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+identity_libidentity_la_LIBADD =
+am_identity_libidentity_la_OBJECTS = identity/id_objects.lo \
+ identity/id_context.lo identity/id_screen.lo
+identity_libidentity_la_OBJECTS = \
+ $(am_identity_libidentity_la_OBJECTS)
+noop_libnoop_la_LIBADD =
+am_noop_libnoop_la_OBJECTS = noop/noop_pipe.lo noop/noop_state.lo
+noop_libnoop_la_OBJECTS = $(am_noop_libnoop_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -140,16 +133,39 @@ AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
-SOURCES =
-DIST_SOURCES =
-RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
- ctags-recursive dvi-recursive html-recursive info-recursive \
- install-data-recursive install-dvi-recursive \
- install-exec-recursive install-html-recursive \
- install-info-recursive install-pdf-recursive \
- install-ps-recursive install-recursive installcheck-recursive \
- installdirs-recursive pdf-recursive ps-recursive \
- tags-recursive uninstall-recursive
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(galahad_libgalahad_la_SOURCES) \
+ $(identity_libidentity_la_SOURCES) $(noop_libnoop_la_SOURCES)
+DIST_SOURCES = $(galahad_libgalahad_la_SOURCES) \
+ $(identity_libidentity_la_SOURCES) $(noop_libnoop_la_SOURCES)
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+ html-recursive info-recursive install-data-recursive \
+ install-dvi-recursive install-exec-recursive \
+ install-html-recursive install-info-recursive \
+ install-pdf-recursive install-ps-recursive install-recursive \
+ installcheck-recursive installdirs-recursive pdf-recursive \
+ ps-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
@@ -157,33 +173,13 @@ am__can_run_installinfo = \
esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
-am__recursive_targets = \
- $(RECURSIVE_TARGETS) \
- $(RECURSIVE_CLEAN_TARGETS) \
- $(am__extra_recursive_targets)
-AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
distdir
-am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
-# Read a list of newline-separated strings from the standard input,
-# and print each of them once, without duplicates. Input order is
-# *not* preserved.
-am__uniquify_input = $(AWK) '\
- BEGIN { nonempty = 0; } \
- { items[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in items) print i; }; } \
-'
-# Make sure the list of sources is unique. This is necessary because,
-# e.g., the same source file might be shared among _SOURCES variables
-# for different programs/libraries.
-am__define_uniq_tagged_files = \
- list='$(am__tagged_files)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
-DIST_SUBDIRS = . galahad identity noop trace rbug radeon freedreno \
- i915 ilo nouveau svga r300 r600 radeonsi softpipe llvmpipe
+DIST_SUBDIRS = . trace rbug radeon freedreno i915 ilo nouveau nv30 \
+ nv50 nvc0 svga r300 r600 radeonsi softpipe llvmpipe
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
@@ -218,30 +214,39 @@ AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
+BUILD_EXEEXT = @BUILD_EXEEXT@
+BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
+CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
+CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
+CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
+CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
+CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
+CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
+CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
+DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
-DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@
-DRI3PROTO_LIBS = @DRI3PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
@@ -257,33 +262,41 @@ EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
+EGL_LIB_GLOB = @EGL_LIB_GLOB@
+EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
+EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
-EXPAT_CFLAGS = @EXPAT_CFLAGS@
-EXPAT_LIBS = @EXPAT_LIBS@
+EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
-GALLIUM_PIPE_LOADER_CLIENT_DEFINES = @GALLIUM_PIPE_LOADER_CLIENT_DEFINES@
-GALLIUM_PIPE_LOADER_CLIENT_LIBS = @GALLIUM_PIPE_LOADER_CLIENT_LIBS@
+GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
-GC_SECTIONS = @GC_SECTIONS@
+GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
+GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
+GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
+GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
+GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
+GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
+GL_LIB_GLOB = @GL_LIB_GLOB@
+GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
@@ -300,7 +313,7 @@ INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
-LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
+LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
@@ -308,13 +321,16 @@ LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
+LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
+LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
+LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
+LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
-LIB_EXT = @LIB_EXT@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
@@ -339,13 +355,10 @@ NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
-OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
+OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
@@ -365,8 +378,6 @@ PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
-PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@
-PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
@@ -375,7 +386,6 @@ RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
-SELINUX_CFLAGS = @SELINUX_CFLAGS@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
@@ -387,6 +397,8 @@ VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
+VG_LIB_GLOB = @VG_LIB_GLOB@
+VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
@@ -400,10 +412,15 @@ XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
+XEXT_CFLAGS = @XEXT_CFLAGS@
+XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
+XORG_CFLAGS = @XORG_CFLAGS@
+XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
+XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
@@ -417,7 +434,9 @@ abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
+ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
+ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
@@ -470,14 +489,47 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = subdir-objects
-SUBDIRS = . galahad identity noop trace rbug $(am__append_1) \
- $(am__append_2) $(am__append_3) $(am__append_4) \
- $(am__append_5) $(am__append_6) $(am__append_7) \
- $(am__append_8) $(am__append_9) $(am__append_10) \
- $(am__append_11) $(am__append_12)
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ $(DEFINES)
+
+AM_CFLAGS = $(VISIBILITY_CFLAGS)
+
+################################################################################
+
+################################################################################
+
+################################################################################
+
+# Meta-driver which combines whichever software rasterizers have been
+# built into a single convenience library.
+noinst_LTLIBRARIES = galahad/libgalahad.la identity/libidentity.la \
+ noop/libnoop.la
+SUBDIRS = . trace rbug $(am__append_1) $(am__append_2) $(am__append_3) \
+ $(am__append_4) $(am__append_5) $(am__append_6) \
+ $(am__append_7) $(am__append_8) $(am__append_9) \
+ $(am__append_10) $(am__append_11) $(am__append_12)
+galahad_libgalahad_la_SOURCES = \
+ galahad/glhd_objects.c \
+ galahad/glhd_context.c \
+ galahad/glhd_screen.c
+
+identity_libidentity_la_SOURCES = \
+ identity/id_objects.c \
+ identity/id_context.c \
+ identity/id_screen.c
+
+noop_libnoop_la_SOURCES = \
+ noop/noop_pipe.c \
+ noop/noop_state.c
+
all: all-recursive
.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
@@ -509,11 +561,109 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+galahad/$(am__dirstamp):
+ @$(MKDIR_P) galahad
+ @: > galahad/$(am__dirstamp)
+galahad/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) galahad/$(DEPDIR)
+ @: > galahad/$(DEPDIR)/$(am__dirstamp)
+galahad/glhd_objects.lo: galahad/$(am__dirstamp) \
+ galahad/$(DEPDIR)/$(am__dirstamp)
+galahad/glhd_context.lo: galahad/$(am__dirstamp) \
+ galahad/$(DEPDIR)/$(am__dirstamp)
+galahad/glhd_screen.lo: galahad/$(am__dirstamp) \
+ galahad/$(DEPDIR)/$(am__dirstamp)
+galahad/libgalahad.la: $(galahad_libgalahad_la_OBJECTS) $(galahad_libgalahad_la_DEPENDENCIES) $(EXTRA_galahad_libgalahad_la_DEPENDENCIES) galahad/$(am__dirstamp)
+ $(AM_V_CCLD)$(LINK) $(galahad_libgalahad_la_OBJECTS) $(galahad_libgalahad_la_LIBADD) $(LIBS)
+identity/$(am__dirstamp):
+ @$(MKDIR_P) identity
+ @: > identity/$(am__dirstamp)
+identity/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) identity/$(DEPDIR)
+ @: > identity/$(DEPDIR)/$(am__dirstamp)
+identity/id_objects.lo: identity/$(am__dirstamp) \
+ identity/$(DEPDIR)/$(am__dirstamp)
+identity/id_context.lo: identity/$(am__dirstamp) \
+ identity/$(DEPDIR)/$(am__dirstamp)
+identity/id_screen.lo: identity/$(am__dirstamp) \
+ identity/$(DEPDIR)/$(am__dirstamp)
+identity/libidentity.la: $(identity_libidentity_la_OBJECTS) $(identity_libidentity_la_DEPENDENCIES) $(EXTRA_identity_libidentity_la_DEPENDENCIES) identity/$(am__dirstamp)
+ $(AM_V_CCLD)$(LINK) $(identity_libidentity_la_OBJECTS) $(identity_libidentity_la_LIBADD) $(LIBS)
+noop/$(am__dirstamp):
+ @$(MKDIR_P) noop
+ @: > noop/$(am__dirstamp)
+noop/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) noop/$(DEPDIR)
+ @: > noop/$(DEPDIR)/$(am__dirstamp)
+noop/noop_pipe.lo: noop/$(am__dirstamp) noop/$(DEPDIR)/$(am__dirstamp)
+noop/noop_state.lo: noop/$(am__dirstamp) \
+ noop/$(DEPDIR)/$(am__dirstamp)
+noop/libnoop.la: $(noop_libnoop_la_OBJECTS) $(noop_libnoop_la_DEPENDENCIES) $(EXTRA_noop_libnoop_la_DEPENDENCIES) noop/$(am__dirstamp)
+ $(AM_V_CCLD)$(LINK) $(noop_libnoop_la_OBJECTS) $(noop_libnoop_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+ -rm -f galahad/*.$(OBJEXT)
+ -rm -f galahad/*.lo
+ -rm -f identity/*.$(OBJEXT)
+ -rm -f identity/*.lo
+ -rm -f noop/*.$(OBJEXT)
+ -rm -f noop/*.lo
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@galahad/$(DEPDIR)/glhd_context.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@galahad/$(DEPDIR)/glhd_objects.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@galahad/$(DEPDIR)/glhd_screen.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@identity/$(DEPDIR)/id_context.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@identity/$(DEPDIR)/id_objects.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@identity/$(DEPDIR)/id_screen.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@noop/$(DEPDIR)/noop_pipe.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@noop/$(DEPDIR)/noop_state.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
+ -rm -rf galahad/.libs galahad/_libs
+ -rm -rf identity/.libs identity/_libs
+ -rm -rf noop/.libs noop/_libs
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
@@ -521,13 +671,14 @@ clean-libtool:
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
-$(am__recursive_targets):
- @fail=; \
- if $(am__make_keepgoing); then \
- failcom='fail=yes'; \
- else \
- failcom='exit 1'; \
- fi; \
+$(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
@@ -548,13 +699,31 @@ $(am__recursive_targets):
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+ctags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+ done
+cscopelist-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \
+ done
-ID: $(am__tagged_files)
- $(am__define_uniq_tagged_files); mkid -fID $$unique
-tags: tags-recursive
-TAGS: tags
-
-tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
@@ -570,7 +739,12 @@ tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
- $(am__define_uniq_tagged_files); \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
@@ -582,11 +756,15 @@ tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$$unique; \
fi; \
fi
-ctags: ctags-recursive
-
-CTAGS: ctags
-ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
- $(am__define_uniq_tagged_files); \
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
@@ -595,10 +773,9 @@ GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
-cscopelist: cscopelist-recursive
-cscopelist-am: $(am__tagged_files)
- list='$(am__tagged_files)'; \
+cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
@@ -671,7 +848,7 @@ distdir: $(DISTFILES)
done
check-am: all-am
check: check-recursive
-all-am: Makefile
+all-am: Makefile $(LTLIBRARIES)
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
@@ -700,17 +877,26 @@ clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+ -rm -f galahad/$(DEPDIR)/$(am__dirstamp)
+ -rm -f galahad/$(am__dirstamp)
+ -rm -f identity/$(DEPDIR)/$(am__dirstamp)
+ -rm -f identity/$(am__dirstamp)
+ -rm -f noop/$(DEPDIR)/$(am__dirstamp)
+ -rm -f noop/$(am__dirstamp)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
-clean-am: clean-generic clean-libtool mostlyclean-am
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
distclean: distclean-recursive
+ -rm -rf galahad/$(DEPDIR) identity/$(DEPDIR) noop/$(DEPDIR)
-rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
dvi: dvi-recursive
@@ -753,12 +939,14 @@ install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-recursive
+ -rm -rf galahad/$(DEPDIR) identity/$(DEPDIR) noop/$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
pdf: pdf-recursive
@@ -770,20 +958,24 @@ ps-am:
uninstall-am:
-.MAKE: $(am__recursive_targets) install-am install-strip
-
-.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
- check-am clean clean-generic clean-libtool cscopelist-am ctags \
- ctags-am distclean distclean-generic distclean-libtool \
- distclean-tags distdir dvi dvi-am html html-am info info-am \
- install install-am install-data install-data-am install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- installdirs-am maintainer-clean maintainer-clean-generic \
- mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
- ps ps-am tags tags-am uninstall uninstall-am
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \
+ cscopelist-recursive ctags-recursive install-am install-strip \
+ tags-recursive
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+ all all-am check check-am clean clean-generic clean-libtool \
+ clean-noinstLTLIBRARIES cscopelist cscopelist-recursive ctags \
+ ctags-recursive distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs installdirs-am maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-recursive uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
index 8c3704bf6..4db095f56 100644
--- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
+++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
@@ -107,21 +107,189 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
}
}
+/* Tracking for registers used, read-before-write (input), and
+ * write-after-read (output.. but not 100%)..
+ */
+
+#define MAX_REG 128
+
+typedef struct {
+ uint8_t full[MAX_REG/8];
+ uint8_t half[MAX_REG/8];
+} regmask_t;
+
+static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val)
+{
+ unsigned i = num / 8;
+ unsigned j = num % 8;
+ assert(num < MAX_REG);
+ if (full) {
+ regmask->full[i] = (regmask->full[i] & ~(1 << j)) | (val << j);
+ } else {
+ regmask->half[i] = (regmask->half[i] & ~(1 << j)) | (val << j);
+ }
+}
+
+static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
+{
+ unsigned i = num / 8;
+ unsigned j = num % 8;
+ assert(num < MAX_REG);
+ if (full) {
+ return (regmask->full[i] >> j) & 0x1;
+ } else {
+ return (regmask->half[i] >> j) & 0x1;
+ }
+}
+
+static unsigned regidx(reg_t reg)
+{
+ return (4 * reg.num) + reg.comp;
+}
+
+static struct {
+ regmask_t used;
+ regmask_t rbw; /* read before write */
+ regmask_t war; /* write after read */
+ regmask_t cnst; /* used consts */
+} regs;
+
+static void print_regs(regmask_t *regmask, bool full)
+{
+ int num, max = 0, cnt = 0;
+ int first, last;
+
+ void print_sequence(void)
+ {
+ if (first != MAX_REG) {
+ if (first == last) {
+ printf(" %d", first);
+ } else {
+ printf(" %d-%d", first, last);
+ }
+ }
+ }
+
+ first = last = MAX_REG;
+
+ for (num = 0; num < MAX_REG; num++) {
+ if (regmask_get(regmask, num, full)) {
+ if (num != (last + 1)) {
+ print_sequence();
+ first = num;
+ }
+ last = num;
+ max = num;
+ cnt++;
+ }
+ }
+
+ print_sequence();
+
+ printf(" (cnt=%d, max=%d)", cnt, max);
+}
+
+static void print_reg_stats(int level)
+{
+ printf("%sRegister Stats:\n", levels[level]);
+ printf("%s- used (half):", levels[level]);
+ print_regs(&regs.used, false);
+ printf("\n");
+ printf("%s- used (full):", levels[level]);
+ print_regs(&regs.used, true);
+ printf("\n");
+ printf("%s- input (half):", levels[level]);
+ print_regs(&regs.rbw, false);
+ printf("\n");
+ printf("%s- input (full):", levels[level]);
+ print_regs(&regs.rbw, true);
+ printf("\n");
+ printf("%s- const (half):", levels[level]);
+ print_regs(&regs.cnst, false);
+ printf("\n");
+ printf("%s- const (full):", levels[level]);
+ print_regs(&regs.cnst, true);
+ printf("\n");
+ printf("%s- output (half):", levels[level]);
+ print_regs(&regs.war, false);
+ printf(" (estimated)\n");
+ printf("%s- output (full):", levels[level]);
+ print_regs(&regs.war, true);
+ printf(" (estimated)\n");
+}
+
+/* we have to process the dst register after src to avoid tripping up
+ * the read-before-write detection
+ */
+static unsigned last_dst;
+static bool last_dst_full;
+static bool last_dst_valid = false;
/* current instruction repeat flag: */
static unsigned repeat;
+static void process_reg_dst(void)
+{
+ int i;
+
+ if (!last_dst_valid)
+ return;
+
+ for (i = 0; i <= repeat; i++) {
+ unsigned dst = last_dst + i;
+
+ regmask_set(&regs.war, dst, last_dst_full, 1);
+ regmask_set(&regs.used, dst, last_dst_full, 1);
+ }
+
+ last_dst_valid = false;
+}
+
static void print_reg_dst(reg_t reg, bool full, bool addr_rel)
{
+ /* presumably the special registers a0.c and p0.c don't count.. */
+ if (!(addr_rel || reg_special(reg))) {
+ last_dst = regidx(reg);
+ last_dst_full = full;
+ last_dst_valid = true;
+ }
print_reg(reg, full, false, false, false, false, false, addr_rel);
}
static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
bool neg, bool abs, bool addr_rel)
{
+ /* presumably the special registers a0.c and p0.c don't count.. */
+ if (!(addr_rel || c || im || reg_special(reg))) {
+ int i, num = regidx(reg);
+ for (i = 0; i <= repeat; i++) {
+ unsigned src = num + i;
+
+ if (!regmask_get(&regs.used, src, full))
+ regmask_set(&regs.rbw, src, full, 1);
+
+ regmask_set(&regs.war, src, full, 0);
+ regmask_set(&regs.used, src, full, 1);
+
+ if (!r)
+ break;
+ }
+ } else if (c) {
+ int i, num = regidx(reg);
+ for (i = 0; i <= repeat; i++) {
+ unsigned src = num + i;
+
+ regmask_set(&regs.cnst, src, full, 1);
+
+ if (!r)
+ break;
+ }
+ }
+
print_reg(reg, full, r, c, im, neg, abs, addr_rel);
}
+
static void print_instr_cat0(instr_t *instr)
{
instr_cat0_t *cat0 = &instr->cat0;
@@ -149,7 +317,8 @@ static void print_instr_cat1(instr_t *instr)
{
instr_cat1_t *cat1 = &instr->cat1;
- if (cat1->ul)
+ // XXX maybe a bug in libllvm disassembler?
+ if (cat1->src_rel)
printf("(ul)");
if (cat1->src_type == cat1->dst_type) {
@@ -186,11 +355,10 @@ static void print_instr_cat1(instr_t *instr)
/* I would just use %+d but trying to make it diff'able with
* libllvm-a3xx...
*/
- char type = cat1->src_rel_c ? 'c' : 'r';
if (cat1->off < 0)
- printf("%c<a0.x - %d>", type, -cat1->off);
+ printf("c<a0.x - %d>", -cat1->off);
else if (cat1->off > 0)
- printf("%c<a0.x + %d>", type, cat1->off);
+ printf("c<a0.x + %d>", cat1->off);
else
printf("c<a0.x>");
} else {
@@ -231,21 +399,9 @@ static void print_instr_cat2(instr_t *instr)
printf("(ei)");
print_reg_dst((reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
printf(", ");
-
- if (cat2->c1.src1_c) {
- print_reg_src((reg_t)(cat2->c1.src1), cat2->full, cat2->src1_r,
- cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg,
- cat2->src1_abs, false);
- } else if (cat2->rel1.src1_rel) {
- print_reg_src((reg_t)(cat2->rel1.src1), cat2->full, cat2->src1_r,
- cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg,
- cat2->src1_abs, cat2->rel1.src1_rel);
- } else {
- print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r,
- false, cat2->src1_im, cat2->src1_neg,
- cat2->src1_abs, false);
- }
-
+ print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r,
+ cat2->src1_c, cat2->src1_im, cat2->src1_neg,
+ cat2->src1_abs, cat2->src1_rel);
switch (cat2->opc) {
case OPC_ABSNEG_F:
case OPC_ABSNEG_S:
@@ -265,19 +421,9 @@ static void print_instr_cat2(instr_t *instr)
break;
default:
printf(", ");
- if (cat2->c2.src2_c) {
- print_reg_src((reg_t)(cat2->c2.src2), cat2->full, cat2->src2_r,
- cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg,
- cat2->src2_abs, false);
- } else if (cat2->rel2.src2_rel) {
- print_reg_src((reg_t)(cat2->rel2.src2), cat2->full, cat2->src2_r,
- cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg,
- cat2->src2_abs, cat2->rel2.src2_rel);
- } else {
- print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r,
- false, cat2->src2_im, cat2->src2_neg,
- cat2->src2_abs, false);
- }
+ print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r,
+ cat2->src2_c, cat2->src2_im, cat2->src2_neg,
+ cat2->src2_abs, cat2->src2_rel);
break;
}
}
@@ -285,42 +431,36 @@ static void print_instr_cat2(instr_t *instr)
static void print_instr_cat3(instr_t *instr)
{
instr_cat3_t *cat3 = &instr->cat3;
- bool full = instr_cat3_full(cat3);
+ bool full = true;
+
+ // XXX is this based on opc or some other bit?
+ switch (cat3->opc) {
+ case OPC_MAD_F16:
+ case OPC_MAD_U16:
+ case OPC_MAD_S16:
+ case OPC_SEL_B16:
+ case OPC_SEL_S16:
+ case OPC_SEL_F16:
+ case OPC_SAD_S16:
+ case OPC_SAD_S32: // really??
+ full = false;
+ break;
+ }
printf(" ");
print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false);
printf(", ");
- if (cat3->c1.src1_c) {
- print_reg_src((reg_t)(cat3->c1.src1), full,
- cat3->src1_r, cat3->c1.src1_c, false, cat3->src1_neg,
- false, false);
- } else if (cat3->rel1.src1_rel) {
- print_reg_src((reg_t)(cat3->rel1.src1), full,
- cat3->src1_r, cat3->rel1.src1_c, false, cat3->src1_neg,
- false, cat3->rel1.src1_rel);
- } else {
- print_reg_src((reg_t)(cat3->src1), full,
- cat3->src1_r, false, false, cat3->src1_neg,
- false, false);
- }
+ print_reg_src((reg_t)(cat3->src1), full,
+ cat3->src1_r, cat3->src1_c, false, cat3->src1_neg,
+ false, cat3->src1_rel);
printf(", ");
print_reg_src((reg_t)cat3->src2, full,
cat3->src2_r, cat3->src2_c, false, cat3->src2_neg,
false, false);
printf(", ");
- if (cat3->c2.src3_c) {
- print_reg_src((reg_t)(cat3->c2.src3), full,
- cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg,
- false, false);
- } else if (cat3->rel2.src3_rel) {
- print_reg_src((reg_t)(cat3->rel2.src3), full,
- cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg,
- false, cat3->rel2.src3_rel);
- } else {
- print_reg_src((reg_t)(cat3->src3), full,
- cat3->src3_r, false, false, cat3->src3_neg,
- false, false);
- }
+ print_reg_src((reg_t)(cat3->src3), full,
+ cat3->src3_r, cat3->src3_c, false, cat3->src3_neg,
+ false, cat3->src3_rel);
}
static void print_instr_cat4(instr_t *instr)
@@ -330,20 +470,9 @@ static void print_instr_cat4(instr_t *instr)
printf(" ");
print_reg_dst((reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
printf(", ");
-
- if (cat4->c.src_c) {
- print_reg_src((reg_t)(cat4->c.src), cat4->full,
- cat4->src_r, cat4->c.src_c, cat4->src_im,
- cat4->src_neg, cat4->src_abs, false);
- } else if (cat4->rel.src_rel) {
- print_reg_src((reg_t)(cat4->rel.src), cat4->full,
- cat4->src_r, cat4->rel.src_c, cat4->src_im,
- cat4->src_neg, cat4->src_abs, cat4->rel.src_rel);
- } else {
- print_reg_src((reg_t)(cat4->src), cat4->full,
- cat4->src_r, false, cat4->src_im,
- cat4->src_neg, cat4->src_abs, false);
- }
+ print_reg_src((reg_t)(cat4->src), cat4->full,
+ cat4->src_r, cat4->src_c, cat4->src_im,
+ cat4->src_neg, cat4->src_abs, cat4->src_rel);
if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2))
printf("\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
@@ -733,20 +862,26 @@ struct opc_info {
#undef OPC
};
-#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)]))
+#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)]))
-// XXX hack.. probably should move this table somewhere common:
-#include "ir3.h"
-const char *ir3_instr_name(struct ir3_instruction *instr)
+static uint32_t getopc(instr_t *instr)
{
- if (instr->category == -1) return "??meta??";
- return opcs[(instr->category << NOPC_BITS) | instr->opc].name;
+ switch (instr->opc_cat) {
+ case 0: return instr->cat0.opc;
+ case 1: return 0;
+ case 2: return instr->cat2.opc;
+ case 3: return instr->cat3.opc;
+ case 4: return instr->cat4.opc;
+ case 5: return instr->cat5.opc;
+ case 6: return instr->cat6.opc;
+ default: return 0;
+ }
}
static void print_instr(uint32_t *dwords, int level, int n)
{
instr_t *instr = (instr_t *)dwords;
- uint32_t opc = instr_opc(instr);
+ uint32_t opc = getopc(instr);
const char *name;
printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]);
@@ -790,6 +925,8 @@ static void print_instr(uint32_t *dwords, int level, int n)
}
printf("\n");
+
+ process_reg_dst();
}
int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
@@ -798,8 +935,12 @@ int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
assert((sizedwords % 2) == 0);
+ memset(&regs, 0, sizeof(regs));
+
for (i = 0; i < sizedwords; i += 2)
print_instr(&dwords[i], level, i/2);
+ print_reg_stats(level);
+
return 0;
}
diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 3159e7ade..eabe21cb7 100644
--- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -39,168 +39,136 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
-#include "freedreno_lowering.h"
-
#include "fd3_compiler.h"
#include "fd3_program.h"
#include "fd3_util.h"
#include "instr-a3xx.h"
-#include "ir3.h"
+#include "ir-a3xx.h"
-/* NOTE on half/full precision:
- * Currently, the front end (ie. basically this file) does everything in
- * full precision (with the exception of trans_arl() which doesn't work
- * currently.. we reject anything with relative addressing and fallback
- * to old compiler).
- *
- * In the RA step, if half_precision, it will assign the output to hr0.x
- * but use full precision everywhere else.
- *
- * Eventually we'll need a better way to communicate type information
- * to RA so that it can more properly assign both half and full precision
- * registers. (And presumably double precision pairs for a4xx?) This
- * would let us make more use of half precision registers, while still
- * keeping things like tex coords in full precision registers.
- *
- * Since the RA is dealing with patching instruction types for half
- * precision output, we can ignore that in the front end and just always
- * create full precision instructions.
- */
+/* ************************************************************************* */
+/* split this out or find some helper to use.. like main/bitset.h.. */
-struct fd3_compile_context {
- const struct tgsi_token *tokens;
- bool free_tokens;
- struct ir3_shader *ir;
- struct fd3_shader_variant *so;
+#define MAX_REG 256
- struct ir3_block *block;
- struct ir3_instruction *current_instr;
+typedef uint8_t regmask_t[2 * MAX_REG / 8];
- /* we need to defer updates to block->outputs[] until the end
- * of an instruction (so we don't see new value until *after*
- * the src registers are processed)
- */
- struct {
- struct ir3_instruction *instr, **instrp;
- } output_updates[16];
- unsigned num_output_updates;
+static unsigned regmask_idx(struct ir3_register *reg)
+{
+ unsigned num = reg->num;
+ assert(num < MAX_REG);
+ if (reg->flags & IR3_REG_HALF)
+ num += MAX_REG;
+ return num;
+}
- /* are we in a sequence of "atomic" instructions?
- */
- bool atomic;
+static void regmask_set(regmask_t regmask, struct ir3_register *reg)
+{
+ unsigned idx = regmask_idx(reg);
+ regmask[idx / 8] |= 1 << (idx % 8);
+}
- /* For fragment shaders, from the hw perspective the only
- * actual input is r0.xy position register passed to bary.f.
- * But TGSI doesn't know that, it still declares things as
- * IN[] registers. So we do all the input tracking normally
- * and fix things up after compile_instructions()
- *
- * NOTE that frag_pos is the hardware position (possibly it
- * is actually an index or tag or some such.. it is *not*
- * values that can be directly used for gl_FragCoord..)
- */
- struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
+static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
+{
+ unsigned idx = regmask_idx(reg);
+ return regmask[idx / 8] & (1 << (idx % 8));
+}
+
+/* ************************************************************************* */
+
+struct fd3_compile_context {
+ const struct tgsi_token *tokens;
+ struct ir3_shader *ir;
+ struct fd3_shader_stateobj *so;
struct tgsi_parse_context parser;
unsigned type;
struct tgsi_shader_info info;
- /* for calculating input/output positions/linkages: */
- unsigned next_inloc;
+ /* last input dst (for setting (ei) flag): */
+ struct ir3_register *last_input;
+ unsigned next_inloc;
unsigned num_internal_temps;
- struct tgsi_src_register internal_temps[6];
+
+ /* track registers which need to synchronize w/ "complex alu" cat3
+ * instruction pipeline:
+ */
+ regmask_t needs_ss;
+
+ /* track registers which need to synchronize with texture fetch
+ * pipeline:
+ */
+ regmask_t needs_sy;
+
+ /* inputs start at r0, outputs start after last input, and
+ * temporaries start after last output.
+ *
+ * We could be more clever, because this is not a hw restriction,
+ * but probably best just to implement an optimizing pass to
+ * reduce the # of registers used and get rid of redundant mov's
+ * (to output register).
+ */
+ unsigned base_reg[TGSI_FILE_COUNT];
/* idx/slot for last compiler generated immediate */
unsigned immediate_idx;
- /* stack of branch instructions that mark (potentially nested)
- * branch if/else/loop/etc
+ /* stack of branch instructions that start (potentially nested)
+ * branches, kept so that we can fix up the branch target of
+ * each one when we reach the corresponding END
+ * instruction
*/
- struct {
- struct ir3_instruction *instr, *cond;
- bool inv; /* true iff in else leg of branch */
- } branch[16];
+ struct ir3_instruction *branch[16];
unsigned int branch_count;
- /* list of kill instructions: */
- struct ir3_instruction *kill[16];
- unsigned int kill_count;
-
/* used when dst is same as one of the src, to avoid overwriting a
* src element before the remaining scalar instructions that make
* up the vector operation
*/
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
+ struct tgsi_src_register tmp_src;
};
-
-static void vectorize(struct fd3_compile_context *ctx,
- struct ir3_instruction *instr, struct tgsi_dst_register *dst,
- int nsrcs, ...);
-static void create_mov(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *dst, struct tgsi_src_register *src);
-static type_t get_ftype(struct fd3_compile_context *ctx);
-
static unsigned
-compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
+compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
const struct tgsi_token *tokens)
{
unsigned ret;
- struct tgsi_shader_info *info = &ctx->info;
- const struct fd_lowering_config lconfig = {
- .color_two_side = so->key.color_two_side,
- .lower_DST = true,
- .lower_XPD = true,
- .lower_SCS = true,
- .lower_LRP = true,
- .lower_FRC = true,
- .lower_POW = true,
- .lower_LIT = true,
- .lower_EXP = true,
- .lower_LOG = true,
- .lower_DP4 = true,
- .lower_DP3 = true,
- .lower_DPH = true,
- .lower_DP2 = true,
- .lower_DP2A = true,
- };
- ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info);
- ctx->free_tokens = !!ctx->tokens;
- if (!ctx->tokens) {
- /* no lowering */
- ctx->tokens = tokens;
- }
+ ctx->tokens = tokens;
ctx->ir = so->ir;
ctx->so = so;
+ ctx->last_input = NULL;
ctx->next_inloc = 8;
ctx->num_internal_temps = 0;
ctx->branch_count = 0;
- ctx->kill_count = 0;
- ctx->block = NULL;
- ctx->current_instr = NULL;
- ctx->num_output_updates = 0;
- ctx->atomic = false;
- ctx->frag_pos = NULL;
- ctx->frag_face = NULL;
-
- memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
-
-#define FM(x) (1 << TGSI_FILE_##x)
- /* optimize can't deal with relative addressing: */
- if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) |
- FM(OUTPUT) | FM(IMMEDIATE) | FM(CONSTANT)))
- return TGSI_PARSE_ERROR;
- /* Immediates go after constants: */
- so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
- ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
+ memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+ memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+ memset(ctx->base_reg, 0, sizeof(ctx->base_reg));
- ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
+ tgsi_scan_shader(tokens, &ctx->info);
+
+ /* Immediates go after constants: */
+ ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
+ ctx->base_reg[TGSI_FILE_IMMEDIATE] =
+ ctx->info.file_count[TGSI_FILE_CONSTANT];
+
+ /* Temporaries after outputs after inputs: */
+ ctx->base_reg[TGSI_FILE_INPUT] = 0;
+ ctx->base_reg[TGSI_FILE_OUTPUT] =
+ ctx->info.file_count[TGSI_FILE_INPUT];
+ ctx->base_reg[TGSI_FILE_TEMPORARY] =
+ ctx->info.file_count[TGSI_FILE_INPUT] +
+ ctx->info.file_count[TGSI_FILE_OUTPUT];
+
+ so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
+ ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] +
+ ctx->info.file_count[TGSI_FILE_IMMEDIATE]);
+
+ ret = tgsi_parse_init(&ctx->parser, tokens);
if (ret != TGSI_PARSE_OK)
return ret;
@@ -210,25 +178,8 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
}
static void
-compile_error(struct fd3_compile_context *ctx, const char *format, ...)
-{
- va_list ap;
- va_start(ap, format);
- _debug_vprintf(format, ap);
- va_end(ap);
- tgsi_dump(ctx->tokens, 0);
- debug_assert(0);
-}
-
-#define compile_assert(ctx, cond) do { \
- if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
- } while (0)
-
-static void
compile_free(struct fd3_compile_context *ctx)
{
- if (ctx->free_tokens)
- free((void *)ctx->tokens);
tgsi_parse_free(&ctx->parser);
}
@@ -242,385 +193,55 @@ struct instr_translater {
unsigned arg;
};
-static void
-instr_finish(struct fd3_compile_context *ctx)
-{
- unsigned i;
-
- if (ctx->atomic)
- return;
-
- for (i = 0; i < ctx->num_output_updates; i++)
- *(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;
-
- ctx->num_output_updates = 0;
-}
-
-/* For "atomic" groups of instructions, for example the four scalar
- * instructions to perform a vec4 operation. Basically this just
- * blocks out handling of output_updates so the next scalar instruction
- * still sees the result from before the start of the atomic group.
- *
- * NOTE: when used properly, this could probably replace get/put_dst()
- * stuff.
- */
-static void
-instr_atomic_start(struct fd3_compile_context *ctx)
-{
- ctx->atomic = true;
-}
-
-static void
-instr_atomic_end(struct fd3_compile_context *ctx)
-{
- ctx->atomic = false;
- instr_finish(ctx);
-}
-
-static struct ir3_instruction *
-instr_create(struct fd3_compile_context *ctx, int category, opc_t opc)
-{
- instr_finish(ctx);
- return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc));
-}
-
-static struct ir3_instruction *
-instr_clone(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
-{
- instr_finish(ctx);
- return (ctx->current_instr = ir3_instr_clone(instr));
-}
-
-static struct ir3_block *
-push_block(struct fd3_compile_context *ctx)
-{
- struct ir3_block *block;
- unsigned ntmp, nin, nout;
-
-#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1))
-
- /* hmm, give ourselves room to create 4 extra temporaries (vec4):
- */
- ntmp = SCALAR_REGS(TEMPORARY);
- ntmp += 4 * 4;
-
- nout = SCALAR_REGS(OUTPUT);
- nin = SCALAR_REGS(INPUT);
-
- /* for outermost block, 'inputs' are the actual shader INPUT
- * register file. Reads from INPUT registers always go back to
- * top block. For nested blocks, 'inputs' is used to track any
- * TEMPORARY file register from one of the enclosing blocks that
- * is ready in this block.
- */
- if (!ctx->block) {
- /* NOTE: fragment shaders actually have two inputs (r0.xy, the
- * position)
- */
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- int n = 2;
- if (ctx->info.reads_position)
- n += 4;
- if (ctx->info.uses_frontface)
- n += 4;
- nin = MAX2(n, nin);
- nout += ARRAY_SIZE(ctx->kill);
- }
- } else {
- nin = ntmp;
- }
-
- block = ir3_block_create(ctx->ir, ntmp, nin, nout);
-
- if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block)
- block->noutputs -= ARRAY_SIZE(ctx->kill);
-
- block->parent = ctx->block;
- ctx->block = block;
-
- return block;
-}
-
-static void
-pop_block(struct fd3_compile_context *ctx)
-{
- ctx->block = ctx->block->parent;
- compile_assert(ctx, ctx->block);
-}
-
-static void
-ssa_dst(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
- const struct tgsi_dst_register *dst, unsigned chan)
-{
- unsigned n = regid(dst->Index, chan);
- unsigned idx = ctx->num_output_updates;
-
- compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates));
-
- /* NOTE: defer update of temporaries[idx] or output[idx]
- * until instr_finish(), so that if the current instruction
- * reads the same TEMP/OUT[] it gets the old value:
- *
- * bleh.. this might be a bit easier to just figure out
- * in instr_finish(). But at that point we've already
- * lost information about OUTPUT vs TEMPORARY register
- * file..
- */
-
- switch (dst->File) {
- case TGSI_FILE_OUTPUT:
- compile_assert(ctx, n < ctx->block->noutputs);
- ctx->output_updates[idx].instrp = &ctx->block->outputs[n];
- ctx->output_updates[idx].instr = instr;
- ctx->num_output_updates++;
- break;
- case TGSI_FILE_TEMPORARY:
- compile_assert(ctx, n < ctx->block->ntemporaries);
- ctx->output_updates[idx].instrp = &ctx->block->temporaries[n];
- ctx->output_updates[idx].instr = instr;
- ctx->num_output_updates++;
- break;
- }
-}
-
-static struct ir3_instruction *
-create_output(struct ir3_block *block, struct ir3_instruction *instr,
- unsigned n)
-{
- struct ir3_instruction *out;
-
- out = ir3_instr_create(block, -1, OPC_META_OUTPUT);
- out->inout.block = block;
- ir3_reg_create(out, n, 0);
- if (instr)
- ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr;
-
- return out;
-}
-
-static struct ir3_instruction *
-create_input(struct ir3_block *block, struct ir3_instruction *instr,
- unsigned n)
-{
- struct ir3_instruction *in;
-
- in = ir3_instr_create(block, -1, OPC_META_INPUT);
- in->inout.block = block;
- ir3_reg_create(in, n, 0);
- if (instr)
- ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
-
- return in;
-}
-
-static struct ir3_instruction *
-block_input(struct ir3_block *block, unsigned n)
-{
- /* references to INPUT register file always go back up to
- * top level:
- */
- if (block->parent)
- return block_input(block->parent, n);
- return block->inputs[n];
-}
-
-/* return temporary in scope, creating if needed meta-input node
- * to track block inputs
- */
-static struct ir3_instruction *
-block_temporary(struct ir3_block *block, unsigned n)
-{
- /* references to TEMPORARY register file, find the nearest
- * enclosing block which has already assigned this temporary,
- * creating meta-input instructions along the way to keep
- * track of block inputs
- */
- if (block->parent && !block->temporaries[n]) {
- /* if already have input for this block, reuse: */
- if (!block->inputs[n])
- block->inputs[n] = block_temporary(block->parent, n);
-
- /* and create new input to return: */
- return create_input(block, block->inputs[n], n);
- }
- return block->temporaries[n];
-}
-
-static struct ir3_instruction *
-create_immed(struct fd3_compile_context *ctx, float val)
-{
- /* this can happen when registers (or components of a TGSI
- * register) are used as src before they have been assigned
- * (undefined contents). To avoid confusing the rest of the
- * compiler, and to generally keep things peachy, substitute
- * an instruction that sets the src to 0.0. Or to keep
- * things undefined, I could plug in a random number? :-P
- *
- * NOTE: *don't* use instr_create() here!
- */
- struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->block, 1, 0);
- instr->cat1.src_type = get_ftype(ctx);
- instr->cat1.dst_type = get_ftype(ctx);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val;
- return instr;
-}
-
-static void
-ssa_src(struct fd3_compile_context *ctx, struct ir3_register *reg,
- const struct tgsi_src_register *src, unsigned chan)
-{
- struct ir3_block *block = ctx->block;
- unsigned n = regid(src->Index, chan);
-
- switch (src->File) {
- case TGSI_FILE_INPUT:
- reg->flags |= IR3_REG_SSA;
- reg->instr = block_input(ctx->block, n);
- break;
- case TGSI_FILE_OUTPUT:
- /* really this should just happen in case of 'MOV_SAT OUT[n], ..',
- * for the following clamp instructions:
- */
- reg->flags |= IR3_REG_SSA;
- reg->instr = block->outputs[n];
- /* we don't have to worry about read from an OUTPUT that was
- * assigned outside of the current block, because the _SAT
- * clamp instructions will always be in the same block as
- * the original instruction which wrote the OUTPUT
- */
- compile_assert(ctx, reg->instr);
- break;
- case TGSI_FILE_TEMPORARY:
- reg->flags |= IR3_REG_SSA;
- reg->instr = block_temporary(ctx->block, n);
- break;
- }
-
- if ((reg->flags & IR3_REG_SSA) && !reg->instr) {
- /* this can happen when registers (or components of a TGSI
- * register) are used as src before they have been assigned
- * (undefined contents). To avoid confusing the rest of the
- * compiler, and to generally keep things peachy, substitute
- * an instruction that sets the src to 0.0. Or to keep
- * things undefined, I could plug in a random number? :-P
- *
- * NOTE: *don't* use instr_create() here!
- */
- reg->instr = create_immed(ctx, 0.0);
- }
-}
-
static struct ir3_register *
-add_dst_reg_wrmask(struct fd3_compile_context *ctx,
- struct ir3_instruction *instr, const struct tgsi_dst_register *dst,
- unsigned chan, unsigned wrmask)
+add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ const struct tgsi_dst_register *dst, unsigned chan)
{
unsigned flags = 0, num = 0;
- struct ir3_register *reg;
switch (dst->File) {
case TGSI_FILE_OUTPUT:
case TGSI_FILE_TEMPORARY:
- /* uses SSA */
- break;
- case TGSI_FILE_ADDRESS:
- num = REG_A0;
+ num = dst->Index + ctx->base_reg[dst->File];
break;
default:
- compile_error(ctx, "unsupported dst register file: %s\n",
+ DBG("unsupported dst register file: %s",
tgsi_file_name(dst->File));
+ assert(0);
break;
}
- if (dst->Indirect)
- flags |= IR3_REG_RELATIV;
-
- reg = ir3_reg_create(instr, regid(num, chan), flags);
-
- /* NOTE: do not call ssa_dst() if atomic.. vectorize()
- * itself will call ssa_dst(). This is to filter out
- * the (initially bogus) .x component dst which is
- * created (but not necessarily used, ie. if the net
- * result of the vector operation does not write to
- * the .x component)
- */
-
- reg->wrmask = wrmask;
- if (wrmask == 0x1) {
- /* normal case */
- if (!ctx->atomic)
- ssa_dst(ctx, instr, dst, chan);
- } else if ((dst->File == TGSI_FILE_TEMPORARY) ||
- (dst->File == TGSI_FILE_OUTPUT)) {
- unsigned i;
-
- /* if instruction writes multiple, we need to create
- * some place-holder collect the registers:
- */
- for (i = 0; i < 4; i++) {
- if (wrmask & (1 << i)) {
- struct ir3_instruction *collect =
- ir3_instr_create(ctx->block, -1, OPC_META_FO);
- collect->fo.off = i;
- /* unused dst reg: */
- ir3_reg_create(collect, 0, 0);
- /* and src reg used to hold original instr */
- ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = instr;
- if (!ctx->atomic)
- ssa_dst(ctx, collect, dst, chan+i);
- }
- }
- }
-
- return reg;
-}
+ if (ctx->so->half_precision)
+ flags |= IR3_REG_HALF;
-static struct ir3_register *
-add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
- const struct tgsi_dst_register *dst, unsigned chan)
-{
- return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1);
+ return ir3_reg_create(instr, regid(num, chan), flags);
}
static struct ir3_register *
-add_src_reg_wrmask(struct fd3_compile_context *ctx,
- struct ir3_instruction *instr, const struct tgsi_src_register *src,
- unsigned chan, unsigned wrmask)
+add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ const struct tgsi_src_register *src, unsigned chan)
{
unsigned flags = 0, num = 0;
struct ir3_register *reg;
- /* TODO we need to use a mov to temp for const >= 64.. or maybe
- * we could use relative addressing..
- */
- compile_assert(ctx, src->Index < 64);
-
switch (src->File) {
case TGSI_FILE_IMMEDIATE:
/* TODO if possible, use actual immediate instead of const.. but
* TGSI has vec4 immediates, we can only embed scalar (of limited
* size, depending on instruction..)
*/
- flags |= IR3_REG_CONST;
- num = src->Index + ctx->so->first_immediate;
- break;
case TGSI_FILE_CONSTANT:
flags |= IR3_REG_CONST;
- num = src->Index;
+ num = src->Index + ctx->base_reg[src->File];
break;
- case TGSI_FILE_OUTPUT:
- /* NOTE: we should only end up w/ OUTPUT file for things like
- * clamp()'ing saturated dst instructions
- */
case TGSI_FILE_INPUT:
case TGSI_FILE_TEMPORARY:
- /* uses SSA */
+ num = src->Index + ctx->base_reg[src->File];
break;
default:
- compile_error(ctx, "unsupported src register file: %s\n",
+ DBG("unsupported src register file: %s",
tgsi_file_name(src->File));
+ assert(0);
break;
}
@@ -628,54 +249,24 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx,
flags |= IR3_REG_ABS;
if (src->Negate)
flags |= IR3_REG_NEGATE;
- if (src->Indirect)
- flags |= IR3_REG_RELATIV;
+ if (ctx->so->half_precision)
+ flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
- reg->wrmask = wrmask;
- if (wrmask == 0x1) {
- /* normal case */
- ssa_src(ctx, reg, src, chan);
- } else if ((src->File == TGSI_FILE_TEMPORARY) ||
- (src->File == TGSI_FILE_OUTPUT) ||
- (src->File == TGSI_FILE_INPUT)) {
- struct ir3_instruction *collect;
- unsigned i;
-
- /* if instruction reads multiple, we need to create
- * some place-holder collect the registers:
- */
- collect = ir3_instr_create(ctx->block, -1, OPC_META_FI);
- ir3_reg_create(collect, 0, 0); /* unused dst reg */
-
- for (i = 0; i < 4; i++) {
- if (wrmask & (1 << i)) {
- /* and src reg used point to the original instr */
- ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
- src, chan + i);
- } else if (wrmask & ~((i << i) - 1)) {
- /* if any remaining components, then dummy
- * placeholder src reg to fill in the blanks:
- */
- ir3_reg_create(collect, 0, 0);
- }
- }
+ if (regmask_get(ctx->needs_ss, reg)) {
+ instr->flags |= IR3_INSTR_SS;
+ memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+ }
- reg->flags |= IR3_REG_SSA;
- reg->instr = collect;
+ if (regmask_get(ctx->needs_sy, reg)) {
+ instr->flags |= IR3_INSTR_SY;
+ memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
}
return reg;
}
-static struct ir3_register *
-add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
- const struct tgsi_src_register *src, unsigned chan)
-{
- return add_src_reg_wrmask(ctx, instr, src, chan, 0x1);
-}
-
static void
src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
{
@@ -694,38 +285,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
/* Get internal-temp src/dst to use for a sequence of instructions
* generated by a single TGSI op.
*/
-static struct tgsi_src_register *
+static void
get_internal_temp(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *tmp_dst)
-{
- struct tgsi_src_register *tmp_src;
- int n;
-
- tmp_dst->File = TGSI_FILE_TEMPORARY;
- tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
- tmp_dst->Indirect = 0;
- tmp_dst->Dimension = 0;
-
- /* assign next temporary: */
- n = ctx->num_internal_temps++;
- compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
- tmp_src = &ctx->internal_temps[n];
-
- tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
-
- src_from_dst(tmp_src, tmp_dst);
-
- return tmp_src;
-}
-
-/* Get internal half-precision temp src/dst to use for a sequence of
- * instructions generated by a single TGSI op.
- */
-static struct tgsi_src_register *
-get_internal_temp_hr(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *tmp_dst)
+ struct tgsi_dst_register *tmp_dst,
+ struct tgsi_src_register *tmp_src)
{
- struct tgsi_src_register *tmp_src;
int n;
tmp_dst->File = TGSI_FILE_TEMPORARY;
@@ -735,79 +299,23 @@ get_internal_temp_hr(struct fd3_compile_context *ctx,
/* assign next temporary: */
n = ctx->num_internal_temps++;
- compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
- tmp_src = &ctx->internal_temps[n];
- /* just use hr0 because no one else should be using half-
- * precision regs:
- */
- tmp_dst->Index = 0;
+ tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n;
src_from_dst(tmp_src, tmp_dst);
-
- return tmp_src;
-}
-
-static inline bool
-is_const(struct tgsi_src_register *src)
-{
- return (src->File == TGSI_FILE_CONSTANT) ||
- (src->File == TGSI_FILE_IMMEDIATE);
-}
-
-static inline bool
-is_relative(struct tgsi_src_register *src)
-{
- return src->Indirect;
-}
-
-static inline bool
-is_rel_or_const(struct tgsi_src_register *src)
-{
- return is_relative(src) || is_const(src);
-}
-
-static type_t
-get_ftype(struct fd3_compile_context *ctx)
-{
- return TYPE_F32;
-}
-
-static type_t
-get_utype(struct fd3_compile_context *ctx)
-{
- return TYPE_U32;
}
-static unsigned
-src_swiz(struct tgsi_src_register *src, int chan)
-{
- switch (chan) {
- case 0: return src->SwizzleX;
- case 1: return src->SwizzleY;
- case 2: return src->SwizzleZ;
- case 3: return src->SwizzleW;
- }
- assert(0);
- return 0;
-}
-
-/* for instructions that cannot take a const register as src, if needed
- * generate a move to temporary gpr:
+/* same as get_internal_temp, but w/ src.xxxx (for instructions that
+ * replicate their results)
*/
-static struct tgsi_src_register *
-get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
+static void
+get_internal_temp_repl(struct fd3_compile_context *ctx,
+ struct tgsi_dst_register *tmp_dst,
+ struct tgsi_src_register *tmp_src)
{
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
-
- compile_assert(ctx, is_rel_or_const(src));
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
-
- create_mov(ctx, &tmp_dst, src);
-
- return tmp_src;
+ get_internal_temp(ctx, tmp_dst, tmp_src);
+ tmp_src->SwizzleX = tmp_src->SwizzleY =
+ tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
}
static void
@@ -857,63 +365,46 @@ get_immediate(struct fd3_compile_context *ctx,
reg->SwizzleW = swiz2tgsi[swiz];
}
+static type_t
+get_type(struct fd3_compile_context *ctx)
+{
+ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+}
+
+static unsigned
+src_swiz(struct tgsi_src_register *src, int chan)
+{
+ switch (chan) {
+ case 0: return src->SwizzleX;
+ case 1: return src->SwizzleY;
+ case 2: return src->SwizzleZ;
+ case 3: return src->SwizzleW;
+ }
+ assert(0);
+ return 0;
+}
+
static void
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
struct tgsi_src_register *src)
{
- type_t type_mov = get_ftype(ctx);
+ type_t type_mov = get_type(ctx);
unsigned i;
for (i = 0; i < 4; i++) {
/* move to destination: */
if (dst->WriteMask & (1 << i)) {
- struct ir3_instruction *instr;
-
- if (src->Absolute || src->Negate) {
- /* can't have abs or neg on a mov instr, so use
- * absneg.f instead to handle these cases:
- */
- instr = instr_create(ctx, 2, OPC_ABSNEG_F);
- } else {
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- }
-
+ struct ir3_instruction *instr =
+ ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = type_mov;
+ instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, dst, i);
add_src_reg(ctx, instr, src, src_swiz(src, i));
+ } else {
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
}
}
-}
-
-static void
-create_clamp(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *dst, struct tgsi_src_register *val,
- struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
-{
- struct ir3_instruction *instr;
-
- instr = instr_create(ctx, 2, OPC_MAX_F);
- vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
- instr = instr_create(ctx, 2, OPC_MIN_F);
- vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
-}
-
-static void
-create_clamp_imm(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *dst,
- uint32_t minval, uint32_t maxval)
-{
- struct tgsi_src_register minconst, maxconst;
- struct tgsi_src_register src;
-
- src_from_dst(&src, dst);
-
- get_immediate(ctx, &minconst, minval);
- get_immediate(ctx, &maxconst, maxval);
-
- create_clamp(ctx, dst, &src, &minconst, &maxconst);
}
static struct tgsi_dst_register *
@@ -924,13 +415,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_src_register *src = &inst->Src[i].Register;
if ((src->File == dst->File) && (src->Index == dst->Index)) {
- if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
- (src->SwizzleX == TGSI_SWIZZLE_X) &&
- (src->SwizzleY == TGSI_SWIZZLE_Y) &&
- (src->SwizzleZ == TGSI_SWIZZLE_Z) &&
- (src->SwizzleW == TGSI_SWIZZLE_W))
- continue;
- ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
+ get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
ctx->tmp_dst.WriteMask = dst->WriteMask;
dst = &ctx->tmp_dst;
break;
@@ -945,7 +430,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
{
/* if necessary, add mov back into original dst: */
if (dst != &inst->Dst[0].Register) {
- create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
+ create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src);
}
}
@@ -959,26 +444,14 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
va_list ap;
int i, j, n = 0;
- instr_atomic_start(ctx);
-
- add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);
+ add_dst_reg(ctx, instr, dst, 0);
va_start(ap, nsrcs);
for (j = 0; j < nsrcs; j++) {
struct tgsi_src_register *src =
va_arg(ap, struct tgsi_src_register *);
unsigned flags = va_arg(ap, unsigned);
- struct ir3_register *reg;
- if (flags & IR3_REG_IMMED) {
- reg = ir3_reg_create(instr, 0, IR3_REG_IMMED);
- /* this is an ugly cast.. should have put flags first! */
- reg->iim_val = *(int *)&src;
- } else {
- reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
- }
- reg->flags |= flags & ~IR3_REG_NEGATE;
- if (flags & IR3_REG_NEGATE)
- reg->flags ^= IR3_REG_NEGATE;
+ add_src_reg(ctx, instr, src, 0)->flags |= flags;
}
va_end(ap);
@@ -989,32 +462,33 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
if (n++ == 0) {
cur = instr;
} else {
- cur = instr_clone(ctx, instr);
+ cur = ir3_instr_clone(instr);
+ cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
}
- ssa_dst(ctx, cur, dst, i);
-
/* fix-up dst register component: */
cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);
/* fix-up src register component: */
va_start(ap, nsrcs);
for (j = 0; j < nsrcs; j++) {
- struct ir3_register *reg = cur->regs[j+1];
struct tgsi_src_register *src =
va_arg(ap, struct tgsi_src_register *);
- unsigned flags = va_arg(ap, unsigned);
- if (reg->flags & IR3_REG_SSA) {
- ssa_src(ctx, reg, src, src_swiz(src, i));
- } else if (!(flags & IR3_REG_IMMED)) {
- reg->num = regid(reg->num >> 2, src_swiz(src, i));
- }
+ (void)va_arg(ap, unsigned);
+ cur->regs[j+1]->num =
+ regid(cur->regs[j+1]->num >> 2,
+ src_swiz(src, i));
}
va_end(ap);
}
}
- instr_atomic_end(ctx);
+ /* pad w/ nop's.. at least until we are clever enough to
+ * figure out if we really need to..
+ */
+ for (; n < 4; n++) {
+ ir3_instr_create(instr->shader, 0, OPC_NOP);
+ }
}
/*
@@ -1023,832 +497,397 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
*/
static void
-trans_clamp(const struct instr_translater *t,
- struct fd3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src0 = &inst->Src[0].Register;
- struct tgsi_src_register *src1 = &inst->Src[1].Register;
- struct tgsi_src_register *src2 = &inst->Src[2].Register;
-
- create_clamp(ctx, dst, src0, src1, src2);
-
- put_dst(ctx, inst, dst);
-}
-
-/* ARL(x) = x, but mova from hrN.x to a0.. */
-static void
-trans_arl(const struct instr_translater *t,
+trans_dotp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *src = &inst->Src[0].Register;
- unsigned chan = src->SwizzleX;
- compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);
-
- tmp_src = get_internal_temp_hr(ctx, &tmp_dst);
-
- /* cov.{f32,f16}s16 Rtmp, Rsrc */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_ftype(ctx);
- instr->cat1.dst_type = TYPE_S16;
- add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
- add_src_reg(ctx, instr, src, chan);
-
- /* shl.b Rtmp, Rtmp, 2 */
- instr = instr_create(ctx, 2, OPC_SHL_B);
- add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
- add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
-
- /* mova a0, Rtmp */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = TYPE_S16;
- instr->cat1.dst_type = TYPE_S16;
- add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
- add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
-}
-
-/*
- * texture fetch/sample instructions:
- */
+ struct tgsi_src_register tmp_src;
+ struct tgsi_dst_register *dst = &inst->Dst[0].Register;
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
+ unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
+ unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
+ opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
+ unsigned n = t->arg; /* number of components */
+ unsigned i;
-struct tex_info {
- int8_t order[4];
- unsigned src_wrmask, flags;
-};
+ get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
-static const struct tex_info *
-get_tex_info(struct fd3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- static const struct tex_info tex1d = {
- .order = { 0, -1, -1, -1 }, /* coord.x */
- .src_wrmask = TGSI_WRITEMASK_XY,
- .flags = 0,
- };
- static const struct tex_info tex1ds = {
- .order = { 0, -1, 2, -1 }, /* coord.xz */
- .src_wrmask = TGSI_WRITEMASK_XYZ,
- .flags = IR3_INSTR_S,
- };
- static const struct tex_info tex2d = {
- .order = { 0, 1, -1, -1 }, /* coord.xy */
- .src_wrmask = TGSI_WRITEMASK_XY,
- .flags = 0,
- };
- static const struct tex_info tex2ds = {
- .order = { 0, 1, 2, -1 }, /* coord.xyz */
- .src_wrmask = TGSI_WRITEMASK_XYZ,
- .flags = IR3_INSTR_S,
- };
- static const struct tex_info tex3d = {
- .order = { 0, 1, 2, -1 }, /* coord.xyz */
- .src_wrmask = TGSI_WRITEMASK_XYZ,
- .flags = IR3_INSTR_3D,
- };
- static const struct tex_info tex3ds = {
- .order = { 0, 1, 2, 3 }, /* coord.xyzw */
- .src_wrmask = TGSI_WRITEMASK_XYZW,
- .flags = IR3_INSTR_S | IR3_INSTR_3D,
- };
- static const struct tex_info txp1d = {
- .order = { 0, -1, 3, -1 }, /* coord.xw */
- .src_wrmask = TGSI_WRITEMASK_XYZ,
- .flags = IR3_INSTR_P,
- };
- static const struct tex_info txp1ds = {
- .order = { 0, -1, 2, 3 }, /* coord.xzw */
- .src_wrmask = TGSI_WRITEMASK_XYZW,
- .flags = IR3_INSTR_P | IR3_INSTR_S,
- };
- static const struct tex_info txp2d = {
- .order = { 0, 1, 3, -1 }, /* coord.xyw */
- .src_wrmask = TGSI_WRITEMASK_XYZ,
- .flags = IR3_INSTR_P,
- };
- static const struct tex_info txp2ds = {
- .order = { 0, 1, 2, 3 }, /* coord.xyzw */
- .src_wrmask = TGSI_WRITEMASK_XYZW,
- .flags = IR3_INSTR_P | IR3_INSTR_S,
- };
- static const struct tex_info txp3d = {
- .order = { 0, 1, 2, 3 }, /* coord.xyzw */
- .src_wrmask = TGSI_WRITEMASK_XYZW,
- .flags = IR3_INSTR_P | IR3_INSTR_3D,
- };
+ /* Blob compiler never seems to use a const in src1 position for
+ * mad.*, although there does seem (according to disassembler
+ * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
+ * is a const. Not sure if this is a hw bug, or simply that the
+ * disassembler lies.
+ */
+ if ((src1->File == TGSI_FILE_IMMEDIATE) ||
+ (src1->File == TGSI_FILE_CONSTANT)) {
- unsigned tex = inst->Texture.Texture;
+ /* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
+ */
+ for (i = 0; i < 4; i++)
+ swiz1[i] = i;
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_TEX:
- switch (tex) {
- case TGSI_TEXTURE_1D:
- return &tex1d;
- case TGSI_TEXTURE_SHADOW1D:
- return &tex1ds;
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
- return &tex2d;
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- return &tex2ds;
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
- return &tex3d;
- case TGSI_TEXTURE_SHADOWCUBE:
- return &tex3ds;
- default:
- compile_error(ctx, "unknown texture type: %s\n",
- tgsi_texture_names[tex]);
- return NULL;
- }
- break;
- case TGSI_OPCODE_TXP:
- switch (tex) {
- case TGSI_TEXTURE_1D:
- return &txp1d;
- case TGSI_TEXTURE_SHADOW1D:
- return &txp1ds;
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
- return &txp2d;
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- return &txp2ds;
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
- return &txp3d;
- default:
- compile_error(ctx, "unknown texture type: %s\n",
- tgsi_texture_names[tex]);
- break;
- }
- break;
+ /* the first mul.f will clobber tmp.x, but that is ok
+ * because after that point we no longer need tmp.x:
+ */
+ create_mov(ctx, &tmp_dst, src1);
+ src1 = &tmp_src;
}
- compile_assert(ctx, 0);
- return NULL;
-}
-static struct tgsi_src_register *
-get_tex_coord(struct fd3_compile_context *ctx,
- struct tgsi_full_instruction *inst,
- const struct tex_info *tinf)
-{
- struct tgsi_src_register *coord = &inst->Src[0].Register;
- struct ir3_instruction *instr;
- unsigned tex = inst->Texture.Texture;
- bool needs_mov = false;
- unsigned i;
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, src0, swiz0[0]);
+ add_src_reg(ctx, instr, src1, swiz1[0]);
- /* cat5 instruction cannot seem to handle const or relative: */
- if (is_rel_or_const(coord))
- needs_mov = true;
+ for (i = 1; i < n; i++) {
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
- /* 1D textures we fix up w/ 0.0 as 2nd coord: */
- if ((tex == TGSI_TEXTURE_1D) || (tex == TGSI_TEXTURE_SHADOW1D))
- needs_mov = true;
+ instr = ir3_instr_create(ctx->ir, 3, opc_mad);
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, src0, swiz0[i]);
+ add_src_reg(ctx, instr, src1, swiz1[i]);
+ add_src_reg(ctx, instr, &tmp_src, 0);
+ }
- /* The texture sample instructions need to coord in successive
- * registers/components (ie. src.xy but not src.yx). And TXP
- * needs the .w component in .z for 2D.. so in some cases we
- * might need to emit some mov instructions to shuffle things
- * around:
- */
- for (i = 1; (i < 4) && (tinf->order[i] >= 0) && !needs_mov; i++)
- if (src_swiz(coord, i) != (src_swiz(coord, 0) + tinf->order[i]))
- needs_mov = true;
+ /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
+ if (t->tgsi_opc == TGSI_OPCODE_DPH) {
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
- if (needs_mov) {
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- unsigned j;
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, src1, swiz1[i]);
+ add_src_reg(ctx, instr, &tmp_src, 0);
- type_t type_mov = get_ftype(ctx);
+ n++;
+ }
- /* need to move things around: */
- tmp_src = get_internal_temp(ctx, &tmp_dst);
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
- for (j = 0; j < 4; j++) {
- if (tinf->order[j] < 0)
- continue;
- instr = instr_create(ctx, 1, 0); /* mov */
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, &tmp_dst, j);
- add_src_reg(ctx, instr, coord,
- src_swiz(coord, tinf->order[j]));
- }
+ /* pad out to multiple of 4 scalar instructions: */
+ for (i = 2 * n; i % 4; i++) {
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ }
- /* fix up .y coord: */
- if ((tex == TGSI_TEXTURE_1D) ||
- (tex == TGSI_TEXTURE_SHADOW1D)) {
- instr = instr_create(ctx, 1, 0); /* mov */
- instr->cat1.src_type = type_mov;
- instr->cat1.dst_type = type_mov;
- add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5;
- }
+ create_mov(ctx, dst, &tmp_src);
+}
- coord = tmp_src;
- }
+/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
+static void
+trans_lrp(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct ir3_instruction *instr;
+ struct tgsi_dst_register tmp_dst1, tmp_dst2;
+ struct tgsi_src_register tmp_src1, tmp_src2;
+ struct tgsi_src_register tmp_const;
+
+ get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
+ get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
+
+ get_immediate(ctx, &tmp_const, fui(1.0));
+
+ /* tmp1 = (a * b) */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ vectorize(ctx, instr, &tmp_dst1, 2,
+ &inst->Src[0].Register, 0,
+ &inst->Src[1].Register, 0);
+
+ /* tmp2 = (1 - a) */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ vectorize(ctx, instr, &tmp_dst2, 2,
+ &tmp_const, 0,
+ &inst->Src[0].Register, IR3_REG_NEGATE);
+
+ /* tmp2 = tmp2 * c */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ vectorize(ctx, instr, &tmp_dst2, 2,
+ &tmp_src2, 0,
+ &inst->Src[2].Register, 0);
- return coord;
+ /* dst = tmp1 + tmp2 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ vectorize(ctx, instr, &inst->Dst[0].Register, 2,
+ &tmp_src1, 0,
+ &tmp_src2, 0);
}
+/* FRC(x) = x - FLOOR(x) */
static void
-trans_samp(const struct instr_translater *t,
+trans_frac(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *coord;
- struct tgsi_src_register *samp = &inst->Src[1].Register;
- const struct tex_info *tinf;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register tmp_src;
- tinf = get_tex_info(ctx, inst);
- coord = get_tex_coord(ctx, inst, tinf);
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
- instr = instr_create(ctx, 5, t->opc);
- instr->cat5.type = get_ftype(ctx);
- instr->cat5.samp = samp->Index;
- instr->cat5.tex = samp->Index;
- instr->flags |= tinf->flags;
+ /* tmp = FLOOR(x) */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
+ vectorize(ctx, instr, &tmp_dst, 1,
+ &inst->Src[0].Register, 0);
- add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
- add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf->src_wrmask);
+ /* dst = x - tmp */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ vectorize(ctx, instr, &inst->Dst[0].Register, 2,
+ &inst->Src[0].Register, 0,
+ &tmp_src, IR3_REG_NEGATE);
}
-/*
- * SEQ(a,b) = (a == b) ? 1.0 : 0.0
- * cmps.f.eq tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SNE(a,b) = (a != b) ? 1.0 : 0.0
- * cmps.f.ne tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SGE(a,b) = (a >= b) ? 1.0 : 0.0
- * cmps.f.ge tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SLE(a,b) = (a <= b) ? 1.0 : 0.0
- * cmps.f.le tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SGT(a,b) = (a > b) ? 1.0 : 0.0
- * cmps.f.gt tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * SLT(a,b) = (a < b) ? 1.0 : 0.0
- * cmps.f.lt tmp0, a, b
- * cov.u16f16 dst, tmp0
- *
- * CMP(a,b,c) = (a < 0.0) ? b : c
- * cmps.f.lt tmp0, a, {0.0}
- * sel.b16 dst, b, tmp0, c
- */
+/* POW(a,b) = EXP2(b * LOG2(a)) */
static void
-trans_cmp(const struct instr_translater *t,
+trans_pow(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
+ struct ir3_register *r;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- struct tgsi_src_register constval0;
- /* final instruction for CMP() uses orig src1 and src2: */
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *a0, *a1, *a2;
- unsigned condition;
-
- tmp_src = get_internal_temp(ctx, &tmp_dst);
+ struct tgsi_src_register tmp_src;
+ struct tgsi_dst_register *dst = &inst->Dst[0].Register;
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
- a0 = &inst->Src[0].Register; /* a */
- a1 = &inst->Src[1].Register; /* b */
+ get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_FSEQ:
- condition = IR3_COND_EQ;
- break;
- case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_FSNE:
- condition = IR3_COND_NE;
- break;
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_FSGE:
- condition = IR3_COND_GE;
- break;
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_FSLT:
- condition = IR3_COND_LT;
- break;
- case TGSI_OPCODE_SLE:
- condition = IR3_COND_LE;
- break;
- case TGSI_OPCODE_SGT:
- condition = IR3_COND_GT;
- break;
- case TGSI_OPCODE_CMP:
- get_immediate(ctx, &constval0, fui(0.0));
- a0 = &inst->Src[0].Register; /* a */
- a1 = &constval0; /* {0.0} */
- condition = IR3_COND_LT;
- break;
- default:
- compile_assert(ctx, 0);
- return;
- }
-
- if (is_const(a0) && is_const(a1))
- a0 = get_unconst(ctx, a0);
+ /* log2 Rtmp, Rsrc0 */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+ instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
+ r = add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, src0, src0->SwizzleX);
+ regmask_set(ctx->needs_ss, r);
- /* cmps.f.<cond> tmp, a0, a1 */
- instr = instr_create(ctx, 2, OPC_CMPS_F);
- instr->cat2.condition = condition;
- vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
+ /* mul.f Rtmp, Rtmp, Rsrc1 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, src1, src1->SwizzleX);
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_FSEQ:
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_FSGE:
- case TGSI_OPCODE_SLE:
- case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_FSNE:
- case TGSI_OPCODE_SGT:
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_FSLT:
- /* cov.u16f16 dst, tmp0 */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = get_utype(ctx);
- instr->cat1.dst_type = get_ftype(ctx);
- vectorize(ctx, instr, dst, 1, tmp_src, 0);
- break;
- case TGSI_OPCODE_CMP:
- a1 = &inst->Src[1].Register;
- a2 = &inst->Src[2].Register;
- /* sel.{b32,b16} dst, src2, tmp, src1 */
- instr = instr_create(ctx, 3, OPC_SEL_B32);
- vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
+ /* blob compiler seems to ensure there are at least 6 instructions
+ * between a "simple" (non-cat4) instruction and a dependent cat4..
+ * probably we need to handle this in some other places too.
+ */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
- break;
- }
+ /* exp2 Rdst, Rtmp */
+ instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
+ r = add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, &tmp_src, 0);
+ regmask_set(ctx->needs_ss, r);
- put_dst(ctx, inst, dst);
+ create_mov(ctx, dst, &tmp_src);
}
-/*
- * USNE(a,b) = (a != b) ? 1 : 0
- * cmps.u32.ne dst, a, b
- *
- * USEQ(a,b) = (a == b) ? 1 : 0
- * cmps.u32.eq dst, a, b
- *
- * ISGE(a,b) = (a > b) ? 1 : 0
- * cmps.s32.ge dst, a, b
- *
- * USGE(a,b) = (a > b) ? 1 : 0
- * cmps.u32.ge dst, a, b
- *
- * ISLT(a,b) = (a < b) ? 1 : 0
- * cmps.s32.lt dst, a, b
- *
- * USLT(a,b) = (a < b) ? 1 : 0
- * cmps.u32.lt dst, a, b
- *
- * UCMP(a,b,c) = (a < 0) ? b : c
- * cmps.u32.lt tmp0, a, {0}
- * sel.b16 dst, b, tmp0, c
- */
+/* texture fetch/sample instructions: */
static void
-trans_icmp(const struct instr_translater *t,
+trans_samp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
+ struct ir3_register *r;
struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register constval0;
- struct tgsi_src_register *a0, *a1, *a2;
- unsigned condition;
-
- a0 = &inst->Src[0].Register; /* a */
- a1 = &inst->Src[1].Register; /* b */
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *coord = &inst->Src[0].Register;
+ struct tgsi_src_register *samp = &inst->Src[1].Register;
+ unsigned tex = inst->Texture.Texture;
+ int8_t *order;
+ unsigned i, j, flags = 0;
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_USNE:
- condition = IR3_COND_NE;
- break;
- case TGSI_OPCODE_USEQ:
- condition = IR3_COND_EQ;
- break;
- case TGSI_OPCODE_ISGE:
- case TGSI_OPCODE_USGE:
- condition = IR3_COND_GE;
- break;
- case TGSI_OPCODE_ISLT:
- case TGSI_OPCODE_USLT:
- condition = IR3_COND_LT;
+ switch (t->arg) {
+ case TGSI_OPCODE_TEX:
+ order = (tex == TGSI_TEXTURE_2D) ?
+ (int8_t[4]){ 0, 1, -1, -1 } : /* 2D */
+ (int8_t[4]){ 0, 1, 2, -1 }; /* 3D */
break;
- case TGSI_OPCODE_UCMP:
- get_immediate(ctx, &constval0, 0);
- a0 = &inst->Src[0].Register; /* a */
- a1 = &constval0; /* {0} */
- condition = IR3_COND_LT;
+ case TGSI_OPCODE_TXP:
+ order = (tex == TGSI_TEXTURE_2D) ?
+ (int8_t[4]){ 0, 1, 3, -1 } : /* 2D */
+ (int8_t[4]){ 0, 1, 2, 3 }; /* 3D */
+ flags |= IR3_INSTR_P;
break;
-
default:
- compile_assert(ctx, 0);
- return;
+ assert(0);
+ break;
}
- if (is_const(a0) && is_const(a1))
- a0 = get_unconst(ctx, a0);
-
- if (t->tgsi_opc == TGSI_OPCODE_UCMP) {
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- tmp_src = get_internal_temp(ctx, &tmp_dst);
- /* cmps.u32.lt tmp, a0, a1 */
- instr = instr_create(ctx, 2, t->opc);
- instr->cat2.condition = condition;
- vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
-
- a1 = &inst->Src[1].Register;
- a2 = &inst->Src[2].Register;
- /* sel.{b32,b16} dst, src2, tmp, src1 */
- instr = instr_create(ctx, 3, OPC_SEL_B32);
- vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
- } else {
- /* cmps.{u32,s32}.<cond> dst, a0, a1 */
- instr = instr_create(ctx, 2, t->opc);
- instr->cat2.condition = condition;
- vectorize(ctx, instr, dst, 2, a0, 0, a1, 0);
- }
- put_dst(ctx, inst, dst);
-}
+ if (tex == TGSI_TEXTURE_3D)
+ flags |= IR3_INSTR_3D;
-/*
- * Conditional / Flow control
- */
+ /* The texture sample instructions need to coord in successive
+ * registers/components (ie. src.xy but not src.yx). And TXP
+ * needs the .w component in .z for 2D.. so in some cases we
+ * might need to emit some mov instructions to shuffle things
+ * around:
+ */
+ for (i = 1; (i < 4) && (order[i] >= 0); i++) {
+ if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
+ type_t type_mov = get_type(ctx);
-static void
-push_branch(struct fd3_compile_context *ctx, bool inv,
- struct ir3_instruction *instr, struct ir3_instruction *cond)
-{
- unsigned int idx = ctx->branch_count++;
- compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch));
- ctx->branch[idx].instr = instr;
- ctx->branch[idx].inv = inv;
- /* else side of branch has same condition: */
- if (!inv)
- ctx->branch[idx].cond = cond;
-}
+ /* need to move things around: */
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
-static struct ir3_instruction *
-pop_branch(struct fd3_compile_context *ctx)
-{
- unsigned int idx = --ctx->branch_count;
- return ctx->branch[idx].instr;
-}
+ for (j = 0; (j < 4) && (order[j] >= 0); j++) {
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = type_mov;
+ instr->cat1.dst_type = type_mov;
+ add_dst_reg(ctx, instr, &tmp_dst, j);
+ add_src_reg(ctx, instr, coord,
+ src_swiz(coord, order[j]));
+ }
-static void
-trans_if(const struct instr_translater *t,
- struct fd3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
-{
- struct ir3_instruction *instr, *cond;
- struct tgsi_src_register *src = &inst->Src[0].Register;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src;
- struct tgsi_src_register constval;
+ coord = &tmp_src;
- get_immediate(ctx, &constval, fui(0.0));
- tmp_src = get_internal_temp(ctx, &tmp_dst);
+ if (j < 4)
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
- if (is_const(src))
- src = get_unconst(ctx, src);
+ break;
+ }
+ }
- /* cmps.f.ne tmp0, b, {0.0} */
- instr = instr_create(ctx, 2, OPC_CMPS_F);
- add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, src, src->SwizzleX);
- add_src_reg(ctx, instr, &constval, constval.SwizzleX);
- instr->cat2.condition = IR3_COND_NE;
+ instr = ir3_instr_create(ctx->ir, 5, t->opc);
+ instr->cat5.type = get_type(ctx);
+ instr->cat5.samp = samp->Index;
+ instr->cat5.tex = samp->Index;
+ instr->flags |= flags;
- compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */
- cond = instr->regs[1]->instr;
+ r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
+ r->wrmask = inst->Dst[0].Register.WriteMask;
- /* meta:flow tmp0 */
- instr = instr_create(ctx, -1, OPC_META_FLOW);
- ir3_reg_create(instr, 0, 0); /* dummy dst */
- add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
+ add_src_reg(ctx, instr, coord, coord->SwizzleX);
- push_branch(ctx, false, instr, cond);
- instr->flow.if_block = push_block(ctx);
+ regmask_set(ctx->needs_sy, r);
}
+/* CMP(a,b,c) = (a < 0) ? b : c */
static void
-trans_else(const struct instr_translater *t,
+trans_cmp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register tmp_src;
+ struct tgsi_src_register constval;
+ /* final instruction uses original src1 and src2, so we need get_dst() */
+ struct tgsi_dst_register *dst = get_dst(ctx, inst);
+
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
- pop_block(ctx);
+ /* cmps.f.ge tmp, src0, 0.0 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+ instr->cat2.condition = IR3_COND_GE;
+ get_immediate(ctx, &constval, fui(0.0));
+ vectorize(ctx, instr, &tmp_dst, 2,
+ &inst->Src[0].Register, 0,
+ &constval, 0);
- instr = pop_branch(ctx);
+ /* add.s tmp, tmp, -1 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ instr->repeat = 3;
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, &tmp_src, 0);
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
- compile_assert(ctx, (instr->category == -1) &&
- (instr->opc == OPC_META_FLOW));
+ /* sel.{f32,f16} dst, src2, tmp, src1 */
+ instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
+ OPC_SEL_F16 : OPC_SEL_F32);
+ vectorize(ctx, instr, &inst->Dst[0].Register, 3,
+ &inst->Src[2].Register, 0,
+ &tmp_src, 0,
+ &inst->Src[1].Register, 0);
- push_branch(ctx, true, instr, NULL);
- instr->flow.else_block = push_block(ctx);
+ put_dst(ctx, inst, dst);
}
-static struct ir3_instruction *
-find_temporary(struct ir3_block *block, unsigned n)
-{
- if (block->parent && !block->temporaries[n])
- return find_temporary(block->parent, n);
- return block->temporaries[n];
-}
+/*
+ * Conditional / Flow control
+ */
-static struct ir3_instruction *
-find_output(struct ir3_block *block, unsigned n)
+/* Linear search of ctx->ir->instrs[] for instr.  Returns the index of
+ * the first matching entry, or ~0 if the instruction is not present.
+ */
+static unsigned
+find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
{
- if (block->parent && !block->outputs[n])
- return find_output(block->parent, n);
- return block->outputs[n];
+ unsigned i;
+ for (i = 0; i < ctx->ir->instrs_count; i++)
+ if (ctx->ir->instrs[i] == instr)
+ return i;
+ return ~0;
}
-static struct ir3_instruction *
-create_phi(struct fd3_compile_context *ctx, struct ir3_instruction *cond,
- struct ir3_instruction *a, struct ir3_instruction *b)
+static void
+push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
{
- struct ir3_instruction *phi;
-
- compile_assert(ctx, cond);
-
- /* Either side of the condition could be null.. which
- * indicates a variable written on only one side of the
- * branch. Normally this should only be variables not
- * used outside of that side of the branch. So we could
- * just 'return a ? a : b;' in that case. But for better
- * defined undefined behavior we just stick in imm{0.0}.
- * In the common case of a value only used within the
- * one side of the branch, the PHI instruction will not
- * get scheduled
- */
- if (!a)
- a = create_immed(ctx, 0.0);
- if (!b)
- b = create_immed(ctx, 0.0);
-
- phi = instr_create(ctx, -1, OPC_META_PHI);
- ir3_reg_create(phi, 0, 0); /* dummy dst */
- ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond;
- ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a;
- ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b;
-
- return phi;
+ ctx->branch[ctx->branch_count++] = instr;
}
+/* Close the innermost open branch: emit a nop carrying the (jp) flag as
+ * the jump target, pop the pending branch instruction pushed by
+ * push_branch() and patch its cat0.immed with the relative distance from
+ * the branch to the current end of the instruction list.
+ */
static void
-trans_endif(const struct instr_translater *t,
- struct fd3_compile_context *ctx,
- struct tgsi_full_instruction *inst)
+pop_branch(struct fd3_compile_context *ctx)
{
struct ir3_instruction *instr;
- struct ir3_block *ifb, *elseb;
- struct ir3_instruction **ifout, **elseout;
- unsigned i, ifnout = 0, elsenout = 0;
-
- pop_block(ctx);
-
- instr = pop_branch(ctx);
- compile_assert(ctx, (instr->category == -1) &&
- (instr->opc == OPC_META_FLOW));
-
- ifb = instr->flow.if_block;
- elseb = instr->flow.else_block;
- /* if there is no else block, the parent block is used for the
- * branch-not-taken src of the PHI instructions:
+ unsigned idx;
+ /* if we were clever enough, we'd patch this up after the fact,
+ * and set (jp) flag on whatever the next instruction was, rather
+ * than inserting an extra nop..
*/
- if (!elseb)
- elseb = ifb->parent;
-
- /* worst case sizes: */
- ifnout = ifb->ntemporaries + ifb->noutputs;
- elsenout = elseb->ntemporaries + elseb->noutputs;
-
- ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout);
- if (elseb != ifb->parent)
- elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout);
-
- ifnout = 0;
- elsenout = 0;
-
- /* generate PHI instructions for any temporaries written: */
- for (i = 0; i < ifb->ntemporaries; i++) {
- struct ir3_instruction *a = ifb->temporaries[i];
- struct ir3_instruction *b = elseb->temporaries[i];
+ instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ instr->flags |= IR3_INSTR_JP;
- /* if temporary written in if-block, or if else block
- * is present and temporary written in else-block:
- */
- if (a || ((elseb != ifb->parent) && b)) {
- struct ir3_instruction *phi;
-
- /* if only written on one side, find the closest
- * enclosing update on other side:
- */
- if (!a)
- a = find_temporary(ifb, i);
- if (!b)
- b = find_temporary(elseb, i);
-
- ifout[ifnout] = a;
- a = create_output(ifb, a, ifnout++);
-
- if (elseb != ifb->parent) {
- elseout[elsenout] = b;
- b = create_output(elseb, b, elsenout++);
- }
-
- phi = create_phi(ctx, instr, a, b);
- ctx->block->temporaries[i] = phi;
- }
- }
-
- compile_assert(ctx, ifb->noutputs == elseb->noutputs);
-
- /* .. and any outputs written: */
- for (i = 0; i < ifb->noutputs; i++) {
- struct ir3_instruction *a = ifb->outputs[i];
- struct ir3_instruction *b = elseb->outputs[i];
-
- /* if output written in if-block, or if else block
- * is present and output written in else-block:
- */
- if (a || ((elseb != ifb->parent) && b)) {
- struct ir3_instruction *phi;
-
- /* if only written on one side, find the closest
- * enclosing update on other side:
- */
- if (!a)
- a = find_output(ifb, i);
- if (!b)
- b = find_output(elseb, i);
-
- ifout[ifnout] = a;
- a = create_output(ifb, a, ifnout++);
-
- if (elseb != ifb->parent) {
- elseout[elsenout] = b;
- b = create_output(elseb, b, elsenout++);
- }
-
- phi = create_phi(ctx, instr, a, b);
- ctx->block->outputs[i] = phi;
- }
- }
-
- ifb->noutputs = ifnout;
- ifb->outputs = ifout;
-
- if (elseb != ifb->parent) {
- elseb->noutputs = elsenout;
- elseb->outputs = elseout;
- }
-
- // TODO maybe we want to compact block->inputs?
+ /* an ELSE/ENDIF without a matching IF would underflow the stack: */
+ assert(ctx->branch_count > 0);
+
+ /* pop the branch instruction from the stack and fix up branch target: */
+ instr = ctx->branch[--ctx->branch_count];
+ idx = find_instruction(ctx, instr);
+ /* find_instruction() returns ~0 when the branch is not in the list: */
+ assert(idx != ~0u);
+ instr->cat0.immed = ctx->ir->instrs_count - idx - 1;
}
-/*
- * Kill
+/* We probably don't really want to translate if/else/endif into branches..
+ * the blob driver evaluates both legs of the if and then uses the sel
+ * instruction to pick which sides of the branch to "keep".. but figuring
+ * that out will take somewhat more compiler smarts. So hopefully branches
+ * don't kill performance too badly.
*/
-
static void
-trans_kill(const struct instr_translater *t,
+trans_if(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- struct ir3_instruction *instr, *immed, *cond = NULL;
- bool inv = false;
-
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_KILL:
- /* unconditional kill, use enclosing if condition: */
- if (ctx->branch_count > 0) {
- unsigned int idx = ctx->branch_count - 1;
- cond = ctx->branch[idx].cond;
- inv = ctx->branch[idx].inv;
- } else {
- cond = create_immed(ctx, 1.0);
- }
-
- break;
- }
-
- compile_assert(ctx, cond);
+ struct ir3_instruction *instr;
+ struct tgsi_src_register *src = &inst->Src[0].Register;
+ struct tgsi_src_register constval;
- immed = create_immed(ctx, 0.0);
+ get_immediate(ctx, &constval, fui(0.0));
- /* cmps.f.ne p0.x, cond, {0.0} */
- instr = instr_create(ctx, 2, OPC_CMPS_F);
- instr->cat2.condition = IR3_COND_NE;
+ instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
ir3_reg_create(instr, regid(REG_P0, 0), 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
- cond = instr;
-
- /* kill p0.x */
- instr = instr_create(ctx, 0, OPC_KILL);
- instr->cat0.inv = inv;
- ir3_reg_create(instr, 0, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
+ add_src_reg(ctx, instr, &constval, constval.SwizzleX);
+ add_src_reg(ctx, instr, src, src->SwizzleX);
+ instr->cat2.condition = IR3_COND_EQ;
- ctx->kill[ctx->kill_count++] = instr;
+ instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
+ push_branch(ctx, instr);
}
-/*
- * Kill-If
- */
-
static void
-trans_killif(const struct instr_translater *t,
+trans_else(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- struct tgsi_src_register *src = &inst->Src[0].Register;
- struct ir3_instruction *instr, *immed, *cond = NULL;
- bool inv = false;
-
- immed = create_immed(ctx, 0.0);
-
- /* cmps.f.ne p0.x, cond, {0.0} */
- instr = instr_create(ctx, 2, OPC_CMPS_F);
- instr->cat2.condition = IR3_COND_NE;
- ir3_reg_create(instr, regid(REG_P0, 0), 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
- add_src_reg(ctx, instr, src, src->SwizzleX);
-
- cond = instr;
-
- /* kill p0.x */
- instr = instr_create(ctx, 0, OPC_KILL);
- instr->cat0.inv = inv;
- ir3_reg_create(instr, 0, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
+ struct ir3_instruction *instr;
- ctx->kill[ctx->kill_count++] = instr;
+ /* for first half of if/else/endif, generate a jump past the else: */
+ instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP);
+ pop_branch(ctx);
+ push_branch(ctx, instr);
}
-/*
- * I2F / U2F / F2I / F2U
- */
static void
-trans_cov(const struct instr_translater *t,
+trans_endif(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- struct ir3_instruction *instr;
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src = &inst->Src[0].Register;
-
- // cov.f32s32 dst, tmp0 /
- instr = instr_create(ctx, 1, 0);
- switch (t->tgsi_opc) {
- case TGSI_OPCODE_U2F:
- instr->cat1.src_type = TYPE_U32;
- instr->cat1.dst_type = TYPE_F32;
- break;
- case TGSI_OPCODE_I2F:
- instr->cat1.src_type = TYPE_S32;
- instr->cat1.dst_type = TYPE_F32;
- break;
- case TGSI_OPCODE_F2U:
- instr->cat1.src_type = TYPE_F32;
- instr->cat1.dst_type = TYPE_U32;
- break;
- case TGSI_OPCODE_F2I:
- instr->cat1.src_type = TYPE_F32;
- instr->cat1.dst_type = TYPE_S32;
- break;
-
- }
- vectorize(ctx, instr, dst, 1, src, 0);
+ pop_branch(ctx);
}
/*
@@ -1861,7 +900,7 @@ instr_cat0(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- instr_create(ctx, 0, t->opc);
+ ir3_instr_create(ctx->ir, 0, t->opc);
}
static void
@@ -1871,7 +910,26 @@ instr_cat1(const struct instr_translater *t,
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src = &inst->Src[0].Register;
- create_mov(ctx, dst, src);
+
+ /* mov instructions can't handle a negate on src: */
+ if (src->Negate) {
+ struct tgsi_src_register constval;
+ struct ir3_instruction *instr;
+
+ /* since right now, we are using uniformly either TYPE_F16 or
+ * TYPE_F32, and we don't utilize the conversion possibilities
+ * of mov instructions, we can get away with substituting an
+ * add.f which can handle negate. Might need to revisit this
+ * in the future if we start supporting widening/narrowing or
+ * conversion to/from integer..
+ */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ get_immediate(ctx, &constval, fui(0.0));
+ vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
+ } else {
+ create_mov(ctx, dst, src);
+ /* create_mov() generates vector sequence, so no vectorize() */
+ }
put_dst(ctx, inst, dst);
}
@@ -1881,20 +939,19 @@ instr_cat2(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src0 = &inst->Src[0].Register;
- struct tgsi_src_register *src1 = &inst->Src[1].Register;
struct ir3_instruction *instr;
- unsigned src0_flags = 0, src1_flags = 0;
+ unsigned src0_flags = 0;
+
+ instr = ir3_instr_create(ctx->ir, 2, t->opc);
switch (t->tgsi_opc) {
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ instr->cat2.condition = t->arg;
+ break;
case TGSI_OPCODE_ABS:
- case TGSI_OPCODE_IABS:
src0_flags = IR3_REG_ABS;
break;
- case TGSI_OPCODE_SUB:
- case TGSI_OPCODE_INEG:
- src1_flags = IR3_REG_NEGATE;
- break;
}
switch (t->opc) {
@@ -1913,16 +970,13 @@ instr_cat2(const struct instr_translater *t,
case OPC_SETRM:
case OPC_CBITS_B:
/* these only have one src reg */
- instr = instr_create(ctx, 2, t->opc);
- vectorize(ctx, instr, dst, 1, src0, src0_flags);
+ vectorize(ctx, instr, dst, 1,
+ &inst->Src[0].Register, src0_flags);
break;
default:
- if (is_const(src0) && is_const(src1))
- src0 = get_unconst(ctx, src0);
-
- instr = instr_create(ctx, 2, t->opc);
- vectorize(ctx, instr, dst, 2, src0, src0_flags,
- src1, src1_flags);
+ vectorize(ctx, instr, dst, 2,
+ &inst->Src[0].Register, src0_flags,
+ &inst->Src[1].Register, 0);
break;
}
@@ -1935,26 +989,29 @@ instr_cat3(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register tmp_src;
struct ir3_instruction *instr;
- /* in particular, can't handle const for src1 for cat3..
- * for mad, we can swap first two src's if needed:
+ /* Blob compiler never seems to use a const in src1 position..
+ * although there does seem (according to disassembler hidden
+ * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
+ * const. Not sure if this is a hw bug, or simply that the
+ * disassembler lies.
*/
- if (is_rel_or_const(src1)) {
- if (is_mad(t->opc) && !is_rel_or_const(src0)) {
- struct tgsi_src_register *tmp;
- tmp = src0;
- src0 = src1;
- src1 = tmp;
- } else {
- src1 = get_unconst(ctx, src1);
- }
+ if ((src1->File == TGSI_FILE_CONSTANT) ||
+ (src1->File == TGSI_FILE_IMMEDIATE)) {
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ create_mov(ctx, &tmp_dst, src1);
+ src1 = &tmp_src;
}
- instr = instr_create(ctx, 3, t->opc);
- vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? t->hopc : t->opc);
+ vectorize(ctx, instr, dst, 3,
+ &inst->Src[0].Register, 0,
+ src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
}
@@ -1965,22 +1022,15 @@ instr_cat4(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src = &inst->Src[0].Register;
struct ir3_instruction *instr;
- unsigned i;
- /* seems like blob compiler avoids const as src.. */
- if (is_const(src))
- src = get_unconst(ctx, src);
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+ instr = ir3_instr_create(ctx->ir, 4, t->opc);
- /* we need to replicate into each component: */
- for (i = 0; i < 4; i++) {
- if (dst->WriteMask & (1 << i)) {
- instr = instr_create(ctx, 4, t->opc);
- add_dst_reg(ctx, instr, dst, i);
- add_src_reg(ctx, instr, src, src->SwizzleX);
- }
- }
+ vectorize(ctx, instr, dst, 1,
+ &inst->Src[0].Register, 0);
+
+ regmask_set(ctx->needs_ss, instr->regs[0]);
put_dst(ctx, inst, dst);
}
@@ -1995,446 +1045,141 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(SQRT, instr_cat4, .opc = OPC_SQRT),
INSTR(MUL, instr_cat2, .opc = OPC_MUL_F),
INSTR(ADD, instr_cat2, .opc = OPC_ADD_F),
- INSTR(SUB, instr_cat2, .opc = OPC_ADD_F),
+ INSTR(DP2, trans_dotp, .arg = 2),
+ INSTR(DP3, trans_dotp, .arg = 3),
+ INSTR(DP4, trans_dotp, .arg = 4),
+ INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
- INSTR(UADD, instr_cat2, .opc = OPC_ADD_U),
- INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S),
- INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U),
- INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S),
- INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U),
- INSTR(AND, instr_cat2, .opc = OPC_AND_B),
- INSTR(OR, instr_cat2, .opc = OPC_OR_B),
- INSTR(NOT, instr_cat2, .opc = OPC_NOT_B),
- INSTR(XOR, instr_cat2, .opc = OPC_XOR_B),
- INSTR(UMUL, instr_cat2, .opc = OPC_MUL_U),
- INSTR(SHL, instr_cat2, .opc = OPC_SHL_B),
- INSTR(USHR, instr_cat2, .opc = OPC_SHR_B),
- INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B),
- INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S),
- INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S),
- INSTR(AND, instr_cat2, .opc = OPC_AND_B),
+ INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
+ INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
- INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F),
- INSTR(CLAMP, trans_clamp),
+ INSTR(LRP, trans_lrp),
+ INSTR(FRC, trans_frac),
INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
- INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F),
- INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F),
- INSTR(CEIL, instr_cat2, .opc = OPC_CEIL_F),
- INSTR(ARL, trans_arl),
INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
+ INSTR(POW, trans_pow),
INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F),
- INSTR(COS, instr_cat4, .opc = OPC_COS),
- INSTR(SIN, instr_cat4, .opc = OPC_SIN),
+ INSTR(COS, instr_cat4, .opc = OPC_SIN),
+ INSTR(SIN, instr_cat4, .opc = OPC_COS),
INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
- INSTR(SGT, trans_cmp),
- INSTR(SLT, trans_cmp),
- INSTR(FSLT, trans_cmp),
- INSTR(SGE, trans_cmp),
- INSTR(FSGE, trans_cmp),
- INSTR(SLE, trans_cmp),
- INSTR(SNE, trans_cmp),
- INSTR(FSNE, trans_cmp),
- INSTR(SEQ, trans_cmp),
- INSTR(FSEQ, trans_cmp),
INSTR(CMP, trans_cmp),
- INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S),
- INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
- INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
- INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U),
INSTR(IF, trans_if),
- INSTR(UIF, trans_if),
INSTR(ELSE, trans_else),
INSTR(ENDIF, trans_endif),
INSTR(END, instr_cat0, .opc = OPC_END),
- INSTR(KILL, trans_kill, .opc = OPC_KILL),
- INSTR(KILL_IF, trans_killif, .opc = OPC_KILL),
- INSTR(I2F, trans_cov),
- INSTR(U2F, trans_cov),
- INSTR(F2I, trans_cov),
- INSTR(F2U, trans_cov),
};
-static fd3_semantic
-decl_semantic(const struct tgsi_declaration_semantic *sem)
-{
- return fd3_semantic_name(sem->Name, sem->Index);
-}
-
-static struct ir3_instruction *
-decl_in_frag_bary(struct fd3_compile_context *ctx, unsigned regid,
- unsigned j, unsigned inloc)
-{
- struct ir3_instruction *instr;
- struct ir3_register *src;
-
- /* bary.f dst, #inloc, r0.x */
- instr = instr_create(ctx, 2, OPC_BARY_F);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
- src = ir3_reg_create(instr, 0, IR3_REG_SSA);
- src->wrmask = 0x3;
- src->instr = ctx->frag_pos;
-
- return instr;
-}
-
-/* TGSI_SEMANTIC_POSITION
- * """"""""""""""""""""""
- *
- * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
- * fragment shader input contains the fragment's window position. The X
- * component starts at zero and always increases from left to right.
- * The Y component starts at zero and always increases but Y=0 may either
- * indicate the top of the window or the bottom depending on the fragment
- * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
- * The Z coordinate ranges from 0 to 1 to represent depth from the front
- * to the back of the Z buffer. The W component contains the reciprocol
- * of the interpolated vertex position W component.
- */
-static struct ir3_instruction *
-decl_in_frag_coord(struct fd3_compile_context *ctx, unsigned regid,
- unsigned j)
-{
- struct ir3_instruction *instr, *src;
-
- compile_assert(ctx, !ctx->frag_coord[j]);
-
- ctx->frag_coord[j] = create_input(ctx->block, NULL, 0);
-
-
- switch (j) {
- case 0: /* .x */
- case 1: /* .y */
- /* for frag_coord, we get unsigned values.. we need
- * to subtract (integer) 8 and divide by 16 (right-
- * shift by 4) then convert to float:
- */
-
- /* add.s tmp, src, -8 */
- instr = instr_create(ctx, 2, OPC_ADD_S);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j];
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8;
- src = instr;
-
- /* shr.b tmp, tmp, 4 */
- instr = instr_create(ctx, 2, OPC_SHR_B);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
- src = instr;
-
- /* mov.u32f32 dst, tmp */
- instr = instr_create(ctx, 1, 0);
- instr->cat1.src_type = TYPE_U32;
- instr->cat1.dst_type = TYPE_F32;
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-
- break;
- case 2: /* .z */
- case 3: /* .w */
- /* seems that we can use these as-is: */
- instr = ctx->frag_coord[j];
- break;
- default:
- compile_error(ctx, "invalid channel\n");
- instr = create_immed(ctx, 0.0);
- break;
- }
-
- return instr;
-}
-
-/* TGSI_SEMANTIC_FACE
- * """"""""""""""""""
- *
- * This label applies to fragment shader inputs only and indicates that
- * the register contains front/back-face information of the form (F, 0,
- * 0, 1). The first component will be positive when the fragment belongs
- * to a front-facing polygon, and negative when the fragment belongs to a
- * back-facing polygon.
- */
-static struct ir3_instruction *
-decl_in_frag_face(struct fd3_compile_context *ctx, unsigned regid,
- unsigned j)
-{
- struct ir3_instruction *instr, *src;
-
- switch (j) {
- case 0: /* .x */
- compile_assert(ctx, !ctx->frag_face);
-
- ctx->frag_face = create_input(ctx->block, NULL, 0);
-
- /* for faceness, we always get -1 or 0 (int).. but TGSI expects
- * positive vs negative float.. and piglit further seems to
- * expect -1.0 or 1.0:
- *
- * mul.s tmp, hr0.x, 2
- * add.s tmp, tmp, 1
- * mov.s16f32, dst, tmp
- *
- */
-
- instr = instr_create(ctx, 2, OPC_MUL_S);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
- src = instr;
-
- instr = instr_create(ctx, 2, OPC_ADD_S);
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
- src = instr;
-
- instr = instr_create(ctx, 1, 0); /* mov */
- instr->cat1.src_type = TYPE_S32;
- instr->cat1.dst_type = TYPE_F32;
- ir3_reg_create(instr, regid, 0); /* dummy dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-
- break;
- case 1: /* .y */
- case 2: /* .z */
- instr = create_immed(ctx, 0.0);
- break;
- case 3: /* .w */
- instr = create_immed(ctx, 1.0);
- break;
- default:
- compile_error(ctx, "invalid channel\n");
- instr = create_immed(ctx, 0.0);
- break;
- }
-
- return instr;
-}
-
-static void
+/* Handle a TGSI input declaration: for every register in the declared
+ * range, append an entry to so->inputs[] (compmask, regid, inloc) and
+ * advance ctx->next_inloc / so->total_in by the component count.  For
+ * fragment shaders a bary.f instruction is also emitted per input, and
+ * its dst register is remembered in ctx->last_input.
+ *
+ * Returns 6 when at least one bary.f was emitted, otherwise 0;
+ * compile_instructions() uses a non-zero result to emit a nop sequence
+ * ahead of the first translated instruction.
+ */
+static int
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_variant *so = ctx->so;
- unsigned name = decl->Semantic.Name;
- unsigned i;
+ struct fd3_shader_stateobj *so = ctx->so;
+ unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
+ unsigned i, flags = 0;
+ int nop = 0;
- /* I don't think we should get frag shader input without
- * semantic info? Otherwise how do inputs get linked to
- * vert outputs?
- */
- compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
- decl->Declaration.Semantic);
+ if (ctx->so->half_precision)
+ flags |= IR3_REG_HALF;
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
unsigned n = so->inputs_count++;
- unsigned r = regid(i, 0);
- unsigned ncomp, j;
+ unsigned r = regid(i + base, 0);
+ unsigned ncomp;
- /* we'll figure out the actual components used after scheduling */
+ /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
ncomp = 4;
- DBG("decl in -> r%d", i);
+ DBG("decl in -> r%d", i + base); // XXX
- compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
-
- so->inputs[n].semantic = decl_semantic(&decl->Semantic);
so->inputs[n].compmask = (1 << ncomp) - 1;
so->inputs[n].regid = r;
so->inputs[n].inloc = ctx->next_inloc;
+ ctx->next_inloc += ncomp;
- for (j = 0; j < ncomp; j++) {
- struct ir3_instruction *instr = NULL;
-
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- /* for fragment shaders, POSITION and FACE are handled
- * specially, not using normal varying / bary.f
- */
- if (name == TGSI_SEMANTIC_POSITION) {
- so->inputs[n].bary = false;
- so->frag_coord = true;
- instr = decl_in_frag_coord(ctx, r + j, j);
- } else if (name == TGSI_SEMANTIC_FACE) {
- so->inputs[n].bary = false;
- so->frag_face = true;
- instr = decl_in_frag_face(ctx, r + j, j);
- } else {
- so->inputs[n].bary = true;
- instr = decl_in_frag_bary(ctx, r + j, j,
- so->inputs[n].inloc + j - 8);
- }
- } else {
- instr = create_input(ctx->block, NULL, (i * 4) + j);
- }
+ so->total_in += ncomp;
- ctx->block->inputs[(i * 4) + j] = instr;
- }
+ /* for frag shaders, we need to generate the corresponding bary instr: */
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ struct ir3_instruction *instr;
- if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) {
- ctx->next_inloc += ncomp;
- so->total_in += ncomp;
+ instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);
+ instr->repeat = ncomp - 1;
+
+ /* dst register: */
+ ctx->last_input = ir3_reg_create(instr, r, flags);
+
+ /* input position: */
+ ir3_reg_create(instr, 0, IR3_REG_IMMED | IR3_REG_R)->iim_val =
+ so->inputs[n].inloc - 8;
+
+ /* input base (always r0.x): */
+ ir3_reg_create(instr, regid(0,0), 0);
+
+ nop = 6;
}
}
+
+ return nop;
}
+/* Handle a TGSI output declaration by recording the register(s) it
+ * occupies in the shader state object.  Vertex shaders: POSITION and
+ * PSIZE set pos_regid / psize_regid, while COLOR/GENERIC/FOG/TEXCOORD
+ * append one so->outputs[] entry per declared register.  Fragment
+ * shaders: COLOR sets color_regid.  Any other semantic is logged and
+ * trips assert(0).
+ */
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_variant *so = ctx->so;
- unsigned comp = 0;
+ struct fd3_shader_stateobj *so = ctx->so;
+ unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
unsigned name = decl->Semantic.Name;
unsigned i;
- compile_assert(ctx, decl->Declaration.Semantic);
+ assert(decl->Declaration.Semantic); // TODO is this ever not true?
- DBG("decl out[%d] -> r%d", name, decl->Range.First);
+ DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
switch (name) {
case TGSI_SEMANTIC_POSITION:
- so->writes_pos = true;
+ so->pos_regid = regid(decl->Range.First + base, 0);
break;
case TGSI_SEMANTIC_PSIZE:
- so->writes_psize = true;
+ so->psize_regid = regid(decl->Range.First + base, 0);
break;
case TGSI_SEMANTIC_COLOR:
- case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_TEXCOORD:
+ for (i = decl->Range.First; i <= decl->Range.Last; i++)
+ so->outputs[so->outputs_count++].regid = regid(i + base, 0);
break;
default:
- compile_error(ctx, "unknown VS semantic name: %s\n",
+ DBG("unknown VS semantic name: %s",
tgsi_semantic_names[name]);
+ assert(0);
}
} else {
switch (name) {
- case TGSI_SEMANTIC_POSITION:
- comp = 2; /* tgsi will write to .z component */
- so->writes_pos = true;
- break;
case TGSI_SEMANTIC_COLOR:
+ so->color_regid = regid(decl->Range.First + base, 0);
break;
default:
- compile_error(ctx, "unknown FS semantic name: %s\n",
+ DBG("unknown FS semantic name: %s",
tgsi_semantic_names[name]);
+ assert(0);
}
}
-
- for (i = decl->Range.First; i <= decl->Range.Last; i++) {
- unsigned n = so->outputs_count++;
- unsigned ncomp, j;
-
- ncomp = 4;
-
- compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
-
- so->outputs[n].semantic = decl_semantic(&decl->Semantic);
- so->outputs[n].regid = regid(i, comp);
-
- /* avoid undefined outputs, stick a dummy mov from imm{0.0},
- * which if the output is actually assigned will be over-
- * written
- */
- for (j = 0; j < ncomp; j++)
- ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0);
- }
}
-/* from TGSI perspective, we actually have inputs. But most of the "inputs"
- * for a fragment shader are just bary.f instructions. The *actual* inputs
- * from the hw perspective are the frag_pos and optionally frag_coord and
- * frag_face.
- */
static void
-fixup_frag_inputs(struct fd3_compile_context *ctx)
+decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_variant *so = ctx->so;
- struct ir3_block *block = ctx->block;
- struct ir3_instruction **inputs;
- struct ir3_instruction *instr;
- int n, regid = 0;
-
- block->ninputs = 0;
-
- n = 4; /* always have frag_pos */
- n += COND(so->frag_face, 4);
- n += COND(so->frag_coord, 4);
-
- inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
-
- if (so->frag_face) {
- /* this ultimately gets assigned to hr0.x so doesn't conflict
- * with frag_coord/frag_pos..
- */
- inputs[block->ninputs++] = ctx->frag_face;
- ctx->frag_face->regs[0]->num = 0;
-
- /* remaining channels not used, but let's avoid confusing
- * other parts that expect inputs to come in groups of vec4
- */
- inputs[block->ninputs++] = NULL;
- inputs[block->ninputs++] = NULL;
- inputs[block->ninputs++] = NULL;
- }
-
- /* since we don't know where to set the regid for frag_coord,
- * we have to use r0.x for it. But we don't want to *always*
- * use r1.x for frag_pos as that could increase the register
- * footprint on simple shaders:
- */
- if (so->frag_coord) {
- ctx->frag_coord[0]->regs[0]->num = regid++;
- ctx->frag_coord[1]->regs[0]->num = regid++;
- ctx->frag_coord[2]->regs[0]->num = regid++;
- ctx->frag_coord[3]->regs[0]->num = regid++;
-
- inputs[block->ninputs++] = ctx->frag_coord[0];
- inputs[block->ninputs++] = ctx->frag_coord[1];
- inputs[block->ninputs++] = ctx->frag_coord[2];
- inputs[block->ninputs++] = ctx->frag_coord[3];
- }
-
- /* we always have frag_pos: */
- so->pos_regid = regid;
-
- /* r0.x */
- instr = create_input(block, NULL, block->ninputs);
- instr->regs[0]->num = regid++;
- inputs[block->ninputs++] = instr;
- ctx->frag_pos->regs[1]->instr = instr;
-
- /* r0.y */
- instr = create_input(block, NULL, block->ninputs);
- instr->regs[0]->num = regid++;
- inputs[block->ninputs++] = instr;
- ctx->frag_pos->regs[2]->instr = instr;
-
- block->inputs = inputs;
+ ctx->so->samplers_count++;
}
static void
compile_instructions(struct fd3_compile_context *ctx)
{
- push_block(ctx);
-
- /* for fragment shader, we have a single input register (usually
- * r0.xy) which is used as the base for bary.f varying fetch instrs:
- */
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->block, -1, OPC_META_FI);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
- ctx->frag_pos = instr;
- }
+ struct ir3_shader *ir = ctx->ir;
+ int nop = 0;
while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
tgsi_parse_token(&ctx->parser);
@@ -2446,7 +1191,9 @@ compile_instructions(struct fd3_compile_context *ctx)
if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
decl_out(ctx, decl);
} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
- decl_in(ctx, decl);
+ nop = decl_in(ctx, decl);
+ } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+ decl_samp(ctx, decl);
}
break;
}
@@ -2458,7 +1205,6 @@ compile_instructions(struct fd3_compile_context *ctx)
struct tgsi_full_immediate *imm =
&ctx->parser.FullToken.FullImmediate;
unsigned n = ctx->so->immediates_count++;
- compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates));
memcpy(ctx->so->immediates[n].val, imm->u, 16);
break;
}
@@ -2468,196 +1214,55 @@ compile_instructions(struct fd3_compile_context *ctx)
unsigned opc = inst->Instruction.Opcode;
const struct instr_translater *t = &translaters[opc];
+ if (nop) {
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = nop - 1;
+ nop = 0;
+ }
+
if (t->fxn) {
t->fxn(t, ctx, inst);
ctx->num_internal_temps = 0;
} else {
- compile_error(ctx, "unknown TGSI opc: %s\n",
+ debug_printf("unknown TGSI opc: %s\n",
tgsi_get_opcode_name(opc));
+ tgsi_dump(ctx->tokens, 0);
+ assert(0);
}
- switch (inst->Instruction.Saturate) {
- case TGSI_SAT_ZERO_ONE:
- create_clamp_imm(ctx, &inst->Dst[0].Register,
- fui(0.0), fui(1.0));
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- create_clamp_imm(ctx, &inst->Dst[0].Register,
- fui(-1.0), fui(1.0));
- break;
- }
-
- instr_finish(ctx);
-
break;
}
default:
break;
}
}
-}
-static void
-compile_dump(struct fd3_compile_context *ctx)
-{
- const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
- static unsigned n = 0;
- char fname[16];
- FILE *f;
- snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
- f = fopen(fname, "w");
- if (!f)
- return;
- ir3_block_depth(ctx->block);
- ir3_shader_dump(ctx->ir, name, ctx->block, f);
- fclose(f);
+ if (ir->instrs_count > 0)
+ ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+
+ if (ctx->last_input)
+ ctx->last_input->flags |= IR3_REG_EI;
}
int
-fd3_compile_shader(struct fd3_shader_variant *so,
- const struct tgsi_token *tokens, struct fd3_shader_key key)
+fd3_compile_shader(struct fd3_shader_stateobj *so,
+ const struct tgsi_token *tokens)
{
struct fd3_compile_context ctx;
- struct ir3_block *block;
- struct ir3_instruction **inputs;
- unsigned i, j, actual_in;
- int ret = 0;
assert(!so->ir);
so->ir = ir3_shader_create();
- assert(so->ir);
+ so->color_regid = regid(63,0);
+ so->pos_regid = regid(63,0);
+ so->psize_regid = regid(63,0);
- if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) {
- ret = -1;
- goto out;
- }
+ if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK)
+ return -1;
compile_instructions(&ctx);
- block = ctx.block;
-
- /* keep track of the inputs from TGSI perspective.. */
- inputs = block->inputs;
-
- /* but fixup actual inputs for frag shader: */
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
- fixup_frag_inputs(&ctx);
-
- /* at this point, for binning pass, throw away unneeded outputs: */
- if (key.binning_pass) {
- for (i = 0, j = 0; i < so->outputs_count; i++) {
- unsigned name = sem2name(so->outputs[i].semantic);
- unsigned idx = sem2name(so->outputs[i].semantic);
-
- /* throw away everything but first position/psize */
- if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
- (name == TGSI_SEMANTIC_PSIZE))) {
- if (i != j) {
- so->outputs[j] = so->outputs[i];
- block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
- block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
- block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
- block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
- }
- j++;
- }
- }
- so->outputs_count = j;
- block->noutputs = j * 4;
- }
-
- /* at this point, we want the kill's in the outputs array too,
- * so that they get scheduled (since they have no dst).. we've
- * already ensured that the array is big enough in push_block():
- */
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
- for (i = 0; i < ctx.kill_count; i++)
- block->outputs[block->noutputs++] = ctx.kill[i];
- }
-
- if (fd_mesa_debug & FD_DBG_OPTDUMP)
- compile_dump(&ctx);
-
- ret = ir3_block_flatten(block);
- if (ret < 0)
- goto out;
- if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP))
- compile_dump(&ctx);
-
- ir3_block_cp(block);
-
- if (fd_mesa_debug & FD_DBG_OPTDUMP)
- compile_dump(&ctx);
-
- ir3_block_depth(block);
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("AFTER DEPTH:\n");
- ir3_dump_instr_list(block->head);
- }
-
- ir3_block_sched(block);
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("AFTER SCHED:\n");
- ir3_dump_instr_list(block->head);
- }
-
- ret = ir3_block_ra(block, so->type, key.half_precision,
- so->frag_coord, so->frag_face, &so->has_samp);
- if (ret)
- goto out;
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("AFTER RA:\n");
- ir3_dump_instr_list(block->head);
- }
-
- /* fixup input/outputs: */
- for (i = 0; i < so->outputs_count; i++) {
- so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
- /* preserve hack for depth output.. tgsi writes depth to .z,
- * but what we give the hw is the scalar register:
- */
- if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) &&
- (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
- so->outputs[i].regid += 2;
- }
- /* Note that some or all channels of an input may be unused: */
- actual_in = 0;
- for (i = 0; i < so->inputs_count; i++) {
- unsigned j, regid = ~0, compmask = 0;
- so->inputs[i].ncomp = 0;
- for (j = 0; j < 4; j++) {
- struct ir3_instruction *in = inputs[(i*4) + j];
- if (in) {
- compmask |= (1 << j);
- regid = in->regs[0]->num - j;
- actual_in++;
- so->inputs[i].ncomp++;
- }
- }
- so->inputs[i].regid = regid;
- so->inputs[i].compmask = compmask;
- }
-
- /* fragment shader always gets full vec4's even if it doesn't
- * fetch all components, but vertex shader we need to update
- * with the actual number of components fetch, otherwise thing
- * will hang due to mismaptch between VFD_DECODE's and
- * TOTALATTRTOVS
- */
- if (so->type == SHADER_VERTEX)
- so->total_in = actual_in;
-
-out:
- if (ret) {
- ir3_shader_destroy(so->ir);
- so->ir = NULL;
- }
compile_free(&ctx);
- return ret;
+ return 0;
}
diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
index a53bb3ee9..1116f598a 100644
--- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
+++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
@@ -32,12 +32,7 @@
#include "fd3_program.h"
#include "fd3_util.h"
-
-int fd3_compile_shader(struct fd3_shader_variant *so,
- const struct tgsi_token *tokens,
- struct fd3_shader_key key);
-int fd3_compile_shader_old(struct fd3_shader_variant *so,
- const struct tgsi_token *tokens,
- struct fd3_shader_key key);
+int fd3_compile_shader(struct fd3_shader_stateobj *so,
+ const struct tgsi_token *tokens);
#endif /* FD3_COMPILER_H_ */
diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
index a79998ef5..464a7e9d7 100644
--- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
+++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
@@ -190,22 +190,6 @@ typedef enum {
OPC_LDC_4 = 30,
OPC_LDLV = 31,
- /* meta instructions (category -1): */
- /* placeholder instr to mark inputs/outputs: */
- OPC_META_INPUT = 0,
- OPC_META_OUTPUT = 1,
- /* The "fan-in" and "fan-out" instructions are used for keeping
- * track of instructions that write to multiple dst registers
- * (fan-out) like texture sample instructions, or read multiple
- * consecutive scalar registers (fan-in) (bary.f, texture samp)
- */
- OPC_META_FO = 2,
- OPC_META_FI = 3,
- /* branches/flow control */
- OPC_META_FLOW = 4,
- OPC_META_PHI = 5,
-
-
} opc_t;
typedef enum {
@@ -248,16 +232,13 @@ typedef union PACKED {
/* normal gpr or const src register: */
struct PACKED {
uint32_t comp : 2;
- uint32_t num : 10;
+ uint32_t num : 9;
};
/* for immediate val: */
int32_t iim_val : 11;
/* to make compiler happy: */
uint32_t dummy32;
- uint32_t dummy10 : 10;
uint32_t dummy11 : 11;
- uint32_t dummy12 : 12;
- uint32_t dummy13 : 13;
uint32_t dummy8 : 8;
} reg_t;
@@ -295,16 +276,12 @@ typedef struct PACKED {
/* for normal src register: */
struct PACKED {
uint32_t src : 11;
- /* at least low bit of pad must be zero or it will
- * look like a address relative src
- */
uint32_t pad : 21;
};
/* for address relative: */
struct PACKED {
int32_t off : 10;
- uint32_t src_rel_c : 1;
- uint32_t src_rel : 1;
+ uint32_t must_be_3 : 2;
uint32_t unknown : 20;
};
/* for immediate: */
@@ -317,7 +294,7 @@ typedef struct PACKED {
uint32_t repeat : 3;
uint32_t src_r : 1;
uint32_t ss : 1;
- uint32_t ul : 1;
+ uint32_t src_rel : 1;
uint32_t dst_type : 3;
uint32_t dst_rel : 1;
uint32_t src_type : 3;
@@ -333,49 +310,19 @@ typedef struct PACKED {
typedef struct PACKED {
/* dword0: */
- union PACKED {
- struct PACKED {
- uint32_t src1 : 11;
- uint32_t must_be_zero1: 2;
- uint32_t src1_im : 1; /* immediate */
- uint32_t src1_neg : 1; /* negate */
- uint32_t src1_abs : 1; /* absolute value */
- };
- struct PACKED {
- uint32_t src1 : 10;
- uint32_t src1_c : 1; /* relative-const */
- uint32_t src1_rel : 1; /* relative address */
- uint32_t must_be_zero : 1;
- uint32_t dummy : 3;
- } rel1;
- struct PACKED {
- uint32_t src1 : 12;
- uint32_t src1_c : 1; /* const */
- uint32_t dummy : 3;
- } c1;
- };
-
- union PACKED {
- struct PACKED {
- uint32_t src2 : 11;
- uint32_t must_be_zero2: 2;
- uint32_t src2_im : 1; /* immediate */
- uint32_t src2_neg : 1; /* negate */
- uint32_t src2_abs : 1; /* absolute value */
- };
- struct PACKED {
- uint32_t src2 : 10;
- uint32_t src2_c : 1; /* relative-const */
- uint32_t src2_rel : 1; /* relative address */
- uint32_t must_be_zero : 1;
- uint32_t dummy : 3;
- } rel2;
- struct PACKED {
- uint32_t src2 : 12;
- uint32_t src2_c : 1; /* const */
- uint32_t dummy : 3;
- } c2;
- };
+ uint32_t src1 : 11;
+ uint32_t src1_rel : 1; /* relative address */
+ uint32_t src1_c : 1; /* const */
+ uint32_t src1_im : 1; /* immediate */
+ uint32_t src1_neg : 1; /* negate */
+ uint32_t src1_abs : 1; /* absolute value */
+
+ uint32_t src2 : 11;
+ uint32_t src2_rel : 1; /* relative address */
+ uint32_t src2_c : 1; /* const */
+ uint32_t src2_im : 1; /* immediate */
+ uint32_t src2_neg : 1; /* negate */
+ uint32_t src2_abs : 1; /* absolute value */
/* dword1: */
uint32_t dst : 8;
@@ -396,49 +343,18 @@ typedef struct PACKED {
typedef struct PACKED {
/* dword0: */
- union PACKED {
- struct PACKED {
- uint32_t src1 : 11;
- uint32_t must_be_zero1: 2;
- uint32_t src2_c : 1;
- uint32_t src1_neg : 1;
- uint32_t src2_r : 1;
- };
- struct PACKED {
- uint32_t src1 : 10;
- uint32_t src1_c : 1;
- uint32_t src1_rel : 1;
- uint32_t must_be_zero : 1;
- uint32_t dummy : 3;
- } rel1;
- struct PACKED {
- uint32_t src1 : 12;
- uint32_t src1_c : 1;
- uint32_t dummy : 3;
- } c1;
- };
-
- union PACKED {
- struct PACKED {
- uint32_t src3 : 11;
- uint32_t must_be_zero2: 2;
- uint32_t src3_r : 1;
- uint32_t src2_neg : 1;
- uint32_t src3_neg : 1;
- };
- struct PACKED {
- uint32_t src3 : 10;
- uint32_t src3_c : 1;
- uint32_t src3_rel : 1;
- uint32_t must_be_zero : 1;
- uint32_t dummy : 3;
- } rel2;
- struct PACKED {
- uint32_t src3 : 12;
- uint32_t src3_c : 1;
- uint32_t dummy : 3;
- } c2;
- };
+ uint32_t src1 : 11;
+ uint32_t src1_rel : 1;
+ uint32_t src1_c : 1;
+ uint32_t src2_c : 1;
+ uint32_t src1_neg : 1;
+ uint32_t src2_r : 1;
+ uint32_t src3 : 11;
+ uint32_t src3_rel : 1;
+ uint32_t src3_c : 1;
+ uint32_t src3_r : 1;
+ uint32_t src2_neg : 1;
+ uint32_t src3_neg : 1;
/* dword1: */
uint32_t dst : 8;
@@ -454,46 +370,14 @@ typedef struct PACKED {
uint32_t opc_cat : 3;
} instr_cat3_t;
-static inline bool instr_cat3_full(instr_cat3_t *cat3)
-{
- switch (cat3->opc) {
- case OPC_MAD_F16:
- case OPC_MAD_U16:
- case OPC_MAD_S16:
- case OPC_SEL_B16:
- case OPC_SEL_S16:
- case OPC_SEL_F16:
- case OPC_SAD_S16:
- case OPC_SAD_S32: // really??
- return false;
- default:
- return true;
- }
-}
-
typedef struct PACKED {
/* dword0: */
- union PACKED {
- struct PACKED {
- uint32_t src : 11;
- uint32_t must_be_zero1: 2;
- uint32_t src_im : 1; /* immediate */
- uint32_t src_neg : 1; /* negate */
- uint32_t src_abs : 1; /* absolute value */
- };
- struct PACKED {
- uint32_t src : 10;
- uint32_t src_c : 1; /* relative-const */
- uint32_t src_rel : 1; /* relative address */
- uint32_t must_be_zero : 1;
- uint32_t dummy : 3;
- } rel;
- struct PACKED {
- uint32_t src : 12;
- uint32_t src_c : 1; /* const */
- uint32_t dummy : 3;
- } c;
- };
+ uint32_t src : 11;
+ uint32_t src_rel : 1;
+ uint32_t src_c : 1;
+ uint32_t src_im : 1;
+ uint32_t src_neg : 1;
+ uint32_t src_abs : 1;
uint32_t dummy1 : 16; /* seem to be ignored */
/* dword1: */
@@ -645,35 +529,4 @@ typedef union PACKED {
};
} instr_t;
-static inline uint32_t instr_opc(instr_t *instr)
-{
- switch (instr->opc_cat) {
- case 0: return instr->cat0.opc;
- case 1: return 0;
- case 2: return instr->cat2.opc;
- case 3: return instr->cat3.opc;
- case 4: return instr->cat4.opc;
- case 5: return instr->cat5.opc;
- case 6: return instr->cat6.opc;
- default: return 0;
- }
-}
-
-static inline bool is_mad(opc_t opc)
-{
- switch (opc) {
- case OPC_MAD_U16:
- case OPC_MADSH_U16:
- case OPC_MAD_S16:
- case OPC_MADSH_M16:
- case OPC_MAD_U24:
- case OPC_MAD_S24:
- case OPC_MAD_F16:
- case OPC_MAD_F32:
- return true;
- default:
- return false;
- }
-}
-
#endif /* INSTR_A3XX_H_ */
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c
index 90063761d..5b120e77d 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c
@@ -25,11 +25,9 @@
* Chia-I Wu <olv@lunarg.com>
*/
-#include "util/u_prim.h"
#include "intel_winsys.h"
#include "ilo_3d_pipeline.h"
-#include "ilo_blit.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_query.h"
@@ -47,7 +45,8 @@ process_query_for_occlusion_counter(struct ilo_3d *hw3d,
/* in pairs */
assert(q->reg_read % 2 == 0);
- vals = intel_bo_map(q->bo, false);
+ intel_bo_map(q->bo, false);
+ vals = intel_bo_get_virtual(q->bo);
for (i = 1; i < q->reg_read; i += 2)
depth_count += vals[i] - vals[i - 1];
intel_bo_unmap(q->bo);
@@ -71,7 +70,8 @@ process_query_for_timestamp(struct ilo_3d *hw3d, struct ilo_query *q)
assert(q->reg_read == 1);
- vals = intel_bo_map(q->bo, false);
+ intel_bo_map(q->bo, false);
+ vals = intel_bo_get_virtual(q->bo);
timestamp = vals[0];
intel_bo_unmap(q->bo);
@@ -88,7 +88,8 @@ process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q)
/* in pairs */
assert(q->reg_read % 2 == 0);
- vals = intel_bo_map(q->bo, false);
+ intel_bo_map(q->bo, false);
+ vals = intel_bo_get_virtual(q->bo);
for (i = 1; i < q->reg_read; i += 2)
elapsed += vals[i] - vals[i - 1];
@@ -101,41 +102,6 @@ process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q)
}
static void
-process_query_for_pipeline_statistics(struct ilo_3d *hw3d,
- struct ilo_query *q)
-{
- const uint64_t *vals;
- int i;
-
- assert(q->reg_read % 22 == 0);
-
- vals = intel_bo_map(q->bo, false);
-
- for (i = 0; i < q->reg_read; i += 22) {
- struct pipe_query_data_pipeline_statistics *stats =
- &q->data.pipeline_statistics;
- const uint64_t *begin = vals + i;
- const uint64_t *end = begin + 11;
-
- stats->ia_vertices += end[0] - begin[0];
- stats->ia_primitives += end[1] - begin[1];
- stats->vs_invocations += end[2] - begin[2];
- stats->gs_invocations += end[3] - begin[3];
- stats->gs_primitives += end[4] - begin[4];
- stats->c_invocations += end[5] - begin[5];
- stats->c_primitives += end[6] - begin[6];
- stats->ps_invocations += end[7] - begin[7];
- stats->hs_invocations += end[8] - begin[8];
- stats->ds_invocations += end[9] - begin[9];
- stats->cs_invocations += end[10] - begin[10];
- }
-
- intel_bo_unmap(q->bo);
-
- q->reg_read = 0;
-}
-
-static void
ilo_3d_resume_queries(struct ilo_3d *hw3d)
{
struct ilo_query *q;
@@ -159,17 +125,6 @@ ilo_3d_resume_queries(struct ilo_3d *hw3d)
ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
q->bo, q->reg_read++);
}
-
- /* resume pipeline statistics queries */
- LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) {
- /* accumulate the result if the bo is alreay full */
- if (q->reg_read >= q->reg_total)
- process_query_for_pipeline_statistics(hw3d, q);
-
- ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
- q->bo, q->reg_read);
- q->reg_read += 11;
- }
}
static void
@@ -190,14 +145,6 @@ ilo_3d_pause_queries(struct ilo_3d *hw3d)
ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
q->bo, q->reg_read++);
}
-
- /* pause pipeline statistics queries */
- LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) {
- assert(q->reg_read < q->reg_total);
- ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
- q->bo, q->reg_read);
- q->reg_read += 11;
- }
}
static void
@@ -208,10 +155,10 @@ ilo_3d_release_render_ring(struct ilo_cp *cp, void *data)
ilo_3d_pause_queries(hw3d);
}
-void
+static void
ilo_3d_own_render_ring(struct ilo_3d *hw3d)
{
- ilo_cp_set_ring(hw3d->cp, INTEL_RING_RENDER);
+ ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);
if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve))
ilo_3d_resume_queries(hw3d);
@@ -273,25 +220,6 @@ ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q)
q->data.u64 = 0;
list_add(&q->list, &hw3d->prim_emitted_queries);
break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- /* reserve some space for pausing the query */
- q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
- ILO_3D_PIPELINE_WRITE_STATISTICS, NULL);
- hw3d->owner_reserve += q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
-
- memset(&q->data.pipeline_statistics, 0,
- sizeof(q->data.pipeline_statistics));
-
- if (ilo_query_alloc_bo(q, 11 * 2, -1, hw3d->cp->winsys)) {
- /* XXX we should check the aperture size */
- ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
- q->bo, q->reg_read);
- q->reg_read += 11;
-
- list_add(&q->list, &hw3d->pipeline_statistics_queries);
- }
- break;
default:
assert(!"unknown query type");
break;
@@ -339,16 +267,6 @@ ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q)
case PIPE_QUERY_PRIMITIVES_EMITTED:
list_del(&q->list);
break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- list_del(&q->list);
-
- assert(q->reg_read + 11 <= q->reg_total);
- hw3d->owner_reserve -= q->reg_cmd_size;
- ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
- ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline,
- q->bo, q->reg_read);
- q->reg_read += 11;
- break;
default:
assert(!"unknown query type");
break;
@@ -379,10 +297,6 @@ ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q)
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- if (q->bo)
- process_query_for_pipeline_statistics(hw3d, q);
- break;
default:
assert(!"unknown query type");
break;
@@ -402,6 +316,10 @@ ilo_3d_cp_flushed(struct ilo_3d *hw3d)
ilo_3d_pipeline_invalidate(hw3d->pipeline,
ILO_3D_PIPELINE_INVALIDATE_BATCH_BO |
ILO_3D_PIPELINE_INVALIDATE_STATE_BO);
+ if (!hw3d->cp->render_ctx) {
+ ilo_3d_pipeline_invalidate(hw3d->pipeline,
+ ILO_3D_PIPELINE_INVALIDATE_HW);
+ }
hw3d->new_batch = true;
}
@@ -428,7 +346,6 @@ ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
list_inithead(&hw3d->time_elapsed_queries);
list_inithead(&hw3d->prim_generated_queries);
list_inithead(&hw3d->prim_emitted_queries);
- list_inithead(&hw3d->pipeline_statistics_queries);
hw3d->pipeline = ilo_3d_pipeline_create(cp, dev);
if (!hw3d->pipeline) {
@@ -484,7 +401,7 @@ draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
}
if (max_len > ilo_cp_space(hw3d->cp)) {
- ilo_cp_flush(hw3d->cp, "out of space");
+ ilo_cp_flush(hw3d->cp);
need_flush = false;
assert(max_len <= ilo_cp_space(hw3d->cp));
}
@@ -746,7 +663,7 @@ upload_shaders(struct ilo_3d *hw3d, struct ilo_shader_cache *shc)
intel_bo_unreference(hw3d->kernel.bo);
hw3d->kernel.bo = intel_winsys_alloc_buffer(hw3d->cp->winsys,
- "kernel bo", new_size, INTEL_DOMAIN_CPU);
+ "kernel bo", new_size, 0);
if (!hw3d->kernel.bo) {
ilo_err("failed to allocate kernel bo\n");
return false;
@@ -784,21 +701,6 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
struct ilo_3d *hw3d = ilo->hw3d;
int prim_generated, prim_emitted;
- if (ilo_debug & ILO_DEBUG_DRAW) {
- if (info->indexed) {
- ilo_printf("indexed draw %s: "
- "index start %d, count %d, vertex range [%d, %d]\n",
- u_prim_name(info->mode), info->start, info->count,
- info->min_index, info->max_index);
- }
- else {
- ilo_printf("draw %s: vertex start %d, count %d\n",
- u_prim_name(info->mode), info->start, info->count);
- }
-
- ilo_dump_dirty_flags(ilo->dirty);
- }
-
if (!ilo_3d_pass_render_condition(ilo))
return;
@@ -819,8 +721,6 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!upload_shaders(hw3d, ilo->shader_cache))
return;
- ilo_blit_resolve_framebuffer(ilo);
-
/* If draw_vbo ever fails, return immediately. */
if (!draw_vbo(hw3d, ilo, &prim_generated, &prim_emitted))
return;
@@ -859,14 +759,14 @@ ilo_texture_barrier(struct pipe_context *pipe)
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_3d *hw3d = ilo->hw3d;
- if (ilo->cp->ring != INTEL_RING_RENDER)
+ if (ilo->cp->ring != ILO_CP_RING_RENDER)
return;
ilo_3d_pipeline_emit_flush(hw3d->pipeline);
/* don't know why */
if (ilo->dev->gen >= ILO_GEN(7))
- ilo_cp_flush(hw3d->cp, "texture barrier");
+ ilo_cp_flush(hw3d->cp);
}
static void
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h
index 369594aff..f73b8177a 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h
@@ -60,7 +60,6 @@ struct ilo_3d {
struct list_head time_elapsed_queries;
struct list_head prim_generated_queries;
struct list_head prim_emitted_queries;
- struct list_head pipeline_statistics_queries;
struct ilo_3d_pipeline *pipeline;
};
@@ -75,9 +74,6 @@ void
ilo_3d_cp_flushed(struct ilo_3d *hw3d);
void
-ilo_3d_own_render_ring(struct ilo_3d *hw3d);
-
-void
ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q);
void
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c
index 1f18bbbed..dee3e0ce5 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c
@@ -28,7 +28,6 @@
#include "util/u_prim.h"
#include "intel_winsys.h"
-#include "ilo_blitter.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_state.h"
@@ -82,7 +81,6 @@ ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
ilo_3d_pipeline_init_gen6(p);
break;
case ILO_GEN(7):
- case ILO_GEN(7.5):
ilo_3d_pipeline_init_gen7(p);
break;
default:
@@ -95,7 +93,7 @@ ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
p->invalidate_flags = ILO_3D_PIPELINE_INVALIDATE_ALL;
p->workaround_bo = intel_winsys_alloc_buffer(p->cp->winsys,
- "PIPE_CONTROL workaround", 4096, INTEL_DOMAIN_INSTRUCTION);
+ "PIPE_CONTROL workaround", 4096, 0);
if (!p->workaround_bo) {
ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n");
FREE(p);
@@ -173,6 +171,7 @@ ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p,
while (true) {
struct ilo_cp_jmp_buf jmp;
+ int err;
/* we will rewind if aperture check below fails */
ilo_cp_setjmp(p->cp, &jmp);
@@ -184,7 +183,8 @@ ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p,
p->emit_draw(p, ilo);
ilo_cp_assert_no_implicit_flush(p->cp, false);
- if (intel_winsys_can_submit_bo(ilo->winsys, &p->cp->bo, 1)) {
+ err = intel_winsys_check_aperture_space(ilo->winsys, &p->cp->bo, 1);
+ if (!err) {
success = true;
break;
}
@@ -198,7 +198,7 @@ ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p,
}
else {
/* flush and try again */
- ilo_cp_flush(p->cp, "out of aperture");
+ ilo_cp_flush(p->cp);
}
}
@@ -236,7 +236,7 @@ ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p)
}
/**
- * Emit PIPE_CONTROL with GEN6_PIPE_CONTROL_WRITE_TIMESTAMP post-sync op.
+ * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_TIMESTAMP post-sync op.
*/
void
ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p,
@@ -247,7 +247,7 @@ ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p,
}
/**
- * Emit PIPE_CONTROL with GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT post-sync op.
+ * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_DEPTH_COUNT post-sync op.
*/
void
ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p,
@@ -257,56 +257,6 @@ ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p,
p->emit_write_depth_count(p, bo, index);
}
-/**
- * Emit MI_STORE_REGISTER_MEM to store statistics registers.
- */
-void
-ilo_3d_pipeline_emit_write_statistics(struct ilo_3d_pipeline *p,
- struct intel_bo *bo, int index)
-{
- handle_invalid_batch_bo(p, true);
- p->emit_write_statistics(p, bo, index);
-}
-
-void
-ilo_3d_pipeline_emit_rectlist(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter)
-{
- const int max_len = ilo_3d_pipeline_estimate_size(p,
- ILO_3D_PIPELINE_RECTLIST, blitter);
-
- if (max_len > ilo_cp_space(p->cp))
- ilo_cp_flush(p->cp, "out of space");
-
- while (true) {
- struct ilo_cp_jmp_buf jmp;
-
- /* we will rewind if aperture check below fails */
- ilo_cp_setjmp(p->cp, &jmp);
-
- handle_invalid_batch_bo(p, false);
-
- ilo_cp_assert_no_implicit_flush(p->cp, true);
- p->emit_rectlist(p, blitter);
- ilo_cp_assert_no_implicit_flush(p->cp, false);
-
- if (!intel_winsys_can_submit_bo(blitter->ilo->winsys, &p->cp->bo, 1)) {
- /* rewind */
- ilo_cp_longjmp(p->cp, &jmp);
-
- /* flush and try again */
- if (!ilo_cp_empty(p->cp)) {
- ilo_cp_flush(p->cp, "out of aperture");
- continue;
- }
- }
-
- break;
- }
-
- ilo_3d_pipeline_invalidate(p, ILO_3D_PIPELINE_INVALIDATE_HW);
-}
-
void
ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p,
unsigned sample_count,
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h
index 90c626e52..f26fa83c2 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h
@@ -29,10 +29,11 @@
#define ILO_3D_PIPELINE_H
#include "ilo_common.h"
-#include "ilo_gpe.h"
+#include "ilo_context.h"
+#include "ilo_gpe_gen6.h"
+#include "ilo_gpe_gen7.h"
struct intel_bo;
-struct ilo_blitter;
struct ilo_cp;
struct ilo_context;
@@ -50,8 +51,6 @@ enum ilo_3d_pipeline_action {
ILO_3D_PIPELINE_FLUSH,
ILO_3D_PIPELINE_WRITE_TIMESTAMP,
ILO_3D_PIPELINE_WRITE_DEPTH_COUNT,
- ILO_3D_PIPELINE_WRITE_STATISTICS,
- ILO_3D_PIPELINE_RECTLIST,
};
/**
@@ -84,11 +83,103 @@ struct ilo_3d_pipeline {
void (*emit_write_depth_count)(struct ilo_3d_pipeline *pipeline,
struct intel_bo *bo, int index);
- void (*emit_write_statistics)(struct ilo_3d_pipeline *pipeline,
- struct intel_bo *bo, int index);
-
- void (*emit_rectlist)(struct ilo_3d_pipeline *pipeline,
- const struct ilo_blitter *blitter);
+ /**
+ * all GPE functions of all GENs
+ */
+#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name gen6_ ## name
+ GEN6_EMIT(STATE_BASE_ADDRESS);
+ GEN6_EMIT(STATE_SIP);
+ GEN6_EMIT(PIPELINE_SELECT);
+ GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS);
+ GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_URB);
+ GEN6_EMIT(3DSTATE_VERTEX_BUFFERS);
+ GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS);
+ GEN6_EMIT(3DSTATE_INDEX_BUFFER);
+ GEN6_EMIT(3DSTATE_VF_STATISTICS);
+ GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_CC_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_VS);
+ GEN6_EMIT(3DSTATE_GS);
+ GEN6_EMIT(3DSTATE_CLIP);
+ GEN6_EMIT(3DSTATE_SF);
+ GEN6_EMIT(3DSTATE_WM);
+ GEN6_EMIT(3DSTATE_CONSTANT_VS);
+ GEN6_EMIT(3DSTATE_CONSTANT_GS);
+ GEN6_EMIT(3DSTATE_CONSTANT_PS);
+ GEN6_EMIT(3DSTATE_SAMPLE_MASK);
+ GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE);
+ GEN6_EMIT(3DSTATE_DEPTH_BUFFER);
+ GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
+ GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
+ GEN6_EMIT(3DSTATE_LINE_STIPPLE);
+ GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS);
+ GEN6_EMIT(3DSTATE_GS_SVB_INDEX);
+ GEN6_EMIT(3DSTATE_MULTISAMPLE);
+ GEN6_EMIT(3DSTATE_STENCIL_BUFFER);
+ GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
+ GEN6_EMIT(3DSTATE_CLEAR_PARAMS);
+ GEN6_EMIT(PIPE_CONTROL);
+ GEN6_EMIT(3DPRIMITIVE);
+ GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA);
+ GEN6_EMIT(SF_VIEWPORT);
+ GEN6_EMIT(CLIP_VIEWPORT);
+ GEN6_EMIT(CC_VIEWPORT);
+ GEN6_EMIT(COLOR_CALC_STATE);
+ GEN6_EMIT(BLEND_STATE);
+ GEN6_EMIT(DEPTH_STENCIL_STATE);
+ GEN6_EMIT(SCISSOR_RECT);
+ GEN6_EMIT(BINDING_TABLE_STATE);
+ GEN6_EMIT(SURFACE_STATE);
+ GEN6_EMIT(so_SURFACE_STATE);
+ GEN6_EMIT(SAMPLER_STATE);
+ GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE);
+ GEN6_EMIT(push_constant_buffer);
+#undef GEN6_EMIT
+
+#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name gen7_ ## name
+ GEN7_EMIT(3DSTATE_DEPTH_BUFFER);
+ GEN7_EMIT(3DSTATE_CC_STATE_POINTERS);
+ GEN7_EMIT(3DSTATE_GS);
+ GEN7_EMIT(3DSTATE_SF);
+ GEN7_EMIT(3DSTATE_WM);
+ GEN7_EMIT(3DSTATE_SAMPLE_MASK);
+ GEN7_EMIT(3DSTATE_CONSTANT_HS);
+ GEN7_EMIT(3DSTATE_CONSTANT_DS);
+ GEN7_EMIT(3DSTATE_HS);
+ GEN7_EMIT(3DSTATE_TE);
+ GEN7_EMIT(3DSTATE_DS);
+ GEN7_EMIT(3DSTATE_STREAMOUT);
+ GEN7_EMIT(3DSTATE_SBE);
+ GEN7_EMIT(3DSTATE_PS);
+ GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
+ GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
+ GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS);
+ GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS);
+ GEN7_EMIT(3DSTATE_URB_VS);
+ GEN7_EMIT(3DSTATE_URB_HS);
+ GEN7_EMIT(3DSTATE_URB_DS);
+ GEN7_EMIT(3DSTATE_URB_GS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+ GEN7_EMIT(3DSTATE_SO_DECL_LIST);
+ GEN7_EMIT(3DSTATE_SO_BUFFER);
+ GEN7_EMIT(SF_CLIP_VIEWPORT);
+#undef GEN7_EMIT
/**
* HW states.
@@ -134,8 +225,6 @@ struct ilo_3d_pipeline {
uint32_t SURFACE_STATE[ILO_MAX_WM_SURFACES];
uint32_t SAMPLER_STATE;
uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS];
- uint32_t PUSH_CONSTANT_BUFFER;
- int PUSH_CONSTANT_BUFFER_size;
} wm;
} state;
};
@@ -181,14 +270,6 @@ ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p,
struct intel_bo *bo, int index);
void
-ilo_3d_pipeline_emit_write_statistics(struct ilo_3d_pipeline *p,
- struct intel_bo *bo, int index);
-
-void
-ilo_3d_pipeline_emit_rectlist(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter);
-
-void
ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p,
unsigned sample_count,
unsigned sample_index,
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c
index de47e8d45..227614047 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c
@@ -517,17 +517,12 @@ static void dump_binding_table(struct brw_context *brw, uint32_t offset,
}
}
-static bool
+static void
init_brw(struct brw_context *brw, struct ilo_3d_pipeline *p)
{
brw->intel.gen = ILO_GEN_GET_MAJOR(p->dev->gen);
+ brw->intel.batch.bo_dst.virtual = intel_bo_get_virtual(p->cp->bo);
brw->intel.batch.bo = &brw->intel.batch.bo_dst;
-
- brw->intel.batch.bo_dst.virtual = intel_bo_map(p->cp->bo, false);
- if (!brw->intel.batch.bo_dst.virtual)
- return false;
-
- return true;
}
static void
@@ -536,8 +531,7 @@ dump_3d_state(struct ilo_3d_pipeline *p)
struct brw_context brw;
int num_states, i;
- if (!init_brw(&brw, p))
- return;
+ init_brw(&brw, p);
if (brw.intel.gen >= 7) {
dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT);
@@ -620,11 +614,6 @@ dump_3d_state(struct ilo_3d_pipeline *p)
else
dump_gen7_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16);
- if (p->state.wm.PUSH_CONSTANT_BUFFER_size) {
- dump_wm_constants(&brw, p->state.wm.PUSH_CONSTANT_BUFFER,
- p->state.wm.PUSH_CONSTANT_BUFFER_size);
- }
-
dump_scissor(&brw, p->state.SCISSOR_RECT);
(void) dump_vs_state;
@@ -633,8 +622,7 @@ dump_3d_state(struct ilo_3d_pipeline *p)
(void) dump_sf_state;
(void) dump_wm_state;
(void) dump_cc_state_gen4;
-
- intel_bo_unmap(p->cp->bo);
+ (void) dump_wm_constants;
}
/**
@@ -643,6 +631,13 @@ dump_3d_state(struct ilo_3d_pipeline *p)
void
ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p)
{
+ int err;
+
ilo_cp_dump(p->cp);
- dump_3d_state(p);
+
+ err = intel_bo_map(p->cp->bo, false);
+ if (!err) {
+ dump_3d_state(p);
+ intel_bo_unmap(p->cp->bo);
+ }
}
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
index c2da385e0..eefb2f96b 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
@@ -25,16 +25,14 @@
* Chia-I Wu <olv@lunarg.com>
*/
-#include "genhw/genhw.h"
#include "util/u_dual_blend.h"
#include "util/u_prim.h"
+#include "intel_reg.h"
-#include "ilo_blitter.h"
#include "ilo_3d.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_gpe_gen6.h"
-#include "ilo_gpe_gen7.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_3d_pipeline.h"
@@ -66,9 +64,9 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
*
* The workaround below necessitates this workaround.
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_CS_STALL |
- GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD,
NULL, 0, false, p->cp);
/* the caller will emit the post-sync op */
@@ -85,8 +83,8 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
* "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
* PIPE_CONTROL with any non-zero post-sync-op is required."
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_WRITE_IMM,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_WRITE_IMMEDIATE,
p->workaround_bo, 0, false, p->cp);
}
@@ -105,9 +103,9 @@ gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p)
* requires driver to send a PIPE_CONTROL with a CS stall along with a
* Depth Flush prior to this command."
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_CS_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_CS_STALL,
0, 0, false, p->cp);
}
@@ -123,16 +121,16 @@ gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p)
* to emit a sequence of PIPE_CONTROLs prior to emitting depth related
* commands.
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false, p->cp);
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH,
NULL, 0, false, p->cp);
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false, p->cp);
}
@@ -152,8 +150,8 @@ gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
* field set (DW1 Bit 1), must be issued prior to any change to the
* value in this field (Maximum Number of Threads in 3DSTATE_WM)"
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_STALL_AT_SCOREBOARD,
NULL, 0, false, p->cp);
}
@@ -170,10 +168,10 @@ gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p)
* PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being
* buffered by VS FF, to the point that the FF dies.
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_STALL |
- GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE,
NULL, 0, false, p->cp);
}
@@ -189,7 +187,7 @@ gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_PIPELINE_SELECT(p->dev, 0x0, p->cp);
+ p->gen6_PIPELINE_SELECT(p->dev, 0x0, p->cp);
}
}
@@ -203,7 +201,7 @@ gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_STATE_SIP(p->dev, 0, p->cp);
+ p->gen6_STATE_SIP(p->dev, 0, p->cp);
}
}
@@ -218,7 +216,7 @@ gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_STATE_BASE_ADDRESS(p->dev,
+ p->gen6_STATE_BASE_ADDRESS(p->dev,
NULL, p->cp->bo, p->cp->bo, NULL, ilo->hw3d->kernel.bo,
0, 0, 0, 0, p->cp);
@@ -314,7 +312,7 @@ gen6_pipeline_common_urb(struct ilo_3d_pipeline *p,
gs_total_size = 0;
}
- gen6_emit_3DSTATE_URB(p->dev, vs_total_size, gs_total_size,
+ p->gen6_3DSTATE_URB(p->dev, vs_total_size, gs_total_size,
vs_entry_size, gs_entry_size, p->cp);
/*
@@ -340,7 +338,7 @@ gen6_pipeline_common_pointers_1(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_VIEWPORT_STATE_POINTERS */
if (session->viewport_state_changed) {
- gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev,
+ p->gen6_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev,
p->state.CLIP_VIEWPORT,
p->state.SF_VIEWPORT,
p->state.CC_VIEWPORT, p->cp);
@@ -356,7 +354,7 @@ gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p,
if (session->cc_state_blend_changed ||
session->cc_state_dsa_changed ||
session->cc_state_cc_changed) {
- gen6_emit_3DSTATE_CC_STATE_POINTERS(p->dev,
+ p->gen6_3DSTATE_CC_STATE_POINTERS(p->dev,
p->state.BLEND_STATE,
p->state.DEPTH_STENCIL_STATE,
p->state.COLOR_CALC_STATE, p->cp);
@@ -366,7 +364,7 @@ gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p,
if (session->sampler_state_vs_changed ||
session->sampler_state_gs_changed ||
session->sampler_state_fs_changed) {
- gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(p->dev,
+ p->gen6_3DSTATE_SAMPLER_STATE_POINTERS(p->dev,
p->state.vs.SAMPLER_STATE,
0,
p->state.wm.SAMPLER_STATE, p->cp);
@@ -380,7 +378,7 @@ gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_SCISSOR_STATE_POINTERS */
if (session->scissor_state_changed) {
- gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
+ p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
p->state.SCISSOR_RECT, p->cp);
}
@@ -388,7 +386,7 @@ gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p,
if (session->binding_table_vs_changed ||
session->binding_table_gs_changed ||
session->binding_table_fs_changed) {
- gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(p->dev,
+ p->gen6_3DSTATE_BINDING_TABLE_POINTERS(p->dev,
p->state.vs.BINDING_TABLE_STATE,
p->state.gs.BINDING_TABLE_STATE,
p->state.wm.BINDING_TABLE_STATE, p->cp);
@@ -400,31 +398,18 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
struct gen6_pipeline_session *session)
{
- if (p->dev->gen >= ILO_GEN(7.5)) {
- /* 3DSTATE_INDEX_BUFFER */
- if (DIRTY(IB) || session->batch_bo_changed) {
- gen6_emit_3DSTATE_INDEX_BUFFER(p->dev,
- &ilo->ib, false, p->cp);
- }
-
- /* 3DSTATE_VF */
- if (session->primitive_restart_changed) {
- gen7_emit_3DSTATE_VF(p->dev, ilo->draw->primitive_restart,
- ilo->draw->restart_index, p->cp);
- }
- }
- else {
- /* 3DSTATE_INDEX_BUFFER */
- if (DIRTY(IB) || session->primitive_restart_changed ||
- session->batch_bo_changed) {
- gen6_emit_3DSTATE_INDEX_BUFFER(p->dev,
- &ilo->ib, ilo->draw->primitive_restart, p->cp);
- }
+ /* 3DSTATE_INDEX_BUFFER */
+ if (DIRTY(IB) || session->primitive_restart_changed ||
+ session->batch_bo_changed) {
+ p->gen6_3DSTATE_INDEX_BUFFER(p->dev,
+ &ilo->ib, ilo->draw->primitive_restart, p->cp);
}
/* 3DSTATE_VERTEX_BUFFERS */
- if (DIRTY(VB) || DIRTY(VE) || session->batch_bo_changed)
- gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev, ilo->ve, &ilo->vb, p->cp);
+ if (DIRTY(VB) || DIRTY(VE) || session->batch_bo_changed) {
+ p->gen6_3DSTATE_VERTEX_BUFFERS(p->dev,
+ ilo->vb.states, ilo->vb.enabled_mask, ilo->ve, p->cp);
+ }
/* 3DSTATE_VERTEX_ELEMENTS */
if (DIRTY(VE) || DIRTY(VS)) {
@@ -449,7 +434,7 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p,
prepend_generate_ids = true;
}
- gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev, ve,
+ p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev, ve,
last_velement_edgeflag, prepend_generate_ids, p->cp);
}
}
@@ -461,16 +446,16 @@ gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_VF_STATISTICS */
if (session->hw_ctx_changed)
- gen6_emit_3DSTATE_VF_STATISTICS(p->dev, false, p->cp);
+ p->gen6_3DSTATE_VF_STATISTICS(p->dev, false, p->cp);
}
-static void
+void
gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
struct gen6_pipeline_session *session)
{
/* 3DPRIMITIVE */
- gen6_emit_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp);
+ p->gen6_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp);
p->state.has_gen6_wa_pipe_control = false;
}
@@ -492,7 +477,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
/* 3DSTATE_CONSTANT_VS */
if (emit_3dstate_constant_vs) {
- gen6_emit_3DSTATE_CONSTANT_VS(p->dev,
+ p->gen6_3DSTATE_CONSTANT_VS(p->dev,
&p->state.vs.PUSH_CONSTANT_BUFFER,
&p->state.vs.PUSH_CONSTANT_BUFFER_size,
1, p->cp);
@@ -502,7 +487,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
if (emit_3dstate_vs) {
const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count;
- gen6_emit_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp);
+ p->gen6_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp);
}
if (emit_3dstate_constant_vs && p->dev->gen == ILO_GEN(6))
@@ -516,14 +501,14 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_CONSTANT_GS */
if (session->pcb_state_gs_changed)
- gen6_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);
+ p->gen6_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);
/* 3DSTATE_GS */
if (DIRTY(GS) || DIRTY(VS) ||
session->prim_changed || session->kernel_bo_changed) {
const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
- gen6_emit_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
+ p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
}
}
@@ -583,7 +568,7 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_3DSTATE_GS_SVB_INDEX(p->dev,
+ p->gen6_3DSTATE_GS_SVB_INDEX(p->dev,
0, p->state.so_num_vertices, p->state.so_max_vertices,
false, p->cp);
@@ -600,7 +585,7 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
* 0xFFFFFFFF in order to not cause overflow in that SVBI."
*/
for (i = 1; i < 4; i++) {
- gen6_emit_3DSTATE_GS_SVB_INDEX(p->dev,
+ p->gen6_3DSTATE_GS_SVB_INDEX(p->dev,
i, 0, 0xffffffff, false, p->cp);
}
}
@@ -631,7 +616,7 @@ gen6_pipeline_clip(struct ilo_3d_pipeline *p,
}
}
- gen6_emit_3DSTATE_CLIP(p->dev, ilo->rasterizer,
+ p->gen6_3DSTATE_CLIP(p->dev, ilo->rasterizer,
ilo->fs, enable_guardband, 1, p->cp);
}
}
@@ -642,8 +627,10 @@ gen6_pipeline_sf(struct ilo_3d_pipeline *p,
struct gen6_pipeline_session *session)
{
/* 3DSTATE_SF */
- if (DIRTY(RASTERIZER) || DIRTY(FS))
- gen6_emit_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fs, p->cp);
+ if (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS)) {
+ p->gen6_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fs,
+ (ilo->gs) ? ilo->gs : ilo->vs, p->cp);
+ }
}
void
@@ -656,7 +643,7 @@ gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
+ p->gen6_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
ilo->fb.state.width, ilo->fb.state.height, p->cp);
}
}
@@ -667,26 +654,22 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p,
struct gen6_pipeline_session *session)
{
/* 3DSTATE_CONSTANT_PS */
- if (session->pcb_state_fs_changed) {
- gen6_emit_3DSTATE_CONSTANT_PS(p->dev,
- &p->state.wm.PUSH_CONSTANT_BUFFER,
- &p->state.wm.PUSH_CONSTANT_BUFFER_size,
- 1, p->cp);
- }
+ if (session->pcb_state_fs_changed)
+ p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);
/* 3DSTATE_WM */
if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) ||
DIRTY(RASTERIZER) || session->kernel_bo_changed) {
const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count;
const bool dual_blend = ilo->blend->dual_blend;
- const bool cc_may_kill = (ilo->dsa->dw_alpha ||
+ const bool cc_may_kill = (ilo->dsa->alpha.enabled ||
ilo->blend->alpha_to_coverage);
if (p->dev->gen == ILO_GEN(6) && session->hw_ctx_changed)
gen6_wa_pipe_control_wm_max_threads_stall(p);
- gen6_emit_3DSTATE_WM(p->dev, ilo->fs, num_samplers,
- ilo->rasterizer, dual_blend, cc_may_kill, 0, p->cp);
+ p->gen6_3DSTATE_WM(p->dev, ilo->fs, num_samplers,
+ ilo->rasterizer, dual_blend, cc_may_kill, p->cp);
}
}
@@ -707,11 +690,11 @@ gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
gen6_wa_pipe_control_wm_multisample_flush(p);
}
- gen6_emit_3DSTATE_MULTISAMPLE(p->dev,
+ p->gen6_3DSTATE_MULTISAMPLE(p->dev,
ilo->fb.num_samples, packed_sample_pos,
ilo->rasterizer->state.half_pixel_center, p->cp);
- gen6_emit_3DSTATE_SAMPLE_MASK(p->dev,
+ p->gen6_3DSTATE_SAMPLE_MASK(p->dev,
(ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1, p->cp);
}
}
@@ -724,37 +707,16 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
/* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
if (DIRTY(FB) || session->batch_bo_changed) {
const struct ilo_zs_surface *zs;
- struct ilo_zs_surface layer;
- uint32_t clear_params;
if (ilo->fb.state.zsbuf) {
const struct ilo_surface_cso *surface =
(const struct ilo_surface_cso *) ilo->fb.state.zsbuf;
- const struct ilo_texture_slice *slice =
- ilo_texture_get_slice(ilo_texture(surface->base.texture),
- surface->base.u.tex.level, surface->base.u.tex.first_layer);
- if (ilo->fb.offset_to_layers) {
- assert(surface->base.u.tex.first_layer ==
- surface->base.u.tex.last_layer);
-
- ilo_gpe_init_zs_surface(ilo->dev,
- ilo_texture(surface->base.texture),
- surface->base.format, surface->base.u.tex.level,
- surface->base.u.tex.first_layer, 1, true, &layer);
-
- zs = &layer;
- }
- else {
- assert(!surface->is_rt);
- zs = &surface->u.zs;
- }
-
- clear_params = slice->clear_value;
+ assert(!surface->is_rt);
+ zs = &surface->u.zs;
}
else {
zs = &ilo->fb.null_zs;
- clear_params = 0;
}
if (p->dev->gen == ILO_GEN(6)) {
@@ -762,10 +724,10 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
gen6_wa_pipe_control_wm_depth_flush(p);
}
- gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
- gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp);
- gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp);
- gen6_emit_3DSTATE_CLEAR_PARAMS(p->dev, clear_params, p->cp);
+ p->gen6_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
+
+ /* TODO */
+ p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp);
}
}
@@ -780,10 +742,10 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(p->dev,
+ p->gen6_3DSTATE_POLY_STIPPLE_PATTERN(p->dev,
&ilo->poly_stipple, p->cp);
- gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(p->dev, 0, 0, p->cp);
+ p->gen6_3DSTATE_POLY_STIPPLE_OFFSET(p->dev, 0, 0, p->cp);
}
/* 3DSTATE_LINE_STIPPLE */
@@ -791,7 +753,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_3DSTATE_LINE_STIPPLE(p->dev,
+ p->gen6_3DSTATE_LINE_STIPPLE(p->dev,
ilo->rasterizer->state.line_stipple_pattern,
ilo->rasterizer->state.line_stipple_factor + 1, p->cp);
}
@@ -801,7 +763,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_3DSTATE_AA_LINE_PARAMETERS(p->dev, p->cp);
+ p->gen6_3DSTATE_AA_LINE_PARAMETERS(p->dev, p->cp);
}
}
@@ -812,23 +774,23 @@ gen6_pipeline_state_viewports(struct ilo_3d_pipeline *p,
{
/* SF_CLIP_VIEWPORT and CC_VIEWPORT */
if (p->dev->gen >= ILO_GEN(7) && DIRTY(VIEWPORT)) {
- p->state.SF_CLIP_VIEWPORT = gen7_emit_SF_CLIP_VIEWPORT(p->dev,
+ p->state.SF_CLIP_VIEWPORT = p->gen7_SF_CLIP_VIEWPORT(p->dev,
ilo->viewport.cso, ilo->viewport.count, p->cp);
- p->state.CC_VIEWPORT = gen6_emit_CC_VIEWPORT(p->dev,
+ p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev,
ilo->viewport.cso, ilo->viewport.count, p->cp);
session->viewport_state_changed = true;
}
/* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */
else if (DIRTY(VIEWPORT)) {
- p->state.CLIP_VIEWPORT = gen6_emit_CLIP_VIEWPORT(p->dev,
+ p->state.CLIP_VIEWPORT = p->gen6_CLIP_VIEWPORT(p->dev,
ilo->viewport.cso, ilo->viewport.count, p->cp);
- p->state.SF_VIEWPORT = gen6_emit_SF_VIEWPORT(p->dev,
+ p->state.SF_VIEWPORT = p->gen6_SF_VIEWPORT(p->dev,
ilo->viewport.cso, ilo->viewport.count, p->cp);
- p->state.CC_VIEWPORT = gen6_emit_CC_VIEWPORT(p->dev,
+ p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev,
ilo->viewport.cso, ilo->viewport.count, p->cp);
session->viewport_state_changed = true;
@@ -842,8 +804,8 @@ gen6_pipeline_state_cc(struct ilo_3d_pipeline *p,
{
/* BLEND_STATE */
if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) {
- p->state.BLEND_STATE = gen6_emit_BLEND_STATE(p->dev,
- ilo->blend, &ilo->fb, ilo->dsa, p->cp);
+ p->state.BLEND_STATE = p->gen6_BLEND_STATE(p->dev,
+ ilo->blend, &ilo->fb, &ilo->dsa->alpha, p->cp);
session->cc_state_blend_changed = true;
}
@@ -851,8 +813,8 @@ gen6_pipeline_state_cc(struct ilo_3d_pipeline *p,
/* COLOR_CALC_STATE */
if (DIRTY(DSA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) {
p->state.COLOR_CALC_STATE =
- gen6_emit_COLOR_CALC_STATE(p->dev, &ilo->stencil_ref,
- ilo->dsa->alpha_ref, &ilo->blend_color, p->cp);
+ p->gen6_COLOR_CALC_STATE(p->dev, &ilo->stencil_ref,
+ ilo->dsa->alpha.ref_value, &ilo->blend_color, p->cp);
session->cc_state_cc_changed = true;
}
@@ -860,7 +822,7 @@ gen6_pipeline_state_cc(struct ilo_3d_pipeline *p,
/* DEPTH_STENCIL_STATE */
if (DIRTY(DSA)) {
p->state.DEPTH_STENCIL_STATE =
- gen6_emit_DEPTH_STENCIL_STATE(p->dev, ilo->dsa, p->cp);
+ p->gen6_DEPTH_STENCIL_STATE(p->dev, ilo->dsa, p->cp);
session->cc_state_dsa_changed = true;
}
@@ -874,7 +836,7 @@ gen6_pipeline_state_scissors(struct ilo_3d_pipeline *p,
/* SCISSOR_RECT */
if (DIRTY(SCISSOR) || DIRTY(VIEWPORT)) {
/* there should be as many scissors as there are viewports */
- p->state.SCISSOR_RECT = gen6_emit_SCISSOR_RECT(p->dev,
+ p->state.SCISSOR_RECT = p->gen6_SCISSOR_RECT(p->dev,
&ilo->scissor, ilo->viewport.count, p->cp);
session->scissor_state_changed = true;
@@ -897,31 +859,9 @@ gen6_pipeline_state_surfaces_rt(struct ilo_3d_pipeline *p,
const struct ilo_surface_cso *surface =
(const struct ilo_surface_cso *) fb->state.cbufs[i];
- if (!surface) {
- surface_state[i] =
- gen6_emit_SURFACE_STATE(p->dev, &fb->null_rt, true, p->cp);
- }
- else if (fb->offset_to_layers) {
- struct ilo_view_surface layer;
-
- assert(surface->base.u.tex.first_layer ==
- surface->base.u.tex.last_layer);
-
- ilo_gpe_init_view_surface_for_texture(ilo->dev,
- ilo_texture(surface->base.texture),
- surface->base.format,
- surface->base.u.tex.level, 1,
- surface->base.u.tex.first_layer, 1,
- true, true, &layer);
-
- surface_state[i] =
- gen6_emit_SURFACE_STATE(p->dev, &layer, true, p->cp);
- }
- else {
- assert(surface && surface->is_rt);
- surface_state[i] =
- gen6_emit_SURFACE_STATE(p->dev, &surface->u.rt, true, p->cp);
- }
+ assert(surface && surface->is_rt);
+ surface_state[i] =
+ p->gen6_SURFACE_STATE(p->dev, &surface->u.rt, true, p->cp);
}
/*
@@ -929,8 +869,14 @@ gen6_pipeline_state_surfaces_rt(struct ilo_3d_pipeline *p,
* brw_update_renderbuffer_surfaces() does. I don't know why.
*/
if (i == 0) {
+ struct ilo_view_surface null_surface;
+
+ ilo_gpe_init_view_surface_null(p->dev,
+ fb->state.width, fb->state.height,
+ 1, 0, &null_surface);
+
surface_state[i] =
- gen6_emit_SURFACE_STATE(p->dev, &fb->null_rt, true, p->cp);
+ p->gen6_SURFACE_STATE(p->dev, &null_surface, true, p->cp);
i++;
}
@@ -969,7 +915,7 @@ gen6_pipeline_state_surfaces_so(struct ilo_3d_pipeline *p,
(target < so->count) ? so->states[target] : NULL;
if (so_target) {
- surface_state[i] = gen6_emit_so_SURFACE_STATE(p->dev,
+ surface_state[i] = p->gen6_so_SURFACE_STATE(p->dev,
so_target, so_info, i, p->cp);
}
else {
@@ -1035,7 +981,7 @@ gen6_pipeline_state_surfaces_view(struct ilo_3d_pipeline *p,
(const struct ilo_view_cso *) view->states[i];
surface_state[i] =
- gen6_emit_SURFACE_STATE(p->dev, &cso->surface, false, p->cp);
+ p->gen6_SURFACE_STATE(p->dev, &cso->surface, false, p->cp);
}
else {
surface_state[i] = 0;
@@ -1056,39 +1002,45 @@ gen6_pipeline_state_surfaces_const(struct ilo_3d_pipeline *p,
{
const struct ilo_cbuf_state *cbuf = &ilo->cbuf[shader_type];
uint32_t *surface_state;
- bool *binding_table_changed;
int offset, count, i;
-
- if (!DIRTY(CBUF))
- return;
+ bool skip = false;
/* SURFACE_STATEs for constant buffers */
switch (shader_type) {
case PIPE_SHADER_VERTEX:
- offset = ILO_VS_CONST_SURFACE(0);
- surface_state = &p->state.vs.SURFACE_STATE[offset];
- binding_table_changed = &session->binding_table_vs_changed;
+ if (DIRTY(CBUF)) {
+ offset = ILO_VS_CONST_SURFACE(0);
+ surface_state = &p->state.vs.SURFACE_STATE[offset];
+
+ session->binding_table_vs_changed = true;
+ }
+ else {
+ skip = true;
+ }
break;
case PIPE_SHADER_FRAGMENT:
- offset = ILO_WM_CONST_SURFACE(0);
- surface_state = &p->state.wm.SURFACE_STATE[offset];
- binding_table_changed = &session->binding_table_fs_changed;
+ if (DIRTY(CBUF)) {
+ offset = ILO_WM_CONST_SURFACE(0);
+ surface_state = &p->state.wm.SURFACE_STATE[offset];
+
+ session->binding_table_fs_changed = true;
+ }
+ else {
+ skip = true;
+ }
break;
default:
- return;
+ skip = true;
break;
}
- /* constants are pushed via PCB */
- if (cbuf->enabled_mask == 0x1 && !cbuf->cso[0].resource) {
- memset(surface_state, 0, ILO_MAX_CONST_BUFFERS * 4);
+ if (skip)
return;
- }
count = util_last_bit(cbuf->enabled_mask);
for (i = 0; i < count; i++) {
if (cbuf->cso[i].resource) {
- surface_state[i] = gen6_emit_SURFACE_STATE(p->dev,
+ surface_state[i] = p->gen6_SURFACE_STATE(p->dev,
&cbuf->cso[i].surface, false, p->cp);
}
else {
@@ -1100,8 +1052,6 @@ gen6_pipeline_state_surfaces_const(struct ilo_3d_pipeline *p,
if (count && session->num_surfaces[shader_type] < offset + count)
session->num_surfaces[shader_type] = offset + count;
-
- *binding_table_changed = true;
}
static void
@@ -1158,7 +1108,7 @@ gen6_pipeline_state_binding_tables(struct ilo_3d_pipeline *p,
if (size < session->num_surfaces[shader_type])
size = session->num_surfaces[shader_type];
- *binding_table_state = gen6_emit_BINDING_TABLE_STATE(p->dev,
+ *binding_table_state = p->gen6_BINDING_TABLE_STATE(p->dev,
surface_state, size, p->cp);
*binding_table_state_size = size;
}
@@ -1222,13 +1172,13 @@ gen6_pipeline_state_samplers(struct ilo_3d_pipeline *p,
for (i = 0; i < num_samplers; i++) {
border_color_state[i] = (samplers[i]) ?
- gen6_emit_SAMPLER_BORDER_COLOR_STATE(p->dev,
+ p->gen6_SAMPLER_BORDER_COLOR_STATE(p->dev,
samplers[i], p->cp) : 0;
}
}
/* should we take the minimum of num_samplers and num_views? */
- *sampler_state = gen6_emit_SAMPLER_STATE(p->dev,
+ *sampler_state = p->gen6_SAMPLER_STATE(p->dev,
samplers, views,
border_color_state,
MIN2(num_samplers, num_views), p->cp);
@@ -1240,83 +1190,27 @@ gen6_pipeline_state_pcb(struct ilo_3d_pipeline *p,
struct gen6_pipeline_session *session)
{
/* push constant buffer for VS */
- if (DIRTY(VS) || DIRTY(CBUF) || DIRTY(CLIP)) {
- const int cbuf0_size = (ilo->vs) ?
- ilo_shader_get_kernel_param(ilo->vs,
- ILO_KERNEL_PCB_CBUF0_SIZE) : 0;
+ if (DIRTY(VS) || DIRTY(CLIP)) {
const int clip_state_size = (ilo->vs) ?
ilo_shader_get_kernel_param(ilo->vs,
ILO_KERNEL_VS_PCB_UCP_SIZE) : 0;
- const int total_size = cbuf0_size + clip_state_size;
- if (total_size) {
+ if (clip_state_size) {
void *pcb;
+ p->state.vs.PUSH_CONSTANT_BUFFER_size = clip_state_size;
p->state.vs.PUSH_CONSTANT_BUFFER =
- gen6_emit_push_constant_buffer(p->dev, total_size, &pcb, p->cp);
- p->state.vs.PUSH_CONSTANT_BUFFER_size = total_size;
-
- if (cbuf0_size) {
- const struct ilo_cbuf_state *cbuf =
- &ilo->cbuf[PIPE_SHADER_VERTEX];
-
- if (cbuf0_size <= cbuf->cso[0].user_buffer_size) {
- memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size);
- }
- else {
- memcpy(pcb, cbuf->cso[0].user_buffer,
- cbuf->cso[0].user_buffer_size);
- memset(pcb + cbuf->cso[0].user_buffer_size, 0,
- cbuf0_size - cbuf->cso[0].user_buffer_size);
- }
-
- pcb += cbuf0_size;
- }
-
- if (clip_state_size)
- memcpy(pcb, &ilo->clip, clip_state_size);
+ p->gen6_push_constant_buffer(p->dev,
+ p->state.vs.PUSH_CONSTANT_BUFFER_size, &pcb, p->cp);
- session->pcb_state_vs_changed = true;
+ memcpy(pcb, &ilo->clip, clip_state_size);
}
- else if (p->state.vs.PUSH_CONSTANT_BUFFER_size) {
- p->state.vs.PUSH_CONSTANT_BUFFER = 0;
+ else {
p->state.vs.PUSH_CONSTANT_BUFFER_size = 0;
-
- session->pcb_state_vs_changed = true;
- }
- }
-
- /* push constant buffer for FS */
- if (DIRTY(FS) || DIRTY(CBUF)) {
- const int cbuf0_size = (ilo->fs) ?
- ilo_shader_get_kernel_param(ilo->fs, ILO_KERNEL_PCB_CBUF0_SIZE) : 0;
-
- if (cbuf0_size) {
- const struct ilo_cbuf_state *cbuf = &ilo->cbuf[PIPE_SHADER_FRAGMENT];
- void *pcb;
-
- p->state.wm.PUSH_CONSTANT_BUFFER =
- gen6_emit_push_constant_buffer(p->dev, cbuf0_size, &pcb, p->cp);
- p->state.wm.PUSH_CONSTANT_BUFFER_size = cbuf0_size;
-
- if (cbuf0_size <= cbuf->cso[0].user_buffer_size) {
- memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size);
- }
- else {
- memcpy(pcb, cbuf->cso[0].user_buffer,
- cbuf->cso[0].user_buffer_size);
- memset(pcb + cbuf->cso[0].user_buffer_size, 0,
- cbuf0_size - cbuf->cso[0].user_buffer_size);
- }
-
- session->pcb_state_fs_changed = true;
+ p->state.vs.PUSH_CONSTANT_BUFFER = 0;
}
- else if (p->state.wm.PUSH_CONSTANT_BUFFER_size) {
- p->state.wm.PUSH_CONSTANT_BUFFER = 0;
- p->state.wm.PUSH_CONSTANT_BUFFER_size = 0;
- session->pcb_state_fs_changed = true;
- }
+ session->pcb_state_vs_changed = true;
}
}
@@ -1486,14 +1380,14 @@ ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p)
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_WRITE_NONE |
- GEN6_PIPE_CONTROL_CS_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_WRITE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_TC_FLUSH |
+ PIPE_CONTROL_NO_WRITE |
+ PIPE_CONTROL_CS_STALL,
0, 0, false, p->cp);
}
@@ -1504,9 +1398,9 @@ ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, true);
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_WRITE_TIMESTAMP,
- bo, index * sizeof(uint64_t),
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_WRITE_TIMESTAMP,
+ bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE,
true, p->cp);
}
@@ -1517,379 +1411,156 @@ ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p,
if (p->dev->gen == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_STALL |
- GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT,
- bo, index * sizeof(uint64_t),
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT,
+ bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE,
true, p->cp);
}
-void
-ilo_3d_pipeline_emit_write_statistics_gen6(struct ilo_3d_pipeline *p,
- struct intel_bo *bo, int index)
-{
- uint32_t regs[] = {
- GEN6_REG_IA_VERTICES_COUNT,
- GEN6_REG_IA_PRIMITIVES_COUNT,
- GEN6_REG_VS_INVOCATION_COUNT,
- GEN6_REG_GS_INVOCATION_COUNT,
- GEN6_REG_GS_PRIMITIVES_COUNT,
- GEN6_REG_CL_INVOCATION_COUNT,
- GEN6_REG_CL_PRIMITIVES_COUNT,
- GEN6_REG_PS_INVOCATION_COUNT,
- p->dev->gen >= ILO_GEN(7) ? GEN6_REG_HS_INVOCATION_COUNT : 0,
- p->dev->gen >= ILO_GEN(7) ? GEN6_REG_DS_INVOCATION_COUNT : 0,
- 0,
- };
- int i;
-
- p->emit_flush(p);
-
- for (i = 0; i < Elements(regs); i++) {
- const uint32_t bo_offset = (index + i) * sizeof(uint64_t);
-
- if (regs[i]) {
- /* store lower 32 bits */
- gen6_emit_MI_STORE_REGISTER_MEM(p->dev,
- bo, bo_offset, regs[i], p->cp);
- /* store higher 32 bits */
- gen6_emit_MI_STORE_REGISTER_MEM(p->dev,
- bo, bo_offset + 4, regs[i] + 4, p->cp);
- }
- else {
- gen6_emit_MI_STORE_DATA_IMM(p->dev,
- bo, bo_offset, 0, true, p->cp);
- }
- }
-}
-
-static void
-gen6_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- gen6_emit_3DSTATE_CONSTANT_VS(p->dev, NULL, NULL, 0, p->cp);
- gen6_emit_3DSTATE_VS(p->dev, NULL, 0, p->cp);
-
- gen6_wa_pipe_control_vs_const_flush(p);
-
- gen6_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);
- gen6_emit_3DSTATE_GS(p->dev, NULL, NULL, 0, p->cp);
-
- gen6_emit_3DSTATE_CLIP(p->dev, NULL, NULL, false, 0, p->cp);
- gen6_emit_3DSTATE_SF(p->dev, NULL, NULL, p->cp);
-}
-
-static void
-gen6_rectlist_wm(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- uint32_t hiz_op;
-
- switch (blitter->op) {
- case ILO_BLITTER_RECTLIST_CLEAR_ZS:
- hiz_op = GEN6_WM_DW4_DEPTH_CLEAR;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_Z:
- hiz_op = GEN6_WM_DW4_DEPTH_RESOLVE;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
- hiz_op = GEN6_WM_DW4_HIZ_RESOLVE;
- break;
- default:
- hiz_op = 0;
- break;
- }
-
- gen6_emit_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);
-
- gen6_wa_pipe_control_wm_max_threads_stall(p);
- gen6_emit_3DSTATE_WM(p->dev, NULL, 0, NULL, false, false, hiz_op, p->cp);
-}
-
-static void
-gen6_rectlist_wm_depth(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- gen6_wa_pipe_control_wm_depth_flush(p);
-
- if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
- ILO_BLITTER_USE_FB_STENCIL)) {
- gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev,
- &blitter->fb.dst.u.zs, p->cp);
- }
-
- if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
- gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev,
- &blitter->fb.dst.u.zs, p->cp);
- }
-
- if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
- gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev,
- &blitter->fb.dst.u.zs, p->cp);
- }
-
- gen6_emit_3DSTATE_CLEAR_PARAMS(p->dev,
- blitter->depth_clear_value, p->cp);
-}
-
-static void
-gen6_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- const uint32_t *packed_sample_pos = (blitter->fb.num_samples > 1) ?
- &p->packed_sample_position_4x : &p->packed_sample_position_1x;
-
- gen6_wa_pipe_control_wm_multisample_flush(p);
-
- gen6_emit_3DSTATE_MULTISAMPLE(p->dev, blitter->fb.num_samples,
- packed_sample_pos, true, p->cp);
-
- gen6_emit_3DSTATE_SAMPLE_MASK(p->dev,
- (1 << blitter->fb.num_samples) - 1, p->cp);
-}
-
-static void
-gen6_rectlist_commands(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- gen6_wa_pipe_control_post_sync(p, false);
-
- gen6_rectlist_wm_multisample(p, blitter, session);
-
- gen6_emit_STATE_BASE_ADDRESS(p->dev,
- NULL, /* General State Base */
- p->cp->bo, /* Surface State Base */
- p->cp->bo, /* Dynamic State Base */
- NULL, /* Indirect Object Base */
- NULL, /* Instruction Base */
- 0, 0, 0, 0, p->cp);
-
- gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev,
- &blitter->ve, &blitter->vb, p->cp);
-
- gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev,
- &blitter->ve, false, false, p->cp);
-
- gen6_emit_3DSTATE_URB(p->dev,
- p->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0, p->cp);
- /* 3DSTATE_URB workaround */
- if (p->state.gs.active) {
- ilo_3d_pipeline_emit_flush_gen6(p);
- p->state.gs.active = false;
- }
-
- if (blitter->uses &
- (ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_CC)) {
- gen6_emit_3DSTATE_CC_STATE_POINTERS(p->dev, 0,
- session->DEPTH_STENCIL_STATE, session->COLOR_CALC_STATE, p->cp);
- }
-
- gen6_rectlist_vs_to_sf(p, blitter, session);
- gen6_rectlist_wm(p, blitter, session);
-
- if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
- gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev,
- 0, 0, session->CC_VIEWPORT, p->cp);
- }
-
- gen6_rectlist_wm_depth(p, blitter, session);
-
- gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
- blitter->fb.width, blitter->fb.height, p->cp);
-
- gen6_emit_3DPRIMITIVE(p->dev, &blitter->draw, NULL, true, p->cp);
-}
-
-static void
-gen6_rectlist_states(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- if (blitter->uses & ILO_BLITTER_USE_DSA) {
- session->DEPTH_STENCIL_STATE =
- gen6_emit_DEPTH_STENCIL_STATE(p->dev, &blitter->dsa, p->cp);
- }
-
- if (blitter->uses & ILO_BLITTER_USE_CC) {
- session->COLOR_CALC_STATE =
- gen6_emit_COLOR_CALC_STATE(p->dev, &blitter->cc.stencil_ref,
- blitter->cc.alpha_ref, &blitter->cc.blend_color, p->cp);
- }
-
- if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
- session->CC_VIEWPORT =
- gen6_emit_CC_VIEWPORT(p->dev, &blitter->viewport, 1, p->cp);
- }
-}
-
-static void
-ilo_3d_pipeline_emit_rectlist_gen6(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter)
-{
- struct gen6_rectlist_session session;
-
- memset(&session, 0, sizeof(session));
- gen6_rectlist_states(p, blitter, &session);
- gen6_rectlist_commands(p, blitter, &session);
-}
-
static int
-gen6_pipeline_max_command_size(const struct ilo_3d_pipeline *p)
+gen6_pipeline_estimate_commands(const struct ilo_3d_pipeline *p,
+ const struct ilo_gpe_gen6 *gen6,
+ const struct ilo_context *ilo)
{
static int size;
+ enum ilo_gpe_gen6_command cmd;
+
+ if (size)
+ return size;
+
+ for (cmd = 0; cmd < ILO_GPE_GEN6_COMMAND_COUNT; cmd++) {
+ int count;
+
+ switch (cmd) {
+ case ILO_GPE_GEN6_PIPE_CONTROL:
+ /* for the workaround */
+ count = 2;
+ /* another one after 3DSTATE_URB */
+ count += 1;
+ /* and another one after 3DSTATE_CONSTANT_VS */
+ count += 1;
+ break;
+ case ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX:
+ /* there are 4 SVBIs */
+ count = 4;
+ break;
+ case ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS:
+ count = 33;
+ break;
+ case ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS:
+ count = 34;
+ break;
+ case ILO_GPE_GEN6_MEDIA_VFE_STATE:
+ case ILO_GPE_GEN6_MEDIA_CURBE_LOAD:
+ case ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
+ case ILO_GPE_GEN6_MEDIA_GATEWAY_STATE:
+ case ILO_GPE_GEN6_MEDIA_STATE_FLUSH:
+ case ILO_GPE_GEN6_MEDIA_OBJECT_WALKER:
+ /* media commands */
+ count = 0;
+ break;
+ default:
+ count = 1;
+ break;
+ }
- if (!size) {
- size += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 3;
- size += GEN6_3DSTATE_GS_SVB_INDEX__SIZE * 4;
- size += GEN6_PIPE_CONTROL__SIZE * 5;
-
- size +=
- GEN6_STATE_BASE_ADDRESS__SIZE +
- GEN6_STATE_SIP__SIZE +
- GEN6_3DSTATE_VF_STATISTICS__SIZE +
- GEN6_PIPELINE_SELECT__SIZE +
- GEN6_3DSTATE_BINDING_TABLE_POINTERS__SIZE +
- GEN6_3DSTATE_SAMPLER_STATE_POINTERS__SIZE +
- GEN6_3DSTATE_URB__SIZE +
- GEN6_3DSTATE_VERTEX_BUFFERS__SIZE +
- GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE +
- GEN6_3DSTATE_INDEX_BUFFER__SIZE +
- GEN6_3DSTATE_VIEWPORT_STATE_POINTERS__SIZE +
- GEN6_3DSTATE_CC_STATE_POINTERS__SIZE +
- GEN6_3DSTATE_SCISSOR_STATE_POINTERS__SIZE +
- GEN6_3DSTATE_VS__SIZE +
- GEN6_3DSTATE_GS__SIZE +
- GEN6_3DSTATE_CLIP__SIZE +
- GEN6_3DSTATE_SF__SIZE +
- GEN6_3DSTATE_WM__SIZE +
- GEN6_3DSTATE_SAMPLE_MASK__SIZE +
- GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE +
- GEN6_3DSTATE_DEPTH_BUFFER__SIZE +
- GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE +
- GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE +
- GEN6_3DSTATE_LINE_STIPPLE__SIZE +
- GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE +
- GEN6_3DSTATE_MULTISAMPLE__SIZE +
- GEN6_3DSTATE_STENCIL_BUFFER__SIZE +
- GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE +
- GEN6_3DSTATE_CLEAR_PARAMS__SIZE +
- GEN6_3DPRIMITIVE__SIZE;
+ if (count)
+ size += gen6->estimate_command_size(p->dev, cmd, count);
}
return size;
}
-int
-gen6_pipeline_estimate_state_size(const struct ilo_3d_pipeline *p,
- const struct ilo_context *ilo)
+static int
+gen6_pipeline_estimate_states(const struct ilo_3d_pipeline *p,
+ const struct ilo_gpe_gen6 *gen6,
+ const struct ilo_context *ilo)
{
static int static_size;
- int sh_type, size;
+ int shader_type, count, size;
if (!static_size) {
- /* 64 bytes, or 16 dwords */
- const int alignment = 64 / 4;
-
- /* pad first */
- size = alignment - 1;
-
- /* CC states */
- size += align(GEN6_BLEND_STATE__SIZE * ILO_MAX_DRAW_BUFFERS, alignment);
- size += align(GEN6_DEPTH_STENCIL_STATE__SIZE, alignment);
- size += align(GEN6_COLOR_CALC_STATE__SIZE, alignment);
-
- /* viewport arrays */
- if (p->dev->gen >= ILO_GEN(7)) {
- size +=
- align(GEN7_SF_CLIP_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 16) +
- align(GEN6_CC_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) +
- align(GEN6_SCISSOR_RECT__SIZE * ILO_MAX_VIEWPORTS, 8);
- }
- else {
- size +=
- align(GEN6_SF_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) +
- align(GEN6_CLIP_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) +
- align(GEN6_CC_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) +
- align(GEN6_SCISSOR_RECT__SIZE * ILO_MAX_VIEWPORTS, 8);
- }
+ struct {
+ enum ilo_gpe_gen6_state state;
+ int count;
+ } static_states[] = {
+ /* viewports */
+ { ILO_GPE_GEN6_SF_VIEWPORT, 1 },
+ { ILO_GPE_GEN6_CLIP_VIEWPORT, 1 },
+ { ILO_GPE_GEN6_CC_VIEWPORT, 1 },
+ /* cc */
+ { ILO_GPE_GEN6_COLOR_CALC_STATE, 1 },
+ { ILO_GPE_GEN6_BLEND_STATE, ILO_MAX_DRAW_BUFFERS },
+ { ILO_GPE_GEN6_DEPTH_STENCIL_STATE, 1 },
+ /* scissors */
+ { ILO_GPE_GEN6_SCISSOR_RECT, 1 },
+ /* binding table (vs, gs, fs) */
+ { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES },
+ { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES },
+ { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES },
+ };
+ int i;
- static_size = size;
+ for (i = 0; i < Elements(static_states); i++) {
+ static_size += gen6->estimate_state_size(p->dev,
+ static_states[i].state,
+ static_states[i].count);
+ }
}
size = static_size;
- for (sh_type = 0; sh_type < PIPE_SHADER_TYPES; sh_type++) {
- const int alignment = 32 / 4;
- int num_samplers, num_surfaces, pcb_size;
-
- /* samplers */
- num_samplers = ilo->sampler[sh_type].count;
-
- /* sampler views and constant buffers */
- num_surfaces = ilo->view[sh_type].count +
- util_bitcount(ilo->cbuf[sh_type].enabled_mask);
-
- pcb_size = 0;
+ /*
+ * render targets (fs)
+ * stream outputs (gs)
+ * sampler views (vs, fs)
+ * constant buffers (vs, fs)
+ */
+ count = ilo->fb.state.nr_cbufs;
- switch (sh_type) {
- case PIPE_SHADER_VERTEX:
- if (ilo->vs) {
- if (p->dev->gen == ILO_GEN(6)) {
- const struct pipe_stream_output_info *so_info =
- ilo_shader_get_kernel_so_info(ilo->vs);
+ if (ilo->gs) {
+ const struct pipe_stream_output_info *so_info =
+ ilo_shader_get_kernel_so_info(ilo->gs);
- /* stream outputs */
- num_surfaces += so_info->num_outputs;
- }
+ count += so_info->num_outputs;
+ }
+ else if (ilo->vs) {
+ const struct pipe_stream_output_info *so_info =
+ ilo_shader_get_kernel_so_info(ilo->vs);
- pcb_size = ilo_shader_get_kernel_param(ilo->vs,
- ILO_KERNEL_PCB_CBUF0_SIZE);
- pcb_size += ilo_shader_get_kernel_param(ilo->vs,
- ILO_KERNEL_VS_PCB_UCP_SIZE);
- }
- break;
- case PIPE_SHADER_GEOMETRY:
- if (ilo->gs && p->dev->gen == ILO_GEN(6)) {
- const struct pipe_stream_output_info *so_info =
- ilo_shader_get_kernel_so_info(ilo->gs);
+ count += so_info->num_outputs;
+ }
- /* stream outputs */
- num_surfaces += so_info->num_outputs;
- }
- break;
- case PIPE_SHADER_FRAGMENT:
- /* render targets */
- num_surfaces += ilo->fb.state.nr_cbufs;
+ for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
+ count += ilo->view[shader_type].count;
+ count += util_bitcount(ilo->cbuf[shader_type].enabled_mask);
+ }
- if (ilo->fs) {
- pcb_size = ilo_shader_get_kernel_param(ilo->fs,
- ILO_KERNEL_PCB_CBUF0_SIZE);
- }
- break;
- default:
- break;
- }
+ if (count) {
+ size += gen6->estimate_state_size(p->dev,
+ ILO_GPE_GEN6_SURFACE_STATE, count);
+ }
- /* SAMPLER_STATE array and SAMPLER_BORDER_COLORs */
- if (num_samplers) {
- size += align(GEN6_SAMPLER_STATE__SIZE * num_samplers, alignment) +
- align(GEN6_SAMPLER_BORDER_COLOR__SIZE, alignment) * num_samplers;
+ /* samplers (vs, fs) */
+ for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
+ count = ilo->sampler[shader_type].count;
+ if (count) {
+ size += gen6->estimate_state_size(p->dev,
+ ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE, count);
+ size += gen6->estimate_state_size(p->dev,
+ ILO_GPE_GEN6_SAMPLER_STATE, count);
}
+ }
- /* BINDING_TABLE_STATE and SURFACE_STATEs */
- if (num_surfaces) {
- size += align(num_surfaces, alignment) +
- align(GEN6_SURFACE_STATE__SIZE, alignment) * num_surfaces;
- }
+ /* pcb (vs) */
+ if (ilo->vs &&
+ ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) {
+ const int pcb_size =
+ ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE);
- /* PCB */
- if (pcb_size)
- size += align(pcb_size, alignment);
+ size += gen6->estimate_state_size(p->dev,
+ ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER, pcb_size);
}
return size;
@@ -1900,6 +1571,7 @@ ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p,
enum ilo_3d_pipeline_action action,
const void *arg)
{
+ const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
int size;
switch (action) {
@@ -1907,31 +1579,21 @@ ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p,
{
const struct ilo_context *ilo = arg;
- size = gen6_pipeline_max_command_size(p) +
- gen6_pipeline_estimate_state_size(p, ilo);
+ size = gen6_pipeline_estimate_commands(p, gen6, ilo) +
+ gen6_pipeline_estimate_states(p, gen6, ilo);
}
break;
case ILO_3D_PIPELINE_FLUSH:
- size = GEN6_PIPE_CONTROL__SIZE * 3;
+ size = gen6->estimate_command_size(p->dev,
+ ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3;
break;
case ILO_3D_PIPELINE_WRITE_TIMESTAMP:
- size = GEN6_PIPE_CONTROL__SIZE * 2;
+ size = gen6->estimate_command_size(p->dev,
+ ILO_GPE_GEN6_PIPE_CONTROL, 1) * 2;
break;
case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT:
- size = GEN6_PIPE_CONTROL__SIZE * 3;
- break;
- case ILO_3D_PIPELINE_WRITE_STATISTICS:
- {
- const int num_regs = 8;
- const int num_pads = 3;
-
- size = GEN6_PIPE_CONTROL__SIZE;
- size += GEN6_MI_STORE_REGISTER_MEM__SIZE * 2 * num_regs;
- size += GEN6_MI_STORE_DATA_IMM__SIZE * num_pads;
- }
- break;
- case ILO_3D_PIPELINE_RECTLIST:
- size = 64 + 256; /* states + commands */
+ size = gen6->estimate_command_size(p->dev,
+ ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3;
break;
default:
assert(!"unknown 3D pipeline action");
@@ -1945,11 +1607,64 @@ ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p,
void
ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p)
{
+ const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
+
p->estimate_size = ilo_3d_pipeline_estimate_size_gen6;
p->emit_draw = ilo_3d_pipeline_emit_draw_gen6;
p->emit_flush = ilo_3d_pipeline_emit_flush_gen6;
p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6;
p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6;
- p->emit_write_statistics = ilo_3d_pipeline_emit_write_statistics_gen6;
- p->emit_rectlist = ilo_3d_pipeline_emit_rectlist_gen6;
+
+#define GEN6_USE(p, name, from) \
+ p->gen6_ ## name = from->emit_ ## name
+ GEN6_USE(p, STATE_BASE_ADDRESS, gen6);
+ GEN6_USE(p, STATE_SIP, gen6);
+ GEN6_USE(p, PIPELINE_SELECT, gen6);
+ GEN6_USE(p, 3DSTATE_BINDING_TABLE_POINTERS, gen6);
+ GEN6_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS, gen6);
+ GEN6_USE(p, 3DSTATE_URB, gen6);
+ GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen6);
+ GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen6);
+ GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen6);
+ GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen6);
+ GEN6_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS, gen6);
+ GEN6_USE(p, 3DSTATE_CC_STATE_POINTERS, gen6);
+ GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
+ GEN6_USE(p, 3DSTATE_VS, gen6);
+ GEN6_USE(p, 3DSTATE_GS, gen6);
+ GEN6_USE(p, 3DSTATE_CLIP, gen6);
+ GEN6_USE(p, 3DSTATE_SF, gen6);
+ GEN6_USE(p, 3DSTATE_WM, gen6);
+ GEN6_USE(p, 3DSTATE_CONSTANT_VS, gen6);
+ GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen6);
+ GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen6);
+ GEN6_USE(p, 3DSTATE_SAMPLE_MASK, gen6);
+ GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen6);
+ GEN6_USE(p, 3DSTATE_DEPTH_BUFFER, gen6);
+ GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
+ GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
+ GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen6);
+ GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen6);
+ GEN6_USE(p, 3DSTATE_GS_SVB_INDEX, gen6);
+ GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen6);
+ GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen6);
+ GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
+ GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen6);
+ GEN6_USE(p, PIPE_CONTROL, gen6);
+ GEN6_USE(p, 3DPRIMITIVE, gen6);
+ GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen6);
+ GEN6_USE(p, SF_VIEWPORT, gen6);
+ GEN6_USE(p, CLIP_VIEWPORT, gen6);
+ GEN6_USE(p, CC_VIEWPORT, gen6);
+ GEN6_USE(p, COLOR_CALC_STATE, gen6);
+ GEN6_USE(p, BLEND_STATE, gen6);
+ GEN6_USE(p, DEPTH_STENCIL_STATE, gen6);
+ GEN6_USE(p, SCISSOR_RECT, gen6);
+ GEN6_USE(p, BINDING_TABLE_STATE, gen6);
+ GEN6_USE(p, SURFACE_STATE, gen6);
+ GEN6_USE(p, so_SURFACE_STATE, gen6);
+ GEN6_USE(p, SAMPLER_STATE, gen6);
+ GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen6);
+ GEN6_USE(p, push_constant_buffer, gen6);
+#undef GEN6_USE
}
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h
index 16e96c07b..a9c4051d2 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h
@@ -73,12 +73,6 @@ struct gen6_pipeline_session {
int num_surfaces[PIPE_SHADER_TYPES];
};
-struct gen6_rectlist_session {
- uint32_t DEPTH_STENCIL_STATE;
- uint32_t COLOR_CALC_STATE;
- uint32_t CC_VIEWPORT;
-};
-
void
gen6_pipeline_prepare(const struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
@@ -120,6 +114,11 @@ gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p,
struct gen6_pipeline_session *session);
void
+gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
+ const struct ilo_context *ilo,
+ struct gen6_pipeline_session *session);
+
+void
gen6_pipeline_vs(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
struct gen6_pipeline_session *session);
@@ -149,10 +148,6 @@ gen6_pipeline_update_max_svbi(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
struct gen6_pipeline_session *session);
-int
-gen6_pipeline_estimate_state_size(const struct ilo_3d_pipeline *p,
- const struct ilo_context *ilo);
-
void
ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p);
@@ -165,10 +160,6 @@ ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p,
struct intel_bo *bo, int index);
void
-ilo_3d_pipeline_emit_write_statistics_gen6(struct ilo_3d_pipeline *p,
- struct intel_bo *bo, int index);
-
-void
ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p);
#endif /* ILO_3D_PIPELINE_GEN6_H */
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
index 557e5a8a2..fc16f80a5 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
@@ -25,10 +25,10 @@
* Chia-I Wu <olv@lunarg.com>
*/
-#include "genhw/genhw.h"
#include "util/u_dual_blend.h"
+#include "intel_reg.h"
-#include "ilo_blitter.h"
+#include "ilo_common.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_gpe_gen7.h"
@@ -44,9 +44,9 @@ gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
bool change_depth_state)
{
struct intel_bo *bo = NULL;
- uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
+ uint32_t dw1 = PIPE_CONTROL_CS_STALL;
- assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5));
+ assert(p->dev->gen == ILO_GEN(7));
/* emit once */
if (p->state.has_gen6_wa_pipe_control)
@@ -81,20 +81,20 @@ gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
*/
if (change_multisample_state)
- dw1 |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ dw1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
if (change_depth_state) {
- dw1 |= GEN6_PIPE_CONTROL_WRITE_IMM;
+ dw1 |= PIPE_CONTROL_WRITE_IMMEDIATE;
bo = p->workaround_bo;
}
- gen6_emit_PIPE_CONTROL(p->dev, dw1, bo, 0, false, p->cp);
+ p->gen6_PIPE_CONTROL(p->dev, dw1, bo, 0, false, p->cp);
}
static void
gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
{
- assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5));
+ assert(p->dev->gen == ILO_GEN(7));
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 106:
@@ -105,9 +105,9 @@ gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
* needs to be sent before any combination of VS associated 3DSTATE."
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_STALL |
- GEN6_PIPE_CONTROL_WRITE_IMM,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_IMMEDIATE,
p->workaround_bo, 0, false, p->cp);
}
@@ -115,7 +115,7 @@ static void
gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
bool change_depth_buffer)
{
- assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5));
+ assert(p->dev->gen == ILO_GEN(7));
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 276:
@@ -144,36 +144,36 @@ gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
* guarantee that the pipeline from WM onwards is already flushed
* (e.g., via a preceding MI_FLUSH)."
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false, p->cp);
if (!change_depth_buffer)
return;
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH,
NULL, 0, false, p->cp);
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false, p->cp);
}
static void
-gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
+gen7_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
{
- assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5));
+ assert(p->dev->gen == ILO_GEN(7));
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 286:
*
- * "If this field (Maximum Number of Threads in 3DSTATE_PS) is changed
+ * "If this field (Maximum Number of Threads in 3DSTATE_WM) is changed
* between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
* Pixel Scoreboard set is required to be issued."
*/
- gen6_emit_PIPE_CONTROL(p->dev,
- GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
+ p->gen6_PIPE_CONTROL(p->dev,
+ PIPE_CONTROL_STALL_AT_SCOREBOARD,
NULL, 0, false, p->cp);
}
@@ -188,8 +188,7 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p,
/* 3DSTATE_URB_{VS,GS,HS,DS} */
if (DIRTY(VE) || DIRTY(VS)) {
/* the first 16KB are reserved for VS and PS PCBs */
- const int offset =
- (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384;
+ const int offset = 16 * 1024;
int vs_entry_size, vs_total_size;
vs_entry_size = (ilo->vs) ?
@@ -211,12 +210,12 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p,
gen7_wa_pipe_control_vs_depth_stall(p);
- gen7_emit_3DSTATE_URB_VS(p->dev,
+ p->gen7_3DSTATE_URB_VS(p->dev,
offset, vs_total_size, vs_entry_size, p->cp);
- gen7_emit_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp);
- gen7_emit_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp);
- gen7_emit_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp);
+ p->gen7_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp);
+ p->gen7_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp);
+ p->gen7_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp);
}
}
@@ -228,21 +227,16 @@ gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p,
/* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
if (session->hw_ctx_changed) {
/*
- * Push constant buffers are only allowed to take up at most the first
- * 16KB of the URB. Split the space evenly for VS and FS.
+ * push constant buffers are only allowed to take up at most the first
+ * 16KB of the URB
*/
- const int max_size =
- (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384;
- const int size = max_size / 2;
- int offset = 0;
+ p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev,
+ 0, 8192, p->cp);
- gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, offset, size, p->cp);
- offset += size;
+ p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev,
+ 8192, 8192, p->cp);
- gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, offset, size, p->cp);
-
- if (p->dev->gen == ILO_GEN(7))
- gen7_wa_pipe_control_cs_stall(p, true, true);
+ gen7_wa_pipe_control_cs_stall(p, true, true);
}
}
@@ -253,10 +247,10 @@ gen7_pipeline_common_pointers_1(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} */
if (session->viewport_state_changed) {
- gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev,
+ p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev,
p->state.CC_VIEWPORT, p->cp);
- gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(p->dev,
+ p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(p->dev,
p->state.SF_CLIP_VIEWPORT, p->cp);
}
}
@@ -268,19 +262,19 @@ gen7_pipeline_common_pointers_2(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_BLEND_STATE_POINTERS */
if (session->cc_state_blend_changed) {
- gen7_emit_3DSTATE_BLEND_STATE_POINTERS(p->dev,
+ p->gen7_3DSTATE_BLEND_STATE_POINTERS(p->dev,
p->state.BLEND_STATE, p->cp);
}
/* 3DSTATE_CC_STATE_POINTERS */
if (session->cc_state_cc_changed) {
- gen7_emit_3DSTATE_CC_STATE_POINTERS(p->dev,
+ p->gen7_3DSTATE_CC_STATE_POINTERS(p->dev,
p->state.COLOR_CALC_STATE, p->cp);
}
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS */
if (session->cc_state_dsa_changed) {
- gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev,
+ p->gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev,
p->state.DEPTH_STENCIL_STATE, p->cp);
}
}
@@ -294,40 +288,26 @@ gen7_pipeline_vs(struct ilo_3d_pipeline *p,
const bool emit_3dstate_sampler_state = session->sampler_state_vs_changed;
/* see gen6_pipeline_vs() */
const bool emit_3dstate_constant_vs = session->pcb_state_vs_changed;
- const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) ||
- session->kernel_bo_changed);
+ const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS));
/* emit depth stall before any of the VS commands */
if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
- emit_3dstate_constant_vs || emit_3dstate_vs)
- gen7_wa_pipe_control_vs_depth_stall(p);
+ emit_3dstate_constant_vs || emit_3dstate_vs)
+ gen7_wa_pipe_control_vs_depth_stall(p);
/* 3DSTATE_BINDING_TABLE_POINTERS_VS */
if (emit_3dstate_binding_table) {
- gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(p->dev,
- p->state.vs.BINDING_TABLE_STATE, p->cp);
+ p->gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(p->dev,
+ p->state.vs.BINDING_TABLE_STATE, p->cp);
}
/* 3DSTATE_SAMPLER_STATE_POINTERS_VS */
if (emit_3dstate_sampler_state) {
- gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(p->dev,
- p->state.vs.SAMPLER_STATE, p->cp);
- }
-
- /* 3DSTATE_CONSTANT_VS */
- if (emit_3dstate_constant_vs) {
- gen7_emit_3DSTATE_CONSTANT_VS(p->dev,
- &p->state.vs.PUSH_CONSTANT_BUFFER,
- &p->state.vs.PUSH_CONSTANT_BUFFER_size,
- 1, p->cp);
+ p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(p->dev,
+ p->state.vs.SAMPLER_STATE, p->cp);
}
- /* 3DSTATE_VS */
- if (emit_3dstate_vs) {
- const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count;
-
- gen6_emit_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp);
- }
+ gen6_pipeline_vs(p, ilo, session);
}
static void
@@ -337,13 +317,13 @@ gen7_pipeline_hs(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
if (session->hw_ctx_changed) {
- gen7_emit_3DSTATE_CONSTANT_HS(p->dev, 0, 0, 0, p->cp);
- gen7_emit_3DSTATE_HS(p->dev, NULL, 0, p->cp);
+ p->gen7_3DSTATE_CONSTANT_HS(p->dev, 0, 0, 0, p->cp);
+ p->gen7_3DSTATE_HS(p->dev, NULL, 0, p->cp);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
if (session->hw_ctx_changed)
- gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(p->dev, 0, p->cp);
+ p->gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(p->dev, 0, p->cp);
}
static void
@@ -353,7 +333,7 @@ gen7_pipeline_te(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_TE */
if (session->hw_ctx_changed)
- gen7_emit_3DSTATE_TE(p->dev, p->cp);
+ p->gen7_3DSTATE_TE(p->dev, p->cp);
}
static void
@@ -363,13 +343,13 @@ gen7_pipeline_ds(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
if (session->hw_ctx_changed) {
- gen7_emit_3DSTATE_CONSTANT_DS(p->dev, 0, 0, 0, p->cp);
- gen7_emit_3DSTATE_DS(p->dev, NULL, 0, p->cp);
+ p->gen7_3DSTATE_CONSTANT_DS(p->dev, 0, 0, 0, p->cp);
+ p->gen7_3DSTATE_DS(p->dev, NULL, 0, p->cp);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
if (session->hw_ctx_changed)
- gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(p->dev, 0, p->cp);
+ p->gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(p->dev, 0, p->cp);
}
@@ -380,13 +360,13 @@ gen7_pipeline_gs(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
if (session->hw_ctx_changed) {
- gen7_emit_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp);
- gen7_emit_3DSTATE_GS(p->dev, NULL, 0, p->cp);
+ p->gen6_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp);
+ p->gen7_3DSTATE_GS(p->dev, NULL, 0, p->cp);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
if (session->binding_table_gs_changed) {
- gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(p->dev,
+ p->gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(p->dev,
p->state.gs.BINDING_TABLE_STATE, p->cp);
}
}
@@ -422,17 +402,23 @@ gen7_pipeline_sol(struct ilo_3d_pipeline *p,
const int stride = so_info->stride[i] * 4; /* in bytes */
int base = 0;
- gen7_emit_3DSTATE_SO_BUFFER(p->dev, i, base, stride,
+ /* reset HW write offsets and offset buffer base */
+ if (!p->cp->render_ctx) {
+ ilo_cp_set_one_off_flags(p->cp, INTEL_EXEC_GEN7_SOL_RESET);
+ base += p->state.so_num_vertices * stride;
+ }
+
+ p->gen7_3DSTATE_SO_BUFFER(p->dev, i, base, stride,
ilo->so.states[i], p->cp);
}
for (; i < 4; i++)
- gen7_emit_3DSTATE_SO_BUFFER(p->dev, i, 0, 0, NULL, p->cp);
+ p->gen7_3DSTATE_SO_BUFFER(p->dev, i, 0, 0, NULL, p->cp);
}
/* 3DSTATE_SO_DECL_LIST */
if (dirty_sh && ilo->so.enabled)
- gen7_emit_3DSTATE_SO_DECL_LIST(p->dev, so_info, p->cp);
+ p->gen7_3DSTATE_SO_DECL_LIST(p->dev, so_info, p->cp);
/* 3DSTATE_STREAMOUT */
if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
@@ -440,7 +426,7 @@ gen7_pipeline_sol(struct ilo_3d_pipeline *p,
const int output_count = ilo_shader_get_kernel_param(shader,
ILO_KERNEL_OUTPUT_COUNT);
- gen7_emit_3DSTATE_STREAMOUT(p->dev, buffer_mask, output_count,
+ p->gen7_3DSTATE_STREAMOUT(p->dev, buffer_mask, output_count,
ilo->rasterizer->state.rasterizer_discard, p->cp);
}
}
@@ -451,16 +437,15 @@ gen7_pipeline_sf(struct ilo_3d_pipeline *p,
struct gen6_pipeline_session *session)
{
/* 3DSTATE_SBE */
- if (DIRTY(RASTERIZER) || DIRTY(FS))
- gen7_emit_3DSTATE_SBE(p->dev, ilo->rasterizer, ilo->fs, ilo->cp);
+ if (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS)) {
+ p->gen7_3DSTATE_SBE(p->dev, ilo->rasterizer, ilo->fs,
+ (ilo->gs) ? ilo->gs : ilo->vs, ilo->cp);
+ }
/* 3DSTATE_SF */
if (DIRTY(RASTERIZER) || DIRTY(FB)) {
- struct pipe_surface *zs = ilo->fb.state.zsbuf;
-
gen7_wa_pipe_control_cs_stall(p, true, true);
- gen7_emit_3DSTATE_SF(p->dev, ilo->rasterizer,
- (zs) ? zs->format : PIPE_FORMAT_NONE, p->cp);
+ p->gen7_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fb.state.zsbuf, p->cp);
}
}
@@ -471,32 +456,31 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
{
/* 3DSTATE_WM */
if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
- const bool cc_may_kill = (ilo->dsa->dw_alpha ||
+ const bool cc_may_kill = (ilo->dsa->alpha.enabled ||
ilo->blend->alpha_to_coverage);
- gen7_emit_3DSTATE_WM(p->dev, ilo->fs,
- ilo->rasterizer, cc_may_kill, 0, p->cp);
+ if (p->dev->gen == ILO_GEN(7) && session->hw_ctx_changed)
+ gen7_wa_pipe_control_wm_max_threads_stall(p);
+
+ p->gen7_3DSTATE_WM(p->dev, ilo->fs,
+ ilo->rasterizer, cc_may_kill, p->cp);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_PS */
if (session->binding_table_fs_changed) {
- gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(p->dev,
+ p->gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(p->dev,
p->state.wm.BINDING_TABLE_STATE, p->cp);
}
/* 3DSTATE_SAMPLER_STATE_POINTERS_PS */
if (session->sampler_state_fs_changed) {
- gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(p->dev,
+ p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(p->dev,
p->state.wm.SAMPLER_STATE, p->cp);
}
/* 3DSTATE_CONSTANT_PS */
- if (session->pcb_state_fs_changed) {
- gen7_emit_3DSTATE_CONSTANT_PS(p->dev,
- &p->state.wm.PUSH_CONSTANT_BUFFER,
- &p->state.wm.PUSH_CONSTANT_BUFFER_size,
- 1, p->cp);
- }
+ if (session->pcb_state_fs_changed)
+ p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);
/* 3DSTATE_PS */
if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) ||
@@ -504,16 +488,12 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count;
const bool dual_blend = ilo->blend->dual_blend;
- if ((p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)) &&
- session->hw_ctx_changed)
- gen7_wa_pipe_control_ps_max_threads_stall(p);
-
- gen7_emit_3DSTATE_PS(p->dev, ilo->fs, num_samplers, dual_blend, p->cp);
+ p->gen7_3DSTATE_PS(p->dev, ilo->fs, num_samplers, dual_blend, p->cp);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
if (session->scissor_state_changed) {
- gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
+ p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
p->state.SCISSOR_RECT, p->cp);
}
@@ -539,28 +519,24 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
/* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
if (DIRTY(FB) || session->batch_bo_changed) {
const struct ilo_zs_surface *zs;
- uint32_t clear_params;
if (ilo->fb.state.zsbuf) {
const struct ilo_surface_cso *surface =
(const struct ilo_surface_cso *) ilo->fb.state.zsbuf;
- const struct ilo_texture_slice *slice =
- ilo_texture_get_slice(ilo_texture(surface->base.texture),
- surface->base.u.tex.level, surface->base.u.tex.first_layer);
assert(!surface->is_rt);
zs = &surface->u.zs;
- clear_params = slice->clear_value;
}
else {
zs = &ilo->fb.null_zs;
- clear_params = 0;
}
- gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
- gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp);
- gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp);
- gen7_emit_3DSTATE_CLEAR_PARAMS(p->dev, clear_params, p->cp);
+ p->gen7_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
+ p->gen6_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp);
+ p->gen6_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp);
+
+ /* TODO */
+ p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp);
}
}
@@ -580,27 +556,17 @@ gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
(ilo->fb.num_samples > 1) ? &p->packed_sample_position_4x :
&p->packed_sample_position_1x;
- gen6_emit_3DSTATE_MULTISAMPLE(p->dev,
+ p->gen6_3DSTATE_MULTISAMPLE(p->dev,
ilo->fb.num_samples, packed_sample_pos,
ilo->rasterizer->state.half_pixel_center, p->cp);
- gen7_emit_3DSTATE_SAMPLE_MASK(p->dev,
+ p->gen7_3DSTATE_SAMPLE_MASK(p->dev,
(ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1,
ilo->fb.num_samples, p->cp);
}
}
static void
-gen7_pipeline_vf_draw(struct ilo_3d_pipeline *p,
- const struct ilo_context *ilo,
- struct gen6_pipeline_session *session)
-{
- /* 3DPRIMITIVE */
- gen7_emit_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp);
- p->state.has_gen6_wa_pipe_control = false;
-}
-
-static void
gen7_pipeline_commands(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
struct gen6_pipeline_session *session)
@@ -631,7 +597,7 @@ gen7_pipeline_commands(struct ilo_3d_pipeline *p,
gen6_pipeline_wm_raster(p, ilo, session);
gen6_pipeline_sf_rect(p, ilo, session);
gen6_pipeline_vf(p, ilo, session);
- gen7_pipeline_vf_draw(p, ilo, session);
+ gen6_pipeline_vf_draw(p, ilo, session);
}
static void
@@ -649,283 +615,130 @@ ilo_3d_pipeline_emit_draw_gen7(struct ilo_3d_pipeline *p,
gen6_pipeline_end(p, ilo, &session);
}
-static void
-gen7_rectlist_pcb_alloc(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- /*
- * Push constant buffers are only allowed to take up at most the first
- * 16KB of the URB. Split the space evenly for VS and FS.
- */
- const int max_size =
- (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384;
- const int size = max_size / 2;
- int offset = 0;
-
- gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, offset, size, p->cp);
- offset += size;
-
- gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, offset, size, p->cp);
-
- gen7_wa_pipe_control_cs_stall(p, true, true);
-}
-
-static void
-gen7_rectlist_urb(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- /* the first 16KB are reserved for VS and PS PCBs */
- const int offset =
- (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384;
-
- gen7_emit_3DSTATE_URB_VS(p->dev, offset, p->dev->urb_size - offset,
- blitter->ve.count * 4 * sizeof(float), p->cp);
-
- gen7_emit_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp);
- gen7_emit_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp);
- gen7_emit_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp);
-}
-
-static void
-gen7_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- gen7_emit_3DSTATE_CONSTANT_VS(p->dev, NULL, NULL, 0, p->cp);
- gen6_emit_3DSTATE_VS(p->dev, NULL, 0, p->cp);
-
- gen7_emit_3DSTATE_CONSTANT_HS(p->dev, NULL, NULL, 0, p->cp);
- gen7_emit_3DSTATE_HS(p->dev, NULL, 0, p->cp);
-
- gen7_emit_3DSTATE_TE(p->dev, p->cp);
-
- gen7_emit_3DSTATE_CONSTANT_DS(p->dev, NULL, NULL, 0, p->cp);
- gen7_emit_3DSTATE_DS(p->dev, NULL, 0, p->cp);
-
- gen7_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);
- gen7_emit_3DSTATE_GS(p->dev, NULL, 0, p->cp);
-
- gen7_emit_3DSTATE_STREAMOUT(p->dev, 0x0, 0, false, p->cp);
-
- gen6_emit_3DSTATE_CLIP(p->dev, NULL, NULL, false, 0, p->cp);
-
- gen7_wa_pipe_control_cs_stall(p, true, true);
-
- gen7_emit_3DSTATE_SF(p->dev, NULL, blitter->fb.dst.base.format, p->cp);
- gen7_emit_3DSTATE_SBE(p->dev, NULL, NULL, p->cp);
-}
-
-static void
-gen7_rectlist_wm(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- uint32_t hiz_op;
-
- switch (blitter->op) {
- case ILO_BLITTER_RECTLIST_CLEAR_ZS:
- hiz_op = GEN7_WM_DW1_DEPTH_CLEAR;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_Z:
- hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE;
- break;
- case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
- hiz_op = GEN7_WM_DW1_HIZ_RESOLVE;
- break;
- default:
- hiz_op = 0;
- break;
- }
-
- gen7_emit_3DSTATE_WM(p->dev, NULL, NULL, false, hiz_op, p->cp);
-
- gen7_emit_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);
-
- gen7_wa_pipe_control_ps_max_threads_stall(p);
- gen7_emit_3DSTATE_PS(p->dev, NULL, 0, false, p->cp);
-}
-
-static void
-gen7_rectlist_wm_depth(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
+static int
+gen7_pipeline_estimate_commands(const struct ilo_3d_pipeline *p,
+ const struct ilo_gpe_gen7 *gen7,
+ const struct ilo_context *ilo)
{
- gen7_wa_pipe_control_wm_depth_stall(p, true);
-
- if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
- ILO_BLITTER_USE_FB_STENCIL)) {
- gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev,
- &blitter->fb.dst.u.zs, p->cp);
- }
-
- if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
- gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev,
- &blitter->fb.dst.u.zs, p->cp);
- }
+ static int size;
+ enum ilo_gpe_gen7_command cmd;
+
+ if (size)
+ return size;
+
+ for (cmd = 0; cmd < ILO_GPE_GEN7_COMMAND_COUNT; cmd++) {
+ int count;
+
+ switch (cmd) {
+ case ILO_GPE_GEN7_PIPE_CONTROL:
+ /* for the workaround */
+ count = 2;
+ /* another one after 3DSTATE_URB */
+ count += 1;
+ /* and another one after 3DSTATE_CONSTANT_VS */
+ count += 1;
+ break;
+ case ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS:
+ count = 33;
+ break;
+ case ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS:
+ count = 34;
+ break;
+ case ILO_GPE_GEN7_MEDIA_VFE_STATE:
+ case ILO_GPE_GEN7_MEDIA_CURBE_LOAD:
+ case ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
+ case ILO_GPE_GEN7_MEDIA_STATE_FLUSH:
+ case ILO_GPE_GEN7_GPGPU_WALKER:
+ /* media commands */
+ count = 0;
+ break;
+ default:
+ count = 1;
+ break;
+ }
- if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
- gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev,
- &blitter->fb.dst.u.zs, p->cp);
+ if (count) {
+ size += gen7->estimate_command_size(p->dev,
+ cmd, count);
+ }
}
- gen7_emit_3DSTATE_CLEAR_PARAMS(p->dev,
- blitter->depth_clear_value, p->cp);
-}
-
-static void
-gen7_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- const uint32_t *packed_sample_pos =
- (blitter->fb.num_samples > 4) ? p->packed_sample_position_8x :
- (blitter->fb.num_samples > 1) ? &p->packed_sample_position_4x :
- &p->packed_sample_position_1x;
-
- gen7_wa_pipe_control_cs_stall(p, true, true);
-
- gen6_emit_3DSTATE_MULTISAMPLE(p->dev, blitter->fb.num_samples,
- packed_sample_pos, true, p->cp);
-
- gen7_emit_3DSTATE_SAMPLE_MASK(p->dev,
- (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples, p->cp);
+ return size;
}
-static void
-gen7_rectlist_commands(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- gen7_rectlist_wm_multisample(p, blitter, session);
-
- gen6_emit_STATE_BASE_ADDRESS(p->dev,
- NULL, /* General State Base */
- p->cp->bo, /* Surface State Base */
- p->cp->bo, /* Dynamic State Base */
- NULL, /* Indirect Object Base */
- NULL, /* Instruction Base */
- 0, 0, 0, 0, p->cp);
-
- gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev,
- &blitter->ve, &blitter->vb, p->cp);
-
- gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev,
- &blitter->ve, false, false, p->cp);
-
- gen7_rectlist_pcb_alloc(p, blitter, session);
-
- /* needed for any VS-related commands */
- gen7_wa_pipe_control_vs_depth_stall(p);
-
- gen7_rectlist_urb(p, blitter, session);
-
- if (blitter->uses & ILO_BLITTER_USE_DSA) {
- gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev,
- session->DEPTH_STENCIL_STATE, p->cp);
- }
-
- if (blitter->uses & ILO_BLITTER_USE_CC) {
- gen7_emit_3DSTATE_CC_STATE_POINTERS(p->dev,
- session->COLOR_CALC_STATE, p->cp);
- }
-
- gen7_rectlist_vs_to_sf(p, blitter, session);
- gen7_rectlist_wm(p, blitter, session);
+static int
+gen7_pipeline_estimate_states(const struct ilo_3d_pipeline *p,
+ const struct ilo_gpe_gen7 *gen7,
+ const struct ilo_context *ilo)
+{
+ static int static_size;
+ int shader_type, count, size;
+
+ if (!static_size) {
+ struct {
+ enum ilo_gpe_gen7_state state;
+ int count;
+ } static_states[] = {
+ /* viewports */
+ { ILO_GPE_GEN7_SF_CLIP_VIEWPORT, 1 },
+ { ILO_GPE_GEN7_CC_VIEWPORT, 1 },
+ /* cc */
+ { ILO_GPE_GEN7_COLOR_CALC_STATE, 1 },
+ { ILO_GPE_GEN7_BLEND_STATE, ILO_MAX_DRAW_BUFFERS },
+ { ILO_GPE_GEN7_DEPTH_STENCIL_STATE, 1 },
+ /* scissors */
+ { ILO_GPE_GEN7_SCISSOR_RECT, 1 },
+ /* binding table (vs, gs, fs) */
+ { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES },
+ { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES },
+ { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES },
+ };
+ int i;
- if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
- gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev,
- session->CC_VIEWPORT, p->cp);
+ for (i = 0; i < Elements(static_states); i++) {
+ static_size += gen7->estimate_state_size(p->dev,
+ static_states[i].state,
+ static_states[i].count);
+ }
}
- gen7_rectlist_wm_depth(p, blitter, session);
-
- gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
- blitter->fb.width, blitter->fb.height, p->cp);
-
- gen7_emit_3DPRIMITIVE(p->dev, &blitter->draw, NULL, true, p->cp);
-}
+ size = static_size;
-static void
-gen7_rectlist_states(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter,
- struct gen6_rectlist_session *session)
-{
- if (blitter->uses & ILO_BLITTER_USE_DSA) {
- session->DEPTH_STENCIL_STATE =
- gen6_emit_DEPTH_STENCIL_STATE(p->dev, &blitter->dsa, p->cp);
+ /*
+ * render targets (fs)
+ * sampler views (vs, fs)
+ * constant buffers (vs, fs)
+ */
+ count = ilo->fb.state.nr_cbufs;
+ for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
+ count += ilo->view[shader_type].count;
+ count += util_bitcount(ilo->cbuf[shader_type].enabled_mask);
}
- if (blitter->uses & ILO_BLITTER_USE_CC) {
- session->COLOR_CALC_STATE =
- gen6_emit_COLOR_CALC_STATE(p->dev, &blitter->cc.stencil_ref,
- blitter->cc.alpha_ref, &blitter->cc.blend_color, p->cp);
+ if (count) {
+ size += gen7->estimate_state_size(p->dev,
+ ILO_GPE_GEN7_SURFACE_STATE, count);
}
- if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
- session->CC_VIEWPORT =
- gen6_emit_CC_VIEWPORT(p->dev, &blitter->viewport, 1, p->cp);
+ /* samplers (vs, fs) */
+ for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
+ count = ilo->sampler[shader_type].count;
+ if (count) {
+ size += gen7->estimate_state_size(p->dev,
+ ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE, count);
+ size += gen7->estimate_state_size(p->dev,
+ ILO_GPE_GEN7_SAMPLER_STATE, count);
+ }
}
-}
-static void
-ilo_3d_pipeline_emit_rectlist_gen7(struct ilo_3d_pipeline *p,
- const struct ilo_blitter *blitter)
-{
- struct gen6_rectlist_session session;
-
- memset(&session, 0, sizeof(session));
- gen7_rectlist_states(p, blitter, &session);
- gen7_rectlist_commands(p, blitter, &session);
-}
-
-static int
-gen7_pipeline_max_command_size(const struct ilo_3d_pipeline *p)
-{
- static int size;
+ /* pcb (vs) */
+ if (ilo->vs &&
+ ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) {
+ const int pcb_size =
+ ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE);
- if (!size) {
- size += GEN7_3DSTATE_URB_ANY__SIZE * 4;
- size += GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_ANY__SIZE * 5;
- size += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 5;
- size += GEN7_3DSTATE_POINTERS_ANY__SIZE * (5 + 5 + 4);
- size += GEN7_3DSTATE_SO_BUFFER__SIZE * 4;
- size += GEN6_PIPE_CONTROL__SIZE * 5;
-
- size +=
- GEN6_STATE_BASE_ADDRESS__SIZE +
- GEN6_STATE_SIP__SIZE +
- GEN6_3DSTATE_VF_STATISTICS__SIZE +
- GEN6_PIPELINE_SELECT__SIZE +
- GEN6_3DSTATE_CLEAR_PARAMS__SIZE +
- GEN6_3DSTATE_DEPTH_BUFFER__SIZE +
- GEN6_3DSTATE_STENCIL_BUFFER__SIZE +
- GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE +
- GEN6_3DSTATE_VERTEX_BUFFERS__SIZE +
- GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE +
- GEN6_3DSTATE_INDEX_BUFFER__SIZE +
- GEN75_3DSTATE_VF__SIZE +
- GEN6_3DSTATE_VS__SIZE +
- GEN6_3DSTATE_GS__SIZE +
- GEN6_3DSTATE_CLIP__SIZE +
- GEN6_3DSTATE_SF__SIZE +
- GEN6_3DSTATE_WM__SIZE +
- GEN6_3DSTATE_SAMPLE_MASK__SIZE +
- GEN7_3DSTATE_HS__SIZE +
- GEN7_3DSTATE_TE__SIZE +
- GEN7_3DSTATE_DS__SIZE +
- GEN7_3DSTATE_STREAMOUT__SIZE +
- GEN7_3DSTATE_SBE__SIZE +
- GEN7_3DSTATE_PS__SIZE +
- GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE +
- GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE +
- GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE +
- GEN6_3DSTATE_LINE_STIPPLE__SIZE +
- GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE +
- GEN6_3DSTATE_MULTISAMPLE__SIZE +
- GEN7_3DSTATE_SO_DECL_LIST__SIZE +
- GEN6_3DPRIMITIVE__SIZE;
+ size += gen7->estimate_state_size(p->dev,
+ ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER, pcb_size);
}
return size;
@@ -936,6 +749,7 @@ ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p,
enum ilo_3d_pipeline_action action,
const void *arg)
{
+ const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get();
int size;
switch (action) {
@@ -943,27 +757,15 @@ ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p,
{
const struct ilo_context *ilo = arg;
- size = gen7_pipeline_max_command_size(p) +
- gen6_pipeline_estimate_state_size(p, ilo);
+ size = gen7_pipeline_estimate_commands(p, gen7, ilo) +
+ gen7_pipeline_estimate_states(p, gen7, ilo);
}
break;
case ILO_3D_PIPELINE_FLUSH:
case ILO_3D_PIPELINE_WRITE_TIMESTAMP:
case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT:
- size = GEN6_PIPE_CONTROL__SIZE;
- break;
- case ILO_3D_PIPELINE_WRITE_STATISTICS:
- {
- const int num_regs = 10;
- const int num_pads = 1;
-
- size = GEN6_PIPE_CONTROL__SIZE;
- size += GEN6_MI_STORE_REGISTER_MEM__SIZE * 2 * num_regs;
- size += GEN6_MI_STORE_DATA_IMM__SIZE * num_pads;
- }
- break;
- case ILO_3D_PIPELINE_RECTLIST:
- size = 64 + 256; /* states + commands */
+ size = gen7->estimate_command_size(p->dev,
+ ILO_GPE_GEN7_PIPE_CONTROL, 1);
break;
default:
assert(!"unknown 3D pipeline action");
@@ -977,11 +779,94 @@ ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p,
void
ilo_3d_pipeline_init_gen7(struct ilo_3d_pipeline *p)
{
+ const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get();
+
p->estimate_size = ilo_3d_pipeline_estimate_size_gen7;
p->emit_draw = ilo_3d_pipeline_emit_draw_gen7;
p->emit_flush = ilo_3d_pipeline_emit_flush_gen6;
p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6;
p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6;
- p->emit_write_statistics = ilo_3d_pipeline_emit_write_statistics_gen6;
- p->emit_rectlist = ilo_3d_pipeline_emit_rectlist_gen7;
+
+#define GEN6_USE(p, name, from) \
+ p->gen6_ ## name = from->emit_ ## name
+ GEN6_USE(p, STATE_BASE_ADDRESS, gen7);
+ GEN6_USE(p, STATE_SIP, gen7);
+ GEN6_USE(p, PIPELINE_SELECT, gen7);
+ GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen7);
+ GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen7);
+ GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen7);
+ GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen7);
+ GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen7);
+ GEN6_USE(p, 3DSTATE_VS, gen7);
+ GEN6_USE(p, 3DSTATE_CLIP, gen7);
+ GEN6_USE(p, 3DSTATE_CONSTANT_VS, gen7);
+ GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen7);
+ GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen7);
+ GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen7);
+ GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen7);
+ GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen7);
+ GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen7);
+ GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen7);
+ GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen7);
+ GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen7);
+ GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen7);
+ GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen7);
+ GEN6_USE(p, PIPE_CONTROL, gen7);
+ GEN6_USE(p, 3DPRIMITIVE, gen7);
+ GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen7);
+ GEN6_USE(p, CC_VIEWPORT, gen7);
+ GEN6_USE(p, COLOR_CALC_STATE, gen7);
+ GEN6_USE(p, BLEND_STATE, gen7);
+ GEN6_USE(p, DEPTH_STENCIL_STATE, gen7);
+ GEN6_USE(p, SCISSOR_RECT, gen7);
+ GEN6_USE(p, BINDING_TABLE_STATE, gen7);
+ GEN6_USE(p, SURFACE_STATE, gen7);
+ GEN6_USE(p, SAMPLER_STATE, gen7);
+ GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen7);
+ GEN6_USE(p, push_constant_buffer, gen7);
+#undef GEN6_USE
+
+#define GEN7_USE(p, name, from) \
+ p->gen7_ ## name = from->emit_ ## name
+ GEN7_USE(p, 3DSTATE_DEPTH_BUFFER, gen7);
+ GEN7_USE(p, 3DSTATE_CC_STATE_POINTERS, gen7);
+ GEN7_USE(p, 3DSTATE_GS, gen7);
+ GEN7_USE(p, 3DSTATE_SF, gen7);
+ GEN7_USE(p, 3DSTATE_WM, gen7);
+ GEN7_USE(p, 3DSTATE_SAMPLE_MASK, gen7);
+ GEN7_USE(p, 3DSTATE_CONSTANT_HS, gen7);
+ GEN7_USE(p, 3DSTATE_CONSTANT_DS, gen7);
+ GEN7_USE(p, 3DSTATE_HS, gen7);
+ GEN7_USE(p, 3DSTATE_TE, gen7);
+ GEN7_USE(p, 3DSTATE_DS, gen7);
+ GEN7_USE(p, 3DSTATE_STREAMOUT, gen7);
+ GEN7_USE(p, 3DSTATE_SBE, gen7);
+ GEN7_USE(p, 3DSTATE_PS, gen7);
+ GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, gen7);
+ GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_CC, gen7);
+ GEN7_USE(p, 3DSTATE_BLEND_STATE_POINTERS, gen7);
+ GEN7_USE(p, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS, gen7);
+ GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_VS, gen7);
+ GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_HS, gen7);
+ GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_DS, gen7);
+ GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_GS, gen7);
+ GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_PS, gen7);
+ GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_VS, gen7);
+ GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_HS, gen7);
+ GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_DS, gen7);
+ GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_GS, gen7);
+ GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_PS, gen7);
+ GEN7_USE(p, 3DSTATE_URB_VS, gen7);
+ GEN7_USE(p, 3DSTATE_URB_HS, gen7);
+ GEN7_USE(p, 3DSTATE_URB_DS, gen7);
+ GEN7_USE(p, 3DSTATE_URB_GS, gen7);
+ GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_VS, gen7);
+ GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_HS, gen7);
+ GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_DS, gen7);
+ GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_GS, gen7);
+ GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_PS, gen7);
+ GEN7_USE(p, 3DSTATE_SO_DECL_LIST, gen7);
+ GEN7_USE(p, 3DSTATE_SO_BUFFER, gen7);
+ GEN7_USE(p, SF_CLIP_VIEWPORT, gen7);
+#undef GEN7_USE
}
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h
index 684626d88..73a94304b 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h
@@ -155,8 +155,7 @@ struct ilo_dsa_state {
/* DEPTH_STENCIL_STATE */
uint32_t payload[3];
- uint32_t dw_alpha;
- ubyte alpha_ref;
+ struct pipe_alpha_state alpha;
};
struct ilo_blend_cso {
@@ -256,11 +255,8 @@ struct ilo_surface_cso {
struct ilo_fb_state {
struct pipe_framebuffer_state state;
- struct ilo_view_surface null_rt;
struct ilo_zs_surface null_zs;
-
unsigned num_samples;
- bool offset_to_layers;
};
struct ilo_global_binding {
@@ -383,7 +379,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
- bool is_rt, bool offset_to_layer,
+ bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf);
void
@@ -409,7 +405,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
- bool is_rt, bool offset_to_layer,
+ bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf);
static inline void
@@ -455,27 +451,28 @@ ilo_gpe_init_view_surface_for_texture(const struct ilo_dev_info *dev,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
- bool is_rt, bool offset_to_layer,
+ bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf)
{
if (dev->gen >= ILO_GEN(7)) {
ilo_gpe_init_view_surface_for_texture_gen7(dev, tex, format,
first_level, num_levels, first_layer, num_layers,
- is_rt, offset_to_layer, surf);
+ is_rt, render_cache_rw, surf);
}
else {
ilo_gpe_init_view_surface_for_texture_gen6(dev, tex, format,
first_level, num_levels, first_layer, num_layers,
- is_rt, offset_to_layer, surf);
+ is_rt, render_cache_rw, surf);
}
}
void
ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
const struct ilo_texture *tex,
- enum pipe_format format, unsigned level,
+ enum pipe_format format,
+ unsigned level,
unsigned first_layer, unsigned num_layers,
- bool offset_to_layer, struct ilo_zs_surface *zs);
+ struct ilo_zs_surface *zs);
void
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
@@ -528,9 +525,4 @@ ilo_gpe_init_fs_cso(const struct ilo_dev_info *dev,
}
}
-void
-ilo_gpe_set_fb(const struct ilo_dev_info *dev,
- const struct pipe_framebuffer_state *state,
- struct ilo_fb_state *fb);
-
#endif /* ILO_GPE_H */
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c
index 11972b968..442bef189 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c
@@ -25,12 +25,13 @@
* Chia-I Wu <olv@lunarg.com>
*/
-#include "genhw/genhw.h"
#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
#include "util/u_half.h"
+#include "brw_defines.h"
+#include "intel_reg.h"
#include "ilo_context.h"
+#include "ilo_cp.h"
#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
@@ -38,31 +39,128 @@
#include "ilo_gpe_gen6.h"
/**
+ * Translate winsys tiling to hardware tiling.
+ */
+int
+ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
+{
+ switch (tiling) {
+ case INTEL_TILING_NONE:
+ return 0;
+ case INTEL_TILING_X:
+ return BRW_SURFACE_TILED;
+ case INTEL_TILING_Y:
+ return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
+ default:
+ assert(!"unknown tiling");
+ return 0;
+ }
+}
+
+/**
+ * Translate a pipe primitive type to the matching hardware primitive type.
+ */
+int
+ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
+{
+ static const int prim_mapping[PIPE_PRIM_MAX] = {
+ [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
+ [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
+ [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
+ [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
+ [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+ [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
+ [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
+ [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
+ [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+ };
+
+ assert(prim_mapping[prim]);
+
+ return prim_mapping[prim];
+}
+
+/**
+ * Translate a pipe texture target to the matching hardware surface type.
+ */
+int
+ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
+{
+ switch (target) {
+ case PIPE_BUFFER:
+ return BRW_SURFACE_BUFFER;
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return BRW_SURFACE_1D;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return BRW_SURFACE_2D;
+ case PIPE_TEXTURE_3D:
+ return BRW_SURFACE_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return BRW_SURFACE_CUBE;
+ default:
+ assert(!"unknown texture target");
+ return BRW_SURFACE_BUFFER;
+ }
+}
+
+/**
+ * Translate a depth/stencil pipe format to the matching hardware
+ * format. Return -1 on errors.
+ */
+static int
+gen6_translate_depth_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return BRW_DEPTHFORMAT_D16_UNORM;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return BRW_DEPTHFORMAT_D32_FLOAT;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
+ default:
+ return -1;
+ }
+}
+
+/**
* Translate a pipe logicop to the matching hardware logicop.
*/
static int
gen6_translate_pipe_logicop(unsigned logicop)
{
switch (logicop) {
- case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR;
- case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR;
- case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED;
- case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED;
- case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE;
- case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT;
- case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR;
- case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND;
- case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND;
- case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV;
- case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP;
- case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED;
- case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY;
- case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE;
- case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR;
- case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET;
+ case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
+ case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
+ case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
+ case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
+ case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
+ case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
+ case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
+ case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
+ case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
+ case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
+ case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
+ case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
+ case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
+ case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
+ case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
+ case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
default:
assert(!"unknown logicop function");
- return GEN6_LOGICOP_CLEAR;
+ return BRW_LOGICOPFUNCTION_CLEAR;
}
}
@@ -73,14 +171,14 @@ static int
gen6_translate_pipe_blend(unsigned blend)
{
switch (blend) {
- case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD;
- case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT;
- case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
- case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN;
- case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX;
+ case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
+ case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+ case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
+ case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
default:
assert(!"unknown blend function");
- return GEN6_BLENDFUNCTION_ADD;
+ return BRW_BLENDFUNCTION_ADD;
};
}
@@ -91,28 +189,28 @@ static int
gen6_translate_pipe_blendfactor(unsigned blendfactor)
{
switch (blendfactor) {
- case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE;
- case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA;
- case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA;
- case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
- case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR;
- case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA;
- case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR;
- case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA;
- case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA;
- case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR;
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
+ case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
+ case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
+ case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
default:
assert(!"unknown blend factor");
- return GEN6_BLENDFACTOR_ONE;
+ return BRW_BLENDFACTOR_ONE;
};
}
@@ -123,17 +221,17 @@ static int
gen6_translate_pipe_stencil_op(unsigned stencil_op)
{
switch (stencil_op) {
- case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP;
- case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO;
- case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE;
- case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT;
- case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT;
- case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR;
- case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR;
- case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT;
+ case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
+ case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
+ case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
+ case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
+ case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
+ case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
+ case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
default:
assert(!"unknown stencil op");
- return GEN6_STENCILOP_KEEP;
+ return BRW_STENCILOP_KEEP;
}
}
@@ -144,12 +242,12 @@ static int
gen6_translate_tex_mipfilter(unsigned filter)
{
switch (filter) {
- case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST;
- case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR;
- case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE;
+ case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
+ case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
+ case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
default:
assert(!"unknown mipfilter");
- return GEN6_MIPFILTER_NONE;
+ return BRW_MIPFILTER_NONE;
}
}
@@ -160,11 +258,11 @@ static int
gen6_translate_tex_filter(unsigned filter)
{
switch (filter) {
- case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST;
- case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR;
+ case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
default:
assert(!"unknown sampler filter");
- return GEN6_MAPFILTER_NEAREST;
+ return BRW_MAPFILTER_NEAREST;
}
}
@@ -182,17 +280,39 @@ gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
}
switch (wrap) {
- case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER;
- case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR;
+ case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
default:
assert(!"unknown sampler wrap mode");
- return GEN6_TEXCOORDMODE_WRAP;
+ return BRW_TEXCOORDMODE_WRAP;
+ }
+}
+
+/**
+ * Translate a pipe DSA test function to the matching hardware compare
+ * function.
+ */
+static int
+gen6_translate_dsa_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
+ case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
+ case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
+ case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
+ case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
+ case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
+ case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
+ case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
+ default:
+ assert(!"unknown depth/stencil/alpha test function");
+ return BRW_COMPAREFUNCTION_NEVER;
}
}
@@ -207,84 +327,587 @@ gen6_translate_shadow_func(unsigned func)
* For PIPE_FUNC_x, the reference value is on the left-hand side of the
* comparison, and 1.0 is returned when the comparison is true.
*
- * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of
+ * For BRW_PREFILTER_x, the reference value is on the right-hand side of
* the comparison, and 0.0 is returned when the comparison is true.
*/
switch (func) {
- case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS;
- case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL;
- case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
- case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS;
- case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL;
- case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
- case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER;
- case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER;
+ case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
+ case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
+ case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
+ case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
+ case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
+ case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
+ case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
+ case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
default:
assert(!"unknown shadow compare function");
- return GEN6_COMPAREFUNCTION_NEVER;
+ return BRW_PREFILTER_NEVER;
}
}
/**
- * Translate a pipe DSA test function to the matching hardware compare
- * function.
+ * Translate an index size to the matching hardware index format.
*/
static int
-gen6_translate_dsa_func(unsigned func)
+gen6_translate_index_size(int size)
{
- switch (func) {
- case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER;
- case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS;
- case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
- case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL;
- case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER;
- case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
- case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL;
- case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS;
+ switch (size) {
+ case 4: return BRW_INDEX_DWORD;
+ case 2: return BRW_INDEX_WORD;
+ case 1: return BRW_INDEX_BYTE;
default:
- assert(!"unknown depth/stencil/alpha test function");
- return GEN6_COMPAREFUNCTION_NEVER;
+ assert(!"unknown index size");
+ return BRW_INDEX_BYTE;
}
}
+/**
+ * Emit STATE_BASE_ADDRESS.
+ *
+ * Ten DWords are written: the header, one relocation for each of the five
+ * BOs (general, surface, dynamic, indirect object, instruction), and then
+ * one DWord per size argument in the order general, dynamic, indirect
+ * object, instruction.  A non-zero size is written as a relocation against
+ * the matching BO; a zero size writes a constant instead (the "skip range
+ * check" branches).  Every size must be a multiple of 4096.
+ */
static void
+gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
+ struct intel_bo *general_state_bo,
+ struct intel_bo *surface_state_bo,
+ struct intel_bo *dynamic_state_bo,
+ struct intel_bo *indirect_object_bo,
+ struct intel_bo *instruction_bo,
+ uint32_t general_state_size,
+ uint32_t dynamic_state_size,
+ uint32_t indirect_object_size,
+ uint32_t instruction_size,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
+ const uint8_t cmd_len = 10;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /* 4K-page aligned */
+ assert(((general_state_size | dynamic_state_size |
+ indirect_object_size | instruction_size) & 0xfff) == 0);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+ /*
+ * NOTE(review): the value 1 passed with each base address is presumably
+ * the Modify Enable bit -- confirm against the PRM.
+ */
+ ilo_cp_write_bo(cp, 1, general_state_bo,
+ INTEL_DOMAIN_RENDER,
+ 0);
+ ilo_cp_write_bo(cp, 1, surface_state_bo,
+ INTEL_DOMAIN_SAMPLER,
+ 0);
+ ilo_cp_write_bo(cp, 1, dynamic_state_bo,
+ INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
+ 0);
+ ilo_cp_write_bo(cp, 1, indirect_object_bo,
+ 0,
+ 0);
+ ilo_cp_write_bo(cp, 1, instruction_bo,
+ INTEL_DOMAIN_INSTRUCTION,
+ 0);
+
+ /* the four size DWords follow; bit 0 is set in every variant */
+ if (general_state_size) {
+ ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
+ INTEL_DOMAIN_RENDER,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 1);
+ }
+
+ if (dynamic_state_size) {
+ ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
+ INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 0xfffff000 + 1);
+ }
+
+ if (indirect_object_size) {
+ ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
+ 0,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 0xfffff000 + 1);
+ }
+
+ if (instruction_size) {
+ ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
+ INTEL_DOMAIN_INSTRUCTION,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 1);
+ }
+
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit STATE_SIP, a two-DWord command whose single payload DWord is \p sip
+ * (presumably the System Instruction Pointer -- confirm against the PRM).
+ */
+static void
+gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
+                    uint32_t sip,
+                    struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
+   const uint8_t cmd_len = 2;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * The (cmd_len - 2) length field belongs in the command header, not in
+    * the DWord count handed to ilo_cp_begin().  The two placements only
+    * coincide because cmd_len happens to be 2 here.
+    */
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, sip);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_VF_STATISTICS, a single-DWord command whose bit 0 carries
+ * \p enable.
+ */
+static void
+gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
+                                bool enable,
+                                struct ilo_cp *cp)
+{
+   const uint8_t num_dwords = 1;
+   const uint32_t opcode = ILO_GPE_CMD(0x1, 0x0, 0x0b);
+   const uint32_t dw0 = opcode | enable;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit PIPELINE_SELECT, a single-DWord command with \p pipeline OR'ed into
+ * the low bits of the header.
+ *
+ * \param pipeline  must be 0x0 or 0x1 (asserted); one value selects the 3D
+ *                  pipeline and the other the media pipeline
+ */
+static void
+gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
+                          int pipeline,
+                          struct ilo_cp *cp)
+{
+   /* uint32_t, like every other command opcode in this file */
+   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
+   const uint8_t cmd_len = 1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /* 3D or media */
+   assert(pipeline == 0x0 || pipeline == 0x1);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | pipeline);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 8-DWord MEDIA_VFE_STATE command (valid-gen check is 6..6).
+ *
+ * \p max_threads and \p num_urb_entries are packed into DW2 together with
+ * the two gateway bits; \p urb_entry_size is packed into DW4 together with
+ * a fixed CURBE allocation size of 480.  All remaining payload DWords are
+ * written as zero.
+ */
+static void
+gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
+                          int max_threads, int num_urb_entries,
+                          int urb_entry_size,
+                          struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
+   const uint8_t cmd_len = 8;
+   uint32_t thread_dw, alloc_dw;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /* DW2 */
+   thread_dw = (max_threads - 1) << 16 |
+               num_urb_entries << 8 |
+               1 << 7 | /* Reset Gateway Timer */
+               1 << 6;  /* Bypass Gateway Control */
+
+   /* DW4 */
+   alloc_dw = urb_entry_size << 16 | /* URB Entry Allocation Size */
+              480;                   /* CURBE Allocation Size */
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, thread_dw);
+   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_cp_write(cp, alloc_dw);
+
+   /* three scoreboard DWords, all zero */
+   for (i = 0; i < 3; i++)
+      ilo_cp_write(cp, 0);
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 4-DWord MEDIA_CURBE_LOAD command (valid-gen check is 6..6).
+ *
+ * \param buf   offset of the CURBE data; must be a multiple of 32 (asserted)
+ * \param size  size of the CURBE data; rounded up to a multiple of 32 before
+ *              being written
+ */
+static void
+gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
+                           uint32_t buf, int size,
+                           struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
+   const uint8_t cmd_len = 4;
+   int curbe_size;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   assert(buf % 32 == 0);
+
+   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
+   curbe_size = align(size, 32);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_cp_write(cp, curbe_size);
+   ilo_cp_write(cp, buf);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 4-DWord MEDIA_INTERFACE_DESCRIPTOR_LOAD command (valid-gen check
+ * is 6..6).
+ *
+ * \param offset   offset of the descriptors; must be a multiple of 32
+ *                 (asserted)
+ * \param num_ids  number of interface descriptors; the total length written
+ *                 is num_ids * 8 DWords * 4 bytes
+ */
+static void
+gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
+                                          uint32_t offset, int num_ids,
+                                          struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
+   const uint8_t cmd_len = 4;
+   int total_len;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   assert(offset % 32 == 0);
+
+   /* every ID has 8 DWords */
+   total_len = num_ids * 8 * 4;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_cp_write(cp, total_len);
+   ilo_cp_write(cp, offset);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 2-DWord MEDIA_GATEWAY_STATE command (valid-gen check is 6..6).
+ *
+ * The payload DWord is \p id at bit 16, \p byte at bit 8 and
+ * \p thread_count at bit 0.
+ */
+static void
+gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
+                              int id, int byte, int thread_count,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
+   const uint8_t cmd_len = 2;
+   const uint32_t payload = id << 16 | byte << 8 | thread_count;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, payload);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 2-DWord MEDIA_STATE_FLUSH command (valid-gen check is 6..6).
+ *
+ * The payload DWord is \p thread_count_water_mark at bit 16 OR'ed with
+ * \p barrier_mask.
+ */
+static void
+gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
+                            int thread_count_water_mark,
+                            int barrier_mask,
+                            struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
+   const uint8_t cmd_len = 2;
+   const uint32_t payload = thread_count_water_mark << 16 | barrier_mask;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, payload);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Placeholder for MEDIA_OBJECT_WALKER, which is not implemented: the body
+ * only asserts and writes nothing to \p cp.
+ */
+static void
+gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp)
+{
+ assert(!"MEDIA_OBJECT_WALKER unsupported");
+}
+
+/**
+ * Emit the 4-DWord 3DSTATE_BINDING_TABLE_POINTERS command (valid-gen check
+ * is 6..6).
+ *
+ * The VS, GS and PS modify flags are always set in the header, and the three
+ * binding table values follow in that order.
+ */
+static void
+gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t vs_binding_table,
+                                         uint32_t gs_binding_table,
+                                         uint32_t ps_binding_table,
+                                         struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
+   const uint8_t cmd_len = 4;
+   const uint32_t tables[] = {
+      vs_binding_table,
+      gs_binding_table,
+      ps_binding_table,
+   };
+   unsigned stage;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) |
+                    GEN6_BINDING_TABLE_MODIFY_VS |
+                    GEN6_BINDING_TABLE_MODIFY_GS |
+                    GEN6_BINDING_TABLE_MODIFY_PS);
+
+   for (stage = 0; stage < sizeof(tables) / sizeof(tables[0]); stage++)
+      ilo_cp_write(cp, tables[stage]);
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 4-DWord 3DSTATE_SAMPLER_STATE_POINTERS command (valid-gen check
+ * is 6..6).
+ *
+ * The VS, GS and PS change flags are always set in the header, and the three
+ * sampler state values follow in that order.
+ */
+static void
+gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t vs_sampler_state,
+                                         uint32_t gs_sampler_state,
+                                         uint32_t ps_sampler_state,
+                                         struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
+   const uint8_t cmd_len = 4;
+   const uint32_t samplers[] = {
+      vs_sampler_state,
+      gs_sampler_state,
+      ps_sampler_state,
+   };
+   unsigned stage;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) |
+                    VS_SAMPLER_STATE_CHANGE |
+                    GS_SAMPLER_STATE_CHANGE |
+                    PS_SAMPLER_STATE_CHANGE);
+
+   for (stage = 0; stage < sizeof(samplers) / sizeof(samplers[0]); stage++)
+      ilo_cp_write(cp, samplers[stage]);
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 3-DWord 3DSTATE_URB command (valid-gen check is 6..6).
+ *
+ * Entry sizes are rounded up to whole 128-byte (1024-bit) URB rows, with a
+ * minimum of one row; more than five rows trips an assertion.  The number of
+ * entries for each stage is total size / row size / rows per entry, rounded
+ * down to a multiple of 4 and capped at 256.  Fewer than 24 VS entries trips
+ * an assertion; GS may have zero.
+ *
+ * \param vs_total_size, gs_total_size  URB space given to each stage, in the
+ *                                      same unit as row_size (bytes)
+ * \param vs_entry_size, gs_entry_size  size of one entry, same unit
+ */
+static void
+gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
+ int vs_total_size, int gs_total_size,
+ int vs_entry_size, int gs_entry_size,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
+ const uint8_t cmd_len = 3;
+ const int row_size = 128; /* 1024 bits */
+ int vs_alloc_size, gs_alloc_size;
+ int vs_num_entries, gs_num_entries;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ /* in 1024-bit URB rows */
+ vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
+ gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
+
+ /* the valid range is [1, 5] */
+ if (!vs_alloc_size)
+ vs_alloc_size = 1;
+ if (!gs_alloc_size)
+ gs_alloc_size = 1;
+ assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
+
+ /* the valid range is [24, 256] in multiples of 4 */
+ vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
+ if (vs_num_entries > 256)
+ vs_num_entries = 256;
+ assert(vs_num_entries >= 24);
+
+ /* the valid range is [0, 256] in multiples of 4 */
+ gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
+ if (gs_num_entries > 256)
+ gs_num_entries = 256;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ /* the size fields hold the row count minus one */
+ ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
+ vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
+ ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
+ (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_VERTEX_BUFFERS for every vertex buffer that is both mapped by
+ * \p ve and enabled in \p vbuffer_mask.
+ *
+ * \param vbuffers      pipe vertex buffers, indexed by pipe index
+ * \param vbuffer_mask  bit N set means vbuffers[N] may be emitted; only bits
+ *                      0..32 may be set
+ * \param ve            supplies the hw-index to pipe-index mapping and the
+ *                      per-buffer instance divisors
+ *
+ * Nothing is emitted when no mapped buffer is enabled.  A buffer without a
+ * resource, or with a stride above 2048, is emitted as a null vertex buffer.
+ */
+static void
+gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
+                                 const struct pipe_vertex_buffer *vbuffers,
+                                 uint64_t vbuffer_mask,
+                                 const struct ilo_ve_state *ve,
+                                 struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
+   uint8_t cmd_len;
+   unsigned hw_idx;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 82:
+    *
+    *     "From 1 to 33 VBs can be specified..."
+    *
+    * The shifted constant must be 64 bits wide: unsigned long is only 32
+    * bits on ILP32 and LLP64 targets, where shifting it by 33 is undefined.
+    */
+   assert(vbuffer_mask < ((uint64_t) 1 << 33));
+
+   if (!vbuffer_mask)
+      return;
+
+   cmd_len = 1;
+
+   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+
+      if (vbuffer_mask & ((uint64_t) 1 << pipe_idx))
+         cmd_len += 4;
+   }
+
+   /*
+    * None of the enabled buffers is referenced by the vertex elements.
+    * Emitting a bare header here would encode a length of (1 - 2).
+    */
+   if (cmd_len == 1)
+      return;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+      const unsigned instance_divisor = ve->instance_divisors[hw_idx];
+      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+      const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
+      uint32_t dw;
+
+      if (!(vbuffer_mask & ((uint64_t) 1 << pipe_idx)))
+         continue;
+
+      dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
+
+      if (instance_divisor)
+         dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
+      else
+         dw |= GEN6_VB0_ACCESS_VERTEXDATA;
+
+      if (dev->gen >= ILO_GEN(7))
+         dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+      /* use null vb if there is no buffer or the stride is out of range */
+      if (vb->buffer && vb->stride <= 2048) {
+         const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
+         const uint32_t start_offset = vb->buffer_offset;
+         /*
+          * As noted in ilo_translate_format(), we treat some 3-component
+          * formats as 4-component formats to work around hardware
+          * limitations.  Imagine the case where the vertex buffer holds a
+          * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
+          * The hardware would not be able to fetch it because the vertex
+          * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
+          * and that takes at least 8 bytes.
+          *
+          * For the workaround to work, we query the physical size, which is
+          * page aligned, to calculate end_offset so that the last vertex has
+          * a better chance to be fetched.
+          */
+         const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
+
+         dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
+
+         ilo_cp_write(cp, dw);
+         ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+         ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+         ilo_cp_write(cp, instance_divisor);
+      }
+      else {
+         /* bit 13 marks this entry as a null vertex buffer */
+         dw |= 1 << 13;
+
+         ilo_cp_write(cp, dw);
+         ilo_cp_write(cp, 0);
+         ilo_cp_write(cp, 0);
+         ilo_cp_write(cp, instance_divisor);
+      }
+   }
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Turn an already-initialized vertex element CSO into an edge flag element.
+ *
+ * The edge flag enable bit is OR'ed into payload[0], and payload[1] is
+ * replaced so that only component 0 is stored.  The source format already in
+ * payload[0] must be R8_UINT (asserted) or R32_FLOAT; the latter is rewritten
+ * in place to R32_UINT.
+ */
+static void
+ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
+ struct ilo_ve_cso *cso)
+{
+ int format;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+ *
+ * "- This bit (Edge Flag Enable) must only be ENABLED on the last
+ * valid VERTEX_ELEMENT structure.
+ *
+ * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+ * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+ *
+ * - The Source Element Format must be set to the UINT format.
+ *
+ * - [DevSNB]: Edge Flags are not supported for QUADLIST
+ * primitives. Software may elect to convert QUADLIST primitives
+ * to some set of corresponding edge-flag-supported primitive
+ * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+ */
+
+ cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
+ cso->payload[1] =
+ BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+ BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
+ BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
+ BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
+
+ /*
+ * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
+ * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
+ * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+ *
+ * Since all the hardware cares about is whether the flags are zero or not,
+ * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
+ */
+ /* the format field is extracted with a 9-bit mask */
+ format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
+ if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
+ STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
+ BRW_SURFACEFORMAT_R32_FLOAT - 1);
+
+ /* R32_UINT is R32_FLOAT - 1, so decrementing the field switches format */
+ cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
+ }
+ else {
+ assert(format == BRW_SURFACEFORMAT_R8_UINT);
+ }
+}
+
+/**
+ * Initialize a vertex element CSO that has no source buffer, format or
+ * offset: payload[0] carries only the valid bit and payload[1] the four
+ * component controls given by the caller.
+ */
+static void
+ve_init_cso_with_components(const struct ilo_dev_info *dev,
+                            int comp0, int comp1, int comp2, int comp3,
+                            struct ilo_ve_cso *cso)
+{
+   const int controls = comp0 << BRW_VE1_COMPONENT_0_SHIFT |
+                        comp1 << BRW_VE1_COMPONENT_1_SHIFT |
+                        comp2 << BRW_VE1_COMPONENT_2_SHIFT |
+                        comp3 << BRW_VE1_COMPONENT_3_SHIFT;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+
+   cso->payload[0] = GEN6_VE0_VALID;
+   cso->payload[1] = controls;
+}
+
+static void
ve_init_cso(const struct ilo_dev_info *dev,
const struct pipe_vertex_element *state,
unsigned vb_index,
struct ilo_ve_cso *cso)
{
int comp[4] = {
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
+ BRW_VE1_COMPONENT_STORE_SRC,
+ BRW_VE1_COMPONENT_STORE_SRC,
+ BRW_VE1_COMPONENT_STORE_SRC,
+ BRW_VE1_COMPONENT_STORE_SRC,
};
int format;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
switch (util_format_get_nr_components(state->src_format)) {
- case 1: comp[1] = GEN6_VFCOMP_STORE_0;
- case 2: comp[2] = GEN6_VFCOMP_STORE_0;
+ case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
+ case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
- GEN6_VFCOMP_STORE_1_INT :
- GEN6_VFCOMP_STORE_1_FP;
+ BRW_VE1_COMPONENT_STORE_1_INT :
+ BRW_VE1_COMPONENT_STORE_1_FLT;
}
format = ilo_translate_vertex_format(state->src_format);
STATIC_ASSERT(Elements(cso->payload) >= 2);
cso->payload[0] =
- vb_index << GEN6_VE_STATE_DW0_VB_INDEX__SHIFT |
- GEN6_VE_STATE_DW0_VALID |
- format << GEN6_VE_STATE_DW0_FORMAT__SHIFT |
- state->src_offset << GEN6_VE_STATE_DW0_VB_OFFSET__SHIFT;
+ vb_index << GEN6_VE0_INDEX_SHIFT |
+ GEN6_VE0_VALID |
+ format << BRW_VE0_FORMAT_SHIFT |
+ state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
cso->payload[1] =
- comp[0] << GEN6_VE_STATE_DW1_COMP0__SHIFT |
- comp[1] << GEN6_VE_STATE_DW1_COMP1__SHIFT |
- comp[2] << GEN6_VE_STATE_DW1_COMP2__SHIFT |
- comp[3] << GEN6_VE_STATE_DW1_COMP3__SHIFT;
+ comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
+ comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
+ comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
+ comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
}
void
@@ -295,7 +918,7 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev,
{
unsigned i;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
ve->count = num_states;
ve->vb_count = 0;
@@ -327,6 +950,179 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev,
}
}
+/**
+ * Emit 3DSTATE_VERTEX_ELEMENTS.
+ *
+ * \param ve                      vertex elements to emit, two DWords each
+ * \param last_velement_edgeflag  emit the last element of \p ve as an edge
+ *                                flag element (a modified copy is written;
+ *                                \p ve itself is not changed)
+ * \param prepend_generated_ids   emit one extra element ahead of \p ve that
+ *                                stores the generated vertex and instance
+ *                                IDs
+ *
+ * When there is nothing to emit at all, a single dummy element storing
+ * (0, 0, 0, 1.0) is written instead.
+ */
+static void
+gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
+                                  const struct ilo_ve_state *ve,
+                                  bool last_velement_edgeflag,
+                                  bool prepend_generated_ids,
+                                  struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
+   uint8_t cmd_len;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 93:
+    *
+    *     "Up to 34 (DevSNB+) vertex elements are supported."
+    */
+   assert(ve->count + prepend_generated_ids <= 34);
+
+   if (!ve->count && !prepend_generated_ids) {
+      struct ilo_ve_cso dummy;
+
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_1_FLT,
+            &dummy);
+
+      cmd_len = 3;
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, cmd | (cmd_len - 2));
+      ilo_cp_write_multi(cp, dummy.payload, 2);
+      ilo_cp_end(cp);
+
+      return;
+   }
+
+   cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+   if (prepend_generated_ids) {
+      struct ilo_ve_cso gen_ids;
+
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_VID,
+            BRW_VE1_COMPONENT_STORE_IID,
+            BRW_VE1_COMPONENT_NOSTORE,
+            BRW_VE1_COMPONENT_NOSTORE,
+            &gen_ids);
+
+      ilo_cp_write_multi(cp, gen_ids.payload, 2);
+   }
+
+   /*
+    * The edge flag path needs a last element to convert.  With ve->count
+    * at zero, "ve->count - 1" would wrap around and the loop would run far
+    * past the end of ve->cso, so fall through to the plain path instead.
+    */
+   if (last_velement_edgeflag && ve->count) {
+      struct ilo_ve_cso edgeflag;
+
+      for (i = 0; i < ve->count - 1; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+
+      /* i now indexes the last element; patch a copy, not the CSO itself */
+      edgeflag = ve->cso[i];
+      ve_set_cso_edgeflag(dev, &edgeflag);
+      ilo_cp_write_multi(cp, edgeflag.payload, 2);
+   }
+   else {
+      for (i = 0; i < ve->count; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+   }
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 3-DWord 3DSTATE_INDEX_BUFFER command for \p ib.
+ *
+ * Nothing is emitted when ilo_buffer(ib->hw_resource) is NULL.  The buffer
+ * range always starts at offset 0 and ends at the last byte of the last
+ * whole index that fits in buf->bo_size.
+ *
+ * \param enable_cut_index  sets BRW_CUT_INDEX_ENABLE in the header
+ */
+static void
+gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
+ const struct ilo_ib_state *ib,
+ bool enable_cut_index,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
+ const uint8_t cmd_len = 3;
+ struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
+ uint32_t start_offset, end_offset;
+ int format;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ if (!buf)
+ return;
+
+ format = gen6_translate_index_size(ib->hw_index_size);
+
+ /*
+ * set start_offset to 0 here and adjust pipe_draw_info::start with
+ * ib->draw_start_offset in 3DPRIMITIVE
+ */
+ start_offset = 0;
+ end_offset = buf->bo_size;
+
+ /* end_offset must also be aligned and is inclusive */
+ /* NOTE(review): assumes hw_index_size is non-zero and no larger than bo_size */
+ end_offset -= (end_offset % ib->hw_index_size);
+ end_offset--;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2) |
+ ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
+ format << 8);
+ ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+ ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 4-DWord 3DSTATE_VIEWPORT_STATE_POINTERS command (valid-gen check
+ * is 6..6).
+ *
+ * The CLIP, SF and CC modify flags are always set in the header, and the
+ * three viewport values follow in that order.
+ */
+static void
+gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                          uint32_t clip_viewport,
+                                          uint32_t sf_viewport,
+                                          uint32_t cc_viewport,
+                                          struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
+   const uint8_t cmd_len = 4;
+   const uint32_t viewports[] = {
+      clip_viewport,
+      sf_viewport,
+      cc_viewport,
+   };
+   unsigned unit;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) |
+                    GEN6_CLIP_VIEWPORT_MODIFY |
+                    GEN6_SF_VIEWPORT_MODIFY |
+                    GEN6_CC_VIEWPORT_MODIFY);
+
+   for (unit = 0; unit < sizeof(viewports) / sizeof(viewports[0]); unit++)
+      ilo_cp_write(cp, viewports[unit]);
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 4-DWord 3DSTATE_CC_STATE_POINTERS command (valid-gen check is
+ * 6..6).
+ *
+ * The blend, depth/stencil and color calc values are written in that order,
+ * each with bit 0 set.
+ */
+static void
+gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                    uint32_t blend_state,
+                                    uint32_t depth_stencil_state,
+                                    uint32_t color_calc_state,
+                                    struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
+   const uint8_t cmd_len = 4;
+   const uint32_t states[] = {
+      blend_state,
+      depth_stencil_state,
+      color_calc_state,
+   };
+   unsigned idx;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+   for (idx = 0; idx < sizeof(states) / sizeof(states[0]); idx++)
+      ilo_cp_write(cp, states[idx] | 1);
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 2-DWord 3DSTATE_SCISSOR_STATE_POINTERS command, whose payload is
+ * \p scissor_rect written unmodified.
+ */
+static void
+gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t scissor_rect,
+                                         struct ilo_cp *cp)
+{
+   const uint8_t num_dwords = 2;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x0f) | (num_dwords - 2);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, scissor_rect);
+   ilo_cp_end(cp);
+}
+
void
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
const struct ilo_shader_state *vs,
@@ -335,7 +1131,7 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
int start_grf, vue_read_len, max_threads;
uint32_t dw2, dw4, dw5;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
@@ -377,26 +1173,26 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
break;
case ILO_GEN(7.5):
/* see brwCreateContext() */
- max_threads = (dev->gt >= 2) ? 280 : 70;
+ max_threads = (dev->gt == 2) ? 280 : 70;
break;
default:
max_threads = 1;
break;
}
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+ dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;
- dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
- vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
- 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
+ dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
+ vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
+ 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
- dw5 = GEN6_VS_DW5_STATISTICS |
- GEN6_VS_DW5_VS_ENABLE;
+ dw5 = GEN6_VS_STATISTICS_ENABLE |
+ GEN6_VS_ENABLE;
if (dev->gen >= ILO_GEN(7.5))
- dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+ dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
else
- dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+ dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
STATIC_ASSERT(Elements(cso->payload) >= 3);
cso->payload[0] = dw2;
@@ -404,6 +1200,48 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
cso->payload[2] = dw5;
}
+/**
+ * Emit the 6-DWord 3DSTATE_VS command.
+ *
+ * With a NULL \p vs, all five payload DWords are zero.  Otherwise DW2, DW4
+ * and DW5 come from the shader's kernel CSO (payload[0..2]), DW1 is the
+ * kernel offset, and the scratch DWord is zero.
+ *
+ * \param num_samplers  rounded up to a multiple of 4 and OR'ed into DW2 in
+ *                      units of 4 samplers
+ */
+static void
+gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *vs,
+ int num_samplers,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
+ const uint8_t cmd_len = 6;
+ const struct ilo_shader_cso *cso;
+ uint32_t dw2, dw4, dw5;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /* no vertex shader: emit the command with an all-zero payload */
+ if (!vs) {
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+ return;
+ }
+
+ cso = ilo_shader_get_kernel_cso(vs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+
+ /* sampler count is expressed in groups of four, rounded up */
+ dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
+ ilo_cp_write(cp, dw2);
+ ilo_cp_write(cp, 0); /* scratch */
+ ilo_cp_write(cp, dw4);
+ ilo_cp_write(cp, dw5);
+ ilo_cp_end(cp);
+}
+
void
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
@@ -466,16 +1304,16 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
max_threads = 21;
}
- dw2 = GEN6_THREADDISP_SPF;
+ dw2 = GEN6_GS_SPF_MODE;
- dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
- 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
- start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
+ dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
+ 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
+ start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
- dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
- GEN6_GS_DW5_STATISTICS |
- GEN6_GS_DW5_SO_STATISTICS |
- GEN6_GS_DW5_RENDER_ENABLE;
+ dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_SO_STATISTICS_ENABLE |
+ GEN6_GS_RENDERING_ENABLE;
/*
* we cannot make use of GEN6_GS_REORDER because it will reorder
@@ -483,19 +1321,19 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
* (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
* (2N+2, 2N+1, 2N+3)).
*/
- dw6 = GEN6_GS_DW6_GS_ENABLE;
+ dw6 = GEN6_GS_ENABLE;
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
- dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
+ dw6 |= GEN6_GS_DISCARD_ADJACENCY;
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
const uint32_t svbi_post_inc =
ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
- dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
+ dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
if (svbi_post_inc) {
- dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
- svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
+ dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
+ svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
}
}
@@ -506,6 +1344,75 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
cso->payload[3] = dw6;
}
+/**
+ * Emit the 7-DWord 3DSTATE_GS command (valid-gen check is 6..6).
+ *
+ * The payload is chosen from one of three sources:
+ *  - \p gs is set: the kernel offset and CSO payload of \p gs;
+ *  - \p gs is NULL but \p vs has ILO_KERNEL_VS_GEN6_SO set: the kernel
+ *    offset of \p vs plus a per-primitive-type offset selected by
+ *    \p verts_per_prim, with a GS CSO built on the fly from \p vs;
+ *  - otherwise: a fixed payload with a URB read length of 1 and only the
+ *    statistics bit set.
+ *
+ * \param verts_per_prim  1 selects the point offset, 2 the line offset, and
+ *                        any other value the triangle offset
+ */
+static void
+gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ const struct ilo_shader_state *vs,
+ int verts_per_prim,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
+ const uint8_t cmd_len = 7;
+ uint32_t dw1, dw2, dw4, dw5, dw6;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ if (gs) {
+ const struct ilo_shader_cso *cso;
+
+ dw1 = ilo_shader_get_kernel_offset(gs);
+
+ cso = ilo_shader_get_kernel_cso(gs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+ dw6 = cso->payload[3];
+ }
+ else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
+ struct ilo_shader_cso cso;
+ enum ilo_kernel_param param;
+
+ switch (verts_per_prim) {
+ case 1:
+ param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+ break;
+ case 2:
+ param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+ break;
+ default:
+ param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+ break;
+ }
+
+ /* the kernel to run lives inside the VS kernel, at the selected offset */
+ dw1 = ilo_shader_get_kernel_offset(vs) +
+ ilo_shader_get_kernel_param(vs, param);
+
+ /* cannot use VS's CSO */
+ ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
+ dw2 = cso.payload[0];
+ dw4 = cso.payload[1];
+ dw5 = cso.payload[2];
+ dw6 = cso.payload[3];
+ }
+ else {
+ /* no kernel: dw6 stays 0, so the GS enable bit is never set here */
+ dw1 = 0;
+ dw2 = 0;
+ dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
+ dw5 = GEN6_GS_STATISTICS_ENABLE;
+ dw6 = 0;
+ }
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, dw1);
+ ilo_cp_write(cp, dw2);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, dw4);
+ ilo_cp_write(cp, dw5);
+ ilo_cp_write(cp, dw6);
+ ilo_cp_end(cp);
+}
+
void
ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
@@ -513,9 +1420,9 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
{
uint32_t dw1, dw2, dw3;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
- dw1 = GEN6_CLIP_DW1_STATISTICS;
+ dw1 = GEN6_CLIP_STATISTICS_ENABLE;
if (dev->gen >= ILO_GEN(7)) {
/*
@@ -528,53 +1435,53 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
* What does this mean?
*/
dw1 |= 0 << 19 |
- GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
+ GEN7_CLIP_EARLY_CULL;
if (state->front_ccw)
- dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
+ dw1 |= GEN7_CLIP_WINDING_CCW;
switch (state->cull_face) {
case PIPE_FACE_NONE:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE;
+ dw1 |= GEN7_CLIP_CULLMODE_NONE;
break;
case PIPE_FACE_FRONT:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT;
+ dw1 |= GEN7_CLIP_CULLMODE_FRONT;
break;
case PIPE_FACE_BACK:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK;
+ dw1 |= GEN7_CLIP_CULLMODE_BACK;
break;
case PIPE_FACE_FRONT_AND_BACK:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH;
+ dw1 |= GEN7_CLIP_CULLMODE_BOTH;
break;
}
}
- dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
- GEN6_CLIP_DW2_XY_TEST_ENABLE |
- state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
- GEN6_CLIP_DW2_CLIPMODE_NORMAL;
+ dw2 = GEN6_CLIP_ENABLE |
+ GEN6_CLIP_XY_TEST |
+ state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
+ GEN6_CLIP_MODE_NORMAL;
if (state->clip_halfz)
- dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
+ dw2 |= GEN6_CLIP_API_D3D;
else
- dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;
+ dw2 |= GEN6_CLIP_API_OGL;
if (state->depth_clip)
- dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
+ dw2 |= GEN6_CLIP_Z_TEST;
if (state->flatshade_first) {
- dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
- 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
- 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+ dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
+ 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
+ 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
}
else {
- dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
- 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
- 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+ dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
+ 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
+ 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
}
- dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
- 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT;
+ dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+ 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
clip->payload[0] = dw1;
clip->payload[1] = dw2;
@@ -594,6 +1501,53 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
clip->can_enable_guardband = false;
}
+/**
+ * Emit the 4-DWord 3DSTATE_CLIP command.
+ *
+ * With a NULL \p rasterizer, DW1..DW3 are all zero.  Otherwise they start
+ * from rasterizer->clip.payload[0..2] and are adjusted as follows:
+ *  - the guardband test is added to DW2 only when both \p enable_guardband
+ *    and rasterizer->clip.can_enable_guardband are set;
+ *  - non-perspective barycentric is enabled in DW2 when \p fs reports any
+ *    of the three non-perspective interpolation modes;
+ *  - DW3 gets GEN6_CLIP_FORCE_ZERO_RTAINDEX and (num_viewports - 1).
+ *
+ * NOTE(review): unlike the other emitters around it, this function has no
+ * ILO_GPE_VALID_GEN() check -- confirm whether that is intentional.
+ */
+static void
+gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ bool enable_guardband,
+ int num_viewports,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
+ const uint8_t cmd_len = 4;
+ uint32_t dw1, dw2, dw3;
+
+ if (rasterizer) {
+ int interps;
+
+ dw1 = rasterizer->clip.payload[0];
+ dw2 = rasterizer->clip.payload[1];
+ dw3 = rasterizer->clip.payload[2];
+
+ if (enable_guardband && rasterizer->clip.can_enable_guardband)
+ dw2 |= GEN6_CLIP_GB_TEST;
+
+ /* a NULL fs counts as using no barycentric interpolation modes */
+ interps = (fs) ? ilo_shader_get_kernel_param(fs,
+ ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
+
+ if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
+ 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
+ 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
+ dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
+
+ dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
+ (num_viewports - 1);
+ }
+ else {
+ dw1 = 0;
+ dw2 = 0;
+ dw3 = 0;
+ }
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, dw1);
+ ilo_cp_write(cp, dw2);
+ ilo_cp_write(cp, dw3);
+ ilo_cp_end(cp);
+}
+
void
ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
@@ -603,7 +1557,7 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
int line_width, point_width;
uint32_t dw1, dw2, dw3;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
/*
* Scale the constant term. The minimum representable value used by the HW
@@ -622,8 +1576,8 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
* should be cleared if clipping is disabled or Statistics Enable in
* CLIP_STATE is clear."
*/
- dw1 = GEN7_SF_DW1_STATISTICS |
- GEN7_SF_DW1_VIEWPORT_ENABLE;
+ dw1 = GEN6_SF_STATISTICS_ENABLE |
+ GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
/* XXX GEN6 path seems to work fine for GEN7 */
if (false && dev->gen >= ILO_GEN(7)) {
@@ -638,11 +1592,11 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
*/
if (state->offset_tri || state->offset_line || state->offset_point) {
/* XXX need to scale offset_const according to the depth format */
- dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;
+ dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
- GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
- GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
+ GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
+ GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
}
else {
offset_const = 0.0f;
@@ -652,39 +1606,39 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
}
else {
if (state->offset_tri)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
if (state->offset_line)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
if (state->offset_point)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
}
switch (state->fill_front) {
case PIPE_POLYGON_MODE_FILL:
- dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
+ dw1 |= GEN6_SF_FRONT_SOLID;
break;
case PIPE_POLYGON_MODE_LINE:
- dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
+ dw1 |= GEN6_SF_FRONT_WIREFRAME;
break;
case PIPE_POLYGON_MODE_POINT:
- dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
+ dw1 |= GEN6_SF_FRONT_POINT;
break;
}
switch (state->fill_back) {
case PIPE_POLYGON_MODE_FILL:
- dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
+ dw1 |= GEN6_SF_BACK_SOLID;
break;
case PIPE_POLYGON_MODE_LINE:
- dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
+ dw1 |= GEN6_SF_BACK_WIREFRAME;
break;
case PIPE_POLYGON_MODE_POINT:
- dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
+ dw1 |= GEN6_SF_BACK_POINT;
break;
}
if (state->front_ccw)
- dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;
+ dw1 |= GEN6_SF_WINDING_CCW;
dw2 = 0;
@@ -702,22 +1656,22 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
*
* TODO We do not check those yet.
*/
- dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
- GEN7_SF_DW2_AA_LINE_CAP_1_0;
+ dw2 |= GEN6_SF_LINE_AA_ENABLE |
+ GEN6_SF_LINE_END_CAP_WIDTH_1_0;
}
switch (state->cull_face) {
case PIPE_FACE_NONE:
- dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
+ dw2 |= GEN6_SF_CULL_NONE;
break;
case PIPE_FACE_FRONT:
- dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
+ dw2 |= GEN6_SF_CULL_FRONT;
break;
case PIPE_FACE_BACK:
- dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
+ dw2 |= GEN6_SF_CULL_BACK;
break;
case PIPE_FACE_FRONT_AND_BACK:
- dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
+ dw2 |= GEN6_SF_CULL_BOTH;
break;
}
@@ -738,33 +1692,30 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
line_width = 0;
}
- dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-
- if (dev->gen >= ILO_GEN(7.5) && state->line_stipple_enable)
- dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
+ dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
if (state->scissor)
- dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
+ dw2 |= GEN6_SF_SCISSOR_ENABLE;
- dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
- GEN7_SF_DW3_SUBPIXEL_8BITS;
+ dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
+ GEN6_SF_VERTEX_SUBPIXEL_8BITS;
if (state->line_last_pixel)
dw3 |= 1 << 31;
if (state->flatshade_first) {
- dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+ dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
+ 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
+ 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
}
else {
- dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+ dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
+ 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
+ 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
}
if (!state->point_size_per_vertex)
- dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
+ dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
/* in U8.3 */
point_width = (int) (state->point_size * 8.0f + 0.5f);
@@ -781,7 +1732,7 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
sf->payload[5] = fui(offset_clamp);
if (state->multisample) {
- sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
+ sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 251:
@@ -793,7 +1744,7 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
if (!line_width) {
line_width = 128; /* 1.0f */
- sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+ sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
}
}
else {
@@ -801,6 +1752,172 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
}
}
+/**
+ * Fill in DW2 to DW7 of 3DSTATE_SF.
+ *
+ * \param dev          device info; its generation selects the GEN7 path
+ * \param rasterizer   rasterizer state, or NULL to get a default payload
+ * \param num_samples  sample count; values above 1 merge in the MSAA bits
+ * \param depth_format depth buffer format, used on GEN7 and later when a
+ *                     rasterizer is given
+ * \param payload      output dwords
+ * \param payload_len  number of dwords in \p payload; must match the size
+ *                     of the rasterizer's SF payload
+ */
+void
+ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
+                                    const struct ilo_rasterizer_state *rasterizer,
+                                    int num_samples,
+                                    enum pipe_format depth_format,
+                                    uint32_t *payload, unsigned payload_len)
+{
+   /* Elements() only takes sizeof, so this is safe with a NULL rasterizer */
+   assert(payload_len == Elements(rasterizer->sf.payload));
+
+   /*
+    * Test the rasterizer itself.  The address of its embedded sf member is
+    * not a usable NULL check and would leave the fallback below unreachable.
+    */
+   if (rasterizer) {
+      const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
+
+      memcpy(payload, sf->payload, sizeof(sf->payload));
+
+      if (num_samples > 1)
+         payload[1] |= sf->dw_msaa;
+
+      if (dev->gen >= ILO_GEN(7)) {
+         int format;
+
+         /* separate stencil: reduce combined formats to their depth part */
+         switch (depth_format) {
+         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+            depth_format = PIPE_FORMAT_Z24X8_UNORM;
+            break;
+         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+            depth_format = PIPE_FORMAT_Z32_FLOAT;
+            break;
+         case PIPE_FORMAT_S8_UINT:
+            depth_format = PIPE_FORMAT_NONE;
+            break;
+         default:
+            break;
+         }
+
+         format = gen6_translate_depth_format(depth_format);
+         /* FLOAT surface is assumed when there is no depth buffer */
+         if (format < 0)
+            format = BRW_DEPTHFORMAT_D32_FLOAT;
+
+         payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
+      }
+   }
+   else {
+      /* no rasterizer: everything off except the MSAA rasterization mode */
+      payload[0] = 0;
+      payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
+      payload[2] = 0;
+      payload[3] = 0;
+      payload[4] = 0;
+      payload[5] = 0;
+   }
+}
+
+/**
+ * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
+ *
+ * The 13 output dwords are laid out as: dw[0] attribute setup control,
+ * dw[1..8] attribute swizzles, dw[9] point sprite enables, dw[10] constant
+ * interpolation enables, dw[11..12] WrapShortest enables.  Without a
+ * fragment shader only a URB read length of 1 is programmed.
+ */
+void
+ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
+                                 const struct ilo_rasterizer_state *rasterizer,
+                                 const struct ilo_shader_state *fs,
+                                 const struct ilo_shader_state *last_sh,
+                                 uint32_t *dw, int num_dwords)
+{
+   const struct ilo_kernel_routing *routing;
+   int num_outputs, read_offset, read_len;
+   uint32_t swizzle_bit;
+   bool is_gen7;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert(num_dwords == 13);
+
+   is_gen7 = (dev->gen >= ILO_GEN(7));
+
+   if (!fs) {
+      memset(dw, 0, sizeof(dw[0]) * num_dwords);
+
+      dw[0] = 1 << ((is_gen7) ?
+            GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT :
+            GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT);
+
+      return;
+   }
+
+   num_outputs = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+   assert(num_outputs <= 32);
+
+   routing = ilo_shader_get_kernel_routing(fs);
+
+   /* the read offset is programmed in pairs of attributes */
+   read_offset = routing->source_skip;
+   assert(read_offset % 2 == 0);
+   read_offset /= 2;
+
+   /* so is the read length, rounded up and never zero */
+   read_len = (routing->source_len + 1) / 2;
+   if (read_len == 0)
+      read_len = 1;
+
+   if (is_gen7) {
+      dw[0] = num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
+              read_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+              read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+      swizzle_bit = GEN7_SBE_SWIZZLE_ENABLE;
+   }
+   else {
+      dw[0] = num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
+              read_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+              read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+      swizzle_bit = GEN6_SF_SWIZZLE_ENABLE;
+   }
+
+   if (routing->swizzle_enable)
+      dw[0] |= swizzle_bit;
+
+   if (rasterizer->state.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT)
+      dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
+   else if (rasterizer->state.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+      dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+
+   /* 16 swizzles of 2 bytes each occupy dw[1] to dw[8] */
+   STATIC_ASSERT(Elements(routing->swizzles) >= 16);
+   memcpy(&dw[1], routing->swizzles, 2 * 16);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+    *
+    *     "This field (Point Sprite Texture Coordinate Enable) must be
+    *      programmed to 0 when non-point primitives are rendered."
+    *
+    * TODO We do not check that yet.
+    */
+   dw[9] = routing->point_sprite_enable;
+
+   dw[10] = routing->const_interp_enable;
+
+   /* WrapShortest enables */
+   dw[11] = 0;
+   dw[12] = 0;
+}
+
+/**
+ * Emit the GEN6 3DSTATE_SF packet.
+ *
+ * The packet interleaves two separately built payloads: the first setup
+ * dword goes right after the header, then the six raster dwords, then the
+ * remaining twelve setup dwords.
+ */
+static void
+gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     const struct ilo_shader_state *fs,
+                     const struct ilo_shader_state *last_sh,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
+   const uint8_t cmd_len = 20;
+   uint32_t raster_dw[6];
+   uint32_t sbe_dw[13];
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /* single-sampled, no depth format: this emitter is GEN6 only */
+   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
+         1, PIPE_FORMAT_NONE, raster_dw, Elements(raster_dw));
+
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+         fs, last_sh, sbe_dw, Elements(sbe_dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, sbe_dw[0]);
+   ilo_cp_write_multi(cp, raster_dw, 6);
+   ilo_cp_write_multi(cp, &sbe_dw[1], 12);
+   ilo_cp_end(cp);
+}
+
void
ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
@@ -812,23 +1929,23 @@ ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
/* only the FF unit states are set, as in GEN7 */
- dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
+ dw5 = GEN6_WM_LINE_AA_WIDTH_2_0;
/* same value as in 3DSTATE_SF */
if (state->line_smooth)
- dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
+ dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
if (state->poly_stipple_enable)
- dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
+ dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
if (state->line_stipple_enable)
- dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
+ dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
- dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL |
- GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL |
- GEN6_WM_DW6_MSDISPMODE_PERSAMPLE;
+ dw6 = GEN6_WM_POSITION_ZW_PIXEL |
+ GEN6_WM_MSRAST_OFF_PIXEL |
+ GEN6_WM_MSDISPMODE_PERSAMPLE;
if (state->bottom_edge_rule)
- dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
+ dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
/*
* assertion that makes sure
@@ -837,12 +1954,12 @@ ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
*
* is valid
*/
- STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
- GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);
+ STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
+ GEN6_WM_MSDISPMODE_PERSAMPLE == 0);
wm->dw_msaa_rast =
- (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0;
- wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
+ (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0;
+ wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL;
STATIC_ASSERT(Elements(wm->payload) >= 2);
wm->payload[0] = dw5;
@@ -867,13 +1984,13 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
/* see brwCreateContext() */
max_threads = (dev->gt == 2) ? 80 : 40;
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+ dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
- dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
- 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
- 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
+ dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
+ 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
+ 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
- dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+ dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 275:
@@ -901,7 +2018,7 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
* ENABLE this bit due to ClipDistance clipping."
*/
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- dw5 |= GEN6_WM_DW5_PS_KILL;
+ dw5 |= GEN6_WM_KILL_ENABLE;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 275:
@@ -912,13 +2029,13 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
* TODO This is not checked yet.
*/
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
+ dw5 |= GEN6_WM_COMPUTED_DEPTH;
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
+ dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- dw5 |= GEN6_WM_DW5_PS_USE_W;
+ dw5 |= GEN6_WM_USES_SOURCE_W;
/*
* TODO set this bit only when
@@ -928,14 +2045,14 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
* c) fs or cc kills
*/
if (true)
- dw5 |= GEN6_WM_DW5_PS_ENABLE;
+ dw5 |= GEN6_WM_DISPATCH_ENABLE;
assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
- dw5 |= GEN6_WM_DW5_8_PIXEL_DISPATCH;
+ dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
- dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
- GEN6_WM_DW6_POSOFFSET_NONE |
- interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;
+ dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
+ GEN6_WM_POSOFFSET_NONE |
+ interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
STATIC_ASSERT(Elements(cso->payload) >= 4);
cso->payload[0] = dw2;
@@ -944,6 +2061,291 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
cso->payload[3] = dw6;
}
+/**
+ * Emit the GEN6 3DSTATE_WM packet (9 dwords).
+ *
+ * Without a fragment shader, every dword is zero except DW5, which still
+ * carries the maximum thread count.  With one, DW2/DW4/DW5/DW6 start from
+ * the four dwords of the shader's CSO payload and are then combined with
+ * the sampler count, the statistics bit, the kill and dual-source-blend
+ * bits, and the two dwords of rasterizer->wm.payload.
+ *
+ * \p rasterizer is dereferenced unconditionally on the shader path.
+ */
+static void
+gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ int num_samplers,
+ const struct ilo_rasterizer_state *rasterizer,
+ bool dual_blend, bool cc_may_kill,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
+ const uint8_t cmd_len = 9;
+ /* hard-coded, so the multisample branch near the end is never taken */
+ const int num_samples = 1;
+ const struct ilo_shader_cso *fs_cso;
+ uint32_t dw2, dw4, dw5, dw6;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ if (!fs) {
+ /* see brwCreateContext() */
+ const int max_threads = (dev->gt == 2) ? 80 : 40;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ /* honor the valid range even if dispatching is disabled */
+ ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+
+ return;
+ }
+
+ /* payload[0..3] of the shader CSO hold DW2, DW4, DW5 and DW6 */
+ fs_cso = ilo_shader_get_kernel_cso(fs);
+ dw2 = fs_cso->payload[0];
+ dw4 = fs_cso->payload[1];
+ dw5 = fs_cso->payload[2];
+ dw6 = fs_cso->payload[3];
+
+ /* the sampler count is programmed in groups of four, rounded up */
+ dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
+
+ /* statistics are always enabled; the else branch is intentionally dead */
+ if (true) {
+ dw4 |= GEN6_WM_STATISTICS_ENABLE;
+ }
+ else {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+ *
+ * "This bit (Statistics Enable) must be disabled if either of these
+ * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
+ * Resolve Enable or Depth Buffer Resolve Enable."
+ */
+ dw4 |= GEN6_WM_DEPTH_CLEAR;
+ dw4 |= GEN6_WM_DEPTH_RESOLVE;
+ dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+ }
+
+ /* a kill requested by the caller also forces dispatch to be enabled */
+ if (cc_may_kill) {
+ dw5 |= GEN6_WM_KILL_ENABLE |
+ GEN6_WM_DISPATCH_ENABLE;
+ }
+
+ if (dual_blend)
+ dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
+
+ /* merge the rasterizer-derived halves of DW5 and DW6 */
+ dw5 |= rasterizer->wm.payload[0];
+
+ dw6 |= rasterizer->wm.payload[1];
+
+ if (num_samples > 1) {
+ dw6 |= rasterizer->wm.dw_msaa_rast |
+ rasterizer->wm.dw_msaa_disp;
+ }
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
+ ilo_cp_write(cp, dw2);
+ ilo_cp_write(cp, 0); /* scratch */
+ ilo_cp_write(cp, dw4);
+ ilo_cp_write(cp, dw5);
+ ilo_cp_write(cp, dw6);
+ ilo_cp_write(cp, 0); /* kernel 1 */
+ ilo_cp_write(cp, 0); /* kernel 2 */
+ ilo_cp_end(cp);
+}
+
+/**
+ * Build the four buffer dwords shared by the 3DSTATE_CONSTANT_* packets.
+ *
+ * Each used slot gets the buffer address OR'ed with its read length, which
+ * is encoded in 256-bit units minus one.  Slots at or past \p num_bufs, or
+ * with a zero size, are written as zero.
+ *
+ * \return a bitmask with bit i set for every enabled slot i
+ */
+static unsigned
+gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
+                           const uint32_t *bufs, const int *sizes,
+                           int num_bufs, int max_read_length,
+                           uint32_t *dw, int num_dwords)
+{
+   unsigned enabled = 0x0;
+   int units_read = 0;
+   int slot;
+
+   assert(num_dwords == 4);
+
+   for (slot = 0; slot < 4; slot++) {
+      int read_len;
+
+      /* unused slot: disable it and move on */
+      if (slot >= num_bufs || !sizes[slot]) {
+         dw[slot] = 0;
+         continue;
+      }
+
+      /* in 256-bit units minus one */
+      read_len = (sizes[slot] + 31) / 32 - 1;
+
+      /* the low five address bits are shared with the read length */
+      assert(bufs[slot] % 32 == 0);
+      assert(read_len < 32);
+
+      enabled |= 1 << slot;
+      dw[slot] = bufs[slot] | read_len;
+
+      units_read += read_len + 1;
+   }
+
+   assert(units_read <= max_read_length);
+
+   return enabled;
+}
+
+/**
+ * Emit the GEN6 3DSTATE_CONSTANT_VS packet for up to four constant buffers.
+ *
+ * The per-buffer enable mask is placed in the header dword starting at
+ * bit 12; the four buffer dwords follow.
+ */
+static void
+gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
+   const uint8_t cmd_len = 5;
+   uint32_t slot_dw[4], enable_mask;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 32"
+    */
+   enable_mask = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 32, slot_dw, Elements(slot_dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) | (enable_mask << 12));
+   for (i = 0; i < 4; i++) {
+      ilo_cp_write(cp, slot_dw[i]);
+   }
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the GEN6 3DSTATE_CONSTANT_GS packet for up to four constant buffers.
+ *
+ * The per-buffer enable mask is placed in the header dword starting at
+ * bit 12; the four buffer dwords follow.
+ */
+static void
+gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
+   const uint8_t cmd_len = 5;
+   uint32_t slot_dw[4], enable_mask;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 64"
+    */
+   enable_mask = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 64, slot_dw, Elements(slot_dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) | (enable_mask << 12));
+   for (i = 0; i < 4; i++) {
+      ilo_cp_write(cp, slot_dw[i]);
+   }
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the GEN6 3DSTATE_CONSTANT_PS packet for up to four constant buffers.
+ *
+ * The per-buffer enable mask is placed in the header dword starting at
+ * bit 12; the four buffer dwords follow.
+ */
+static void
+gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
+   const uint8_t cmd_len = 5;
+   uint32_t slot_dw[4], enable_mask;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 64"
+    */
+   enable_mask = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 64, slot_dw, Elements(slot_dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) | (enable_mask << 12));
+   for (i = 0; i < 4; i++) {
+      ilo_cp_write(cp, slot_dw[i]);
+   }
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the GEN6 3DSTATE_SAMPLE_MASK packet.
+ *
+ * Only the low four bits of \p sample_mask are passed on.
+ */
+static void
+gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
+                              unsigned sample_mask,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
+   const uint8_t cmd_len = 2;
+   const unsigned valid_mask = 0xf;
+   unsigned masked;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   masked = sample_mask & valid_mask;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, masked);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_DRAWING_RECTANGLE.
+ *
+ * The rectangle is given by its top-left corner and size; the packet takes
+ * inclusive min/max corners, each clamped to the per-generation limit
+ * (16383 on GEN7 and later, 8191 otherwise).  The origin is always zero.
+ */
+static void
+gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
+                                    unsigned x, unsigned y,
+                                    unsigned width, unsigned height,
+                                    struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
+   const uint8_t cmd_len = 4;
+   unsigned x_min = x, y_min = y;
+   unsigned x_max = x + width - 1;
+   unsigned y_max = y + height - 1;
+   unsigned limit;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (dev->gen < ILO_GEN(7)) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 230:
+       *
+       *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
+       *      must be an even number"
+       */
+      assert(y % 2 == 0);
+
+      limit = 8191;
+   }
+   else {
+      limit = 16383;
+   }
+
+   x_min = (x_min > limit) ? limit : x_min;
+   y_min = (y_min > limit) ? limit : y_min;
+   x_max = (x_max > limit) ? limit : x_max;
+   y_max = (y_max > limit) ? limit : y_max;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, (y_min << 16) | x_min);
+   ilo_cp_write(cp, (y_max << 16) | x_max);
+
+   /*
+    * There is no need to set the origin.  It is intended to support front
+    * buffer rendering.
+    */
+   ilo_cp_write(cp, 0);
+
+   ilo_cp_end(cp);
+}
+
struct ilo_zs_surface_info {
int surface_type;
int format;
@@ -964,12 +2366,12 @@ static void
zs_init_info_null(const struct ilo_dev_info *dev,
struct ilo_zs_surface_info *info)
{
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
memset(info, 0, sizeof(*info));
- info->surface_type = GEN6_SURFTYPE_NULL;
- info->format = GEN6_ZFORMAT_D32_FLOAT;
+ info->surface_type = BRW_SURFACE_NULL;
+ info->format = BRW_DEPTHFORMAT_D32_FLOAT;
info->width = 1;
info->height = 1;
info->depth = 1;
@@ -979,20 +2381,23 @@ zs_init_info_null(const struct ilo_dev_info *dev,
static void
zs_init_info(const struct ilo_dev_info *dev,
const struct ilo_texture *tex,
- enum pipe_format format, unsigned level,
+ enum pipe_format format,
+ unsigned level,
unsigned first_layer, unsigned num_layers,
- bool offset_to_layer, struct ilo_zs_surface_info *info)
+ struct ilo_zs_surface_info *info)
{
- uint32_t x_offset[3], y_offset[3];
+ const bool rebase_layer = true;
+ struct intel_bo * const hiz_bo = NULL;
bool separate_stencil;
+ uint32_t x_offset[3], y_offset[3];
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
memset(info, 0, sizeof(*info));
info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
- if (info->surface_type == GEN6_SURFTYPE_CUBE) {
+ if (info->surface_type == BRW_SURFACE_CUBE) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
*
@@ -1005,7 +2410,7 @@ zs_init_info(const struct ilo_dev_info *dev,
* As such, we cannot set first_layer and num_layers on cube surfaces.
* To work around that, treat it as a 2D surface.
*/
- info->surface_type = GEN6_SURFTYPE_2D;
+ info->surface_type = BRW_SURFACE_2D;
}
if (dev->gen >= ILO_GEN(7)) {
@@ -1019,8 +2424,7 @@ zs_init_info(const struct ilo_dev_info *dev,
* same value (enabled or disabled) as Hierarchical Depth Buffer
* Enable."
*/
- separate_stencil =
- ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers);
+ separate_stencil = (hiz_bo != NULL);
}
/*
@@ -1044,25 +2448,25 @@ zs_init_info(const struct ilo_dev_info *dev,
*/
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
- info->format = GEN6_ZFORMAT_D16_UNORM;
+ info->format = BRW_DEPTHFORMAT_D16_UNORM;
break;
case PIPE_FORMAT_Z32_FLOAT:
- info->format = GEN6_ZFORMAT_D32_FLOAT;
+ info->format = BRW_DEPTHFORMAT_D32_FLOAT;
break;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
info->format = (separate_stencil) ?
- GEN6_ZFORMAT_D24_UNORM_X8_UINT :
- GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+ BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
+ BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
break;
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
info->format = (separate_stencil) ?
- GEN6_ZFORMAT_D32_FLOAT :
- GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+ BRW_DEPTHFORMAT_D32_FLOAT :
+ BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
break;
case PIPE_FORMAT_S8_UINT:
if (separate_stencil) {
- info->format = GEN6_ZFORMAT_D32_FLOAT;
+ info->format = BRW_DEPTHFORMAT_D32_FLOAT;
break;
}
/* fall through */
@@ -1078,7 +2482,7 @@ zs_init_info(const struct ilo_dev_info *dev,
info->zs.stride = tex->bo_stride;
info->zs.tiling = tex->tiling;
- if (offset_to_layer) {
+ if (rebase_layer) {
info->zs.offset = ilo_texture_get_slice_offset(tex,
level, first_layer, &x_offset[0], &y_offset[0]);
}
@@ -1103,29 +2507,19 @@ zs_init_info(const struct ilo_dev_info *dev,
info->stencil.tiling = s8_tex->tiling;
- if (offset_to_layer) {
+ if (rebase_layer) {
info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
level, first_layer, &x_offset[1], &y_offset[1]);
}
}
- if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) {
- info->hiz.bo = tex->hiz.bo;
- info->hiz.stride = tex->hiz.bo_stride;
- info->hiz.tiling = INTEL_TILING_Y;
-
- /*
- * Layer offsetting is used on GEN6 only. And on GEN6, HiZ is enabled
- * only when the depth buffer is non-mipmapped and non-array, making
- * layer offsetting no-op.
- */
- if (offset_to_layer) {
- assert(level == 0 && first_layer == 0 && num_layers == 1);
-
- info->hiz.offset = 0;
- x_offset[2] = 0;
- y_offset[2] = 0;
- }
+ if (hiz_bo) {
+ info->hiz.bo = hiz_bo;
+ info->hiz.stride = 0;
+ info->hiz.tiling = 0;
+ info->hiz.offset = 0;
+ x_offset[2] = 0;
+ y_offset[2] = 0;
}
info->width = tex->base.width0;
@@ -1137,11 +2531,11 @@ zs_init_info(const struct ilo_dev_info *dev,
info->first_layer = first_layer;
info->num_layers = num_layers;
- if (offset_to_layer) {
+ if (rebase_layer) {
/* the size of the layer */
info->width = u_minify(info->width, level);
info->height = u_minify(info->height, level);
- if (info->surface_type == GEN6_SURFTYPE_3D)
+ if (info->surface_type == BRW_SURFACE_3D)
info->depth = u_minify(info->depth, level);
else
info->depth = 1;
@@ -1194,14 +2588,14 @@ zs_init_info(const struct ilo_dev_info *dev,
info->height += info->y_offset;
/* we have to treat them as 2D surfaces */
- if (info->surface_type == GEN6_SURFTYPE_CUBE) {
+ if (info->surface_type == BRW_SURFACE_CUBE) {
assert(tex->base.width0 == tex->base.height0);
/* we will set slice_offset to point to the single face */
- info->surface_type = GEN6_SURFTYPE_2D;
+ info->surface_type = BRW_SURFACE_2D;
}
- else if (info->surface_type == GEN6_SURFTYPE_1D && info->height > 1) {
+ else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
assert(tex->base.height0 == 1);
- info->surface_type = GEN6_SURFTYPE_2D;
+ info->surface_type = BRW_SURFACE_2D;
}
}
}
@@ -1209,46 +2603,44 @@ zs_init_info(const struct ilo_dev_info *dev,
void
ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
const struct ilo_texture *tex,
- enum pipe_format format, unsigned level,
+ enum pipe_format format,
+ unsigned level,
unsigned first_layer, unsigned num_layers,
- bool offset_to_layer, struct ilo_zs_surface *zs)
+ struct ilo_zs_surface *zs)
{
const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
struct ilo_zs_surface_info info;
uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
- if (tex) {
- zs_init_info(dev, tex, format, level, first_layer, num_layers,
- offset_to_layer, &info);
- }
- else {
+ if (tex)
+ zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
+ else
zs_init_info_null(dev, &info);
- }
switch (info.surface_type) {
- case GEN6_SURFTYPE_NULL:
+ case BRW_SURFACE_NULL:
break;
- case GEN6_SURFTYPE_1D:
+ case BRW_SURFACE_1D:
assert(info.width <= max_2d_size && info.height == 1 &&
info.depth <= max_array_size);
assert(info.first_layer < max_array_size - 1 &&
info.num_layers <= max_array_size);
break;
- case GEN6_SURFTYPE_2D:
+ case BRW_SURFACE_2D:
assert(info.width <= max_2d_size && info.height <= max_2d_size &&
info.depth <= max_array_size);
assert(info.first_layer < max_array_size - 1 &&
info.num_layers <= max_array_size);
break;
- case GEN6_SURFTYPE_3D:
+ case BRW_SURFACE_3D:
assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
assert(info.x_offset == 0 && info.y_offset == 0);
break;
- case GEN6_SURFTYPE_CUBE:
+ case BRW_SURFACE_CUBE:
assert(info.width <= max_2d_size && info.height <= max_2d_size &&
info.depth == 1);
assert(info.first_layer == 0 && info.num_layers == 1);
@@ -1311,7 +2703,7 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
dw3 = (info.height - 1) << 19 |
(info.width - 1) << 6 |
info.lod << 2 |
- GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
+ BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
dw4 = (info.depth - 1) << 21 |
info.first_layer << 10 |
@@ -1342,9 +2734,6 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
zs->payload[6] = info.stencil.stride - 1;
zs->payload[7] = info.stencil.offset;
- if (dev->gen >= ILO_GEN(7.5))
- zs->payload[6] |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
-
/* do not increment reference count */
zs->separate_s8_bo = info.stencil.bo;
}
@@ -1370,6 +2759,408 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
}
+/**
+ * Emit 3DSTATE_DEPTH_BUFFER for a depth/stencil surface.
+ *
+ * The command is seven dwords long: the header followed by
+ * zs->payload[0..5].  payload[1] goes through ilo_cp_write_bo() together
+ * with zs->bo, with the render domain as both read and write domain.
+ * GEN7 uses a different opcode from GEN6.
+ */
static void
+gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+                               const struct ilo_zs_surface *zs,
+                               struct ilo_cp *cp)
+{
+   const uint8_t num_dwords = 7;
+   uint32_t header;
+   int i;
+
+   if (dev->gen >= ILO_GEN(7))
+      header = ILO_GPE_CMD(0x3, 0x0, 0x05);
+   else
+      header = ILO_GPE_CMD(0x3, 0x1, 0x05);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   ilo_cp_write(cp, zs->payload[0]);
+   ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
+                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   /* the remaining payload dwords are plain values */
+   for (i = 2; i <= 5; i++)
+      ilo_cp_write(cp, zs->payload[i]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_POLY_STIPPLE_OFFSET.
+ *
+ * Both offsets are asserted to be in [0, 31].  The single payload dword
+ * holds the X offset shifted left by 8, ORed with the Y offset.
+ */
+static void
+gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
+                                      int x_offset, int y_offset,
+                                      struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x06);
+   const uint8_t num_dwords = 2;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert(x_offset >= 0 && x_offset <= 31);
+   assert(y_offset >= 0 && y_offset <= 31);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   ilo_cp_write(cp, (x_offset << 8) | y_offset);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_POLY_STIPPLE_PATTERN.
+ *
+ * The command is the header followed by the 32 entries of
+ * pattern->stipple, written in index order.
+ */
+static void
+gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
+                                       const struct pipe_poly_stipple *pattern,
+                                       struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x07);
+   const uint8_t num_dwords = 33;
+   int row = 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert(Elements(pattern->stipple) == 32);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   while (row < 32) {
+      ilo_cp_write(cp, pattern->stipple[row]);
+      row++;
+   }
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_LINE_STIPPLE.
+ *
+ * \p pattern is asserted to fit in 16 bits and \p factor to be in
+ * [1, 256].  The last dword packs the reciprocal of the factor above the
+ * factor itself: a U1.16 value shifted by 15 on GEN7, or a U1.13 value
+ * shifted by 16 on GEN6.
+ */
+static void
+gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
+                               unsigned pattern, unsigned factor,
+                               struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x08);
+   const uint8_t num_dwords = 3;
+   unsigned inverse;
+   int inverse_shift;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert((pattern & 0xffff) == pattern);
+   assert(factor >= 1 && factor <= 256);
+
+   if (dev->gen >= ILO_GEN(7)) {
+      /* in U1.16 */
+      inverse = (unsigned) (65536.0f / factor);
+      inverse_shift = 15;
+   }
+   else {
+      /* in U1.13 */
+      inverse = (unsigned) (8192.0f / factor);
+      inverse_shift = 16;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   ilo_cp_write(cp, pattern);
+   ilo_cp_write(cp, inverse << inverse_shift | factor);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_AA_LINE_PARAMETERS with both payload dwords set to zero.
+ */
+static void
+gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
+                                     struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x0a);
+   const uint8_t num_dwords = 3;
+   /* each dword is built from a high and a low half, both zero */
+   const uint32_t dw1 = 0 << 16 | 0;
+   const uint32_t dw2 = 0 << 16 | 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_GS_SVB_INDEX (GEN6 only).
+ *
+ * \p index is asserted to be in [0, 3].  DW1 carries the index and,
+ * when \p load_vertex_count is set, SVB_LOAD_INTERNAL_VERTEX_COUNT; it is
+ * followed by \p svbi and \p max_svbi.
+ */
+static void
+gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
+                               int index, unsigned svbi,
+                               unsigned max_svbi,
+                               bool load_vertex_count,
+                               struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x0b);
+   const uint8_t num_dwords = 4;
+   uint32_t index_dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(index >= 0 && index < 4);
+
+   index_dw = index << SVB_INDEX_SHIFT;
+   index_dw |= (load_vertex_count) ? SVB_LOAD_INTERNAL_VERTEX_COUNT : 0;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   ilo_cp_write(cp, index_dw);
+   ilo_cp_write(cp, svbi);
+   ilo_cp_write(cp, max_svbi);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_MULTISAMPLE.
+ *
+ * The command is 3 dwords on GEN6 and 4 on GEN7.  Sample counts of 0 and
+ * 1 both select single sampling; 4 takes packed_sample_pos[0]; 8 is only
+ * accepted on GEN7 and additionally takes packed_sample_pos[1].  Any other
+ * count trips an assertion and falls back to single sampling.
+ */
+static void
+gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
+                              int num_samples,
+                              const uint32_t *packed_sample_pos,
+                              bool pixel_location_center,
+                              struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x0d);
+   const uint8_t num_dwords = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
+   uint32_t dw1, dw2 = 0, dw3 = 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (pixel_location_center)
+      dw1 = MS_PIXEL_LOCATION_CENTER;
+   else
+      dw1 = MS_PIXEL_LOCATION_UPPER_LEFT;
+
+   if (num_samples == 4) {
+      dw1 |= MS_NUMSAMPLES_4;
+      dw2 = packed_sample_pos[0];
+   }
+   else if (num_samples == 8) {
+      assert(dev->gen >= ILO_GEN(7));
+      dw1 |= MS_NUMSAMPLES_8;
+      dw2 = packed_sample_pos[0];
+      dw3 = packed_sample_pos[1];
+   }
+   else {
+      /* 0 and 1 are the valid single-sample counts */
+      if (num_samples != 0 && num_samples != 1) {
+         assert(!"unsupported sample count");
+      }
+      dw1 |= MS_NUMSAMPLES_1;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   /* the second sample-position dword only exists on GEN7 */
+   if (dev->gen >= ILO_GEN(7))
+      ilo_cp_write(cp, dw3);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_STENCIL_BUFFER for the separate stencil buffer of \p zs.
+ *
+ * The payload is zs->payload[6] followed by zs->payload[7], the latter
+ * written through ilo_cp_write_bo() with zs->separate_s8_bo.  GEN7 uses a
+ * different opcode from GEN6.
+ */
+static void
+gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
+                                 const struct ilo_zs_surface *zs,
+                                 struct ilo_cp *cp)
+{
+   const uint8_t num_dwords = 3;
+   uint32_t header;
+
+   if (dev->gen >= ILO_GEN(7))
+      header = ILO_GPE_CMD(0x3, 0x0, 0x06);
+   else
+      header = ILO_GPE_CMD(0x3, 0x1, 0x0e);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   /* see ilo_gpe_init_zs_surface() */
+   ilo_cp_write(cp, zs->payload[6]);
+   ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
+                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_HIER_DEPTH_BUFFER for the HiZ buffer of \p zs.
+ *
+ * The payload is zs->payload[8] followed by zs->payload[9], the latter
+ * written through ilo_cp_write_bo() with zs->hiz_bo.  GEN7 uses a
+ * different opcode from GEN6.
+ */
+static void
+gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+                                    const struct ilo_zs_surface *zs,
+                                    struct ilo_cp *cp)
+{
+   const uint8_t num_dwords = 3;
+   uint32_t header;
+
+   if (dev->gen >= ILO_GEN(7))
+      header = ILO_GPE_CMD(0x3, 0x0, 0x07);
+   else
+      header = ILO_GPE_CMD(0x3, 0x1, 0x0f);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   /* see ilo_gpe_init_zs_surface() */
+   ilo_cp_write(cp, zs->payload[8]);
+   ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
+                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CLEAR_PARAMS (GEN6 only).
+ *
+ * GEN5_DEPTH_CLEAR_VALID is ORed into the header dword, which is followed
+ * by \p clear_val.
+ */
+static void
+gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
+                               uint32_t clear_val,
+                               struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x1, 0x10);
+   const uint8_t num_dwords = 2;
+   const uint32_t header = opcode | (num_dwords - 2) | GEN5_DEPTH_CLEAR_VALID;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, clear_val);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit PIPE_CONTROL.
+ *
+ * \p dw1 carries the control flags and is sanity-checked against the PRM
+ * restrictions quoted below.  \p bo and \p bo_offset are emitted through
+ * ilo_cp_write_bo() with the instruction domain for both reads and writes.
+ * One zero dword follows, or two when \p write_qword is set.
+ */
+static void
+gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
+                       uint32_t dw1,
+                       struct intel_bo *bo, uint32_t bo_offset,
+                       bool write_qword,
+                       struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x2, 0x00);
+   const uint8_t num_dwords = (write_qword) ? 5 : 4;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (dw1 & PIPE_CONTROL_CS_STALL) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+       *
+       *     "1 of the following must also be set (when CS stall is set):
+       *
+       *       * Depth Cache Flush Enable ([0] of DW1)
+       *       * Stall at Pixel Scoreboard ([1] of DW1)
+       *       * Depth Stall ([13] of DW1)
+       *       * Post-Sync Operation ([13] of DW1)
+       *       * Render Target Cache Flush Enable ([12] of DW1)
+       *       * Notify Enable ([8] of DW1)"
+       *
+       * From the Ivy Bridge PRM, volume 2 part 1, page 61:
+       *
+       *     "One of the following must also be set (when CS stall is set):
+       *
+       *       * Render Target Cache Flush Enable ([12] of DW1)
+       *       * Depth Cache Flush Enable ([0] of DW1)
+       *       * Stall at Pixel Scoreboard ([1] of DW1)
+       *       * Depth Stall ([13] of DW1)
+       *       * Post-Sync Operation ([13] of DW1)"
+       */
+      uint32_t companions =
+         PIPE_CONTROL_WRITE_FLUSH |
+         PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+         PIPE_CONTROL_STALL_AT_SCOREBOARD |
+         PIPE_CONTROL_DEPTH_STALL |
+         /* post-sync op */
+         PIPE_CONTROL_WRITE_IMMEDIATE |
+         PIPE_CONTROL_WRITE_DEPTH_COUNT |
+         PIPE_CONTROL_WRITE_TIMESTAMP;
+
+      /* the interrupt bit is only accepted as a companion on GEN6 */
+      if (dev->gen == ILO_GEN(6))
+         companions |= PIPE_CONTROL_INTERRUPT_ENABLE;
+
+      assert(dw1 & companions);
+   }
+
+   if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+       *
+       *     "Following bits must be clear (when Depth Stall is set):
+       *
+       *       * Render Target Cache Flush Enable ([12] of DW1)
+       *       * Depth Cache Flush Enable ([0] of DW1)"
+       */
+      const uint32_t forbidden = PIPE_CONTROL_WRITE_FLUSH |
+                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+
+      assert(!(dw1 & forbidden));
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write_bo(cp, bo_offset, bo,
+                   INTEL_DOMAIN_INSTRUCTION, INTEL_DOMAIN_INSTRUCTION);
+   ilo_cp_write(cp, 0);
+   if (write_qword)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DPRIMITIVE (GEN6 only) for a draw call.
+ *
+ * When \p rectlist is set the topology is _3DPRIM_RECTLIST; otherwise it is
+ * translated from info->mode.  For indexed draws the vertex buffer access
+ * is random and ib->draw_start_offset is added to the start index.
+ */
+static void
+gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
+                      const struct pipe_draw_info *info,
+                      const struct ilo_ib_state *ib,
+                      bool rectlist,
+                      struct ilo_cp *cp)
+{
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x3, 0x00);
+   const uint8_t num_dwords = 6;
+   int topology, access_mode;
+   uint32_t first_vertex;
+
+   if (rectlist)
+      topology = _3DPRIM_RECTLIST;
+   else
+      topology = ilo_gpe_gen6_translate_pipe_prim(info->mode);
+
+   first_vertex = info->start;
+   if (info->indexed) {
+      access_mode = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+      first_vertex += ib->draw_start_offset;
+   }
+   else {
+      access_mode = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+   }
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header | (num_dwords - 2) |
+                    topology << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+                    access_mode);
+   ilo_cp_write(cp, info->count);
+   ilo_cp_write(cp, first_vertex);
+   ilo_cp_write(cp, info->instance_count);
+   ilo_cp_write(cp, info->start_instance);
+   ilo_cp_write(cp, info->index_bias);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Write \p num_ids INTERFACE_DESCRIPTOR_DATA elements (GEN6 only).
+ *
+ * Each element is eight dwords built from the kernel offset of cs[i], the
+ * sampler state and sampler count, and the binding table state and surface
+ * count.  Returns the state offset reported by ilo_cp_steal_ptr().
+ */
+static uint32_t
+gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
+                                    const struct ilo_shader_state **cs,
+                                    uint32_t *sampler_state,
+                                    int *num_samplers,
+                                    uint32_t *binding_table_state,
+                                    int *num_surfaces,
+                                    int num_ids,
+                                    struct ilo_cp *cp)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
+    *
+    *     "(Interface Descriptor Total Length) This field must have the same
+    *      alignment as the Interface Descriptor Data Start Address.
+    *
+    *      It must be DQWord (32-byte) aligned..."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
+    *
+    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
+    *      aligned address of the Interface Descriptor data."
+    */
+   const int state_align = 32 / 4;
+   const int state_len = (32 / 4) * num_ids;
+   uint32_t state_offset, *base;
+   int id;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   base = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
+                           state_len, state_align, &state_offset);
+
+   for (id = 0; id < num_ids; id++) {
+      /* every descriptor occupies eight consecutive dwords */
+      uint32_t *desc = base + 8 * id;
+
+      desc[0] = ilo_shader_get_kernel_offset(cs[id]);
+      desc[1] = 1 << 18; /* SPF */
+      desc[2] = sampler_state[id] |
+                (num_samplers[id] + 3) / 4 << 2;
+      desc[3] = binding_table_state[id] |
+                num_surfaces[id];
+      desc[4] = 0 << 16 |  /* CURBE Read Length */
+                0;         /* CURBE Read Offset */
+      desc[5] = 0; /* Barrier ID */
+      desc[6] = 0;
+      desc[7] = 0;
+   }
+
+   return state_offset;
+}
+
+static void
viewport_get_guardband(const struct ilo_dev_info *dev,
int center_x, int center_y,
int *min_gbx, int *max_gbx,
@@ -1438,7 +3229,7 @@ ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
const float scale_z = fabs(state->scale[2]);
int min_gbx, max_gbx, min_gby, max_gby;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
viewport_get_guardband(dev,
(int) state->translate[0],
@@ -1468,15 +3259,158 @@ ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
vp->max_z = scale_z * 1.0f + state->translate[2];
}
+/**
+ * Write an SF_VIEWPORT array (GEN6 only).
+ *
+ * Each of the \p num_viewports elements is eight dwords: the m00, m11,
+ * m22, m30, m31 and m32 floats of the viewport, then two zero dwords.
+ * Returns the state offset reported by ilo_cp_steal_ptr().
+ */
+static uint32_t
+gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
+                      const struct ilo_viewport_cso *viewports,
+                      unsigned num_viewports,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 8 * num_viewports;
+   uint32_t state_offset, *base;
+   unsigned idx;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
+    *
+    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   base = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
+                           state_len, state_align, &state_offset);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      const struct ilo_viewport_cso *vp = &viewports[idx];
+      uint32_t *elem = base + 8 * idx;
+
+      elem[0] = fui(vp->m00);
+      elem[1] = fui(vp->m11);
+      elem[2] = fui(vp->m22);
+      elem[3] = fui(vp->m30);
+      elem[4] = fui(vp->m31);
+      elem[5] = fui(vp->m32);
+      elem[6] = 0;
+      elem[7] = 0;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write a CLIP_VIEWPORT array (GEN6 only).
+ *
+ * Each of the \p num_viewports elements is four dwords holding the
+ * guardband extents min_gbx, max_gbx, min_gby and max_gby as floats.
+ * Returns the state offset reported by ilo_cp_steal_ptr().
+ */
+static uint32_t
+gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
+                        const struct ilo_viewport_cso *viewports,
+                        unsigned num_viewports,
+                        struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 4 * num_viewports;
+   uint32_t state_offset, *base;
+   unsigned idx;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
+    *
+    *     "The viewport-related state is stored as an array of up to 16
+    *      elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   base = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
+                           state_len, state_align, &state_offset);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      const struct ilo_viewport_cso *vp = &viewports[idx];
+      uint32_t *elem = base + 4 * idx;
+
+      elem[0] = fui(vp->min_gbx);
+      elem[1] = fui(vp->max_gbx);
+      elem[2] = fui(vp->min_gby);
+      elem[3] = fui(vp->max_gby);
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write a CC_VIEWPORT array.
+ *
+ * Each of the \p num_viewports elements is two dwords holding min_z and
+ * max_z as floats.  Returns the state offset reported by
+ * ilo_cp_steal_ptr().
+ */
+static uint32_t
+gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
+                      const struct ilo_viewport_cso *viewports,
+                      unsigned num_viewports,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 2 * num_viewports;
+   uint32_t state_offset, *base;
+   unsigned idx;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
+    *
+    *     "The viewport state is stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   base = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
+                           state_len, state_align, &state_offset);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      const struct ilo_viewport_cso *vp = &viewports[idx];
+      uint32_t *elem = base + 2 * idx;
+
+      elem[0] = fui(vp->min_z);
+      elem[1] = fui(vp->max_z);
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write COLOR_CALC_STATE.
+ *
+ * The six dwords hold the two stencil reference values together with the
+ * UNORM8 alpha-test format, the alpha reference converted to a ubyte, and
+ * the four blend color components as floats.  Returns the state offset
+ * reported by ilo_cp_steal_ptr().
+ */
+static uint32_t
+gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
+                           const struct pipe_stencil_ref *stencil_ref,
+                           float alpha_ref,
+                           const struct pipe_blend_color *blend_color,
+                           struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 6;
+   uint32_t state_offset, *dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
+                         state_len, state_align, &state_offset);
+
+   /*
+    * Convert to uint32_t before shifting: a narrow reference value would
+    * otherwise be promoted to int, and moving a set bit into the sign bit
+    * of an int is undefined.
+    */
+   dw[0] = (uint32_t) stencil_ref->ref_value[0] << 24 |
+           (uint32_t) stencil_ref->ref_value[1] << 16 |
+           BRW_ALPHATEST_FORMAT_UNORM8;
+   dw[1] = float_to_ubyte(alpha_ref);
+   dw[2] = fui(blend_color->color[0]);
+   dw[3] = fui(blend_color->color[1]);
+   dw[4] = fui(blend_color->color[2]);
+   dw[5] = fui(blend_color->color[3]);
+
+   return state_offset;
+}
+
static int
gen6_blend_factor_dst_alpha_forced_one(int factor)
{
switch (factor) {
- case GEN6_BLENDFACTOR_DST_ALPHA:
- return GEN6_BLENDFACTOR_ONE;
- case GEN6_BLENDFACTOR_INV_DST_ALPHA:
- case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE:
- return GEN6_BLENDFACTOR_ZERO;
+ case BRW_BLENDFACTOR_DST_ALPHA:
+ return BRW_BLENDFACTOR_ONE;
+ case BRW_BLENDFACTOR_INV_DST_ALPHA:
+ case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return BRW_BLENDFACTOR_ZERO;
default:
return factor;
}
@@ -1527,7 +3461,7 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev,
{
unsigned num_cso, i;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
if (state->independent_blend_enable) {
num_cso = Elements(blend->cso);
@@ -1547,7 +3481,7 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev,
bool dual_blend;
cso->payload[0] = 0;
- cso->payload[1] = GEN6_BLEND_DW1_COLORCLAMP_RTFORMAT |
+ cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
0x3;
if (!(rt->colormask & PIPE_MASK_A))
@@ -1614,6 +3548,134 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev,
}
}
+/**
+ * Write BLEND_STATE for the bound color buffers.
+ *
+ * One two-dword element is written per color buffer.  When there is no
+ * color buffer but alpha test is enabled, a single element is still
+ * written so that the alpha function can be referenced.  Blending, logic
+ * ops, alpha-to-coverage and alpha test are each masked according to the
+ * format of the render target the element belongs to.
+ *
+ * Returns the state offset reported by ilo_cp_steal_ptr(), or 0 when
+ * nothing is written.
+ */
+static uint32_t
+gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
+                      const struct ilo_blend_state *blend,
+                      const struct ilo_fb_state *fb,
+                      const struct pipe_alpha_state *alpha,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   int state_len;
+   uint32_t state_offset, *dw;
+   unsigned num_targets, i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
+    *
+    *     "The blend state is stored as an array of up to 8 elements..."
+    */
+   num_targets = fb->state.nr_cbufs;
+   assert(num_targets <= 8);
+
+   if (!num_targets) {
+      if (!alpha->enabled)
+         return 0;
+      /* to be able to reference alpha func */
+      num_targets = 1;
+   }
+
+   state_len = 2 * num_targets;
+
+   dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
+                         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_targets; i++) {
+      /* all targets share cso[0] unless independent blending is enabled */
+      const unsigned idx = (blend->independent_blend_enable) ? i : 0;
+      const struct ilo_blend_cso *cso = &blend->cso[idx];
+      const int num_samples = fb->num_samples;
+      /*
+       * The format restrictions below apply per render target, so the
+       * format is looked up with the target index, not the CSO index.
+       * There is no format for the alpha-func-only element or for an
+       * empty color buffer slot.
+       */
+      const struct util_format_description *format_desc =
+         (i < fb->state.nr_cbufs && fb->state.cbufs[i]) ?
+         util_format_description(fb->state.cbufs[i]->format) : NULL;
+      bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
+
+      rt_is_unorm = true;
+      rt_is_pure_integer = false;
+      rt_dst_alpha_forced_one = false;
+
+      if (format_desc) {
+         int ch;
+
+         switch (format_desc->format) {
+         case PIPE_FORMAT_B8G8R8X8_UNORM:
+            /* force alpha to one when the HW format has alpha */
+            assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
+                  == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
+            rt_dst_alpha_forced_one = true;
+            break;
+         default:
+            break;
+         }
+
+         for (ch = 0; ch < 4; ch++) {
+            if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
+               continue;
+
+            if (format_desc->channel[ch].pure_integer) {
+               rt_is_unorm = false;
+               rt_is_pure_integer = true;
+               break;
+            }
+
+            if (!format_desc->channel[ch].normalized ||
+                format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
+               rt_is_unorm = false;
+         }
+      }
+
+      dw[0] = cso->payload[0];
+      dw[1] = cso->payload[1];
+
+      /* the blend bits are left out for pure integer targets */
+      if (!rt_is_pure_integer) {
+         if (rt_dst_alpha_forced_one)
+            dw[0] |= cso->dw_blend_dst_alpha_forced_one;
+         else
+            dw[0] |= cso->dw_blend;
+      }
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+       *
+       *     "Logic Ops are only supported on *_UNORM surfaces (excluding
+       *      _SRGB variants), otherwise Logic Ops must be DISABLED."
+       *
+       * Since logicop is ignored for non-UNORM color buffers, no special care
+       * is needed.
+       */
+      if (rt_is_unorm)
+         dw[1] |= cso->dw_logicop;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+       *
+       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
+       *      Dither both must be disabled."
+       *
+       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
+       * requires that anyway.
+       */
+      if (num_samples > 1)
+         dw[1] |= cso->dw_alpha_mod;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+       *
+       *     "Alpha Test can only be enabled if Pixel Shader outputs a float
+       *      alpha value."
+       */
+      if (alpha->enabled && !rt_is_pure_integer) {
+         dw[1] |= 1 << 16 |
+                  gen6_translate_dsa_func(alpha->func) << 13;
+      }
+
+      dw += 2;
+   }
+
+   return state_offset;
+}
+
void
ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
const struct pipe_depth_stencil_alpha_state *state,
@@ -1622,10 +3684,12 @@ ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
const struct pipe_depth_state *depth = &state->depth;
const struct pipe_stencil_state *stencil0 = &state->stencil[0];
const struct pipe_stencil_state *stencil1 = &state->stencil[1];
- const struct pipe_alpha_state *alpha = &state->alpha;
uint32_t *dw;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /* copy alpha state for later use */
+ dsa->alpha = state->alpha;
STATIC_ASSERT(Elements(dsa->payload) >= 3);
dw = dsa->payload;
@@ -1692,18 +3756,29 @@ ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
if (depth->enabled)
dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
else
- dw[2] |= GEN6_COMPAREFUNCTION_ALWAYS << 27;
+ dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
+}
+
+/**
+ * Write DEPTH_STENCIL_STATE by copying the first three payload dwords of
+ * \p dsa.  Returns the state offset reported by ilo_cp_steal_ptr().
+ */
+static uint32_t
+gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
+                              const struct ilo_dsa_state *dsa,
+                              struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 3;
+   int i;
+   uint32_t state_offset, *dw;
- /* dw_alpha will be ORed to BLEND_STATE */
- if (alpha->enabled) {
- dsa->dw_alpha = 1 << 16 |
- gen6_translate_dsa_func(alpha->func) << 13;
- }
- else {
- dsa->dw_alpha = 0;
- }
- dsa->alpha_ref = float_to_ubyte(alpha->ref_value);
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
+                         state_len, state_align, &state_offset);
+
+   /* the payload was fully built by ilo_gpe_init_dsa() */
+   for (i = 0; i < state_len; i++)
+      dw[i] = dsa->payload[i];
+
+   return state_offset;
}
void
@@ -1715,7 +3790,7 @@ ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
{
unsigned i;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
for (i = 0; i < num_states; i++) {
uint16_t min_x, min_y, max_x, max_y;
@@ -1756,6 +3831,64 @@ ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
}
}
+/**
+ * Write a SCISSOR_RECT array.
+ *
+ * Two dwords per viewport are copied verbatim from scissor->payload.
+ * Returns the state offset reported by ilo_cp_steal_ptr().
+ */
+static uint32_t
+gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
+                       const struct ilo_scissor_state *scissor,
+                       unsigned num_viewports,
+                       struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 2 * num_viewports;
+   uint32_t state_offset;
+   uint32_t *dst;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
+    *
+    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   dst = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
+                          state_len, state_align, &state_offset);
+
+   /* state_len counts dwords; memcpy() wants bytes */
+   memcpy(dst, scissor->payload, state_len * 4);
+
+   return state_offset;
+}
+
+/**
+ * Write BINDING_TABLE_STATE.
+ *
+ * The table is a copy of the \p num_surface_states entries of
+ * \p surface_states.  Returns the state offset reported by
+ * ilo_cp_steal_ptr(), or 0 when the table is empty.
+ */
+static uint32_t
+gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
+                              uint32_t *surface_states,
+                              int num_surface_states,
+                              struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = num_surface_states;
+   uint32_t state_offset;
+   uint32_t *table;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     "It is stored as an array of up to 256 elements..."
+    */
+   assert(num_surface_states <= 256);
+
+   /* nothing to emit for an empty table */
+   if (num_surface_states == 0)
+      return 0;
+
+   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
+                            state_len, state_align, &state_offset);
+   memcpy(table, surface_states,
+          num_surface_states * sizeof(surface_states[0]));
+
+   return state_offset;
+}
+
void
ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
unsigned width, unsigned height,
@@ -1792,17 +3925,17 @@ ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
STATIC_ASSERT(Elements(surf->payload) >= 6);
dw = surf->payload;
- dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
- GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+ dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
+ BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
dw[1] = 0;
- dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
- level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+ dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
+ (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
+ level << BRW_SURFACE_LOD_SHIFT;
- dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- GEN6_TILING_X;
+ dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
+ BRW_SURFACE_TILED;
dw[4] = 0;
dw[5] = 0;
@@ -1884,18 +4017,18 @@ ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
STATIC_ASSERT(Elements(surf->payload) >= 6);
dw = surf->payload;
- dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+ dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
+ surface_format << BRW_SURFACE_FORMAT_SHIFT;
if (render_cache_rw)
- dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
+ dw[0] |= BRW_SURFACE_RC_READ_WRITE;
dw[1] = offset;
- dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
+ dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
+ width << BRW_SURFACE_WIDTH_SHIFT;
- dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
+ dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
+ pitch << BRW_SURFACE_PITCH_SHIFT;
dw[4] = 0;
dw[5] = 0;
@@ -1912,7 +4045,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
- bool is_rt, bool offset_to_layer,
+ bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf)
{
int surface_type, surface_format;
@@ -1923,7 +4056,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
ILO_GPE_VALID_GEN(dev, 6, 6);
surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
- assert(surface_type != GEN6_SURFTYPE_BUFFER);
+ assert(surface_type != BRW_SURFACE_BUFFER);
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
format = PIPE_FORMAT_Z32_FLOAT;
@@ -1940,7 +4073,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
tex->base.depth0 : num_layers;
pitch = tex->bo_stride;
- if (surface_type == GEN6_SURFTYPE_CUBE) {
+ if (surface_type == BRW_SURFACE_CUBE) {
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 81:
*
@@ -1954,7 +4087,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
* restriction.
*/
if (is_rt) {
- surface_type = GEN6_SURFTYPE_2D;
+ surface_type = BRW_SURFACE_2D;
}
else {
assert(num_layers % 6 == 0);
@@ -1965,21 +4098,21 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
/* sanity check the size */
assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
switch (surface_type) {
- case GEN6_SURFTYPE_1D:
+ case BRW_SURFACE_1D:
assert(width <= 8192 && height == 1 && depth <= 512);
assert(first_layer < 512 && num_layers <= 512);
break;
- case GEN6_SURFTYPE_2D:
+ case BRW_SURFACE_2D:
assert(width <= 8192 && height <= 8192 && depth <= 512);
assert(first_layer < 512 && num_layers <= 512);
break;
- case GEN6_SURFTYPE_3D:
+ case BRW_SURFACE_3D:
assert(width <= 2048 && height <= 2048 && depth <= 2048);
assert(first_layer < 2048 && num_layers <= 512);
if (!is_rt)
assert(first_layer == 0);
break;
- case GEN6_SURFTYPE_CUBE:
+ case BRW_SURFACE_CUBE:
assert(width <= 8192 && height <= 8192 && depth <= 85);
assert(width == height);
assert(first_layer < 512 && num_layers <= 512);
@@ -1998,44 +4131,52 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
assert(tex->interleaved);
if (is_rt) {
+ /*
+ * Compute the offset to the layer manually.
+ *
+ * For rendering, the hardware requires LOD to be the same for all
+ * render targets and the depth buffer. We need to compute the offset
+ * to the layer manually and always set LOD to 0.
+ */
+ if (true) {
+ /* we lose the capability for layered rendering */
+ assert(num_layers == 1);
+
+ layer_offset = ilo_texture_get_slice_offset(tex,
+ first_level, first_layer, &x_offset, &y_offset);
+
+ assert(x_offset % 4 == 0);
+ assert(y_offset % 2 == 0);
+ x_offset /= 4;
+ y_offset /= 2;
+
+ /* derive the size for the LOD */
+ width = u_minify(width, first_level);
+ height = u_minify(height, first_level);
+ if (surface_type == BRW_SURFACE_3D)
+ depth = u_minify(depth, first_level);
+ else
+ depth = 1;
+
+ first_level = 0;
+ first_layer = 0;
+ lod = 0;
+ }
+ else {
+ layer_offset = 0;
+ x_offset = 0;
+ y_offset = 0;
+ }
+
assert(num_levels == 1);
lod = first_level;
}
else {
- lod = num_levels - 1;
- }
-
- /*
- * Offset to the layer. When rendering, the hardware requires LOD and
- * Depth to be the same for all render targets and the depth buffer. We
- * need to offset to the layer manually and always set LOD and Depth to 0.
- */
- if (offset_to_layer) {
- /* we lose the capability for layered rendering */
- assert(is_rt && num_layers == 1);
-
- layer_offset = ilo_texture_get_slice_offset(tex,
- first_level, first_layer, &x_offset, &y_offset);
-
- assert(x_offset % 4 == 0);
- assert(y_offset % 2 == 0);
- x_offset /= 4;
- y_offset /= 2;
-
- /* derive the size for the LOD */
- width = u_minify(width, first_level);
- height = u_minify(height, first_level);
-
- first_level = 0;
- first_layer = 0;
-
- lod = 0;
- depth = 1;
- }
- else {
layer_offset = 0;
x_offset = 0;
y_offset = 0;
+
+ lod = num_levels - 1;
}
/*
@@ -2069,43 +4210,129 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
STATIC_ASSERT(Elements(surf->payload) >= 6);
dw = surf->payload;
- dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
- GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
+ dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
+ surface_format << BRW_SURFACE_FORMAT_SHIFT |
+ BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
- if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
+ if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
dw[0] |= 1 << 9 |
- GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+ BRW_SURFACE_CUBEFACE_ENABLES;
}
- if (is_rt)
- dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
+ if (render_cache_rw)
+ dw[0] |= BRW_SURFACE_RC_READ_WRITE;
dw[1] = layer_offset;
- dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
- lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+ dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
+ (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
+ lod << BRW_SURFACE_LOD_SHIFT;
- dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
+ dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
+ (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
- dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
+ dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
first_layer << 17 |
(num_layers - 1) << 8 |
- ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
- GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
+ ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
+ BRW_SURFACE_MULTISAMPLECOUNT_1);
- dw[5] = x_offset << GEN6_SURFACE_DW5_X_OFFSET__SHIFT |
- y_offset << GEN6_SURFACE_DW5_Y_OFFSET__SHIFT;
+ dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
+ y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
if (tex->valign_4)
- dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
+ dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
/* do not increment reference count */
surf->bo = tex->bo;
}
+/**
+ * Write SURFACE_STATE from a pre-built view surface.
+ *
+ * The state is 6 dwords on GEN6 and 8 on GEN7, taken from surf->payload.
+ * payload[1] is emitted through ilo_cp_write_bo() with surf->bo; the
+ * domains are render/render when \p for_render is set, and sampler with no
+ * write domain otherwise.  Returns the state offset reported by
+ * ilo_cp_steal().
+ */
+static uint32_t
+gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
+                        const struct ilo_view_surface *surf,
+                        bool for_render,
+                        struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
+   const uint32_t read_domains =
+      (for_render) ? INTEL_DOMAIN_RENDER : INTEL_DOMAIN_SAMPLER;
+   const uint32_t write_domain =
+      (for_render) ? INTEL_DOMAIN_RENDER : 0;
+   uint32_t state_offset;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
+
+   STATIC_ASSERT(Elements(surf->payload) >= 8);
+
+   ilo_cp_write(cp, surf->payload[0]);
+   ilo_cp_write_bo(cp, surf->payload[1],
+                   surf->bo, read_domains, write_domain);
+   /* dwords 2..5 on GEN6, 2..7 on GEN7 */
+   for (i = 2; i < state_len; i++)
+      ilo_cp_write(cp, surf->payload[i]);
+
+   ilo_cp_end(cp);
+
+   return state_offset;
+}
+
+/**
+ * Write the SURFACE_STATE for one stream output declaration (GEN6 only).
+ *
+ * A buffer view surface is initialized for output \p so_index of
+ * \p so_info: it starts at the target's buffer offset plus the output's
+ * destination offset, uses the stride of the output's buffer, and has an
+ * R32-based float format with as many channels as the output has
+ * components.  The surface is then emitted with gen6_emit_SURFACE_STATE().
+ */
+static uint32_t
+gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
+                           const struct pipe_stream_output_target *so,
+                           const struct pipe_stream_output_info *so_info,
+                           int so_index,
+                           struct ilo_cp *cp)
+{
+   /* indexed by the component count minus one */
+   static const enum pipe_format float_formats[4] = {
+      PIPE_FORMAT_R32_FLOAT,
+      PIPE_FORMAT_R32G32_FLOAT,
+      PIPE_FORMAT_R32G32B32_FLOAT,
+      PIPE_FORMAT_R32G32B32A32_FLOAT,
+   };
+   struct ilo_buffer *buf = ilo_buffer(so->buffer);
+   unsigned bo_offset, struct_size, num_comps;
+   enum pipe_format elem_format;
+   struct ilo_view_surface surf;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /* the offsets and strides in so_info are scaled by 4 here */
+   bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+   struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
+
+   num_comps = so_info->output[so_index].num_components;
+   if (num_comps >= 1 && num_comps <= 4) {
+      elem_format = float_formats[num_comps - 1];
+   }
+   else {
+      assert(!"unexpected SO components length");
+      elem_format = PIPE_FORMAT_R32_FLOAT;
+   }
+
+   ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
+         struct_size, elem_format, false, true, &surf);
+
+   return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
+}
+
static void
sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
const union pipe_color_union *color,
@@ -2184,7 +4411,7 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
bool clamp_is_to_edge;
uint32_t dw0, dw1, dw3;
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
memset(sampler, 0, sizeof(*sampler));
@@ -2197,9 +4424,9 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
max_aniso = state->max_anisotropy / 2 - 1;
else if (state->max_anisotropy > 16)
- max_aniso = GEN6_ANISORATIO_16;
+ max_aniso = BRW_ANISORATIO_16;
else
- max_aniso = GEN6_ANISORATIO_2;
+ max_aniso = BRW_ANISORATIO_2;
/*
*
@@ -2300,10 +4527,10 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
if (state->seamless_cube_map &&
(state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
- wrap_cube = GEN6_TEXCOORDMODE_CUBE;
+ wrap_cube = BRW_TEXCOORDMODE_CUBE;
}
else {
- wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
+ wrap_cube = BRW_TEXCOORDMODE_CLAMP;
}
if (!state->normalized_coords) {
@@ -2327,22 +4554,22 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
* - Surface Min LOD must be 0.
* - Texture LOD Bias must be 0."
*/
- assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
- assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
- assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-
- assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
- mag_filter == GEN6_MAPFILTER_LINEAR);
- assert(min_filter == GEN6_MAPFILTER_NEAREST ||
- min_filter == GEN6_MAPFILTER_LINEAR);
+ assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
+ wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
+ assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
+ wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
+ assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
+ wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
+
+ assert(mag_filter == BRW_MAPFILTER_NEAREST ||
+ mag_filter == BRW_MAPFILTER_LINEAR);
+ assert(min_filter == BRW_MAPFILTER_NEAREST ||
+ min_filter == BRW_MAPFILTER_LINEAR);
/* work around a bug in util_blitter */
- mip_filter = GEN6_MIPFILTER_NONE;
+ mip_filter = BRW_MIPFILTER_NONE;
- assert(mip_filter == GEN6_MIPFILTER_NONE);
+ assert(mip_filter == BRW_MIPFILTER_NONE);
}
if (dev->gen >= ILO_GEN(7)) {
@@ -2353,8 +4580,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
sampler->dw_filter = mag_filter << 17 |
min_filter << 14;
- sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
- GEN6_MAPFILTER_ANISOTROPIC << 14 |
+ sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
+ BRW_MAPFILTER_ANISOTROPIC << 14 |
1;
dw1 = min_lod << 20 |
@@ -2366,15 +4593,15 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
dw3 = max_aniso << 19;
/* round the coordinates for linear filtering */
- if (min_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
- GEN6_SAMPLER_DW3_V_MIN_ROUND |
- GEN6_SAMPLER_DW3_R_MIN_ROUND);
+ if (min_filter != BRW_MAPFILTER_NEAREST) {
+ dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
}
- if (mag_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
- GEN6_SAMPLER_DW3_V_MAG_ROUND |
- GEN6_SAMPLER_DW3_R_MAG_ROUND);
+ if (mag_filter != BRW_MAPFILTER_NEAREST) {
+ dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
}
if (!state->normalized_coords)
@@ -2390,8 +4617,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
* mode
*/
sampler->dw_wrap_1d = wrap_s << 6 |
- GEN6_TEXCOORDMODE_WRAP << 3 |
- GEN6_TEXCOORDMODE_WRAP;
+ BRW_TEXCOORDMODE_WRAP << 3 |
+ BRW_TEXCOORDMODE_WRAP;
sampler->dw_wrap_cube = wrap_cube << 6 |
wrap_cube << 3 |
@@ -2418,8 +4645,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
mag_filter << 17 |
min_filter << 14;
- sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
- GEN6_MAPFILTER_ANISOTROPIC << 14;
+ sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
+ BRW_MAPFILTER_ANISOTROPIC << 14;
dw1 = min_lod << 22 |
max_lod << 12;
@@ -2429,8 +4656,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
wrap_r;
sampler->dw_wrap_1d = wrap_s << 6 |
- GEN6_TEXCOORDMODE_WRAP << 3 |
- GEN6_TEXCOORDMODE_WRAP;
+ BRW_TEXCOORDMODE_WRAP << 3 |
+ BRW_TEXCOORDMODE_WRAP;
sampler->dw_wrap_cube = wrap_cube << 6 |
wrap_cube << 3 |
@@ -2439,15 +4666,15 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
dw3 = max_aniso << 19;
/* round the coordinates for linear filtering */
- if (min_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
- GEN6_SAMPLER_DW3_V_MIN_ROUND |
- GEN6_SAMPLER_DW3_R_MIN_ROUND);
+ if (min_filter != BRW_MAPFILTER_NEAREST) {
+ dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
}
- if (mag_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
- GEN6_SAMPLER_DW3_V_MAG_ROUND |
- GEN6_SAMPLER_DW3_R_MAG_ROUND);
+ if (mag_filter != BRW_MAPFILTER_NEAREST) {
+ dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
}
if (!state->normalized_coords)
@@ -2464,109 +4691,342 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
}
}
-void
-ilo_gpe_set_fb(const struct ilo_dev_info *dev,
- const struct pipe_framebuffer_state *state,
- struct ilo_fb_state *fb)
+static uint32_t
+gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, /* writes one 4-dword entry per sampler slot; returns the state offset, or 0 when num_samplers is 0 */
+ const struct ilo_sampler_cso * const *samplers,
+ const struct pipe_sampler_view * const *views,
+ const uint32_t *sampler_border_colors,
+ int num_samplers,
+ struct ilo_cp *cp)
{
- const struct pipe_surface *first;
- unsigned num_surfaces, first_idx;
+ const int state_align = 32 / 4;
+ const int state_len = 4 * num_samplers; /* four dwords per sampler, matching the dw += 4 steps below */
+ uint32_t state_offset, *dw;
+ int i;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 101:
+ *
+ * "The sampler state is stored as an array of up to 16 elements..."
+ */
+ assert(num_samplers <= 16);
- util_copy_framebuffer_state(&fb->state, state);
+ if (!num_samplers)
+ return 0;
- ilo_gpe_init_view_surface_null(dev,
- state->width, state->height,
- 1, 0, &fb->null_rt);
+ dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
+ state_len, state_align, &state_offset);
+
+ for (i = 0; i < num_samplers; i++) {
+ const struct ilo_sampler_cso *sampler = samplers[i];
+ const struct pipe_sampler_view *view = views[i];
+ const uint32_t border_color = sampler_border_colors[i];
+ uint32_t dw_filter, dw_wrap;
+
+ /* there may be holes */
+ if (!sampler || !view) {
+ /* disabled sampler */
+ dw[0] = 1 << 31; /* NOTE(review): shifts into the sign bit of int; 1u << 31 would avoid that */
+ dw[1] = 0;
+ dw[2] = 0;
+ dw[3] = 0;
+ dw += 4;
+
+ continue;
+ }
- first = NULL;
- for (first_idx = 0; first_idx < state->nr_cbufs; first_idx++) {
- if (state->cbufs[first_idx]) {
- first = state->cbufs[first_idx];
+ /* determine filter and wrap modes */
+ switch (view->texture->target) {
+ case PIPE_TEXTURE_1D:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap_1d;
+ break;
+ case PIPE_TEXTURE_3D:
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+ *
+ * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
+ * surfaces of type SURFTYPE_3D."
+ */
+ dw_filter = sampler->dw_filter; /* never the anisotropic variant for 3D targets */
+ dw_wrap = sampler->dw_wrap;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap_cube;
break;
+ default:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap;
+ break;
+ }
+
+ dw[0] = sampler->payload[0];
+ dw[1] = sampler->payload[1];
+ assert(!(border_color & 0x1f)); /* the low five bits of the border color value must be clear */
+ dw[2] = border_color;
+ dw[3] = sampler->payload[2];
+
+ dw[0] |= dw_filter; /* merge the target-specific filter bits chosen above */
+
+ if (dev->gen >= ILO_GEN(7)) {
+ dw[3] |= dw_wrap; /* GEN7 and later: wrap bits go into dw3 */
+ }
+ else {
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 21:
+ *
+ * "[DevSNB] Errata: Incorrect behavior is observed in cases
+ * where the min and mag mode filters are different and
+ * SurfMinLOD is nonzero. The determination of MagMode uses the
+ * following equation instead of the one in the above
+ * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
+ *
+ * As a way to work around that, we set Base to
+ * view->u.tex.first_level.
+ */
+ dw[0] |= view->u.tex.first_level << 22;
+
+ dw[1] |= dw_wrap; /* GEN6: wrap bits go into dw1 */
}
+
+ dw += 4;
}
- if (!first)
- first = state->zsbuf;
- fb->num_samples = (first) ? first->texture->nr_samples : 1;
- if (!fb->num_samples)
- fb->num_samples = 1;
+ return state_offset;
+}
+
+static uint32_t
+gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, /* copies the border color words out of the sampler CSO payload; returns the state offset */
+ const struct ilo_sampler_cso *sampler,
+ struct ilo_cp *cp)
+{
+ const int state_align = 32 / 4;
+ const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12; /* length in dwords: 4 on GEN7 and later, 12 on GEN6 */
+ uint32_t state_offset, *dw;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
+ state_len, state_align, &state_offset);
+
+ /* see ilo_gpe_init_sampler_cso() */
+ memcpy(dw, &sampler->payload[3], state_len * 4); /* copy starts at payload[3]; state_len is in dwords, memcpy takes bytes */
- fb->offset_to_layers = false;
+ return state_offset;
+}
+static uint32_t
+gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev, /* reserves the buffer, zeroes its padding tail, and returns the state offset; contents are not written here */
+ int size, void **pcb,
+ struct ilo_cp *cp)
+{
/*
- * The PRMs list several restrictions when the framebuffer has more than
- * one surface, but it seems they are lifted on GEN7+.
+ * For all VS, GS, FS, and CS push constant buffers, they must be aligned
+ * to 32 bytes, and their sizes are specified in 256-bit units.
*/
- num_surfaces = state->nr_cbufs + !!state->zsbuf;
+ const int state_align = 32 / 4;
+ const int state_len = align(size, 32) / 4; /* presumably size rounded up to a multiple of 32 bytes, then converted to dwords; confirm against align() */
+ uint32_t state_offset;
+ char *buf;
- if (dev->gen < ILO_GEN(7) && num_surfaces > 1) {
- const unsigned first_depth =
- (first->texture->target == PIPE_TEXTURE_3D) ?
- first->texture->depth0 :
- first->u.tex.last_layer - first->u.tex.first_layer + 1;
- bool has_3d_target = (first->texture->target == PIPE_TEXTURE_3D);
- unsigned i;
+ ILO_GPE_VALID_GEN(dev, 6, 7);
- for (i = first_idx + 1; i < num_surfaces; i++) {
- const struct pipe_surface *surf =
- (i < state->nr_cbufs) ? state->cbufs[i] : state->zsbuf;
- unsigned depth;
+ buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
+ state_len, state_align, &state_offset);
- if (!surf)
- continue;
+ /* zero out the unused range */
+ if (size < state_len * 4)
+ memset(&buf[size], 0, state_len * 4 - size); /* only the bytes past `size` are cleared */
- depth = (surf->texture->target == PIPE_TEXTURE_3D) ?
- surf->texture->depth0 :
- surf->u.tex.last_layer - surf->u.tex.first_layer + 1;
+ if (pcb)
+ *pcb = buf; /* pcb is optional; when given it receives the pointer to the reserved buffer */
- has_3d_target |= (surf->texture->target == PIPE_TEXTURE_3D);
+ return state_offset;
+}
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 79:
- *
- * "The LOD of a render target must be the same as the LOD of the
- * other render target(s) and of the depth buffer (defined in
- * 3DSTATE_DEPTH_BUFFER)."
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "The Depth of a render target must be the same as the Depth of
- * the other render target(s) and of the depth buffer (defined
- * in 3DSTATE_DEPTH_BUFFER)."
- */
- if (surf->u.tex.level != first->u.tex.level ||
- depth != first_depth) {
- fb->offset_to_layers = true;
- break;
- }
+static int
+gen6_estimate_command_size(const struct ilo_dev_info *dev, /* returns header + body * arg for command `cmd`, or 0 when arg is 0 */
+ enum ilo_gpe_gen6_command cmd,
+ int arg)
+{
+ static const struct {
+ int header; /* counted once per command */
+ int body; /* counted once per instance */
+ } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
+ [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
+ [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
+ [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
+ [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
+ [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
+ [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
+ [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
+ [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
+ [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
+ [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
+ [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
+ [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
+ [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
+ [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
+ [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
+ [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
+ [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
+ [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
+ [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
+ [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
+ [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
+ [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
+ [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
+ [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
+ [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
+ [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
+ [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
+ [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
+ [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
+ [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
+ [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
+ [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
+ [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
+ [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
+ [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
+ [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
+ [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
+ [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
+ [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
+ [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
+ [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
+ };
+ const int header = gen6_command_size_table[cmd].header;
+ const int body = gen6_command_size_table[cmd].body; /* look up by command; arg is the instance count, not a table index */
+ const int count = arg;
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 77:
- *
- * "The Height of a render target must be the same as the Height
- * of the other render targets and the depth buffer (defined in
- * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or
- * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip
- * mapped)."
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 78:
- *
- * "The Width of a render target must be the same as the Width of
- * the other render target(s) and the depth buffer (defined in
- * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or
- * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip
- * mapped)."
- */
- if (surf->texture->width0 != first->texture->width0 ||
- surf->texture->height0 != first->texture->height0) {
- if (has_3d_target || first->u.tex.level || first_depth > 1) {
- fb->offset_to_layers = true;
- break;
- }
- }
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+ assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
+
+ return (likely(count)) ? header + body * count : 0;
+}
+
+static int
+gen6_estimate_state_size(const struct ilo_dev_info *dev, /* estimates space for `arg` instances of `state`, including worst-case alignment padding; 0 when arg is 0 */
+ enum ilo_gpe_gen6_state state,
+ int arg)
+{
+ static const struct {
+ int alignment;
+ int body;
+ bool is_array; /* true: instances are laid out back to back as a single array */
+ } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
+ [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
+ [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
+ [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
+ [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
+ [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
+ [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
+ [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
+ [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
+ [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
+ [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
+ [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
+ [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
+ [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
+ };
+ const int alignment = gen6_state_size_table[state].alignment;
+ const int body = gen6_state_size_table[state].body;
+ const bool is_array = gen6_state_size_table[state].is_array;
+ const int count = arg;
+ int estimate;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+ assert(state < ILO_GPE_GEN6_STATE_COUNT);
+
+ if (likely(count)) {
+ if (is_array) {
+ estimate = (alignment - 1) + body * count; /* one worst-case alignment pad, then count packed bodies */
}
+ else {
+ estimate = (alignment - 1) + body; /* first instance: worst-case pad plus one body */
+ /* all states are aligned */
+ if (count > 1)
+ estimate += util_align_npot(body, alignment) * (count - 1); /* each further instance takes a body rounded up to the alignment */
+ }
+ }
+ else {
+ estimate = 0;
}
+
+ return estimate;
+}
+
+static const struct ilo_gpe_gen6 gen6_gpe = { /* file-local GEN6 function table: two size estimators plus one emit_* hook per command and state */
+ .estimate_command_size = gen6_estimate_command_size,
+ .estimate_state_size = gen6_estimate_state_size,
+
+#define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name /* binds member emit_<name> to the static function gen6_emit_<name> */
+ GEN6_SET(STATE_BASE_ADDRESS),
+ GEN6_SET(STATE_SIP),
+ GEN6_SET(3DSTATE_VF_STATISTICS),
+ GEN6_SET(PIPELINE_SELECT),
+ GEN6_SET(MEDIA_VFE_STATE),
+ GEN6_SET(MEDIA_CURBE_LOAD),
+ GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
+ GEN6_SET(MEDIA_GATEWAY_STATE),
+ GEN6_SET(MEDIA_STATE_FLUSH),
+ GEN6_SET(MEDIA_OBJECT_WALKER),
+ GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
+ GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
+ GEN6_SET(3DSTATE_URB),
+ GEN6_SET(3DSTATE_VERTEX_BUFFERS),
+ GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
+ GEN6_SET(3DSTATE_INDEX_BUFFER),
+ GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
+ GEN6_SET(3DSTATE_CC_STATE_POINTERS),
+ GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
+ GEN6_SET(3DSTATE_VS),
+ GEN6_SET(3DSTATE_GS),
+ GEN6_SET(3DSTATE_CLIP),
+ GEN6_SET(3DSTATE_SF),
+ GEN6_SET(3DSTATE_WM),
+ GEN6_SET(3DSTATE_CONSTANT_VS),
+ GEN6_SET(3DSTATE_CONSTANT_GS),
+ GEN6_SET(3DSTATE_CONSTANT_PS),
+ GEN6_SET(3DSTATE_SAMPLE_MASK),
+ GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
+ GEN6_SET(3DSTATE_DEPTH_BUFFER),
+ GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
+ GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
+ GEN6_SET(3DSTATE_LINE_STIPPLE),
+ GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
+ GEN6_SET(3DSTATE_GS_SVB_INDEX),
+ GEN6_SET(3DSTATE_MULTISAMPLE),
+ GEN6_SET(3DSTATE_STENCIL_BUFFER),
+ GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
+ GEN6_SET(3DSTATE_CLEAR_PARAMS),
+ GEN6_SET(PIPE_CONTROL),
+ GEN6_SET(3DPRIMITIVE),
+ GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
+ GEN6_SET(SF_VIEWPORT),
+ GEN6_SET(CLIP_VIEWPORT),
+ GEN6_SET(CC_VIEWPORT),
+ GEN6_SET(COLOR_CALC_STATE),
+ GEN6_SET(BLEND_STATE),
+ GEN6_SET(DEPTH_STENCIL_STATE),
+ GEN6_SET(SCISSOR_RECT),
+ GEN6_SET(BINDING_TABLE_STATE),
+ GEN6_SET(SURFACE_STATE),
+ GEN6_SET(so_SURFACE_STATE),
+ GEN6_SET(SAMPLER_STATE),
+ GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
+ GEN6_SET(push_constant_buffer),
+#undef GEN6_SET
+};
+
+const struct ilo_gpe_gen6 *
+ilo_gpe_gen6_get(void) /* accessor for the file-local GEN6 function table */
+{
+ return &gen6_gpe; /* address of a static const object; nothing is allocated here */
}
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h
index e5647184f..7e24f97b8 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h
@@ -28,2498 +28,533 @@
#ifndef ILO_GPE_GEN6_H
#define ILO_GPE_GEN6_H
-#include "genhw/genhw.h"
-#include "intel_winsys.h"
-
#include "ilo_common.h"
-#include "ilo_cp.h"
-#include "ilo_format.h"
-#include "ilo_resource.h"
-#include "ilo_shader.h"
#include "ilo_gpe.h"
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
-#define ILO_GPE_MI(op) (0x0 << 29 | (op) << 23)
-
#define ILO_GPE_CMD(pipeline, op, subop) \
(0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16)
/**
- * Translate winsys tiling to hardware tiling.
- */
-static inline int
-ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
-{
- switch (tiling) {
- case INTEL_TILING_NONE:
- return GEN6_TILING_NONE;
- case INTEL_TILING_X:
- return GEN6_TILING_X;
- case INTEL_TILING_Y:
- return GEN6_TILING_Y;
- default:
- assert(!"unknown tiling");
- return GEN6_TILING_NONE;
- }
-}
-
-/**
- * Translate a pipe primitive type to the matching hardware primitive type.
+ * Commands that GEN6 GPE could emit.
*/
-static inline int
-ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
-{
- static const int prim_mapping[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST,
- [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST,
- [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP,
- [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP,
- [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST,
- [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN,
- [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST,
- [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP,
- [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON,
- [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ,
- [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ,
- [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ,
- [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ,
- };
-
- assert(prim_mapping[prim]);
-
- return prim_mapping[prim];
-}
+enum ilo_gpe_gen6_command {
+ ILO_GPE_GEN6_STATE_BASE_ADDRESS, /* (0x0, 0x1, 0x01) */
+ ILO_GPE_GEN6_STATE_SIP, /* (0x0, 0x1, 0x02) */
+ ILO_GPE_GEN6_3DSTATE_VF_STATISTICS, /* (0x1, 0x0, 0x0b) */
+ ILO_GPE_GEN6_PIPELINE_SELECT, /* (0x1, 0x1, 0x04) */
+ ILO_GPE_GEN6_MEDIA_VFE_STATE, /* (0x2, 0x0, 0x00) */
+ ILO_GPE_GEN6_MEDIA_CURBE_LOAD, /* (0x2, 0x0, 0x01) */
+ ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD, /* (0x2, 0x0, 0x02) */
+ ILO_GPE_GEN6_MEDIA_GATEWAY_STATE, /* (0x2, 0x0, 0x03) */
+ ILO_GPE_GEN6_MEDIA_STATE_FLUSH, /* (0x2, 0x0, 0x04) */
+ ILO_GPE_GEN6_MEDIA_OBJECT_WALKER, /* (0x2, 0x1, 0x03) */
+ ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS, /* (0x3, 0x0, 0x01) */
+ ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS, /* (0x3, 0x0, 0x02) */
+ ILO_GPE_GEN6_3DSTATE_URB, /* (0x3, 0x0, 0x05) */
+ ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS, /* (0x3, 0x0, 0x08) */
+ ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS, /* (0x3, 0x0, 0x09) */
+ ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER, /* (0x3, 0x0, 0x0a) */
+ ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS, /* (0x3, 0x0, 0x0d) */
+ ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS, /* (0x3, 0x0, 0x0e) */
+ ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS, /* (0x3, 0x0, 0x0f) */
+ ILO_GPE_GEN6_3DSTATE_VS, /* (0x3, 0x0, 0x10) */
+ ILO_GPE_GEN6_3DSTATE_GS, /* (0x3, 0x0, 0x11) */
+ ILO_GPE_GEN6_3DSTATE_CLIP, /* (0x3, 0x0, 0x12) */
+ ILO_GPE_GEN6_3DSTATE_SF, /* (0x3, 0x0, 0x13) */
+ ILO_GPE_GEN6_3DSTATE_WM, /* (0x3, 0x0, 0x14) */
+ ILO_GPE_GEN6_3DSTATE_CONSTANT_VS, /* (0x3, 0x0, 0x15) */
+ ILO_GPE_GEN6_3DSTATE_CONSTANT_GS, /* (0x3, 0x0, 0x16) */
+ ILO_GPE_GEN6_3DSTATE_CONSTANT_PS, /* (0x3, 0x0, 0x17) */
+ ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK, /* (0x3, 0x0, 0x18) */
+ ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE, /* (0x3, 0x1, 0x00) */
+ ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER, /* (0x3, 0x1, 0x05) */
+ ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET, /* (0x3, 0x1, 0x06) */
+ ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN, /* (0x3, 0x1, 0x07) */
+ ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE, /* (0x3, 0x1, 0x08) */
+ ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS, /* (0x3, 0x1, 0x0a) */
+ ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX, /* (0x3, 0x1, 0x0b) */
+ ILO_GPE_GEN6_3DSTATE_MULTISAMPLE, /* (0x3, 0x1, 0x0d) */
+ ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER, /* (0x3, 0x1, 0x0e) */
+ ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER, /* (0x3, 0x1, 0x0f) */
+ ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS, /* (0x3, 0x1, 0x10) */
+ ILO_GPE_GEN6_PIPE_CONTROL, /* (0x3, 0x2, 0x00) */
+ ILO_GPE_GEN6_3DPRIMITIVE, /* (0x3, 0x3, 0x00) */
+
+ ILO_GPE_GEN6_COMMAND_COUNT, /* not a command: number of entries above, used to size the per-command table in gen6_estimate_command_size() */
+};
/**
- * Translate a pipe texture target to the matching hardware surface type.
+ * Indirect states that GEN6 GPE could emit.
*/
-static inline int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
-{
- switch (target) {
- case PIPE_BUFFER:
- return GEN6_SURFTYPE_BUFFER;
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_CUBE;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_BUFFER;
- }
-}
+enum ilo_gpe_gen6_state {
+ ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA,
+ ILO_GPE_GEN6_SF_VIEWPORT,
+ ILO_GPE_GEN6_CLIP_VIEWPORT,
+ ILO_GPE_GEN6_CC_VIEWPORT,
+ ILO_GPE_GEN6_COLOR_CALC_STATE,
+ ILO_GPE_GEN6_BLEND_STATE,
+ ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
+ ILO_GPE_GEN6_SCISSOR_RECT,
+ ILO_GPE_GEN6_BINDING_TABLE_STATE,
+ ILO_GPE_GEN6_SURFACE_STATE,
+ ILO_GPE_GEN6_SAMPLER_STATE,
+ ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,
+ ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,
+
+ ILO_GPE_GEN6_STATE_COUNT, /* not a state: number of entries above, used to size the per-state table in gen6_estimate_state_size() */
+};
+
+enum intel_tiling_mode;
+
+struct intel_bo;
+struct ilo_cp;
+struct ilo_texture;
+struct ilo_shader;
+
+typedef void
+(*ilo_gpe_gen6_STATE_BASE_ADDRESS)(const struct ilo_dev_info *dev,
+ struct intel_bo *general_state_bo,
+ struct intel_bo *surface_state_bo,
+ struct intel_bo *dynamic_state_bo,
+ struct intel_bo *indirect_object_bo,
+ struct intel_bo *instruction_bo,
+ uint32_t general_state_size,
+ uint32_t dynamic_state_size,
+ uint32_t indirect_object_size,
+ uint32_t instruction_size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_STATE_SIP)(const struct ilo_dev_info *dev,
+ uint32_t sip,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_VF_STATISTICS)(const struct ilo_dev_info *dev,
+ bool enable,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_PIPELINE_SELECT)(const struct ilo_dev_info *dev,
+ int pipeline,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_MEDIA_VFE_STATE)(const struct ilo_dev_info *dev,
+ int max_threads, int num_urb_entries,
+ int urb_entry_size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_MEDIA_CURBE_LOAD)(const struct ilo_dev_info *dev,
+ uint32_t buf, int size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD)(const struct ilo_dev_info *dev,
+ uint32_t offset, int num_ids,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_MEDIA_GATEWAY_STATE)(const struct ilo_dev_info *dev,
+ int id, int byte, int thread_count,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_MEDIA_STATE_FLUSH)(const struct ilo_dev_info *dev,
+ int thread_count_water_mark,
+ int barrier_mask,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_MEDIA_OBJECT_WALKER)(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_BINDING_TABLE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t vs_binding_table,
+ uint32_t gs_binding_table,
+ uint32_t ps_binding_table,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_SAMPLER_STATE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t vs_sampler_state,
+ uint32_t gs_sampler_state,
+ uint32_t ps_sampler_state,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_URB)(const struct ilo_dev_info *dev,
+ int vs_total_size, int gs_total_size,
+ int vs_entry_size, int gs_entry_size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS)(const struct ilo_dev_info *dev,
+ const struct pipe_vertex_buffer *vbuffers,
+ uint64_t vbuffer_mask,
+ const struct ilo_ve_state *ve,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS)(const struct ilo_dev_info *dev,
+ const struct ilo_ve_state *ve,
+ bool last_velement_edgeflag,
+ bool prepend_generated_ids,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_INDEX_BUFFER)(const struct ilo_dev_info *dev,
+ const struct ilo_ib_state *ib,
+ bool enable_cut_index,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_VIEWPORT_STATE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t clip_viewport,
+ uint32_t sf_viewport,
+ uint32_t cc_viewport,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t blend_state,
+ uint32_t depth_stencil_state,
+ uint32_t color_calc_state,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t scissor_rect,
+ struct ilo_cp *cp);
+
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_VS)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *vs,
+ int num_samplers,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ const struct ilo_shader_state *vs,
+ int verts_per_prim,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_CLIP)(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ bool enable_guardband,
+ int num_viewports,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_SF)(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ const struct ilo_shader_state *last_sh,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_WM)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ int num_samplers,
+ const struct ilo_rasterizer_state *rasterizer,
+ bool dual_blend, bool cc_may_kill,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_CONSTANT_VS)(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_CONSTANT_GS)(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_CONSTANT_PS)(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev,
+ unsigned sample_mask,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE)(const struct ilo_dev_info *dev,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER)(const struct ilo_dev_info *dev,
+ const struct ilo_zs_surface *zs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET)(const struct ilo_dev_info *dev,
+ int x_offset, int y_offset,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN)(const struct ilo_dev_info *dev,
+ const struct pipe_poly_stipple *pattern,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_LINE_STIPPLE)(const struct ilo_dev_info *dev,
+ unsigned pattern, unsigned factor,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS)(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_GS_SVB_INDEX)(const struct ilo_dev_info *dev,
+ int index, unsigned svbi,
+ unsigned max_svbi,
+ bool load_vertex_count,
+ struct ilo_cp *cp);
+
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_MULTISAMPLE)(const struct ilo_dev_info *dev,
+ int num_samples,
+ const uint32_t *packed_sample_pos,
+ bool pixel_location_center,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER)(const struct ilo_dev_info *dev,
+ const struct ilo_zs_surface *zs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER)(const struct ilo_dev_info *dev,
+ const struct ilo_zs_surface *zs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS)(const struct ilo_dev_info *dev,
+ uint32_t clear_val,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_PIPE_CONTROL)(const struct ilo_dev_info *dev,
+ uint32_t dw1,
+ struct intel_bo *bo, uint32_t bo_offset,
+ bool write_qword,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen6_3DPRIMITIVE)(const struct ilo_dev_info *dev,
+ const struct pipe_draw_info *info,
+ const struct ilo_ib_state *ib,
+ bool rectlist,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state **cs,
+ uint32_t *sampler_state,
+ int *num_samplers,
+ uint32_t *binding_table_state,
+ int *num_surfaces,
+ int num_ids,
+ struct ilo_cp *cp);
+typedef uint32_t
+(*ilo_gpe_gen6_SF_VIEWPORT)(const struct ilo_dev_info *dev,
+ const struct ilo_viewport_cso *viewports,
+ unsigned num_viewports,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_CLIP_VIEWPORT)(const struct ilo_dev_info *dev,
+ const struct ilo_viewport_cso *viewports,
+ unsigned num_viewports,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_CC_VIEWPORT)(const struct ilo_dev_info *dev,
+ const struct ilo_viewport_cso *viewports,
+ unsigned num_viewports,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_COLOR_CALC_STATE)(const struct ilo_dev_info *dev,
+ const struct pipe_stencil_ref *stencil_ref,
+ float alpha_ref,
+ const struct pipe_blend_color *blend_color,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_BLEND_STATE)(const struct ilo_dev_info *dev,
+ const struct ilo_blend_state *blend,
+ const struct ilo_fb_state *fb,
+ const struct pipe_alpha_state *alpha,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_DEPTH_STENCIL_STATE)(const struct ilo_dev_info *dev,
+ const struct ilo_dsa_state *dsa,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_SCISSOR_RECT)(const struct ilo_dev_info *dev,
+ const struct ilo_scissor_state *scissor,
+ unsigned num_viewports,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_BINDING_TABLE_STATE)(const struct ilo_dev_info *dev,
+ uint32_t *surface_states,
+ int num_surface_states,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_SURFACE_STATE)(const struct ilo_dev_info *dev,
+ const struct ilo_view_surface *surface,
+ bool for_render,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_so_SURFACE_STATE)(const struct ilo_dev_info *dev,
+ const struct pipe_stream_output_target *so,
+ const struct pipe_stream_output_info *so_info,
+ int so_index,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_SAMPLER_STATE)(const struct ilo_dev_info *dev,
+ const struct ilo_sampler_cso * const *samplers,
+ const struct pipe_sampler_view * const *views,
+ const uint32_t *sampler_border_colors,
+ int num_samplers,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE)(const struct ilo_dev_info *dev,
+ const struct ilo_sampler_cso *sampler,
+ struct ilo_cp *cp);
+
+typedef uint32_t
+(*ilo_gpe_gen6_push_constant_buffer)(const struct ilo_dev_info *dev,
+ int size, void **pcb,
+ struct ilo_cp *cp);
/**
- * Fill in DW2 to DW7 of 3DSTATE_SF.
+ * GEN6 graphics processing engine
+ *
+ * This is a low-level interface. It does not handle the interdependencies
+ * between states.
*/
-static inline void
+struct ilo_gpe_gen6 {
+ int (*estimate_command_size)(const struct ilo_dev_info *dev,
+ enum ilo_gpe_gen6_command cmd,
+ int arg);
+
+ int (*estimate_state_size)(const struct ilo_dev_info *dev,
+ enum ilo_gpe_gen6_state state,
+ int arg);
+
+#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name emit_ ## name
+ GEN6_EMIT(STATE_BASE_ADDRESS);
+ GEN6_EMIT(STATE_SIP);
+ GEN6_EMIT(3DSTATE_VF_STATISTICS);
+ GEN6_EMIT(PIPELINE_SELECT);
+ GEN6_EMIT(MEDIA_VFE_STATE);
+ GEN6_EMIT(MEDIA_CURBE_LOAD);
+ GEN6_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
+ GEN6_EMIT(MEDIA_GATEWAY_STATE);
+ GEN6_EMIT(MEDIA_STATE_FLUSH);
+ GEN6_EMIT(MEDIA_OBJECT_WALKER);
+ GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS);
+ GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_URB);
+ GEN6_EMIT(3DSTATE_VERTEX_BUFFERS);
+ GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS);
+ GEN6_EMIT(3DSTATE_INDEX_BUFFER);
+ GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_CC_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
+ GEN6_EMIT(3DSTATE_VS);
+ GEN6_EMIT(3DSTATE_GS);
+ GEN6_EMIT(3DSTATE_CLIP);
+ GEN6_EMIT(3DSTATE_SF);
+ GEN6_EMIT(3DSTATE_WM);
+ GEN6_EMIT(3DSTATE_CONSTANT_VS);
+ GEN6_EMIT(3DSTATE_CONSTANT_GS);
+ GEN6_EMIT(3DSTATE_CONSTANT_PS);
+ GEN6_EMIT(3DSTATE_SAMPLE_MASK);
+ GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE);
+ GEN6_EMIT(3DSTATE_DEPTH_BUFFER);
+ GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
+ GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
+ GEN6_EMIT(3DSTATE_LINE_STIPPLE);
+ GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS);
+ GEN6_EMIT(3DSTATE_GS_SVB_INDEX);
+ GEN6_EMIT(3DSTATE_MULTISAMPLE);
+ GEN6_EMIT(3DSTATE_STENCIL_BUFFER);
+ GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
+ GEN6_EMIT(3DSTATE_CLEAR_PARAMS);
+ GEN6_EMIT(PIPE_CONTROL);
+ GEN6_EMIT(3DPRIMITIVE);
+ GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA);
+ GEN6_EMIT(SF_VIEWPORT);
+ GEN6_EMIT(CLIP_VIEWPORT);
+ GEN6_EMIT(CC_VIEWPORT);
+ GEN6_EMIT(COLOR_CALC_STATE);
+ GEN6_EMIT(BLEND_STATE);
+ GEN6_EMIT(DEPTH_STENCIL_STATE);
+ GEN6_EMIT(SCISSOR_RECT);
+ GEN6_EMIT(BINDING_TABLE_STATE);
+ GEN6_EMIT(SURFACE_STATE);
+ GEN6_EMIT(so_SURFACE_STATE);
+ GEN6_EMIT(SAMPLER_STATE);
+ GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE);
+ GEN6_EMIT(push_constant_buffer);
+#undef GEN6_EMIT
+};
+
+const struct ilo_gpe_gen6 *
+ilo_gpe_gen6_get(void);
+
+/* Below are helpers for other GENs */
+
+int
+ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling);
+
+int
+ilo_gpe_gen6_translate_pipe_prim(unsigned prim);
+
+int
+ilo_gpe_gen6_translate_texture(enum pipe_texture_target target);
+
+void
ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
int num_samples,
enum pipe_format depth_format,
- uint32_t *payload, unsigned payload_len)
-{
- assert(payload_len == Elements(rasterizer->sf.payload));
-
- if (rasterizer) {
- const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
-
- memcpy(payload, sf->payload, sizeof(sf->payload));
- if (num_samples > 1)
- payload[1] |= sf->dw_msaa;
- }
- else {
- payload[0] = 0;
- payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
- payload[2] = 0;
- payload[3] = 0;
- payload[4] = 0;
- payload[5] = 0;
- }
-
- if (dev->gen >= ILO_GEN(7)) {
- int format;
-
- /* separate stencil */
- switch (depth_format) {
- case PIPE_FORMAT_Z16_UNORM:
- format = GEN6_ZFORMAT_D16_UNORM;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- break;
- default:
- /* FLOAT surface is assumed when there is no depth buffer */
- format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- }
-
- payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
- }
-}
+ uint32_t *payload, unsigned payload_len);
-/**
- * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
- */
-static inline void
+void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
const struct ilo_shader_state *fs,
- uint32_t *dw, int num_dwords)
-{
- int output_count, vue_offset, vue_len;
- const struct ilo_kernel_routing *routing;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
- assert(num_dwords == 13);
-
- if (!fs) {
- memset(dw, 0, sizeof(dw[0]) * num_dwords);
- dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
- return;
- }
-
- output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
- assert(output_count <= 32);
-
- routing = ilo_shader_get_kernel_routing(fs);
-
- vue_offset = routing->source_skip;
- assert(vue_offset % 2 == 0);
- vue_offset /= 2;
-
- vue_len = (routing->source_len + 1) / 2;
- if (!vue_len)
- vue_len = 1;
-
- dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
- vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT |
- vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
- if (routing->swizzle_enable)
- dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
-
- switch (rasterizer->state.sprite_coord_mode) {
- case PIPE_SPRITE_COORD_UPPER_LEFT:
- dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
- break;
- case PIPE_SPRITE_COORD_LOWER_LEFT:
- dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
- break;
- }
-
- STATIC_ASSERT(Elements(routing->swizzles) >= 16);
- memcpy(&dw[1], routing->swizzles, 2 * 16);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 268:
- *
- * "This field (Point Sprite Texture Coordinate Enable) must be
- * programmed to 0 when non-point primitives are rendered."
- *
- * TODO We do not check that yet.
- */
- dw[9] = routing->point_sprite_enable;
-
- dw[10] = routing->const_interp_enable;
-
- /* WrapShortest enables */
- dw[11] = 0;
- dw[12] = 0;
-}
-
-static inline void
-gen6_emit_MI_STORE_DATA_IMM(const struct ilo_dev_info *dev,
- struct intel_bo *bo, uint32_t bo_offset,
- uint64_t val, bool store_qword,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_MI(0x20);
- const uint8_t cmd_len = (store_qword) ? 5 : 4;
- /* must use GGTT on GEN6 as in PIPE_CONTROL */
- const uint32_t cmd_flags = (dev->gen == ILO_GEN(6)) ? (1 << 22) : 0;
- const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
- const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- assert(bo_offset % ((store_qword) ? 8 : 4) == 0);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | cmd_flags | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
- ilo_cp_write(cp, (uint32_t) val);
-
- if (store_qword)
- ilo_cp_write(cp, (uint32_t) (val >> 32));
- else
- assert(val == (uint64_t) ((uint32_t) val));
-
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MI_LOAD_REGISTER_IMM(const struct ilo_dev_info *dev,
- uint32_t reg, uint32_t val,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_MI(0x22);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- assert(reg % 4 == 0);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, reg);
- ilo_cp_write(cp, val);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MI_STORE_REGISTER_MEM(const struct ilo_dev_info *dev,
- struct intel_bo *bo, uint32_t bo_offset,
- uint32_t reg, struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_MI(0x24);
- const uint8_t cmd_len = 3;
- /* must use GGTT on GEN6 as in PIPE_CONTROL */
- const uint32_t cmd_flags = (dev->gen == ILO_GEN(6)) ? (1 << 22) : 0;
- const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
- const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- assert(reg % 4 == 0 && bo_offset % 4 == 0);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | cmd_flags | (cmd_len - 2));
- ilo_cp_write(cp, reg);
- ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MI_REPORT_PERF_COUNT(const struct ilo_dev_info *dev,
- struct intel_bo *bo, uint32_t bo_offset,
- uint32_t report_id, struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_MI(0x28);
- const uint8_t cmd_len = 3;
- const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
- const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- assert(bo_offset % 64 == 0);
-
- /* must use GGTT on GEN6 as in PIPE_CONTROL */
- if (dev->gen == ILO_GEN(6))
- bo_offset |= 0x1;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
- ilo_cp_write(cp, report_id);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
- struct intel_bo *general_state_bo,
- struct intel_bo *surface_state_bo,
- struct intel_bo *dynamic_state_bo,
- struct intel_bo *indirect_object_bo,
- struct intel_bo *instruction_bo,
- uint32_t general_state_size,
- uint32_t dynamic_state_size,
- uint32_t indirect_object_size,
- uint32_t instruction_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
- const uint8_t cmd_len = 10;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /* 4K-page aligned */
- assert(((general_state_size | dynamic_state_size |
- indirect_object_size | instruction_size) & 0xfff) == 0);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
-
- ilo_cp_write_bo(cp, 1, general_state_bo,
- INTEL_DOMAIN_RENDER,
- 0);
- ilo_cp_write_bo(cp, 1, surface_state_bo,
- INTEL_DOMAIN_SAMPLER,
- 0);
- ilo_cp_write_bo(cp, 1, dynamic_state_bo,
- INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
- 0);
- ilo_cp_write_bo(cp, 1, indirect_object_bo,
- 0,
- 0);
- ilo_cp_write_bo(cp, 1, instruction_bo,
- INTEL_DOMAIN_INSTRUCTION,
- 0);
-
- if (general_state_size) {
- ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
- INTEL_DOMAIN_RENDER,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 1);
- }
-
- if (dynamic_state_size) {
- ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
- INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 0xfffff000 + 1);
- }
-
- if (indirect_object_size) {
- ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
- 0,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 0xfffff000 + 1);
- }
-
- if (instruction_size) {
- ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
- INTEL_DOMAIN_INSTRUCTION,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 1);
- }
-
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
- uint32_t sip,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, sip);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
- bool enable,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
- const uint8_t cmd_len = 1;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | enable);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
- int pipeline,
- struct ilo_cp *cp)
-{
- const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
- const uint8_t cmd_len = 1;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /* 3D or media */
- assert(pipeline == 0x0 || pipeline == 0x1);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | pipeline);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
- int max_threads, int num_urb_entries,
- int urb_entry_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
- const uint8_t cmd_len = 8;
- uint32_t dw2, dw4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- dw2 = (max_threads - 1) << 16 |
- num_urb_entries << 8 |
- 1 << 7 | /* Reset Gateway Timer */
- 1 << 6; /* Bypass Gateway Control */
-
- dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */
- 480; /* CURBE Allocation Size */
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* MBZ */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, 0); /* scoreboard */
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
- uint32_t buf, int size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- assert(buf % 32 == 0);
- /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
- size = align(size, 32);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0); /* MBZ */
- ilo_cp_write(cp, size);
- ilo_cp_write(cp, buf);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
- uint32_t offset, int num_ids,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- assert(offset % 32 == 0);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0); /* MBZ */
- /* every ID has 8 DWords */
- ilo_cp_write(cp, num_ids * 8 * 4);
- ilo_cp_write(cp, offset);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
- int id, int byte, int thread_count,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
- const uint8_t cmd_len = 2;
- uint32_t dw1;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- dw1 = id << 16 |
- byte << 8 |
- thread_count;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
- int thread_count_water_mark,
- int barrier_mask,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
- const uint8_t cmd_len = 2;
- uint32_t dw1;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- dw1 = thread_count_water_mark << 16 |
- barrier_mask;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- assert(!"MEDIA_OBJECT_WALKER unsupported");
-}
-
-static inline void
-gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t vs_binding_table,
- uint32_t gs_binding_table,
- uint32_t ps_binding_table,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED |
- GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
- GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED);
- ilo_cp_write(cp, vs_binding_table);
- ilo_cp_write(cp, gs_binding_table);
- ilo_cp_write(cp, ps_binding_table);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t vs_sampler_state,
- uint32_t gs_sampler_state,
- uint32_t ps_sampler_state,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- GEN6_PTR_SAMPLER_DW0_VS_CHANGED |
- GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
- GEN6_PTR_SAMPLER_DW0_PS_CHANGED);
- ilo_cp_write(cp, vs_sampler_state);
- ilo_cp_write(cp, gs_sampler_state);
- ilo_cp_write(cp, ps_sampler_state);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
- int vs_total_size, int gs_total_size,
- int vs_entry_size, int gs_entry_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
- const uint8_t cmd_len = 3;
- const int row_size = 128; /* 1024 bits */
- int vs_alloc_size, gs_alloc_size;
- int vs_num_entries, gs_num_entries;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- /* in 1024-bit URB rows */
- vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
- gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
- /* the valid range is [1, 5] */
- if (!vs_alloc_size)
- vs_alloc_size = 1;
- if (!gs_alloc_size)
- gs_alloc_size = 1;
- assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
- /* the valid range is [24, 256] in multiples of 4 */
- vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
- if (vs_num_entries > 256)
- vs_num_entries = 256;
- assert(vs_num_entries >= 24);
-
- /* the valid range is [0, 256] in multiples of 4 */
- gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
- if (gs_num_entries > 256)
- gs_num_entries = 256;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
- vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT);
- ilo_cp_write(cp, gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
- (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
- const struct ilo_ve_state *ve,
- const struct ilo_vb_state *vb,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
- uint8_t cmd_len;
- unsigned hw_idx;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 82:
- *
- * "From 1 to 33 VBs can be specified..."
- */
- assert(ve->vb_count <= 33);
-
- if (!ve->vb_count)
- return;
-
- cmd_len = 1 + 4 * ve->vb_count;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
-
- for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
- const unsigned instance_divisor = ve->instance_divisors[hw_idx];
- const unsigned pipe_idx = ve->vb_mapping[hw_idx];
- const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
- uint32_t dw;
-
- dw = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
-
- if (instance_divisor)
- dw |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
- else
- dw |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
-
- if (dev->gen >= ILO_GEN(7))
- dw |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
-
- /* use null vb if there is no buffer or the stride is out of range */
- if (cso->buffer && cso->stride <= 2048) {
- const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
- const uint32_t start_offset = cso->buffer_offset;
- const uint32_t end_offset = buf->bo_size - 1;
-
- dw |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
-
- ilo_cp_write(cp, dw);
- ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_write(cp, instance_divisor);
- }
- else {
- dw |= 1 << 13;
-
- ilo_cp_write(cp, dw);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, instance_divisor);
- }
- }
-
- ilo_cp_end(cp);
-}
-
-static inline void
-ve_init_cso_with_components(const struct ilo_dev_info *dev,
- int comp0, int comp1, int comp2, int comp3,
- struct ilo_ve_cso *cso)
-{
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- STATIC_ASSERT(Elements(cso->payload) >= 2);
- cso->payload[0] = GEN6_VE_STATE_DW0_VALID;
- cso->payload[1] =
- comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT |
- comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT |
- comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT |
- comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT;
-}
-
-static inline void
-ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
- struct ilo_ve_cso *cso)
-{
- int format;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 94:
- *
- * "- This bit (Edge Flag Enable) must only be ENABLED on the last
- * valid VERTEX_ELEMENT structure.
- *
- * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
- * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
- *
- * - The Source Element Format must be set to the UINT format.
- *
- * - [DevSNB]: Edge Flags are not supported for QUADLIST
- * primitives. Software may elect to convert QUADLIST primitives
- * to some set of corresponding edge-flag-supported primitive
- * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
- */
-
- cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE;
- cso->payload[1] =
- GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;
-
- /*
- * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
- * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
- * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
- *
- * Since all the hardware cares about is whether the flags are zero or not,
- * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
- */
- format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff;
- if (format == GEN6_FORMAT_R32_FLOAT) {
- STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1);
- cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT);
- }
- else {
- assert(format == GEN6_FORMAT_R8_UINT);
- }
-}
-
-static inline void
-gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
- const struct ilo_ve_state *ve,
- bool last_velement_edgeflag,
- bool prepend_generated_ids,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
- uint8_t cmd_len;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 93:
- *
- * "Up to 34 (DevSNB+) vertex elements are supported."
- */
- assert(ve->count + prepend_generated_ids <= 34);
-
- if (!ve->count && !prepend_generated_ids) {
- struct ilo_ve_cso dummy;
-
- ve_init_cso_with_components(dev,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_1_FP,
- &dummy);
-
- cmd_len = 3;
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, dummy.payload, 2);
- ilo_cp_end(cp);
-
- return;
- }
-
- cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
-
- if (prepend_generated_ids) {
- struct ilo_ve_cso gen_ids;
-
- ve_init_cso_with_components(dev,
- GEN6_VFCOMP_STORE_VID,
- GEN6_VFCOMP_STORE_IID,
- GEN6_VFCOMP_NOSTORE,
- GEN6_VFCOMP_NOSTORE,
- &gen_ids);
-
- ilo_cp_write_multi(cp, gen_ids.payload, 2);
- }
-
- if (last_velement_edgeflag) {
- struct ilo_ve_cso edgeflag;
-
- for (i = 0; i < ve->count - 1; i++)
- ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
-
- edgeflag = ve->cso[i];
- ve_set_cso_edgeflag(dev, &edgeflag);
- ilo_cp_write_multi(cp, edgeflag.payload, 2);
- }
- else {
- for (i = 0; i < ve->count; i++)
- ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
- }
-
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_ib_state *ib,
- bool enable_cut_index,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
- const uint8_t cmd_len = 3;
- struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
- uint32_t start_offset, end_offset;
- int format;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- if (!buf)
- return;
-
- /* this is moved to the new 3DSTATE_VF */
- if (dev->gen >= ILO_GEN(7.5))
- assert(!enable_cut_index);
-
- switch (ib->hw_index_size) {
- case 4:
- format = GEN6_IB_DW0_FORMAT_DWORD;
- break;
- case 2:
- format = GEN6_IB_DW0_FORMAT_WORD;
- break;
- case 1:
- format = GEN6_IB_DW0_FORMAT_BYTE;
- break;
- default:
- assert(!"unknown index size");
- format = GEN6_IB_DW0_FORMAT_BYTE;
- break;
- }
-
- /*
- * set start_offset to 0 here and adjust pipe_draw_info::start with
- * ib->draw_start_offset in 3DPRIMITIVE
- */
- start_offset = 0;
- end_offset = buf->bo_size;
-
- /* end_offset must also be aligned and is inclusive */
- end_offset -= (end_offset % ib->hw_index_size);
- end_offset--;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- ((enable_cut_index) ? GEN6_IB_DW0_CUT_INDEX_ENABLE : 0) |
- format);
- ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t clip_viewport,
- uint32_t sf_viewport,
- uint32_t cc_viewport,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- GEN6_PTR_VP_DW0_CLIP_CHANGED |
- GEN6_PTR_VP_DW0_SF_CHANGED |
- GEN6_PTR_VP_DW0_CC_CHANGED);
- ilo_cp_write(cp, clip_viewport);
- ilo_cp_write(cp, sf_viewport);
- ilo_cp_write(cp, cc_viewport);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t blend_state,
- uint32_t depth_stencil_state,
- uint32_t color_calc_state,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, blend_state | 1);
- ilo_cp_write(cp, depth_stencil_state | 1);
- ilo_cp_write(cp, color_calc_state | 1);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t scissor_rect,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, scissor_rect);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *vs,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
- const uint8_t cmd_len = 6;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- if (!vs) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(vs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *gs,
- const struct ilo_shader_state *vs,
- int verts_per_prim,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
- const uint8_t cmd_len = 7;
- uint32_t dw1, dw2, dw4, dw5, dw6;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- if (gs) {
- const struct ilo_shader_cso *cso;
-
- dw1 = ilo_shader_get_kernel_offset(gs);
-
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
- dw6 = cso->payload[3];
- }
- else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
- struct ilo_shader_cso cso;
- enum ilo_kernel_param param;
-
- switch (verts_per_prim) {
- case 1:
- param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
- break;
- case 2:
- param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
- break;
- default:
- param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
- break;
- }
-
- dw1 = ilo_shader_get_kernel_offset(vs) +
- ilo_shader_get_kernel_param(vs, param);
-
- /* cannot use VS's CSO */
- ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
- dw2 = cso.payload[0];
- dw4 = cso.payload[1];
- dw5 = cso.payload[2];
- dw6 = cso.payload[3];
- }
- else {
- dw1 = 0;
- dw2 = 0;
- dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
- dw5 = GEN6_GS_DW5_STATISTICS;
- dw6 = 0;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, dw6);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- bool enable_guardband,
- int num_viewports,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
- const uint8_t cmd_len = 4;
- uint32_t dw1, dw2, dw3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- if (rasterizer) {
- int interps;
-
- dw1 = rasterizer->clip.payload[0];
- dw2 = rasterizer->clip.payload[1];
- dw3 = rasterizer->clip.payload[2];
-
- if (enable_guardband && rasterizer->clip.can_enable_guardband)
- dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
-
- interps = (fs) ? ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
-
- if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
- GEN6_INTERP_NONPERSPECTIVE_CENTROID |
- GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
- dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
-
- dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
- (num_viewports - 1);
- }
- else {
- dw1 = 0;
- dw2 = 0;
- dw3 = 0;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, dw3);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
- const uint8_t cmd_len = 20;
- uint32_t payload_raster[6], payload_sbe[13];
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
- 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
- ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
- fs, payload_sbe, Elements(payload_sbe));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, payload_sbe[0]);
- ilo_cp_write_multi(cp, payload_raster, 6);
- ilo_cp_write_multi(cp, &payload_sbe[1], 12);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *fs,
- int num_samplers,
- const struct ilo_rasterizer_state *rasterizer,
- bool dual_blend, bool cc_may_kill,
- uint32_t hiz_op,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
- const uint8_t cmd_len = 9;
- const int num_samples = 1;
- const struct ilo_shader_cso *fs_cso;
- uint32_t dw2, dw4, dw5, dw6;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- if (!fs) {
- /* see brwCreateContext() */
- const int max_threads = (dev->gt == 2) ? 80 : 40;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, hiz_op);
- /* honor the valid range even if dispatching is disabled */
- ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
- }
-
- fs_cso = ilo_shader_get_kernel_cso(fs);
- dw2 = fs_cso->payload[0];
- dw4 = fs_cso->payload[1];
- dw5 = fs_cso->payload[2];
- dw6 = fs_cso->payload[3];
-
- dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "This bit (Statistics Enable) must be disabled if either of these
- * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
- * Enable or Depth Buffer Resolve Enable."
- */
- assert(!hiz_op);
- dw4 |= GEN6_WM_DW4_STATISTICS;
-
- if (cc_may_kill)
- dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE;
-
- if (dual_blend)
- dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND;
-
- dw5 |= rasterizer->wm.payload[0];
-
- dw6 |= rasterizer->wm.payload[1];
-
- if (num_samples > 1) {
- dw6 |= rasterizer->wm.dw_msaa_rast |
- rasterizer->wm.dw_msaa_disp;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, dw6);
- ilo_cp_write(cp, 0); /* kernel 1 */
- ilo_cp_write(cp, 0); /* kernel 2 */
- ilo_cp_end(cp);
-}
-
-static inline unsigned
-gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs, int max_read_length,
- uint32_t *dw, int num_dwords)
-{
- unsigned enabled = 0x0;
- int total_read_length, i;
-
- assert(num_dwords == 4);
-
- total_read_length = 0;
- for (i = 0; i < 4; i++) {
- if (i < num_bufs && sizes[i]) {
- /* in 256-bit units minus one */
- const int read_len = (sizes[i] + 31) / 32 - 1;
-
- assert(bufs[i] % 32 == 0);
- assert(read_len < 32);
-
- enabled |= 1 << i;
- dw[i] = bufs[i] | read_len;
-
- total_read_length += read_len + 1;
- }
- else {
- dw[i] = 0;
- }
- }
-
- assert(total_read_length <= max_read_length);
-
- return enabled;
-}
-
-static inline void
-gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(num_bufs <= 4);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 138:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 32"
- */
- buf_enabled = gen6_fill_3dstate_constant(dev,
- bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
- ilo_cp_write(cp, buf_dw[0]);
- ilo_cp_write(cp, buf_dw[1]);
- ilo_cp_write(cp, buf_dw[2]);
- ilo_cp_write(cp, buf_dw[3]);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(num_bufs <= 4);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 161:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 64"
- */
- buf_enabled = gen6_fill_3dstate_constant(dev,
- bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
- ilo_cp_write(cp, buf_dw[0]);
- ilo_cp_write(cp, buf_dw[1]);
- ilo_cp_write(cp, buf_dw[2]);
- ilo_cp_write(cp, buf_dw[3]);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(num_bufs <= 4);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 287:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 64"
- */
- buf_enabled = gen6_fill_3dstate_constant(dev,
- bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
- ilo_cp_write(cp, buf_dw[0]);
- ilo_cp_write(cp, buf_dw[1]);
- ilo_cp_write(cp, buf_dw[2]);
- ilo_cp_write(cp, buf_dw[3]);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
- unsigned sample_mask,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
- const uint8_t cmd_len = 2;
- const unsigned valid_mask = 0xf;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- sample_mask &= valid_mask;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, sample_mask);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
- unsigned x, unsigned y,
- unsigned width, unsigned height,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
- const uint8_t cmd_len = 4;
- unsigned xmax = x + width - 1;
- unsigned ymax = y + height - 1;
- int rect_limit;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- if (dev->gen >= ILO_GEN(7)) {
- rect_limit = 16383;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 230:
- *
- * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
- * must be an even number"
- */
- assert(y % 2 == 0);
-
- rect_limit = 8191;
- }
-
- if (x > rect_limit) x = rect_limit;
- if (y > rect_limit) y = rect_limit;
- if (xmax > rect_limit) xmax = rect_limit;
- if (ymax > rect_limit) ymax = rect_limit;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, y << 16 | x);
- ilo_cp_write(cp, ymax << 16 | xmax);
-
- /*
- * There is no need to set the origin. It is intended to support front
- * buffer rendering.
- */
- ilo_cp_write(cp, 0);
-
- ilo_cp_end(cp);
-}
-
-static inline void
-zs_align_surface(const struct ilo_dev_info *dev,
- unsigned align_w, unsigned align_h,
- struct ilo_zs_surface *zs)
-{
- unsigned mask, shift_w, shift_h;
- unsigned width, height;
- uint32_t dw3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- if (dev->gen >= ILO_GEN(7)) {
- shift_w = 4;
- shift_h = 18;
- mask = 0x3fff;
- }
- else {
- shift_w = 6;
- shift_h = 19;
- mask = 0x1fff;
- }
-
- dw3 = zs->payload[2];
-
- /* aligned width and height */
- width = align(((dw3 >> shift_w) & mask) + 1, align_w);
- height = align(((dw3 >> shift_h) & mask) + 1, align_h);
-
- dw3 = (dw3 & ~((mask << shift_w) | (mask << shift_h))) |
- (width - 1) << shift_w |
- (height - 1) << shift_h;
-
- zs->payload[2] = dw3;
-}
-
-static inline void
-gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_zs_surface *zs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
- ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
- const uint8_t cmd_len = 7;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, zs->payload[0]);
- ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_write(cp, zs->payload[2]);
- ilo_cp_write(cp, zs->payload[3]);
- ilo_cp_write(cp, zs->payload[4]);
- ilo_cp_write(cp, zs->payload[5]);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
- int x_offset, int y_offset,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
- assert(x_offset >= 0 && x_offset <= 31);
- assert(y_offset >= 0 && y_offset <= 31);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, x_offset << 8 | y_offset);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
- const struct pipe_poly_stipple *pattern,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
- const uint8_t cmd_len = 33;
- int i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
- assert(Elements(pattern->stipple) == 32);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- for (i = 0; i < 32; i++)
- ilo_cp_write(cp, pattern->stipple[i]);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
- unsigned pattern, unsigned factor,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
- const uint8_t cmd_len = 3;
- unsigned inverse;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
- assert((pattern & 0xffff) == pattern);
- assert(factor >= 1 && factor <= 256);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, pattern);
-
- if (dev->gen >= ILO_GEN(7)) {
- /* in U1.16 */
- inverse = (unsigned) (65536.0f / factor);
- ilo_cp_write(cp, inverse << 15 | factor);
- }
- else {
- /* in U1.13 */
- inverse = (unsigned) (8192.0f / factor);
- ilo_cp_write(cp, inverse << 16 | factor);
- }
-
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0 << 16 | 0);
- ilo_cp_write(cp, 0 << 16 | 0);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
- int index, unsigned svbi,
- unsigned max_svbi,
- bool load_vertex_count,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
- const uint8_t cmd_len = 4;
- uint32_t dw1;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(index >= 0 && index < 4);
-
- dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT;
- if (load_vertex_count)
- dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, svbi);
- ilo_cp_write(cp, max_svbi);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
- int num_samples,
- const uint32_t *packed_sample_pos,
- bool pixel_location_center,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
- const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
- uint32_t dw1, dw2, dw3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- dw1 = (pixel_location_center) ?
- GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
-
- switch (num_samples) {
- case 0:
- case 1:
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
- dw2 = 0;
- dw3 = 0;
- break;
- case 4:
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
- dw2 = packed_sample_pos[0];
- dw3 = 0;
- break;
- case 8:
- assert(dev->gen >= ILO_GEN(7));
- dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
- dw2 = packed_sample_pos[0];
- dw3 = packed_sample_pos[1];
- break;
- default:
- assert(!"unsupported sample count");
- dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
- dw2 = 0;
- dw3 = 0;
- break;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- if (dev->gen >= ILO_GEN(7))
- ilo_cp_write(cp, dw3);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_zs_surface *zs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
- ILO_GPE_CMD(0x3, 0x0, 0x06) :
- ILO_GPE_CMD(0x3, 0x1, 0x0e);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- /* see ilo_gpe_init_zs_surface() */
- ilo_cp_write(cp, zs->payload[6]);
- ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_zs_surface *zs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
- ILO_GPE_CMD(0x3, 0x0, 0x07) :
- ILO_GPE_CMD(0x3, 0x1, 0x0f);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- /* see ilo_gpe_init_zs_surface() */
- ilo_cp_write(cp, zs->payload[8]);
- ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
- uint32_t clear_val,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- GEN6_CLEAR_PARAMS_DW0_VALID);
- ilo_cp_write(cp, clear_val);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
- uint32_t dw1,
- struct intel_bo *bo, uint32_t bo_offset,
- bool write_qword,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
- const uint8_t cmd_len = (write_qword) ? 5 : 4;
- const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
- const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- assert(bo_offset % ((write_qword) ? 8 : 4) == 0);
-
- if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 73:
- *
- * "1 of the following must also be set (when CS stall is set):
- *
- * * Depth Cache Flush Enable ([0] of DW1)
- * * Stall at Pixel Scoreboard ([1] of DW1)
- * * Depth Stall ([13] of DW1)
- * * Post-Sync Operation ([13] of DW1)
- * * Render Target Cache Flush Enable ([12] of DW1)
- * * Notify Enable ([8] of DW1)"
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 61:
- *
- * "One of the following must also be set (when CS stall is set):
- *
- * * Render Target Cache Flush Enable ([12] of DW1)
- * * Depth Cache Flush Enable ([0] of DW1)
- * * Stall at Pixel Scoreboard ([1] of DW1)
- * * Depth Stall ([13] of DW1)
- * * Post-Sync Operation ([13] of DW1)"
- */
- uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
- GEN6_PIPE_CONTROL_DEPTH_STALL;
-
- /* post-sync op */
- bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM |
- GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT |
- GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
-
- if (dev->gen == ILO_GEN(6))
- bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE;
-
- assert(dw1 & bit_test);
- }
-
- if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 73:
- *
- * "Following bits must be clear (when Depth Stall is set):
- *
- * * Render Target Cache Flush Enable ([12] of DW1)
- * * Depth Cache Flush Enable ([0] of DW1)"
- */
- assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
- }
-
- /*
- * From the Sandy Bridge PRM, volume 1 part 3, page 19:
- *
- * "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM)
- * and PIPE_CONTROL are not supported."
- *
- * The kernel will add the mapping automatically (when write domain is
- * INTEL_DOMAIN_INSTRUCTION).
- */
- if (dev->gen == ILO_GEN(6) && bo)
- bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
- ilo_cp_write(cp, 0);
- if (write_qword)
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib,
- bool rectlist,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
- const uint8_t cmd_len = 6;
- const int prim = (rectlist) ?
- GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
- vb_access);
- ilo_cp_write(cp, info->count);
- ilo_cp_write(cp, vb_start);
- ilo_cp_write(cp, info->instance_count);
- ilo_cp_write(cp, info->start_instance);
- ilo_cp_write(cp, info->index_bias);
- ilo_cp_end(cp);
-}
-
-static inline uint32_t
-gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
- const struct ilo_shader_state **cs,
- uint32_t *sampler_state,
- int *num_samplers,
- uint32_t *binding_table_state,
- int *num_surfaces,
- int num_ids,
- struct ilo_cp *cp)
-{
- /*
- * From the Sandy Bridge PRM, volume 2 part 2, page 34:
- *
- * "(Interface Descriptor Total Length) This field must have the same
- * alignment as the Interface Descriptor Data Start Address.
- *
- * It must be DQWord (32-byte) aligned..."
- *
- * From the Sandy Bridge PRM, volume 2 part 2, page 35:
- *
- * "(Interface Descriptor Data Start Address) Specifies the 32-byte
- * aligned address of the Interface Descriptor data."
- */
- const int state_align = 32 / 4;
- const int state_len = (32 / 4) * num_ids;
- uint32_t state_offset, *dw;
- int i;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_ids; i++) {
- dw[0] = ilo_shader_get_kernel_offset(cs[i]);
- dw[1] = 1 << 18; /* SPF */
- dw[2] = sampler_state[i] |
- (num_samplers[i] + 3) / 4 << 2;
- dw[3] = binding_table_state[i] |
- num_surfaces[i];
- dw[4] = 0 << 16 | /* CURBE Read Length */
- 0; /* CURBE Read Offset */
- dw[5] = 0; /* Barrier ID */
- dw[6] = 0;
- dw[7] = 0;
-
- dw += 8;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 8 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 262:
- *
- * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
-
- dw += 8;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 4 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 193:
- *
- * "The viewport-related state is stored as an array of up to 16
- * elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_gbx);
- dw[1] = fui(vp->max_gbx);
- dw[2] = fui(vp->min_gby);
- dw[3] = fui(vp->max_gby);
-
- dw += 4;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 2 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 385:
- *
- * "The viewport state is stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_z);
- dw[1] = fui(vp->max_z);
-
- dw += 2;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
- const struct pipe_stencil_ref *stencil_ref,
- ubyte alpha_ref,
- const struct pipe_blend_color *blend_color,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- const int state_len = 6;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
- state_len, state_align, &state_offset);
-
- dw[0] = stencil_ref->ref_value[0] << 24 |
- stencil_ref->ref_value[1] << 16 |
- GEN6_CC_DW0_ALPHATEST_UNORM8;
- dw[1] = alpha_ref;
- dw[2] = fui(blend_color->color[0]);
- dw[3] = fui(blend_color->color[1]);
- dw[4] = fui(blend_color->color[2]);
- dw[5] = fui(blend_color->color[3]);
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
- const struct ilo_blend_state *blend,
- const struct ilo_fb_state *fb,
- const struct ilo_dsa_state *dsa,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- int state_len;
- uint32_t state_offset, *dw;
- unsigned num_targets, i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 376:
- *
- * "The blend state is stored as an array of up to 8 elements..."
- */
- num_targets = fb->state.nr_cbufs;
- assert(num_targets <= 8);
-
- if (!num_targets) {
- if (!dsa->dw_alpha)
- return 0;
- /* to be able to reference alpha func */
- num_targets = 1;
- }
-
- state_len = 2 * num_targets;
-
- dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_targets; i++) {
- const unsigned idx = (blend->independent_blend_enable) ? i : 0;
- const struct ilo_blend_cso *cso = &blend->cso[idx];
- const int num_samples = fb->num_samples;
- const struct util_format_description *format_desc =
- (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ?
- util_format_description(fb->state.cbufs[idx]->format) : NULL;
- bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
-
- rt_is_unorm = true;
- rt_is_pure_integer = false;
- rt_dst_alpha_forced_one = false;
-
- if (format_desc) {
- int ch;
-
- switch (format_desc->format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- /* force alpha to one when the HW format has alpha */
- assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
- == GEN6_FORMAT_B8G8R8A8_UNORM);
- rt_dst_alpha_forced_one = true;
- break;
- default:
- break;
- }
-
- for (ch = 0; ch < 4; ch++) {
- if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
- continue;
-
- if (format_desc->channel[ch].pure_integer) {
- rt_is_unorm = false;
- rt_is_pure_integer = true;
- break;
- }
-
- if (!format_desc->channel[ch].normalized ||
- format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
- rt_is_unorm = false;
- }
- }
-
- dw[0] = cso->payload[0];
- dw[1] = cso->payload[1];
-
- if (!rt_is_pure_integer) {
- if (rt_dst_alpha_forced_one)
- dw[0] |= cso->dw_blend_dst_alpha_forced_one;
- else
- dw[0] |= cso->dw_blend;
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 365:
- *
- * "Logic Ops are only supported on *_UNORM surfaces (excluding
- * _SRGB variants), otherwise Logic Ops must be DISABLED."
- *
- * Since logicop is ignored for non-UNORM color buffers, no special care
- * is needed.
- */
- if (rt_is_unorm)
- dw[1] |= cso->dw_logicop;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 356:
- *
- * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
- * Dither both must be disabled."
- *
- * There is no such limitation on GEN7, or for AlphaToOne. But GL
- * requires that anyway.
- */
- if (num_samples > 1)
- dw[1] |= cso->dw_alpha_mod;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 382:
- *
- * "Alpha Test can only be enabled if Pixel Shader outputs a float
- * alpha value."
- */
- if (!rt_is_pure_integer)
- dw[1] |= dsa->dw_alpha;
-
- dw += 2;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
- const struct ilo_dsa_state *dsa,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- const int state_len = 3;
- uint32_t state_offset, *dw;
-
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
- state_len, state_align, &state_offset);
-
- dw[0] = dsa->payload[0];
- dw[1] = dsa->payload[1];
- dw[2] = dsa->payload[2];
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
- const struct ilo_scissor_state *scissor,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 2 * num_viewports;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 263:
- *
- * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
- state_len, state_align, &state_offset);
-
- memcpy(dw, scissor->payload, state_len * 4);
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
- uint32_t *surface_states,
- int num_surface_states,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = num_surface_states;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 69:
- *
- * "It is stored as an array of up to 256 elements..."
- */
- assert(num_surface_states <= 256);
-
- if (!num_surface_states)
- return 0;
-
- dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
- state_len, state_align, &state_offset);
- memcpy(dw, surface_states,
- num_surface_states * sizeof(surface_states[0]));
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
- const struct ilo_view_surface *surf,
- bool for_render,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
- uint32_t state_offset;
- uint32_t read_domains, write_domain;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- if (for_render) {
- read_domains = INTEL_DOMAIN_RENDER;
- write_domain = INTEL_DOMAIN_RENDER;
- }
- else {
- read_domains = INTEL_DOMAIN_SAMPLER;
- write_domain = 0;
- }
-
- ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
-
- STATIC_ASSERT(Elements(surf->payload) >= 8);
-
- ilo_cp_write(cp, surf->payload[0]);
- ilo_cp_write_bo(cp, surf->payload[1],
- surf->bo, read_domains, write_domain);
- ilo_cp_write(cp, surf->payload[2]);
- ilo_cp_write(cp, surf->payload[3]);
- ilo_cp_write(cp, surf->payload[4]);
- ilo_cp_write(cp, surf->payload[5]);
-
- if (dev->gen >= ILO_GEN(7)) {
- ilo_cp_write(cp, surf->payload[6]);
- ilo_cp_write(cp, surf->payload[7]);
- }
-
- ilo_cp_end(cp);
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
- const struct pipe_stream_output_target *so,
- const struct pipe_stream_output_info *so_info,
- int so_index,
- struct ilo_cp *cp)
-{
- struct ilo_buffer *buf = ilo_buffer(so->buffer);
- unsigned bo_offset, struct_size;
- enum pipe_format elem_format;
- struct ilo_view_surface surf;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
- struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
-
- switch (so_info->output[so_index].num_components) {
- case 1:
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- case 2:
- elem_format = PIPE_FORMAT_R32G32_FLOAT;
- break;
- case 3:
- elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
- break;
- case 4:
- elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- break;
- default:
- assert(!"unexpected SO components length");
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- }
-
- ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
- struct_size, elem_format, false, true, &surf);
-
- return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
-}
-
-static inline uint32_t
-gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
- const struct ilo_sampler_cso * const *samplers,
- const struct pipe_sampler_view * const *views,
- const uint32_t *sampler_border_colors,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 4 * num_samplers;
- uint32_t state_offset, *dw;
- int i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 101:
- *
- * "The sampler state is stored as an array of up to 16 elements..."
- */
- assert(num_samplers <= 16);
-
- if (!num_samplers)
- return 0;
-
- dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_samplers; i++) {
- const struct ilo_sampler_cso *sampler = samplers[i];
- const struct pipe_sampler_view *view = views[i];
- const uint32_t border_color = sampler_border_colors[i];
- uint32_t dw_filter, dw_wrap;
-
- /* there may be holes */
- if (!sampler || !view) {
- /* disabled sampler */
- dw[0] = 1 << 31;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw += 4;
-
- continue;
- }
-
- /* determine filter and wrap modes */
- switch (view->texture->target) {
- case PIPE_TEXTURE_1D:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_1d;
- break;
- case PIPE_TEXTURE_3D:
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 103:
- *
- * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
- * surfaces of type SURFTYPE_3D."
- */
- dw_filter = sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- case PIPE_TEXTURE_CUBE:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_cube;
- break;
- default:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- }
-
- dw[0] = sampler->payload[0];
- dw[1] = sampler->payload[1];
- assert(!(border_color & 0x1f));
- dw[2] = border_color;
- dw[3] = sampler->payload[2];
-
- dw[0] |= dw_filter;
-
- if (dev->gen >= ILO_GEN(7)) {
- dw[3] |= dw_wrap;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 21:
- *
- * "[DevSNB] Errata: Incorrect behavior is observed in cases
- * where the min and mag mode filters are different and
- * SurfMinLOD is nonzero. The determination of MagMode uses the
- * following equation instead of the one in the above
- * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
- *
- * As a way to work around that, we set Base to
- * view->u.tex.first_level.
- */
- dw[0] |= view->u.tex.first_level << 22;
-
- dw[1] |= dw_wrap;
- }
-
- dw += 4;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
- const struct ilo_sampler_cso *sampler,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
- state_len, state_align, &state_offset);
-
- /* see ilo_gpe_init_sampler_cso() */
- memcpy(dw, &sampler->payload[3], state_len * 4);
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
- int size, void **pcb,
- struct ilo_cp *cp)
-{
- /*
- * For all VS, GS, FS, and CS push constant buffers, they must be aligned
- * to 32 bytes, and their sizes are specified in 256-bit units.
- */
- const int state_align = 32 / 4;
- const int state_len = align(size, 32) / 4;
- uint32_t state_offset;
- char *buf;
-
- ILO_GPE_VALID_GEN(dev, 6, 7.5);
-
- buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
- state_len, state_align, &state_offset);
-
- /* zero out the unused range */
- if (size < state_len * 4)
- memset(&buf[size], 0, state_len * 4 - size);
-
- if (pcb)
- *pcb = buf;
-
- return state_offset;
-}
+ const struct ilo_shader_state *last_sh,
+ uint32_t *dw, int num_dwords);
#endif /* ILO_GPE_GEN6_H */
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c
index 6d3397f2d..2a590be2d 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c
@@ -25,15 +25,63 @@
* Chia-I Wu <olv@lunarg.com>
*/
-#include "genhw/genhw.h"
#include "util/u_resource.h"
+#include "brw_defines.h"
+#include "intel_reg.h"
+#include "ilo_cp.h"
#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_gpe_gen7.h"
-#define SET_FIELD(value, field) (((value) << field ## __SHIFT) & field ## __MASK)
+/*
+ * Placeholder for GPGPU_WALKER.  Nothing is written to the command parser;
+ * the only effect is an assertion that always fails when asserts are enabled.
+ */
+static void
+gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp)
+{
+ assert(!"GPGPU_WALKER unsupported");
+}
+
+/*
+ * Emit 3DSTATE_CLEAR_PARAMS: a three-dword command made of the header,
+ * the caller's clear value, and a constant 1 (presumably the "clear value
+ * valid" bit -- confirm against the PRM).
+ */
+static void
+gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
+                               uint32_t clear_val,
+                               struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 3;
+   /* the header carries the command length biased by 2 */
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x04) | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, clear_val);
+   ilo_cp_write(cp, 1);
+   ilo_cp_end(cp);
+}
+
+/*
+ * Shared emitter for the two-dword "state pointer" commands: writes the
+ * header for sub-opcode `subop` followed by `pointer`.
+ */
+static void
+gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
+                          int subop, uint32_t pointer,
+                          struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 2;
+   /* the header carries the command length biased by 2 */
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, pointer);
+   ilo_cp_end(cp);
+}
+
+/* Emit 3DSTATE_CC_STATE_POINTERS via the generic pointer emitter. */
+static void
+gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                    uint32_t color_calc_state,
+                                    struct ilo_cp *cp)
+{
+   const int subop = 0x0e;
+
+   gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
+}
void
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
@@ -43,7 +91,7 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
int start_grf, vue_read_len, max_threads;
uint32_t dw2, dw4, dw5;
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
+ ILO_GPE_VALID_GEN(dev, 7, 7);
start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
@@ -52,9 +100,6 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
vue_read_len = (vue_read_len + 1) / 2;
switch (dev->gen) {
- case ILO_GEN(7.5):
- max_threads = (dev->gt >= 2) ? 256 : 70;
- break;
case ILO_GEN(7):
max_threads = (dev->gt == 2) ? 128 : 36;
break;
@@ -63,16 +108,16 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
break;
}
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+ dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
- dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
- GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
- 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
- start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
+ dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
+ GEN7_GS_INCLUDE_VERTEX_HANDLES |
+ 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
+ start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
- dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
- GEN7_GS_DW5_STATISTICS |
- GEN7_GS_DW5_GS_ENABLE;
+ dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_ENABLE;
STATIC_ASSERT(Elements(cso->payload) >= 3);
cso->payload[0] = dw2;
@@ -80,6 +125,74 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
cso->payload[2] = dw5;
}
+/*
+ * Emit the seven-dword 3DSTATE_GS command.
+ *
+ * With a geometry shader, DW2/DW4/DW5 come from the shader's precomputed
+ * CSO payload and the sampler count (in units of four samplers, rounded up)
+ * is merged into DW2.  Without one, every dword is zero except that
+ * statistics stay enabled.
+ */
+static void
+gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *gs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 7;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x11) | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (gs) {
+      const struct ilo_shader_cso *cso = ilo_shader_get_kernel_cso(gs);
+      const uint32_t dw2 = cso->payload[0] |
+         ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
+      const uint32_t dw4 = cso->payload[1];
+      const uint32_t dw5 = cso->payload[2];
+
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, header);
+      ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
+      ilo_cp_write(cp, dw2);
+      ilo_cp_write(cp, 0); /* scratch */
+      ilo_cp_write(cp, dw4);
+      ilo_cp_write(cp, dw5);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+   }
+   else {
+      /* GS disabled */
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, header);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+   }
+}
+
+/*
+ * Emit the seven-dword 3DSTATE_SF command.  The six payload dwords are
+ * filled by ilo_gpe_gen6_fill_3dstate_sf_raster() from the rasterizer state,
+ * a fixed sample count of 1, and the depth/stencil surface format
+ * (PIPE_FORMAT_NONE when no surface is bound).
+ */
+static void
+gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     const struct pipe_surface *zs_surf,
+                     struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 7;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x13) | (cmd_len - 2);
+   const int num_samples = 1;
+   uint32_t dw[6];
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, num_samples,
+         (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
+         dw, Elements(dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write_multi(cp, dw, 6);
+   ilo_cp_end(cp);
+}
+
void
ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
@@ -87,25 +200,25 @@ ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
{
uint32_t dw1, dw2;
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
+ ILO_GPE_VALID_GEN(dev, 7, 7);
- dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
- GEN7_WM_DW1_AA_LINE_WIDTH_2_0 |
- GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL;
+ dw1 = GEN7_WM_POSITION_ZW_PIXEL |
+ GEN7_WM_LINE_AA_WIDTH_2_0 |
+ GEN7_WM_MSRAST_OFF_PIXEL;
/* same value as in 3DSTATE_SF */
if (state->line_smooth)
- dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
+ dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
if (state->poly_stipple_enable)
- dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
+ dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
if (state->line_stipple_enable)
- dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
+ dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
if (state->bottom_edge_rule)
- dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
+ dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
- dw2 = GEN7_WM_DW2_MSDISPMODE_PERSAMPLE;
+ dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
/*
* assertion that makes sure
@@ -115,12 +228,12 @@ ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
*
* is valid
*/
- STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
- GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);
+ STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 &&
+ GEN7_WM_MSDISPMODE_PERSAMPLE == 0);
wm->dw_msaa_rast =
- (state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0;
- wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
+ (state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0;
+ wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL;
STATIC_ASSERT(Elements(wm->payload) >= 2);
wm->payload[0] = dw1;
@@ -136,40 +249,29 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
uint32_t dw2, dw4, dw5;
uint32_t wm_interps, wm_dw1;
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
+ ILO_GPE_VALID_GEN(dev, 7, 7);
start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+ /* see brwCreateContext() */
+ max_threads = (dev->gt == 2) ? 172 : 48;
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+ dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT;
- dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
+ dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
+ GEN7_PS_POSOFFSET_NONE;
- /* see brwCreateContext() */
- switch (dev->gen) {
- case ILO_GEN(7.5):
- max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
- dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
- dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
- break;
- case ILO_GEN(7):
- default:
- max_threads = (dev->gt == 2) ? 172 : 48;
- dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
- break;
- }
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
- dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
+ if (false)
+ dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
- dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
+ dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
- dw4 |= GEN7_PS_DW4_8_PIXEL_DISPATCH;
+ dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
- dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
- 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
- 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
+ dw5 = start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
+ 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
+ 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
/* FS affects 3DSTATE_WM too */
wm_dw1 = 0;
@@ -181,7 +283,7 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
* b) fs writes depth, or
* c) fs or cc kills
*/
- wm_dw1 |= GEN7_WM_DW1_PS_ENABLE;
+ wm_dw1 |= GEN7_WM_DISPATCH_ENABLE;
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 278:
@@ -210,21 +312,21 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
* to ENABLE this bit due to ClipDistance clipping."
*/
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- wm_dw1 |= GEN7_WM_DW1_PS_KILL;
+ wm_dw1 |= GEN7_WM_KILL_ENABLE;
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- wm_dw1 |= GEN7_WM_DW1_PSCDEPTH_ON;
+ wm_dw1 |= GEN7_WM_PSCDEPTH_ON;
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- wm_dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
+ wm_dw1 |= GEN7_WM_USES_SOURCE_DEPTH;
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- wm_dw1 |= GEN7_WM_DW1_PS_USE_W;
+ wm_dw1 |= GEN7_WM_USES_SOURCE_W;
wm_interps = ilo_shader_get_kernel_param(fs,
ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
- wm_dw1 |= wm_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
+ wm_dw1 |= wm_interps << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
STATIC_ASSERT(Elements(cso->payload) >= 4);
cso->payload[0] = dw2;
@@ -233,6 +335,934 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
cso->payload[3] = wm_dw1;
}
+/*
+ * Emit the three-dword 3DSTATE_WM command.
+ *
+ * DW1/DW2 start from the rasterizer's precomputed WM payload.  Statistics
+ * are always enabled, the fragment shader contributes its WM bits
+ * (CSO payload[3]), and `cc_may_kill` forces dispatch and kill on.  The
+ * multisample bits are only merged when more than one sample is in use,
+ * which never happens here because the sample count is fixed to 1.
+ */
+static void
+gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *fs,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     bool cc_may_kill,
+                     struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 3;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x14) | (cmd_len - 2);
+   const int num_samples = 1;
+   uint32_t dw1, dw2;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* see ilo_gpe_init_rasterizer_wm() */
+   dw1 = rasterizer->wm.payload[0] | GEN7_WM_STATISTICS_ENABLE;
+   dw2 = rasterizer->wm.payload[1];
+
+   /* depth clear/resolve operations are never requested */
+   if (false) {
+      dw1 |= GEN7_WM_DEPTH_CLEAR |
+             GEN7_WM_DEPTH_RESOLVE |
+             GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
+   }
+
+   if (fs)
+      dw1 |= ilo_shader_get_kernel_cso(fs)->payload[3];
+
+   if (cc_may_kill)
+      dw1 |= GEN7_WM_DISPATCH_ENABLE | GEN7_WM_KILL_ENABLE;
+
+   if (num_samples > 1) {
+      dw1 |= rasterizer->wm.dw_msaa_rast;
+      dw2 |= rasterizer->wm.dw_msaa_disp;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+
+/*
+ * Shared emitter for the seven-dword 3DSTATE_CONSTANT_* commands.
+ *
+ * `bufs` and `sizes` describe up to four constant buffers (`num_bufs` of
+ * them).  Only the leading run of buffers with a non-zero size is enabled;
+ * the first disabled slot ends the run and all later slots must be disabled
+ * as well.  Sizes are in bytes and are converted to 256-bit read lengths.
+ */
+static void
+gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
+                           int subop,
+                           const uint32_t *bufs, const int *sizes,
+                           int num_bufs,
+                           struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 7;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);
+   uint32_t dw[6];
+   int read_len_sum = 0, num_enabled = 0, i;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, GS, and PS variants */
+   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
+
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
+    *
+    *     "Constant buffers must be enabled in order from Constant Buffer 0
+    *      to Constant Buffer 3 within this command.  For example, it is
+    *      not allowed to enable Constant Buffer 1 by programming a
+    *      non-zero value in the VS Constant Buffer 1 Read Length without a
+    *      non-zero value in VS Constant Buffer 0 Read Length."
+    *
+    * So find how many leading buffers are enabled.
+    */
+   while (num_enabled < 4 && num_enabled < num_bufs && sizes[num_enabled])
+      num_enabled++;
+
+   dw[0] = 0;
+   dw[1] = 0;
+
+   for (i = 0; i < num_enabled; i++) {
+      /* read lengths are in 256-bit units */
+      const int read_len = (sizes[i] + 31) / 32;
+
+      /* the lower 5 bits are used for memory object control state */
+      assert(bufs[i] % 32 == 0);
+
+      /* two 16-bit read lengths are packed per dword */
+      dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
+      dw[2 + i] = bufs[i];
+
+      read_len_sum += read_len;
+   }
+
+   /* the remaining slots must all be disabled */
+   for (; i < 4; i++) {
+      assert(i >= num_bufs || !sizes[i]);
+      dw[2 + i] = 0;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+    *
+    *     "The sum of all four read length fields must be less than or equal
+    *      to the size of 64"
+    */
+   assert(read_len_sum <= 64);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write_multi(cp, dw, 6);
+   ilo_cp_end(cp);
+}
+
+/* Emit 3DSTATE_CONSTANT_VS via the shared constant-buffer emitter. */
+static void
+gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const int subop = 0x15;
+
+   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
+}
+
+/* Emit 3DSTATE_CONSTANT_GS via the shared constant-buffer emitter. */
+static void
+gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const int subop = 0x16;
+
+   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
+}
+
+/* Emit 3DSTATE_CONSTANT_PS via the shared constant-buffer emitter. */
+static void
+gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const int subop = 0x17;
+
+   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
+}
+
+/*
+ * Emit the two-dword 3DSTATE_SAMPLE_MASK command.  `sample_mask` is clipped
+ * to the low `num_samples` bits (bit 0 is always kept) before being written.
+ */
+static void
+gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
+                              unsigned sample_mask,
+                              int num_samples,
+                              struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 2;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (cmd_len - 2);
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+    *
+    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
+    *      (Sample Mask) must be zero.
+    *
+    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
+    *      must be zero."
+    */
+   const unsigned allowed_bits = ((1 << num_samples) - 1) | 0x1;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, sample_mask & allowed_bits);
+   ilo_cp_end(cp);
+}
+
+/* Emit 3DSTATE_CONSTANT_HS via the shared constant-buffer emitter. */
+static void
+gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const int subop = 0x19;
+
+   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
+}
+
+/* Emit 3DSTATE_CONSTANT_DS via the shared constant-buffer emitter. */
+static void
+gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const int subop = 0x1a;
+
+   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
+}
+
+/*
+ * Emit a seven-dword 3DSTATE_HS command with an all-zero payload.  Hull
+ * shaders are not supported: `hs` must be NULL and `num_samplers` is unused.
+ */
+static void
+gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *hs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 7;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1b) | (cmd_len - 2);
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   assert(!hs);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   /* every dword after the header is zero */
+   for (i = 1; i < cmd_len; i++)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/* Emit a four-dword 3DSTATE_TE command with an all-zero payload. */
+static void
+gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
+                     struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1c) | (cmd_len - 2);
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   /* every dword after the header is zero */
+   for (i = 1; i < cmd_len; i++)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/*
+ * Emit a six-dword 3DSTATE_DS command with an all-zero payload.  Domain
+ * shaders are not supported: `ds` must be NULL and `num_samplers` is unused.
+ */
+static void
+gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *ds,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 6;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1d) | (cmd_len - 2);
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   assert(!ds);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   /* every dword after the header is zero */
+   for (i = 1; i < cmd_len; i++)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/*
+ * Emit the three-dword 3DSTATE_STREAMOUT command.
+ *
+ * Stream output is enabled when `buffer_mask` is non-zero.  When enabled,
+ * stream 0 reads `vertex_attrib_count` attributes (two per read unit,
+ * at least one unit) and the buffer mask is placed at bit 8 of DW1.  In
+ * both cases `rasterizer_discard` sets the rendering-disable bit.
+ */
+static void
+gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
+                            unsigned buffer_mask,
+                            int vertex_attrib_count,
+                            bool rasterizer_discard,
+                            struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 3;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1e) | (cmd_len - 2);
+   uint32_t dw1, dw2;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (buffer_mask != 0) {
+      /* attributes are read in pairs; always read at least one pair */
+      int read_len = (vertex_attrib_count + 1) / 2;
+
+      if (!read_len)
+         read_len = 1;
+
+      dw1 = SO_FUNCTION_ENABLE |
+            0 << SO_RENDER_STREAM_SELECT_SHIFT |
+            SO_STATISTICS_ENABLE |
+            buffer_mask << 8;
+
+      if (rasterizer_discard)
+         dw1 |= SO_RENDERING_DISABLE;
+
+      /* API_OPENGL */
+      if (true)
+         dw1 |= SO_REORDER_TRAILING;
+
+      /* only stream 0 is used */
+      dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
+            0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
+            0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
+            0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
+            0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
+            0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
+            0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
+            (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
+   }
+   else {
+      /* stream output disabled */
+      dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
+      if (rasterizer_discard)
+         dw1 |= SO_RENDERING_DISABLE;
+
+      dw2 = 0;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+
+/*
+ * Emit the fourteen-dword 3DSTATE_SBE command.  The thirteen payload dwords
+ * are produced by ilo_gpe_gen6_fill_3dstate_sf_sbe() from the rasterizer
+ * state, the fragment shader, and the last enabled shader stage.
+ */
+static void
+gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
+                      const struct ilo_rasterizer_state *rasterizer,
+                      const struct ilo_shader_state *fs,
+                      const struct ilo_shader_state *last_sh,
+                      struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 14;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1f) | (cmd_len - 2);
+   uint32_t payload[13];
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+         fs, last_sh, payload, Elements(payload));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write_multi(cp, payload, 13);
+   ilo_cp_end(cp);
+}
+
+/*
+ * Emit the eight-dword 3DSTATE_PS command.
+ *
+ * With a fragment shader, DW2/DW4/DW5 come from the shader's precomputed
+ * CSO payload; the sampler count (in units of four samplers, rounded up)
+ * goes into DW2 and `dual_blend` enables dual-source blending in DW4.
+ * Without one, only the maximum thread count and 8-pixel dispatch are
+ * programmed.
+ */
+static void
+gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *fs,
+                     int num_samplers, bool dual_blend,
+                     struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 8;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x20) | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (fs) {
+      const struct ilo_shader_cso *cso = ilo_shader_get_kernel_cso(fs);
+      uint32_t dw2 = cso->payload[0];
+      uint32_t dw4 = cso->payload[1];
+      const uint32_t dw5 = cso->payload[2];
+
+      dw2 |= ((num_samplers + 3) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
+
+      if (dual_blend)
+         dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
+
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, header);
+      ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
+      ilo_cp_write(cp, dw2);
+      ilo_cp_write(cp, 0); /* scratch */
+      ilo_cp_write(cp, dw4);
+      ilo_cp_write(cp, dw5);
+      ilo_cp_write(cp, 0); /* kernel 1 */
+      ilo_cp_write(cp, 0); /* kernel 2 */
+      ilo_cp_end(cp);
+   }
+   else {
+      /* see brwCreateContext() */
+      const int max_threads = (dev->gt == 2) ? 172 : 48;
+
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, header);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      /* GPU hangs if none of the dispatch enable bits is set */
+      ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
+                       GEN7_PS_8_DISPATCH_ENABLE);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+   }
+}
+
+/* Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
+                                                  uint32_t sf_clip_viewport,
+                                                  struct ilo_cp *cp)
+{
+   const int subop = 0x21;
+
+   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
+}
+
+/* Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
+                                             uint32_t cc_viewport,
+                                             struct ilo_cp *cp)
+{
+   const int subop = 0x23;
+
+   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
+}
+
+/* Emit 3DSTATE_BLEND_STATE_POINTERS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                       uint32_t blend_state,
+                                       struct ilo_cp *cp)
+{
+   const int subop = 0x24;
+
+   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
+}
+
+/* Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                               uint32_t depth_stencil_state,
+                                               struct ilo_cp *cp)
+{
+   const int subop = 0x25;
+
+   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
+}
+
+/* Emit 3DSTATE_BINDING_TABLE_POINTERS_VS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x26;
+
+   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
+}
+
+/* Emit 3DSTATE_BINDING_TABLE_POINTERS_HS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x27;
+
+   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
+}
+
+/* Emit 3DSTATE_BINDING_TABLE_POINTERS_DS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x28;
+
+   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
+}
+
+/* Emit 3DSTATE_BINDING_TABLE_POINTERS_GS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x29;
+
+   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
+}
+
+/* Emit 3DSTATE_BINDING_TABLE_POINTERS_PS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x2a;
+
+   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
+}
+
+/* Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x2b;
+
+   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
+}
+
+/* Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x2c;
+
+   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
+}
+
+/* Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x2d;
+
+   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
+}
+
+/* Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x2e;
+
+   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
+}
+
+/* Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS via the pointer emitter. */
+static void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   const int subop = 0x2f;
+
+   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
+}
+
+/*
+ * Shared emitter for the two-dword 3DSTATE_URB_{VS,HS,DS,GS} commands.
+ *
+ * `offset` and `size` are in bytes (`offset` must be a multiple of 8KB) and
+ * `entry_size` is the size of one URB entry in bytes.  The entry size is
+ * rounded up to whole 512-bit rows, and the number of entries that fit in
+ * `size` is rounded down to a multiple of 8 and then limited per stage.
+ */
+static void
+gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
+                      int subop, int offset, int size,
+                      int entry_size,
+                      struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 2;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);
+   const int row_size = 64; /* 512 bits */
+   int rows_per_entry, num_entries;
+   int max_entries = 0; /* 0 means no upper limit is applied */
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, and GS variants */
+   assert(subop >= 0x30 && subop <= 0x33);
+
+   /* in multiples of 8KB */
+   assert(offset % 8192 == 0);
+   offset /= 8192;
+
+   /* in multiple of 512-bit rows, and at least one row */
+   rows_per_entry = (entry_size + row_size - 1) / row_size;
+   if (!rows_per_entry)
+      rows_per_entry = 1;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+    *
+    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
+    *      cause performance to decrease due to banking in the URB. Element
+    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+    */
+   if (subop == 0x30 && rows_per_entry == 5)
+      rows_per_entry = 6;
+
+   /* in multiples of 8 */
+   num_entries = (size / row_size / rows_per_entry) & ~7;
+
+   if (subop == 0x30) {
+      /* 3DSTATE_URB_VS: at least 32 entries are required */
+      assert(num_entries >= 32);
+      max_entries = (dev->gt == 2) ? 704 : 512;
+   }
+   else if (subop == 0x31) {
+      /* 3DSTATE_URB_HS */
+      max_entries = (dev->gt == 2) ? 64 : 32;
+   }
+   else if (subop == 0x32) {
+      /* 3DSTATE_URB_DS: either no entries or at least 138 */
+      assert(!num_entries || num_entries >= 138);
+   }
+   else if (subop == 0x33) {
+      /* 3DSTATE_URB_GS */
+      max_entries = (dev->gt == 2) ? 320 : 192;
+   }
+
+   if (max_entries && num_entries > max_entries)
+      num_entries = max_entries;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
+                    (rows_per_entry - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
+                    num_entries);
+   ilo_cp_end(cp);
+}
+
+/* Emit 3DSTATE_URB_VS via the shared URB emitter. */
+static void
+gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   const int subop = 0x30;
+
+   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
+}
+
+/* Emit 3DSTATE_URB_HS via the shared URB emitter. */
+static void
+gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   const int subop = 0x31;
+
+   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
+}
+
+/* Emit 3DSTATE_URB_DS via the shared URB emitter. */
+static void
+gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   const int subop = 0x32;
+
+   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
+}
+
+/* Emit 3DSTATE_URB_GS via the shared URB emitter. */
+static void
+gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   const int subop = 0x33;
+
+   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
+}
+
+/*
+ * Shared emitter for the two-dword 3DSTATE_PUSH_CONSTANT_ALLOC_* commands.
+ *
+ * `offset` and `size` are in bytes.  They are converted to KB (the start is
+ * rounded up, the end is rounded down) and clamped into the ranges the
+ * hardware accepts; out-of-range inputs also trip an assertion.
+ */
+static void
+gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
+                                      int subop, int offset, int size,
+                                      struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 2;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, subop) | (cmd_len - 2);
+   int start_kb, end_kb, size_kb;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, GS, and PS variants */
+   assert(subop >= 0x12 && subop <= 0x16);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
+    *
+    *     (a table that says the maximum size of each constant buffer is
+    *      16KB)
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
+    *
+    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
+    *      may not exceed the maximum value of the Constant Buffer Size."
+    *
+    * Thus, the valid range of buffer end is [0KB, 16KB].
+    */
+   end_kb = (offset + size) / 1024;
+   if (end_kb > 16) {
+      assert(!"invalid constant buffer end");
+      end_kb = 16;
+   }
+
+   /* the valid range of buffer offset is [0KB, 15KB] */
+   start_kb = (offset + 1023) / 1024;
+   if (start_kb > 15) {
+      assert(!"invalid constant buffer offset");
+      start_kb = 15;
+   }
+
+   /* rounding can only push the start past the end for an empty buffer */
+   if (start_kb > end_kb) {
+      assert(!size);
+      start_kb = end_kb;
+   }
+
+   /* the valid range of buffer size is [0KB, 15KB] */
+   size_kb = end_kb - start_kb;
+   if (size_kb > 15) {
+      assert(!"invalid constant buffer size");
+      size_kb = 15;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, start_kb << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
+                    size_kb);
+   ilo_cp_end(cp);
+}
+
+/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS via the shared emitter. */
+static void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   const int subop = 0x12;
+
+   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
+}
+
+/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS via the shared emitter. */
+static void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   const int subop = 0x13;
+
+   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
+}
+
+/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS via the shared emitter. */
+static void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   const int subop = 0x14;
+
+   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
+}
+
+/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS via the shared emitter. */
+static void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   const int subop = 0x15;
+
+   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
+}
+
+/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS via the shared emitter. */
+static void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   const int subop = 0x16;
+
+   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
+}
+
+/*
+ * Emit 3DSTATE_SO_DECL_LIST, using stream 0 only.
+ *
+ * One decl is built per stream output in `so_info` (which may be NULL for
+ * an empty list).  When an output's dst_offset lies beyond the dwords
+ * already written to its buffer, the gap is filled with "hole" decls of up
+ * to four dwords each.  The command always carries 128 decl slots; unused
+ * slots are zero.
+ *
+ * The decl array holds 128 entries.  Should holes plus outputs ever need
+ * more, the excess is dropped (and an assertion fires) instead of writing
+ * past the end of the array.
+ */
+static void
+gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
+                               const struct pipe_stream_output_info *so_info,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
+   uint16_t cmd_len;
+   int buffer_selects, num_entries, i;
+   uint16_t so_decls[128];
+   const int max_entries = Elements(so_decls);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   buffer_selects = 0;
+   num_entries = 0;
+
+   if (so_info) {
+      /* dwords written so far to each buffer */
+      int buffer_offsets[PIPE_MAX_SO_BUFFERS];
+
+      memset(buffer_offsets, 0, sizeof(buffer_offsets));
+
+      for (i = 0; i < so_info->num_outputs; i++) {
+         unsigned decl, buf, reg, mask;
+
+         buf = so_info->output[i].output_buffer;
+
+         /* pad with holes */
+         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
+         while (buffer_offsets[buf] < so_info->output[i].dst_offset &&
+                num_entries < max_entries) {
+            int num_dwords;
+
+            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
+            if (num_dwords > 4)
+               num_dwords = 4;
+
+            decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                   SO_DECL_HOLE_FLAG |
+                   ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
+
+            so_decls[num_entries++] = decl;
+            buffer_offsets[buf] += num_dwords;
+         }
+
+         /* so_decls[] is full: stop rather than overrun the array */
+         if (num_entries >= max_entries) {
+            assert(!"too many SO decls");
+            break;
+         }
+
+         reg = so_info->output[i].register_index;
+         mask = ((1 << so_info->output[i].num_components) - 1) <<
+            so_info->output[i].start_component;
+
+         decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                reg << SO_DECL_REGISTER_INDEX_SHIFT |
+                mask << SO_DECL_COMPONENT_MASK_SHIFT;
+
+         so_decls[num_entries++] = decl;
+         buffer_selects |= 1 << buf;
+         buffer_offsets[buf] += so_info->output[i].num_components;
+      }
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+    *
+    *     "Errata: All 128 decls for all four streams must be included
+    *      whenever this command is issued. The "Num Entries [n]" fields still
+    *      contain the actual numbers of valid decls."
+    *
+    * Also note that "DWord Length" has 9 bits for this command, and the type
+    * of cmd_len is thus uint16_t.
+    */
+   cmd_len = 2 * 128 + 3;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
+                    buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
+   ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
+                    0 << SO_NUM_ENTRIES_2_SHIFT |
+                    0 << SO_NUM_ENTRIES_1_SHIFT |
+                    num_entries << SO_NUM_ENTRIES_0_SHIFT);
+
+   /* each slot is two dwords; only the first one is used here */
+   for (i = 0; i < num_entries; i++) {
+      ilo_cp_write(cp, so_decls[i]);
+      ilo_cp_write(cp, 0);
+   }
+   for (; i < 128; i++) {
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+   }
+
+   ilo_cp_end(cp);
+}
+
+/*
+ * Emit the four-dword 3DSTATE_SO_BUFFER command for buffer slot `index`.
+ *
+ * Without a target (or without a buffer behind it) only the slot index is
+ * programmed and the remaining dwords are zero.  Otherwise the stride and
+ * the start/end addresses are written, the latter two as relocations
+ * against the buffer's BO.  `base`, `stride` and the target's offset are
+ * expected to be dword-aligned and are rounded down to dwords regardless.
+ */
+static void
+gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
+                            int index, int base, int stride,
+                            const struct pipe_stream_output_target *so_target,
+                            struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x18) | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (so_target && so_target->buffer) {
+      struct ilo_buffer *buf = ilo_buffer(so_target->buffer);
+      int start, end;
+
+      /* DWord-aligned */
+      assert(stride % 4 == 0 && base % 4 == 0);
+      assert(so_target->buffer_offset % 4 == 0);
+
+      stride &= ~3;
+      start = (base + so_target->buffer_offset) & ~3;
+      end = (start + so_target->buffer_size) & ~3;
+
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, header);
+      ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
+                       stride);
+      ilo_cp_write_bo(cp, start, buf->bo,
+                      INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+      ilo_cp_write_bo(cp, end, buf->bo,
+                      INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+      ilo_cp_end(cp);
+   }
+   else {
+      /* nothing bound to this slot */
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, header);
+      ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+   }
+}
+
+/*
+ * Emit the seven-dword 3DPRIMITIVE command for a draw.
+ *
+ * The primitive type is a rectangle list when `rectlist` is set and the
+ * translation of info->mode otherwise.  Indexed draws use random vertex
+ * buffer access and add the index buffer's draw_start_offset to the start
+ * vertex; `ib` is only dereferenced for indexed draws.
+ */
+static void
+gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
+                      const struct pipe_draw_info *info,
+                      const struct ilo_ib_state *ib,
+                      bool rectlist,
+                      struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 7;
+   const uint32_t header = ILO_GPE_CMD(0x3, 0x3, 0x00) | (cmd_len - 2);
+   const int topology = (rectlist) ?
+      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
+   const int access_mode = (info->indexed) ?
+      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
+      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+   const uint32_t first_vertex = info->start +
+      ((info->indexed) ? ib->draw_start_offset : 0);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, access_mode | topology);
+   ilo_cp_write(cp, info->count);
+   ilo_cp_write(cp, first_vertex);
+   ilo_cp_write(cp, info->instance_count);
+   ilo_cp_write(cp, info->start_instance);
+   ilo_cp_write(cp, info->index_bias);
+   ilo_cp_end(cp);
+}
+
+/*
+ * Write the SF_CLIP_VIEWPORT state array (16 dwords per viewport) into
+ * space obtained from ilo_cp_steal_ptr() and return its state offset.
+ *
+ * Each element holds the viewport matrix terms (m00, m11, m22, m30, m31,
+ * m32) and the guardband extents, stored as raw float bits; the remaining
+ * dwords are zero.
+ */
+static uint32_t
+gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
+                           const struct ilo_viewport_cso *viewports,
+                           unsigned num_viewports,
+                           struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 16 * num_viewports;
+   uint32_t state_offset, *base;
+   unsigned idx;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
+    *
+    *     "The viewport-specific state used by both the SF and CL units
+    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
+    *      of which contains the DWords described below. The start of each
+    *      element is spaced 16 DWords apart. The location of first element of
+    *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
+    *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   base = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      const struct ilo_viewport_cso *vp = &viewports[idx];
+      uint32_t *elem = base + 16 * idx;
+
+      /* viewport matrix */
+      elem[0] = fui(vp->m00);
+      elem[1] = fui(vp->m11);
+      elem[2] = fui(vp->m22);
+      elem[3] = fui(vp->m30);
+      elem[4] = fui(vp->m31);
+      elem[5] = fui(vp->m32);
+      elem[6] = 0;
+      elem[7] = 0;
+
+      /* guardband */
+      elem[8] = fui(vp->min_gbx);
+      elem[9] = fui(vp->max_gbx);
+      elem[10] = fui(vp->min_gby);
+      elem[11] = fui(vp->max_gby);
+
+      elem[12] = 0;
+      elem[13] = 0;
+      elem[14] = 0;
+      elem[15] = 0;
+   }
+
+   return state_offset;
+}
+
void
ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
unsigned width, unsigned height,
@@ -241,7 +1271,7 @@ ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
{
uint32_t *dw;
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
+ ILO_GPE_VALID_GEN(dev, 7, 7);
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 62:
@@ -277,16 +1307,16 @@ ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
STATIC_ASSERT(Elements(surf->payload) >= 8);
dw = surf->payload;
- dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
- GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT |
- GEN6_TILING_X << 13;
+ dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
+ BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
+ BRW_SURFACE_TILED << 13;
dw[1] = 0;
- dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
- SET_FIELD(width - 1, GEN7_SURFACE_DW2_WIDTH);
+ dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
+ SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
- dw[3] = SET_FIELD(depth - 1, GEN7_SURFACE_DW3_DEPTH);
+ dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
dw[4] = 0;
dw[5] = level;
@@ -314,12 +1344,12 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
int surface_type, surface_format, num_entries;
uint32_t *dw;
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
+ ILO_GPE_VALID_GEN(dev, 7, 7);
- surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
+ surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
surface_format = (typed) ?
- ilo_translate_color_format(elem_format) : GEN6_FORMAT_RAW;
+ ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
num_entries = size / struct_size;
/* see if there is enough space to fit another element */
@@ -392,17 +1422,17 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
STATIC_ASSERT(Elements(surf->payload) >= 8);
dw = surf->payload;
- dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+ dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
+ surface_format << BRW_SURFACE_FORMAT_SHIFT;
if (render_cache_rw)
- dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
+ dw[0] |= BRW_SURFACE_RC_READ_WRITE;
dw[1] = offset;
- dw[2] = SET_FIELD(height, GEN7_SURFACE_DW2_HEIGHT) |
- SET_FIELD(width, GEN7_SURFACE_DW2_WIDTH);
+ dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
+ SET_FIELD(width, GEN7_SURFACE_WIDTH);
- dw[3] = SET_FIELD(depth, GEN7_SURFACE_DW3_DEPTH) |
+ dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
pitch;
dw[4] = 0;
@@ -411,13 +1441,6 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
dw[6] = 0;
dw[7] = 0;
- if (dev->gen >= ILO_GEN(7.5)) {
- dw[7] |= SET_FIELD(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
- SET_FIELD(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
- SET_FIELD(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
- SET_FIELD(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
- }
-
/* do not increment reference count */
surf->bo = buf->bo;
}
@@ -430,7 +1453,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
- bool is_rt, bool offset_to_layer,
+ bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf)
{
int surface_type, surface_format;
@@ -438,10 +1461,10 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
unsigned layer_offset, x_offset, y_offset;
uint32_t *dw;
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
+ ILO_GPE_VALID_GEN(dev, 7, 7);
surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
- assert(surface_type != GEN6_SURFTYPE_BUFFER);
+ assert(surface_type != BRW_SURFACE_BUFFER);
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
format = PIPE_FORMAT_Z32_FLOAT;
@@ -458,7 +1481,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
tex->base.depth0 : num_layers;
pitch = tex->bo_stride;
- if (surface_type == GEN6_SURFTYPE_CUBE) {
+ if (surface_type == BRW_SURFACE_CUBE) {
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 70:
*
@@ -471,7 +1494,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
* restriction.
*/
if (is_rt) {
- surface_type = GEN6_SURFTYPE_2D;
+ surface_type = BRW_SURFACE_2D;
}
else {
assert(num_layers % 6 == 0);
@@ -483,18 +1506,18 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
assert(first_layer < 2048 && num_layers <= 2048);
switch (surface_type) {
- case GEN6_SURFTYPE_1D:
+ case BRW_SURFACE_1D:
assert(width <= 16384 && height == 1 && depth <= 2048);
break;
- case GEN6_SURFTYPE_2D:
+ case BRW_SURFACE_2D:
assert(width <= 16384 && height <= 16384 && depth <= 2048);
break;
- case GEN6_SURFTYPE_3D:
+ case BRW_SURFACE_3D:
assert(width <= 2048 && height <= 2048 && depth <= 2048);
if (!is_rt)
assert(first_layer == 0);
break;
- case GEN6_SURFTYPE_CUBE:
+ case BRW_SURFACE_CUBE:
assert(width <= 16384 && height <= 16384 && depth <= 86);
assert(width == height);
if (is_rt)
@@ -506,44 +1529,52 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
}
if (is_rt) {
+ /*
+ * Compute the offset to the layer manually.
+ *
+ * For rendering, the hardware requires LOD to be the same for all
+ * render targets and the depth buffer. We need to compute the offset
+ * to the layer manually and always set LOD to 0.
+ */
+ if (true) {
+ /* we lose the capability for layered rendering */
+ assert(num_layers == 1);
+
+ layer_offset = ilo_texture_get_slice_offset(tex,
+ first_level, first_layer, &x_offset, &y_offset);
+
+ assert(x_offset % 4 == 0);
+ assert(y_offset % 2 == 0);
+ x_offset /= 4;
+ y_offset /= 2;
+
+ /* derive the size for the LOD */
+ width = u_minify(width, first_level);
+ height = u_minify(height, first_level);
+ if (surface_type == BRW_SURFACE_3D)
+ depth = u_minify(depth, first_level);
+ else
+ depth = 1;
+
+ first_level = 0;
+ first_layer = 0;
+ lod = 0;
+ }
+ else {
+ layer_offset = 0;
+ x_offset = 0;
+ y_offset = 0;
+ }
+
assert(num_levels == 1);
lod = first_level;
}
else {
- lod = num_levels - 1;
- }
-
- /*
- * Offset to the layer. When rendering, the hardware requires LOD and
- * Depth to be the same for all render targets and the depth buffer. We
- * need to offset to the layer manually and always set LOD and Depth to 0.
- */
- if (offset_to_layer) {
- /* we lose the capability for layered rendering */
- assert(is_rt && num_layers == 1);
-
- layer_offset = ilo_texture_get_slice_offset(tex,
- first_level, first_layer, &x_offset, &y_offset);
-
- assert(x_offset % 4 == 0);
- assert(y_offset % 2 == 0);
- x_offset /= 4;
- y_offset /= 2;
-
- /* derive the size for the LOD */
- width = u_minify(width, first_level);
- height = u_minify(height, first_level);
-
- first_level = 0;
- first_layer = 0;
-
- lod = 0;
- depth = 1;
- }
- else {
layer_offset = 0;
x_offset = 0;
y_offset = 0;
+
+ lod = num_levels - 1;
}
/*
@@ -582,8 +1613,8 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
STATIC_ASSERT(Elements(surf->payload) >= 8);
dw = surf->payload;
- dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
+ dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
+ surface_format << BRW_SURFACE_FORMAT_SHIFT |
ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;
/*
@@ -597,36 +1628,36 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
* For non-3D sampler surfaces, resinfo (the sampler message) always
* returns zero for the number of layers when this field is not set.
*/
- if (surface_type != GEN6_SURFTYPE_3D) {
+ if (surface_type != BRW_SURFACE_3D) {
if (util_resource_is_array_texture(&tex->base))
- dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
+ dw[0] |= GEN7_SURFACE_IS_ARRAY;
else
assert(depth == 1);
}
if (tex->valign_4)
- dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
+ dw[0] |= GEN7_SURFACE_VALIGN_4;
if (tex->halign_8)
- dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
+ dw[0] |= GEN7_SURFACE_HALIGN_8;
if (tex->array_spacing_full)
- dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
+ dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
else
- dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
+ dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;
- if (is_rt)
- dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
+ if (render_cache_rw)
+ dw[0] |= BRW_SURFACE_RC_READ_WRITE;
- if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
- dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+ if (surface_type == BRW_SURFACE_CUBE && !is_rt)
+ dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
dw[1] = layer_offset;
- dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
- SET_FIELD(width - 1, GEN7_SURFACE_DW2_WIDTH);
+ dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
+ SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
- dw[3] = SET_FIELD(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
+ dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
(pitch - 1);
dw[4] = first_layer << 18 |
@@ -639,34 +1670,270 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
*/
if (tex->interleaved && tex->base.nr_samples > 1) {
assert(!is_rt);
- dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
+ dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
}
else {
- dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
+ dw[4] |= GEN7_SURFACE_MSFMT_MSS;
}
if (tex->base.nr_samples > 4)
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
+ dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
else if (tex->base.nr_samples > 2)
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
+ dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
else
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
+ dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
- dw[5] = x_offset << GEN7_SURFACE_DW5_X_OFFSET__SHIFT |
- y_offset << GEN7_SURFACE_DW5_Y_OFFSET__SHIFT |
- SET_FIELD(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
+ dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
+ y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
+ SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
lod;
dw[6] = 0;
dw[7] = 0;
- if (dev->gen >= ILO_GEN(7.5)) {
- dw[7] |= SET_FIELD(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
- SET_FIELD(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
- SET_FIELD(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
- SET_FIELD(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
- }
-
/* do not increment reference count */
surf->bo = tex->bo;
}
+
+static int
+gen7_estimate_command_size(const struct ilo_dev_info *dev,
+ enum ilo_gpe_gen7_command cmd,
+ int arg)
+{ /* returns header + body * arg for cmd, or 0 when arg is 0; the unit is presumably DWords -- confirm */
+ static const struct {
+ int header; /* added once, regardless of the count */
+ int body; /* multiplied by the count */
+ } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
+ [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
+ [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
+ [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
+ [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
+ [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
+ [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
+ [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
+ [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
+ [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 }, /* one of three entries with a non-zero header */
+ [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
+ [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
+ [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
+ [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
+ [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
+ [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
+ [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
+ [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
+ [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
+ [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
+ [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
+ [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
+ [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
+ [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
+ [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
+ [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
+ };
+ const int header = gen7_command_size_table[cmd].header; /* NOTE(review): the table is indexed here, before the range assert on cmd below */
+ const int body = gen7_command_size_table[cmd].body;
+ const int count = arg; /* arg is used as the number of body repetitions */
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+ assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
+
+ return (likely(count)) ? header + body * count : 0; /* a zero count yields 0, not the header size */
+}
+
+static int
+gen7_estimate_state_size(const struct ilo_dev_info *dev,
+ enum ilo_gpe_gen7_state state,
+ int arg)
+{ /* returns a size estimate for arg instances of state, or 0 when arg is 0; the unit is presumably DWords -- confirm */
+ static const struct {
+ int alignment; /* (alignment - 1) is added once to every non-zero estimate */
+ int body; /* size of a single instance */
+ bool is_array; /* true: instances are summed without per-instance rounding */
+ } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
+ [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
+ [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true },
+ [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true },
+ [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false },
+ [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true },
+ [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false },
+ [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true },
+ [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true },
+ [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false },
+ [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true },
+ [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false },
+ [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
+ };
+ const int alignment = gen7_state_size_table[state].alignment; /* NOTE(review): the table is indexed here, before the range assert on state below */
+ const int body = gen7_state_size_table[state].body;
+ const bool is_array = gen7_state_size_table[state].is_array;
+ const int count = arg; /* arg is used as the number of instances */
+ int estimate;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+ assert(state < ILO_GPE_GEN7_STATE_COUNT);
+
+ if (likely(count)) {
+ if (is_array) {
+ estimate = (alignment - 1) + body * count; /* (alignment - 1) is presumably worst-case padding for the start of the array -- confirm */
+ }
+ else {
+ estimate = (alignment - 1) + body; /* first instance, with the same (alignment - 1) allowance */
+ /* all states are aligned */
+ if (count > 1)
+ estimate += util_align_npot(body, alignment) * (count - 1); /* each further instance is counted at body rounded by util_align_npot() */
+ }
+ }
+ else {
+ estimate = 0;
+ }
+
+ return estimate;
+}
+
+static void
+gen7_init(struct ilo_gpe_gen7 *gen7)
+{ /* fills in gen7: the two estimate callbacks plus one emit_* pointer per command/state listed below */
+ const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get(); /* source of the emit_* pointers copied by GEN7_USE */
+
+ gen7->estimate_command_size = gen7_estimate_command_size;
+ gen7->estimate_state_size = gen7_estimate_state_size;
+
+#define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name /* copy the pointer from another table */
+#define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name /* point at this file's gen7_emit_<name> */
+ GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
+ GEN7_USE(gen7, STATE_SIP, gen6);
+ GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
+ GEN7_USE(gen7, PIPELINE_SELECT, gen6);
+ GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
+ GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
+ GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
+ GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
+ GEN7_SET(gen7, GPGPU_WALKER);
+ GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
+ GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
+ GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
+ GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
+ GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
+ GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
+ GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
+ GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
+ GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
+ GEN7_USE(gen7, 3DSTATE_VS, gen6);
+ GEN7_SET(gen7, 3DSTATE_GS);
+ GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
+ GEN7_SET(gen7, 3DSTATE_SF);
+ GEN7_SET(gen7, 3DSTATE_WM);
+ GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
+ GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
+ GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
+ GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
+ GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
+ GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
+ GEN7_SET(gen7, 3DSTATE_HS);
+ GEN7_SET(gen7, 3DSTATE_TE);
+ GEN7_SET(gen7, 3DSTATE_DS);
+ GEN7_SET(gen7, 3DSTATE_STREAMOUT);
+ GEN7_SET(gen7, 3DSTATE_SBE);
+ GEN7_SET(gen7, 3DSTATE_PS);
+ GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
+ GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
+ GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
+ GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
+ GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
+ GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
+ GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
+ GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
+ GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
+ GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
+ GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
+ GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
+ GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
+ GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
+ GEN7_SET(gen7, 3DSTATE_URB_VS);
+ GEN7_SET(gen7, 3DSTATE_URB_HS);
+ GEN7_SET(gen7, 3DSTATE_URB_DS);
+ GEN7_SET(gen7, 3DSTATE_URB_GS);
+ GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
+ GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
+ GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
+ GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
+ GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
+ GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
+ GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
+ GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
+ GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
+ GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
+ GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+ GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
+ GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
+ GEN7_USE(gen7, PIPE_CONTROL, gen6);
+ GEN7_SET(gen7, 3DPRIMITIVE);
+ GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6); /* from here on the names match the state table rather than the command table */
+ GEN7_SET(gen7, SF_CLIP_VIEWPORT);
+ GEN7_USE(gen7, CC_VIEWPORT, gen6);
+ GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
+ GEN7_USE(gen7, BLEND_STATE, gen6);
+ GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
+ GEN7_USE(gen7, SCISSOR_RECT, gen6);
+ GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
+ GEN7_USE(gen7, SURFACE_STATE, gen6);
+ GEN7_USE(gen7, SAMPLER_STATE, gen6);
+ GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
+ GEN7_USE(gen7, push_constant_buffer, gen6); /* the only lower-case name: this sets emit_push_constant_buffer */
+#undef GEN7_USE
+#undef GEN7_SET
+}
+
+static struct ilo_gpe_gen7 gen7_gpe; /* file-scope table returned by ilo_gpe_gen7_get(); zero-initialized static storage */
+
+const struct ilo_gpe_gen7 *
+ilo_gpe_gen7_get(void)
+{ /* returns the file's single table, initializing it on the first call */
+ if (!gen7_gpe.estimate_command_size) /* a NULL callback means gen7_init() has not run yet */
+ gen7_init(&gen7_gpe); /* NOTE(review): no locking is visible around this lazy init */
+
+ return &gen7_gpe;
+}
diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h
index 0816fd674..321201548 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h
@@ -28,1101 +28,466 @@
#ifndef ILO_GPE_GEN7_H
#define ILO_GPE_GEN7_H
-#include "intel_winsys.h"
-
#include "ilo_common.h"
-#include "ilo_cp.h"
-#include "ilo_resource.h"
-#include "ilo_shader.h"
#include "ilo_gpe_gen6.h"
-static inline void
-gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- assert(!"GPGPU_WALKER unsupported");
-}
-
-static inline void
-gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
- uint32_t clear_val,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, clear_val);
- ilo_cp_write(cp, 1);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_VF(const struct ilo_dev_info *dev,
- bool enable_cut_index,
- uint32_t cut_index,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0c);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 7.5, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- ((enable_cut_index) ? GEN75_VF_DW0_CUT_INDEX_ENABLE : 0));
- ilo_cp_write(cp, cut_index);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
- int subop, uint32_t pointer,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, pointer);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t color_calc_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *gs,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
- const uint8_t cmd_len = 7;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- if (!gs) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, GEN7_GS_DW5_STATISTICS);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- enum pipe_format zs_format,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
- const uint8_t cmd_len = 7;
- const int num_samples = 1;
- uint32_t payload[6];
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
- rasterizer, num_samples, zs_format,
- payload, Elements(payload));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, payload, 6);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *fs,
- const struct ilo_rasterizer_state *rasterizer,
- bool cc_may_kill, uint32_t hiz_op,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
- const uint8_t cmd_len = 3;
- const int num_samples = 1;
- uint32_t dw1, dw2;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- /* see ilo_gpe_init_rasterizer_wm() */
- if (rasterizer) {
- dw1 = rasterizer->wm.payload[0];
- dw2 = rasterizer->wm.payload[1];
-
- assert(!hiz_op);
- dw1 |= GEN7_WM_DW1_STATISTICS;
- }
- else {
- dw1 = hiz_op;
- dw2 = 0;
- }
-
- if (fs) {
- const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
-
- dw1 |= fs_cso->payload[3];
- }
-
- if (cc_may_kill)
- dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL;
-
- if (num_samples > 1) {
- dw1 |= rasterizer->wm.dw_msaa_rast;
- dw2 |= rasterizer->wm.dw_msaa_disp;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
- int subop,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
- const uint8_t cmd_len = 7;
- uint32_t dw[6];
- int total_read_length, i;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- /* VS, HS, DS, GS, and PS variants */
- assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
-
- assert(num_bufs <= 4);
-
- dw[0] = 0;
- dw[1] = 0;
-
- total_read_length = 0;
- for (i = 0; i < 4; i++) {
- int read_len;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 112:
- *
- * "Constant buffers must be enabled in order from Constant Buffer 0
- * to Constant Buffer 3 within this command. For example, it is
- * not allowed to enable Constant Buffer 1 by programming a
- * non-zero value in the VS Constant Buffer 1 Read Length without a
- * non-zero value in VS Constant Buffer 0 Read Length."
- */
- if (i >= num_bufs || !sizes[i]) {
- for (; i < 4; i++) {
- assert(i >= num_bufs || !sizes[i]);
- dw[2 + i] = 0;
- }
- break;
- }
-
- /* read lengths are in 256-bit units */
- read_len = (sizes[i] + 31) / 32;
- /* the lower 5 bits are used for memory object control state */
- assert(bufs[i] % 32 == 0);
-
- dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
- dw[2 + i] = bufs[i];
-
- total_read_length += read_len;
- }
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 113:
- *
- * "The sum of all four read length fields must be less than or equal
- * to the size of 64"
- */
- assert(total_read_length <= 64);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, dw, 6);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
- unsigned sample_mask,
- int num_samples,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
- const uint8_t cmd_len = 2;
- const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 294:
- *
- * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
- * (Sample Mask) must be zero.
- *
- * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
- * must be zero."
- */
- sample_mask &= valid_mask;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, sample_mask);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *hs,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
- const uint8_t cmd_len = 7;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- assert(!hs);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *ds,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
- const uint8_t cmd_len = 6;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- assert(!ds);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
-}
-
-static inline void
-gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
- unsigned buffer_mask,
- int vertex_attrib_count,
- bool rasterizer_discard,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
- const uint8_t cmd_len = 3;
- const bool enable = (buffer_mask != 0);
- uint32_t dw1, dw2;
- int read_len;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- if (!enable) {
- dw1 = 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
- if (rasterizer_discard)
- dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
-
- dw2 = 0;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_end(cp);
- return;
- }
-
- read_len = (vertex_attrib_count + 1) / 2;
- if (!read_len)
- read_len = 1;
-
- dw1 = GEN7_SO_DW1_SO_ENABLE |
- 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
- GEN7_SO_DW1_STATISTICS |
- buffer_mask << 8;
-
- if (rasterizer_discard)
- dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
-
- /* API_OPENGL */
- if (true)
- dw1 |= GEN7_SO_DW1_REORDER_TRAILING;
-
- dw2 = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
- 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
- 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
- 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
- const uint8_t cmd_len = 14;
- uint32_t dw[13];
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs, dw, Elements(dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, dw, 13);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *fs,
- int num_samplers, bool dual_blend,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
- const uint8_t cmd_len = 8;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- if (!fs) {
- int max_threads;
-
- /* GPU hangs if none of the dispatch enable bits is set */
- dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH;
-
- /* see brwCreateContext() */
- switch (dev->gen) {
- case ILO_GEN(7.5):
- max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
- dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
- break;
- case ILO_GEN(7):
- default:
- max_threads = (dev->gt == 2) ? 172 : 48;
- dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
- break;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(fs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- if (dual_blend)
- dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, 0); /* kernel 1 */
- ilo_cp_write(cp, 0); /* kernel 2 */
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
- uint32_t sf_clip_viewport,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
- uint32_t cc_viewport,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t blend_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t depth_stencil_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
-}
-
-static inline void
-gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
- int subop, int offset, int size,
- int entry_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
- const uint8_t cmd_len = 2;
- const int row_size = 64; /* 512 bits */
- int alloc_size, num_entries, min_entries, max_entries;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- /* VS, HS, DS, and GS variants */
- assert(subop >= 0x30 && subop <= 0x33);
-
- /* in multiples of 8KB */
- assert(offset % 8192 == 0);
- offset /= 8192;
-
- /* in multiple of 512-bit rows */
- alloc_size = (entry_size + row_size - 1) / row_size;
- if (!alloc_size)
- alloc_size = 1;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 34:
- *
- * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
- * cause performance to decrease due to banking in the URB. Element
- * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
- */
- if (subop == 0x30 && alloc_size == 5)
- alloc_size = 6;
-
- /* in multiples of 8 */
- num_entries = (size / row_size / alloc_size) & ~7;
-
- switch (subop) {
- case 0x30: /* 3DSTATE_URB_VS */
- min_entries = 32;
-
- switch (dev->gen) {
- case ILO_GEN(7.5):
- max_entries = (dev->gt >= 2) ? 1644 : 640;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (dev->gt == 2) ? 704 : 512;
- break;
- }
-
- assert(num_entries >= min_entries);
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case 0x31: /* 3DSTATE_URB_HS */
- max_entries = (dev->gt == 2) ? 64 : 32;
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case 0x32: /* 3DSTATE_URB_DS */
- if (num_entries)
- assert(num_entries >= 138);
- break;
- case 0x33: /* 3DSTATE_URB_GS */
- switch (dev->gen) {
- case ILO_GEN(7.5):
- max_entries = (dev->gt >= 2) ? 640 : 256;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (dev->gt == 2) ? 320 : 192;
- break;
- }
-
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- default:
- break;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
- (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
- num_entries);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
-}
-
-static inline void
-gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
- int subop, int offset, int size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
- const uint8_t cmd_len = 2;
- int end;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- /* VS, HS, DS, GS, and PS variants */
- assert(subop >= 0x12 && subop <= 0x16);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 68:
- *
- * "(A table that says the maximum size of each constant buffer is
- * 16KB")
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 115:
- *
- * "The sum of the Constant Buffer Offset and the Constant Buffer Size
- * may not exceed the maximum value of the Constant Buffer Size."
- *
- * Thus, the valid range of buffer end is [0KB, 16KB].
- */
- end = (offset + size) / 1024;
- if (end > 16) {
- assert(!"invalid constant buffer end");
- end = 16;
- }
-
- /* the valid range of buffer offset is [0KB, 15KB] */
- offset = (offset + 1023) / 1024;
- if (offset > 15) {
- assert(!"invalid constant buffer offset");
- offset = 15;
- }
-
- if (offset > end) {
- assert(!size);
- offset = end;
- }
-
- /* the valid range of buffer size is [0KB, 15KB] */
- size = end - offset;
- if (size > 15) {
- assert(!"invalid constant buffer size");
- size = 15;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
- size);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
- const struct pipe_stream_output_info *so_info,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
- uint16_t cmd_len;
- int buffer_selects, num_entries, i;
- uint16_t so_decls[128];
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- buffer_selects = 0;
- num_entries = 0;
-
- if (so_info) {
- int buffer_offsets[PIPE_MAX_SO_BUFFERS];
-
- memset(buffer_offsets, 0, sizeof(buffer_offsets));
-
- for (i = 0; i < so_info->num_outputs; i++) {
- unsigned decl, buf, reg, mask;
-
- buf = so_info->output[i].output_buffer;
-
- /* pad with holes */
- assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
- while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
- int num_dwords;
-
- num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
- if (num_dwords > 4)
- num_dwords = 4;
-
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- GEN7_SO_DECL_HOLE_FLAG |
- ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
- so_decls[num_entries++] = decl;
- buffer_offsets[buf] += num_dwords;
- }
-
- reg = so_info->output[i].register_index;
- mask = ((1 << so_info->output[i].num_components) - 1) <<
- so_info->output[i].start_component;
-
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
- mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
- so_decls[num_entries++] = decl;
- buffer_selects |= 1 << buf;
- buffer_offsets[buf] += so_info->output[i].num_components;
- }
- }
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 201:
- *
- * "Errata: All 128 decls for all four streams must be included
- * whenever this command is issued. The "Num Entries [n]" fields still
- * contain the actual numbers of valid decls."
- *
- * Also note that "DWord Length" has 9 bits for this command, and the type
- * of cmd_len is thus uint16_t.
- */
- cmd_len = 2 * 128 + 3;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
- 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
- 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
- buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT);
- ilo_cp_write(cp, 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
- 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
- 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
- num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT);
-
- for (i = 0; i < num_entries; i++) {
- ilo_cp_write(cp, so_decls[i]);
- ilo_cp_write(cp, 0);
- }
- for (; i < 128; i++) {
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- }
-
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
- int index, int base, int stride,
- const struct pipe_stream_output_target *so_target,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
- const uint8_t cmd_len = 4;
- struct ilo_buffer *buf;
- int end;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- if (!so_target || !so_target->buffer) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, index << GEN7_SO_BUF_DW1_INDEX__SHIFT);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
- return;
- }
-
- buf = ilo_buffer(so_target->buffer);
-
- /* DWord-aligned */
- assert(stride % 4 == 0 && base % 4 == 0);
- assert(so_target->buffer_offset % 4 == 0);
-
- stride &= ~3;
- base = (base + so_target->buffer_offset) & ~3;
- end = (base + so_target->buffer_size) & ~3;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
- stride);
- ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_end(cp);
-}
-
-static inline void
-gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib,
- bool rectlist,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
- const uint8_t cmd_len = 7;
- const int prim = (rectlist) ?
- GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN7_3DPRIM_DW1_ACCESS_RANDOM :
- GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, vb_access | prim);
- ilo_cp_write(cp, info->count);
- ilo_cp_write(cp, vb_start);
- ilo_cp_write(cp, info->instance_count);
- ilo_cp_write(cp, info->start_instance);
- ilo_cp_write(cp, info->index_bias);
- ilo_cp_end(cp);
-}
-
-static inline uint32_t
-gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- const int state_len = 16 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 7, 7.5);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 270:
- *
- * "The viewport-specific state used by both the SF and CL units
- * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
- * of which contains the DWords described below. The start of each
- * element is spaced 16 DWords apart. The location of first element of
- * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
- * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
- dw[8] = fui(vp->min_gbx);
- dw[9] = fui(vp->max_gbx);
- dw[10] = fui(vp->min_gby);
- dw[11] = fui(vp->max_gby);
- dw[12] = 0;
- dw[13] = 0;
- dw[14] = 0;
- dw[15] = 0;
-
- dw += 16;
- }
-
- return state_offset;
-}
+/**
+ * Commands that GEN7 GPE could emit.
+ */
+enum ilo_gpe_gen7_command {
+ ILO_GPE_GEN7_STATE_BASE_ADDRESS, /* (0x0, 0x1, 0x01) */
+ ILO_GPE_GEN7_STATE_SIP, /* (0x0, 0x1, 0x02) */
+ ILO_GPE_GEN7_3DSTATE_VF_STATISTICS, /* (0x1, 0x0, 0x0b) */
+ ILO_GPE_GEN7_PIPELINE_SELECT, /* (0x1, 0x1, 0x04) */
+ ILO_GPE_GEN7_MEDIA_VFE_STATE, /* (0x2, 0x0, 0x00) */
+ ILO_GPE_GEN7_MEDIA_CURBE_LOAD, /* (0x2, 0x0, 0x01) */
+ ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, /* (0x2, 0x0, 0x02) */
+ ILO_GPE_GEN7_MEDIA_STATE_FLUSH, /* (0x2, 0x0, 0x04) */
+ ILO_GPE_GEN7_GPGPU_WALKER, /* (0x2, 0x1, 0x05) */
+ ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS, /* (0x3, 0x0, 0x04) */
+ ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER, /* (0x3, 0x0, 0x05) */
+ ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER, /* (0x3, 0x0, 0x06) */
+ ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER, /* (0x3, 0x0, 0x07) */
+ ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS, /* (0x3, 0x0, 0x08) */
+ ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS, /* (0x3, 0x0, 0x09) */
+ ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER, /* (0x3, 0x0, 0x0a) */
+ ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS, /* (0x3, 0x0, 0x0e) */
+ ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS, /* (0x3, 0x0, 0x0f) */
+ ILO_GPE_GEN7_3DSTATE_VS, /* (0x3, 0x0, 0x10) */
+ ILO_GPE_GEN7_3DSTATE_GS, /* (0x3, 0x0, 0x11) */
+ ILO_GPE_GEN7_3DSTATE_CLIP, /* (0x3, 0x0, 0x12) */
+ ILO_GPE_GEN7_3DSTATE_SF, /* (0x3, 0x0, 0x13) */
+ ILO_GPE_GEN7_3DSTATE_WM, /* (0x3, 0x0, 0x14) */
+ ILO_GPE_GEN7_3DSTATE_CONSTANT_VS, /* (0x3, 0x0, 0x15) */
+ ILO_GPE_GEN7_3DSTATE_CONSTANT_GS, /* (0x3, 0x0, 0x16) */
+ ILO_GPE_GEN7_3DSTATE_CONSTANT_PS, /* (0x3, 0x0, 0x17) */
+ ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK, /* (0x3, 0x0, 0x18) */
+ ILO_GPE_GEN7_3DSTATE_CONSTANT_HS, /* (0x3, 0x0, 0x19) */
+ ILO_GPE_GEN7_3DSTATE_CONSTANT_DS, /* (0x3, 0x0, 0x1a) */
+ ILO_GPE_GEN7_3DSTATE_HS, /* (0x3, 0x0, 0x1b) */
+ ILO_GPE_GEN7_3DSTATE_TE, /* (0x3, 0x0, 0x1c) */
+ ILO_GPE_GEN7_3DSTATE_DS, /* (0x3, 0x0, 0x1d) */
+ ILO_GPE_GEN7_3DSTATE_STREAMOUT, /* (0x3, 0x0, 0x1e) */
+ ILO_GPE_GEN7_3DSTATE_SBE, /* (0x3, 0x0, 0x1f) */
+ ILO_GPE_GEN7_3DSTATE_PS, /* (0x3, 0x0, 0x20) */
+ ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, /* (0x3, 0x0, 0x21) */
+ ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC, /* (0x3, 0x0, 0x23) */
+ ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS, /* (0x3, 0x0, 0x24) */
+ ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, /* (0x3, 0x0, 0x25) */
+ ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, /* (0x3, 0x0, 0x26) */
+ ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS, /* (0x3, 0x0, 0x27) */
+ ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS, /* (0x3, 0x0, 0x28) */
+ ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS, /* (0x3, 0x0, 0x29) */
+ ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS, /* (0x3, 0x0, 0x2a) */
+ ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, /* (0x3, 0x0, 0x2b) */
+ ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS, /* (0x3, 0x0, 0x2c) */
+ ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS, /* (0x3, 0x0, 0x2d) */
+ ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS, /* (0x3, 0x0, 0x2e) */
+ ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS, /* (0x3, 0x0, 0x2f) */
+ ILO_GPE_GEN7_3DSTATE_URB_VS, /* (0x3, 0x0, 0x30) */
+ ILO_GPE_GEN7_3DSTATE_URB_HS, /* (0x3, 0x0, 0x31) */
+ ILO_GPE_GEN7_3DSTATE_URB_DS, /* (0x3, 0x0, 0x32) */
+ ILO_GPE_GEN7_3DSTATE_URB_GS, /* (0x3, 0x0, 0x33) */
+ ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE, /* (0x3, 0x1, 0x00) */
+ ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET, /* (0x3, 0x1, 0x06) */
+ ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN, /* (0x3, 0x1, 0x07) */
+ ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE, /* (0x3, 0x1, 0x08) */
+ ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS, /* (0x3, 0x1, 0x0a) */
+ ILO_GPE_GEN7_3DSTATE_MULTISAMPLE, /* (0x3, 0x1, 0x0d) */
+ ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, /* (0x3, 0x1, 0x12) */
+ ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS, /* (0x3, 0x1, 0x13) */
+ ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS, /* (0x3, 0x1, 0x14) */
+ ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, /* (0x3, 0x1, 0x15) */
+ ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, /* (0x3, 0x1, 0x16) */
+ ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST, /* (0x3, 0x1, 0x17) */
+ ILO_GPE_GEN7_3DSTATE_SO_BUFFER, /* (0x3, 0x1, 0x18) */
+ ILO_GPE_GEN7_PIPE_CONTROL, /* (0x3, 0x2, 0x00) */
+ ILO_GPE_GEN7_3DPRIMITIVE, /* (0x3, 0x3, 0x00) */
+
+ ILO_GPE_GEN7_COMMAND_COUNT,
+};
+
+/**
+ * Indirect states that GEN7 GPE could emit.
+ */
+enum ilo_gpe_gen7_state {
+ ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA,
+ ILO_GPE_GEN7_SF_CLIP_VIEWPORT,
+ ILO_GPE_GEN7_CC_VIEWPORT,
+ ILO_GPE_GEN7_COLOR_CALC_STATE,
+ ILO_GPE_GEN7_BLEND_STATE,
+ ILO_GPE_GEN7_DEPTH_STENCIL_STATE,
+ ILO_GPE_GEN7_SCISSOR_RECT,
+ ILO_GPE_GEN7_BINDING_TABLE_STATE,
+ ILO_GPE_GEN7_SURFACE_STATE,
+ ILO_GPE_GEN7_SAMPLER_STATE,
+ ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE,
+ ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER,
+
+ ILO_GPE_GEN7_STATE_COUNT,
+};
+
+typedef ilo_gpe_gen6_STATE_BASE_ADDRESS ilo_gpe_gen7_STATE_BASE_ADDRESS;
+typedef ilo_gpe_gen6_STATE_SIP ilo_gpe_gen7_STATE_SIP;
+typedef ilo_gpe_gen6_3DSTATE_VF_STATISTICS ilo_gpe_gen7_3DSTATE_VF_STATISTICS;
+typedef ilo_gpe_gen6_PIPELINE_SELECT ilo_gpe_gen7_PIPELINE_SELECT;
+typedef ilo_gpe_gen6_MEDIA_VFE_STATE ilo_gpe_gen7_MEDIA_VFE_STATE;
+typedef ilo_gpe_gen6_MEDIA_CURBE_LOAD ilo_gpe_gen7_MEDIA_CURBE_LOAD;
+typedef ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD ilo_gpe_gen7_MEDIA_INTERFACE_DESCRIPTOR_LOAD;
+typedef ilo_gpe_gen6_MEDIA_STATE_FLUSH ilo_gpe_gen7_MEDIA_STATE_FLUSH;
+
+typedef void
+(*ilo_gpe_gen7_GPGPU_WALKER)(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp);
+
+typedef ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS ilo_gpe_gen7_3DSTATE_CLEAR_PARAMS;
+typedef ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_DEPTH_BUFFER;
+typedef ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER ilo_gpe_gen7_3DSTATE_STENCIL_BUFFER;
+typedef ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_HIER_DEPTH_BUFFER;
+typedef ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS ilo_gpe_gen7_3DSTATE_VERTEX_BUFFERS;
+typedef ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS ilo_gpe_gen7_3DSTATE_VERTEX_ELEMENTS;
+typedef ilo_gpe_gen6_3DSTATE_INDEX_BUFFER ilo_gpe_gen7_3DSTATE_INDEX_BUFFER;
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t color_calc_state,
+ struct ilo_cp *cp);
+
+typedef ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS ilo_gpe_gen7_3DSTATE_SCISSOR_STATE_POINTERS;
+typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS;
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ int num_samplers,
+ struct ilo_cp *cp);
+
+typedef ilo_gpe_gen6_3DSTATE_CLIP ilo_gpe_gen7_3DSTATE_CLIP;
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SF)(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct pipe_surface *zs_surf,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_WM)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ const struct ilo_rasterizer_state *rasterizer,
+ bool cc_may_kill,
+ struct ilo_cp *cp);
+
+typedef ilo_gpe_gen6_3DSTATE_CONSTANT_VS ilo_gpe_gen7_3DSTATE_CONSTANT_VS;
+typedef ilo_gpe_gen6_3DSTATE_CONSTANT_GS ilo_gpe_gen7_3DSTATE_CONSTANT_GS;
+typedef ilo_gpe_gen6_3DSTATE_CONSTANT_PS ilo_gpe_gen7_3DSTATE_CONSTANT_PS;
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev,
+ unsigned sample_mask,
+ int num_samples,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_CONSTANT_HS)(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_CONSTANT_DS)(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_HS)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *hs,
+ int num_samplers,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_TE)(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_DS)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *ds,
+ int num_samplers,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_STREAMOUT)(const struct ilo_dev_info *dev,
+ unsigned buffer_mask,
+ int vertex_attrib_count,
+ bool rasterizer_discard,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SBE)(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ const struct ilo_shader_state *last_sh,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_PS)(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ int num_samplers, bool dual_blend,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP)(const struct ilo_dev_info *dev,
+ uint32_t viewport,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC)(const struct ilo_dev_info *dev,
+ uint32_t viewport,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_BLEND_STATE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t blend,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS)(const struct ilo_dev_info *dev,
+ uint32_t depth_stencil,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_VS)(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_HS)(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_DS)(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_GS)(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_PS)(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS)(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS)(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS)(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS)(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS)(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_URB_VS)(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_URB_HS)(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_URB_DS)(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_URB_GS)(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp);
+
+typedef ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE ilo_gpe_gen7_3DSTATE_DRAWING_RECTANGLE;
+typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_OFFSET;
+typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_PATTERN;
+typedef ilo_gpe_gen6_3DSTATE_LINE_STIPPLE ilo_gpe_gen7_3DSTATE_LINE_STIPPLE;
+typedef ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS ilo_gpe_gen7_3DSTATE_AA_LINE_PARAMETERS;
+typedef ilo_gpe_gen6_3DSTATE_MULTISAMPLE ilo_gpe_gen7_3DSTATE_MULTISAMPLE;
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS)(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS)(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS)(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS)(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS)(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SO_DECL_LIST)(const struct ilo_dev_info *dev,
+ const struct pipe_stream_output_info *so_info,
+ struct ilo_cp *cp);
+
+typedef void
+(*ilo_gpe_gen7_3DSTATE_SO_BUFFER)(const struct ilo_dev_info *dev,
+ int index, int base, int stride,
+ const struct pipe_stream_output_target *so_target,
+ struct ilo_cp *cp);
+
+typedef ilo_gpe_gen6_PIPE_CONTROL ilo_gpe_gen7_PIPE_CONTROL;
+typedef ilo_gpe_gen6_3DPRIMITIVE ilo_gpe_gen7_3DPRIMITIVE;
+typedef ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA ilo_gpe_gen7_INTERFACE_DESCRIPTOR_DATA;
+
+typedef uint32_t
+(*ilo_gpe_gen7_SF_CLIP_VIEWPORT)(const struct ilo_dev_info *dev,
+ const struct ilo_viewport_cso *viewports,
+ unsigned num_viewports,
+ struct ilo_cp *cp);
+
+typedef ilo_gpe_gen6_CC_VIEWPORT ilo_gpe_gen7_CC_VIEWPORT;
+typedef ilo_gpe_gen6_COLOR_CALC_STATE ilo_gpe_gen7_COLOR_CALC_STATE;
+typedef ilo_gpe_gen6_BLEND_STATE ilo_gpe_gen7_BLEND_STATE;
+typedef ilo_gpe_gen6_DEPTH_STENCIL_STATE ilo_gpe_gen7_DEPTH_STENCIL_STATE;
+typedef ilo_gpe_gen6_SCISSOR_RECT ilo_gpe_gen7_SCISSOR_RECT;
+typedef ilo_gpe_gen6_BINDING_TABLE_STATE ilo_gpe_gen7_BINDING_TABLE_STATE;
+typedef ilo_gpe_gen6_SURFACE_STATE ilo_gpe_gen7_SURFACE_STATE;
+typedef ilo_gpe_gen6_SAMPLER_STATE ilo_gpe_gen7_SAMPLER_STATE;
+typedef ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE ilo_gpe_gen7_SAMPLER_BORDER_COLOR_STATE;
+typedef ilo_gpe_gen6_push_constant_buffer ilo_gpe_gen7_push_constant_buffer;
+
+/**
+ * GEN7 graphics processing engine
+ *
+ * \see ilo_gpe_gen6
+ */
+struct ilo_gpe_gen7 {
+ int (*estimate_command_size)(const struct ilo_dev_info *dev,
+ enum ilo_gpe_gen7_command cmd,
+ int arg);
+
+ int (*estimate_state_size)(const struct ilo_dev_info *dev,
+ enum ilo_gpe_gen7_state state,
+ int arg);
+
+#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name emit_ ## name
+ GEN7_EMIT(STATE_BASE_ADDRESS);
+ GEN7_EMIT(STATE_SIP);
+ GEN7_EMIT(3DSTATE_VF_STATISTICS);
+ GEN7_EMIT(PIPELINE_SELECT);
+ GEN7_EMIT(MEDIA_VFE_STATE);
+ GEN7_EMIT(MEDIA_CURBE_LOAD);
+ GEN7_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
+ GEN7_EMIT(MEDIA_STATE_FLUSH);
+ GEN7_EMIT(GPGPU_WALKER);
+ GEN7_EMIT(3DSTATE_CLEAR_PARAMS);
+ GEN7_EMIT(3DSTATE_DEPTH_BUFFER);
+ GEN7_EMIT(3DSTATE_STENCIL_BUFFER);
+ GEN7_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
+ GEN7_EMIT(3DSTATE_VERTEX_BUFFERS);
+ GEN7_EMIT(3DSTATE_VERTEX_ELEMENTS);
+ GEN7_EMIT(3DSTATE_INDEX_BUFFER);
+ GEN7_EMIT(3DSTATE_CC_STATE_POINTERS);
+ GEN7_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
+ GEN7_EMIT(3DSTATE_VS);
+ GEN7_EMIT(3DSTATE_GS);
+ GEN7_EMIT(3DSTATE_CLIP);
+ GEN7_EMIT(3DSTATE_SF);
+ GEN7_EMIT(3DSTATE_WM);
+ GEN7_EMIT(3DSTATE_CONSTANT_VS);
+ GEN7_EMIT(3DSTATE_CONSTANT_GS);
+ GEN7_EMIT(3DSTATE_CONSTANT_PS);
+ GEN7_EMIT(3DSTATE_SAMPLE_MASK);
+ GEN7_EMIT(3DSTATE_CONSTANT_HS);
+ GEN7_EMIT(3DSTATE_CONSTANT_DS);
+ GEN7_EMIT(3DSTATE_HS);
+ GEN7_EMIT(3DSTATE_TE);
+ GEN7_EMIT(3DSTATE_DS);
+ GEN7_EMIT(3DSTATE_STREAMOUT);
+ GEN7_EMIT(3DSTATE_SBE);
+ GEN7_EMIT(3DSTATE_PS);
+ GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
+ GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
+ GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS);
+ GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS);
+ GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS);
+ GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS);
+ GEN7_EMIT(3DSTATE_URB_VS);
+ GEN7_EMIT(3DSTATE_URB_HS);
+ GEN7_EMIT(3DSTATE_URB_DS);
+ GEN7_EMIT(3DSTATE_URB_GS);
+ GEN7_EMIT(3DSTATE_DRAWING_RECTANGLE);
+ GEN7_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
+ GEN7_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
+ GEN7_EMIT(3DSTATE_LINE_STIPPLE);
+ GEN7_EMIT(3DSTATE_AA_LINE_PARAMETERS);
+ GEN7_EMIT(3DSTATE_MULTISAMPLE);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
+ GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+ GEN7_EMIT(3DSTATE_SO_DECL_LIST);
+ GEN7_EMIT(3DSTATE_SO_BUFFER);
+ GEN7_EMIT(PIPE_CONTROL);
+ GEN7_EMIT(3DPRIMITIVE);
+ GEN7_EMIT(INTERFACE_DESCRIPTOR_DATA);
+ GEN7_EMIT(SF_CLIP_VIEWPORT);
+ GEN7_EMIT(CC_VIEWPORT);
+ GEN7_EMIT(COLOR_CALC_STATE);
+ GEN7_EMIT(BLEND_STATE);
+ GEN7_EMIT(DEPTH_STENCIL_STATE);
+ GEN7_EMIT(SCISSOR_RECT);
+ GEN7_EMIT(BINDING_TABLE_STATE);
+ GEN7_EMIT(SURFACE_STATE);
+ GEN7_EMIT(SAMPLER_STATE);
+ GEN7_EMIT(SAMPLER_BORDER_COLOR_STATE);
+ GEN7_EMIT(push_constant_buffer);
+#undef GEN7_EMIT
+};
+
+const struct ilo_gpe_gen7 *
+ilo_gpe_gen7_get(void);
#endif /* ILO_GPE_GEN7_H */
diff --git a/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h b/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h
index 04c8854d2..fedd78c41 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h
@@ -1,6 +1,6 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
@@ -26,7 +26,7 @@
**********************************************************************/
/*
* Authors:
- * Keith Whitwell <keithw@vmware.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
*/
#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
@@ -1723,4 +1723,6 @@ enum brw_wm_barycentric_interp_mode {
*/
#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
+#include "intel_chipset.h"
+
#endif
diff --git a/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h b/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h
index 07f8d3aca..c322edfbd 100644
--- a/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h
+++ b/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h
@@ -1,6 +1,6 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
@@ -26,7 +26,7 @@
**********************************************************************/
/*
* Authors:
- * Keith Whitwell <keithw@vmware.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
*/