diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2013-09-05 13:11:20 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2013-09-05 13:11:20 +0000 |
commit | a0d72f02a5edb8dd5aa760de1e891e5e8b5a379c (patch) | |
tree | 240b6a20bd3b74779bf2eb59fb6a5879c415a730 /dist | |
parent | d8ca9df6585400b918b964c60d84c35599edb86a (diff) |
Import Mesa 9.2.0
Diffstat (limited to 'dist')
21 files changed, 7318 insertions, 7859 deletions
diff --git a/dist/Mesa/src/gallium/drivers/Makefile.am b/dist/Mesa/src/gallium/drivers/Makefile.am index f8baa3cf9..22f54b7ad 100644 --- a/dist/Mesa/src/gallium/drivers/Makefile.am +++ b/dist/Mesa/src/gallium/drivers/Makefile.am @@ -1,7 +1,46 @@ AUTOMAKE_OPTIONS = subdir-objects +AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + $(DEFINES) -SUBDIRS = . galahad identity noop trace rbug +AM_CFLAGS = $(VISIBILITY_CFLAGS) + +noinst_LTLIBRARIES = + +SUBDIRS = . trace rbug + +################################################################################ + +noinst_LTLIBRARIES += galahad/libgalahad.la + +galahad_libgalahad_la_SOURCES = \ + galahad/glhd_objects.c \ + galahad/glhd_context.c \ + galahad/glhd_screen.c + +################################################################################ + +noinst_LTLIBRARIES += identity/libidentity.la + +identity_libidentity_la_SOURCES = \ + identity/id_objects.c \ + identity/id_context.c \ + identity/id_screen.c + +################################################################################ + +# Meta-driver which combines whichever software rasterizers have been +# built into a single convenience library. + +noinst_LTLIBRARIES += noop/libnoop.la + +noop_libnoop_la_SOURCES = \ + noop/noop_pipe.c \ + noop/noop_state.c ################################################################################ @@ -47,7 +86,7 @@ endif if HAVE_GALLIUM_NOUVEAU -SUBDIRS += nouveau +SUBDIRS += nouveau nv30 nv50 nvc0 endif diff --git a/dist/Mesa/src/gallium/drivers/Makefile.in b/dist/Mesa/src/gallium/drivers/Makefile.in index b4cb7b57c..654fd7dba 100644 --- a/dist/Mesa/src/gallium/drivers/Makefile.in +++ b/dist/Mesa/src/gallium/drivers/Makefile.in @@ -1,7 +1,7 @@ -# Makefile.in generated by automake 1.14.1 from Makefile.am. +# Makefile.in generated by automake 1.12.2 from Makefile.am. 
# @configure_input@ -# Copyright (C) 1994-2013 Free Software Foundation, Inc. +# Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -13,52 +13,25 @@ # PARTICULAR PURPOSE. @SET_MAKE@ + VPATH = @srcdir@ -am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' -am__make_running_with_option = \ - case $${target_option-} in \ - ?) ;; \ - *) echo "am__make_running_with_option: internal error: invalid" \ - "target option '$${target_option-}' specified" >&2; \ - exit 1;; \ - esac; \ - has_opt=no; \ - sane_makeflags=$$MAKEFLAGS; \ - if $(am__is_gnu_make); then \ - sane_makeflags=$$MFLAGS; \ - else \ +am__make_dryrun = \ + { \ + am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ - bs=\\; \ - sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ - | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ - esac; \ - fi; \ - skip_next=no; \ - strip_trailopt () \ - { \ - flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ - }; \ - for flg in $$sane_makeflags; do \ - test $$skip_next = yes && { skip_next=no; continue; }; \ - case $$flg in \ - *=*|--*) continue;; \ - -*I) strip_trailopt 'I'; skip_next=yes;; \ - -*I?*) strip_trailopt 'I';; \ - -*O) strip_trailopt 'O'; skip_next=yes;; \ - -*O?*) strip_trailopt 'O';; \ - -*l) strip_trailopt 'l'; skip_next=yes;; \ - -*l?*) strip_trailopt 'l';; \ - -[dEDm]) skip_next=yes;; \ - -[JT]) skip_next=yes;; \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ esac; \ - case $$flg in \ - *$$target_option*) has_opt=yes; break;; \ - esac; \ - done; \ - test $$has_opt = yes -am__make_dryrun = (target_option=n; $(am__make_running_with_option)) -am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) + test $$am__dry = yes; \ 
+ } pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ @@ -93,7 +66,7 @@ target_triplet = @target@ @HAVE_GALLIUM_ILO_TRUE@am__append_5 = ilo ################################################################################ -@HAVE_GALLIUM_NOUVEAU_TRUE@am__append_6 = nouveau +@HAVE_GALLIUM_NOUVEAU_TRUE@am__append_6 = nouveau nv30 nv50 nvc0 ################################################################################ @HAVE_GALLIUM_SVGA_TRUE@am__append_7 = svga @@ -113,21 +86,41 @@ target_triplet = @target@ ################################################################################ @NEED_GALLIUM_LLVMPIPE_DRIVER_TRUE@am__append_12 = llvmpipe subdir = src/gallium/drivers -DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ + $(top_srcdir)/bin/depcomp ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \ - $(top_srcdir)/m4/ax_gcc_builtin.m4 \ - $(top_srcdir)/m4/ax_prog_bison.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \ + $(top_srcdir)/m4/ax_prog_cc_for_build.m4 \ + $(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \ $(top_srcdir)/m4/ax_prog_flex.m4 \ - $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ - $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ - $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ - $(top_srcdir)/VERSION $(top_srcdir)/configure.ac + $(top_srcdir)/m4/ax_pthread.m4 \ + $(top_srcdir)/m4/ax_python_module.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +galahad_libgalahad_la_LIBADD = 
+am__dirstamp = $(am__leading_dot)dirstamp +am_galahad_libgalahad_la_OBJECTS = galahad/glhd_objects.lo \ + galahad/glhd_context.lo galahad/glhd_screen.lo +galahad_libgalahad_la_OBJECTS = $(am_galahad_libgalahad_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +identity_libidentity_la_LIBADD = +am_identity_libidentity_la_OBJECTS = identity/id_objects.lo \ + identity/id_context.lo identity/id_screen.lo +identity_libidentity_la_OBJECTS = \ + $(am_identity_libidentity_la_OBJECTS) +noop_libnoop_la_LIBADD = +am_noop_libnoop_la_OBJECTS = noop/noop_pipe.lo noop/noop_state.lo +noop_libnoop_la_OBJECTS = $(am_noop_libnoop_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -140,16 +133,39 @@ AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = -SOURCES = -DIST_SOURCES = -RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ - ctags-recursive dvi-recursive html-recursive info-recursive \ - install-data-recursive install-dvi-recursive \ - install-exec-recursive install-html-recursive \ - install-info-recursive install-pdf-recursive \ - install-ps-recursive install-recursive installcheck-recursive \ - installdirs-recursive pdf-recursive ps-recursive \ - tags-recursive uninstall-recursive +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/bin/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ 
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(galahad_libgalahad_la_SOURCES) \ + $(identity_libidentity_la_SOURCES) $(noop_libnoop_la_SOURCES) +DIST_SOURCES = $(galahad_libgalahad_la_SOURCES) \ + $(identity_libidentity_la_SOURCES) $(noop_libnoop_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -157,33 +173,13 @@ am__can_run_installinfo = \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive -am__recursive_targets = \ - $(RECURSIVE_TARGETS) \ - $(RECURSIVE_CLEAN_TARGETS) \ - $(am__extra_recursive_targets) -AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ distdir -am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) -# Read a list of newline-separated strings from the standard input, -# and print each of them once, without duplicates. Input order is -# *not* preserved. -am__uniquify_input = $(AWK) '\ - BEGIN { nonempty = 0; } \ - { items[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in items) print i; }; } \ -' -# Make sure the list of sources is unique. This is necessary because, -# e.g., the same source file might be shared among _SOURCES variables -# for different programs/libraries. 
-am__define_uniq_tagged_files = \ - list='$(am__tagged_files)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | $(am__uniquify_input)` ETAGS = etags CTAGS = ctags -DIST_SUBDIRS = . galahad identity noop trace rbug radeon freedreno \ - i915 ilo nouveau svga r300 r600 radeonsi softpipe llvmpipe +DIST_SUBDIRS = . trace rbug radeon freedreno i915 ilo nouveau nv30 \ + nv50 nvc0 svga r300 r600 radeonsi softpipe llvmpipe DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ @@ -218,30 +214,39 @@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ +BUILD_EXEEXT = @BUILD_EXEEXT@ +BUILD_OBJEXT = @BUILD_OBJEXT@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ +CC_FOR_BUILD = @CC_FOR_BUILD@ CFLAGS = @CFLAGS@ +CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@ CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ CLOCK_LIB = @CLOCK_LIB@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ +CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@ +CPP_FOR_BUILD = @CPP_FOR_BUILD@ CXX = @CXX@ CXXCPP = @CXXCPP@ +CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@ +CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ +CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@ +CXX_FOR_BUILD = @CXX_FOR_BUILD@ CYGPATH_W = @CYGPATH_W@ DEFINES = @DEFINES@ +DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DLOPEN_LIBS = @DLOPEN_LIBS@ DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ -DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@ -DRI3PROTO_LIBS = @DRI3PROTO_LIBS@ DRIGL_CFLAGS = @DRIGL_CFLAGS@ DRIGL_LIBS = @DRIGL_LIBS@ DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ @@ -257,33 +262,41 @@ EGL_CFLAGS = @EGL_CFLAGS@ EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@ EGL_LIB_DEPS = @EGL_LIB_DEPS@ +EGL_LIB_GLOB = @EGL_LIB_GLOB@ +EGL_LIB_NAME = @EGL_LIB_NAME@ 
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ +EGL_PLATFORMS = @EGL_PLATFORMS@ EGREP = @EGREP@ ELF_LIB = @ELF_LIB@ EXEEXT = @EXEEXT@ -EXPAT_CFLAGS = @EXPAT_CFLAGS@ -EXPAT_LIBS = @EXPAT_LIBS@ +EXPAT_INCLUDES = @EXPAT_INCLUDES@ FGREP = @FGREP@ FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ FREEDRENO_LIBS = @FREEDRENO_LIBS@ -GALLIUM_PIPE_LOADER_CLIENT_DEFINES = @GALLIUM_PIPE_LOADER_CLIENT_DEFINES@ -GALLIUM_PIPE_LOADER_CLIENT_LIBS = @GALLIUM_PIPE_LOADER_CLIENT_LIBS@ +GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@ GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@ GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@ GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@ GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ -GC_SECTIONS = @GC_SECTIONS@ +GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@ +GLAPI_LIB_NAME = @GLAPI_LIB_NAME@ GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ +GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@ +GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@ GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ +GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@ +GLESv2_LIB_NAME = @GLESv2_LIB_NAME@ GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ GLPROTO_LIBS = @GLPROTO_LIBS@ GLX_TLS = @GLX_TLS@ GL_LIB = @GL_LIB@ GL_LIB_DEPS = @GL_LIB_DEPS@ +GL_LIB_GLOB = @GL_LIB_GLOB@ +GL_LIB_NAME = @GL_LIB_NAME@ GL_PC_CFLAGS = @GL_PC_CFLAGS@ GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ @@ -300,7 +313,7 @@ INTEL_CFLAGS = @INTEL_CFLAGS@ INTEL_LIBS = @INTEL_LIBS@ LD = @LD@ LDFLAGS = @LDFLAGS@ -LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ +LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ LEX = @LEX@ LEXLIB = @LEXLIB@ LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ @@ -308,13 +321,16 @@ LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ LIBDRM_LIBS = @LIBDRM_LIBS@ +LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@ 
+LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@ +LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@ +LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@ LIBUDEV_LIBS = @LIBUDEV_LIBS@ LIB_DIR = @LIB_DIR@ -LIB_EXT = @LIB_EXT@ LIPO = @LIPO@ LLVM_BINDIR = @LLVM_BINDIR@ LLVM_CFLAGS = @LLVM_CFLAGS@ @@ -339,13 +355,10 @@ NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ NOUVEAU_LIBS = @NOUVEAU_LIBS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ -OMX_CFLAGS = @OMX_CFLAGS@ -OMX_LIBS = @OMX_LIBS@ -OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@ -OPENCL_LIBNAME = @OPENCL_LIBNAME@ OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@ OSMESA_LIB = @OSMESA_LIB@ OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ +OSMESA_LIB_NAME = @OSMESA_LIB_NAME@ OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@ OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ OSMESA_PC_REQ = @OSMESA_PC_REQ@ @@ -365,8 +378,6 @@ PKG_CONFIG = @PKG_CONFIG@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ POSIX_SHELL = @POSIX_SHELL@ -PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@ -PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@ PTHREAD_CC = @PTHREAD_CC@ PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ PTHREAD_LIBS = @PTHREAD_LIBS@ @@ -375,7 +386,6 @@ RADEON_CFLAGS = @RADEON_CFLAGS@ RADEON_LIBS = @RADEON_LIBS@ RANLIB = @RANLIB@ SED = @SED@ -SELINUX_CFLAGS = @SELINUX_CFLAGS@ SELINUX_LIBS = @SELINUX_LIBS@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ @@ -387,6 +397,8 @@ VDPAU_MAJOR = @VDPAU_MAJOR@ VDPAU_MINOR = @VDPAU_MINOR@ VERSION = @VERSION@ VG_LIB_DEPS = @VG_LIB_DEPS@ +VG_LIB_GLOB = @VG_LIB_GLOB@ +VG_LIB_NAME = @VG_LIB_NAME@ VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@ VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ @@ -400,10 +412,15 @@ XA_TINY = @XA_TINY@ XA_VERSION = @XA_VERSION@ XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ +XEXT_CFLAGS = @XEXT_CFLAGS@ +XEXT_LIBS = @XEXT_LIBS@ XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ XLIBGL_CFLAGS = 
@XLIBGL_CFLAGS@ XLIBGL_LIBS = @XLIBGL_LIBS@ +XORG_CFLAGS = @XORG_CFLAGS@ +XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@ +XORG_LIBS = @XORG_LIBS@ XVMC_CFLAGS = @XVMC_CFLAGS@ XVMC_LIBS = @XVMC_LIBS@ XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ @@ -417,7 +434,9 @@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ +ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@ ac_ct_CXX = @ac_ct_CXX@ +ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ @@ -470,14 +489,47 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = subdir-objects -SUBDIRS = . galahad identity noop trace rbug $(am__append_1) \ - $(am__append_2) $(am__append_3) $(am__append_4) \ - $(am__append_5) $(am__append_6) $(am__append_7) \ - $(am__append_8) $(am__append_9) $(am__append_10) \ - $(am__append_11) $(am__append_12) +AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + $(DEFINES) + +AM_CFLAGS = $(VISIBILITY_CFLAGS) + +################################################################################ + +################################################################################ + +################################################################################ + +# Meta-driver which combines whichever software rasterizers have been +# built into a single convenience library. +noinst_LTLIBRARIES = galahad/libgalahad.la identity/libidentity.la \ + noop/libnoop.la +SUBDIRS = . 
trace rbug $(am__append_1) $(am__append_2) $(am__append_3) \ + $(am__append_4) $(am__append_5) $(am__append_6) \ + $(am__append_7) $(am__append_8) $(am__append_9) \ + $(am__append_10) $(am__append_11) $(am__append_12) +galahad_libgalahad_la_SOURCES = \ + galahad/glhd_objects.c \ + galahad/glhd_context.c \ + galahad/glhd_screen.c + +identity_libidentity_la_SOURCES = \ + identity/id_objects.c \ + identity/id_context.c \ + identity/id_screen.c + +noop_libnoop_la_SOURCES = \ + noop/noop_pipe.c \ + noop/noop_state.c + all: all-recursive .SUFFIXES: +.SUFFIXES: .c .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ @@ -509,11 +561,109 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +galahad/$(am__dirstamp): + @$(MKDIR_P) galahad + @: > galahad/$(am__dirstamp) +galahad/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) galahad/$(DEPDIR) + @: > galahad/$(DEPDIR)/$(am__dirstamp) +galahad/glhd_objects.lo: galahad/$(am__dirstamp) \ + galahad/$(DEPDIR)/$(am__dirstamp) +galahad/glhd_context.lo: galahad/$(am__dirstamp) \ + galahad/$(DEPDIR)/$(am__dirstamp) +galahad/glhd_screen.lo: galahad/$(am__dirstamp) \ + galahad/$(DEPDIR)/$(am__dirstamp) +galahad/libgalahad.la: $(galahad_libgalahad_la_OBJECTS) $(galahad_libgalahad_la_DEPENDENCIES) $(EXTRA_galahad_libgalahad_la_DEPENDENCIES) galahad/$(am__dirstamp) + $(AM_V_CCLD)$(LINK) $(galahad_libgalahad_la_OBJECTS) $(galahad_libgalahad_la_LIBADD) $(LIBS) +identity/$(am__dirstamp): + @$(MKDIR_P) identity + @: > identity/$(am__dirstamp) +identity/$(DEPDIR)/$(am__dirstamp): + 
@$(MKDIR_P) identity/$(DEPDIR) + @: > identity/$(DEPDIR)/$(am__dirstamp) +identity/id_objects.lo: identity/$(am__dirstamp) \ + identity/$(DEPDIR)/$(am__dirstamp) +identity/id_context.lo: identity/$(am__dirstamp) \ + identity/$(DEPDIR)/$(am__dirstamp) +identity/id_screen.lo: identity/$(am__dirstamp) \ + identity/$(DEPDIR)/$(am__dirstamp) +identity/libidentity.la: $(identity_libidentity_la_OBJECTS) $(identity_libidentity_la_DEPENDENCIES) $(EXTRA_identity_libidentity_la_DEPENDENCIES) identity/$(am__dirstamp) + $(AM_V_CCLD)$(LINK) $(identity_libidentity_la_OBJECTS) $(identity_libidentity_la_LIBADD) $(LIBS) +noop/$(am__dirstamp): + @$(MKDIR_P) noop + @: > noop/$(am__dirstamp) +noop/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) noop/$(DEPDIR) + @: > noop/$(DEPDIR)/$(am__dirstamp) +noop/noop_pipe.lo: noop/$(am__dirstamp) noop/$(DEPDIR)/$(am__dirstamp) +noop/noop_state.lo: noop/$(am__dirstamp) \ + noop/$(DEPDIR)/$(am__dirstamp) +noop/libnoop.la: $(noop_libnoop_la_OBJECTS) $(noop_libnoop_la_DEPENDENCIES) $(EXTRA_noop_libnoop_la_DEPENDENCIES) noop/$(am__dirstamp) + $(AM_V_CCLD)$(LINK) $(noop_libnoop_la_OBJECTS) $(noop_libnoop_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f galahad/*.$(OBJEXT) + -rm -f galahad/*.lo + -rm -f identity/*.$(OBJEXT) + -rm -f identity/*.lo + -rm -f noop/*.$(OBJEXT) + -rm -f noop/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@galahad/$(DEPDIR)/glhd_context.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@galahad/$(DEPDIR)/glhd_objects.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@galahad/$(DEPDIR)/glhd_screen.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@identity/$(DEPDIR)/id_context.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@identity/$(DEPDIR)/id_objects.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@identity/$(DEPDIR)/id_screen.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@noop/$(DEPDIR)/noop_pipe.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@noop/$(DEPDIR)/noop_state.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs + -rm -rf galahad/.libs galahad/_libs + -rm -rf identity/.libs identity/_libs + -rm -rf noop/.libs noop/_libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. 
@@ -521,13 +671,14 @@ clean-libtool: # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. -$(am__recursive_targets): - @fail=; \ - if $(am__make_keepgoing); then \ - failcom='fail=yes'; \ - else \ - failcom='exit 1'; \ - fi; \ +$(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ @@ -548,13 +699,31 @@ $(am__recursive_targets): if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done +cscopelist-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \ + done -ID: $(am__tagged_files) - $(am__define_uniq_tagged_files); mkid -fID $$unique -tags: tags-recursive -TAGS: tags - -tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ @@ -570,7 +739,12 @@ tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ - $(am__define_uniq_tagged_files); \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ @@ -582,11 +756,15 @@ tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $$unique; \ fi; \ fi -ctags: ctags-recursive - -CTAGS: ctags -ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) - $(am__define_uniq_tagged_files); \ +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) 
$(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique @@ -595,10 +773,9 @@ GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" -cscopelist: cscopelist-recursive -cscopelist-am: $(am__tagged_files) - list='$(am__tagged_files)'; \ +cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ @@ -671,7 +848,7 @@ distdir: $(DISTFILES) done check-am: all-am check: check-recursive -all-am: Makefile +all-am: Makefile $(LTLIBRARIES) installdirs: installdirs-recursive installdirs-am: install: install-recursive @@ -700,17 +877,26 @@ clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f galahad/$(DEPDIR)/$(am__dirstamp) + -rm -f galahad/$(am__dirstamp) + -rm -f identity/$(DEPDIR)/$(am__dirstamp) + -rm -f identity/$(am__dirstamp) + -rm -f noop/$(DEPDIR)/$(am__dirstamp) + -rm -f noop/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-recursive -clean-am: clean-generic clean-libtool mostlyclean-am +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am distclean: distclean-recursive + -rm -rf galahad/$(DEPDIR) identity/$(DEPDIR) noop/$(DEPDIR) -rm -f Makefile -distclean-am: clean-am distclean-generic distclean-tags +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags dvi: dvi-recursive @@ -753,12 +939,14 @@ install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive + -rm -rf galahad/$(DEPDIR) identity/$(DEPDIR) noop/$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive -mostlyclean-am: mostlyclean-generic mostlyclean-libtool +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf: pdf-recursive @@ -770,20 +958,24 @@ ps-am: uninstall-am: -.MAKE: $(am__recursive_targets) install-am install-strip - -.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ - check-am clean clean-generic clean-libtool cscopelist-am ctags \ - ctags-am distclean distclean-generic distclean-libtool \ - distclean-tags distdir dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-info install-info-am install-man \ - install-pdf install-pdf-am install-ps install-ps-am \ - install-strip installcheck installcheck-am installdirs \ - installdirs-am maintainer-clean maintainer-clean-generic \ - mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ - ps ps-am tags tags-am uninstall uninstall-am +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \ + cscopelist-recursive ctags-recursive install-am install-strip \ + tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + clean-noinstLTLIBRARIES 
cscopelist cscopelist-recursive ctags \ + ctags-recursive distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-recursive uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c index 8c3704bf6..4db095f56 100644 --- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c +++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c @@ -107,21 +107,189 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im, } } +/* Tracking for registers used, read-before-write (input), and + * write-after-read (output.. but not 100%).. 
+ */ + +#define MAX_REG 128 + +typedef struct { + uint8_t full[MAX_REG/8]; + uint8_t half[MAX_REG/8]; +} regmask_t; + +static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val) +{ + unsigned i = num / 8; + unsigned j = num % 8; + assert(num < MAX_REG); + if (full) { + regmask->full[i] = (regmask->full[i] & ~(1 << j)) | (val << j); + } else { + regmask->half[i] = (regmask->half[i] & ~(1 << j)) | (val << j); + } +} + +static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full) +{ + unsigned i = num / 8; + unsigned j = num % 8; + assert(num < MAX_REG); + if (full) { + return (regmask->full[i] >> j) & 0x1; + } else { + return (regmask->half[i] >> j) & 0x1; + } +} + +static unsigned regidx(reg_t reg) +{ + return (4 * reg.num) + reg.comp; +} + +static struct { + regmask_t used; + regmask_t rbw; /* read before write */ + regmask_t war; /* write after read */ + regmask_t cnst; /* used consts */ +} regs; + +static void print_regs(regmask_t *regmask, bool full) +{ + int num, max = 0, cnt = 0; + int first, last; + + void print_sequence(void) + { + if (first != MAX_REG) { + if (first == last) { + printf(" %d", first); + } else { + printf(" %d-%d", first, last); + } + } + } + + first = last = MAX_REG; + + for (num = 0; num < MAX_REG; num++) { + if (regmask_get(regmask, num, full)) { + if (num != (last + 1)) { + print_sequence(); + first = num; + } + last = num; + max = num; + cnt++; + } + } + + print_sequence(); + + printf(" (cnt=%d, max=%d)", cnt, max); +} + +static void print_reg_stats(int level) +{ + printf("%sRegister Stats:\n", levels[level]); + printf("%s- used (half):", levels[level]); + print_regs(&regs.used, false); + printf("\n"); + printf("%s- used (full):", levels[level]); + print_regs(&regs.used, true); + printf("\n"); + printf("%s- input (half):", levels[level]); + print_regs(&regs.rbw, false); + printf("\n"); + printf("%s- input (full):", levels[level]); + print_regs(&regs.rbw, true); + printf("\n"); + printf("%s- const (half):", 
levels[level]); + print_regs(&regs.cnst, false); + printf("\n"); + printf("%s- const (full):", levels[level]); + print_regs(&regs.cnst, true); + printf("\n"); + printf("%s- output (half):", levels[level]); + print_regs(&regs.war, false); + printf(" (estimated)\n"); + printf("%s- output (full):", levels[level]); + print_regs(&regs.war, true); + printf(" (estimated)\n"); +} + +/* we have to process the dst register after src to avoid tripping up + * the read-before-write detection + */ +static unsigned last_dst; +static bool last_dst_full; +static bool last_dst_valid = false; /* current instruction repeat flag: */ static unsigned repeat; +static void process_reg_dst(void) +{ + int i; + + if (!last_dst_valid) + return; + + for (i = 0; i <= repeat; i++) { + unsigned dst = last_dst + i; + + regmask_set(&regs.war, dst, last_dst_full, 1); + regmask_set(&regs.used, dst, last_dst_full, 1); + } + + last_dst_valid = false; +} + static void print_reg_dst(reg_t reg, bool full, bool addr_rel) { + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(addr_rel || reg_special(reg))) { + last_dst = regidx(reg); + last_dst_full = full; + last_dst_valid = true; + } print_reg(reg, full, false, false, false, false, false, addr_rel); } static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im, bool neg, bool abs, bool addr_rel) { + /* presumably the special registers a0.c and p0.c don't count.. 
*/ + if (!(addr_rel || c || im || reg_special(reg))) { + int i, num = regidx(reg); + for (i = 0; i <= repeat; i++) { + unsigned src = num + i; + + if (!regmask_get(&regs.used, src, full)) + regmask_set(&regs.rbw, src, full, 1); + + regmask_set(&regs.war, src, full, 0); + regmask_set(&regs.used, src, full, 1); + + if (!r) + break; + } + } else if (c) { + int i, num = regidx(reg); + for (i = 0; i <= repeat; i++) { + unsigned src = num + i; + + regmask_set(&regs.cnst, src, full, 1); + + if (!r) + break; + } + } + print_reg(reg, full, r, c, im, neg, abs, addr_rel); } + static void print_instr_cat0(instr_t *instr) { instr_cat0_t *cat0 = &instr->cat0; @@ -149,7 +317,8 @@ static void print_instr_cat1(instr_t *instr) { instr_cat1_t *cat1 = &instr->cat1; - if (cat1->ul) + // XXX maybe a bug in libllvm disassembler? + if (cat1->src_rel) printf("(ul)"); if (cat1->src_type == cat1->dst_type) { @@ -186,11 +355,10 @@ static void print_instr_cat1(instr_t *instr) /* I would just use %+d but trying to make it diff'able with * libllvm-a3xx... */ - char type = cat1->src_rel_c ? 
'c' : 'r'; if (cat1->off < 0) - printf("%c<a0.x - %d>", type, -cat1->off); + printf("c<a0.x - %d>", -cat1->off); else if (cat1->off > 0) - printf("%c<a0.x + %d>", type, cat1->off); + printf("c<a0.x + %d>", cat1->off); else printf("c<a0.x>"); } else { @@ -231,21 +399,9 @@ static void print_instr_cat2(instr_t *instr) printf("(ei)"); print_reg_dst((reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); printf(", "); - - if (cat2->c1.src1_c) { - print_reg_src((reg_t)(cat2->c1.src1), cat2->full, cat2->src1_r, - cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } else if (cat2->rel1.src1_rel) { - print_reg_src((reg_t)(cat2->rel1.src1), cat2->full, cat2->src1_r, - cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, cat2->rel1.src1_rel); - } else { - print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r, - false, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } - + print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r, + cat2->src1_c, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, cat2->src1_rel); switch (cat2->opc) { case OPC_ABSNEG_F: case OPC_ABSNEG_S: @@ -265,19 +421,9 @@ static void print_instr_cat2(instr_t *instr) break; default: printf(", "); - if (cat2->c2.src2_c) { - print_reg_src((reg_t)(cat2->c2.src2), cat2->full, cat2->src2_r, - cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } else if (cat2->rel2.src2_rel) { - print_reg_src((reg_t)(cat2->rel2.src2), cat2->full, cat2->src2_r, - cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, cat2->rel2.src2_rel); - } else { - print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r, - false, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } + print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r, + cat2->src2_c, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, cat2->src2_rel); break; } } @@ -285,42 +431,36 @@ static void print_instr_cat2(instr_t *instr) static void print_instr_cat3(instr_t 
*instr) { instr_cat3_t *cat3 = &instr->cat3; - bool full = instr_cat3_full(cat3); + bool full = true; + + // XXX is this based on opc or some other bit? + switch (cat3->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? + full = false; + break; + } printf(" "); print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false); printf(", "); - if (cat3->c1.src1_c) { - print_reg_src((reg_t)(cat3->c1.src1), full, - cat3->src1_r, cat3->c1.src1_c, false, cat3->src1_neg, - false, false); - } else if (cat3->rel1.src1_rel) { - print_reg_src((reg_t)(cat3->rel1.src1), full, - cat3->src1_r, cat3->rel1.src1_c, false, cat3->src1_neg, - false, cat3->rel1.src1_rel); - } else { - print_reg_src((reg_t)(cat3->src1), full, - cat3->src1_r, false, false, cat3->src1_neg, - false, false); - } + print_reg_src((reg_t)(cat3->src1), full, + cat3->src1_r, cat3->src1_c, false, cat3->src1_neg, + false, cat3->src1_rel); printf(", "); print_reg_src((reg_t)cat3->src2, full, cat3->src2_r, cat3->src2_c, false, cat3->src2_neg, false, false); printf(", "); - if (cat3->c2.src3_c) { - print_reg_src((reg_t)(cat3->c2.src3), full, - cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg, - false, false); - } else if (cat3->rel2.src3_rel) { - print_reg_src((reg_t)(cat3->rel2.src3), full, - cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg, - false, cat3->rel2.src3_rel); - } else { - print_reg_src((reg_t)(cat3->src3), full, - cat3->src3_r, false, false, cat3->src3_neg, - false, false); - } + print_reg_src((reg_t)(cat3->src3), full, + cat3->src3_r, cat3->src3_c, false, cat3->src3_neg, + false, cat3->src3_rel); } static void print_instr_cat4(instr_t *instr) @@ -330,20 +470,9 @@ static void print_instr_cat4(instr_t *instr) printf(" "); print_reg_dst((reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); printf(", "); - - if (cat4->c.src_c) { - print_reg_src((reg_t)(cat4->c.src), 
cat4->full, - cat4->src_r, cat4->c.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } else if (cat4->rel.src_rel) { - print_reg_src((reg_t)(cat4->rel.src), cat4->full, - cat4->src_r, cat4->rel.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, cat4->rel.src_rel); - } else { - print_reg_src((reg_t)(cat4->src), cat4->full, - cat4->src_r, false, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } + print_reg_src((reg_t)(cat4->src), cat4->full, + cat4->src_r, cat4->src_c, cat4->src_im, + cat4->src_neg, cat4->src_abs, cat4->src_rel); if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) printf("\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); @@ -733,20 +862,26 @@ struct opc_info { #undef OPC }; -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)])) +#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)])) -// XXX hack.. probably should move this table somewhere common: -#include "ir3.h" -const char *ir3_instr_name(struct ir3_instruction *instr) +static uint32_t getopc(instr_t *instr) { - if (instr->category == -1) return "??meta??"; - return opcs[(instr->category << NOPC_BITS) | instr->opc].name; + switch (instr->opc_cat) { + case 0: return instr->cat0.opc; + case 1: return 0; + case 2: return instr->cat2.opc; + case 3: return instr->cat3.opc; + case 4: return instr->cat4.opc; + case 5: return instr->cat5.opc; + case 6: return instr->cat6.opc; + default: return 0; + } } static void print_instr(uint32_t *dwords, int level, int n) { instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr); + uint32_t opc = getopc(instr); const char *name; printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]); @@ -790,6 +925,8 @@ static void print_instr(uint32_t *dwords, int level, int n) } printf("\n"); + + process_reg_dst(); } int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type) @@ -798,8 +935,12 @@ int disasm_a3xx(uint32_t *dwords, int sizedwords, int 
level, enum shader_t type) assert((sizedwords % 2) == 0); + memset(&regs, 0, sizeof(regs)); + for (i = 0; i < sizedwords; i += 2) print_instr(&dwords[i], level, i/2); + print_reg_stats(level); + return 0; } diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index 3159e7ade..eabe21cb7 100644 --- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -39,168 +39,136 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" -#include "freedreno_lowering.h" - #include "fd3_compiler.h" #include "fd3_program.h" #include "fd3_util.h" #include "instr-a3xx.h" -#include "ir3.h" +#include "ir-a3xx.h" -/* NOTE on half/full precision: - * Currently, the front end (ie. basically this file) does everything in - * full precision (with the exception of trans_arl() which doesn't work - * currently.. we reject anything with relative addressing and fallback - * to old compiler). - * - * In the RA step, if half_precision, it will assign the output to hr0.x - * but use full precision everywhere else. - * - * Eventually we'll need a better way to communicate type information - * to RA so that it can more properly assign both half and full precision - * registers. (And presumably double precision pairs for a4xx?) This - * would let us make more use of half precision registers, while still - * keeping things like tex coords in full precision registers. - * - * Since the RA is dealing with patching instruction types for half - * precision output, we can ignore that in the front end and just always - * create full precision instructions. - */ +/* ************************************************************************* */ +/* split the out or find some helper to use.. like main/bitset.h.. 
*/ -struct fd3_compile_context { - const struct tgsi_token *tokens; - bool free_tokens; - struct ir3_shader *ir; - struct fd3_shader_variant *so; +#define MAX_REG 256 - struct ir3_block *block; - struct ir3_instruction *current_instr; +typedef uint8_t regmask_t[2 * MAX_REG / 8]; - /* we need to defer updates to block->outputs[] until the end - * of an instruction (so we don't see new value until *after* - * the src registers are processed) - */ - struct { - struct ir3_instruction *instr, **instrp; - } output_updates[16]; - unsigned num_output_updates; +static unsigned regmask_idx(struct ir3_register *reg) +{ + unsigned num = reg->num; + assert(num < MAX_REG); + if (reg->flags & IR3_REG_HALF) + num += MAX_REG; + return num; +} - /* are we in a sequence of "atomic" instructions? - */ - bool atomic; +static void regmask_set(regmask_t regmask, struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + regmask[idx / 8] |= 1 << (idx % 8); +} - /* For fragment shaders, from the hw perspective the only - * actual input is r0.xy position register passed to bary.f. - * But TGSI doesn't know that, it still declares things as - * IN[] registers. So we do all the input tracking normally - * and fix things up after compile_instructions() - * - * NOTE that frag_pos is the hardware position (possibly it - * is actually an index or tag or some such.. it is *not* - * values that can be directly used for gl_FragCoord..) 
- */ - struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4]; +static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + return regmask[idx / 8] & (1 << (idx % 8)); +} + +/* ************************************************************************* */ + +struct fd3_compile_context { + const struct tgsi_token *tokens; + struct ir3_shader *ir; + struct fd3_shader_stateobj *so; struct tgsi_parse_context parser; unsigned type; struct tgsi_shader_info info; - /* for calculating input/output positions/linkages: */ - unsigned next_inloc; + /* last input dst (for setting (ei) flag): */ + struct ir3_register *last_input; + unsigned next_inloc; unsigned num_internal_temps; - struct tgsi_src_register internal_temps[6]; + + /* track registers which need to synchronize w/ "complex alu" cat3 + * instruction pipeline: + */ + regmask_t needs_ss; + + /* track registers which need to synchronize with texture fetch + * pipeline: + */ + regmask_t needs_sy; + + /* inputs start at r0, temporaries start after last input, and + * outputs start after last temporary. + * + * We could be more clever, because this is not a hw restriction, + * but probably best just to implement an optimizing pass to + * reduce the # of registers used and get rid of redundant mov's + * (to output register). 
+ */ + unsigned base_reg[TGSI_FILE_COUNT]; /* idx/slot for last compiler generated immediate */ unsigned immediate_idx; - /* stack of branch instructions that mark (potentially nested) - * branch if/else/loop/etc + /* stack of branch instructions that start (potentially nested) + * branch instructions, so that we can fix up the branch targets + * so that we can fix up the branch target on the corresponding + * END instruction */ - struct { - struct ir3_instruction *instr, *cond; - bool inv; /* true iff in else leg of branch */ - } branch[16]; + struct ir3_instruction *branch[16]; unsigned int branch_count; - /* list of kill instructions: */ - struct ir3_instruction *kill[16]; - unsigned int kill_count; - /* used when dst is same as one of the src, to avoid overwriting a * src element before the remaining scalar instructions that make * up the vector operation */ struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; + struct tgsi_src_register tmp_src; }; - -static void vectorize(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, struct tgsi_dst_register *dst, - int nsrcs, ...); -static void create_mov(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *src); -static type_t get_ftype(struct fd3_compile_context *ctx); - static unsigned -compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, +compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, const struct tgsi_token *tokens) { unsigned ret; - struct tgsi_shader_info *info = &ctx->info; - const struct fd_lowering_config lconfig = { - .color_two_side = so->key.color_two_side, - .lower_DST = true, - .lower_XPD = true, - .lower_SCS = true, - .lower_LRP = true, - .lower_FRC = true, - .lower_POW = true, - .lower_LIT = true, - .lower_EXP = true, - .lower_LOG = true, - .lower_DP4 = true, - .lower_DP3 = true, - .lower_DPH = true, - .lower_DP2 = true, - .lower_DP2A = true, - }; - ctx->tokens = 
fd_transform_lowering(&lconfig, tokens, &ctx->info); - ctx->free_tokens = !!ctx->tokens; - if (!ctx->tokens) { - /* no lowering */ - ctx->tokens = tokens; - } + ctx->tokens = tokens; ctx->ir = so->ir; ctx->so = so; + ctx->last_input = NULL; ctx->next_inloc = 8; ctx->num_internal_temps = 0; ctx->branch_count = 0; - ctx->kill_count = 0; - ctx->block = NULL; - ctx->current_instr = NULL; - ctx->num_output_updates = 0; - ctx->atomic = false; - ctx->frag_pos = NULL; - ctx->frag_face = NULL; - - memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord)); - -#define FM(x) (1 << TGSI_FILE_##x) - /* optimize can't deal with relative addressing: */ - if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) | - FM(OUTPUT) | FM(IMMEDIATE) | FM(CONSTANT))) - return TGSI_PARSE_ERROR; - /* Immediates go after constants: */ - so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1; - ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); + memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); + memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); + memset(ctx->base_reg, 0, sizeof(ctx->base_reg)); - ret = tgsi_parse_init(&ctx->parser, ctx->tokens); + tgsi_scan_shader(tokens, &ctx->info); + + /* Immediates go after constants: */ + ctx->base_reg[TGSI_FILE_CONSTANT] = 0; + ctx->base_reg[TGSI_FILE_IMMEDIATE] = + ctx->info.file_count[TGSI_FILE_CONSTANT]; + + /* Temporaries after outputs after inputs: */ + ctx->base_reg[TGSI_FILE_INPUT] = 0; + ctx->base_reg[TGSI_FILE_OUTPUT] = + ctx->info.file_count[TGSI_FILE_INPUT]; + ctx->base_reg[TGSI_FILE_TEMPORARY] = + ctx->info.file_count[TGSI_FILE_INPUT] + + ctx->info.file_count[TGSI_FILE_OUTPUT]; + + so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; + ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] + + ctx->info.file_count[TGSI_FILE_IMMEDIATE]); + + ret = tgsi_parse_init(&ctx->parser, tokens); if (ret != TGSI_PARSE_OK) return ret; @@ -210,25 +178,8 @@ compile_init(struct fd3_compile_context *ctx, struct 
fd3_shader_variant *so, } static void -compile_error(struct fd3_compile_context *ctx, const char *format, ...) -{ - va_list ap; - va_start(ap, format); - _debug_vprintf(format, ap); - va_end(ap); - tgsi_dump(ctx->tokens, 0); - debug_assert(0); -} - -#define compile_assert(ctx, cond) do { \ - if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ - } while (0) - -static void compile_free(struct fd3_compile_context *ctx) { - if (ctx->free_tokens) - free((void *)ctx->tokens); tgsi_parse_free(&ctx->parser); } @@ -242,385 +193,55 @@ struct instr_translater { unsigned arg; }; -static void -instr_finish(struct fd3_compile_context *ctx) -{ - unsigned i; - - if (ctx->atomic) - return; - - for (i = 0; i < ctx->num_output_updates; i++) - *(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr; - - ctx->num_output_updates = 0; -} - -/* For "atomic" groups of instructions, for example the four scalar - * instructions to perform a vec4 operation. Basically this just - * blocks out handling of output_updates so the next scalar instruction - * still sees the result from before the start of the atomic group. - * - * NOTE: when used properly, this could probably replace get/put_dst() - * stuff. 
- */ -static void -instr_atomic_start(struct fd3_compile_context *ctx) -{ - ctx->atomic = true; -} - -static void -instr_atomic_end(struct fd3_compile_context *ctx) -{ - ctx->atomic = false; - instr_finish(ctx); -} - -static struct ir3_instruction * -instr_create(struct fd3_compile_context *ctx, int category, opc_t opc) -{ - instr_finish(ctx); - return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc)); -} - -static struct ir3_instruction * -instr_clone(struct fd3_compile_context *ctx, struct ir3_instruction *instr) -{ - instr_finish(ctx); - return (ctx->current_instr = ir3_instr_clone(instr)); -} - -static struct ir3_block * -push_block(struct fd3_compile_context *ctx) -{ - struct ir3_block *block; - unsigned ntmp, nin, nout; - -#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1)) - - /* hmm, give ourselves room to create 4 extra temporaries (vec4): - */ - ntmp = SCALAR_REGS(TEMPORARY); - ntmp += 4 * 4; - - nout = SCALAR_REGS(OUTPUT); - nin = SCALAR_REGS(INPUT); - - /* for outermost block, 'inputs' are the actual shader INPUT - * register file. Reads from INPUT registers always go back to - * top block. For nested blocks, 'inputs' is used to track any - * TEMPORARY file register from one of the enclosing blocks that - * is ready in this block. 
- */ - if (!ctx->block) { - /* NOTE: fragment shaders actually have two inputs (r0.xy, the - * position) - */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - int n = 2; - if (ctx->info.reads_position) - n += 4; - if (ctx->info.uses_frontface) - n += 4; - nin = MAX2(n, nin); - nout += ARRAY_SIZE(ctx->kill); - } - } else { - nin = ntmp; - } - - block = ir3_block_create(ctx->ir, ntmp, nin, nout); - - if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block) - block->noutputs -= ARRAY_SIZE(ctx->kill); - - block->parent = ctx->block; - ctx->block = block; - - return block; -} - -static void -pop_block(struct fd3_compile_context *ctx) -{ - ctx->block = ctx->block->parent; - compile_assert(ctx, ctx->block); -} - -static void -ssa_dst(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - unsigned n = regid(dst->Index, chan); - unsigned idx = ctx->num_output_updates; - - compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates)); - - /* NOTE: defer update of temporaries[idx] or output[idx] - * until instr_finish(), so that if the current instruction - * reads the same TEMP/OUT[] it gets the old value: - * - * bleh.. this might be a bit easier to just figure out - * in instr_finish(). But at that point we've already - * lost information about OUTPUT vs TEMPORARY register - * file.. 
- */ - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - compile_assert(ctx, n < ctx->block->noutputs); - ctx->output_updates[idx].instrp = &ctx->block->outputs[n]; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - case TGSI_FILE_TEMPORARY: - compile_assert(ctx, n < ctx->block->ntemporaries); - ctx->output_updates[idx].instrp = &ctx->block->temporaries[n]; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - } -} - -static struct ir3_instruction * -create_output(struct ir3_block *block, struct ir3_instruction *instr, - unsigned n) -{ - struct ir3_instruction *out; - - out = ir3_instr_create(block, -1, OPC_META_OUTPUT); - out->inout.block = block; - ir3_reg_create(out, n, 0); - if (instr) - ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr; - - return out; -} - -static struct ir3_instruction * -create_input(struct ir3_block *block, struct ir3_instruction *instr, - unsigned n) -{ - struct ir3_instruction *in; - - in = ir3_instr_create(block, -1, OPC_META_INPUT); - in->inout.block = block; - ir3_reg_create(in, n, 0); - if (instr) - ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr; - - return in; -} - -static struct ir3_instruction * -block_input(struct ir3_block *block, unsigned n) -{ - /* references to INPUT register file always go back up to - * top level: - */ - if (block->parent) - return block_input(block->parent, n); - return block->inputs[n]; -} - -/* return temporary in scope, creating if needed meta-input node - * to track block inputs - */ -static struct ir3_instruction * -block_temporary(struct ir3_block *block, unsigned n) -{ - /* references to TEMPORARY register file, find the nearest - * enclosing block which has already assigned this temporary, - * creating meta-input instructions along the way to keep - * track of block inputs - */ - if (block->parent && !block->temporaries[n]) { - /* if already have input for this block, reuse: */ - if (!block->inputs[n]) - block->inputs[n] = 
block_temporary(block->parent, n); - - /* and create new input to return: */ - return create_input(block, block->inputs[n], n); - } - return block->temporaries[n]; -} - -static struct ir3_instruction * -create_immed(struct fd3_compile_context *ctx, float val) -{ - /* this can happen when registers (or components of a TGSI - * register) are used as src before they have been assigned - * (undefined contents). To avoid confusing the rest of the - * compiler, and to generally keep things peachy, substitute - * an instruction that sets the src to 0.0. Or to keep - * things undefined, I could plug in a random number? :-P - * - * NOTE: *don't* use instr_create() here! - */ - struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val; - return instr; -} - -static void -ssa_src(struct fd3_compile_context *ctx, struct ir3_register *reg, - const struct tgsi_src_register *src, unsigned chan) -{ - struct ir3_block *block = ctx->block; - unsigned n = regid(src->Index, chan); - - switch (src->File) { - case TGSI_FILE_INPUT: - reg->flags |= IR3_REG_SSA; - reg->instr = block_input(ctx->block, n); - break; - case TGSI_FILE_OUTPUT: - /* really this should just happen in case of 'MOV_SAT OUT[n], ..', - * for the following clamp instructions: - */ - reg->flags |= IR3_REG_SSA; - reg->instr = block->outputs[n]; - /* we don't have to worry about read from an OUTPUT that was - * assigned outside of the current block, because the _SAT - * clamp instructions will always be in the same block as - * the original instruction which wrote the OUTPUT - */ - compile_assert(ctx, reg->instr); - break; - case TGSI_FILE_TEMPORARY: - reg->flags |= IR3_REG_SSA; - reg->instr = block_temporary(ctx->block, n); - break; - } - - if ((reg->flags & IR3_REG_SSA) && !reg->instr) { - /* this can happen when registers (or 
components of a TGSI - * register) are used as src before they have been assigned - * (undefined contents). To avoid confusing the rest of the - * compiler, and to generally keep things peachy, substitute - * an instruction that sets the src to 0.0. Or to keep - * things undefined, I could plug in a random number? :-P - * - * NOTE: *don't* use instr_create() here! - */ - reg->instr = create_immed(ctx, 0.0); - } -} - static struct ir3_register * -add_dst_reg_wrmask(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, const struct tgsi_dst_register *dst, - unsigned chan, unsigned wrmask) +add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + const struct tgsi_dst_register *dst, unsigned chan) { unsigned flags = 0, num = 0; - struct ir3_register *reg; switch (dst->File) { case TGSI_FILE_OUTPUT: case TGSI_FILE_TEMPORARY: - /* uses SSA */ - break; - case TGSI_FILE_ADDRESS: - num = REG_A0; + num = dst->Index + ctx->base_reg[dst->File]; break; default: - compile_error(ctx, "unsupported dst register file: %s\n", + DBG("unsupported dst register file: %s", tgsi_file_name(dst->File)); + assert(0); break; } - if (dst->Indirect) - flags |= IR3_REG_RELATIV; - - reg = ir3_reg_create(instr, regid(num, chan), flags); - - /* NOTE: do not call ssa_dst() if atomic.. vectorize() - * itself will call ssa_dst(). This is to filter out - * the (initially bogus) .x component dst which is - * created (but not necessarily used, ie. 
if the net - * result of the vector operation does not write to - * the .x component) - */ - - reg->wrmask = wrmask; - if (wrmask == 0x1) { - /* normal case */ - if (!ctx->atomic) - ssa_dst(ctx, instr, dst, chan); - } else if ((dst->File == TGSI_FILE_TEMPORARY) || - (dst->File == TGSI_FILE_OUTPUT)) { - unsigned i; - - /* if instruction writes multiple, we need to create - * some place-holder collect the registers: - */ - for (i = 0; i < 4; i++) { - if (wrmask & (1 << i)) { - struct ir3_instruction *collect = - ir3_instr_create(ctx->block, -1, OPC_META_FO); - collect->fo.off = i; - /* unused dst reg: */ - ir3_reg_create(collect, 0, 0); - /* and src reg used to hold original instr */ - ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = instr; - if (!ctx->atomic) - ssa_dst(ctx, collect, dst, chan+i); - } - } - } - - return reg; -} + if (ctx->so->half_precision) + flags |= IR3_REG_HALF; -static struct ir3_register * -add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1); + return ir3_reg_create(instr, regid(num, chan), flags); } static struct ir3_register * -add_src_reg_wrmask(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, const struct tgsi_src_register *src, - unsigned chan, unsigned wrmask) +add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + const struct tgsi_src_register *src, unsigned chan) { unsigned flags = 0, num = 0; struct ir3_register *reg; - /* TODO we need to use a mov to temp for const >= 64.. or maybe - * we could use relative addressing.. - */ - compile_assert(ctx, src->Index < 64); - switch (src->File) { case TGSI_FILE_IMMEDIATE: /* TODO if possible, use actual immediate instead of const.. but * TGSI has vec4 immediates, we can only embed scalar (of limited * size, depending on instruction..) 
*/ - flags |= IR3_REG_CONST; - num = src->Index + ctx->so->first_immediate; - break; case TGSI_FILE_CONSTANT: flags |= IR3_REG_CONST; - num = src->Index; + num = src->Index + ctx->base_reg[src->File]; break; - case TGSI_FILE_OUTPUT: - /* NOTE: we should only end up w/ OUTPUT file for things like - * clamp()'ing saturated dst instructions - */ case TGSI_FILE_INPUT: case TGSI_FILE_TEMPORARY: - /* uses SSA */ + num = src->Index + ctx->base_reg[src->File]; break; default: - compile_error(ctx, "unsupported src register file: %s\n", + DBG("unsupported src register file: %s", tgsi_file_name(src->File)); + assert(0); break; } @@ -628,54 +249,24 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx, flags |= IR3_REG_ABS; if (src->Negate) flags |= IR3_REG_NEGATE; - if (src->Indirect) - flags |= IR3_REG_RELATIV; + if (ctx->so->half_precision) + flags |= IR3_REG_HALF; reg = ir3_reg_create(instr, regid(num, chan), flags); - reg->wrmask = wrmask; - if (wrmask == 0x1) { - /* normal case */ - ssa_src(ctx, reg, src, chan); - } else if ((src->File == TGSI_FILE_TEMPORARY) || - (src->File == TGSI_FILE_OUTPUT) || - (src->File == TGSI_FILE_INPUT)) { - struct ir3_instruction *collect; - unsigned i; - - /* if instruction reads multiple, we need to create - * some place-holder collect the registers: - */ - collect = ir3_instr_create(ctx->block, -1, OPC_META_FI); - ir3_reg_create(collect, 0, 0); /* unused dst reg */ - - for (i = 0; i < 4; i++) { - if (wrmask & (1 << i)) { - /* and src reg used point to the original instr */ - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - src, chan + i); - } else if (wrmask & ~((i << i) - 1)) { - /* if any remaining components, then dummy - * placeholder src reg to fill in the blanks: - */ - ir3_reg_create(collect, 0, 0); - } - } + if (regmask_get(ctx->needs_ss, reg)) { + instr->flags |= IR3_INSTR_SS; + memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); + } - reg->flags |= IR3_REG_SSA; - reg->instr = collect; + if (regmask_get(ctx->needs_sy, reg)) { 
+ instr->flags |= IR3_INSTR_SY; + memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); } return reg; } -static struct ir3_register * -add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_src_register *src, unsigned chan) -{ - return add_src_reg_wrmask(ctx, instr, src, chan, 0x1); -} - static void src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) { @@ -694,38 +285,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) /* Get internal-temp src/dst to use for a sequence of instructions * generated by a single TGSI op. */ -static struct tgsi_src_register * +static void get_internal_temp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) -{ - struct tgsi_src_register *tmp_src; - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - - tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; - - src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -/* Get internal half-precision temp src/dst to use for a sequence of - * instructions generated by a single TGSI op. 
- */ -static struct tgsi_src_register * -get_internal_temp_hr(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) + struct tgsi_dst_register *tmp_dst, + struct tgsi_src_register *tmp_src) { - struct tgsi_src_register *tmp_src; int n; tmp_dst->File = TGSI_FILE_TEMPORARY; @@ -735,79 +299,23 @@ get_internal_temp_hr(struct fd3_compile_context *ctx, /* assign next temporary: */ n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - /* just use hr0 because no one else should be using half- - * precision regs: - */ - tmp_dst->Index = 0; + tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n; src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -static inline bool -is_const(struct tgsi_src_register *src) -{ - return (src->File == TGSI_FILE_CONSTANT) || - (src->File == TGSI_FILE_IMMEDIATE); -} - -static inline bool -is_relative(struct tgsi_src_register *src) -{ - return src->Indirect; -} - -static inline bool -is_rel_or_const(struct tgsi_src_register *src) -{ - return is_relative(src) || is_const(src); -} - -static type_t -get_ftype(struct fd3_compile_context *ctx) -{ - return TYPE_F32; -} - -static type_t -get_utype(struct fd3_compile_context *ctx) -{ - return TYPE_U32; } -static unsigned -src_swiz(struct tgsi_src_register *src, int chan) -{ - switch (chan) { - case 0: return src->SwizzleX; - case 1: return src->SwizzleY; - case 2: return src->SwizzleZ; - case 3: return src->SwizzleW; - } - assert(0); - return 0; -} - -/* for instructions that cannot take a const register as src, if needed - * generate a move to temporary gpr: +/* same as get_internal_temp, but w/ src.xxxx (for instructions that + * replicate their results) */ -static struct tgsi_src_register * -get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) +static void +get_internal_temp_repl(struct fd3_compile_context *ctx, + struct tgsi_dst_register *tmp_dst, + struct tgsi_src_register 
*tmp_src) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - compile_assert(ctx, is_rel_or_const(src)); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - create_mov(ctx, &tmp_dst, src); - - return tmp_src; + get_internal_temp(ctx, tmp_dst, tmp_src); + tmp_src->SwizzleX = tmp_src->SwizzleY = + tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; } static void @@ -857,63 +365,46 @@ get_immediate(struct fd3_compile_context *ctx, reg->SwizzleW = swiz2tgsi[swiz]; } +static type_t +get_type(struct fd3_compile_context *ctx) +{ + return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; +} + +static unsigned +src_swiz(struct tgsi_src_register *src, int chan) +{ + switch (chan) { + case 0: return src->SwizzleX; + case 1: return src->SwizzleY; + case 2: return src->SwizzleZ; + case 3: return src->SwizzleW; + } + assert(0); + return 0; +} + static void create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src) { - type_t type_mov = get_ftype(ctx); + type_t type_mov = get_type(ctx); unsigned i; for (i = 0; i < 4; i++) { /* move to destination: */ if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *instr; - - if (src->Absolute || src->Negate) { - /* can't have abs or neg on a mov instr, so use - * absneg.f instead to handle these cases: - */ - instr = instr_create(ctx, 2, OPC_ABSNEG_F); - } else { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - } - + struct ir3_instruction *instr = + ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = type_mov; + instr->cat1.dst_type = type_mov; add_dst_reg(ctx, instr, dst, i); add_src_reg(ctx, instr, src, src_swiz(src, i)); + } else { + ir3_instr_create(ctx->ir, 0, OPC_NOP); } } -} - -static void -create_clamp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *val, - struct tgsi_src_register *minval, struct tgsi_src_register *maxval) -{ - struct 
ir3_instruction *instr; - - instr = instr_create(ctx, 2, OPC_MAX_F); - vectorize(ctx, instr, dst, 2, val, 0, minval, 0); - instr = instr_create(ctx, 2, OPC_MIN_F); - vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); -} - -static void -create_clamp_imm(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, - uint32_t minval, uint32_t maxval) -{ - struct tgsi_src_register minconst, maxconst; - struct tgsi_src_register src; - - src_from_dst(&src, dst); - - get_immediate(ctx, &minconst, minval); - get_immediate(ctx, &maxconst, maxval); - - create_clamp(ctx, dst, &src, &minconst, &maxconst); } static struct tgsi_dst_register * @@ -924,13 +415,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { struct tgsi_src_register *src = &inst->Src[i].Register; if ((src->File == dst->File) && (src->Index == dst->Index)) { - if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) && - (src->SwizzleX == TGSI_SWIZZLE_X) && - (src->SwizzleY == TGSI_SWIZZLE_Y) && - (src->SwizzleZ == TGSI_SWIZZLE_Z) && - (src->SwizzleW == TGSI_SWIZZLE_W)) - continue; - ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); + get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src); ctx->tmp_dst.WriteMask = dst->WriteMask; dst = &ctx->tmp_dst; break; @@ -945,7 +430,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, { /* if necessary, add mov back into original dst: */ if (dst != &inst->Dst[0].Register) { - create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); + create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src); } } @@ -959,26 +444,14 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, va_list ap; int i, j, n = 0; - instr_atomic_start(ctx); - - add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X); + add_dst_reg(ctx, instr, dst, 0); va_start(ap, nsrcs); for (j = 0; j < nsrcs; j++) { struct tgsi_src_register *src = va_arg(ap, struct tgsi_src_register *); unsigned flags = 
va_arg(ap, unsigned); - struct ir3_register *reg; - if (flags & IR3_REG_IMMED) { - reg = ir3_reg_create(instr, 0, IR3_REG_IMMED); - /* this is an ugly cast.. should have put flags first! */ - reg->iim_val = *(int *)&src; - } else { - reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X); - } - reg->flags |= flags & ~IR3_REG_NEGATE; - if (flags & IR3_REG_NEGATE) - reg->flags ^= IR3_REG_NEGATE; + add_src_reg(ctx, instr, src, 0)->flags |= flags; } va_end(ap); @@ -989,32 +462,33 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, if (n++ == 0) { cur = instr; } else { - cur = instr_clone(ctx, instr); + cur = ir3_instr_clone(instr); + cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP); } - ssa_dst(ctx, cur, dst, i); - /* fix-up dst register component: */ cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i); /* fix-up src register component: */ va_start(ap, nsrcs); for (j = 0; j < nsrcs; j++) { - struct ir3_register *reg = cur->regs[j+1]; struct tgsi_src_register *src = va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - if (reg->flags & IR3_REG_SSA) { - ssa_src(ctx, reg, src, src_swiz(src, i)); - } else if (!(flags & IR3_REG_IMMED)) { - reg->num = regid(reg->num >> 2, src_swiz(src, i)); - } + (void)va_arg(ap, unsigned); + cur->regs[j+1]->num = + regid(cur->regs[j+1]->num >> 2, + src_swiz(src, i)); } va_end(ap); } } - instr_atomic_end(ctx); + /* pad w/ nop's.. at least until we are clever enough to + * figure out if we really need to.. 
+ */ + for (; n < 4; n++) { + ir3_instr_create(instr->shader, 0, OPC_NOP); + } } /* @@ -1023,832 +497,397 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, */ static void -trans_clamp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct tgsi_src_register *src2 = &inst->Src[2].Register; - - create_clamp(ctx, dst, src0, src1, src2); - - put_dst(ctx, inst, dst); -} - -/* ARL(x) = x, but mova from hrN.x to a0.. */ -static void -trans_arl(const struct instr_translater *t, +trans_dotp(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src = &inst->Src[0].Register; - unsigned chan = src->SwizzleX; - compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS); - - tmp_src = get_internal_temp_hr(ctx, &tmp_dst); - - /* cov.{f32,f16}s16 Rtmp, Rsrc */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, src, chan); - - /* shl.b Rtmp, Rtmp, 2 */ - instr = instr_create(ctx, 2, OPC_SHL_B); - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - - /* mova a0, Rtmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_S16; - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; -} - -/* - * texture 
fetch/sample instructions: - */ + struct tgsi_src_register tmp_src; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; + unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW }; + unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW }; + opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32; + unsigned n = t->arg; /* number of components */ + unsigned i; -struct tex_info { - int8_t order[4]; - unsigned src_wrmask, flags; -}; + get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); -static const struct tex_info * -get_tex_info(struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - static const struct tex_info tex1d = { - .order = { 0, -1, -1, -1 }, /* coord.x */ - .src_wrmask = TGSI_WRITEMASK_XY, - .flags = 0, - }; - static const struct tex_info tex1ds = { - .order = { 0, -1, 2, -1 }, /* coord.xz */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_S, - }; - static const struct tex_info tex2d = { - .order = { 0, 1, -1, -1 }, /* coord.xy */ - .src_wrmask = TGSI_WRITEMASK_XY, - .flags = 0, - }; - static const struct tex_info tex2ds = { - .order = { 0, 1, 2, -1 }, /* coord.xyz */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_S, - }; - static const struct tex_info tex3d = { - .order = { 0, 1, 2, -1 }, /* coord.xyz */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_3D, - }; - static const struct tex_info tex3ds = { - .order = { 0, 1, 2, 3 }, /* coord.xyzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_S | IR3_INSTR_3D, - }; - static const struct tex_info txp1d = { - .order = { 0, -1, 3, -1 }, /* coord.xw */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_P, - }; - static const struct tex_info txp1ds = { - .order = { 0, -1, 2, 3 }, /* coord.xzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_P | 
IR3_INSTR_S, - }; - static const struct tex_info txp2d = { - .order = { 0, 1, 3, -1 }, /* coord.xyw */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_P, - }; - static const struct tex_info txp2ds = { - .order = { 0, 1, 2, 3 }, /* coord.xyzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_P | IR3_INSTR_S, - }; - static const struct tex_info txp3d = { - .order = { 0, 1, 2, 3 }, /* coord.xyzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_P | IR3_INSTR_3D, - }; + /* Blob compiler never seems to use a const in src1 position for + * mad.*, although there does seem (according to disassembler + * hidden in libllvm-a3xx.so) to be a bit to indicate that src1 + * is a const. Not sure if this is a hw bug, or simply that the + * disassembler lies. + */ + if ((src1->File == TGSI_FILE_IMMEDIATE) || + (src1->File == TGSI_FILE_CONSTANT)) { - unsigned tex = inst->Texture.Texture; + /* the mov to tmp unswizzles src1, so now we have tmp.xyzw: + */ + for (i = 0; i < 4; i++) + swiz1[i] = i; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_TEX: - switch (tex) { - case TGSI_TEXTURE_1D: - return &tex1d; - case TGSI_TEXTURE_SHADOW1D: - return &tex1ds; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - return &tex2d; - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - return &tex2ds; - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - return &tex3d; - case TGSI_TEXTURE_SHADOWCUBE: - return &tex3ds; - default: - compile_error(ctx, "unknown texture type: %s\n", - tgsi_texture_names[tex]); - return NULL; - } - break; - case TGSI_OPCODE_TXP: - switch (tex) { - case TGSI_TEXTURE_1D: - return &txp1d; - case TGSI_TEXTURE_SHADOW1D: - return &txp1ds; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - return &txp2d; - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - return &txp2ds; - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - return &txp3d; - default: - compile_error(ctx, "unknown texture type: %s\n", - 
tgsi_texture_names[tex]); - break; - } - break; + /* the first mul.f will clobber tmp.x, but that is ok + * because after that point we no longer need tmp.x: + */ + create_mov(ctx, &tmp_dst, src1); + src1 = &tmp_src; } - compile_assert(ctx, 0); - return NULL; -} -static struct tgsi_src_register * -get_tex_coord(struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst, - const struct tex_info *tinf) -{ - struct tgsi_src_register *coord = &inst->Src[0].Register; - struct ir3_instruction *instr; - unsigned tex = inst->Texture.Texture; - bool needs_mov = false; - unsigned i; + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, swiz0[0]); + add_src_reg(ctx, instr, src1, swiz1[0]); - /* cat5 instruction cannot seem to handle const or relative: */ - if (is_rel_or_const(coord)) - needs_mov = true; + for (i = 1; i < n; i++) { + ir3_instr_create(ctx->ir, 0, OPC_NOP); - /* 1D textures we fix up w/ 0.0 as 2nd coord: */ - if ((tex == TGSI_TEXTURE_1D) || (tex == TGSI_TEXTURE_SHADOW1D)) - needs_mov = true; + instr = ir3_instr_create(ctx->ir, 3, opc_mad); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, swiz0[i]); + add_src_reg(ctx, instr, src1, swiz1[i]); + add_src_reg(ctx, instr, &tmp_src, 0); + } - /* The texture sample instructions need to coord in successive - * registers/components (ie. src.xy but not src.yx). And TXP - * needs the .w component in .z for 2D.. 
so in some cases we - * might need to emit some mov instructions to shuffle things - * around: - */ - for (i = 1; (i < 4) && (tinf->order[i] >= 0) && !needs_mov; i++) - if (src_swiz(coord, i) != (src_swiz(coord, 0) + tinf->order[i])) - needs_mov = true; + /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ + if (t->tgsi_opc == TGSI_OPCODE_DPH) { + ir3_instr_create(ctx->ir, 0, OPC_NOP); - if (needs_mov) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - unsigned j; + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src1, swiz1[i]); + add_src_reg(ctx, instr, &tmp_src, 0); - type_t type_mov = get_ftype(ctx); + n++; + } - /* need to move things around: */ - tmp_src = get_internal_temp(ctx, &tmp_dst); + ir3_instr_create(ctx->ir, 0, OPC_NOP); - for (j = 0; j < 4; j++) { - if (tinf->order[j] < 0) - continue; - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, j); - add_src_reg(ctx, instr, coord, - src_swiz(coord, tinf->order[j])); - } + /* pad out to multiple of 4 scalar instructions: */ + for (i = 2 * n; i % 4; i++) { + ir3_instr_create(ctx->ir, 0, OPC_NOP); + } - /* fix up .y coord: */ - if ((tex == TGSI_TEXTURE_1D) || - (tex == TGSI_TEXTURE_SHADOW1D)) { - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5; - } + create_mov(ctx, dst, &tmp_src); +} - coord = tmp_src; - } +/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ +static void +trans_lrp(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst1, tmp_dst2; + struct tgsi_src_register tmp_src1, tmp_src2; + struct tgsi_src_register tmp_const; 
+ + get_internal_temp(ctx, &tmp_dst1, &tmp_src1); + get_internal_temp(ctx, &tmp_dst2, &tmp_src2); + + get_immediate(ctx, &tmp_const, fui(1.0)); + + /* tmp1 = (a * b) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + vectorize(ctx, instr, &tmp_dst1, 2, + &inst->Src[0].Register, 0, + &inst->Src[1].Register, 0); + + /* tmp2 = (1 - a) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, &tmp_dst2, 2, + &tmp_const, 0, + &inst->Src[0].Register, IR3_REG_NEGATE); + + /* tmp2 = tmp2 * c */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + vectorize(ctx, instr, &tmp_dst2, 2, + &tmp_src2, 0, + &inst->Src[2].Register, 0); - return coord; + /* dst = tmp1 + tmp2 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, &inst->Dst[0].Register, 2, + &tmp_src1, 0, + &tmp_src2, 0); } +/* FRC(x) = x - FLOOR(x) */ static void -trans_samp(const struct instr_translater *t, +trans_frac(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *coord; - struct tgsi_src_register *samp = &inst->Src[1].Register; - const struct tex_info *tinf; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; - tinf = get_tex_info(ctx, inst); - coord = get_tex_coord(ctx, inst, tinf); + get_internal_temp(ctx, &tmp_dst, &tmp_src); - instr = instr_create(ctx, 5, t->opc); - instr->cat5.type = get_ftype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - instr->flags |= tinf->flags; + /* tmp = FLOOR(x) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); + vectorize(ctx, instr, &tmp_dst, 1, + &inst->Src[0].Register, 0); - add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask); - add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf->src_wrmask); + /* dst = x - tmp */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, 
&inst->Dst[0].Register, 2, + &inst->Src[0].Register, 0, + &tmp_src, IR3_REG_NEGATE); } -/* - * SEQ(a,b) = (a == b) ? 1.0 : 0.0 - * cmps.f.eq tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SNE(a,b) = (a != b) ? 1.0 : 0.0 - * cmps.f.ne tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SGE(a,b) = (a >= b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLE(a,b) = (a <= b) ? 1.0 : 0.0 - * cmps.f.le tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SGT(a,b) = (a > b) ? 1.0 : 0.0 - * cmps.f.gt tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLT(a,b) = (a < b) ? 1.0 : 0.0 - * cmps.f.lt tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * CMP(a,b,c) = (a < 0.0) ? b : c - * cmps.f.lt tmp0, a, {0.0} - * sel.b16 dst, b, tmp0, c - */ +/* POW(a,b) = EXP2(b * LOG2(a)) */ static void -trans_cmp(const struct instr_translater *t, +trans_pow(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; + struct ir3_register *r; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval0; - /* final instruction for CMP() uses orig src1 and src2: */ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a0, *a1, *a2; - unsigned condition; - - tmp_src = get_internal_temp(ctx, &tmp_dst); + struct tgsi_src_register tmp_src; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ + get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_FSEQ: - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_FSNE: - condition = IR3_COND_NE; - break; - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_FSGE: - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_FSLT: - 
condition = IR3_COND_LT; - break; - case TGSI_OPCODE_SLE: - condition = IR3_COND_LE; - break; - case TGSI_OPCODE_SGT: - condition = IR3_COND_GT; - break; - case TGSI_OPCODE_CMP: - get_immediate(ctx, &constval0, fui(0.0)); - a0 = &inst->Src[0].Register; /* a */ - a1 = &constval0; /* {0.0} */ - condition = IR3_COND_LT; - break; - default: - compile_assert(ctx, 0); - return; - } - - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); + /* log2 Rtmp, Rsrc0 */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); + r = add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, src0->SwizzleX); + regmask_set(ctx->needs_ss, r); - /* cmps.f.<cond> tmp, a0, a1 */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); + /* mul.f Rtmp, Rtmp, Rsrc1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, src1, src1->SwizzleX); - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_FSEQ: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_FSGE: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_FSNE: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_FSLT: - /* cov.u16f16 dst, tmp0 */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, dst, 1, tmp_src, 0); - break; - case TGSI_OPCODE_CMP: - a1 = &inst->Src[1].Register; - a2 = &inst->Src[2].Register; - /* sel.{b32,b16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, OPC_SEL_B32); - vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); + /* blob compiler seems to ensure there are at least 6 instructions + * between a "simple" (non-cat4) instruction and a dependent cat4.. + * probably we need to handle this in some other places too. 
+ */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; - break; - } + /* exp2 Rdst, Rtmp */ + instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); + r = add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, &tmp_src, 0); + regmask_set(ctx->needs_ss, r); - put_dst(ctx, inst, dst); + create_mov(ctx, dst, &tmp_src); } -/* - * USNE(a,b) = (a != b) ? 1 : 0 - * cmps.u32.ne dst, a, b - * - * USEQ(a,b) = (a == b) ? 1 : 0 - * cmps.u32.eq dst, a, b - * - * ISGE(a,b) = (a > b) ? 1 : 0 - * cmps.s32.ge dst, a, b - * - * USGE(a,b) = (a > b) ? 1 : 0 - * cmps.u32.ge dst, a, b - * - * ISLT(a,b) = (a < b) ? 1 : 0 - * cmps.s32.lt dst, a, b - * - * USLT(a,b) = (a < b) ? 1 : 0 - * cmps.u32.lt dst, a, b - * - * UCMP(a,b,c) = (a < 0) ? b : c - * cmps.u32.lt tmp0, a, {0} - * sel.b16 dst, b, tmp0, c - */ +/* texture fetch/sample instructions: */ static void -trans_icmp(const struct instr_translater *t, +trans_samp(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { + struct ir3_register *r; struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register constval0; - struct tgsi_src_register *a0, *a1, *a2; - unsigned condition; - - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_src_register *coord = &inst->Src[0].Register; + struct tgsi_src_register *samp = &inst->Src[1].Register; + unsigned tex = inst->Texture.Texture; + int8_t *order; + unsigned i, j, flags = 0; - switch (t->tgsi_opc) { - case TGSI_OPCODE_USNE: - condition = IR3_COND_NE; - break; - case TGSI_OPCODE_USEQ: - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_USGE: - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_USLT: - condition = IR3_COND_LT; + switch (t->arg) { + case TGSI_OPCODE_TEX: + order = (tex == TGSI_TEXTURE_2D) ? 
+ (int8_t[4]){ 0, 1, -1, -1 } : /* 2D */ + (int8_t[4]){ 0, 1, 2, -1 }; /* 3D */ break; - case TGSI_OPCODE_UCMP: - get_immediate(ctx, &constval0, 0); - a0 = &inst->Src[0].Register; /* a */ - a1 = &constval0; /* {0} */ - condition = IR3_COND_LT; + case TGSI_OPCODE_TXP: + order = (tex == TGSI_TEXTURE_2D) ? + (int8_t[4]){ 0, 1, 3, -1 } : /* 2D */ + (int8_t[4]){ 0, 1, 2, 3 }; /* 3D */ + flags |= IR3_INSTR_P; break; - default: - compile_assert(ctx, 0); - return; + assert(0); + break; } - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - - if (t->tgsi_opc == TGSI_OPCODE_UCMP) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - tmp_src = get_internal_temp(ctx, &tmp_dst); - /* cmps.u32.lt tmp, a0, a1 */ - instr = instr_create(ctx, 2, t->opc); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - - a1 = &inst->Src[1].Register; - a2 = &inst->Src[2].Register; - /* sel.{b32,b16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, OPC_SEL_B32); - vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); - } else { - /* cmps.{u32,s32}.<cond> dst, a0, a1 */ - instr = instr_create(ctx, 2, t->opc); - instr->cat2.condition = condition; - vectorize(ctx, instr, dst, 2, a0, 0, a1, 0); - } - put_dst(ctx, inst, dst); -} + if (tex == TGSI_TEXTURE_3D) + flags |= IR3_INSTR_3D; -/* - * Conditional / Flow control - */ + /* The texture sample instructions need to coord in successive + * registers/components (ie. src.xy but not src.yx). And TXP + * needs the .w component in .z for 2D.. 
so in some cases we + * might need to emit some mov instructions to shuffle things + * around: + */ + for (i = 1; (i < 4) && (order[i] >= 0); i++) { + if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { + type_t type_mov = get_type(ctx); -static void -push_branch(struct fd3_compile_context *ctx, bool inv, - struct ir3_instruction *instr, struct ir3_instruction *cond) -{ - unsigned int idx = ctx->branch_count++; - compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch)); - ctx->branch[idx].instr = instr; - ctx->branch[idx].inv = inv; - /* else side of branch has same condition: */ - if (!inv) - ctx->branch[idx].cond = cond; -} + /* need to move things around: */ + get_internal_temp(ctx, &tmp_dst, &tmp_src); -static struct ir3_instruction * -pop_branch(struct fd3_compile_context *ctx) -{ - unsigned int idx = --ctx->branch_count; - return ctx->branch[idx].instr; -} + for (j = 0; (j < 4) && (order[j] >= 0); j++) { + instr = ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = type_mov; + instr->cat1.dst_type = type_mov; + add_dst_reg(ctx, instr, &tmp_dst, j); + add_src_reg(ctx, instr, coord, + src_swiz(coord, order[j])); + } -static void -trans_if(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr, *cond; - struct tgsi_src_register *src = &inst->Src[0].Register; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval; + coord = &tmp_src; - get_immediate(ctx, &constval, fui(0.0)); - tmp_src = get_internal_temp(ctx, &tmp_dst); + if (j < 4) + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; - if (is_const(src)) - src = get_unconst(ctx, src); + break; + } + } - /* cmps.f.ne tmp0, b, {0.0} */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src, src->SwizzleX); - add_src_reg(ctx, instr, &constval, constval.SwizzleX); - instr->cat2.condition = IR3_COND_NE; 
+ instr = ir3_instr_create(ctx->ir, 5, t->opc); + instr->cat5.type = get_type(ctx); + instr->cat5.samp = samp->Index; + instr->cat5.tex = samp->Index; + instr->flags |= flags; - compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */ - cond = instr->regs[1]->instr; + r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0); + r->wrmask = inst->Dst[0].Register.WriteMask; - /* meta:flow tmp0 */ - instr = instr_create(ctx, -1, OPC_META_FLOW); - ir3_reg_create(instr, 0, 0); /* dummy dst */ - add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X); + add_src_reg(ctx, instr, coord, coord->SwizzleX); - push_branch(ctx, false, instr, cond); - instr->flow.if_block = push_block(ctx); + regmask_set(ctx->needs_sy, r); } +/* CMP(a,b,c) = (a < 0) ? b : c */ static void -trans_else(const struct instr_translater *t, +trans_cmp(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_src_register constval; + /* final instruction uses original src1 and src2, so we need get_dst() */ + struct tgsi_dst_register *dst = get_dst(ctx, inst); + + get_internal_temp(ctx, &tmp_dst, &tmp_src); - pop_block(ctx); + /* cmps.f.ge tmp, src0, 0.0 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + instr->cat2.condition = IR3_COND_GE; + get_immediate(ctx, &constval, fui(0.0)); + vectorize(ctx, instr, &tmp_dst, 2, + &inst->Src[0].Register, 0, + &constval, 0); - instr = pop_branch(ctx); + /* add.s tmp, tmp, -1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + instr->repeat = 3; + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, &tmp_src, 0); + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; - compile_assert(ctx, (instr->category == -1) && - (instr->opc == OPC_META_FLOW)); + /* sel.{f32,f16} dst, src2, tmp, src1 */ + instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? 
+ OPC_SEL_F16 : OPC_SEL_F32); + vectorize(ctx, instr, &inst->Dst[0].Register, 3, + &inst->Src[2].Register, 0, + &tmp_src, 0, + &inst->Src[1].Register, 0); - push_branch(ctx, true, instr, NULL); - instr->flow.else_block = push_block(ctx); + put_dst(ctx, inst, dst); } -static struct ir3_instruction * -find_temporary(struct ir3_block *block, unsigned n) -{ - if (block->parent && !block->temporaries[n]) - return find_temporary(block->parent, n); - return block->temporaries[n]; -} +/* + * Conditional / Flow control + */ -static struct ir3_instruction * -find_output(struct ir3_block *block, unsigned n) +static unsigned +find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr) { - if (block->parent && !block->outputs[n]) - return find_output(block->parent, n); - return block->outputs[n]; + unsigned i; + for (i = 0; i < ctx->ir->instrs_count; i++) + if (ctx->ir->instrs[i] == instr) + return i; + return ~0; } -static struct ir3_instruction * -create_phi(struct fd3_compile_context *ctx, struct ir3_instruction *cond, - struct ir3_instruction *a, struct ir3_instruction *b) +static void +push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr) { - struct ir3_instruction *phi; - - compile_assert(ctx, cond); - - /* Either side of the condition could be null.. which - * indicates a variable written on only one side of the - * branch. Normally this should only be variables not - * used outside of that side of the branch. So we could - * just 'return a ? a : b;' in that case. But for better - * defined undefined behavior we just stick in imm{0.0}. 
- * In the common case of a value only used within the - * one side of the branch, the PHI instruction will not - * get scheduled - */ - if (!a) - a = create_immed(ctx, 0.0); - if (!b) - b = create_immed(ctx, 0.0); - - phi = instr_create(ctx, -1, OPC_META_PHI); - ir3_reg_create(phi, 0, 0); /* dummy dst */ - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond; - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a; - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b; - - return phi; + ctx->branch[ctx->branch_count++] = instr; } static void -trans_endif(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) +pop_branch(struct fd3_compile_context *ctx) { struct ir3_instruction *instr; - struct ir3_block *ifb, *elseb; - struct ir3_instruction **ifout, **elseout; - unsigned i, ifnout = 0, elsenout = 0; - - pop_block(ctx); - - instr = pop_branch(ctx); - compile_assert(ctx, (instr->category == -1) && - (instr->opc == OPC_META_FLOW)); - - ifb = instr->flow.if_block; - elseb = instr->flow.else_block; - /* if there is no else block, the parent block is used for the - * branch-not-taken src of the PHI instructions: + /* if we were clever enough, we'd patch this up after the fact, + * and set (jp) flag on whatever the next instruction was, rather + * than inserting an extra nop.. 
*/ - if (!elseb) - elseb = ifb->parent; - - /* worst case sizes: */ - ifnout = ifb->ntemporaries + ifb->noutputs; - elsenout = elseb->ntemporaries + elseb->noutputs; - - ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout); - if (elseb != ifb->parent) - elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout); - - ifnout = 0; - elsenout = 0; - - /* generate PHI instructions for any temporaries written: */ - for (i = 0; i < ifb->ntemporaries; i++) { - struct ir3_instruction *a = ifb->temporaries[i]; - struct ir3_instruction *b = elseb->temporaries[i]; + instr = ir3_instr_create(ctx->ir, 0, OPC_NOP); + instr->flags |= IR3_INSTR_JP; - /* if temporary written in if-block, or if else block - * is present and temporary written in else-block: - */ - if (a || ((elseb != ifb->parent) && b)) { - struct ir3_instruction *phi; - - /* if only written on one side, find the closest - * enclosing update on other side: - */ - if (!a) - a = find_temporary(ifb, i); - if (!b) - b = find_temporary(elseb, i); - - ifout[ifnout] = a; - a = create_output(ifb, a, ifnout++); - - if (elseb != ifb->parent) { - elseout[elsenout] = b; - b = create_output(elseb, b, elsenout++); - } - - phi = create_phi(ctx, instr, a, b); - ctx->block->temporaries[i] = phi; - } - } - - compile_assert(ctx, ifb->noutputs == elseb->noutputs); - - /* .. 
and any outputs written: */ - for (i = 0; i < ifb->noutputs; i++) { - struct ir3_instruction *a = ifb->outputs[i]; - struct ir3_instruction *b = elseb->outputs[i]; - - /* if output written in if-block, or if else block - * is present and output written in else-block: - */ - if (a || ((elseb != ifb->parent) && b)) { - struct ir3_instruction *phi; - - /* if only written on one side, find the closest - * enclosing update on other side: - */ - if (!a) - a = find_output(ifb, i); - if (!b) - b = find_output(elseb, i); - - ifout[ifnout] = a; - a = create_output(ifb, a, ifnout++); - - if (elseb != ifb->parent) { - elseout[elsenout] = b; - b = create_output(elseb, b, elsenout++); - } - - phi = create_phi(ctx, instr, a, b); - ctx->block->outputs[i] = phi; - } - } - - ifb->noutputs = ifnout; - ifb->outputs = ifout; - - if (elseb != ifb->parent) { - elseb->noutputs = elsenout; - elseb->outputs = elseout; - } - - // TODO maybe we want to compact block->inputs? + /* pop the branch instruction from the stack and fix up branch target: */ + instr = ctx->branch[--ctx->branch_count]; + instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1; } -/* - * Kill +/* We probably don't really want to translate if/else/endif into branches.. + * the blob driver evaluates both legs of the if and then uses the sel + * instruction to pick which sides of the branch to "keep".. but figuring + * that out will take somewhat more compiler smarts. So hopefully branches + * don't kill performance too badly. 
*/ - static void -trans_kill(const struct instr_translater *t, +trans_if(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - struct ir3_instruction *instr, *immed, *cond = NULL; - bool inv = false; - - switch (t->tgsi_opc) { - case TGSI_OPCODE_KILL: - /* unconditional kill, use enclosing if condition: */ - if (ctx->branch_count > 0) { - unsigned int idx = ctx->branch_count - 1; - cond = ctx->branch[idx].cond; - inv = ctx->branch[idx].inv; - } else { - cond = create_immed(ctx, 1.0); - } - - break; - } - - compile_assert(ctx, cond); + struct ir3_instruction *instr; + struct tgsi_src_register *src = &inst->Src[0].Register; + struct tgsi_src_register constval; - immed = create_immed(ctx, 0.0); + get_immediate(ctx, &constval, fui(0.0)); - /* cmps.f.ne p0.x, cond, {0.0} */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = IR3_COND_NE; + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); ir3_reg_create(instr, regid(REG_P0, 0), 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; - cond = instr; - - /* kill p0.x */ - instr = instr_create(ctx, 0, OPC_KILL); - instr->cat0.inv = inv; - ir3_reg_create(instr, 0, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; + add_src_reg(ctx, instr, &constval, constval.SwizzleX); + add_src_reg(ctx, instr, src, src->SwizzleX); + instr->cat2.condition = IR3_COND_EQ; - ctx->kill[ctx->kill_count++] = instr; + instr = ir3_instr_create(ctx->ir, 0, OPC_BR); + push_branch(ctx, instr); } -/* - * Kill-If - */ - static void -trans_killif(const struct instr_translater *t, +trans_else(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr, *immed, *cond = NULL; - bool inv = false; - - immed = create_immed(ctx, 0.0); - - /* cmps.f.ne p0.x, cond, {0.0} */ - 
instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = IR3_COND_NE; - ir3_reg_create(instr, regid(REG_P0, 0), 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; - add_src_reg(ctx, instr, src, src->SwizzleX); - - cond = instr; - - /* kill p0.x */ - instr = instr_create(ctx, 0, OPC_KILL); - instr->cat0.inv = inv; - ir3_reg_create(instr, 0, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; + struct ir3_instruction *instr; - ctx->kill[ctx->kill_count++] = instr; + /* for first half of if/else/endif, generate a jump past the else: */ + instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP); + pop_branch(ctx); + push_branch(ctx, instr); } -/* - * I2F / U2F / F2I / F2U - */ static void -trans_cov(const struct instr_translater *t, +trans_endif(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - - // cov.f32s32 dst, tmp0 / - instr = instr_create(ctx, 1, 0); - switch (t->tgsi_opc) { - case TGSI_OPCODE_U2F: - instr->cat1.src_type = TYPE_U32; - instr->cat1.dst_type = TYPE_F32; - break; - case TGSI_OPCODE_I2F: - instr->cat1.src_type = TYPE_S32; - instr->cat1.dst_type = TYPE_F32; - break; - case TGSI_OPCODE_F2U: - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_U32; - break; - case TGSI_OPCODE_F2I: - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_S32; - break; - - } - vectorize(ctx, instr, dst, 1, src, 0); + pop_branch(ctx); } /* @@ -1861,7 +900,7 @@ instr_cat0(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - instr_create(ctx, 0, t->opc); + ir3_instr_create(ctx->ir, 0, t->opc); } static void @@ -1871,7 +910,26 @@ instr_cat1(const struct instr_translater *t, { struct tgsi_dst_register *dst = get_dst(ctx, inst); struct tgsi_src_register *src = 
&inst->Src[0].Register; - create_mov(ctx, dst, src); + + /* mov instructions can't handle a negate on src: */ + if (src->Negate) { + struct tgsi_src_register constval; + struct ir3_instruction *instr; + + /* since right now, we are using uniformly either TYPE_F16 or + * TYPE_F32, and we don't utilize the conversion possibilities + * of mov instructions, we can get away with substituting an + * add.f which can handle negate. Might need to revisit this + * in the future if we start supporting widening/narrowing or + * conversion to/from integer.. + */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + get_immediate(ctx, &constval, fui(0.0)); + vectorize(ctx, instr, dst, 2, src, 0, &constval, 0); + } else { + create_mov(ctx, dst, src); + /* create_mov() generates vector sequence, so no vectorize() */ + } put_dst(ctx, inst, dst); } @@ -1881,20 +939,19 @@ instr_cat2(const struct instr_translater *t, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; struct ir3_instruction *instr; - unsigned src0_flags = 0, src1_flags = 0; + unsigned src0_flags = 0; + + instr = ir3_instr_create(ctx->ir, 2, t->opc); switch (t->tgsi_opc) { + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + instr->cat2.condition = t->arg; + break; case TGSI_OPCODE_ABS: - case TGSI_OPCODE_IABS: src0_flags = IR3_REG_ABS; break; - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_INEG: - src1_flags = IR3_REG_NEGATE; - break; } switch (t->opc) { @@ -1913,16 +970,13 @@ instr_cat2(const struct instr_translater *t, case OPC_SETRM: case OPC_CBITS_B: /* these only have one src reg */ - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 1, src0, src0_flags); + vectorize(ctx, instr, dst, 1, + &inst->Src[0].Register, src0_flags); break; default: - if (is_const(src0) && is_const(src1)) - src0 = get_unconst(ctx, src0); - - instr = instr_create(ctx, 2, 
t->opc); - vectorize(ctx, instr, dst, 2, src0, src0_flags, - src1, src1_flags); + vectorize(ctx, instr, dst, 2, + &inst->Src[0].Register, src0_flags, + &inst->Src[1].Register, 0); break; } @@ -1935,26 +989,29 @@ instr_cat3(const struct instr_translater *t, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; struct tgsi_src_register *src1 = &inst->Src[1].Register; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; struct ir3_instruction *instr; - /* in particular, can't handle const for src1 for cat3.. - * for mad, we can swap first two src's if needed: + /* Blob compiler never seems to use a const in src1 position.. + * although there does seem (according to disassembler hidden + * in libllvm-a3xx.so) to be a bit to indicate that src1 is a + * const. Not sure if this is a hw bug, or simply that the + * disassembler lies. */ - if (is_rel_or_const(src1)) { - if (is_mad(t->opc) && !is_rel_or_const(src0)) { - struct tgsi_src_register *tmp; - tmp = src0; - src0 = src1; - src1 = tmp; - } else { - src1 = get_unconst(ctx, src1); - } + if ((src1->File == TGSI_FILE_CONSTANT) || + (src1->File == TGSI_FILE_IMMEDIATE)) { + get_internal_temp(ctx, &tmp_dst, &tmp_src); + create_mov(ctx, &tmp_dst, src1); + src1 = &tmp_src; } - instr = instr_create(ctx, 3, t->opc); - vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? t->hopc : t->opc); + vectorize(ctx, instr, dst, 3, + &inst->Src[0].Register, 0, + src1, 0, &inst->Src[2].Register, 0); put_dst(ctx, inst, dst); } @@ -1965,22 +1022,15 @@ instr_cat4(const struct instr_translater *t, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; struct ir3_instruction *instr; - unsigned i; - /* seems like blob compiler avoids const as src.. 
*/ - if (is_const(src)) - src = get_unconst(ctx, src); + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + instr = ir3_instr_create(ctx->ir, 4, t->opc); - /* we need to replicate into each component: */ - for (i = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - instr = instr_create(ctx, 4, t->opc); - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src->SwizzleX); - } - } + vectorize(ctx, instr, dst, 1, + &inst->Src[0].Register, 0); + + regmask_set(ctx->needs_ss, instr->regs[0]); put_dst(ctx, inst, dst); } @@ -1995,446 +1045,141 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), - INSTR(SUB, instr_cat2, .opc = OPC_ADD_F), + INSTR(DP2, trans_dotp, .arg = 2), + INSTR(DP3, trans_dotp, .arg = 3), + INSTR(DP4, trans_dotp, .arg = 4), + INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */ INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), - INSTR(UADD, instr_cat2, .opc = OPC_ADD_U), - INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S), - INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U), - INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S), - INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U), - INSTR(AND, instr_cat2, .opc = OPC_AND_B), - INSTR(OR, instr_cat2, .opc = OPC_OR_B), - INSTR(NOT, instr_cat2, .opc = OPC_NOT_B), - INSTR(XOR, instr_cat2, .opc = OPC_XOR_B), - INSTR(UMUL, instr_cat2, .opc = OPC_MUL_U), - INSTR(SHL, instr_cat2, .opc = OPC_SHL_B), - INSTR(USHR, instr_cat2, .opc = OPC_SHR_B), - INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B), - INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S), - INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S), - INSTR(AND, instr_cat2, .opc = OPC_AND_B), + INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT), + INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE), INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), - INSTR(TRUNC, instr_cat2, .opc 
= OPC_TRUNC_F), - INSTR(CLAMP, trans_clamp), + INSTR(LRP, trans_lrp), + INSTR(FRC, trans_frac), INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), - INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F), - INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F), - INSTR(CEIL, instr_cat2, .opc = OPC_CEIL_F), - INSTR(ARL, trans_arl), INSTR(EX2, instr_cat4, .opc = OPC_EXP2), INSTR(LG2, instr_cat4, .opc = OPC_LOG2), + INSTR(POW, trans_pow), INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F), - INSTR(COS, instr_cat4, .opc = OPC_COS), - INSTR(SIN, instr_cat4, .opc = OPC_SIN), + INSTR(COS, instr_cat4, .opc = OPC_SIN), + INSTR(SIN, instr_cat4, .opc = OPC_COS), INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), - INSTR(SGT, trans_cmp), - INSTR(SLT, trans_cmp), - INSTR(FSLT, trans_cmp), - INSTR(SGE, trans_cmp), - INSTR(FSGE, trans_cmp), - INSTR(SLE, trans_cmp), - INSTR(SNE, trans_cmp), - INSTR(FSNE, trans_cmp), - INSTR(SEQ, trans_cmp), - INSTR(FSEQ, trans_cmp), INSTR(CMP, trans_cmp), - INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U), - INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U), - INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S), - INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U), - INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S), - INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U), - INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U), INSTR(IF, trans_if), - INSTR(UIF, trans_if), INSTR(ELSE, trans_else), INSTR(ENDIF, trans_endif), INSTR(END, instr_cat0, .opc = OPC_END), - INSTR(KILL, trans_kill, .opc = OPC_KILL), - INSTR(KILL_IF, trans_killif, .opc = OPC_KILL), - INSTR(I2F, trans_cov), - INSTR(U2F, trans_cov), - INSTR(F2I, trans_cov), - INSTR(F2U, trans_cov), }; -static fd3_semantic -decl_semantic(const struct tgsi_declaration_semantic *sem) -{ - return fd3_semantic_name(sem->Name, sem->Index); -} - -static struct ir3_instruction * -decl_in_frag_bary(struct fd3_compile_context *ctx, unsigned regid, - unsigned j, unsigned inloc) -{ - struct ir3_instruction *instr; - struct 
ir3_register *src; - - /* bary.f dst, #inloc, r0.x */ - instr = instr_create(ctx, 2, OPC_BARY_F); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc; - src = ir3_reg_create(instr, 0, IR3_REG_SSA); - src->wrmask = 0x3; - src->instr = ctx->frag_pos; - - return instr; -} - -/* TGSI_SEMANTIC_POSITION - * """""""""""""""""""""" - * - * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that - * fragment shader input contains the fragment's window position. The X - * component starts at zero and always increases from left to right. - * The Y component starts at zero and always increases but Y=0 may either - * indicate the top of the window or the bottom depending on the fragment - * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN). - * The Z coordinate ranges from 0 to 1 to represent depth from the front - * to the back of the Z buffer. The W component contains the reciprocol - * of the interpolated vertex position W component. - */ -static struct ir3_instruction * -decl_in_frag_coord(struct fd3_compile_context *ctx, unsigned regid, - unsigned j) -{ - struct ir3_instruction *instr, *src; - - compile_assert(ctx, !ctx->frag_coord[j]); - - ctx->frag_coord[j] = create_input(ctx->block, NULL, 0); - - - switch (j) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. 
we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - */ - - /* add.s tmp, src, -8 */ - instr = instr_create(ctx, 2, OPC_ADD_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j]; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8; - src = instr; - - /* shr.b tmp, tmp, 4 */ - instr = instr_create(ctx, 2, OPC_SHR_B); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4; - src = instr; - - /* mov.u32f32 dst, tmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_U32; - instr->cat1.dst_type = TYPE_F32; - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - - break; - case 2: /* .z */ - case 3: /* .w */ - /* seems that we can use these as-is: */ - instr = ctx->frag_coord[j]; - break; - default: - compile_error(ctx, "invalid channel\n"); - instr = create_immed(ctx, 0.0); - break; - } - - return instr; -} - -/* TGSI_SEMANTIC_FACE - * """""""""""""""""" - * - * This label applies to fragment shader inputs only and indicates that - * the register contains front/back-face information of the form (F, 0, - * 0, 1). The first component will be positive when the fragment belongs - * to a front-facing polygon, and negative when the fragment belongs to a - * back-facing polygon. - */ -static struct ir3_instruction * -decl_in_frag_face(struct fd3_compile_context *ctx, unsigned regid, - unsigned j) -{ - struct ir3_instruction *instr, *src; - - switch (j) { - case 0: /* .x */ - compile_assert(ctx, !ctx->frag_face); - - ctx->frag_face = create_input(ctx->block, NULL, 0); - - /* for faceness, we always get -1 or 0 (int).. but TGSI expects - * positive vs negative float.. 
and piglit further seems to - * expect -1.0 or 1.0: - * - * mul.s tmp, hr0.x, 2 - * add.s tmp, tmp, 1 - * mov.s16f32, dst, tmp - * - */ - - instr = instr_create(ctx, 2, OPC_MUL_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - src = instr; - - instr = instr_create(ctx, 2, OPC_ADD_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; - src = instr; - - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = TYPE_S32; - instr->cat1.dst_type = TYPE_F32; - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - - break; - case 1: /* .y */ - case 2: /* .z */ - instr = create_immed(ctx, 0.0); - break; - case 3: /* .w */ - instr = create_immed(ctx, 1.0); - break; - default: - compile_error(ctx, "invalid channel\n"); - instr = create_immed(ctx, 0.0); - break; - } - - return instr; -} - -static void +static int decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_variant *so = ctx->so; - unsigned name = decl->Semantic.Name; - unsigned i; + struct fd3_shader_stateobj *so = ctx->so; + unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; + unsigned i, flags = 0; + int nop = 0; - /* I don't think we should get frag shader input without - * semantic info? Otherwise how do inputs get linked to - * vert outputs? 
- */ - compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || - decl->Declaration.Semantic); + if (ctx->so->half_precision) + flags |= IR3_REG_HALF; for (i = decl->Range.First; i <= decl->Range.Last; i++) { unsigned n = so->inputs_count++; - unsigned r = regid(i, 0); - unsigned ncomp, j; + unsigned r = regid(i + base, 0); + unsigned ncomp; - /* we'll figure out the actual components used after scheduling */ + /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */ ncomp = 4; - DBG("decl in -> r%d", i); + DBG("decl in -> r%d", i + base); // XXX - compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); - - so->inputs[n].semantic = decl_semantic(&decl->Semantic); so->inputs[n].compmask = (1 << ncomp) - 1; so->inputs[n].regid = r; so->inputs[n].inloc = ctx->next_inloc; + ctx->next_inloc += ncomp; - for (j = 0; j < ncomp; j++) { - struct ir3_instruction *instr = NULL; - - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - /* for fragment shaders, POSITION and FACE are handled - * specially, not using normal varying / bary.f - */ - if (name == TGSI_SEMANTIC_POSITION) { - so->inputs[n].bary = false; - so->frag_coord = true; - instr = decl_in_frag_coord(ctx, r + j, j); - } else if (name == TGSI_SEMANTIC_FACE) { - so->inputs[n].bary = false; - so->frag_face = true; - instr = decl_in_frag_face(ctx, r + j, j); - } else { - so->inputs[n].bary = true; - instr = decl_in_frag_bary(ctx, r + j, j, - so->inputs[n].inloc + j - 8); - } - } else { - instr = create_input(ctx->block, NULL, (i * 4) + j); - } + so->total_in += ncomp; - ctx->block->inputs[(i * 4) + j] = instr; - } + /* for frag shaders, we need to generate the corresponding bary instr: */ + if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + struct ir3_instruction *instr; - if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) { - ctx->next_inloc += ncomp; - so->total_in += ncomp; + instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F); + instr->repeat = ncomp - 1; + + /* dst register: */ + ctx->last_input = 
ir3_reg_create(instr, r, flags); + + /* input position: */ + ir3_reg_create(instr, 0, IR3_REG_IMMED | IR3_REG_R)->iim_val = + so->inputs[n].inloc - 8; + + /* input base (always r0.x): */ + ir3_reg_create(instr, regid(0,0), 0); + + nop = 6; } } + + return nop; } static void decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_variant *so = ctx->so; - unsigned comp = 0; + struct fd3_shader_stateobj *so = ctx->so; + unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; unsigned name = decl->Semantic.Name; unsigned i; - compile_assert(ctx, decl->Declaration.Semantic); + assert(decl->Declaration.Semantic); // TODO is this ever not true? - DBG("decl out[%d] -> r%d", name, decl->Range.First); + DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX if (ctx->type == TGSI_PROCESSOR_VERTEX) { switch (name) { case TGSI_SEMANTIC_POSITION: - so->writes_pos = true; + so->pos_regid = regid(decl->Range.First + base, 0); break; case TGSI_SEMANTIC_PSIZE: - so->writes_psize = true; + so->psize_regid = regid(decl->Range.First + base, 0); break; case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_BCOLOR: case TGSI_SEMANTIC_GENERIC: case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_TEXCOORD: + for (i = decl->Range.First; i <= decl->Range.Last; i++) + so->outputs[so->outputs_count++].regid = regid(i + base, 0); break; default: - compile_error(ctx, "unknown VS semantic name: %s\n", + DBG("unknown VS semantic name: %s", tgsi_semantic_names[name]); + assert(0); } } else { switch (name) { - case TGSI_SEMANTIC_POSITION: - comp = 2; /* tgsi will write to .z component */ - so->writes_pos = true; - break; case TGSI_SEMANTIC_COLOR: + so->color_regid = regid(decl->Range.First + base, 0); break; default: - compile_error(ctx, "unknown FS semantic name: %s\n", + DBG("unknown VS semantic name: %s", tgsi_semantic_names[name]); + assert(0); } } - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->outputs_count++; - unsigned ncomp, j; 
- - ncomp = 4; - - compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - - so->outputs[n].semantic = decl_semantic(&decl->Semantic); - so->outputs[n].regid = regid(i, comp); - - /* avoid undefined outputs, stick a dummy mov from imm{0.0}, - * which if the output is actually assigned will be over- - * written - */ - for (j = 0; j < ncomp; j++) - ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0); - } } -/* from TGSI perspective, we actually have inputs. But most of the "inputs" - * for a fragment shader are just bary.f instructions. The *actual* inputs - * from the hw perspective are the frag_pos and optionally frag_coord and - * frag_face. - */ static void -fixup_frag_inputs(struct fd3_compile_context *ctx) +decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_variant *so = ctx->so; - struct ir3_block *block = ctx->block; - struct ir3_instruction **inputs; - struct ir3_instruction *instr; - int n, regid = 0; - - block->ninputs = 0; - - n = 4; /* always have frag_pos */ - n += COND(so->frag_face, 4); - n += COND(so->frag_coord, 4); - - inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); - - if (so->frag_face) { - /* this ultimately gets assigned to hr0.x so doesn't conflict - * with frag_coord/frag_pos.. - */ - inputs[block->ninputs++] = ctx->frag_face; - ctx->frag_face->regs[0]->num = 0; - - /* remaining channels not used, but let's avoid confusing - * other parts that expect inputs to come in groups of vec4 - */ - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - } - - /* since we don't know where to set the regid for frag_coord, - * we have to use r0.x for it. 
But we don't want to *always* - * use r1.x for frag_pos as that could increase the register - * footprint on simple shaders: - */ - if (so->frag_coord) { - ctx->frag_coord[0]->regs[0]->num = regid++; - ctx->frag_coord[1]->regs[0]->num = regid++; - ctx->frag_coord[2]->regs[0]->num = regid++; - ctx->frag_coord[3]->regs[0]->num = regid++; - - inputs[block->ninputs++] = ctx->frag_coord[0]; - inputs[block->ninputs++] = ctx->frag_coord[1]; - inputs[block->ninputs++] = ctx->frag_coord[2]; - inputs[block->ninputs++] = ctx->frag_coord[3]; - } - - /* we always have frag_pos: */ - so->pos_regid = regid; - - /* r0.x */ - instr = create_input(block, NULL, block->ninputs); - instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; - ctx->frag_pos->regs[1]->instr = instr; - - /* r0.y */ - instr = create_input(block, NULL, block->ninputs); - instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; - ctx->frag_pos->regs[2]->instr = instr; - - block->inputs = inputs; + ctx->so->samplers_count++; } static void compile_instructions(struct fd3_compile_context *ctx) { - push_block(ctx); - - /* for fragment shader, we have a single input register (usually - * r0.xy) which is used as the base for bary.f varying fetch instrs: - */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, -1, OPC_META_FI); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ - ctx->frag_pos = instr; - } + struct ir3_shader *ir = ctx->ir; + int nop = 0; while (!tgsi_parse_end_of_tokens(&ctx->parser)) { tgsi_parse_token(&ctx->parser); @@ -2446,7 +1191,9 @@ compile_instructions(struct fd3_compile_context *ctx) if (decl->Declaration.File == TGSI_FILE_OUTPUT) { decl_out(ctx, decl); } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - decl_in(ctx, decl); + nop = decl_in(ctx, decl); + } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + 
decl_samp(ctx, decl); } break; } @@ -2458,7 +1205,6 @@ compile_instructions(struct fd3_compile_context *ctx) struct tgsi_full_immediate *imm = &ctx->parser.FullToken.FullImmediate; unsigned n = ctx->so->immediates_count++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates)); memcpy(ctx->so->immediates[n].val, imm->u, 16); break; } @@ -2468,196 +1214,55 @@ compile_instructions(struct fd3_compile_context *ctx) unsigned opc = inst->Instruction.Opcode; const struct instr_translater *t = &translaters[opc]; + if (nop) { + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = nop - 1; + nop = 0; + } + if (t->fxn) { t->fxn(t, ctx, inst); ctx->num_internal_temps = 0; } else { - compile_error(ctx, "unknown TGSI opc: %s\n", + debug_printf("unknown TGSI opc: %s\n", tgsi_get_opcode_name(opc)); + tgsi_dump(ctx->tokens, 0); + assert(0); } - switch (inst->Instruction.Saturate) { - case TGSI_SAT_ZERO_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(0.0), fui(1.0)); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(-1.0), fui(1.0)); - break; - } - - instr_finish(ctx); - break; } default: break; } } -} -static void -compile_dump(struct fd3_compile_context *ctx) -{ - const char *name = (ctx->so->type == SHADER_VERTEX) ? 
"vert" : "frag"; - static unsigned n = 0; - char fname[16]; - FILE *f; - snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++); - f = fopen(fname, "w"); - if (!f) - return; - ir3_block_depth(ctx->block); - ir3_shader_dump(ctx->ir, name, ctx->block, f); - fclose(f); + if (ir->instrs_count > 0) + ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; + + if (ctx->last_input) + ctx->last_input->flags |= IR3_REG_EI; } int -fd3_compile_shader(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, struct fd3_shader_key key) +fd3_compile_shader(struct fd3_shader_stateobj *so, + const struct tgsi_token *tokens) { struct fd3_compile_context ctx; - struct ir3_block *block; - struct ir3_instruction **inputs; - unsigned i, j, actual_in; - int ret = 0; assert(!so->ir); so->ir = ir3_shader_create(); - assert(so->ir); + so->color_regid = regid(63,0); + so->pos_regid = regid(63,0); + so->psize_regid = regid(63,0); - if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) { - ret = -1; - goto out; - } + if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) + return -1; compile_instructions(&ctx); - block = ctx.block; - - /* keep track of the inputs from TGSI perspective.. 
*/ - inputs = block->inputs; - - /* but fixup actual inputs for frag shader: */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT) - fixup_frag_inputs(&ctx); - - /* at this point, for binning pass, throw away unneeded outputs: */ - if (key.binning_pass) { - for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned name = sem2name(so->outputs[i].semantic); - unsigned idx = sem2name(so->outputs[i].semantic); - - /* throw away everything but first position/psize */ - if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) || - (name == TGSI_SEMANTIC_PSIZE))) { - if (i != j) { - so->outputs[j] = so->outputs[i]; - block->outputs[(j*4)+0] = block->outputs[(i*4)+0]; - block->outputs[(j*4)+1] = block->outputs[(i*4)+1]; - block->outputs[(j*4)+2] = block->outputs[(i*4)+2]; - block->outputs[(j*4)+3] = block->outputs[(i*4)+3]; - } - j++; - } - } - so->outputs_count = j; - block->noutputs = j * 4; - } - - /* at this point, we want the kill's in the outputs array too, - * so that they get scheduled (since they have no dst).. 
we've - * already ensured that the array is big enough in push_block(): - */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { - for (i = 0; i < ctx.kill_count; i++) - block->outputs[block->noutputs++] = ctx.kill[i]; - } - - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(&ctx); - - ret = ir3_block_flatten(block); - if (ret < 0) - goto out; - if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP)) - compile_dump(&ctx); - - ir3_block_cp(block); - - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(&ctx); - - ir3_block_depth(block); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER DEPTH:\n"); - ir3_dump_instr_list(block->head); - } - - ir3_block_sched(block); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER SCHED:\n"); - ir3_dump_instr_list(block->head); - } - - ret = ir3_block_ra(block, so->type, key.half_precision, - so->frag_coord, so->frag_face, &so->has_samp); - if (ret) - goto out; - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER RA:\n"); - ir3_dump_instr_list(block->head); - } - - /* fixup input/outputs: */ - for (i = 0; i < so->outputs_count; i++) { - so->outputs[i].regid = block->outputs[i*4]->regs[0]->num; - /* preserve hack for depth output.. 
tgsi writes depth to .z, - * but what we give the hw is the scalar register: - */ - if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) && - (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION)) - so->outputs[i].regid += 2; - } - /* Note that some or all channels of an input may be unused: */ - actual_in = 0; - for (i = 0; i < so->inputs_count; i++) { - unsigned j, regid = ~0, compmask = 0; - so->inputs[i].ncomp = 0; - for (j = 0; j < 4; j++) { - struct ir3_instruction *in = inputs[(i*4) + j]; - if (in) { - compmask |= (1 << j); - regid = in->regs[0]->num - j; - actual_in++; - so->inputs[i].ncomp++; - } - } - so->inputs[i].regid = regid; - so->inputs[i].compmask = compmask; - } - - /* fragment shader always gets full vec4's even if it doesn't - * fetch all components, but vertex shader we need to update - * with the actual number of components fetch, otherwise thing - * will hang due to mismaptch between VFD_DECODE's and - * TOTALATTRTOVS - */ - if (so->type == SHADER_VERTEX) - so->total_in = actual_in; - -out: - if (ret) { - ir3_shader_destroy(so->ir); - so->ir = NULL; - } compile_free(&ctx); - return ret; + return 0; } diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h index a53bb3ee9..1116f598a 100644 --- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h +++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h @@ -32,12 +32,7 @@ #include "fd3_program.h" #include "fd3_util.h" - -int fd3_compile_shader(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, - struct fd3_shader_key key); -int fd3_compile_shader_old(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, - struct fd3_shader_key key); +int fd3_compile_shader(struct fd3_shader_stateobj *so, + const struct tgsi_token *tokens); #endif /* FD3_COMPILER_H_ */ diff --git a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h 
b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h index a79998ef5..464a7e9d7 100644 --- a/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h +++ b/dist/Mesa/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h @@ -190,22 +190,6 @@ typedef enum { OPC_LDC_4 = 30, OPC_LDLV = 31, - /* meta instructions (category -1): */ - /* placeholder instr to mark inputs/outputs: */ - OPC_META_INPUT = 0, - OPC_META_OUTPUT = 1, - /* The "fan-in" and "fan-out" instructions are used for keeping - * track of instructions that write to multiple dst registers - * (fan-out) like texture sample instructions, or read multiple - * consecutive scalar registers (fan-in) (bary.f, texture samp) - */ - OPC_META_FO = 2, - OPC_META_FI = 3, - /* branches/flow control */ - OPC_META_FLOW = 4, - OPC_META_PHI = 5, - - } opc_t; typedef enum { @@ -248,16 +232,13 @@ typedef union PACKED { /* normal gpr or const src register: */ struct PACKED { uint32_t comp : 2; - uint32_t num : 10; + uint32_t num : 9; }; /* for immediate val: */ int32_t iim_val : 11; /* to make compiler happy: */ uint32_t dummy32; - uint32_t dummy10 : 10; uint32_t dummy11 : 11; - uint32_t dummy12 : 12; - uint32_t dummy13 : 13; uint32_t dummy8 : 8; } reg_t; @@ -295,16 +276,12 @@ typedef struct PACKED { /* for normal src register: */ struct PACKED { uint32_t src : 11; - /* at least low bit of pad must be zero or it will - * look like a address relative src - */ uint32_t pad : 21; }; /* for address relative: */ struct PACKED { int32_t off : 10; - uint32_t src_rel_c : 1; - uint32_t src_rel : 1; + uint32_t must_be_3 : 2; uint32_t unknown : 20; }; /* for immediate: */ @@ -317,7 +294,7 @@ typedef struct PACKED { uint32_t repeat : 3; uint32_t src_r : 1; uint32_t ss : 1; - uint32_t ul : 1; + uint32_t src_rel : 1; uint32_t dst_type : 3; uint32_t dst_rel : 1; uint32_t src_type : 3; @@ -333,49 +310,19 @@ typedef struct PACKED { typedef struct PACKED { /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t 
must_be_zero1: 2; - uint32_t src1_im : 1; /* immediate */ - uint32_t src1_neg : 1; /* negate */ - uint32_t src1_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; /* relative-const */ - uint32_t src1_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; /* const */ - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src2 : 11; - uint32_t must_be_zero2: 2; - uint32_t src2_im : 1; /* immediate */ - uint32_t src2_neg : 1; /* negate */ - uint32_t src2_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src2 : 10; - uint32_t src2_c : 1; /* relative-const */ - uint32_t src2_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src2 : 12; - uint32_t src2_c : 1; /* const */ - uint32_t dummy : 3; - } c2; - }; + uint32_t src1 : 11; + uint32_t src1_rel : 1; /* relative address */ + uint32_t src1_c : 1; /* const */ + uint32_t src1_im : 1; /* immediate */ + uint32_t src1_neg : 1; /* negate */ + uint32_t src1_abs : 1; /* absolute value */ + + uint32_t src2 : 11; + uint32_t src2_rel : 1; /* relative address */ + uint32_t src2_c : 1; /* const */ + uint32_t src2_im : 1; /* immediate */ + uint32_t src2_neg : 1; /* negate */ + uint32_t src2_abs : 1; /* absolute value */ /* dword1: */ uint32_t dst : 8; @@ -396,49 +343,18 @@ typedef struct PACKED { typedef struct PACKED { /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src2_c : 1; - uint32_t src1_neg : 1; - uint32_t src2_r : 1; - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; - uint32_t src1_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - 
uint32_t src3 : 11; - uint32_t must_be_zero2: 2; - uint32_t src3_r : 1; - uint32_t src2_neg : 1; - uint32_t src3_neg : 1; - }; - struct PACKED { - uint32_t src3 : 10; - uint32_t src3_c : 1; - uint32_t src3_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src3 : 12; - uint32_t src3_c : 1; - uint32_t dummy : 3; - } c2; - }; + uint32_t src1 : 11; + uint32_t src1_rel : 1; + uint32_t src1_c : 1; + uint32_t src2_c : 1; + uint32_t src1_neg : 1; + uint32_t src2_r : 1; + uint32_t src3 : 11; + uint32_t src3_rel : 1; + uint32_t src3_c : 1; + uint32_t src3_r : 1; + uint32_t src2_neg : 1; + uint32_t src3_neg : 1; /* dword1: */ uint32_t dst : 8; @@ -454,46 +370,14 @@ typedef struct PACKED { uint32_t opc_cat : 3; } instr_cat3_t; -static inline bool instr_cat3_full(instr_cat3_t *cat3) -{ - switch (cat3->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? 
- return false; - default: - return true; - } -} - typedef struct PACKED { /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src : 11; - uint32_t must_be_zero1: 2; - uint32_t src_im : 1; /* immediate */ - uint32_t src_neg : 1; /* negate */ - uint32_t src_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src : 10; - uint32_t src_c : 1; /* relative-const */ - uint32_t src_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel; - struct PACKED { - uint32_t src : 12; - uint32_t src_c : 1; /* const */ - uint32_t dummy : 3; - } c; - }; + uint32_t src : 11; + uint32_t src_rel : 1; + uint32_t src_c : 1; + uint32_t src_im : 1; + uint32_t src_neg : 1; + uint32_t src_abs : 1; uint32_t dummy1 : 16; /* seem to be ignored */ /* dword1: */ @@ -645,35 +529,4 @@ typedef union PACKED { }; } instr_t; -static inline uint32_t instr_opc(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: return instr->cat6.opc; - default: return 0; - } -} - -static inline bool is_mad(opc_t opc) -{ - switch (opc) { - case OPC_MAD_U16: - case OPC_MADSH_U16: - case OPC_MAD_S16: - case OPC_MADSH_M16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_MAD_F16: - case OPC_MAD_F32: - return true; - default: - return false; - } -} - #endif /* INSTR_A3XX_H_ */ diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c index 90063761d..5b120e77d 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.c @@ -25,11 +25,9 @@ * Chia-I Wu <olv@lunarg.com> */ -#include "util/u_prim.h" #include "intel_winsys.h" #include "ilo_3d_pipeline.h" -#include "ilo_blit.h" #include "ilo_context.h" #include "ilo_cp.h" #include "ilo_query.h" @@ -47,7 +45,8 @@ 
process_query_for_occlusion_counter(struct ilo_3d *hw3d, /* in pairs */ assert(q->reg_read % 2 == 0); - vals = intel_bo_map(q->bo, false); + intel_bo_map(q->bo, false); + vals = intel_bo_get_virtual(q->bo); for (i = 1; i < q->reg_read; i += 2) depth_count += vals[i] - vals[i - 1]; intel_bo_unmap(q->bo); @@ -71,7 +70,8 @@ process_query_for_timestamp(struct ilo_3d *hw3d, struct ilo_query *q) assert(q->reg_read == 1); - vals = intel_bo_map(q->bo, false); + intel_bo_map(q->bo, false); + vals = intel_bo_get_virtual(q->bo); timestamp = vals[0]; intel_bo_unmap(q->bo); @@ -88,7 +88,8 @@ process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q) /* in pairs */ assert(q->reg_read % 2 == 0); - vals = intel_bo_map(q->bo, false); + intel_bo_map(q->bo, false); + vals = intel_bo_get_virtual(q->bo); for (i = 1; i < q->reg_read; i += 2) elapsed += vals[i] - vals[i - 1]; @@ -101,41 +102,6 @@ process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q) } static void -process_query_for_pipeline_statistics(struct ilo_3d *hw3d, - struct ilo_query *q) -{ - const uint64_t *vals; - int i; - - assert(q->reg_read % 22 == 0); - - vals = intel_bo_map(q->bo, false); - - for (i = 0; i < q->reg_read; i += 22) { - struct pipe_query_data_pipeline_statistics *stats = - &q->data.pipeline_statistics; - const uint64_t *begin = vals + i; - const uint64_t *end = begin + 11; - - stats->ia_vertices += end[0] - begin[0]; - stats->ia_primitives += end[1] - begin[1]; - stats->vs_invocations += end[2] - begin[2]; - stats->gs_invocations += end[3] - begin[3]; - stats->gs_primitives += end[4] - begin[4]; - stats->c_invocations += end[5] - begin[5]; - stats->c_primitives += end[6] - begin[6]; - stats->ps_invocations += end[7] - begin[7]; - stats->hs_invocations += end[8] - begin[8]; - stats->ds_invocations += end[9] - begin[9]; - stats->cs_invocations += end[10] - begin[10]; - } - - intel_bo_unmap(q->bo); - - q->reg_read = 0; -} - -static void ilo_3d_resume_queries(struct ilo_3d *hw3d) { 
struct ilo_query *q; @@ -159,17 +125,6 @@ ilo_3d_resume_queries(struct ilo_3d *hw3d) ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline, q->bo, q->reg_read++); } - - /* resume pipeline statistics queries */ - LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) { - /* accumulate the result if the bo is alreay full */ - if (q->reg_read >= q->reg_total) - process_query_for_pipeline_statistics(hw3d, q); - - ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline, - q->bo, q->reg_read); - q->reg_read += 11; - } } static void @@ -190,14 +145,6 @@ ilo_3d_pause_queries(struct ilo_3d *hw3d) ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline, q->bo, q->reg_read++); } - - /* pause pipeline statistics queries */ - LIST_FOR_EACH_ENTRY(q, &hw3d->pipeline_statistics_queries, list) { - assert(q->reg_read < q->reg_total); - ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline, - q->bo, q->reg_read); - q->reg_read += 11; - } } static void @@ -208,10 +155,10 @@ ilo_3d_release_render_ring(struct ilo_cp *cp, void *data) ilo_3d_pause_queries(hw3d); } -void +static void ilo_3d_own_render_ring(struct ilo_3d *hw3d) { - ilo_cp_set_ring(hw3d->cp, INTEL_RING_RENDER); + ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER); if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve)) ilo_3d_resume_queries(hw3d); @@ -273,25 +220,6 @@ ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q) q->data.u64 = 0; list_add(&q->list, &hw3d->prim_emitted_queries); break; - case PIPE_QUERY_PIPELINE_STATISTICS: - /* reserve some space for pausing the query */ - q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline, - ILO_3D_PIPELINE_WRITE_STATISTICS, NULL); - hw3d->owner_reserve += q->reg_cmd_size; - ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve); - - memset(&q->data.pipeline_statistics, 0, - sizeof(q->data.pipeline_statistics)); - - if (ilo_query_alloc_bo(q, 11 * 2, -1, hw3d->cp->winsys)) { - /* XXX we should check the aperture size */ - 
ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline, - q->bo, q->reg_read); - q->reg_read += 11; - - list_add(&q->list, &hw3d->pipeline_statistics_queries); - } - break; default: assert(!"unknown query type"); break; @@ -339,16 +267,6 @@ ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q) case PIPE_QUERY_PRIMITIVES_EMITTED: list_del(&q->list); break; - case PIPE_QUERY_PIPELINE_STATISTICS: - list_del(&q->list); - - assert(q->reg_read + 11 <= q->reg_total); - hw3d->owner_reserve -= q->reg_cmd_size; - ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve); - ilo_3d_pipeline_emit_write_statistics(hw3d->pipeline, - q->bo, q->reg_read); - q->reg_read += 11; - break; default: assert(!"unknown query type"); break; @@ -379,10 +297,6 @@ ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q) case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: break; - case PIPE_QUERY_PIPELINE_STATISTICS: - if (q->bo) - process_query_for_pipeline_statistics(hw3d, q); - break; default: assert(!"unknown query type"); break; @@ -402,6 +316,10 @@ ilo_3d_cp_flushed(struct ilo_3d *hw3d) ilo_3d_pipeline_invalidate(hw3d->pipeline, ILO_3D_PIPELINE_INVALIDATE_BATCH_BO | ILO_3D_PIPELINE_INVALIDATE_STATE_BO); + if (!hw3d->cp->render_ctx) { + ilo_3d_pipeline_invalidate(hw3d->pipeline, + ILO_3D_PIPELINE_INVALIDATE_HW); + } hw3d->new_batch = true; } @@ -428,7 +346,6 @@ ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev) list_inithead(&hw3d->time_elapsed_queries); list_inithead(&hw3d->prim_generated_queries); list_inithead(&hw3d->prim_emitted_queries); - list_inithead(&hw3d->pipeline_statistics_queries); hw3d->pipeline = ilo_3d_pipeline_create(cp, dev); if (!hw3d->pipeline) { @@ -484,7 +401,7 @@ draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo, } if (max_len > ilo_cp_space(hw3d->cp)) { - ilo_cp_flush(hw3d->cp, "out of space"); + ilo_cp_flush(hw3d->cp); need_flush = false; assert(max_len <= ilo_cp_space(hw3d->cp)); } @@ -746,7 +663,7 
@@ upload_shaders(struct ilo_3d *hw3d, struct ilo_shader_cache *shc) intel_bo_unreference(hw3d->kernel.bo); hw3d->kernel.bo = intel_winsys_alloc_buffer(hw3d->cp->winsys, - "kernel bo", new_size, INTEL_DOMAIN_CPU); + "kernel bo", new_size, 0); if (!hw3d->kernel.bo) { ilo_err("failed to allocate kernel bo\n"); return false; @@ -784,21 +701,6 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct ilo_3d *hw3d = ilo->hw3d; int prim_generated, prim_emitted; - if (ilo_debug & ILO_DEBUG_DRAW) { - if (info->indexed) { - ilo_printf("indexed draw %s: " - "index start %d, count %d, vertex range [%d, %d]\n", - u_prim_name(info->mode), info->start, info->count, - info->min_index, info->max_index); - } - else { - ilo_printf("draw %s: vertex start %d, count %d\n", - u_prim_name(info->mode), info->start, info->count); - } - - ilo_dump_dirty_flags(ilo->dirty); - } - if (!ilo_3d_pass_render_condition(ilo)) return; @@ -819,8 +721,6 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (!upload_shaders(hw3d, ilo->shader_cache)) return; - ilo_blit_resolve_framebuffer(ilo); - /* If draw_vbo ever fails, return immediately. 
*/ if (!draw_vbo(hw3d, ilo, &prim_generated, &prim_emitted)) return; @@ -859,14 +759,14 @@ ilo_texture_barrier(struct pipe_context *pipe) struct ilo_context *ilo = ilo_context(pipe); struct ilo_3d *hw3d = ilo->hw3d; - if (ilo->cp->ring != INTEL_RING_RENDER) + if (ilo->cp->ring != ILO_CP_RING_RENDER) return; ilo_3d_pipeline_emit_flush(hw3d->pipeline); /* don't know why */ if (ilo->dev->gen >= ILO_GEN(7)) - ilo_cp_flush(hw3d->cp, "texture barrier"); + ilo_cp_flush(hw3d->cp); } static void diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h index 369594aff..f73b8177a 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d.h @@ -60,7 +60,6 @@ struct ilo_3d { struct list_head time_elapsed_queries; struct list_head prim_generated_queries; struct list_head prim_emitted_queries; - struct list_head pipeline_statistics_queries; struct ilo_3d_pipeline *pipeline; }; @@ -75,9 +74,6 @@ void ilo_3d_cp_flushed(struct ilo_3d *hw3d); void -ilo_3d_own_render_ring(struct ilo_3d *hw3d); - -void ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q); void diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c index 1f18bbbed..dee3e0ce5 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c @@ -28,7 +28,6 @@ #include "util/u_prim.h" #include "intel_winsys.h" -#include "ilo_blitter.h" #include "ilo_context.h" #include "ilo_cp.h" #include "ilo_state.h" @@ -82,7 +81,6 @@ ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev) ilo_3d_pipeline_init_gen6(p); break; case ILO_GEN(7): - case ILO_GEN(7.5): ilo_3d_pipeline_init_gen7(p); break; default: @@ -95,7 +93,7 @@ ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev) p->invalidate_flags = ILO_3D_PIPELINE_INVALIDATE_ALL; p->workaround_bo = 
intel_winsys_alloc_buffer(p->cp->winsys, - "PIPE_CONTROL workaround", 4096, INTEL_DOMAIN_INSTRUCTION); + "PIPE_CONTROL workaround", 4096, 0); if (!p->workaround_bo) { ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n"); FREE(p); @@ -173,6 +171,7 @@ ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p, while (true) { struct ilo_cp_jmp_buf jmp; + int err; /* we will rewind if aperture check below fails */ ilo_cp_setjmp(p->cp, &jmp); @@ -184,7 +183,8 @@ ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p, p->emit_draw(p, ilo); ilo_cp_assert_no_implicit_flush(p->cp, false); - if (intel_winsys_can_submit_bo(ilo->winsys, &p->cp->bo, 1)) { + err = intel_winsys_check_aperture_space(ilo->winsys, &p->cp->bo, 1); + if (!err) { success = true; break; } @@ -198,7 +198,7 @@ ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p, } else { /* flush and try again */ - ilo_cp_flush(p->cp, "out of aperture"); + ilo_cp_flush(p->cp); } } @@ -236,7 +236,7 @@ ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p) } /** - * Emit PIPE_CONTROL with GEN6_PIPE_CONTROL_WRITE_TIMESTAMP post-sync op. + * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_TIMESTAMP post-sync op. */ void ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p, @@ -247,7 +247,7 @@ ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p, } /** - * Emit PIPE_CONTROL with GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT post-sync op. + * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_DEPTH_COUNT post-sync op. */ void ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p, @@ -257,56 +257,6 @@ ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p, p->emit_write_depth_count(p, bo, index); } -/** - * Emit MI_STORE_REGISTER_MEM to store statistics registers. 
- */ -void -ilo_3d_pipeline_emit_write_statistics(struct ilo_3d_pipeline *p, - struct intel_bo *bo, int index) -{ - handle_invalid_batch_bo(p, true); - p->emit_write_statistics(p, bo, index); -} - -void -ilo_3d_pipeline_emit_rectlist(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter) -{ - const int max_len = ilo_3d_pipeline_estimate_size(p, - ILO_3D_PIPELINE_RECTLIST, blitter); - - if (max_len > ilo_cp_space(p->cp)) - ilo_cp_flush(p->cp, "out of space"); - - while (true) { - struct ilo_cp_jmp_buf jmp; - - /* we will rewind if aperture check below fails */ - ilo_cp_setjmp(p->cp, &jmp); - - handle_invalid_batch_bo(p, false); - - ilo_cp_assert_no_implicit_flush(p->cp, true); - p->emit_rectlist(p, blitter); - ilo_cp_assert_no_implicit_flush(p->cp, false); - - if (!intel_winsys_can_submit_bo(blitter->ilo->winsys, &p->cp->bo, 1)) { - /* rewind */ - ilo_cp_longjmp(p->cp, &jmp); - - /* flush and try again */ - if (!ilo_cp_empty(p->cp)) { - ilo_cp_flush(p->cp, "out of aperture"); - continue; - } - } - - break; - } - - ilo_3d_pipeline_invalidate(p, ILO_3D_PIPELINE_INVALIDATE_HW); -} - void ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p, unsigned sample_count, diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h index 90c626e52..f26fa83c2 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h @@ -29,10 +29,11 @@ #define ILO_3D_PIPELINE_H #include "ilo_common.h" -#include "ilo_gpe.h" +#include "ilo_context.h" +#include "ilo_gpe_gen6.h" +#include "ilo_gpe_gen7.h" struct intel_bo; -struct ilo_blitter; struct ilo_cp; struct ilo_context; @@ -50,8 +51,6 @@ enum ilo_3d_pipeline_action { ILO_3D_PIPELINE_FLUSH, ILO_3D_PIPELINE_WRITE_TIMESTAMP, ILO_3D_PIPELINE_WRITE_DEPTH_COUNT, - ILO_3D_PIPELINE_WRITE_STATISTICS, - ILO_3D_PIPELINE_RECTLIST, }; /** @@ -84,11 +83,103 @@ struct ilo_3d_pipeline { void 
(*emit_write_depth_count)(struct ilo_3d_pipeline *pipeline, struct intel_bo *bo, int index); - void (*emit_write_statistics)(struct ilo_3d_pipeline *pipeline, - struct intel_bo *bo, int index); - - void (*emit_rectlist)(struct ilo_3d_pipeline *pipeline, - const struct ilo_blitter *blitter); + /** + * all GPE functions of all GENs + */ +#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name gen6_ ## name + GEN6_EMIT(STATE_BASE_ADDRESS); + GEN6_EMIT(STATE_SIP); + GEN6_EMIT(PIPELINE_SELECT); + GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS); + GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS); + GEN6_EMIT(3DSTATE_URB); + GEN6_EMIT(3DSTATE_VERTEX_BUFFERS); + GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS); + GEN6_EMIT(3DSTATE_INDEX_BUFFER); + GEN6_EMIT(3DSTATE_VF_STATISTICS); + GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS); + GEN6_EMIT(3DSTATE_CC_STATE_POINTERS); + GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS); + GEN6_EMIT(3DSTATE_VS); + GEN6_EMIT(3DSTATE_GS); + GEN6_EMIT(3DSTATE_CLIP); + GEN6_EMIT(3DSTATE_SF); + GEN6_EMIT(3DSTATE_WM); + GEN6_EMIT(3DSTATE_CONSTANT_VS); + GEN6_EMIT(3DSTATE_CONSTANT_GS); + GEN6_EMIT(3DSTATE_CONSTANT_PS); + GEN6_EMIT(3DSTATE_SAMPLE_MASK); + GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE); + GEN6_EMIT(3DSTATE_DEPTH_BUFFER); + GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET); + GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN); + GEN6_EMIT(3DSTATE_LINE_STIPPLE); + GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS); + GEN6_EMIT(3DSTATE_GS_SVB_INDEX); + GEN6_EMIT(3DSTATE_MULTISAMPLE); + GEN6_EMIT(3DSTATE_STENCIL_BUFFER); + GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER); + GEN6_EMIT(3DSTATE_CLEAR_PARAMS); + GEN6_EMIT(PIPE_CONTROL); + GEN6_EMIT(3DPRIMITIVE); + GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA); + GEN6_EMIT(SF_VIEWPORT); + GEN6_EMIT(CLIP_VIEWPORT); + GEN6_EMIT(CC_VIEWPORT); + GEN6_EMIT(COLOR_CALC_STATE); + GEN6_EMIT(BLEND_STATE); + GEN6_EMIT(DEPTH_STENCIL_STATE); + GEN6_EMIT(SCISSOR_RECT); + GEN6_EMIT(BINDING_TABLE_STATE); + GEN6_EMIT(SURFACE_STATE); + GEN6_EMIT(so_SURFACE_STATE); + GEN6_EMIT(SAMPLER_STATE); + 
GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE); + GEN6_EMIT(push_constant_buffer); +#undef GEN6_EMIT + +#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name gen7_ ## name + GEN7_EMIT(3DSTATE_DEPTH_BUFFER); + GEN7_EMIT(3DSTATE_CC_STATE_POINTERS); + GEN7_EMIT(3DSTATE_GS); + GEN7_EMIT(3DSTATE_SF); + GEN7_EMIT(3DSTATE_WM); + GEN7_EMIT(3DSTATE_SAMPLE_MASK); + GEN7_EMIT(3DSTATE_CONSTANT_HS); + GEN7_EMIT(3DSTATE_CONSTANT_DS); + GEN7_EMIT(3DSTATE_HS); + GEN7_EMIT(3DSTATE_TE); + GEN7_EMIT(3DSTATE_DS); + GEN7_EMIT(3DSTATE_STREAMOUT); + GEN7_EMIT(3DSTATE_SBE); + GEN7_EMIT(3DSTATE_PS); + GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); + GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC); + GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS); + GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS); + GEN7_EMIT(3DSTATE_URB_VS); + GEN7_EMIT(3DSTATE_URB_HS); + GEN7_EMIT(3DSTATE_URB_DS); + GEN7_EMIT(3DSTATE_URB_GS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS); + GEN7_EMIT(3DSTATE_SO_DECL_LIST); + GEN7_EMIT(3DSTATE_SO_BUFFER); + GEN7_EMIT(SF_CLIP_VIEWPORT); +#undef GEN7_EMIT /** * HW states. 
@@ -134,8 +225,6 @@ struct ilo_3d_pipeline { uint32_t SURFACE_STATE[ILO_MAX_WM_SURFACES]; uint32_t SAMPLER_STATE; uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS]; - uint32_t PUSH_CONSTANT_BUFFER; - int PUSH_CONSTANT_BUFFER_size; } wm; } state; }; @@ -181,14 +270,6 @@ ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p, struct intel_bo *bo, int index); void -ilo_3d_pipeline_emit_write_statistics(struct ilo_3d_pipeline *p, - struct intel_bo *bo, int index); - -void -ilo_3d_pipeline_emit_rectlist(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter); - -void ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p, unsigned sample_count, unsigned sample_index, diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c index de47e8d45..227614047 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c @@ -517,17 +517,12 @@ static void dump_binding_table(struct brw_context *brw, uint32_t offset, } } -static bool +static void init_brw(struct brw_context *brw, struct ilo_3d_pipeline *p) { brw->intel.gen = ILO_GEN_GET_MAJOR(p->dev->gen); + brw->intel.batch.bo_dst.virtual = intel_bo_get_virtual(p->cp->bo); brw->intel.batch.bo = &brw->intel.batch.bo_dst; - - brw->intel.batch.bo_dst.virtual = intel_bo_map(p->cp->bo, false); - if (!brw->intel.batch.bo_dst.virtual) - return false; - - return true; } static void @@ -536,8 +531,7 @@ dump_3d_state(struct ilo_3d_pipeline *p) struct brw_context brw; int num_states, i; - if (!init_brw(&brw, p)) - return; + init_brw(&brw, p); if (brw.intel.gen >= 7) { dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT); @@ -620,11 +614,6 @@ dump_3d_state(struct ilo_3d_pipeline *p) else dump_gen7_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16); - if (p->state.wm.PUSH_CONSTANT_BUFFER_size) { - dump_wm_constants(&brw, p->state.wm.PUSH_CONSTANT_BUFFER, - 
p->state.wm.PUSH_CONSTANT_BUFFER_size); - } - dump_scissor(&brw, p->state.SCISSOR_RECT); (void) dump_vs_state; @@ -633,8 +622,7 @@ dump_3d_state(struct ilo_3d_pipeline *p) (void) dump_sf_state; (void) dump_wm_state; (void) dump_cc_state_gen4; - - intel_bo_unmap(p->cp->bo); + (void) dump_wm_constants; } /** @@ -643,6 +631,13 @@ dump_3d_state(struct ilo_3d_pipeline *p) void ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p) { + int err; + ilo_cp_dump(p->cp); - dump_3d_state(p); + + err = intel_bo_map(p->cp->bo, false); + if (!err) { + dump_3d_state(p); + intel_bo_unmap(p->cp->bo); + } } diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index c2da385e0..eefb2f96b 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -25,16 +25,14 @@ * Chia-I Wu <olv@lunarg.com> */ -#include "genhw/genhw.h" #include "util/u_dual_blend.h" #include "util/u_prim.h" +#include "intel_reg.h" -#include "ilo_blitter.h" #include "ilo_3d.h" #include "ilo_context.h" #include "ilo_cp.h" #include "ilo_gpe_gen6.h" -#include "ilo_gpe_gen7.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_3d_pipeline.h" @@ -66,9 +64,9 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p, * * The workaround below necessitates this workaround. */ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_CS_STALL | - GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, NULL, 0, false, p->cp); /* the caller will emit the post-sync op */ @@ -85,8 +83,8 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p, * "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a * PIPE_CONTROL with any non-zero post-sync-op is required." 
*/ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_WRITE_IMM, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_WRITE_IMMEDIATE, p->workaround_bo, 0, false, p->cp); } @@ -105,9 +103,9 @@ gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p) * requires driver to send a PIPE_CONTROL with a CS stall along with a * Depth Flush prior to this command." */ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH | - GEN6_PIPE_CONTROL_CS_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_CS_STALL, 0, 0, false, p->cp); } @@ -123,16 +121,16 @@ gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p) * to emit a sequence of PIPE_CONTROLs prior to emitting depth related * commands. */ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_STALL, NULL, 0, false, p->cp); - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_CACHE_FLUSH, NULL, 0, false, p->cp); - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_STALL, NULL, 0, false, p->cp); } @@ -152,8 +150,8 @@ gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p) * field set (DW1 Bit 1), must be issued prior to any change to the * value in this field (Maximum Number of Threads in 3DSTATE_WM)" */ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_STALL_AT_SCOREBOARD, NULL, 0, false, p->cp); } @@ -170,10 +168,10 @@ gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p) * PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being * buffered by VS FF, to the point that the FF dies. 
*/ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_STALL | - GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | - GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE, NULL, 0, false, p->cp); } @@ -189,7 +187,7 @@ gen6_pipeline_common_select(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_PIPELINE_SELECT(p->dev, 0x0, p->cp); + p->gen6_PIPELINE_SELECT(p->dev, 0x0, p->cp); } } @@ -203,7 +201,7 @@ gen6_pipeline_common_sip(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_STATE_SIP(p->dev, 0, p->cp); + p->gen6_STATE_SIP(p->dev, 0, p->cp); } } @@ -218,7 +216,7 @@ gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_STATE_BASE_ADDRESS(p->dev, + p->gen6_STATE_BASE_ADDRESS(p->dev, NULL, p->cp->bo, p->cp->bo, NULL, ilo->hw3d->kernel.bo, 0, 0, 0, 0, p->cp); @@ -314,7 +312,7 @@ gen6_pipeline_common_urb(struct ilo_3d_pipeline *p, gs_total_size = 0; } - gen6_emit_3DSTATE_URB(p->dev, vs_total_size, gs_total_size, + p->gen6_3DSTATE_URB(p->dev, vs_total_size, gs_total_size, vs_entry_size, gs_entry_size, p->cp); /* @@ -340,7 +338,7 @@ gen6_pipeline_common_pointers_1(struct ilo_3d_pipeline *p, { /* 3DSTATE_VIEWPORT_STATE_POINTERS */ if (session->viewport_state_changed) { - gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev, + p->gen6_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev, p->state.CLIP_VIEWPORT, p->state.SF_VIEWPORT, p->state.CC_VIEWPORT, p->cp); @@ -356,7 +354,7 @@ gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p, if (session->cc_state_blend_changed || session->cc_state_dsa_changed || session->cc_state_cc_changed) { - gen6_emit_3DSTATE_CC_STATE_POINTERS(p->dev, + p->gen6_3DSTATE_CC_STATE_POINTERS(p->dev, p->state.BLEND_STATE, 
p->state.DEPTH_STENCIL_STATE, p->state.COLOR_CALC_STATE, p->cp); @@ -366,7 +364,7 @@ gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p, if (session->sampler_state_vs_changed || session->sampler_state_gs_changed || session->sampler_state_fs_changed) { - gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(p->dev, + p->gen6_3DSTATE_SAMPLER_STATE_POINTERS(p->dev, p->state.vs.SAMPLER_STATE, 0, p->state.wm.SAMPLER_STATE, p->cp); @@ -380,7 +378,7 @@ gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p, { /* 3DSTATE_SCISSOR_STATE_POINTERS */ if (session->scissor_state_changed) { - gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(p->dev, + p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev, p->state.SCISSOR_RECT, p->cp); } @@ -388,7 +386,7 @@ gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p, if (session->binding_table_vs_changed || session->binding_table_gs_changed || session->binding_table_fs_changed) { - gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(p->dev, + p->gen6_3DSTATE_BINDING_TABLE_POINTERS(p->dev, p->state.vs.BINDING_TABLE_STATE, p->state.gs.BINDING_TABLE_STATE, p->state.wm.BINDING_TABLE_STATE, p->cp); @@ -400,31 +398,18 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p, const struct ilo_context *ilo, struct gen6_pipeline_session *session) { - if (p->dev->gen >= ILO_GEN(7.5)) { - /* 3DSTATE_INDEX_BUFFER */ - if (DIRTY(IB) || session->batch_bo_changed) { - gen6_emit_3DSTATE_INDEX_BUFFER(p->dev, - &ilo->ib, false, p->cp); - } - - /* 3DSTATE_VF */ - if (session->primitive_restart_changed) { - gen7_emit_3DSTATE_VF(p->dev, ilo->draw->primitive_restart, - ilo->draw->restart_index, p->cp); - } - } - else { - /* 3DSTATE_INDEX_BUFFER */ - if (DIRTY(IB) || session->primitive_restart_changed || - session->batch_bo_changed) { - gen6_emit_3DSTATE_INDEX_BUFFER(p->dev, - &ilo->ib, ilo->draw->primitive_restart, p->cp); - } + /* 3DSTATE_INDEX_BUFFER */ + if (DIRTY(IB) || session->primitive_restart_changed || + session->batch_bo_changed) { + p->gen6_3DSTATE_INDEX_BUFFER(p->dev, + &ilo->ib, 
ilo->draw->primitive_restart, p->cp); } /* 3DSTATE_VERTEX_BUFFERS */ - if (DIRTY(VB) || DIRTY(VE) || session->batch_bo_changed) - gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev, ilo->ve, &ilo->vb, p->cp); + if (DIRTY(VB) || DIRTY(VE) || session->batch_bo_changed) { + p->gen6_3DSTATE_VERTEX_BUFFERS(p->dev, + ilo->vb.states, ilo->vb.enabled_mask, ilo->ve, p->cp); + } /* 3DSTATE_VERTEX_ELEMENTS */ if (DIRTY(VE) || DIRTY(VS)) { @@ -449,7 +434,7 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p, prepend_generate_ids = true; } - gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev, ve, + p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev, ve, last_velement_edgeflag, prepend_generate_ids, p->cp); } } @@ -461,16 +446,16 @@ gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p, { /* 3DSTATE_VF_STATISTICS */ if (session->hw_ctx_changed) - gen6_emit_3DSTATE_VF_STATISTICS(p->dev, false, p->cp); + p->gen6_3DSTATE_VF_STATISTICS(p->dev, false, p->cp); } -static void +void gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p, const struct ilo_context *ilo, struct gen6_pipeline_session *session) { /* 3DPRIMITIVE */ - gen6_emit_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp); + p->gen6_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp); p->state.has_gen6_wa_pipe_control = false; } @@ -492,7 +477,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p, /* 3DSTATE_CONSTANT_VS */ if (emit_3dstate_constant_vs) { - gen6_emit_3DSTATE_CONSTANT_VS(p->dev, + p->gen6_3DSTATE_CONSTANT_VS(p->dev, &p->state.vs.PUSH_CONSTANT_BUFFER, &p->state.vs.PUSH_CONSTANT_BUFFER_size, 1, p->cp); @@ -502,7 +487,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p, if (emit_3dstate_vs) { const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count; - gen6_emit_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp); + p->gen6_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp); } if (emit_3dstate_constant_vs && p->dev->gen == ILO_GEN(6)) @@ -516,14 +501,14 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p, { /* 3DSTATE_CONSTANT_GS */ if 
(session->pcb_state_gs_changed) - gen6_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp); + p->gen6_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp); /* 3DSTATE_GS */ if (DIRTY(GS) || DIRTY(VS) || session->prim_changed || session->kernel_bo_changed) { const int verts_per_prim = u_vertices_per_prim(session->reduced_prim); - gen6_emit_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp); + p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp); } } @@ -583,7 +568,7 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_3DSTATE_GS_SVB_INDEX(p->dev, + p->gen6_3DSTATE_GS_SVB_INDEX(p->dev, 0, p->state.so_num_vertices, p->state.so_max_vertices, false, p->cp); @@ -600,7 +585,7 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p, * 0xFFFFFFFF in order to not cause overflow in that SVBI." */ for (i = 1; i < 4; i++) { - gen6_emit_3DSTATE_GS_SVB_INDEX(p->dev, + p->gen6_3DSTATE_GS_SVB_INDEX(p->dev, i, 0, 0xffffffff, false, p->cp); } } @@ -631,7 +616,7 @@ gen6_pipeline_clip(struct ilo_3d_pipeline *p, } } - gen6_emit_3DSTATE_CLIP(p->dev, ilo->rasterizer, + p->gen6_3DSTATE_CLIP(p->dev, ilo->rasterizer, ilo->fs, enable_guardband, 1, p->cp); } } @@ -642,8 +627,10 @@ gen6_pipeline_sf(struct ilo_3d_pipeline *p, struct gen6_pipeline_session *session) { /* 3DSTATE_SF */ - if (DIRTY(RASTERIZER) || DIRTY(FS)) - gen6_emit_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fs, p->cp); + if (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS)) { + p->gen6_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fs, + (ilo->gs) ? 
ilo->gs : ilo->vs, p->cp); + } } void @@ -656,7 +643,7 @@ gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0, + p->gen6_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0, ilo->fb.state.width, ilo->fb.state.height, p->cp); } } @@ -667,26 +654,22 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p, struct gen6_pipeline_session *session) { /* 3DSTATE_CONSTANT_PS */ - if (session->pcb_state_fs_changed) { - gen6_emit_3DSTATE_CONSTANT_PS(p->dev, - &p->state.wm.PUSH_CONSTANT_BUFFER, - &p->state.wm.PUSH_CONSTANT_BUFFER_size, - 1, p->cp); - } + if (session->pcb_state_fs_changed) + p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp); /* 3DSTATE_WM */ if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER) || session->kernel_bo_changed) { const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count; const bool dual_blend = ilo->blend->dual_blend; - const bool cc_may_kill = (ilo->dsa->dw_alpha || + const bool cc_may_kill = (ilo->dsa->alpha.enabled || ilo->blend->alpha_to_coverage); if (p->dev->gen == ILO_GEN(6) && session->hw_ctx_changed) gen6_wa_pipe_control_wm_max_threads_stall(p); - gen6_emit_3DSTATE_WM(p->dev, ilo->fs, num_samplers, - ilo->rasterizer, dual_blend, cc_may_kill, 0, p->cp); + p->gen6_3DSTATE_WM(p->dev, ilo->fs, num_samplers, + ilo->rasterizer, dual_blend, cc_may_kill, p->cp); } } @@ -707,11 +690,11 @@ gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p, gen6_wa_pipe_control_wm_multisample_flush(p); } - gen6_emit_3DSTATE_MULTISAMPLE(p->dev, + p->gen6_3DSTATE_MULTISAMPLE(p->dev, ilo->fb.num_samples, packed_sample_pos, ilo->rasterizer->state.half_pixel_center, p->cp); - gen6_emit_3DSTATE_SAMPLE_MASK(p->dev, + p->gen6_3DSTATE_SAMPLE_MASK(p->dev, (ilo->fb.num_samples > 1) ? 
ilo->sample_mask : 0x1, p->cp); } } @@ -724,37 +707,16 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p, /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */ if (DIRTY(FB) || session->batch_bo_changed) { const struct ilo_zs_surface *zs; - struct ilo_zs_surface layer; - uint32_t clear_params; if (ilo->fb.state.zsbuf) { const struct ilo_surface_cso *surface = (const struct ilo_surface_cso *) ilo->fb.state.zsbuf; - const struct ilo_texture_slice *slice = - ilo_texture_get_slice(ilo_texture(surface->base.texture), - surface->base.u.tex.level, surface->base.u.tex.first_layer); - if (ilo->fb.offset_to_layers) { - assert(surface->base.u.tex.first_layer == - surface->base.u.tex.last_layer); - - ilo_gpe_init_zs_surface(ilo->dev, - ilo_texture(surface->base.texture), - surface->base.format, surface->base.u.tex.level, - surface->base.u.tex.first_layer, 1, true, &layer); - - zs = &layer; - } - else { - assert(!surface->is_rt); - zs = &surface->u.zs; - } - - clear_params = slice->clear_value; + assert(!surface->is_rt); + zs = &surface->u.zs; } else { zs = &ilo->fb.null_zs; - clear_params = 0; } if (p->dev->gen == ILO_GEN(6)) { @@ -762,10 +724,10 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p, gen6_wa_pipe_control_wm_depth_flush(p); } - gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp); - gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp); - gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp); - gen6_emit_3DSTATE_CLEAR_PARAMS(p->dev, clear_params, p->cp); + p->gen6_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp); + + /* TODO */ + p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp); } } @@ -780,10 +742,10 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(p->dev, + p->gen6_3DSTATE_POLY_STIPPLE_PATTERN(p->dev, &ilo->poly_stipple, p->cp); - gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(p->dev, 0, 0, p->cp); + p->gen6_3DSTATE_POLY_STIPPLE_OFFSET(p->dev, 0, 0, p->cp); } /* 
3DSTATE_LINE_STIPPLE */ @@ -791,7 +753,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_3DSTATE_LINE_STIPPLE(p->dev, + p->gen6_3DSTATE_LINE_STIPPLE(p->dev, ilo->rasterizer->state.line_stipple_pattern, ilo->rasterizer->state.line_stipple_factor + 1, p->cp); } @@ -801,7 +763,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_3DSTATE_AA_LINE_PARAMETERS(p->dev, p->cp); + p->gen6_3DSTATE_AA_LINE_PARAMETERS(p->dev, p->cp); } } @@ -812,23 +774,23 @@ gen6_pipeline_state_viewports(struct ilo_3d_pipeline *p, { /* SF_CLIP_VIEWPORT and CC_VIEWPORT */ if (p->dev->gen >= ILO_GEN(7) && DIRTY(VIEWPORT)) { - p->state.SF_CLIP_VIEWPORT = gen7_emit_SF_CLIP_VIEWPORT(p->dev, + p->state.SF_CLIP_VIEWPORT = p->gen7_SF_CLIP_VIEWPORT(p->dev, ilo->viewport.cso, ilo->viewport.count, p->cp); - p->state.CC_VIEWPORT = gen6_emit_CC_VIEWPORT(p->dev, + p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev, ilo->viewport.cso, ilo->viewport.count, p->cp); session->viewport_state_changed = true; } /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */ else if (DIRTY(VIEWPORT)) { - p->state.CLIP_VIEWPORT = gen6_emit_CLIP_VIEWPORT(p->dev, + p->state.CLIP_VIEWPORT = p->gen6_CLIP_VIEWPORT(p->dev, ilo->viewport.cso, ilo->viewport.count, p->cp); - p->state.SF_VIEWPORT = gen6_emit_SF_VIEWPORT(p->dev, + p->state.SF_VIEWPORT = p->gen6_SF_VIEWPORT(p->dev, ilo->viewport.cso, ilo->viewport.count, p->cp); - p->state.CC_VIEWPORT = gen6_emit_CC_VIEWPORT(p->dev, + p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev, ilo->viewport.cso, ilo->viewport.count, p->cp); session->viewport_state_changed = true; @@ -842,8 +804,8 @@ gen6_pipeline_state_cc(struct ilo_3d_pipeline *p, { /* BLEND_STATE */ if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) { - p->state.BLEND_STATE = gen6_emit_BLEND_STATE(p->dev, - ilo->blend, &ilo->fb, ilo->dsa, p->cp); + 
p->state.BLEND_STATE = p->gen6_BLEND_STATE(p->dev, + ilo->blend, &ilo->fb, &ilo->dsa->alpha, p->cp); session->cc_state_blend_changed = true; } @@ -851,8 +813,8 @@ gen6_pipeline_state_cc(struct ilo_3d_pipeline *p, /* COLOR_CALC_STATE */ if (DIRTY(DSA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) { p->state.COLOR_CALC_STATE = - gen6_emit_COLOR_CALC_STATE(p->dev, &ilo->stencil_ref, - ilo->dsa->alpha_ref, &ilo->blend_color, p->cp); + p->gen6_COLOR_CALC_STATE(p->dev, &ilo->stencil_ref, + ilo->dsa->alpha.ref_value, &ilo->blend_color, p->cp); session->cc_state_cc_changed = true; } @@ -860,7 +822,7 @@ gen6_pipeline_state_cc(struct ilo_3d_pipeline *p, /* DEPTH_STENCIL_STATE */ if (DIRTY(DSA)) { p->state.DEPTH_STENCIL_STATE = - gen6_emit_DEPTH_STENCIL_STATE(p->dev, ilo->dsa, p->cp); + p->gen6_DEPTH_STENCIL_STATE(p->dev, ilo->dsa, p->cp); session->cc_state_dsa_changed = true; } @@ -874,7 +836,7 @@ gen6_pipeline_state_scissors(struct ilo_3d_pipeline *p, /* SCISSOR_RECT */ if (DIRTY(SCISSOR) || DIRTY(VIEWPORT)) { /* there should be as many scissors as there are viewports */ - p->state.SCISSOR_RECT = gen6_emit_SCISSOR_RECT(p->dev, + p->state.SCISSOR_RECT = p->gen6_SCISSOR_RECT(p->dev, &ilo->scissor, ilo->viewport.count, p->cp); session->scissor_state_changed = true; @@ -897,31 +859,9 @@ gen6_pipeline_state_surfaces_rt(struct ilo_3d_pipeline *p, const struct ilo_surface_cso *surface = (const struct ilo_surface_cso *) fb->state.cbufs[i]; - if (!surface) { - surface_state[i] = - gen6_emit_SURFACE_STATE(p->dev, &fb->null_rt, true, p->cp); - } - else if (fb->offset_to_layers) { - struct ilo_view_surface layer; - - assert(surface->base.u.tex.first_layer == - surface->base.u.tex.last_layer); - - ilo_gpe_init_view_surface_for_texture(ilo->dev, - ilo_texture(surface->base.texture), - surface->base.format, - surface->base.u.tex.level, 1, - surface->base.u.tex.first_layer, 1, - true, true, &layer); - - surface_state[i] = - gen6_emit_SURFACE_STATE(p->dev, &layer, true, p->cp); - } - else { 
- assert(surface && surface->is_rt); - surface_state[i] = - gen6_emit_SURFACE_STATE(p->dev, &surface->u.rt, true, p->cp); - } + assert(surface && surface->is_rt); + surface_state[i] = + p->gen6_SURFACE_STATE(p->dev, &surface->u.rt, true, p->cp); } /* @@ -929,8 +869,14 @@ gen6_pipeline_state_surfaces_rt(struct ilo_3d_pipeline *p, * brw_update_renderbuffer_surfaces() does. I don't know why. */ if (i == 0) { + struct ilo_view_surface null_surface; + + ilo_gpe_init_view_surface_null(p->dev, + fb->state.width, fb->state.height, + 1, 0, &null_surface); + surface_state[i] = - gen6_emit_SURFACE_STATE(p->dev, &fb->null_rt, true, p->cp); + p->gen6_SURFACE_STATE(p->dev, &null_surface, true, p->cp); i++; } @@ -969,7 +915,7 @@ gen6_pipeline_state_surfaces_so(struct ilo_3d_pipeline *p, (target < so->count) ? so->states[target] : NULL; if (so_target) { - surface_state[i] = gen6_emit_so_SURFACE_STATE(p->dev, + surface_state[i] = p->gen6_so_SURFACE_STATE(p->dev, so_target, so_info, i, p->cp); } else { @@ -1035,7 +981,7 @@ gen6_pipeline_state_surfaces_view(struct ilo_3d_pipeline *p, (const struct ilo_view_cso *) view->states[i]; surface_state[i] = - gen6_emit_SURFACE_STATE(p->dev, &cso->surface, false, p->cp); + p->gen6_SURFACE_STATE(p->dev, &cso->surface, false, p->cp); } else { surface_state[i] = 0; @@ -1056,39 +1002,45 @@ gen6_pipeline_state_surfaces_const(struct ilo_3d_pipeline *p, { const struct ilo_cbuf_state *cbuf = &ilo->cbuf[shader_type]; uint32_t *surface_state; - bool *binding_table_changed; int offset, count, i; - - if (!DIRTY(CBUF)) - return; + bool skip = false; /* SURFACE_STATEs for constant buffers */ switch (shader_type) { case PIPE_SHADER_VERTEX: - offset = ILO_VS_CONST_SURFACE(0); - surface_state = &p->state.vs.SURFACE_STATE[offset]; - binding_table_changed = &session->binding_table_vs_changed; + if (DIRTY(CBUF)) { + offset = ILO_VS_CONST_SURFACE(0); + surface_state = &p->state.vs.SURFACE_STATE[offset]; + + session->binding_table_vs_changed = true; + } + else { + 
skip = true; + } break; case PIPE_SHADER_FRAGMENT: - offset = ILO_WM_CONST_SURFACE(0); - surface_state = &p->state.wm.SURFACE_STATE[offset]; - binding_table_changed = &session->binding_table_fs_changed; + if (DIRTY(CBUF)) { + offset = ILO_WM_CONST_SURFACE(0); + surface_state = &p->state.wm.SURFACE_STATE[offset]; + + session->binding_table_fs_changed = true; + } + else { + skip = true; + } break; default: - return; + skip = true; break; } - /* constants are pushed via PCB */ - if (cbuf->enabled_mask == 0x1 && !cbuf->cso[0].resource) { - memset(surface_state, 0, ILO_MAX_CONST_BUFFERS * 4); + if (skip) return; - } count = util_last_bit(cbuf->enabled_mask); for (i = 0; i < count; i++) { if (cbuf->cso[i].resource) { - surface_state[i] = gen6_emit_SURFACE_STATE(p->dev, + surface_state[i] = p->gen6_SURFACE_STATE(p->dev, &cbuf->cso[i].surface, false, p->cp); } else { @@ -1100,8 +1052,6 @@ gen6_pipeline_state_surfaces_const(struct ilo_3d_pipeline *p, if (count && session->num_surfaces[shader_type] < offset + count) session->num_surfaces[shader_type] = offset + count; - - *binding_table_changed = true; } static void @@ -1158,7 +1108,7 @@ gen6_pipeline_state_binding_tables(struct ilo_3d_pipeline *p, if (size < session->num_surfaces[shader_type]) size = session->num_surfaces[shader_type]; - *binding_table_state = gen6_emit_BINDING_TABLE_STATE(p->dev, + *binding_table_state = p->gen6_BINDING_TABLE_STATE(p->dev, surface_state, size, p->cp); *binding_table_state_size = size; } @@ -1222,13 +1172,13 @@ gen6_pipeline_state_samplers(struct ilo_3d_pipeline *p, for (i = 0; i < num_samplers; i++) { border_color_state[i] = (samplers[i]) ? - gen6_emit_SAMPLER_BORDER_COLOR_STATE(p->dev, + p->gen6_SAMPLER_BORDER_COLOR_STATE(p->dev, samplers[i], p->cp) : 0; } } /* should we take the minimum of num_samplers and num_views? 
*/ - *sampler_state = gen6_emit_SAMPLER_STATE(p->dev, + *sampler_state = p->gen6_SAMPLER_STATE(p->dev, samplers, views, border_color_state, MIN2(num_samplers, num_views), p->cp); @@ -1240,83 +1190,27 @@ gen6_pipeline_state_pcb(struct ilo_3d_pipeline *p, struct gen6_pipeline_session *session) { /* push constant buffer for VS */ - if (DIRTY(VS) || DIRTY(CBUF) || DIRTY(CLIP)) { - const int cbuf0_size = (ilo->vs) ? - ilo_shader_get_kernel_param(ilo->vs, - ILO_KERNEL_PCB_CBUF0_SIZE) : 0; + if (DIRTY(VS) || DIRTY(CLIP)) { const int clip_state_size = (ilo->vs) ? ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE) : 0; - const int total_size = cbuf0_size + clip_state_size; - if (total_size) { + if (clip_state_size) { void *pcb; + p->state.vs.PUSH_CONSTANT_BUFFER_size = clip_state_size; p->state.vs.PUSH_CONSTANT_BUFFER = - gen6_emit_push_constant_buffer(p->dev, total_size, &pcb, p->cp); - p->state.vs.PUSH_CONSTANT_BUFFER_size = total_size; - - if (cbuf0_size) { - const struct ilo_cbuf_state *cbuf = - &ilo->cbuf[PIPE_SHADER_VERTEX]; - - if (cbuf0_size <= cbuf->cso[0].user_buffer_size) { - memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size); - } - else { - memcpy(pcb, cbuf->cso[0].user_buffer, - cbuf->cso[0].user_buffer_size); - memset(pcb + cbuf->cso[0].user_buffer_size, 0, - cbuf0_size - cbuf->cso[0].user_buffer_size); - } - - pcb += cbuf0_size; - } - - if (clip_state_size) - memcpy(pcb, &ilo->clip, clip_state_size); + p->gen6_push_constant_buffer(p->dev, + p->state.vs.PUSH_CONSTANT_BUFFER_size, &pcb, p->cp); - session->pcb_state_vs_changed = true; + memcpy(pcb, &ilo->clip, clip_state_size); } - else if (p->state.vs.PUSH_CONSTANT_BUFFER_size) { - p->state.vs.PUSH_CONSTANT_BUFFER = 0; + else { p->state.vs.PUSH_CONSTANT_BUFFER_size = 0; - - session->pcb_state_vs_changed = true; - } - } - - /* push constant buffer for FS */ - if (DIRTY(FS) || DIRTY(CBUF)) { - const int cbuf0_size = (ilo->fs) ? 
- ilo_shader_get_kernel_param(ilo->fs, ILO_KERNEL_PCB_CBUF0_SIZE) : 0; - - if (cbuf0_size) { - const struct ilo_cbuf_state *cbuf = &ilo->cbuf[PIPE_SHADER_FRAGMENT]; - void *pcb; - - p->state.wm.PUSH_CONSTANT_BUFFER = - gen6_emit_push_constant_buffer(p->dev, cbuf0_size, &pcb, p->cp); - p->state.wm.PUSH_CONSTANT_BUFFER_size = cbuf0_size; - - if (cbuf0_size <= cbuf->cso[0].user_buffer_size) { - memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size); - } - else { - memcpy(pcb, cbuf->cso[0].user_buffer, - cbuf->cso[0].user_buffer_size); - memset(pcb + cbuf->cso[0].user_buffer_size, 0, - cbuf0_size - cbuf->cso[0].user_buffer_size); - } - - session->pcb_state_fs_changed = true; + p->state.vs.PUSH_CONSTANT_BUFFER = 0; } - else if (p->state.wm.PUSH_CONSTANT_BUFFER_size) { - p->state.wm.PUSH_CONSTANT_BUFFER = 0; - p->state.wm.PUSH_CONSTANT_BUFFER_size = 0; - session->pcb_state_fs_changed = true; - } + session->pcb_state_vs_changed = true; } } @@ -1486,14 +1380,14 @@ ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p) if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | - GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | - GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH | - GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE | - GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - GEN6_PIPE_CONTROL_WRITE_NONE | - GEN6_PIPE_CONTROL_CS_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_TC_FLUSH | + PIPE_CONTROL_NO_WRITE | + PIPE_CONTROL_CS_STALL, 0, 0, false, p->cp); } @@ -1504,9 +1398,9 @@ ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, true); - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_WRITE_TIMESTAMP, - bo, index * sizeof(uint64_t), + p->gen6_PIPE_CONTROL(p->dev, + 
PIPE_CONTROL_WRITE_TIMESTAMP, + bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE, true, p->cp); } @@ -1517,379 +1411,156 @@ ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6)) gen6_wa_pipe_control_post_sync(p, false); - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_STALL | - GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT, - bo, index * sizeof(uint64_t), + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT, + bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE, true, p->cp); } -void -ilo_3d_pipeline_emit_write_statistics_gen6(struct ilo_3d_pipeline *p, - struct intel_bo *bo, int index) -{ - uint32_t regs[] = { - GEN6_REG_IA_VERTICES_COUNT, - GEN6_REG_IA_PRIMITIVES_COUNT, - GEN6_REG_VS_INVOCATION_COUNT, - GEN6_REG_GS_INVOCATION_COUNT, - GEN6_REG_GS_PRIMITIVES_COUNT, - GEN6_REG_CL_INVOCATION_COUNT, - GEN6_REG_CL_PRIMITIVES_COUNT, - GEN6_REG_PS_INVOCATION_COUNT, - p->dev->gen >= ILO_GEN(7) ? GEN6_REG_HS_INVOCATION_COUNT : 0, - p->dev->gen >= ILO_GEN(7) ? 
GEN6_REG_DS_INVOCATION_COUNT : 0, - 0, - }; - int i; - - p->emit_flush(p); - - for (i = 0; i < Elements(regs); i++) { - const uint32_t bo_offset = (index + i) * sizeof(uint64_t); - - if (regs[i]) { - /* store lower 32 bits */ - gen6_emit_MI_STORE_REGISTER_MEM(p->dev, - bo, bo_offset, regs[i], p->cp); - /* store higher 32 bits */ - gen6_emit_MI_STORE_REGISTER_MEM(p->dev, - bo, bo_offset + 4, regs[i] + 4, p->cp); - } - else { - gen6_emit_MI_STORE_DATA_IMM(p->dev, - bo, bo_offset, 0, true, p->cp); - } - } -} - -static void -gen6_rectlist_vs_to_sf(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - gen6_emit_3DSTATE_CONSTANT_VS(p->dev, NULL, NULL, 0, p->cp); - gen6_emit_3DSTATE_VS(p->dev, NULL, 0, p->cp); - - gen6_wa_pipe_control_vs_const_flush(p); - - gen6_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp); - gen6_emit_3DSTATE_GS(p->dev, NULL, NULL, 0, p->cp); - - gen6_emit_3DSTATE_CLIP(p->dev, NULL, NULL, false, 0, p->cp); - gen6_emit_3DSTATE_SF(p->dev, NULL, NULL, p->cp); -} - -static void -gen6_rectlist_wm(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - uint32_t hiz_op; - - switch (blitter->op) { - case ILO_BLITTER_RECTLIST_CLEAR_ZS: - hiz_op = GEN6_WM_DW4_DEPTH_CLEAR; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_Z: - hiz_op = GEN6_WM_DW4_DEPTH_RESOLVE; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_HIZ: - hiz_op = GEN6_WM_DW4_HIZ_RESOLVE; - break; - default: - hiz_op = 0; - break; - } - - gen6_emit_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp); - - gen6_wa_pipe_control_wm_max_threads_stall(p); - gen6_emit_3DSTATE_WM(p->dev, NULL, 0, NULL, false, false, hiz_op, p->cp); -} - -static void -gen6_rectlist_wm_depth(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - gen6_wa_pipe_control_wm_depth_flush(p); - - if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH | - ILO_BLITTER_USE_FB_STENCIL)) { 
- gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, - &blitter->fb.dst.u.zs, p->cp); - } - - if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) { - gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, - &blitter->fb.dst.u.zs, p->cp); - } - - if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) { - gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, - &blitter->fb.dst.u.zs, p->cp); - } - - gen6_emit_3DSTATE_CLEAR_PARAMS(p->dev, - blitter->depth_clear_value, p->cp); -} - -static void -gen6_rectlist_wm_multisample(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - const uint32_t *packed_sample_pos = (blitter->fb.num_samples > 1) ? - &p->packed_sample_position_4x : &p->packed_sample_position_1x; - - gen6_wa_pipe_control_wm_multisample_flush(p); - - gen6_emit_3DSTATE_MULTISAMPLE(p->dev, blitter->fb.num_samples, - packed_sample_pos, true, p->cp); - - gen6_emit_3DSTATE_SAMPLE_MASK(p->dev, - (1 << blitter->fb.num_samples) - 1, p->cp); -} - -static void -gen6_rectlist_commands(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - gen6_wa_pipe_control_post_sync(p, false); - - gen6_rectlist_wm_multisample(p, blitter, session); - - gen6_emit_STATE_BASE_ADDRESS(p->dev, - NULL, /* General State Base */ - p->cp->bo, /* Surface State Base */ - p->cp->bo, /* Dynamic State Base */ - NULL, /* Indirect Object Base */ - NULL, /* Instruction Base */ - 0, 0, 0, 0, p->cp); - - gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev, - &blitter->ve, &blitter->vb, p->cp); - - gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev, - &blitter->ve, false, false, p->cp); - - gen6_emit_3DSTATE_URB(p->dev, - p->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0, p->cp); - /* 3DSTATE_URB workaround */ - if (p->state.gs.active) { - ilo_3d_pipeline_emit_flush_gen6(p); - p->state.gs.active = false; - } - - if (blitter->uses & - (ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_CC)) { - gen6_emit_3DSTATE_CC_STATE_POINTERS(p->dev, 0, - 
session->DEPTH_STENCIL_STATE, session->COLOR_CALC_STATE, p->cp); - } - - gen6_rectlist_vs_to_sf(p, blitter, session); - gen6_rectlist_wm(p, blitter, session); - - if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) { - gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev, - 0, 0, session->CC_VIEWPORT, p->cp); - } - - gen6_rectlist_wm_depth(p, blitter, session); - - gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0, - blitter->fb.width, blitter->fb.height, p->cp); - - gen6_emit_3DPRIMITIVE(p->dev, &blitter->draw, NULL, true, p->cp); -} - -static void -gen6_rectlist_states(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - if (blitter->uses & ILO_BLITTER_USE_DSA) { - session->DEPTH_STENCIL_STATE = - gen6_emit_DEPTH_STENCIL_STATE(p->dev, &blitter->dsa, p->cp); - } - - if (blitter->uses & ILO_BLITTER_USE_CC) { - session->COLOR_CALC_STATE = - gen6_emit_COLOR_CALC_STATE(p->dev, &blitter->cc.stencil_ref, - blitter->cc.alpha_ref, &blitter->cc.blend_color, p->cp); - } - - if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) { - session->CC_VIEWPORT = - gen6_emit_CC_VIEWPORT(p->dev, &blitter->viewport, 1, p->cp); - } -} - -static void -ilo_3d_pipeline_emit_rectlist_gen6(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter) -{ - struct gen6_rectlist_session session; - - memset(&session, 0, sizeof(session)); - gen6_rectlist_states(p, blitter, &session); - gen6_rectlist_commands(p, blitter, &session); -} - static int -gen6_pipeline_max_command_size(const struct ilo_3d_pipeline *p) +gen6_pipeline_estimate_commands(const struct ilo_3d_pipeline *p, + const struct ilo_gpe_gen6 *gen6, + const struct ilo_context *ilo) { static int size; + enum ilo_gpe_gen6_command cmd; + + if (size) + return size; + + for (cmd = 0; cmd < ILO_GPE_GEN6_COMMAND_COUNT; cmd++) { + int count; + + switch (cmd) { + case ILO_GPE_GEN6_PIPE_CONTROL: + /* for the workaround */ + count = 2; + /* another one after 3DSTATE_URB */ + count += 1; + /* and another one 
after 3DSTATE_CONSTANT_VS */ + count += 1; + break; + case ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX: + /* there are 4 SVBIs */ + count = 4; + break; + case ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS: + count = 33; + break; + case ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS: + count = 34; + break; + case ILO_GPE_GEN6_MEDIA_VFE_STATE: + case ILO_GPE_GEN6_MEDIA_CURBE_LOAD: + case ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD: + case ILO_GPE_GEN6_MEDIA_GATEWAY_STATE: + case ILO_GPE_GEN6_MEDIA_STATE_FLUSH: + case ILO_GPE_GEN6_MEDIA_OBJECT_WALKER: + /* media commands */ + count = 0; + break; + default: + count = 1; + break; + } - if (!size) { - size += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 3; - size += GEN6_3DSTATE_GS_SVB_INDEX__SIZE * 4; - size += GEN6_PIPE_CONTROL__SIZE * 5; - - size += - GEN6_STATE_BASE_ADDRESS__SIZE + - GEN6_STATE_SIP__SIZE + - GEN6_3DSTATE_VF_STATISTICS__SIZE + - GEN6_PIPELINE_SELECT__SIZE + - GEN6_3DSTATE_BINDING_TABLE_POINTERS__SIZE + - GEN6_3DSTATE_SAMPLER_STATE_POINTERS__SIZE + - GEN6_3DSTATE_URB__SIZE + - GEN6_3DSTATE_VERTEX_BUFFERS__SIZE + - GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE + - GEN6_3DSTATE_INDEX_BUFFER__SIZE + - GEN6_3DSTATE_VIEWPORT_STATE_POINTERS__SIZE + - GEN6_3DSTATE_CC_STATE_POINTERS__SIZE + - GEN6_3DSTATE_SCISSOR_STATE_POINTERS__SIZE + - GEN6_3DSTATE_VS__SIZE + - GEN6_3DSTATE_GS__SIZE + - GEN6_3DSTATE_CLIP__SIZE + - GEN6_3DSTATE_SF__SIZE + - GEN6_3DSTATE_WM__SIZE + - GEN6_3DSTATE_SAMPLE_MASK__SIZE + - GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE + - GEN6_3DSTATE_DEPTH_BUFFER__SIZE + - GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE + - GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE + - GEN6_3DSTATE_LINE_STIPPLE__SIZE + - GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE + - GEN6_3DSTATE_MULTISAMPLE__SIZE + - GEN6_3DSTATE_STENCIL_BUFFER__SIZE + - GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE + - GEN6_3DSTATE_CLEAR_PARAMS__SIZE + - GEN6_3DPRIMITIVE__SIZE; + if (count) + size += gen6->estimate_command_size(p->dev, cmd, count); } return size; } -int -gen6_pipeline_estimate_state_size(const struct 
ilo_3d_pipeline *p, - const struct ilo_context *ilo) +static int +gen6_pipeline_estimate_states(const struct ilo_3d_pipeline *p, + const struct ilo_gpe_gen6 *gen6, + const struct ilo_context *ilo) { static int static_size; - int sh_type, size; + int shader_type, count, size; if (!static_size) { - /* 64 bytes, or 16 dwords */ - const int alignment = 64 / 4; - - /* pad first */ - size = alignment - 1; - - /* CC states */ - size += align(GEN6_BLEND_STATE__SIZE * ILO_MAX_DRAW_BUFFERS, alignment); - size += align(GEN6_DEPTH_STENCIL_STATE__SIZE, alignment); - size += align(GEN6_COLOR_CALC_STATE__SIZE, alignment); - - /* viewport arrays */ - if (p->dev->gen >= ILO_GEN(7)) { - size += - align(GEN7_SF_CLIP_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 16) + - align(GEN6_CC_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) + - align(GEN6_SCISSOR_RECT__SIZE * ILO_MAX_VIEWPORTS, 8); - } - else { - size += - align(GEN6_SF_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) + - align(GEN6_CLIP_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) + - align(GEN6_CC_VIEWPORT__SIZE * ILO_MAX_VIEWPORTS, 8) + - align(GEN6_SCISSOR_RECT__SIZE * ILO_MAX_VIEWPORTS, 8); - } + struct { + enum ilo_gpe_gen6_state state; + int count; + } static_states[] = { + /* viewports */ + { ILO_GPE_GEN6_SF_VIEWPORT, 1 }, + { ILO_GPE_GEN6_CLIP_VIEWPORT, 1 }, + { ILO_GPE_GEN6_CC_VIEWPORT, 1 }, + /* cc */ + { ILO_GPE_GEN6_COLOR_CALC_STATE, 1 }, + { ILO_GPE_GEN6_BLEND_STATE, ILO_MAX_DRAW_BUFFERS }, + { ILO_GPE_GEN6_DEPTH_STENCIL_STATE, 1 }, + /* scissors */ + { ILO_GPE_GEN6_SCISSOR_RECT, 1 }, + /* binding table (vs, gs, fs) */ + { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES }, + { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES }, + { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES }, + }; + int i; - static_size = size; + for (i = 0; i < Elements(static_states); i++) { + static_size += gen6->estimate_state_size(p->dev, + static_states[i].state, + static_states[i].count); + } } size = static_size; - for (sh_type = 0; sh_type < 
PIPE_SHADER_TYPES; sh_type++) { - const int alignment = 32 / 4; - int num_samplers, num_surfaces, pcb_size; - - /* samplers */ - num_samplers = ilo->sampler[sh_type].count; - - /* sampler views and constant buffers */ - num_surfaces = ilo->view[sh_type].count + - util_bitcount(ilo->cbuf[sh_type].enabled_mask); - - pcb_size = 0; + /* + * render targets (fs) + * stream outputs (gs) + * sampler views (vs, fs) + * constant buffers (vs, fs) + */ + count = ilo->fb.state.nr_cbufs; - switch (sh_type) { - case PIPE_SHADER_VERTEX: - if (ilo->vs) { - if (p->dev->gen == ILO_GEN(6)) { - const struct pipe_stream_output_info *so_info = - ilo_shader_get_kernel_so_info(ilo->vs); + if (ilo->gs) { + const struct pipe_stream_output_info *so_info = + ilo_shader_get_kernel_so_info(ilo->gs); - /* stream outputs */ - num_surfaces += so_info->num_outputs; - } + count += so_info->num_outputs; + } + else if (ilo->vs) { + const struct pipe_stream_output_info *so_info = + ilo_shader_get_kernel_so_info(ilo->vs); - pcb_size = ilo_shader_get_kernel_param(ilo->vs, - ILO_KERNEL_PCB_CBUF0_SIZE); - pcb_size += ilo_shader_get_kernel_param(ilo->vs, - ILO_KERNEL_VS_PCB_UCP_SIZE); - } - break; - case PIPE_SHADER_GEOMETRY: - if (ilo->gs && p->dev->gen == ILO_GEN(6)) { - const struct pipe_stream_output_info *so_info = - ilo_shader_get_kernel_so_info(ilo->gs); + count += so_info->num_outputs; + } - /* stream outputs */ - num_surfaces += so_info->num_outputs; - } - break; - case PIPE_SHADER_FRAGMENT: - /* render targets */ - num_surfaces += ilo->fb.state.nr_cbufs; + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + count += ilo->view[shader_type].count; + count += util_bitcount(ilo->cbuf[shader_type].enabled_mask); + } - if (ilo->fs) { - pcb_size = ilo_shader_get_kernel_param(ilo->fs, - ILO_KERNEL_PCB_CBUF0_SIZE); - } - break; - default: - break; - } + if (count) { + size += gen6->estimate_state_size(p->dev, + ILO_GPE_GEN6_SURFACE_STATE, count); + } - /* SAMPLER_STATE array and 
SAMPLER_BORDER_COLORs */ - if (num_samplers) { - size += align(GEN6_SAMPLER_STATE__SIZE * num_samplers, alignment) + - align(GEN6_SAMPLER_BORDER_COLOR__SIZE, alignment) * num_samplers; + /* samplers (vs, fs) */ + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + count = ilo->sampler[shader_type].count; + if (count) { + size += gen6->estimate_state_size(p->dev, + ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE, count); + size += gen6->estimate_state_size(p->dev, + ILO_GPE_GEN6_SAMPLER_STATE, count); } + } - /* BINDING_TABLE_STATE and SURFACE_STATEs */ - if (num_surfaces) { - size += align(num_surfaces, alignment) + - align(GEN6_SURFACE_STATE__SIZE, alignment) * num_surfaces; - } + /* pcb (vs) */ + if (ilo->vs && + ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) { + const int pcb_size = + ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE); - /* PCB */ - if (pcb_size) - size += align(pcb_size, alignment); + size += gen6->estimate_state_size(p->dev, + ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER, pcb_size); } return size; @@ -1900,6 +1571,7 @@ ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p, enum ilo_3d_pipeline_action action, const void *arg) { + const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get(); int size; switch (action) { @@ -1907,31 +1579,21 @@ ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p, { const struct ilo_context *ilo = arg; - size = gen6_pipeline_max_command_size(p) + - gen6_pipeline_estimate_state_size(p, ilo); + size = gen6_pipeline_estimate_commands(p, gen6, ilo) + + gen6_pipeline_estimate_states(p, gen6, ilo); } break; case ILO_3D_PIPELINE_FLUSH: - size = GEN6_PIPE_CONTROL__SIZE * 3; + size = gen6->estimate_command_size(p->dev, + ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3; break; case ILO_3D_PIPELINE_WRITE_TIMESTAMP: - size = GEN6_PIPE_CONTROL__SIZE * 2; + size = gen6->estimate_command_size(p->dev, + ILO_GPE_GEN6_PIPE_CONTROL, 1) * 2; break; case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT: - size = 
GEN6_PIPE_CONTROL__SIZE * 3; - break; - case ILO_3D_PIPELINE_WRITE_STATISTICS: - { - const int num_regs = 8; - const int num_pads = 3; - - size = GEN6_PIPE_CONTROL__SIZE; - size += GEN6_MI_STORE_REGISTER_MEM__SIZE * 2 * num_regs; - size += GEN6_MI_STORE_DATA_IMM__SIZE * num_pads; - } - break; - case ILO_3D_PIPELINE_RECTLIST: - size = 64 + 256; /* states + commands */ + size = gen6->estimate_command_size(p->dev, + ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3; break; default: assert(!"unknown 3D pipeline action"); @@ -1945,11 +1607,64 @@ ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p, void ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p) { + const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get(); + p->estimate_size = ilo_3d_pipeline_estimate_size_gen6; p->emit_draw = ilo_3d_pipeline_emit_draw_gen6; p->emit_flush = ilo_3d_pipeline_emit_flush_gen6; p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6; p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6; - p->emit_write_statistics = ilo_3d_pipeline_emit_write_statistics_gen6; - p->emit_rectlist = ilo_3d_pipeline_emit_rectlist_gen6; + +#define GEN6_USE(p, name, from) \ + p->gen6_ ## name = from->emit_ ## name + GEN6_USE(p, STATE_BASE_ADDRESS, gen6); + GEN6_USE(p, STATE_SIP, gen6); + GEN6_USE(p, PIPELINE_SELECT, gen6); + GEN6_USE(p, 3DSTATE_BINDING_TABLE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_URB, gen6); + GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen6); + GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen6); + GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen6); + GEN6_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_CC_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_VS, gen6); + GEN6_USE(p, 3DSTATE_GS, gen6); + GEN6_USE(p, 3DSTATE_CLIP, gen6); + GEN6_USE(p, 3DSTATE_SF, gen6); + GEN6_USE(p, 3DSTATE_WM, gen6); + GEN6_USE(p, 3DSTATE_CONSTANT_VS, 
gen6); + GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen6); + GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen6); + GEN6_USE(p, 3DSTATE_SAMPLE_MASK, gen6); + GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen6); + GEN6_USE(p, 3DSTATE_DEPTH_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen6); + GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen6); + GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen6); + GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen6); + GEN6_USE(p, 3DSTATE_GS_SVB_INDEX, gen6); + GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen6); + GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen6); + GEN6_USE(p, PIPE_CONTROL, gen6); + GEN6_USE(p, 3DPRIMITIVE, gen6); + GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen6); + GEN6_USE(p, SF_VIEWPORT, gen6); + GEN6_USE(p, CLIP_VIEWPORT, gen6); + GEN6_USE(p, CC_VIEWPORT, gen6); + GEN6_USE(p, COLOR_CALC_STATE, gen6); + GEN6_USE(p, BLEND_STATE, gen6); + GEN6_USE(p, DEPTH_STENCIL_STATE, gen6); + GEN6_USE(p, SCISSOR_RECT, gen6); + GEN6_USE(p, BINDING_TABLE_STATE, gen6); + GEN6_USE(p, SURFACE_STATE, gen6); + GEN6_USE(p, so_SURFACE_STATE, gen6); + GEN6_USE(p, SAMPLER_STATE, gen6); + GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen6); + GEN6_USE(p, push_constant_buffer, gen6); +#undef GEN6_USE } diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h index 16e96c07b..a9c4051d2 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h @@ -73,12 +73,6 @@ struct gen6_pipeline_session { int num_surfaces[PIPE_SHADER_TYPES]; }; -struct gen6_rectlist_session { - uint32_t DEPTH_STENCIL_STATE; - uint32_t COLOR_CALC_STATE; - uint32_t CC_VIEWPORT; -}; - void gen6_pipeline_prepare(const struct ilo_3d_pipeline *p, const struct ilo_context *ilo, @@ -120,6 +114,11 @@ gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p, struct gen6_pipeline_session *session); void 
+gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void gen6_pipeline_vs(struct ilo_3d_pipeline *p, const struct ilo_context *ilo, struct gen6_pipeline_session *session); @@ -149,10 +148,6 @@ gen6_pipeline_update_max_svbi(struct ilo_3d_pipeline *p, const struct ilo_context *ilo, struct gen6_pipeline_session *session); -int -gen6_pipeline_estimate_state_size(const struct ilo_3d_pipeline *p, - const struct ilo_context *ilo); - void ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p); @@ -165,10 +160,6 @@ ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p, struct intel_bo *bo, int index); void -ilo_3d_pipeline_emit_write_statistics_gen6(struct ilo_3d_pipeline *p, - struct intel_bo *bo, int index); - -void ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p); #endif /* ILO_3D_PIPELINE_GEN6_H */ diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c index 557e5a8a2..fc16f80a5 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c @@ -25,10 +25,10 @@ * Chia-I Wu <olv@lunarg.com> */ -#include "genhw/genhw.h" #include "util/u_dual_blend.h" +#include "intel_reg.h" -#include "ilo_blitter.h" +#include "ilo_common.h" #include "ilo_context.h" #include "ilo_cp.h" #include "ilo_gpe_gen7.h" @@ -44,9 +44,9 @@ gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p, bool change_depth_state) { struct intel_bo *bo = NULL; - uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL; + uint32_t dw1 = PIPE_CONTROL_CS_STALL; - assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)); + assert(p->dev->gen == ILO_GEN(7)); /* emit once */ if (p->state.has_gen6_wa_pipe_control) @@ -81,20 +81,20 @@ gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p, */ if (change_multisample_state) - dw1 |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH; + dw1 |= 
PIPE_CONTROL_DEPTH_CACHE_FLUSH; if (change_depth_state) { - dw1 |= GEN6_PIPE_CONTROL_WRITE_IMM; + dw1 |= PIPE_CONTROL_WRITE_IMMEDIATE; bo = p->workaround_bo; } - gen6_emit_PIPE_CONTROL(p->dev, dw1, bo, 0, false, p->cp); + p->gen6_PIPE_CONTROL(p->dev, dw1, bo, 0, false, p->cp); } static void gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p) { - assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)); + assert(p->dev->gen == ILO_GEN(7)); /* * From the Ivy Bridge PRM, volume 2 part 1, page 106: @@ -105,9 +105,9 @@ gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p) * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL * needs to be sent before any combination of VS associated 3DSTATE." */ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_STALL | - GEN6_PIPE_CONTROL_WRITE_IMM, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_IMMEDIATE, p->workaround_bo, 0, false, p->cp); } @@ -115,7 +115,7 @@ static void gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p, bool change_depth_buffer) { - assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)); + assert(p->dev->gen == ILO_GEN(7)); /* * From the Ivy Bridge PRM, volume 2 part 1, page 276: @@ -144,36 +144,36 @@ gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p, * guarantee that the pipeline from WM onwards is already flushed * (e.g., via a preceding MI_FLUSH)." 
*/ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_STALL, NULL, 0, false, p->cp); if (!change_depth_buffer) return; - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_CACHE_FLUSH, NULL, 0, false, p->cp); - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_DEPTH_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_DEPTH_STALL, NULL, 0, false, p->cp); } static void -gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p) +gen7_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p) { - assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)); + assert(p->dev->gen == ILO_GEN(7)); /* * From the Ivy Bridge PRM, volume 2 part 1, page 286: * - * "If this field (Maximum Number of Threads in 3DSTATE_PS) is changed + * "If this field (Maximum Number of Threads in 3DSTATE_WM) is changed * between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at * Pixel Scoreboard set is required to be issued." */ - gen6_emit_PIPE_CONTROL(p->dev, - GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL, + p->gen6_PIPE_CONTROL(p->dev, + PIPE_CONTROL_STALL_AT_SCOREBOARD, NULL, 0, false, p->cp); } @@ -188,8 +188,7 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p, /* 3DSTATE_URB_{VS,GS,HS,DS} */ if (DIRTY(VE) || DIRTY(VS)) { /* the first 16KB are reserved for VS and PS PCBs */ - const int offset = - (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384; + const int offset = 16 * 1024; int vs_entry_size, vs_total_size; vs_entry_size = (ilo->vs) ? 
@@ -211,12 +210,12 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p, gen7_wa_pipe_control_vs_depth_stall(p); - gen7_emit_3DSTATE_URB_VS(p->dev, + p->gen7_3DSTATE_URB_VS(p->dev, offset, vs_total_size, vs_entry_size, p->cp); - gen7_emit_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp); - gen7_emit_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp); - gen7_emit_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp); + p->gen7_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp); + p->gen7_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp); + p->gen7_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp); } } @@ -228,21 +227,16 @@ gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p, /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */ if (session->hw_ctx_changed) { /* - * Push constant buffers are only allowed to take up at most the first - * 16KB of the URB. Split the space evenly for VS and FS. + * push constant buffers are only allowed to take up at most the first + * 16KB of the URB */ - const int max_size = - (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 
32768 : 16384; - const int size = max_size / 2; - int offset = 0; + p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, + 0, 8192, p->cp); - gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, offset, size, p->cp); - offset += size; + p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, + 8192, 8192, p->cp); - gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, offset, size, p->cp); - - if (p->dev->gen == ILO_GEN(7)) - gen7_wa_pipe_control_cs_stall(p, true, true); + gen7_wa_pipe_control_cs_stall(p, true, true); } } @@ -253,10 +247,10 @@ gen7_pipeline_common_pointers_1(struct ilo_3d_pipeline *p, { /* 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} */ if (session->viewport_state_changed) { - gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev, + p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev, p->state.CC_VIEWPORT, p->cp); - gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(p->dev, + p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(p->dev, p->state.SF_CLIP_VIEWPORT, p->cp); } } @@ -268,19 +262,19 @@ gen7_pipeline_common_pointers_2(struct ilo_3d_pipeline *p, { /* 3DSTATE_BLEND_STATE_POINTERS */ if (session->cc_state_blend_changed) { - gen7_emit_3DSTATE_BLEND_STATE_POINTERS(p->dev, + p->gen7_3DSTATE_BLEND_STATE_POINTERS(p->dev, p->state.BLEND_STATE, p->cp); } /* 3DSTATE_CC_STATE_POINTERS */ if (session->cc_state_cc_changed) { - gen7_emit_3DSTATE_CC_STATE_POINTERS(p->dev, + p->gen7_3DSTATE_CC_STATE_POINTERS(p->dev, p->state.COLOR_CALC_STATE, p->cp); } /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS */ if (session->cc_state_dsa_changed) { - gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev, + p->gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev, p->state.DEPTH_STENCIL_STATE, p->cp); } } @@ -294,40 +288,26 @@ gen7_pipeline_vs(struct ilo_3d_pipeline *p, const bool emit_3dstate_sampler_state = session->sampler_state_vs_changed; /* see gen6_pipeline_vs() */ const bool emit_3dstate_constant_vs = session->pcb_state_vs_changed; - const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) || - 
session->kernel_bo_changed); + const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS)); /* emit depth stall before any of the VS commands */ if (emit_3dstate_binding_table || emit_3dstate_sampler_state || - emit_3dstate_constant_vs || emit_3dstate_vs) - gen7_wa_pipe_control_vs_depth_stall(p); + emit_3dstate_constant_vs || emit_3dstate_vs) + gen7_wa_pipe_control_vs_depth_stall(p); /* 3DSTATE_BINDING_TABLE_POINTERS_VS */ if (emit_3dstate_binding_table) { - gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(p->dev, - p->state.vs.BINDING_TABLE_STATE, p->cp); + p->gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(p->dev, + p->state.vs.BINDING_TABLE_STATE, p->cp); } /* 3DSTATE_SAMPLER_STATE_POINTERS_VS */ if (emit_3dstate_sampler_state) { - gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(p->dev, - p->state.vs.SAMPLER_STATE, p->cp); - } - - /* 3DSTATE_CONSTANT_VS */ - if (emit_3dstate_constant_vs) { - gen7_emit_3DSTATE_CONSTANT_VS(p->dev, - &p->state.vs.PUSH_CONSTANT_BUFFER, - &p->state.vs.PUSH_CONSTANT_BUFFER_size, - 1, p->cp); + p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(p->dev, + p->state.vs.SAMPLER_STATE, p->cp); } - /* 3DSTATE_VS */ - if (emit_3dstate_vs) { - const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count; - - gen6_emit_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp); - } + gen6_pipeline_vs(p, ilo, session); } static void @@ -337,13 +317,13 @@ gen7_pipeline_hs(struct ilo_3d_pipeline *p, { /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */ if (session->hw_ctx_changed) { - gen7_emit_3DSTATE_CONSTANT_HS(p->dev, 0, 0, 0, p->cp); - gen7_emit_3DSTATE_HS(p->dev, NULL, 0, p->cp); + p->gen7_3DSTATE_CONSTANT_HS(p->dev, 0, 0, 0, p->cp); + p->gen7_3DSTATE_HS(p->dev, NULL, 0, p->cp); } /* 3DSTATE_BINDING_TABLE_POINTERS_HS */ if (session->hw_ctx_changed) - gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(p->dev, 0, p->cp); + p->gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(p->dev, 0, p->cp); } static void @@ -353,7 +333,7 @@ gen7_pipeline_te(struct ilo_3d_pipeline *p, { /* 3DSTATE_TE */ if 
(session->hw_ctx_changed) - gen7_emit_3DSTATE_TE(p->dev, p->cp); + p->gen7_3DSTATE_TE(p->dev, p->cp); } static void @@ -363,13 +343,13 @@ gen7_pipeline_ds(struct ilo_3d_pipeline *p, { /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */ if (session->hw_ctx_changed) { - gen7_emit_3DSTATE_CONSTANT_DS(p->dev, 0, 0, 0, p->cp); - gen7_emit_3DSTATE_DS(p->dev, NULL, 0, p->cp); + p->gen7_3DSTATE_CONSTANT_DS(p->dev, 0, 0, 0, p->cp); + p->gen7_3DSTATE_DS(p->dev, NULL, 0, p->cp); } /* 3DSTATE_BINDING_TABLE_POINTERS_DS */ if (session->hw_ctx_changed) - gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(p->dev, 0, p->cp); + p->gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(p->dev, 0, p->cp); } @@ -380,13 +360,13 @@ gen7_pipeline_gs(struct ilo_3d_pipeline *p, { /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */ if (session->hw_ctx_changed) { - gen7_emit_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp); - gen7_emit_3DSTATE_GS(p->dev, NULL, 0, p->cp); + p->gen6_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp); + p->gen7_3DSTATE_GS(p->dev, NULL, 0, p->cp); } /* 3DSTATE_BINDING_TABLE_POINTERS_GS */ if (session->binding_table_gs_changed) { - gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(p->dev, + p->gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(p->dev, p->state.gs.BINDING_TABLE_STATE, p->cp); } } @@ -422,17 +402,23 @@ gen7_pipeline_sol(struct ilo_3d_pipeline *p, const int stride = so_info->stride[i] * 4; /* in bytes */ int base = 0; - gen7_emit_3DSTATE_SO_BUFFER(p->dev, i, base, stride, + /* reset HW write offsets and offset buffer base */ + if (!p->cp->render_ctx) { + ilo_cp_set_one_off_flags(p->cp, INTEL_EXEC_GEN7_SOL_RESET); + base += p->state.so_num_vertices * stride; + } + + p->gen7_3DSTATE_SO_BUFFER(p->dev, i, base, stride, ilo->so.states[i], p->cp); } for (; i < 4; i++) - gen7_emit_3DSTATE_SO_BUFFER(p->dev, i, 0, 0, NULL, p->cp); + p->gen7_3DSTATE_SO_BUFFER(p->dev, i, 0, 0, NULL, p->cp); } /* 3DSTATE_SO_DECL_LIST */ if (dirty_sh && ilo->so.enabled) - gen7_emit_3DSTATE_SO_DECL_LIST(p->dev, so_info, p->cp); + 
p->gen7_3DSTATE_SO_DECL_LIST(p->dev, so_info, p->cp); /* 3DSTATE_STREAMOUT */ if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) { @@ -440,7 +426,7 @@ gen7_pipeline_sol(struct ilo_3d_pipeline *p, const int output_count = ilo_shader_get_kernel_param(shader, ILO_KERNEL_OUTPUT_COUNT); - gen7_emit_3DSTATE_STREAMOUT(p->dev, buffer_mask, output_count, + p->gen7_3DSTATE_STREAMOUT(p->dev, buffer_mask, output_count, ilo->rasterizer->state.rasterizer_discard, p->cp); } } @@ -451,16 +437,15 @@ gen7_pipeline_sf(struct ilo_3d_pipeline *p, struct gen6_pipeline_session *session) { /* 3DSTATE_SBE */ - if (DIRTY(RASTERIZER) || DIRTY(FS)) - gen7_emit_3DSTATE_SBE(p->dev, ilo->rasterizer, ilo->fs, ilo->cp); + if (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS)) { + p->gen7_3DSTATE_SBE(p->dev, ilo->rasterizer, ilo->fs, + (ilo->gs) ? ilo->gs : ilo->vs, ilo->cp); + } /* 3DSTATE_SF */ if (DIRTY(RASTERIZER) || DIRTY(FB)) { - struct pipe_surface *zs = ilo->fb.state.zsbuf; - gen7_wa_pipe_control_cs_stall(p, true, true); - gen7_emit_3DSTATE_SF(p->dev, ilo->rasterizer, - (zs) ? 
zs->format : PIPE_FORMAT_NONE, p->cp); + p->gen7_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fb.state.zsbuf, p->cp); } } @@ -471,32 +456,31 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p, { /* 3DSTATE_WM */ if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) { - const bool cc_may_kill = (ilo->dsa->dw_alpha || + const bool cc_may_kill = (ilo->dsa->alpha.enabled || ilo->blend->alpha_to_coverage); - gen7_emit_3DSTATE_WM(p->dev, ilo->fs, - ilo->rasterizer, cc_may_kill, 0, p->cp); + if (p->dev->gen == ILO_GEN(7) && session->hw_ctx_changed) + gen7_wa_pipe_control_wm_max_threads_stall(p); + + p->gen7_3DSTATE_WM(p->dev, ilo->fs, + ilo->rasterizer, cc_may_kill, p->cp); } /* 3DSTATE_BINDING_TABLE_POINTERS_PS */ if (session->binding_table_fs_changed) { - gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(p->dev, + p->gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(p->dev, p->state.wm.BINDING_TABLE_STATE, p->cp); } /* 3DSTATE_SAMPLER_STATE_POINTERS_PS */ if (session->sampler_state_fs_changed) { - gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(p->dev, + p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(p->dev, p->state.wm.SAMPLER_STATE, p->cp); } /* 3DSTATE_CONSTANT_PS */ - if (session->pcb_state_fs_changed) { - gen7_emit_3DSTATE_CONSTANT_PS(p->dev, - &p->state.wm.PUSH_CONSTANT_BUFFER, - &p->state.wm.PUSH_CONSTANT_BUFFER_size, - 1, p->cp); - } + if (session->pcb_state_fs_changed) + p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp); /* 3DSTATE_PS */ if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || @@ -504,16 +488,12 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p, const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count; const bool dual_blend = ilo->blend->dual_blend; - if ((p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)) && - session->hw_ctx_changed) - gen7_wa_pipe_control_ps_max_threads_stall(p); - - gen7_emit_3DSTATE_PS(p->dev, ilo->fs, num_samplers, dual_blend, p->cp); + p->gen7_3DSTATE_PS(p->dev, ilo->fs, num_samplers, dual_blend, p->cp); } /* 
3DSTATE_SCISSOR_STATE_POINTERS */ if (session->scissor_state_changed) { - gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(p->dev, + p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev, p->state.SCISSOR_RECT, p->cp); } @@ -539,28 +519,24 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p, /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */ if (DIRTY(FB) || session->batch_bo_changed) { const struct ilo_zs_surface *zs; - uint32_t clear_params; if (ilo->fb.state.zsbuf) { const struct ilo_surface_cso *surface = (const struct ilo_surface_cso *) ilo->fb.state.zsbuf; - const struct ilo_texture_slice *slice = - ilo_texture_get_slice(ilo_texture(surface->base.texture), - surface->base.u.tex.level, surface->base.u.tex.first_layer); assert(!surface->is_rt); zs = &surface->u.zs; - clear_params = slice->clear_value; } else { zs = &ilo->fb.null_zs; - clear_params = 0; } - gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp); - gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp); - gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp); - gen7_emit_3DSTATE_CLEAR_PARAMS(p->dev, clear_params, p->cp); + p->gen7_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp); + p->gen6_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp); + p->gen6_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp); + + /* TODO */ + p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp); } } @@ -580,27 +556,17 @@ gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p, (ilo->fb.num_samples > 1) ? &p->packed_sample_position_4x : &p->packed_sample_position_1x; - gen6_emit_3DSTATE_MULTISAMPLE(p->dev, + p->gen6_3DSTATE_MULTISAMPLE(p->dev, ilo->fb.num_samples, packed_sample_pos, ilo->rasterizer->state.half_pixel_center, p->cp); - gen7_emit_3DSTATE_SAMPLE_MASK(p->dev, + p->gen7_3DSTATE_SAMPLE_MASK(p->dev, (ilo->fb.num_samples > 1) ? 
ilo->sample_mask : 0x1, ilo->fb.num_samples, p->cp); } } static void -gen7_pipeline_vf_draw(struct ilo_3d_pipeline *p, - const struct ilo_context *ilo, - struct gen6_pipeline_session *session) -{ - /* 3DPRIMITIVE */ - gen7_emit_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp); - p->state.has_gen6_wa_pipe_control = false; -} - -static void gen7_pipeline_commands(struct ilo_3d_pipeline *p, const struct ilo_context *ilo, struct gen6_pipeline_session *session) @@ -631,7 +597,7 @@ gen7_pipeline_commands(struct ilo_3d_pipeline *p, gen6_pipeline_wm_raster(p, ilo, session); gen6_pipeline_sf_rect(p, ilo, session); gen6_pipeline_vf(p, ilo, session); - gen7_pipeline_vf_draw(p, ilo, session); + gen6_pipeline_vf_draw(p, ilo, session); } static void @@ -649,283 +615,130 @@ ilo_3d_pipeline_emit_draw_gen7(struct ilo_3d_pipeline *p, gen6_pipeline_end(p, ilo, &session); } -static void -gen7_rectlist_pcb_alloc(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - /* - * Push constant buffers are only allowed to take up at most the first - * 16KB of the URB. Split the space evenly for VS and FS. - */ - const int max_size = - (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384; - const int size = max_size / 2; - int offset = 0; - - gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, offset, size, p->cp); - offset += size; - - gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, offset, size, p->cp); - - gen7_wa_pipe_control_cs_stall(p, true, true); -} - -static void -gen7_rectlist_urb(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - /* the first 16KB are reserved for VS and PS PCBs */ - const int offset = - (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 
32768 : 16384; - - gen7_emit_3DSTATE_URB_VS(p->dev, offset, p->dev->urb_size - offset, - blitter->ve.count * 4 * sizeof(float), p->cp); - - gen7_emit_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp); - gen7_emit_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp); - gen7_emit_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp); -} - -static void -gen7_rectlist_vs_to_sf(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - gen7_emit_3DSTATE_CONSTANT_VS(p->dev, NULL, NULL, 0, p->cp); - gen6_emit_3DSTATE_VS(p->dev, NULL, 0, p->cp); - - gen7_emit_3DSTATE_CONSTANT_HS(p->dev, NULL, NULL, 0, p->cp); - gen7_emit_3DSTATE_HS(p->dev, NULL, 0, p->cp); - - gen7_emit_3DSTATE_TE(p->dev, p->cp); - - gen7_emit_3DSTATE_CONSTANT_DS(p->dev, NULL, NULL, 0, p->cp); - gen7_emit_3DSTATE_DS(p->dev, NULL, 0, p->cp); - - gen7_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp); - gen7_emit_3DSTATE_GS(p->dev, NULL, 0, p->cp); - - gen7_emit_3DSTATE_STREAMOUT(p->dev, 0x0, 0, false, p->cp); - - gen6_emit_3DSTATE_CLIP(p->dev, NULL, NULL, false, 0, p->cp); - - gen7_wa_pipe_control_cs_stall(p, true, true); - - gen7_emit_3DSTATE_SF(p->dev, NULL, blitter->fb.dst.base.format, p->cp); - gen7_emit_3DSTATE_SBE(p->dev, NULL, NULL, p->cp); -} - -static void -gen7_rectlist_wm(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - uint32_t hiz_op; - - switch (blitter->op) { - case ILO_BLITTER_RECTLIST_CLEAR_ZS: - hiz_op = GEN7_WM_DW1_DEPTH_CLEAR; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_Z: - hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_HIZ: - hiz_op = GEN7_WM_DW1_HIZ_RESOLVE; - break; - default: - hiz_op = 0; - break; - } - - gen7_emit_3DSTATE_WM(p->dev, NULL, NULL, false, hiz_op, p->cp); - - gen7_emit_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp); - - gen7_wa_pipe_control_ps_max_threads_stall(p); - gen7_emit_3DSTATE_PS(p->dev, NULL, 0, false, p->cp); -} - -static void 
-gen7_rectlist_wm_depth(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) +static int +gen7_pipeline_estimate_commands(const struct ilo_3d_pipeline *p, + const struct ilo_gpe_gen7 *gen7, + const struct ilo_context *ilo) { - gen7_wa_pipe_control_wm_depth_stall(p, true); - - if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH | - ILO_BLITTER_USE_FB_STENCIL)) { - gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, - &blitter->fb.dst.u.zs, p->cp); - } - - if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) { - gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, - &blitter->fb.dst.u.zs, p->cp); - } + static int size; + enum ilo_gpe_gen7_command cmd; + + if (size) + return size; + + for (cmd = 0; cmd < ILO_GPE_GEN7_COMMAND_COUNT; cmd++) { + int count; + + switch (cmd) { + case ILO_GPE_GEN7_PIPE_CONTROL: + /* for the workaround */ + count = 2; + /* another one after 3DSTATE_URB */ + count += 1; + /* and another one after 3DSTATE_CONSTANT_VS */ + count += 1; + break; + case ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS: + count = 33; + break; + case ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS: + count = 34; + break; + case ILO_GPE_GEN7_MEDIA_VFE_STATE: + case ILO_GPE_GEN7_MEDIA_CURBE_LOAD: + case ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD: + case ILO_GPE_GEN7_MEDIA_STATE_FLUSH: + case ILO_GPE_GEN7_GPGPU_WALKER: + /* media commands */ + count = 0; + break; + default: + count = 1; + break; + } - if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) { - gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, - &blitter->fb.dst.u.zs, p->cp); + if (count) { + size += gen7->estimate_command_size(p->dev, + cmd, count); + } } - gen7_emit_3DSTATE_CLEAR_PARAMS(p->dev, - blitter->depth_clear_value, p->cp); -} - -static void -gen7_rectlist_wm_multisample(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - const uint32_t *packed_sample_pos = - (blitter->fb.num_samples > 4) ? p->packed_sample_position_8x : - (blitter->fb.num_samples > 1) ? 
&p->packed_sample_position_4x : - &p->packed_sample_position_1x; - - gen7_wa_pipe_control_cs_stall(p, true, true); - - gen6_emit_3DSTATE_MULTISAMPLE(p->dev, blitter->fb.num_samples, - packed_sample_pos, true, p->cp); - - gen7_emit_3DSTATE_SAMPLE_MASK(p->dev, - (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples, p->cp); + return size; } -static void -gen7_rectlist_commands(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - gen7_rectlist_wm_multisample(p, blitter, session); - - gen6_emit_STATE_BASE_ADDRESS(p->dev, - NULL, /* General State Base */ - p->cp->bo, /* Surface State Base */ - p->cp->bo, /* Dynamic State Base */ - NULL, /* Indirect Object Base */ - NULL, /* Instruction Base */ - 0, 0, 0, 0, p->cp); - - gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev, - &blitter->ve, &blitter->vb, p->cp); - - gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev, - &blitter->ve, false, false, p->cp); - - gen7_rectlist_pcb_alloc(p, blitter, session); - - /* needed for any VS-related commands */ - gen7_wa_pipe_control_vs_depth_stall(p); - - gen7_rectlist_urb(p, blitter, session); - - if (blitter->uses & ILO_BLITTER_USE_DSA) { - gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev, - session->DEPTH_STENCIL_STATE, p->cp); - } - - if (blitter->uses & ILO_BLITTER_USE_CC) { - gen7_emit_3DSTATE_CC_STATE_POINTERS(p->dev, - session->COLOR_CALC_STATE, p->cp); - } - - gen7_rectlist_vs_to_sf(p, blitter, session); - gen7_rectlist_wm(p, blitter, session); +static int +gen7_pipeline_estimate_states(const struct ilo_3d_pipeline *p, + const struct ilo_gpe_gen7 *gen7, + const struct ilo_context *ilo) +{ + static int static_size; + int shader_type, count, size; + + if (!static_size) { + struct { + enum ilo_gpe_gen7_state state; + int count; + } static_states[] = { + /* viewports */ + { ILO_GPE_GEN7_SF_CLIP_VIEWPORT, 1 }, + { ILO_GPE_GEN7_CC_VIEWPORT, 1 }, + /* cc */ + { ILO_GPE_GEN7_COLOR_CALC_STATE, 1 }, + { ILO_GPE_GEN7_BLEND_STATE, 
ILO_MAX_DRAW_BUFFERS }, + { ILO_GPE_GEN7_DEPTH_STENCIL_STATE, 1 }, + /* scissors */ + { ILO_GPE_GEN7_SCISSOR_RECT, 1 }, + /* binding table (vs, gs, fs) */ + { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES }, + { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES }, + { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES }, + }; + int i; - if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) { - gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev, - session->CC_VIEWPORT, p->cp); + for (i = 0; i < Elements(static_states); i++) { + static_size += gen7->estimate_state_size(p->dev, + static_states[i].state, + static_states[i].count); + } } - gen7_rectlist_wm_depth(p, blitter, session); - - gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0, - blitter->fb.width, blitter->fb.height, p->cp); - - gen7_emit_3DPRIMITIVE(p->dev, &blitter->draw, NULL, true, p->cp); -} + size = static_size; -static void -gen7_rectlist_states(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter, - struct gen6_rectlist_session *session) -{ - if (blitter->uses & ILO_BLITTER_USE_DSA) { - session->DEPTH_STENCIL_STATE = - gen6_emit_DEPTH_STENCIL_STATE(p->dev, &blitter->dsa, p->cp); + /* + * render targets (fs) + * sampler views (vs, fs) + * constant buffers (vs, fs) + */ + count = ilo->fb.state.nr_cbufs; + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + count += ilo->view[shader_type].count; + count += util_bitcount(ilo->cbuf[shader_type].enabled_mask); } - if (blitter->uses & ILO_BLITTER_USE_CC) { - session->COLOR_CALC_STATE = - gen6_emit_COLOR_CALC_STATE(p->dev, &blitter->cc.stencil_ref, - blitter->cc.alpha_ref, &blitter->cc.blend_color, p->cp); + if (count) { + size += gen7->estimate_state_size(p->dev, + ILO_GPE_GEN7_SURFACE_STATE, count); } - if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) { - session->CC_VIEWPORT = - gen6_emit_CC_VIEWPORT(p->dev, &blitter->viewport, 1, p->cp); + /* samplers (vs, fs) */ + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; 
shader_type++) { + count = ilo->sampler[shader_type].count; + if (count) { + size += gen7->estimate_state_size(p->dev, + ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE, count); + size += gen7->estimate_state_size(p->dev, + ILO_GPE_GEN7_SAMPLER_STATE, count); + } } -} -static void -ilo_3d_pipeline_emit_rectlist_gen7(struct ilo_3d_pipeline *p, - const struct ilo_blitter *blitter) -{ - struct gen6_rectlist_session session; - - memset(&session, 0, sizeof(session)); - gen7_rectlist_states(p, blitter, &session); - gen7_rectlist_commands(p, blitter, &session); -} - -static int -gen7_pipeline_max_command_size(const struct ilo_3d_pipeline *p) -{ - static int size; + /* pcb (vs) */ + if (ilo->vs && + ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) { + const int pcb_size = + ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE); - if (!size) { - size += GEN7_3DSTATE_URB_ANY__SIZE * 4; - size += GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_ANY__SIZE * 5; - size += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 5; - size += GEN7_3DSTATE_POINTERS_ANY__SIZE * (5 + 5 + 4); - size += GEN7_3DSTATE_SO_BUFFER__SIZE * 4; - size += GEN6_PIPE_CONTROL__SIZE * 5; - - size += - GEN6_STATE_BASE_ADDRESS__SIZE + - GEN6_STATE_SIP__SIZE + - GEN6_3DSTATE_VF_STATISTICS__SIZE + - GEN6_PIPELINE_SELECT__SIZE + - GEN6_3DSTATE_CLEAR_PARAMS__SIZE + - GEN6_3DSTATE_DEPTH_BUFFER__SIZE + - GEN6_3DSTATE_STENCIL_BUFFER__SIZE + - GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE + - GEN6_3DSTATE_VERTEX_BUFFERS__SIZE + - GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE + - GEN6_3DSTATE_INDEX_BUFFER__SIZE + - GEN75_3DSTATE_VF__SIZE + - GEN6_3DSTATE_VS__SIZE + - GEN6_3DSTATE_GS__SIZE + - GEN6_3DSTATE_CLIP__SIZE + - GEN6_3DSTATE_SF__SIZE + - GEN6_3DSTATE_WM__SIZE + - GEN6_3DSTATE_SAMPLE_MASK__SIZE + - GEN7_3DSTATE_HS__SIZE + - GEN7_3DSTATE_TE__SIZE + - GEN7_3DSTATE_DS__SIZE + - GEN7_3DSTATE_STREAMOUT__SIZE + - GEN7_3DSTATE_SBE__SIZE + - GEN7_3DSTATE_PS__SIZE + - GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE + - GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE + 
- GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE + - GEN6_3DSTATE_LINE_STIPPLE__SIZE + - GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE + - GEN6_3DSTATE_MULTISAMPLE__SIZE + - GEN7_3DSTATE_SO_DECL_LIST__SIZE + - GEN6_3DPRIMITIVE__SIZE; + size += gen7->estimate_state_size(p->dev, + ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER, pcb_size); } return size; @@ -936,6 +749,7 @@ ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p, enum ilo_3d_pipeline_action action, const void *arg) { + const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get(); int size; switch (action) { @@ -943,27 +757,15 @@ ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p, { const struct ilo_context *ilo = arg; - size = gen7_pipeline_max_command_size(p) + - gen6_pipeline_estimate_state_size(p, ilo); + size = gen7_pipeline_estimate_commands(p, gen7, ilo) + + gen7_pipeline_estimate_states(p, gen7, ilo); } break; case ILO_3D_PIPELINE_FLUSH: case ILO_3D_PIPELINE_WRITE_TIMESTAMP: case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT: - size = GEN6_PIPE_CONTROL__SIZE; - break; - case ILO_3D_PIPELINE_WRITE_STATISTICS: - { - const int num_regs = 10; - const int num_pads = 1; - - size = GEN6_PIPE_CONTROL__SIZE; - size += GEN6_MI_STORE_REGISTER_MEM__SIZE * 2 * num_regs; - size += GEN6_MI_STORE_DATA_IMM__SIZE * num_pads; - } - break; - case ILO_3D_PIPELINE_RECTLIST: - size = 64 + 256; /* states + commands */ + size = gen7->estimate_command_size(p->dev, + ILO_GPE_GEN7_PIPE_CONTROL, 1); break; default: assert(!"unknown 3D pipeline action"); @@ -977,11 +779,94 @@ ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p, void ilo_3d_pipeline_init_gen7(struct ilo_3d_pipeline *p) { + const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get(); + p->estimate_size = ilo_3d_pipeline_estimate_size_gen7; p->emit_draw = ilo_3d_pipeline_emit_draw_gen7; p->emit_flush = ilo_3d_pipeline_emit_flush_gen6; p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6; p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6; - 
p->emit_write_statistics = ilo_3d_pipeline_emit_write_statistics_gen6; - p->emit_rectlist = ilo_3d_pipeline_emit_rectlist_gen7; + +#define GEN6_USE(p, name, from) \ + p->gen6_ ## name = from->emit_ ## name + GEN6_USE(p, STATE_BASE_ADDRESS, gen7); + GEN6_USE(p, STATE_SIP, gen7); + GEN6_USE(p, PIPELINE_SELECT, gen7); + GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen7); + GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen7); + GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen7); + GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen7); + GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen7); + GEN6_USE(p, 3DSTATE_VS, gen7); + GEN6_USE(p, 3DSTATE_CLIP, gen7); + GEN6_USE(p, 3DSTATE_CONSTANT_VS, gen7); + GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen7); + GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen7); + GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen7); + GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen7); + GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen7); + GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen7); + GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen7); + GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen7); + GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen7); + GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen7); + GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen7); + GEN6_USE(p, PIPE_CONTROL, gen7); + GEN6_USE(p, 3DPRIMITIVE, gen7); + GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen7); + GEN6_USE(p, CC_VIEWPORT, gen7); + GEN6_USE(p, COLOR_CALC_STATE, gen7); + GEN6_USE(p, BLEND_STATE, gen7); + GEN6_USE(p, DEPTH_STENCIL_STATE, gen7); + GEN6_USE(p, SCISSOR_RECT, gen7); + GEN6_USE(p, BINDING_TABLE_STATE, gen7); + GEN6_USE(p, SURFACE_STATE, gen7); + GEN6_USE(p, SAMPLER_STATE, gen7); + GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen7); + GEN6_USE(p, push_constant_buffer, gen7); +#undef GEN6_USE + +#define GEN7_USE(p, name, from) \ + p->gen7_ ## name = from->emit_ ## name + GEN7_USE(p, 3DSTATE_DEPTH_BUFFER, gen7); + GEN7_USE(p, 3DSTATE_CC_STATE_POINTERS, gen7); + GEN7_USE(p, 3DSTATE_GS, gen7); + GEN7_USE(p, 3DSTATE_SF, gen7); + GEN7_USE(p, 3DSTATE_WM, gen7); + GEN7_USE(p, 3DSTATE_SAMPLE_MASK, gen7); + 
GEN7_USE(p, 3DSTATE_CONSTANT_HS, gen7); + GEN7_USE(p, 3DSTATE_CONSTANT_DS, gen7); + GEN7_USE(p, 3DSTATE_HS, gen7); + GEN7_USE(p, 3DSTATE_TE, gen7); + GEN7_USE(p, 3DSTATE_DS, gen7); + GEN7_USE(p, 3DSTATE_STREAMOUT, gen7); + GEN7_USE(p, 3DSTATE_SBE, gen7); + GEN7_USE(p, 3DSTATE_PS, gen7); + GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, gen7); + GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_CC, gen7); + GEN7_USE(p, 3DSTATE_BLEND_STATE_POINTERS, gen7); + GEN7_USE(p, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS, gen7); + GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_VS, gen7); + GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_HS, gen7); + GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_DS, gen7); + GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_GS, gen7); + GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_PS, gen7); + GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_VS, gen7); + GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_HS, gen7); + GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_DS, gen7); + GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_GS, gen7); + GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_PS, gen7); + GEN7_USE(p, 3DSTATE_URB_VS, gen7); + GEN7_USE(p, 3DSTATE_URB_HS, gen7); + GEN7_USE(p, 3DSTATE_URB_DS, gen7); + GEN7_USE(p, 3DSTATE_URB_GS, gen7); + GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_VS, gen7); + GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_HS, gen7); + GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_DS, gen7); + GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_GS, gen7); + GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_PS, gen7); + GEN7_USE(p, 3DSTATE_SO_DECL_LIST, gen7); + GEN7_USE(p, 3DSTATE_SO_BUFFER, gen7); + GEN7_USE(p, SF_CLIP_VIEWPORT, gen7); +#undef GEN7_USE } diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h index 684626d88..73a94304b 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe.h @@ -155,8 +155,7 @@ struct ilo_dsa_state { /* DEPTH_STENCIL_STATE */ uint32_t payload[3]; - uint32_t dw_alpha; - ubyte alpha_ref; + 
struct pipe_alpha_state alpha; }; struct ilo_blend_cso { @@ -256,11 +255,8 @@ struct ilo_surface_cso { struct ilo_fb_state { struct pipe_framebuffer_state state; - struct ilo_view_surface null_rt; struct ilo_zs_surface null_zs; - unsigned num_samples; - bool offset_to_layers; }; struct ilo_global_binding { @@ -383,7 +379,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, unsigned num_levels, unsigned first_layer, unsigned num_layers, - bool is_rt, bool offset_to_layer, + bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf); void @@ -409,7 +405,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, unsigned num_levels, unsigned first_layer, unsigned num_layers, - bool is_rt, bool offset_to_layer, + bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf); static inline void @@ -455,27 +451,28 @@ ilo_gpe_init_view_surface_for_texture(const struct ilo_dev_info *dev, unsigned num_levels, unsigned first_layer, unsigned num_layers, - bool is_rt, bool offset_to_layer, + bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf) { if (dev->gen >= ILO_GEN(7)) { ilo_gpe_init_view_surface_for_texture_gen7(dev, tex, format, first_level, num_levels, first_layer, num_layers, - is_rt, offset_to_layer, surf); + is_rt, render_cache_rw, surf); } else { ilo_gpe_init_view_surface_for_texture_gen6(dev, tex, format, first_level, num_levels, first_layer, num_layers, - is_rt, offset_to_layer, surf); + is_rt, render_cache_rw, surf); } } void ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, const struct ilo_texture *tex, - enum pipe_format format, unsigned level, + enum pipe_format format, + unsigned level, unsigned first_layer, unsigned num_layers, - bool offset_to_layer, struct ilo_zs_surface *zs); + struct ilo_zs_surface *zs); void ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, @@ -528,9 +525,4 @@ ilo_gpe_init_fs_cso(const struct ilo_dev_info *dev, } } -void -ilo_gpe_set_fb(const struct 
ilo_dev_info *dev, - const struct pipe_framebuffer_state *state, - struct ilo_fb_state *fb); - #endif /* ILO_GPE_H */ diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c index 11972b968..442bef189 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -25,12 +25,13 @@ * Chia-I Wu <olv@lunarg.com> */ -#include "genhw/genhw.h" #include "util/u_dual_blend.h" -#include "util/u_framebuffer.h" #include "util/u_half.h" +#include "brw_defines.h" +#include "intel_reg.h" #include "ilo_context.h" +#include "ilo_cp.h" #include "ilo_format.h" #include "ilo_resource.h" #include "ilo_shader.h" @@ -38,31 +39,128 @@ #include "ilo_gpe_gen6.h" /** + * Translate winsys tiling to hardware tiling. + */ +int +ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling) +{ + switch (tiling) { + case INTEL_TILING_NONE: + return 0; + case INTEL_TILING_X: + return BRW_SURFACE_TILED; + case INTEL_TILING_Y: + return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; + default: + assert(!"unknown tiling"); + return 0; + } +} + +/** + * Translate a pipe primitive type to the matching hardware primitive type. 
+ */ +int +ilo_gpe_gen6_translate_pipe_prim(unsigned prim) +{ + static const int prim_mapping[PIPE_PRIM_MAX] = { + [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST, + [PIPE_PRIM_LINES] = _3DPRIM_LINELIST, + [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP, + [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST, + [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP, + [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON, + [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, + }; + + assert(prim_mapping[prim]); + + return prim_mapping[prim]; +} + +/** + * Translate a pipe texture target to the matching hardware surface type. + */ +int +ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_BUFFER: + return BRW_SURFACE_BUFFER; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return BRW_SURFACE_1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + return BRW_SURFACE_2D; + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return BRW_SURFACE_CUBE; + default: + assert(!"unknown texture target"); + return BRW_SURFACE_BUFFER; + } +} + +/** + * Translate a depth/stencil pipe format to the matching hardware + * format. Return -1 on errors. 
+ */ +static int +gen6_translate_depth_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return BRW_DEPTHFORMAT_D16_UNORM; + case PIPE_FORMAT_Z32_FLOAT: + return BRW_DEPTHFORMAT_D32_FLOAT; + case PIPE_FORMAT_Z24X8_UNORM: + return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT; + default: + return -1; + } +} + +/** * Translate a pipe logicop to the matching hardware logicop. */ static int gen6_translate_pipe_logicop(unsigned logicop) { switch (logicop) { - case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR; - case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR; - case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED; - case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED; - case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE; - case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT; - case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR; - case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND; - case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND; - case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV; - case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP; - case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED; - case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY; - case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE; - case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR; - case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET; + case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED; + case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_XOR: return 
BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET; default: assert(!"unknown logicop function"); - return GEN6_LOGICOP_CLEAR; + return BRW_LOGICOPFUNCTION_CLEAR; } } @@ -73,14 +171,14 @@ static int gen6_translate_pipe_blend(unsigned blend) { switch (blend) { - case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD; - case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT; - case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN; - case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX; + case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX; default: assert(!"unknown blend function"); - return GEN6_BLENDFUNCTION_ADD; + return BRW_BLENDFUNCTION_ADD; }; } @@ -91,28 +189,28 @@ static int gen6_translate_pipe_blendfactor(unsigned blendfactor) { switch (blendfactor) { - case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 
return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA; - case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return 
BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA; default: assert(!"unknown blend factor"); - return GEN6_BLENDFACTOR_ONE; + return BRW_BLENDFACTOR_ONE; }; } @@ -123,17 +221,17 @@ static int gen6_translate_pipe_stencil_op(unsigned stencil_op) { switch (stencil_op) { - case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP; - case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO; - case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE; - case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT; - case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT; - case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR; - case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR; - case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT; + case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT; default: assert(!"unknown stencil op"); - return GEN6_STENCILOP_KEEP; + return BRW_STENCILOP_KEEP; } } @@ -144,12 +242,12 @@ static int gen6_translate_tex_mipfilter(unsigned filter) { switch (filter) { - case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST; - case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR; - case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE; + case 
PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR; + case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE; default: assert(!"unknown mipfilter"); - return GEN6_MIPFILTER_NONE; + return BRW_MIPFILTER_NONE; } } @@ -160,11 +258,11 @@ static int gen6_translate_tex_filter(unsigned filter) { switch (filter) { - case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; + case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR; default: assert(!"unknown sampler filter"); - return GEN6_MAPFILTER_NEAREST; + return BRW_MAPFILTER_NEAREST; } } @@ -182,17 +280,39 @@ gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge) } switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR; + case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR; case PIPE_TEX_WRAP_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(!"unknown sampler wrap mode"); - return GEN6_TEXCOORDMODE_WRAP; + return BRW_TEXCOORDMODE_WRAP; + } +} + +/** + * Translate a pipe DSA test function to the matching hardware compare + * function. 
+ */ +static int +gen6_translate_dsa_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS; + default: + assert(!"unknown depth/stencil/alpha test function"); + return BRW_COMPAREFUNCTION_NEVER; } } @@ -207,84 +327,587 @@ gen6_translate_shadow_func(unsigned func) * For PIPE_FUNC_x, the reference value is on the left-hand side of the * comparison, and 1.0 is returned when the comparison is true. * - * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of + * For BRW_PREFILTER_x, the reference value is on the right-hand side of * the comparison, and 0.0 is returned when the comparison is true. 
*/ switch (func) { - case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS; - case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS; - case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS; + case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL; + case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL; + case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS; + case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL; + case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL; + case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER; + case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER; default: assert(!"unknown shadow compare function"); - return GEN6_COMPAREFUNCTION_NEVER; + return BRW_PREFILTER_NEVER; } } /** - * Translate a pipe DSA test function to the matching hardware compare - * function. + * Translate an index size to the matching hardware index format. 
*/ static int -gen6_translate_dsa_func(unsigned func) +gen6_translate_index_size(int size) { - switch (func) { - case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER; - case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS; - case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; + switch (size) { + case 4: return BRW_INDEX_DWORD; + case 2: return BRW_INDEX_WORD; + case 1: return BRW_INDEX_BYTE; default: - assert(!"unknown depth/stencil/alpha test function"); - return GEN6_COMPAREFUNCTION_NEVER; + assert(!"unknown index size"); + return BRW_INDEX_BYTE; } } static void +gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev, + struct intel_bo *general_state_bo, + struct intel_bo *surface_state_bo, + struct intel_bo *dynamic_state_bo, + struct intel_bo *indirect_object_bo, + struct intel_bo *instruction_bo, + uint32_t general_state_size, + uint32_t dynamic_state_size, + uint32_t indirect_object_size, + uint32_t instruction_size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01); + const uint8_t cmd_len = 10; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* 4K-page aligned */ + assert(((general_state_size | dynamic_state_size | + indirect_object_size | instruction_size) & 0xfff) == 0); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + + ilo_cp_write_bo(cp, 1, general_state_bo, + INTEL_DOMAIN_RENDER, + 0); + ilo_cp_write_bo(cp, 1, surface_state_bo, + INTEL_DOMAIN_SAMPLER, + 0); + ilo_cp_write_bo(cp, 1, dynamic_state_bo, + INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, + 0); + ilo_cp_write_bo(cp, 1, indirect_object_bo, + 0, + 0); + ilo_cp_write_bo(cp, 1, instruction_bo, + INTEL_DOMAIN_INSTRUCTION, + 0); + + if 
(general_state_size) { + ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo, + INTEL_DOMAIN_RENDER, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 1); + } + + if (dynamic_state_size) { + ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo, + INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 0xfffff000 + 1); + } + + if (indirect_object_size) { + ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo, + 0, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 0xfffff000 + 1); + } + + if (instruction_size) { + ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo, + INTEL_DOMAIN_INSTRUCTION, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 1); + } + + ilo_cp_end(cp); +} + +static void +gen6_emit_STATE_SIP(const struct ilo_dev_info *dev, + uint32_t sip, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); /* reserve exactly cmd_len dwords */ + ilo_cp_write(cp, cmd | (cmd_len - 2)); /* header dword carries (cmd_len - 2), as in the other emitters */ + ilo_cp_write(cp, sip); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev, + bool enable, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b); + const uint8_t cmd_len = 1; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | enable); + ilo_cp_end(cp); +} + +static void +gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev, + int pipeline, + struct ilo_cp *cp) +{ + const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04); + const uint8_t cmd_len = 1; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* 3D or media */ + assert(pipeline == 0x0 || pipeline == 0x1); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | pipeline); + ilo_cp_end(cp); +} + +static void +gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev, + int max_threads, int num_urb_entries, + int urb_entry_size, + struct
ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00); + const uint8_t cmd_len = 8; + uint32_t dw2, dw4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + dw2 = (max_threads - 1) << 16 | + num_urb_entries << 8 | + 1 << 7 | /* Reset Gateway Timer */ + 1 << 6; /* Bypass Gateway Control */ + + dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */ + 480; /* CURBE Allocation Size */ + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* MBZ */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, 0); /* scoreboard */ + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static void +gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev, + uint32_t buf, int size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + assert(buf % 32 == 0); + /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */ + size = align(size, 32); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); /* MBZ */ + ilo_cp_write(cp, size); + ilo_cp_write(cp, buf); + ilo_cp_end(cp); +} + +static void +gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev, + uint32_t offset, int num_ids, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + assert(offset % 32 == 0); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); /* MBZ */ + /* every ID has 8 DWords */ + ilo_cp_write(cp, num_ids * 8 * 4); + ilo_cp_write(cp, offset); + ilo_cp_end(cp); +} + +static void +gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev, + int id, int byte, int thread_count, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03); + const uint8_t cmd_len = 2; + uint32_t dw1; + + 
ILO_GPE_VALID_GEN(dev, 6, 6); + + dw1 = id << 16 | + byte << 8 | + thread_count; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_end(cp); +} + +static void +gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev, + int thread_count_water_mark, + int barrier_mask, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04); + const uint8_t cmd_len = 2; + uint32_t dw1; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + dw1 = thread_count_water_mark << 16 | + barrier_mask; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_end(cp); +} + +static void +gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev, + struct ilo_cp *cp) +{ + assert(!"MEDIA_OBJECT_WALKER unsupported"); +} + +static void +gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev, + uint32_t vs_binding_table, + uint32_t gs_binding_table, + uint32_t ps_binding_table, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + GEN6_BINDING_TABLE_MODIFY_VS | + GEN6_BINDING_TABLE_MODIFY_GS | + GEN6_BINDING_TABLE_MODIFY_PS); + ilo_cp_write(cp, vs_binding_table); + ilo_cp_write(cp, gs_binding_table); + ilo_cp_write(cp, ps_binding_table); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t vs_sampler_state, + uint32_t gs_sampler_state, + uint32_t ps_sampler_state, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + VS_SAMPLER_STATE_CHANGE | + GS_SAMPLER_STATE_CHANGE | + PS_SAMPLER_STATE_CHANGE); + ilo_cp_write(cp, vs_sampler_state); + ilo_cp_write(cp, gs_sampler_state); + ilo_cp_write(cp, 
ps_sampler_state); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev, + int vs_total_size, int gs_total_size, + int vs_entry_size, int gs_entry_size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05); + const uint8_t cmd_len = 3; + const int row_size = 128; /* 1024 bits */ + int vs_alloc_size, gs_alloc_size; + int vs_num_entries, gs_num_entries; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + /* in 1024-bit URB rows */ + vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; + gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; + + /* the valid range is [1, 5] */ + if (!vs_alloc_size) + vs_alloc_size = 1; + if (!gs_alloc_size) + gs_alloc_size = 1; + assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); + + /* the valid range is [24, 256] in multiples of 4 */ + vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; + if (vs_num_entries > 256) + vs_num_entries = 256; + assert(vs_num_entries >= 24); + + /* the valid range is [0, 256] in multiples of 4 */ + gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; + if (gs_num_entries > 256) + gs_num_entries = 256; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT | + vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT); + ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT | + (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, + const struct pipe_vertex_buffer *vbuffers, + uint64_t vbuffer_mask, + const struct ilo_ve_state *ve, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08); + uint8_t cmd_len; + unsigned hw_idx; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 82: + * + * "From 1 to 33 VBs can be specified..." 
+ */ + assert(vbuffer_mask <= (1UL << 33)); + + if (!vbuffer_mask) + return; + + cmd_len = 1; + + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + + if (vbuffer_mask & (1 << pipe_idx)) + cmd_len += 4; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + const unsigned instance_divisor = ve->instance_divisors[hw_idx]; + const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx]; + uint32_t dw; + + if (!(vbuffer_mask & (1 << pipe_idx))) + continue; + + dw = hw_idx << GEN6_VB0_INDEX_SHIFT; + + if (instance_divisor) + dw |= GEN6_VB0_ACCESS_INSTANCEDATA; + else + dw |= GEN6_VB0_ACCESS_VERTEXDATA; + + if (dev->gen >= ILO_GEN(7)) + dw |= GEN7_VB0_ADDRESS_MODIFYENABLE; + + /* use null vb if there is no buffer or the stride is out of range */ + if (vb->buffer && vb->stride <= 2048) { + const struct ilo_buffer *buf = ilo_buffer(vb->buffer); + const uint32_t start_offset = vb->buffer_offset; + /* + * As noted in ilo_translate_format(), we treat some 3-component + * formats as 4-component formats to work around hardware + * limitations. Imagine the case where the vertex buffer holds a + * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. + * The hardware would not be able to fetch it because the vertex + * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex + * and that takes at least 8 bytes. + * + * For the workaround to work, we query the physical size, which is + * page aligned, to calculate end_offset so that the last vertex has + * a better chance to be fetched. 
+ */ + const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1; + + dw |= vb->stride << BRW_VB0_PITCH_SHIFT; + + ilo_cp_write(cp, dw); + ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_write(cp, instance_divisor); + } + else { + dw |= 1 << 13; + + ilo_cp_write(cp, dw); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, instance_divisor); + } + } + + ilo_cp_end(cp); +} + +static void +ve_set_cso_edgeflag(const struct ilo_dev_info *dev, + struct ilo_ve_cso *cso) +{ + int format; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "- This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, + * and Component 1-3 Control must be set to VFCOMP_NOSTORE. + * + * - The Source Element Format must be set to the UINT format. + * + * - [DevSNB]: Edge Flags are not supported for QUADLIST + * primitives. Software may elect to convert QUADLIST primitives + * to some set of corresponding edge-flag-supported primitive + * types (e.g., POLYGONs) prior to submission to the 3D pipeline." + */ + + cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE; + cso->payload[1] = + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT; + + /* + * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via + * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined + * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. + * + * Since all the hardware cares about is whether the flags are zero or not, + * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case. 
+ */ + format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff; + if (format == BRW_SURFACEFORMAT_R32_FLOAT) { + STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT == + BRW_SURFACEFORMAT_R32_FLOAT - 1); + + cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT); + } + else { + assert(format == BRW_SURFACEFORMAT_R8_UINT); + } +} + +static void +ve_init_cso_with_components(const struct ilo_dev_info *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso) +{ + ILO_GPE_VALID_GEN(dev, 6, 7); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + cso->payload[0] = GEN6_VE0_VALID; + cso->payload[1] = + comp0 << BRW_VE1_COMPONENT_0_SHIFT | + comp1 << BRW_VE1_COMPONENT_1_SHIFT | + comp2 << BRW_VE1_COMPONENT_2_SHIFT | + comp3 << BRW_VE1_COMPONENT_3_SHIFT; +} + +static void ve_init_cso(const struct ilo_dev_info *dev, const struct pipe_vertex_element *state, unsigned vb_index, struct ilo_ve_cso *cso) { int comp[4] = { - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, + BRW_VE1_COMPONENT_STORE_SRC, + BRW_VE1_COMPONENT_STORE_SRC, + BRW_VE1_COMPONENT_STORE_SRC, + BRW_VE1_COMPONENT_STORE_SRC, }; int format; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); switch (util_format_get_nr_components(state->src_format)) { - case 1: comp[1] = GEN6_VFCOMP_STORE_0; - case 2: comp[2] = GEN6_VFCOMP_STORE_0; + case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0; + case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0; case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ? 
- GEN6_VFCOMP_STORE_1_INT : - GEN6_VFCOMP_STORE_1_FP; + BRW_VE1_COMPONENT_STORE_1_INT : + BRW_VE1_COMPONENT_STORE_1_FLT; } format = ilo_translate_vertex_format(state->src_format); STATIC_ASSERT(Elements(cso->payload) >= 2); cso->payload[0] = - vb_index << GEN6_VE_STATE_DW0_VB_INDEX__SHIFT | - GEN6_VE_STATE_DW0_VALID | - format << GEN6_VE_STATE_DW0_FORMAT__SHIFT | - state->src_offset << GEN6_VE_STATE_DW0_VB_OFFSET__SHIFT; + vb_index << GEN6_VE0_INDEX_SHIFT | + GEN6_VE0_VALID | + format << BRW_VE0_FORMAT_SHIFT | + state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT; cso->payload[1] = - comp[0] << GEN6_VE_STATE_DW1_COMP0__SHIFT | - comp[1] << GEN6_VE_STATE_DW1_COMP1__SHIFT | - comp[2] << GEN6_VE_STATE_DW1_COMP2__SHIFT | - comp[3] << GEN6_VE_STATE_DW1_COMP3__SHIFT; + comp[0] << BRW_VE1_COMPONENT_0_SHIFT | + comp[1] << BRW_VE1_COMPONENT_1_SHIFT | + comp[2] << BRW_VE1_COMPONENT_2_SHIFT | + comp[3] << BRW_VE1_COMPONENT_3_SHIFT; } void @@ -295,7 +918,7 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, { unsigned i; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); ve->count = num_states; ve->vb_count = 0; @@ -327,6 +950,179 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, } } +static void +gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev, + const struct ilo_ve_state *ve, + bool last_velement_edgeflag, + bool prepend_generated_ids, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09); + uint8_t cmd_len; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 93: + * + * "Up to 34 (DevSNB+) vertex elements are supported." 
+ */ + assert(ve->count + prepend_generated_ids <= 34); + + if (!ve->count && !prepend_generated_ids) { + struct ilo_ve_cso dummy; + + ve_init_cso_with_components(dev, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_1_FLT, + &dummy); + + cmd_len = 3; + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write_multi(cp, dummy.payload, 2); + ilo_cp_end(cp); + + return; + } + + cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + + if (prepend_generated_ids) { + struct ilo_ve_cso gen_ids; + + ve_init_cso_with_components(dev, + BRW_VE1_COMPONENT_STORE_VID, + BRW_VE1_COMPONENT_STORE_IID, + BRW_VE1_COMPONENT_NOSTORE, + BRW_VE1_COMPONENT_NOSTORE, + &gen_ids); + + ilo_cp_write_multi(cp, gen_ids.payload, 2); + } + + if (last_velement_edgeflag) { + struct ilo_ve_cso edgeflag; + + for (i = 0; i < ve->count - 1; i++) + ilo_cp_write_multi(cp, ve->cso[i].payload, 2); + + edgeflag = ve->cso[i]; + ve_set_cso_edgeflag(dev, &edgeflag); + ilo_cp_write_multi(cp, edgeflag.payload, 2); + } + else { + for (i = 0; i < ve->count; i++) + ilo_cp_write_multi(cp, ve->cso[i].payload, 2); + } + + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_ib_state *ib, + bool enable_cut_index, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a); + const uint8_t cmd_len = 3; + struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); + uint32_t start_offset, end_offset; + int format; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (!buf) + return; + + format = gen6_translate_index_size(ib->hw_index_size); + + /* + * set start_offset to 0 here and adjust pipe_draw_info::start with + * ib->draw_start_offset in 3DPRIMITIVE + */ + start_offset = 0; + end_offset = buf->bo_size; + + /* end_offset must also be aligned and is inclusive */ + end_offset -= (end_offset % 
ib->hw_index_size); + end_offset--; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) | + format << 8); + ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t clip_viewport, + uint32_t sf_viewport, + uint32_t cc_viewport, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + GEN6_CLIP_VIEWPORT_MODIFY | + GEN6_SF_VIEWPORT_MODIFY | + GEN6_CC_VIEWPORT_MODIFY); + ilo_cp_write(cp, clip_viewport); + ilo_cp_write(cp, sf_viewport); + ilo_cp_write(cp, cc_viewport); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t blend_state, + uint32_t depth_stencil_state, + uint32_t color_calc_state, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, blend_state | 1); + ilo_cp_write(cp, depth_stencil_state | 1); + ilo_cp_write(cp, color_calc_state | 1); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t scissor_rect, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, scissor_rect); + ilo_cp_end(cp); +} + void ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, const struct ilo_shader_state *vs, @@ -335,7 +1131,7 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, int 
start_grf, vue_read_len, max_threads; uint32_t dw2, dw4, dw5; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); @@ -377,26 +1173,26 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, break; case ILO_GEN(7.5): /* see brwCreateContext() */ - max_threads = (dev->gt >= 2) ? 280 : 70; + max_threads = (dev->gt == 2) ? 280 : 70; break; default: max_threads = 1; break; } - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT; - dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | - vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | - 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT; + dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT | + vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT | + 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT; - dw5 = GEN6_VS_DW5_STATISTICS | - GEN6_VS_DW5_VS_ENABLE; + dw5 = GEN6_VS_STATISTICS_ENABLE | + GEN6_VS_ENABLE; if (dev->gen >= ILO_GEN(7.5)) - dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT; + dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT; else - dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT; + dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT; STATIC_ASSERT(Elements(cso->payload) >= 3); cso->payload[0] = dw2; @@ -404,6 +1200,48 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, cso->payload[2] = dw5; } +static void +gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *vs, + int num_samplers, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10); + const uint8_t cmd_len = 6; + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (!vs) { + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + 
ilo_cp_write(cp, 0); + ilo_cp_end(cp); + return; + } + + cso = ilo_shader_get_kernel_cso(vs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs)); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_end(cp); +} + void ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, const struct ilo_shader_state *gs, @@ -466,16 +1304,16 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, max_threads = 21; } - dw2 = GEN6_THREADDISP_SPF; + dw2 = GEN6_GS_SPF_MODE; - dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | - 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | - start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT; + dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT | + 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | + start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; - dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT | - GEN6_GS_DW5_STATISTICS | - GEN6_GS_DW5_SO_STATISTICS | - GEN6_GS_DW5_RENDER_ENABLE; + dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_SO_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE; /* * we cannot make use of GEN6_GS_REORDER because it will reorder @@ -483,19 +1321,19 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices * (2N+2, 2N+1, 2N+3)). 
*/ - dw6 = GEN6_GS_DW6_GS_ENABLE; + dw6 = GEN6_GS_ENABLE; if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY)) - dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY; + dw6 |= GEN6_GS_DISCARD_ADJACENCY; if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) { const uint32_t svbi_post_inc = ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC); - dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE; + dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE; if (svbi_post_inc) { - dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE | - svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT; + dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE | + svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT; } } @@ -506,6 +1344,75 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, cso->payload[3] = dw6; } +static void +gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + const struct ilo_shader_state *vs, + int verts_per_prim, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); + const uint8_t cmd_len = 7; + uint32_t dw1, dw2, dw4, dw5, dw6; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + if (gs) { + const struct ilo_shader_cso *cso; + + dw1 = ilo_shader_get_kernel_offset(gs); + + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + dw6 = cso->payload[3]; + } + else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { + struct ilo_shader_cso cso; + enum ilo_kernel_param param; + + switch (verts_per_prim) { + case 1: + param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; + break; + case 2: + param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; + break; + default: + param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; + break; + } + + dw1 = ilo_shader_get_kernel_offset(vs) + + ilo_shader_get_kernel_param(vs, param); + + /* cannot use VS's CSO */ + ilo_gpe_init_gs_cso_gen6(dev, vs, &cso); + dw2 = cso.payload[0]; + dw4 = cso.payload[1]; + dw5 = cso.payload[2]; + dw6 = cso.payload[3]; + } + else { + dw1 = 0; + 
dw2 = 0; + dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT; + dw5 = GEN6_GS_STATISTICS_ENABLE; + dw6 = 0; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_write(cp, dw6); + ilo_cp_end(cp); +} + void ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -513,9 +1420,9 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, { uint32_t dw1, dw2, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); - dw1 = GEN6_CLIP_DW1_STATISTICS; + dw1 = GEN6_CLIP_STATISTICS_ENABLE; if (dev->gen >= ILO_GEN(7)) { /* @@ -528,53 +1435,53 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, * What does this mean? */ dw1 |= 0 << 19 | - GEN7_CLIP_DW1_EARLY_CULL_ENABLE; + GEN7_CLIP_EARLY_CULL; if (state->front_ccw) - dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW; + dw1 |= GEN7_CLIP_WINDING_CCW; switch (state->cull_face) { case PIPE_FACE_NONE: - dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE; + dw1 |= GEN7_CLIP_CULLMODE_NONE; break; case PIPE_FACE_FRONT: - dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT; + dw1 |= GEN7_CLIP_CULLMODE_FRONT; break; case PIPE_FACE_BACK: - dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK; + dw1 |= GEN7_CLIP_CULLMODE_BACK; break; case PIPE_FACE_FRONT_AND_BACK: - dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH; + dw1 |= GEN7_CLIP_CULLMODE_BOTH; break; } } - dw2 = GEN6_CLIP_DW2_CLIP_ENABLE | - GEN6_CLIP_DW2_XY_TEST_ENABLE | - state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT | - GEN6_CLIP_DW2_CLIPMODE_NORMAL; + dw2 = GEN6_CLIP_ENABLE | + GEN6_CLIP_XY_TEST | + state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | + GEN6_CLIP_MODE_NORMAL; if (state->clip_halfz) - dw2 |= GEN6_CLIP_DW2_APIMODE_D3D; + dw2 |= GEN6_CLIP_API_D3D; else - dw2 |= GEN6_CLIP_DW2_APIMODE_OGL; + dw2 |= GEN6_CLIP_API_OGL; if (state->depth_clip) - dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE; + dw2 |= 
GEN6_CLIP_Z_TEST; if (state->flatshade_first) { - dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | - 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | - 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; + dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT | + 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT | + 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT; } else { - dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | - 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | - 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; + dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT | + 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT | + 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT; } - dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT | - 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT; + dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | + 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT; clip->payload[0] = dw1; clip->payload[1] = dw2; @@ -594,6 +1501,53 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, clip->can_enable_guardband = false; } +static void +gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + bool enable_guardband, + int num_viewports, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12); + const uint8_t cmd_len = 4; + uint32_t dw1, dw2, dw3; + + if (rasterizer) { + int interps; + + dw1 = rasterizer->clip.payload[0]; + dw2 = rasterizer->clip.payload[1]; + dw3 = rasterizer->clip.payload[2]; + + if (enable_guardband && rasterizer->clip.can_enable_guardband) + dw2 |= GEN6_CLIP_GB_TEST; + + interps = (fs) ? 
ilo_shader_get_kernel_param(fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; + + if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC | + 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC | + 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC)) + dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; + + dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX | + (num_viewports - 1); + } + else { + dw1 = 0; + dw2 = 0; + dw3 = 0; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, dw3); + ilo_cp_end(cp); +} + void ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -603,7 +1557,7 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, int line_width, point_width; uint32_t dw1, dw2, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); /* * Scale the constant term. The minimum representable value used by the HW @@ -622,8 +1576,8 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, * should be cleared if clipping is disabled or Statistics Enable in * CLIP_STATE is clear." 
*/ - dw1 = GEN7_SF_DW1_STATISTICS | - GEN7_SF_DW1_VIEWPORT_ENABLE; + dw1 = GEN6_SF_STATISTICS_ENABLE | + GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; /* XXX GEN6 path seems to work fine for GEN7 */ if (false && dev->gen >= ILO_GEN(7)) { @@ -638,11 +1592,11 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, */ if (state->offset_tri || state->offset_line || state->offset_point) { /* XXX need to scale offset_const according to the depth format */ - dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET; + dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS; - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID | - GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME | - GEN7_SF_DW1_DEPTH_OFFSET_POINT; + dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID | + GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME | + GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; } else { offset_const = 0.0f; @@ -652,39 +1606,39 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, } else { if (state->offset_tri) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID; + dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; if (state->offset_line) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME; + dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; if (state->offset_point) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT; + dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; } switch (state->fill_front) { case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID; + dw1 |= GEN6_SF_FRONT_SOLID; break; case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME; + dw1 |= GEN6_SF_FRONT_WIREFRAME; break; case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN7_SF_DW1_FRONTFACE_POINT; + dw1 |= GEN6_SF_FRONT_POINT; break; } switch (state->fill_back) { case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN7_SF_DW1_BACKFACE_SOLID; + dw1 |= GEN6_SF_BACK_SOLID; break; case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME; + dw1 |= GEN6_SF_BACK_WIREFRAME; break; case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN7_SF_DW1_BACKFACE_POINT; + dw1 |= GEN6_SF_BACK_POINT; break; } if (state->front_ccw) - dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW; + dw1 |= 
GEN6_SF_WINDING_CCW; dw2 = 0; @@ -702,22 +1656,22 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, * * TODO We do not check those yet. */ - dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE | - GEN7_SF_DW2_AA_LINE_CAP_1_0; + dw2 |= GEN6_SF_LINE_AA_ENABLE | + GEN6_SF_LINE_END_CAP_WIDTH_1_0; } switch (state->cull_face) { case PIPE_FACE_NONE: - dw2 |= GEN7_SF_DW2_CULLMODE_NONE; + dw2 |= GEN6_SF_CULL_NONE; break; case PIPE_FACE_FRONT: - dw2 |= GEN7_SF_DW2_CULLMODE_FRONT; + dw2 |= GEN6_SF_CULL_FRONT; break; case PIPE_FACE_BACK: - dw2 |= GEN7_SF_DW2_CULLMODE_BACK; + dw2 |= GEN6_SF_CULL_BACK; break; case PIPE_FACE_FRONT_AND_BACK: - dw2 |= GEN7_SF_DW2_CULLMODE_BOTH; + dw2 |= GEN6_SF_CULL_BOTH; break; } @@ -738,33 +1692,30 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, line_width = 0; } - dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; - - if (dev->gen >= ILO_GEN(7.5) && state->line_stipple_enable) - dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE; + dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT; if (state->scissor) - dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE; + dw2 |= GEN6_SF_SCISSOR_ENABLE; - dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | - GEN7_SF_DW3_SUBPIXEL_8BITS; + dw3 = GEN6_SF_LINE_AA_MODE_TRUE | + GEN6_SF_VERTEX_SUBPIXEL_8BITS; if (state->line_last_pixel) dw3 |= 1 << 31; if (state->flatshade_first) { - dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT | + 0 << GEN6_SF_LINE_PROVOKE_SHIFT | + 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT; } else { - dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT | + 1 << GEN6_SF_LINE_PROVOKE_SHIFT | + 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT; } if (!state->point_size_per_vertex) - dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; + dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH; /* in U8.3 */ point_width = (int) (state->point_size * 8.0f + 
0.5f); @@ -781,7 +1732,7 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, sf->payload[5] = fui(offset_clamp); if (state->multisample) { - sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN; + sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN; /* * From the Sandy Bridge PRM, volume 2 part 1, page 251: @@ -793,7 +1744,7 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, if (!line_width) { line_width = 128; /* 1.0f */ - sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; + sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT; } } else { @@ -801,6 +1752,172 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, } } +/** + * Fill in DW2 to DW7 of 3DSTATE_SF. + */ +void +ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + int num_samples, + enum pipe_format depth_format, + uint32_t *payload, unsigned payload_len) +{ + const struct ilo_rasterizer_sf *sf = &rasterizer->sf; + + assert(payload_len == Elements(sf->payload)); + + if (sf) { + memcpy(payload, sf->payload, sizeof(sf->payload)); + + if (num_samples > 1) + payload[1] |= sf->dw_msaa; + + if (dev->gen >= ILO_GEN(7)) { + int format; + + /* separate stencil */ + switch (depth_format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + depth_format = PIPE_FORMAT_Z24X8_UNORM; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + depth_format = PIPE_FORMAT_Z32_FLOAT;; + break; + case PIPE_FORMAT_S8_UINT: + depth_format = PIPE_FORMAT_NONE; + break; + default: + break; + } + + format = gen6_translate_depth_format(depth_format); + /* FLOAT surface is assumed when there is no depth buffer */ + if (format < 0) + format = BRW_DEPTHFORMAT_D32_FLOAT; + + payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT; + } + } + else { + payload[0] = 0; + payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0; + payload[2] = 0; + payload[3] = 0; + payload[4] = 0; + payload[5] = 0; + } +} + +/** + * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. 
+ */ +void +ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + const struct ilo_shader_state *last_sh, + uint32_t *dw, int num_dwords) +{ + int output_count, vue_offset, vue_len; + const struct ilo_kernel_routing *routing; + + ILO_GPE_VALID_GEN(dev, 6, 7); + assert(num_dwords == 13); + + if (!fs) { + memset(dw, 0, sizeof(dw[0]) * num_dwords); + + if (dev->gen >= ILO_GEN(7)) + dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT; + else + dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT; + + return; + } + + output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); + assert(output_count <= 32); + + routing = ilo_shader_get_kernel_routing(fs); + + vue_offset = routing->source_skip; + assert(vue_offset % 2 == 0); + vue_offset /= 2; + + vue_len = (routing->source_len + 1) / 2; + if (!vue_len) + vue_len = 1; + + if (dev->gen >= ILO_GEN(7)) { + dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT | + vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; + if (routing->swizzle_enable) + dw[0] |= GEN7_SBE_SWIZZLE_ENABLE; + } + else { + dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT | + vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + if (routing->swizzle_enable) + dw[0] |= GEN6_SF_SWIZZLE_ENABLE; + } + + switch (rasterizer->state.sprite_coord_mode) { + case PIPE_SPRITE_COORD_UPPER_LEFT: + dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT; + break; + case PIPE_SPRITE_COORD_LOWER_LEFT: + dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + break; + } + + STATIC_ASSERT(Elements(routing->swizzles) >= 16); + memcpy(&dw[1], routing->swizzles, 2 * 16); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 268: + * + * "This field (Point Sprite Texture Coordinate Enable) must be + * programmed to 0 when non-point primitives are rendered." + * + * TODO We do not check that yet. 
+ */ + dw[9] = routing->point_sprite_enable; + + dw[10] = routing->const_interp_enable; + + /* WrapShortest enables */ + dw[11] = 0; + dw[12] = 0; +} + +static void +gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + const struct ilo_shader_state *last_sh, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); + const uint8_t cmd_len = 20; + uint32_t payload_raster[6], payload_sbe[13]; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, + 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster)); + ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, + fs, last_sh, payload_sbe, Elements(payload_sbe)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, payload_sbe[0]); + ilo_cp_write_multi(cp, payload_raster, 6); + ilo_cp_write_multi(cp, &payload_sbe[1], 12); + ilo_cp_end(cp); +} + void ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -812,23 +1929,23 @@ ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev, /* only the FF unit states are set, as in GEN7 */ - dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0; + dw5 = GEN6_WM_LINE_AA_WIDTH_2_0; /* same value as in 3DSTATE_SF */ if (state->line_smooth) - dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0; + dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0; if (state->poly_stipple_enable) - dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE; + dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; if (state->line_stipple_enable) - dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE; + dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; - dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL | - GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL | - GEN6_WM_DW6_MSDISPMODE_PERSAMPLE; + dw6 = GEN6_WM_POSITION_ZW_PIXEL | + GEN6_WM_MSRAST_OFF_PIXEL | + GEN6_WM_MSDISPMODE_PERSAMPLE; if (state->bottom_edge_rule) - dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT; + dw6 |= 
GEN6_WM_POINT_RASTRULE_UPPER_RIGHT; /* * assertion that makes sure @@ -837,12 +1954,12 @@ ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev, * * is valid */ - STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 && - GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0); + STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 && + GEN6_WM_MSDISPMODE_PERSAMPLE == 0); wm->dw_msaa_rast = - (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0; - wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL; + (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0; + wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL; STATIC_ASSERT(Elements(wm->payload) >= 2); wm->payload[0] = dw5; @@ -867,13 +1984,13 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, /* see brwCreateContext() */ max_threads = (dev->gt == 2) ? 80 : 40; - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT; - dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT; + dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 | + 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 | + 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2; - dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; + dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* * From the Sandy Bridge PRM, volume 2 part 1, page 275: @@ -901,7 +2018,7 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, * ENABLE this bit due to ClipDistance clipping." */ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw5 |= GEN6_WM_DW5_PS_KILL; + dw5 |= GEN6_WM_KILL_ENABLE; /* * From the Sandy Bridge PRM, volume 2 part 1, page 275: @@ -912,13 +2029,13 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, * TODO This is not checked yet. 
*/ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; + dw5 |= GEN6_WM_COMPUTED_DEPTH; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; + dw5 |= GEN6_WM_USES_SOURCE_DEPTH; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw5 |= GEN6_WM_DW5_PS_USE_W; + dw5 |= GEN6_WM_USES_SOURCE_W; /* * TODO set this bit only when @@ -928,14 +2045,14 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, * c) fs or cc kills */ if (true) - dw5 |= GEN6_WM_DW5_PS_ENABLE; + dw5 |= GEN6_WM_DISPATCH_ENABLE; assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw5 |= GEN6_WM_DW5_8_PIXEL_DISPATCH; + dw5 |= GEN6_WM_8_DISPATCH_ENABLE; - dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | - GEN6_WM_DW6_POSOFFSET_NONE | - interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT; + dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT | + GEN6_WM_POSOFFSET_NONE | + interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; STATIC_ASSERT(Elements(cso->payload) >= 4); cso->payload[0] = dw2; @@ -944,6 +2061,291 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, cso->payload[3] = dw6; } +static void +gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev, + const struct ilo_shader_state *fs, + int num_samplers, + const struct ilo_rasterizer_state *rasterizer, + bool dual_blend, bool cc_may_kill, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); + const uint8_t cmd_len = 9; + const int num_samples = 1; + const struct ilo_shader_cso *fs_cso; + uint32_t dw2, dw4, dw5, dw6; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + if (!fs) { + /* see brwCreateContext() */ + const int max_threads = (dev->gt == 2) ? 
80 : 40; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + /* honor the valid range even if dispatching is disabled */ + ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); + + return; + } + + fs_cso = ilo_shader_get_kernel_cso(fs); + dw2 = fs_cso->payload[0]; + dw4 = fs_cso->payload[1]; + dw5 = fs_cso->payload[2]; + dw6 = fs_cso->payload[3]; + + dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT; + + if (true) { + dw4 |= GEN6_WM_STATISTICS_ENABLE; + } + else { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "This bit (Statistics Enable) must be disabled if either of these + * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer + * Resolve Enable or Depth Buffer Resolve Enable." + */ + dw4 |= GEN6_WM_DEPTH_CLEAR; + dw4 |= GEN6_WM_DEPTH_RESOLVE; + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + } + + if (cc_may_kill) { + dw5 |= GEN6_WM_KILL_ENABLE | + GEN6_WM_DISPATCH_ENABLE; + } + + if (dual_blend) + dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE; + + dw5 |= rasterizer->wm.payload[0]; + + dw6 |= rasterizer->wm.payload[1]; + + if (num_samples > 1) { + dw6 |= rasterizer->wm.dw_msaa_rast | + rasterizer->wm.dw_msaa_disp; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs)); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_write(cp, dw6); + ilo_cp_write(cp, 0); /* kernel 1 */ + ilo_cp_write(cp, 0); /* kernel 2 */ + ilo_cp_end(cp); +} + +static unsigned +gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, int max_read_length, + uint32_t *dw, int num_dwords) +{ + unsigned enabled = 0x0; + int total_read_length, i; + + 
assert(num_dwords == 4); + + total_read_length = 0; + for (i = 0; i < 4; i++) { + if (i < num_bufs && sizes[i]) { + /* in 256-bit units minus one */ + const int read_len = (sizes[i] + 31) / 32 - 1; + + assert(bufs[i] % 32 == 0); + assert(read_len < 32); + + enabled |= 1 << i; + dw[i] = bufs[i] | read_len; + + total_read_length += read_len + 1; + } + else { + dw[i] = 0; + } + } + + assert(total_read_length <= max_read_length); + + return enabled; +} + +static void +gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15); + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 138: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 32" + */ + buf_enabled = gen6_fill_3dstate_constant(dev, + bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); + ilo_cp_write(cp, buf_dw[0]); + ilo_cp_write(cp, buf_dw[1]); + ilo_cp_write(cp, buf_dw[2]); + ilo_cp_write(cp, buf_dw[3]); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16); + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 161: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(dev, + bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); + + 
ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); + ilo_cp_write(cp, buf_dw[0]); + ilo_cp_write(cp, buf_dw[1]); + ilo_cp_write(cp, buf_dw[2]); + ilo_cp_write(cp, buf_dw[3]); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17); + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 287: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(dev, + bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); + ilo_cp_write(cp, buf_dw[0]); + ilo_cp_write(cp, buf_dw[1]); + ilo_cp_write(cp, buf_dw[2]); + ilo_cp_write(cp, buf_dw[3]); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, + unsigned sample_mask, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18); + const uint8_t cmd_len = 2; + const unsigned valid_mask = 0xf; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + sample_mask &= valid_mask; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, sample_mask); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev, + unsigned x, unsigned y, + unsigned width, unsigned height, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00); + const uint8_t cmd_len = 4; + unsigned xmax = x + width - 1; + unsigned ymax = y + height - 1; + int rect_limit; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (dev->gen >= ILO_GEN(7)) { + rect_limit = 16383; + } + else { + /* + * 
From the Sandy Bridge PRM, volume 2 part 1, page 230: + * + * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) + * must be an even number" + */ + assert(y % 2 == 0); + + rect_limit = 8191; + } + + if (x > rect_limit) x = rect_limit; + if (y > rect_limit) y = rect_limit; + if (xmax > rect_limit) xmax = rect_limit; + if (ymax > rect_limit) ymax = rect_limit; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, y << 16 | x); + ilo_cp_write(cp, ymax << 16 | xmax); + + /* + * There is no need to set the origin. It is intended to support front + * buffer rendering. + */ + ilo_cp_write(cp, 0); + + ilo_cp_end(cp); +} + struct ilo_zs_surface_info { int surface_type; int format; @@ -964,12 +2366,12 @@ static void zs_init_info_null(const struct ilo_dev_info *dev, struct ilo_zs_surface_info *info) { - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); memset(info, 0, sizeof(*info)); - info->surface_type = GEN6_SURFTYPE_NULL; - info->format = GEN6_ZFORMAT_D32_FLOAT; + info->surface_type = BRW_SURFACE_NULL; + info->format = BRW_DEPTHFORMAT_D32_FLOAT; info->width = 1; info->height = 1; info->depth = 1; @@ -979,20 +2381,23 @@ zs_init_info_null(const struct ilo_dev_info *dev, static void zs_init_info(const struct ilo_dev_info *dev, const struct ilo_texture *tex, - enum pipe_format format, unsigned level, + enum pipe_format format, + unsigned level, unsigned first_layer, unsigned num_layers, - bool offset_to_layer, struct ilo_zs_surface_info *info) + struct ilo_zs_surface_info *info) { - uint32_t x_offset[3], y_offset[3]; + const bool rebase_layer = true; + struct intel_bo * const hiz_bo = NULL; bool separate_stencil; + uint32_t x_offset[3], y_offset[3]; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); memset(info, 0, sizeof(*info)); info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); - if (info->surface_type == GEN6_SURFTYPE_CUBE) { + if (info->surface_type == BRW_SURFACE_CUBE) { /* 
* From the Sandy Bridge PRM, volume 2 part 1, page 325-326: * @@ -1005,7 +2410,7 @@ zs_init_info(const struct ilo_dev_info *dev, * As such, we cannot set first_layer and num_layers on cube surfaces. * To work around that, treat it as a 2D surface. */ - info->surface_type = GEN6_SURFTYPE_2D; + info->surface_type = BRW_SURFACE_2D; } if (dev->gen >= ILO_GEN(7)) { @@ -1019,8 +2424,7 @@ zs_init_info(const struct ilo_dev_info *dev, * same value (enabled or disabled) as Hierarchical Depth Buffer * Enable." */ - separate_stencil = - ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers); + separate_stencil = (hiz_bo != NULL); } /* @@ -1044,25 +2448,25 @@ zs_init_info(const struct ilo_dev_info *dev, */ switch (format) { case PIPE_FORMAT_Z16_UNORM: - info->format = GEN6_ZFORMAT_D16_UNORM; + info->format = BRW_DEPTHFORMAT_D16_UNORM; break; case PIPE_FORMAT_Z32_FLOAT: - info->format = GEN6_ZFORMAT_D32_FLOAT; + info->format = BRW_DEPTHFORMAT_D32_FLOAT; break; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: info->format = (separate_stencil) ? - GEN6_ZFORMAT_D24_UNORM_X8_UINT : - GEN6_ZFORMAT_D24_UNORM_S8_UINT; + BRW_DEPTHFORMAT_D24_UNORM_X8_UINT : + BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; break; case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: info->format = (separate_stencil) ? 
- GEN6_ZFORMAT_D32_FLOAT : - GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; + BRW_DEPTHFORMAT_D32_FLOAT : + BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT; break; case PIPE_FORMAT_S8_UINT: if (separate_stencil) { - info->format = GEN6_ZFORMAT_D32_FLOAT; + info->format = BRW_DEPTHFORMAT_D32_FLOAT; break; } /* fall through */ @@ -1078,7 +2482,7 @@ zs_init_info(const struct ilo_dev_info *dev, info->zs.stride = tex->bo_stride; info->zs.tiling = tex->tiling; - if (offset_to_layer) { + if (rebase_layer) { info->zs.offset = ilo_texture_get_slice_offset(tex, level, first_layer, &x_offset[0], &y_offset[0]); } @@ -1103,29 +2507,19 @@ zs_init_info(const struct ilo_dev_info *dev, info->stencil.tiling = s8_tex->tiling; - if (offset_to_layer) { + if (rebase_layer) { info->stencil.offset = ilo_texture_get_slice_offset(s8_tex, level, first_layer, &x_offset[1], &y_offset[1]); } } - if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) { - info->hiz.bo = tex->hiz.bo; - info->hiz.stride = tex->hiz.bo_stride; - info->hiz.tiling = INTEL_TILING_Y; - - /* - * Layer offsetting is used on GEN6 only. And on GEN6, HiZ is enabled - * only when the depth buffer is non-mipmapped and non-array, making - * layer offsetting no-op. 
- */ - if (offset_to_layer) { - assert(level == 0 && first_layer == 0 && num_layers == 1); - - info->hiz.offset = 0; - x_offset[2] = 0; - y_offset[2] = 0; - } + if (hiz_bo) { + info->hiz.bo = hiz_bo; + info->hiz.stride = 0; + info->hiz.tiling = 0; + info->hiz.offset = 0; + x_offset[2] = 0; + y_offset[2] = 0; } info->width = tex->base.width0; @@ -1137,11 +2531,11 @@ zs_init_info(const struct ilo_dev_info *dev, info->first_layer = first_layer; info->num_layers = num_layers; - if (offset_to_layer) { + if (rebase_layer) { /* the size of the layer */ info->width = u_minify(info->width, level); info->height = u_minify(info->height, level); - if (info->surface_type == GEN6_SURFTYPE_3D) + if (info->surface_type == BRW_SURFACE_3D) info->depth = u_minify(info->depth, level); else info->depth = 1; @@ -1194,14 +2588,14 @@ zs_init_info(const struct ilo_dev_info *dev, info->height += info->y_offset; /* we have to treat them as 2D surfaces */ - if (info->surface_type == GEN6_SURFTYPE_CUBE) { + if (info->surface_type == BRW_SURFACE_CUBE) { assert(tex->base.width0 == tex->base.height0); /* we will set slice_offset to point to the single face */ - info->surface_type = GEN6_SURFTYPE_2D; + info->surface_type = BRW_SURFACE_2D; } - else if (info->surface_type == GEN6_SURFTYPE_1D && info->height > 1) { + else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) { assert(tex->base.height0 == 1); - info->surface_type = GEN6_SURFTYPE_2D; + info->surface_type = BRW_SURFACE_2D; } } } @@ -1209,46 +2603,44 @@ zs_init_info(const struct ilo_dev_info *dev, void ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, const struct ilo_texture *tex, - enum pipe_format format, unsigned level, + enum pipe_format format, + unsigned level, unsigned first_layer, unsigned num_layers, - bool offset_to_layer, struct ilo_zs_surface *zs) + struct ilo_zs_surface *zs) { const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192; const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 
2048 : 512; struct ilo_zs_surface_info info; uint32_t dw1, dw2, dw3, dw4, dw5, dw6; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); - if (tex) { - zs_init_info(dev, tex, format, level, first_layer, num_layers, - offset_to_layer, &info); - } - else { + if (tex) + zs_init_info(dev, tex, format, level, first_layer, num_layers, &info); + else zs_init_info_null(dev, &info); - } switch (info.surface_type) { - case GEN6_SURFTYPE_NULL: + case BRW_SURFACE_NULL: break; - case GEN6_SURFTYPE_1D: + case BRW_SURFACE_1D: assert(info.width <= max_2d_size && info.height == 1 && info.depth <= max_array_size); assert(info.first_layer < max_array_size - 1 && info.num_layers <= max_array_size); break; - case GEN6_SURFTYPE_2D: + case BRW_SURFACE_2D: assert(info.width <= max_2d_size && info.height <= max_2d_size && info.depth <= max_array_size); assert(info.first_layer < max_array_size - 1 && info.num_layers <= max_array_size); break; - case GEN6_SURFTYPE_3D: + case BRW_SURFACE_3D: assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048); assert(info.first_layer < 2048 && info.num_layers <= max_array_size); assert(info.x_offset == 0 && info.y_offset == 0); break; - case GEN6_SURFTYPE_CUBE: + case BRW_SURFACE_CUBE: assert(info.width <= max_2d_size && info.height <= max_2d_size && info.depth == 1); assert(info.first_layer == 0 && info.num_layers == 1); @@ -1311,7 +2703,7 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, dw3 = (info.height - 1) << 19 | (info.width - 1) << 6 | info.lod << 2 | - GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; + BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1; dw4 = (info.depth - 1) << 21 | info.first_layer << 10 | @@ -1342,9 +2734,6 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, zs->payload[6] = info.stencil.stride - 1; zs->payload[7] = info.stencil.offset; - if (dev->gen >= ILO_GEN(7.5)) - zs->payload[6] |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE; - /* do not increment reference count */ zs->separate_s8_bo = info.stencil.bo; } @@ 
-1370,6 +2759,408 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, } static void +gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp) +{ + const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? + ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05); + const uint8_t cmd_len = 7; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, zs->payload[0]); + ilo_cp_write_bo(cp, zs->payload[1], zs->bo, + INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_write(cp, zs->payload[2]); + ilo_cp_write(cp, zs->payload[3]); + ilo_cp_write(cp, zs->payload[4]); + ilo_cp_write(cp, zs->payload[5]); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev, + int x_offset, int y_offset, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 7); + assert(x_offset >= 0 && x_offset <= 31); + assert(y_offset >= 0 && y_offset <= 31); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, x_offset << 8 | y_offset); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev, + const struct pipe_poly_stipple *pattern, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07); + const uint8_t cmd_len = 33; + int i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + assert(Elements(pattern->stipple) == 32); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + for (i = 0; i < 32; i++) + ilo_cp_write(cp, pattern->stipple[i]); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev, + unsigned pattern, unsigned factor, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08); + const uint8_t cmd_len = 3; + unsigned inverse; + + ILO_GPE_VALID_GEN(dev, 6, 7); + 
assert((pattern & 0xffff) == pattern); + assert(factor >= 1 && factor <= 256); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, pattern); + + if (dev->gen >= ILO_GEN(7)) { + /* in U1.16 */ + inverse = (unsigned) (65536.0f / factor); + ilo_cp_write(cp, inverse << 15 | factor); + } + else { + /* in U1.13 */ + inverse = (unsigned) (8192.0f / factor); + ilo_cp_write(cp, inverse << 16 | factor); + } + + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0 << 16 | 0); + ilo_cp_write(cp, 0 << 16 | 0); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev, + int index, unsigned svbi, + unsigned max_svbi, + bool load_vertex_count, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b); + const uint8_t cmd_len = 4; + uint32_t dw1; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(index >= 0 && index < 4); + + dw1 = index << SVB_INDEX_SHIFT; + if (load_vertex_count) + dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, svbi); + ilo_cp_write(cp, max_svbi); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev, + int num_samples, + const uint32_t *packed_sample_pos, + bool pixel_location_center, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d); + const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3; + uint32_t dw1, dw2, dw3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw1 = (pixel_location_center) ? 
+ MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT; + + switch (num_samples) { + case 0: + case 1: + dw1 |= MS_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + case 4: + dw1 |= MS_NUMSAMPLES_4; + dw2 = packed_sample_pos[0]; + dw3 = 0; + break; + case 8: + assert(dev->gen >= ILO_GEN(7)); + dw1 |= MS_NUMSAMPLES_8; + dw2 = packed_sample_pos[0]; + dw3 = packed_sample_pos[1]; + break; + default: + assert(!"unsupported sample count"); + dw1 |= MS_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + if (dev->gen >= ILO_GEN(7)) + ilo_cp_write(cp, dw3); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp) +{ + const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? + ILO_GPE_CMD(0x3, 0x0, 0x06) : + ILO_GPE_CMD(0x3, 0x1, 0x0e); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + /* see ilo_gpe_init_zs_surface() */ + ilo_cp_write(cp, zs->payload[6]); + ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo, + INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp) +{ + const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? 
+ ILO_GPE_CMD(0x3, 0x0, 0x07) : + ILO_GPE_CMD(0x3, 0x1, 0x0f); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + /* see ilo_gpe_init_zs_surface() */ + ilo_cp_write(cp, zs->payload[8]); + ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo, + INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, + uint32_t clear_val, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + GEN5_DEPTH_CLEAR_VALID); + ilo_cp_write(cp, clear_val); + ilo_cp_end(cp); +} + +static void +gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev, + uint32_t dw1, + struct intel_bo *bo, uint32_t bo_offset, + bool write_qword, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00); + const uint8_t cmd_len = (write_qword) ? 
5 : 4; + const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; + const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (dw1 & PIPE_CONTROL_CS_STALL) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 73: + * + * "1 of the following must also be set (when CS stall is set): + * + * * Depth Cache Flush Enable ([0] of DW1) + * * Stall at Pixel Scoreboard ([1] of DW1) + * * Depth Stall ([13] of DW1) + * * Post-Sync Operation ([13] of DW1) + * * Render Target Cache Flush Enable ([12] of DW1) + * * Notify Enable ([8] of DW1)" + * + * From the Ivy Bridge PRM, volume 2 part 1, page 61: + * + * "One of the following must also be set (when CS stall is set): + * + * * Render Target Cache Flush Enable ([12] of DW1) + * * Depth Cache Flush Enable ([0] of DW1) + * * Stall at Pixel Scoreboard ([1] of DW1) + * * Depth Stall ([13] of DW1) + * * Post-Sync Operation ([13] of DW1)" + */ + uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_DEPTH_STALL; + + /* post-sync op */ + bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP; + + if (dev->gen == ILO_GEN(6)) + bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE; + + assert(dw1 & bit_test); + } + + if (dw1 & PIPE_CONTROL_DEPTH_STALL) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 73: + * + * "Following bits must be clear (when Depth Stall is set): + * + * * Render Target Cache Flush Enable ([12] of DW1) + * * Depth Cache Flush Enable ([0] of DW1)" + */ + assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH))); + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain); + ilo_cp_write(cp, 0); + if (write_qword) + ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static void +gen6_emit_3DPRIMITIVE(const struct 
ilo_dev_info *dev, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); + const uint8_t cmd_len = 6; + const int prim = (rectlist) ? + _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); + const int vb_access = (info->indexed) ? + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; + const uint32_t vb_start = info->start + + ((info->indexed) ? ib->draw_start_offset : 0); + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + vb_access); + ilo_cp_write(cp, info->count); + ilo_cp_write(cp, vb_start); + ilo_cp_write(cp, info->instance_count); + ilo_cp_write(cp, info->start_instance); + ilo_cp_write(cp, info->index_bias); + ilo_cp_end(cp); +} + +static uint32_t +gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev, + const struct ilo_shader_state **cs, + uint32_t *sampler_state, + int *num_samplers, + uint32_t *binding_table_state, + int *num_surfaces, + int num_ids, + struct ilo_cp *cp) +{ + /* + * From the Sandy Bridge PRM, volume 2 part 2, page 34: + * + * "(Interface Descriptor Total Length) This field must have the same + * alignment as the Interface Descriptor Data Start Address. + * + * It must be DQWord (32-byte) aligned..." + * + * From the Sandy Bridge PRM, volume 2 part 2, page 35: + * + * "(Interface Descriptor Data Start Address) Specifies the 32-byte + * aligned address of the Interface Descriptor data." 
+ */ + const int state_align = 32 / 4; + const int state_len = (32 / 4) * num_ids; + uint32_t state_offset, *dw; + int i; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA", + state_len, state_align, &state_offset); + + for (i = 0; i < num_ids; i++) { + dw[0] = ilo_shader_get_kernel_offset(cs[i]); + dw[1] = 1 << 18; /* SPF */ + dw[2] = sampler_state[i] | + (num_samplers[i] + 3) / 4 << 2; + dw[3] = binding_table_state[i] | + num_surfaces[i]; + dw[4] = 0 << 16 | /* CURBE Read Length */ + 0; /* CURBE Read Offset */ + dw[5] = 0; /* Barrier ID */ + dw[6] = 0; + dw[7] = 0; + + dw += 8; + } + + return state_offset; +} + +static void viewport_get_guardband(const struct ilo_dev_info *dev, int center_x, int center_y, int *min_gbx, int *max_gbx, @@ -1438,7 +3229,7 @@ ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, const float scale_z = fabs(state->scale[2]); int min_gbx, max_gbx, min_gby, max_gby; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); viewport_get_guardband(dev, (int) state->translate[0], @@ -1468,15 +3259,158 @@ ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, vp->max_z = scale_z * 1.0f + state->translate[2]; } +static uint32_t +gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 8 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 262: + * + * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is + * stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT", + state_len, state_align, &state_offset); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + + dw += 8; + } + + return state_offset; +} + +static uint32_t +gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 4 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 193: + * + * "The viewport-related state is stored as an array of up to 16 + * elements..." + */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT", + state_len, state_align, &state_offset); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_gbx); + dw[1] = fui(vp->max_gbx); + dw[2] = fui(vp->min_gby); + dw[3] = fui(vp->max_gby); + + dw += 4; + } + + return state_offset; +} + +static uint32_t +gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 2 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 385: + * + * "The viewport state is stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT", + state_len, state_align, &state_offset); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_z); + dw[1] = fui(vp->max_z); + + dw += 2; + } + + return state_offset; +} + +static uint32_t +gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev, + const struct pipe_stencil_ref *stencil_ref, + float alpha_ref, + const struct pipe_blend_color *blend_color, + struct ilo_cp *cp) +{ + const int state_align = 64 / 4; + const int state_len = 6; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE", + state_len, state_align, &state_offset); + + dw[0] = stencil_ref->ref_value[0] << 24 | + stencil_ref->ref_value[1] << 16 | + BRW_ALPHATEST_FORMAT_UNORM8; + dw[1] = float_to_ubyte(alpha_ref); + dw[2] = fui(blend_color->color[0]); + dw[3] = fui(blend_color->color[1]); + dw[4] = fui(blend_color->color[2]); + dw[5] = fui(blend_color->color[3]); + + return state_offset; +} + static int gen6_blend_factor_dst_alpha_forced_one(int factor) { switch (factor) { - case GEN6_BLENDFACTOR_DST_ALPHA: - return GEN6_BLENDFACTOR_ONE; - case GEN6_BLENDFACTOR_INV_DST_ALPHA: - case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE: - return GEN6_BLENDFACTOR_ZERO; + case BRW_BLENDFACTOR_DST_ALPHA: + return BRW_BLENDFACTOR_ONE; + case BRW_BLENDFACTOR_INV_DST_ALPHA: + case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_ZERO; default: return factor; } @@ -1527,7 +3461,7 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev, { unsigned num_cso, i; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); if (state->independent_blend_enable) { num_cso = Elements(blend->cso); @@ -1547,7 +3481,7 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev, bool dual_blend; cso->payload[0] = 0; - cso->payload[1] = GEN6_BLEND_DW1_COLORCLAMP_RTFORMAT | + cso->payload[1] = 
BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 | 0x3; if (!(rt->colormask & PIPE_MASK_A)) @@ -1614,6 +3548,134 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev, } } +static uint32_t +gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev, + const struct ilo_blend_state *blend, + const struct ilo_fb_state *fb, + const struct pipe_alpha_state *alpha, + struct ilo_cp *cp) +{ + const int state_align = 64 / 4; + int state_len; + uint32_t state_offset, *dw; + unsigned num_targets, i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 376: + * + * "The blend state is stored as an array of up to 8 elements..." + */ + num_targets = fb->state.nr_cbufs; + assert(num_targets <= 8); + + if (!num_targets) { + if (!alpha->enabled) + return 0; + /* to be able to reference alpha func */ + num_targets = 1; + } + + state_len = 2 * num_targets; + + dw = ilo_cp_steal_ptr(cp, "BLEND_STATE", + state_len, state_align, &state_offset); + + for (i = 0; i < num_targets; i++) { + const unsigned idx = (blend->independent_blend_enable) ? i : 0; + const struct ilo_blend_cso *cso = &blend->cso[idx]; + const int num_samples = fb->num_samples; + const struct util_format_description *format_desc = + (idx < fb->state.nr_cbufs) ? 
+ util_format_description(fb->state.cbufs[idx]->format) : NULL; + bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; + + rt_is_unorm = true; + rt_is_pure_integer = false; + rt_dst_alpha_forced_one = false; + + if (format_desc) { + int ch; + + switch (format_desc->format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + /* force alpha to one when the HW format has alpha */ + assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM) + == BRW_SURFACEFORMAT_B8G8R8A8_UNORM); + rt_dst_alpha_forced_one = true; + break; + default: + break; + } + + for (ch = 0; ch < 4; ch++) { + if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) + continue; + + if (format_desc->channel[ch].pure_integer) { + rt_is_unorm = false; + rt_is_pure_integer = true; + break; + } + + if (!format_desc->channel[ch].normalized || + format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) + rt_is_unorm = false; + } + } + + dw[0] = cso->payload[0]; + dw[1] = cso->payload[1]; + + if (!rt_is_pure_integer) { + if (rt_dst_alpha_forced_one) + dw[0] |= cso->dw_blend_dst_alpha_forced_one; + else + dw[0] |= cso->dw_blend; + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Logic Ops are only supported on *_UNORM surfaces (excluding + * _SRGB variants), otherwise Logic Ops must be DISABLED." + * + * Since logicop is ignored for non-UNORM color buffers, no special care + * is needed. + */ + if (rt_is_unorm) + dw[1] |= cso->dw_logicop; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 356: + * + * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage + * Dither both must be disabled." + * + * There is no such limitation on GEN7, or for AlphaToOne. But GL + * requires that anyway. + */ + if (num_samples > 1) + dw[1] |= cso->dw_alpha_mod; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 382: + * + * "Alpha Test can only be enabled if Pixel Shader outputs a float + * alpha value." 
+ */ + if (alpha->enabled && !rt_is_pure_integer) { + dw[1] |= 1 << 16 | + gen6_translate_dsa_func(alpha->func) << 13; + } + + dw += 2; + } + + return state_offset; +} + void ilo_gpe_init_dsa(const struct ilo_dev_info *dev, const struct pipe_depth_stencil_alpha_state *state, @@ -1622,10 +3684,12 @@ ilo_gpe_init_dsa(const struct ilo_dev_info *dev, const struct pipe_depth_state *depth = &state->depth; const struct pipe_stencil_state *stencil0 = &state->stencil[0]; const struct pipe_stencil_state *stencil1 = &state->stencil[1]; - const struct pipe_alpha_state *alpha = &state->alpha; uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* copy alpha state for later use */ + dsa->alpha = state->alpha; STATIC_ASSERT(Elements(dsa->payload) >= 3); dw = dsa->payload; @@ -1692,18 +3756,29 @@ ilo_gpe_init_dsa(const struct ilo_dev_info *dev, if (depth->enabled) dw[2] |= gen6_translate_dsa_func(depth->func) << 27; else - dw[2] |= GEN6_COMPAREFUNCTION_ALWAYS << 27; + dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27; +} + +static uint32_t +gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev, + const struct ilo_dsa_state *dsa, + struct ilo_cp *cp) +{ + const int state_align = 64 / 4; + const int state_len = 3; + uint32_t state_offset, *dw; - /* dw_alpha will be ORed to BLEND_STATE */ - if (alpha->enabled) { - dsa->dw_alpha = 1 << 16 | - gen6_translate_dsa_func(alpha->func) << 13; - } - else { - dsa->dw_alpha = 0; - } - dsa->alpha_ref = float_to_ubyte(alpha->ref_value); + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE", + state_len, state_align, &state_offset); + + dw[0] = dsa->payload[0]; + dw[1] = dsa->payload[1]; + dw[2] = dsa->payload[2]; + + return state_offset; } void @@ -1715,7 +3790,7 @@ ilo_gpe_set_scissor(const struct ilo_dev_info *dev, { unsigned i; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); for (i = 0; i < num_states; i++) { uint16_t min_x, min_y, max_x, max_y; @@ -1756,6 +3831,64 
@@ ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev, } } +static uint32_t +gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev, + const struct ilo_scissor_state *scissor, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 2 * num_viewports; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 263: + * + * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is + * stored as an array of up to 16 elements..." + */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT", + state_len, state_align, &state_offset); + + memcpy(dw, scissor->payload, state_len * 4); + + return state_offset; +} + +static uint32_t +gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev, + uint32_t *surface_states, + int num_surface_states, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = num_surface_states; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 69: + * + * "It is stored as an array of up to 256 elements..." 
+ */ + assert(num_surface_states <= 256); + + if (!num_surface_states) + return 0; + + dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE", + state_len, state_align, &state_offset); + memcpy(dw, surface_states, + num_surface_states * sizeof(surface_states[0])); + + return state_offset; +} + void ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev, unsigned width, unsigned height, @@ -1792,17 +3925,17 @@ ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev, STATIC_ASSERT(Elements(surf->payload) >= 6); dw = surf->payload; - dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT | - GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT; + dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT; dw[1] = 0; - dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | - level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; + dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT | + (width - 1) << BRW_SURFACE_WIDTH_SHIFT | + level << BRW_SURFACE_LOD_SHIFT; - dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | - GEN6_TILING_X; + dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT | + BRW_SURFACE_TILED; dw[4] = 0; dw[5] = 0; @@ -1884,18 +4017,18 @@ ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev, STATIC_ASSERT(Elements(surf->payload) >= 6); dw = surf->payload; - dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT; + dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | + surface_format << BRW_SURFACE_FORMAT_SHIFT; if (render_cache_rw) - dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; + dw[0] |= BRW_SURFACE_RC_READ_WRITE; dw[1] = offset; - dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - width << GEN6_SURFACE_DW2_WIDTH__SHIFT; + dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT | + width << BRW_SURFACE_WIDTH_SHIFT; - dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | - 
pitch << GEN6_SURFACE_DW3_PITCH__SHIFT; + dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT | + pitch << BRW_SURFACE_PITCH_SHIFT; dw[4] = 0; dw[5] = 0; @@ -1912,7 +4045,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, unsigned num_levels, unsigned first_layer, unsigned num_layers, - bool is_rt, bool offset_to_layer, + bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf) { int surface_type, surface_format; @@ -1923,7 +4056,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, ILO_GPE_VALID_GEN(dev, 6, 6); surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); - assert(surface_type != GEN6_SURFTYPE_BUFFER); + assert(surface_type != BRW_SURFACE_BUFFER); if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) format = PIPE_FORMAT_Z32_FLOAT; @@ -1940,7 +4073,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, tex->base.depth0 : num_layers; pitch = tex->bo_stride; - if (surface_type == GEN6_SURFTYPE_CUBE) { + if (surface_type == BRW_SURFACE_CUBE) { /* * From the Sandy Bridge PRM, volume 4 part 1, page 81: * @@ -1954,7 +4087,7 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, * restriction. 
*/ if (is_rt) { - surface_type = GEN6_SURFTYPE_2D; + surface_type = BRW_SURFACE_2D; } else { assert(num_layers % 6 == 0); @@ -1965,21 +4098,21 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, /* sanity check the size */ assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); switch (surface_type) { - case GEN6_SURFTYPE_1D: + case BRW_SURFACE_1D: assert(width <= 8192 && height == 1 && depth <= 512); assert(first_layer < 512 && num_layers <= 512); break; - case GEN6_SURFTYPE_2D: + case BRW_SURFACE_2D: assert(width <= 8192 && height <= 8192 && depth <= 512); assert(first_layer < 512 && num_layers <= 512); break; - case GEN6_SURFTYPE_3D: + case BRW_SURFACE_3D: assert(width <= 2048 && height <= 2048 && depth <= 2048); assert(first_layer < 2048 && num_layers <= 512); if (!is_rt) assert(first_layer == 0); break; - case GEN6_SURFTYPE_CUBE: + case BRW_SURFACE_CUBE: assert(width <= 8192 && height <= 8192 && depth <= 85); assert(width == height); assert(first_layer < 512 && num_layers <= 512); @@ -1998,44 +4131,52 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, assert(tex->interleaved); if (is_rt) { + /* + * Compute the offset to the layer manually. + * + * For rendering, the hardware requires LOD to be the same for all + * render targets and the depth buffer. We need to compute the offset + * to the layer manually and always set LOD to 0. 
+ */ + if (true) { + /* we lose the capability for layered rendering */ + assert(num_layers == 1); + + layer_offset = ilo_texture_get_slice_offset(tex, + first_level, first_layer, &x_offset, &y_offset); + + assert(x_offset % 4 == 0); + assert(y_offset % 2 == 0); + x_offset /= 4; + y_offset /= 2; + + /* derive the size for the LOD */ + width = u_minify(width, first_level); + height = u_minify(height, first_level); + if (surface_type == BRW_SURFACE_3D) + depth = u_minify(depth, first_level); + else + depth = 1; + + first_level = 0; + first_layer = 0; + lod = 0; + } + else { + layer_offset = 0; + x_offset = 0; + y_offset = 0; + } + assert(num_levels == 1); lod = first_level; } else { - lod = num_levels - 1; - } - - /* - * Offset to the layer. When rendering, the hardware requires LOD and - * Depth to be the same for all render targets and the depth buffer. We - * need to offset to the layer manually and always set LOD and Depth to 0. - */ - if (offset_to_layer) { - /* we lose the capability for layered rendering */ - assert(is_rt && num_layers == 1); - - layer_offset = ilo_texture_get_slice_offset(tex, - first_level, first_layer, &x_offset, &y_offset); - - assert(x_offset % 4 == 0); - assert(y_offset % 2 == 0); - x_offset /= 4; - y_offset /= 2; - - /* derive the size for the LOD */ - width = u_minify(width, first_level); - height = u_minify(height, first_level); - - first_level = 0; - first_layer = 0; - - lod = 0; - depth = 1; - } - else { layer_offset = 0; x_offset = 0; y_offset = 0; + + lod = num_levels - 1; } /* @@ -2069,43 +4210,129 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, STATIC_ASSERT(Elements(surf->payload) >= 6); dw = surf->payload; - dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT | - GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; + dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT | + surface_format << BRW_SURFACE_FORMAT_SHIFT | + BRW_SURFACE_MIPMAPLAYOUT_BELOW << 
BRW_SURFACE_MIPLAYOUT_SHIFT; - if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) { + if (surface_type == BRW_SURFACE_CUBE && !is_rt) { dw[0] |= 1 << 9 | - GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; + BRW_SURFACE_CUBEFACE_ENABLES; } - if (is_rt) - dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; + if (render_cache_rw) + dw[0] |= BRW_SURFACE_RC_READ_WRITE; dw[1] = layer_offset; - dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | - lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; + dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT | + (width - 1) << BRW_SURFACE_WIDTH_SHIFT | + lod << BRW_SURFACE_LOD_SHIFT; - dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | - (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | + dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT | + (pitch - 1) << BRW_SURFACE_PITCH_SHIFT | ilo_gpe_gen6_translate_winsys_tiling(tex->tiling); - dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | + dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT | first_layer << 17 | (num_layers - 1) << 8 | - ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 : - GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1); + ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 : + BRW_SURFACE_MULTISAMPLECOUNT_1); - dw[5] = x_offset << GEN6_SURFACE_DW5_X_OFFSET__SHIFT | - y_offset << GEN6_SURFACE_DW5_Y_OFFSET__SHIFT; + dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT | + y_offset << BRW_SURFACE_Y_OFFSET_SHIFT; if (tex->valign_4) - dw[5] |= GEN6_SURFACE_DW5_VALIGN_4; + dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE; /* do not increment reference count */ surf->bo = tex->bo; } +static uint32_t +gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev, + const struct ilo_view_surface *surf, + bool for_render, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = (dev->gen >= ILO_GEN(7)) ? 
8 : 6; + uint32_t state_offset; + uint32_t read_domains, write_domain; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (for_render) { + read_domains = INTEL_DOMAIN_RENDER; + write_domain = INTEL_DOMAIN_RENDER; + } + else { + read_domains = INTEL_DOMAIN_SAMPLER; + write_domain = 0; + } + + ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset); + + STATIC_ASSERT(Elements(surf->payload) >= 8); + + ilo_cp_write(cp, surf->payload[0]); + ilo_cp_write_bo(cp, surf->payload[1], + surf->bo, read_domains, write_domain); + ilo_cp_write(cp, surf->payload[2]); + ilo_cp_write(cp, surf->payload[3]); + ilo_cp_write(cp, surf->payload[4]); + ilo_cp_write(cp, surf->payload[5]); + + if (dev->gen >= ILO_GEN(7)) { + ilo_cp_write(cp, surf->payload[6]); + ilo_cp_write(cp, surf->payload[7]); + } + + ilo_cp_end(cp); + + return state_offset; +} + +static uint32_t +gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev, + const struct pipe_stream_output_target *so, + const struct pipe_stream_output_info *so_info, + int so_index, + struct ilo_cp *cp) +{ + struct ilo_buffer *buf = ilo_buffer(so->buffer); + unsigned bo_offset, struct_size; + enum pipe_format elem_format; + struct ilo_view_surface surf; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; + struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; + + switch (so_info->output[so_index].num_components) { + case 1: + elem_format = PIPE_FORMAT_R32_FLOAT; + break; + case 2: + elem_format = PIPE_FORMAT_R32G32_FLOAT; + break; + case 3: + elem_format = PIPE_FORMAT_R32G32B32_FLOAT; + break; + case 4: + elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + break; + default: + assert(!"unexpected SO components length"); + elem_format = PIPE_FORMAT_R32_FLOAT; + break; + } + + ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size, + struct_size, elem_format, false, true, &surf); + + return gen6_emit_SURFACE_STATE(dev, &surf, false, 
cp); +} + static void sampler_init_border_color_gen6(const struct ilo_dev_info *dev, const union pipe_color_union *color, @@ -2184,7 +4411,7 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, bool clamp_is_to_edge; uint32_t dw0, dw1, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7.5); + ILO_GPE_VALID_GEN(dev, 6, 7); memset(sampler, 0, sizeof(*sampler)); @@ -2197,9 +4424,9 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16) max_aniso = state->max_anisotropy / 2 - 1; else if (state->max_anisotropy > 16) - max_aniso = GEN6_ANISORATIO_16; + max_aniso = BRW_ANISORATIO_16; else - max_aniso = GEN6_ANISORATIO_2; + max_aniso = BRW_ANISORATIO_2; /* * @@ -2300,10 +4527,10 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, if (state->seamless_cube_map && (state->min_img_filter != PIPE_TEX_FILTER_NEAREST || state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { - wrap_cube = GEN6_TEXCOORDMODE_CUBE; + wrap_cube = BRW_TEXCOORDMODE_CUBE; } else { - wrap_cube = GEN6_TEXCOORDMODE_CLAMP; + wrap_cube = BRW_TEXCOORDMODE_CLAMP; } if (!state->normalized_coords) { @@ -2327,22 +4554,22 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, * - Surface Min LOD must be 0. * - Texture LOD Bias must be 0." 
*/ - assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP || - wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP || - wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP || - wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER); - - assert(mag_filter == GEN6_MAPFILTER_NEAREST || - mag_filter == GEN6_MAPFILTER_LINEAR); - assert(min_filter == GEN6_MAPFILTER_NEAREST || - min_filter == GEN6_MAPFILTER_LINEAR); + assert(wrap_s == BRW_TEXCOORDMODE_CLAMP || + wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER); + assert(wrap_t == BRW_TEXCOORDMODE_CLAMP || + wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER); + assert(wrap_r == BRW_TEXCOORDMODE_CLAMP || + wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER); + + assert(mag_filter == BRW_MAPFILTER_NEAREST || + mag_filter == BRW_MAPFILTER_LINEAR); + assert(min_filter == BRW_MAPFILTER_NEAREST || + min_filter == BRW_MAPFILTER_LINEAR); /* work around a bug in util_blitter */ - mip_filter = GEN6_MIPFILTER_NONE; + mip_filter = BRW_MIPFILTER_NONE; - assert(mip_filter == GEN6_MIPFILTER_NONE); + assert(mip_filter == BRW_MIPFILTER_NONE); } if (dev->gen >= ILO_GEN(7)) { @@ -2353,8 +4580,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, sampler->dw_filter = mag_filter << 17 | min_filter << 14; - sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | - GEN6_MAPFILTER_ANISOTROPIC << 14 | + sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 | + BRW_MAPFILTER_ANISOTROPIC << 14 | 1; dw1 = min_lod << 20 | @@ -2366,15 +4593,15 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, dw3 = max_aniso << 19; /* round the coordinates for linear filtering */ - if (min_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | - GEN6_SAMPLER_DW3_V_MIN_ROUND | - GEN6_SAMPLER_DW3_R_MIN_ROUND); + if (min_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13; } - if (mag_filter != 
GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | - GEN6_SAMPLER_DW3_V_MAG_ROUND | - GEN6_SAMPLER_DW3_R_MAG_ROUND); + if (mag_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13; } if (!state->normalized_coords) @@ -2390,8 +4617,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, * mode */ sampler->dw_wrap_1d = wrap_s << 6 | - GEN6_TEXCOORDMODE_WRAP << 3 | - GEN6_TEXCOORDMODE_WRAP; + BRW_TEXCOORDMODE_WRAP << 3 | + BRW_TEXCOORDMODE_WRAP; sampler->dw_wrap_cube = wrap_cube << 6 | wrap_cube << 3 | @@ -2418,8 +4645,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, mag_filter << 17 | min_filter << 14; - sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | - GEN6_MAPFILTER_ANISOTROPIC << 14; + sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 | + BRW_MAPFILTER_ANISOTROPIC << 14; dw1 = min_lod << 22 | max_lod << 12; @@ -2429,8 +4656,8 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, wrap_r; sampler->dw_wrap_1d = wrap_s << 6 | - GEN6_TEXCOORDMODE_WRAP << 3 | - GEN6_TEXCOORDMODE_WRAP; + BRW_TEXCOORDMODE_WRAP << 3 | + BRW_TEXCOORDMODE_WRAP; sampler->dw_wrap_cube = wrap_cube << 6 | wrap_cube << 3 | @@ -2439,15 +4666,15 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, dw3 = max_aniso << 19; /* round the coordinates for linear filtering */ - if (min_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | - GEN6_SAMPLER_DW3_V_MIN_ROUND | - GEN6_SAMPLER_DW3_R_MIN_ROUND); + if (min_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13; } - if (mag_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | - GEN6_SAMPLER_DW3_V_MAG_ROUND | - GEN6_SAMPLER_DW3_R_MAG_ROUND); + if (mag_filter != BRW_MAPFILTER_NEAREST) { + dw3 |= 
(BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13; } if (!state->normalized_coords) @@ -2464,109 +4691,342 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, } } -void -ilo_gpe_set_fb(const struct ilo_dev_info *dev, - const struct pipe_framebuffer_state *state, - struct ilo_fb_state *fb) +static uint32_t +gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, + const struct ilo_sampler_cso * const *samplers, + const struct pipe_sampler_view * const *views, + const uint32_t *sampler_border_colors, + int num_samplers, + struct ilo_cp *cp) { - const struct pipe_surface *first; - unsigned num_surfaces, first_idx; + const int state_align = 32 / 4; + const int state_len = 4 * num_samplers; + uint32_t state_offset, *dw; + int i; + + ILO_GPE_VALID_GEN(dev, 6, 7); - ILO_GPE_VALID_GEN(dev, 6, 7.5); + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 101: + * + * "The sampler state is stored as an array of up to 16 elements..." 
+ */ + assert(num_samplers <= 16); - util_copy_framebuffer_state(&fb->state, state); + if (!num_samplers) + return 0; - ilo_gpe_init_view_surface_null(dev, - state->width, state->height, - 1, 0, &fb->null_rt); + dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE", + state_len, state_align, &state_offset); + + for (i = 0; i < num_samplers; i++) { + const struct ilo_sampler_cso *sampler = samplers[i]; + const struct pipe_sampler_view *view = views[i]; + const uint32_t border_color = sampler_border_colors[i]; + uint32_t dw_filter, dw_wrap; + + /* there may be holes */ + if (!sampler || !view) { + /* disabled sampler */ + dw[0] = 1 << 31; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw += 4; + + continue; + } - first = NULL; - for (first_idx = 0; first_idx < state->nr_cbufs; first_idx++) { - if (state->cbufs[first_idx]) { - first = state->cbufs[first_idx]; + /* determine filter and wrap modes */ + switch (view->texture->target) { + case PIPE_TEXTURE_1D: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_1d; + break; + case PIPE_TEXTURE_3D: + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 103: + * + * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for + * surfaces of type SURFTYPE_3D." + */ + dw_filter = sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + case PIPE_TEXTURE_CUBE: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_cube; break; + default: + dw_filter = (sampler->anisotropic) ? 
+ sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + } + + dw[0] = sampler->payload[0]; + dw[1] = sampler->payload[1]; + assert(!(border_color & 0x1f)); + dw[2] = border_color; + dw[3] = sampler->payload[2]; + + dw[0] |= dw_filter; + + if (dev->gen >= ILO_GEN(7)) { + dw[3] |= dw_wrap; + } + else { + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 21: + * + * "[DevSNB] Errata: Incorrect behavior is observed in cases + * where the min and mag mode filters are different and + * SurfMinLOD is nonzero. The determination of MagMode uses the + * following equation instead of the one in the above + * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" + * + * As a way to work around that, we set Base to + * view->u.tex.first_level. + */ + dw[0] |= view->u.tex.first_level << 22; + + dw[1] |= dw_wrap; } + + dw += 4; } - if (!first) - first = state->zsbuf; - fb->num_samples = (first) ? first->texture->nr_samples : 1; - if (!fb->num_samples) - fb->num_samples = 1; + return state_offset; +} + +static uint32_t +gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, + const struct ilo_sampler_cso *sampler, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", + state_len, state_align, &state_offset); + + /* see ilo_gpe_init_sampler_cso() */ + memcpy(dw, &sampler->payload[3], state_len * 4); - fb->offset_to_layers = false; + return state_offset; +} +static uint32_t +gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev, + int size, void **pcb, + struct ilo_cp *cp) +{ /* - * The PRMs list several restrictions when the framebuffer has more than - * one surface, but it seems they are lifted on GEN7+. 
+ * For all VS, GS, FS, and CS push constant buffers, they must be aligned + * to 32 bytes, and their sizes are specified in 256-bit units. */ - num_surfaces = state->nr_cbufs + !!state->zsbuf; + const int state_align = 32 / 4; + const int state_len = align(size, 32) / 4; + uint32_t state_offset; + char *buf; - if (dev->gen < ILO_GEN(7) && num_surfaces > 1) { - const unsigned first_depth = - (first->texture->target == PIPE_TEXTURE_3D) ? - first->texture->depth0 : - first->u.tex.last_layer - first->u.tex.first_layer + 1; - bool has_3d_target = (first->texture->target == PIPE_TEXTURE_3D); - unsigned i; + ILO_GPE_VALID_GEN(dev, 6, 7); - for (i = first_idx + 1; i < num_surfaces; i++) { - const struct pipe_surface *surf = - (i < state->nr_cbufs) ? state->cbufs[i] : state->zsbuf; - unsigned depth; + buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER", + state_len, state_align, &state_offset); - if (!surf) - continue; + /* zero out the unused range */ + if (size < state_len * 4) + memset(&buf[size], 0, state_len * 4 - size); - depth = (surf->texture->target == PIPE_TEXTURE_3D) ? - surf->texture->depth0 : - surf->u.tex.last_layer - surf->u.tex.first_layer + 1; + if (pcb) + *pcb = buf; - has_3d_target |= (surf->texture->target == PIPE_TEXTURE_3D); + return state_offset; +} - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 79: - * - * "The LOD of a render target must be the same as the LOD of the - * other render target(s) and of the depth buffer (defined in - * 3DSTATE_DEPTH_BUFFER)." - * - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "The Depth of a render target must be the same as the Depth of - * the other render target(s) and of the depth buffer (defined - * in 3DSTATE_DEPTH_BUFFER)." 
- */ - if (surf->u.tex.level != first->u.tex.level || - depth != first_depth) { - fb->offset_to_layers = true; - break; - } +static int +gen6_estimate_command_size(const struct ilo_dev_info *dev, + enum ilo_gpe_gen6_command cmd, + int arg) +{ + static const struct { + int header; + int body; + } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = { + [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 }, + [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 }, + [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 }, + [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 }, + [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 }, + [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 }, + [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 }, + [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 }, + [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 }, + [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 }, + [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 }, + [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 }, + [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 }, + [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 }, + [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 }, + [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 }, + [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 }, + [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 }, + [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 }, + [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 }, + [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 }, + [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 }, + [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 }, + [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 }, + [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 }, + [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 }, + [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 }, + [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 }, + [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 }, + [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 }, + [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 }, + [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 }, + [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 }, + 
[ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 }, + [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 }, + [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 }, + [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 }, + [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 }, + [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 }, + [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 }, + [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 }, + }; + const int header = gen6_command_size_table[cmd].header; + const int body = gen6_command_size_table[cmd].body; /* look up by cmd; arg is only the element count, never a table index */ + const int count = arg; - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 77: - * - * "The Height of a render target must be the same as the Height - * of the other render targets and the depth buffer (defined in - * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or - * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip - * mapped)." - * - * From the Sandy Bridge PRM, volume 4 part 1, page 78: - * - * "The Width of a render target must be the same as the Width of - * the other render target(s) and the depth buffer (defined in - * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or - * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip - * mapped)." - */ - if (surf->texture->width0 != first->texture->width0 || - surf->texture->height0 != first->texture->height0) { - if (has_3d_target || first->u.tex.level || first_depth > 1) { - fb->offset_to_layers = true; - break; - } - } + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT); + + return (likely(count)) ? 
header + body * count : 0; +} + +static int +gen6_estimate_state_size(const struct ilo_dev_info *dev, + enum ilo_gpe_gen6_state state, + int arg) +{ + static const struct { + int alignment; + int body; + bool is_array; + } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = { + [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true }, + [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true }, + [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true }, + [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true }, + [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false }, + [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true }, + [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false }, + [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true }, + [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true }, + [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false }, + [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true }, + [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false }, + [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true }, + }; + const int alignment = gen6_state_size_table[state].alignment; + const int body = gen6_state_size_table[state].body; + const bool is_array = gen6_state_size_table[state].is_array; + const int count = arg; + int estimate; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(state < ILO_GPE_GEN6_STATE_COUNT); + + if (likely(count)) { + if (is_array) { + estimate = (alignment - 1) + body * count; } + else { + estimate = (alignment - 1) + body; + /* all states are aligned */ + if (count > 1) + estimate += util_align_npot(body, alignment) * (count - 1); + } + } + else { + estimate = 0; } + + return estimate; +} + +static const struct ilo_gpe_gen6 gen6_gpe = { + .estimate_command_size = gen6_estimate_command_size, + .estimate_state_size = gen6_estimate_state_size, + +#define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name + GEN6_SET(STATE_BASE_ADDRESS), + GEN6_SET(STATE_SIP), + GEN6_SET(3DSTATE_VF_STATISTICS), + GEN6_SET(PIPELINE_SELECT), + GEN6_SET(MEDIA_VFE_STATE), + GEN6_SET(MEDIA_CURBE_LOAD), + 
GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD), + GEN6_SET(MEDIA_GATEWAY_STATE), + GEN6_SET(MEDIA_STATE_FLUSH), + GEN6_SET(MEDIA_OBJECT_WALKER), + GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS), + GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS), + GEN6_SET(3DSTATE_URB), + GEN6_SET(3DSTATE_VERTEX_BUFFERS), + GEN6_SET(3DSTATE_VERTEX_ELEMENTS), + GEN6_SET(3DSTATE_INDEX_BUFFER), + GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS), + GEN6_SET(3DSTATE_CC_STATE_POINTERS), + GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS), + GEN6_SET(3DSTATE_VS), + GEN6_SET(3DSTATE_GS), + GEN6_SET(3DSTATE_CLIP), + GEN6_SET(3DSTATE_SF), + GEN6_SET(3DSTATE_WM), + GEN6_SET(3DSTATE_CONSTANT_VS), + GEN6_SET(3DSTATE_CONSTANT_GS), + GEN6_SET(3DSTATE_CONSTANT_PS), + GEN6_SET(3DSTATE_SAMPLE_MASK), + GEN6_SET(3DSTATE_DRAWING_RECTANGLE), + GEN6_SET(3DSTATE_DEPTH_BUFFER), + GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET), + GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN), + GEN6_SET(3DSTATE_LINE_STIPPLE), + GEN6_SET(3DSTATE_AA_LINE_PARAMETERS), + GEN6_SET(3DSTATE_GS_SVB_INDEX), + GEN6_SET(3DSTATE_MULTISAMPLE), + GEN6_SET(3DSTATE_STENCIL_BUFFER), + GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER), + GEN6_SET(3DSTATE_CLEAR_PARAMS), + GEN6_SET(PIPE_CONTROL), + GEN6_SET(3DPRIMITIVE), + GEN6_SET(INTERFACE_DESCRIPTOR_DATA), + GEN6_SET(SF_VIEWPORT), + GEN6_SET(CLIP_VIEWPORT), + GEN6_SET(CC_VIEWPORT), + GEN6_SET(COLOR_CALC_STATE), + GEN6_SET(BLEND_STATE), + GEN6_SET(DEPTH_STENCIL_STATE), + GEN6_SET(SCISSOR_RECT), + GEN6_SET(BINDING_TABLE_STATE), + GEN6_SET(SURFACE_STATE), + GEN6_SET(so_SURFACE_STATE), + GEN6_SET(SAMPLER_STATE), + GEN6_SET(SAMPLER_BORDER_COLOR_STATE), + GEN6_SET(push_constant_buffer), +#undef GEN6_SET +}; + +const struct ilo_gpe_gen6 * +ilo_gpe_gen6_get(void) +{ + return &gen6_gpe; } diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h index e5647184f..7e24f97b8 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -28,2498 +28,533 
@@ #ifndef ILO_GPE_GEN6_H #define ILO_GPE_GEN6_H -#include "genhw/genhw.h" -#include "intel_winsys.h" - #include "ilo_common.h" -#include "ilo_cp.h" -#include "ilo_format.h" -#include "ilo_resource.h" -#include "ilo_shader.h" #include "ilo_gpe.h" #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \ assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen)) -#define ILO_GPE_MI(op) (0x0 << 29 | (op) << 23) - #define ILO_GPE_CMD(pipeline, op, subop) \ (0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16) /** - * Translate winsys tiling to hardware tiling. - */ -static inline int -ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling) -{ - switch (tiling) { - case INTEL_TILING_NONE: - return GEN6_TILING_NONE; - case INTEL_TILING_X: - return GEN6_TILING_X; - case INTEL_TILING_Y: - return GEN6_TILING_Y; - default: - assert(!"unknown tiling"); - return GEN6_TILING_NONE; - } -} - -/** - * Translate a pipe primitive type to the matching hardware primitive type. + * Commands that GEN6 GPE could emit. 
*/ -static inline int -ilo_gpe_gen6_translate_pipe_prim(unsigned prim) -{ - static const int prim_mapping[PIPE_PRIM_MAX] = { - [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, - [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, - [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, - [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, - [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, - [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, - [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, - [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, - [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, - [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, - [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, - [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, - [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, - [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, - }; - - assert(prim_mapping[prim]); - - return prim_mapping[prim]; -} +enum ilo_gpe_gen6_command { + ILO_GPE_GEN6_STATE_BASE_ADDRESS, /* (0x0, 0x1, 0x01) */ + ILO_GPE_GEN6_STATE_SIP, /* (0x0, 0x1, 0x02) */ + ILO_GPE_GEN6_3DSTATE_VF_STATISTICS, /* (0x1, 0x0, 0x0b) */ + ILO_GPE_GEN6_PIPELINE_SELECT, /* (0x1, 0x1, 0x04) */ + ILO_GPE_GEN6_MEDIA_VFE_STATE, /* (0x2, 0x0, 0x00) */ + ILO_GPE_GEN6_MEDIA_CURBE_LOAD, /* (0x2, 0x0, 0x01) */ + ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD, /* (0x2, 0x0, 0x02) */ + ILO_GPE_GEN6_MEDIA_GATEWAY_STATE, /* (0x2, 0x0, 0x03) */ + ILO_GPE_GEN6_MEDIA_STATE_FLUSH, /* (0x2, 0x0, 0x04) */ + ILO_GPE_GEN6_MEDIA_OBJECT_WALKER, /* (0x2, 0x1, 0x03) */ + ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS, /* (0x3, 0x0, 0x01) */ + ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS, /* (0x3, 0x0, 0x02) */ + ILO_GPE_GEN6_3DSTATE_URB, /* (0x3, 0x0, 0x05) */ + ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS, /* (0x3, 0x0, 0x08) */ + ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS, /* (0x3, 0x0, 0x09) */ + ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER, /* (0x3, 0x0, 0x0a) */ + ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS, /* (0x3, 0x0, 0x0d) */ + 
ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS, /* (0x3, 0x0, 0x0e) */ + ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS, /* (0x3, 0x0, 0x0f) */ + ILO_GPE_GEN6_3DSTATE_VS, /* (0x3, 0x0, 0x10) */ + ILO_GPE_GEN6_3DSTATE_GS, /* (0x3, 0x0, 0x11) */ + ILO_GPE_GEN6_3DSTATE_CLIP, /* (0x3, 0x0, 0x12) */ + ILO_GPE_GEN6_3DSTATE_SF, /* (0x3, 0x0, 0x13) */ + ILO_GPE_GEN6_3DSTATE_WM, /* (0x3, 0x0, 0x14) */ + ILO_GPE_GEN6_3DSTATE_CONSTANT_VS, /* (0x3, 0x0, 0x15) */ + ILO_GPE_GEN6_3DSTATE_CONSTANT_GS, /* (0x3, 0x0, 0x16) */ + ILO_GPE_GEN6_3DSTATE_CONSTANT_PS, /* (0x3, 0x0, 0x17) */ + ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK, /* (0x3, 0x0, 0x18) */ + ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE, /* (0x3, 0x1, 0x00) */ + ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER, /* (0x3, 0x1, 0x05) */ + ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET, /* (0x3, 0x1, 0x06) */ + ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN, /* (0x3, 0x1, 0x07) */ + ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE, /* (0x3, 0x1, 0x08) */ + ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS, /* (0x3, 0x1, 0x0a) */ + ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX, /* (0x3, 0x1, 0x0b) */ + ILO_GPE_GEN6_3DSTATE_MULTISAMPLE, /* (0x3, 0x1, 0x0d) */ + ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER, /* (0x3, 0x1, 0x0e) */ + ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER, /* (0x3, 0x1, 0x0f) */ + ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS, /* (0x3, 0x1, 0x10) */ + ILO_GPE_GEN6_PIPE_CONTROL, /* (0x3, 0x2, 0x00) */ + ILO_GPE_GEN6_3DPRIMITIVE, /* (0x3, 0x3, 0x00) */ + + ILO_GPE_GEN6_COMMAND_COUNT, +}; /** - * Translate a pipe texture target to the matching hardware surface type. + * Indirect states that GEN6 GPE could emit. 
*/ -static inline int -ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) -{ - switch (target) { - case PIPE_BUFFER: - return GEN6_SURFTYPE_BUFFER; - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return GEN6_SURFTYPE_1D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D_ARRAY: - return GEN6_SURFTYPE_2D; - case PIPE_TEXTURE_3D: - return GEN6_SURFTYPE_3D; - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - return GEN6_SURFTYPE_CUBE; - default: - assert(!"unknown texture target"); - return GEN6_SURFTYPE_BUFFER; - } -} +enum ilo_gpe_gen6_state { + ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA, + ILO_GPE_GEN6_SF_VIEWPORT, + ILO_GPE_GEN6_CLIP_VIEWPORT, + ILO_GPE_GEN6_CC_VIEWPORT, + ILO_GPE_GEN6_COLOR_CALC_STATE, + ILO_GPE_GEN6_BLEND_STATE, + ILO_GPE_GEN6_DEPTH_STENCIL_STATE, + ILO_GPE_GEN6_SCISSOR_RECT, + ILO_GPE_GEN6_BINDING_TABLE_STATE, + ILO_GPE_GEN6_SURFACE_STATE, + ILO_GPE_GEN6_SAMPLER_STATE, + ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE, + ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER, + + ILO_GPE_GEN6_STATE_COUNT, +}; + +enum intel_tiling_mode; + +struct intel_bo; +struct ilo_cp; +struct ilo_texture; +struct ilo_shader; + +typedef void +(*ilo_gpe_gen6_STATE_BASE_ADDRESS)(const struct ilo_dev_info *dev, + struct intel_bo *general_state_bo, + struct intel_bo *surface_state_bo, + struct intel_bo *dynamic_state_bo, + struct intel_bo *indirect_object_bo, + struct intel_bo *instruction_bo, + uint32_t general_state_size, + uint32_t dynamic_state_size, + uint32_t indirect_object_size, + uint32_t instruction_size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_STATE_SIP)(const struct ilo_dev_info *dev, + uint32_t sip, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_VF_STATISTICS)(const struct ilo_dev_info *dev, + bool enable, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_PIPELINE_SELECT)(const struct ilo_dev_info *dev, + int pipeline, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_MEDIA_VFE_STATE)(const 
struct ilo_dev_info *dev, + int max_threads, int num_urb_entries, + int urb_entry_size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_MEDIA_CURBE_LOAD)(const struct ilo_dev_info *dev, + uint32_t buf, int size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD)(const struct ilo_dev_info *dev, + uint32_t offset, int num_ids, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_MEDIA_GATEWAY_STATE)(const struct ilo_dev_info *dev, + int id, int byte, int thread_count, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_MEDIA_STATE_FLUSH)(const struct ilo_dev_info *dev, + int thread_count_water_mark, + int barrier_mask, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_MEDIA_OBJECT_WALKER)(const struct ilo_dev_info *dev, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_BINDING_TABLE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t vs_binding_table, + uint32_t gs_binding_table, + uint32_t ps_binding_table, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_SAMPLER_STATE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t vs_sampler_state, + uint32_t gs_sampler_state, + uint32_t ps_sampler_state, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_URB)(const struct ilo_dev_info *dev, + int vs_total_size, int gs_total_size, + int vs_entry_size, int gs_entry_size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS)(const struct ilo_dev_info *dev, + const struct pipe_vertex_buffer *vbuffers, + uint64_t vbuffer_mask, + const struct ilo_ve_state *ve, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS)(const struct ilo_dev_info *dev, + const struct ilo_ve_state *ve, + bool last_velement_edgeflag, + bool prepend_generated_ids, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_INDEX_BUFFER)(const struct ilo_dev_info *dev, + const struct ilo_ib_state *ib, + bool enable_cut_index, + struct ilo_cp *cp); + +typedef void 
+(*ilo_gpe_gen6_3DSTATE_VIEWPORT_STATE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t clip_viewport, + uint32_t sf_viewport, + uint32_t cc_viewport, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t blend_state, + uint32_t depth_stencil_state, + uint32_t color_calc_state, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t scissor_rect, + struct ilo_cp *cp); + + +typedef void +(*ilo_gpe_gen6_3DSTATE_VS)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *vs, + int num_samplers, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + const struct ilo_shader_state *vs, + int verts_per_prim, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_CLIP)(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + bool enable_guardband, + int num_viewports, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_SF)(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + const struct ilo_shader_state *last_sh, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_WM)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *fs, + int num_samplers, + const struct ilo_rasterizer_state *rasterizer, + bool dual_blend, bool cc_may_kill, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_CONSTANT_VS)(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_CONSTANT_GS)(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_CONSTANT_PS)(const struct ilo_dev_info *dev, + const uint32_t 
*bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev, + unsigned sample_mask, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE)(const struct ilo_dev_info *dev, + unsigned x, unsigned y, + unsigned width, unsigned height, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER)(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET)(const struct ilo_dev_info *dev, + int x_offset, int y_offset, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN)(const struct ilo_dev_info *dev, + const struct pipe_poly_stipple *pattern, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_LINE_STIPPLE)(const struct ilo_dev_info *dev, + unsigned pattern, unsigned factor, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS)(const struct ilo_dev_info *dev, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_GS_SVB_INDEX)(const struct ilo_dev_info *dev, + int index, unsigned svbi, + unsigned max_svbi, + bool load_vertex_count, + struct ilo_cp *cp); + + +typedef void +(*ilo_gpe_gen6_3DSTATE_MULTISAMPLE)(const struct ilo_dev_info *dev, + int num_samples, + const uint32_t *packed_sample_pos, + bool pixel_location_center, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER)(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER)(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS)(const struct ilo_dev_info *dev, + uint32_t clear_val, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_PIPE_CONTROL)(const struct ilo_dev_info *dev, + uint32_t dw1, + struct intel_bo *bo, 
uint32_t bo_offset, + bool write_qword, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen6_3DPRIMITIVE)(const struct ilo_dev_info *dev, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA)(const struct ilo_dev_info *dev, + const struct ilo_shader_state **cs, + uint32_t *sampler_state, + int *num_samplers, + uint32_t *binding_table_state, + int *num_surfaces, + int num_ids, + struct ilo_cp *cp); +typedef uint32_t +(*ilo_gpe_gen6_SF_VIEWPORT)(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_CLIP_VIEWPORT)(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_CC_VIEWPORT)(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_COLOR_CALC_STATE)(const struct ilo_dev_info *dev, + const struct pipe_stencil_ref *stencil_ref, + float alpha_ref, + const struct pipe_blend_color *blend_color, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_BLEND_STATE)(const struct ilo_dev_info *dev, + const struct ilo_blend_state *blend, + const struct ilo_fb_state *fb, + const struct pipe_alpha_state *alpha, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_DEPTH_STENCIL_STATE)(const struct ilo_dev_info *dev, + const struct ilo_dsa_state *dsa, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_SCISSOR_RECT)(const struct ilo_dev_info *dev, + const struct ilo_scissor_state *scissor, + unsigned num_viewports, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_BINDING_TABLE_STATE)(const struct ilo_dev_info *dev, + uint32_t *surface_states, + int num_surface_states, + struct ilo_cp *cp); + +typedef uint32_t 
+(*ilo_gpe_gen6_SURFACE_STATE)(const struct ilo_dev_info *dev, + const struct ilo_view_surface *surface, + bool for_render, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_so_SURFACE_STATE)(const struct ilo_dev_info *dev, + const struct pipe_stream_output_target *so, + const struct pipe_stream_output_info *so_info, + int so_index, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_SAMPLER_STATE)(const struct ilo_dev_info *dev, + const struct ilo_sampler_cso * const *samplers, + const struct pipe_sampler_view * const *views, + const uint32_t *sampler_border_colors, + int num_samplers, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE)(const struct ilo_dev_info *dev, + const struct ilo_sampler_cso *sampler, + struct ilo_cp *cp); + +typedef uint32_t +(*ilo_gpe_gen6_push_constant_buffer)(const struct ilo_dev_info *dev, + int size, void **pcb, + struct ilo_cp *cp); /** - * Fill in DW2 to DW7 of 3DSTATE_SF. + * GEN6 graphics processing engine + * + * This is a low-level interface. It does not handle the interdependencies + * between states. 
*/ -static inline void +struct ilo_gpe_gen6 { + int (*estimate_command_size)(const struct ilo_dev_info *dev, + enum ilo_gpe_gen6_command cmd, + int arg); + + int (*estimate_state_size)(const struct ilo_dev_info *dev, + enum ilo_gpe_gen6_state state, + int arg); + +#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name emit_ ## name + GEN6_EMIT(STATE_BASE_ADDRESS); + GEN6_EMIT(STATE_SIP); + GEN6_EMIT(3DSTATE_VF_STATISTICS); + GEN6_EMIT(PIPELINE_SELECT); + GEN6_EMIT(MEDIA_VFE_STATE); + GEN6_EMIT(MEDIA_CURBE_LOAD); + GEN6_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD); + GEN6_EMIT(MEDIA_GATEWAY_STATE); + GEN6_EMIT(MEDIA_STATE_FLUSH); + GEN6_EMIT(MEDIA_OBJECT_WALKER); + GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS); + GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS); + GEN6_EMIT(3DSTATE_URB); + GEN6_EMIT(3DSTATE_VERTEX_BUFFERS); + GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS); + GEN6_EMIT(3DSTATE_INDEX_BUFFER); + GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS); + GEN6_EMIT(3DSTATE_CC_STATE_POINTERS); + GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS); + GEN6_EMIT(3DSTATE_VS); + GEN6_EMIT(3DSTATE_GS); + GEN6_EMIT(3DSTATE_CLIP); + GEN6_EMIT(3DSTATE_SF); + GEN6_EMIT(3DSTATE_WM); + GEN6_EMIT(3DSTATE_CONSTANT_VS); + GEN6_EMIT(3DSTATE_CONSTANT_GS); + GEN6_EMIT(3DSTATE_CONSTANT_PS); + GEN6_EMIT(3DSTATE_SAMPLE_MASK); + GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE); + GEN6_EMIT(3DSTATE_DEPTH_BUFFER); + GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET); + GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN); + GEN6_EMIT(3DSTATE_LINE_STIPPLE); + GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS); + GEN6_EMIT(3DSTATE_GS_SVB_INDEX); + GEN6_EMIT(3DSTATE_MULTISAMPLE); + GEN6_EMIT(3DSTATE_STENCIL_BUFFER); + GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER); + GEN6_EMIT(3DSTATE_CLEAR_PARAMS); + GEN6_EMIT(PIPE_CONTROL); + GEN6_EMIT(3DPRIMITIVE); + GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA); + GEN6_EMIT(SF_VIEWPORT); + GEN6_EMIT(CLIP_VIEWPORT); + GEN6_EMIT(CC_VIEWPORT); + GEN6_EMIT(COLOR_CALC_STATE); + GEN6_EMIT(BLEND_STATE); + GEN6_EMIT(DEPTH_STENCIL_STATE); + GEN6_EMIT(SCISSOR_RECT); + 
GEN6_EMIT(BINDING_TABLE_STATE); + GEN6_EMIT(SURFACE_STATE); + GEN6_EMIT(so_SURFACE_STATE); + GEN6_EMIT(SAMPLER_STATE); + GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE); + GEN6_EMIT(push_constant_buffer); +#undef GEN6_EMIT +}; + +const struct ilo_gpe_gen6 * +ilo_gpe_gen6_get(void); + +/* Below are helpers for other GENs */ + +int +ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling); + +int +ilo_gpe_gen6_translate_pipe_prim(unsigned prim); + +int +ilo_gpe_gen6_translate_texture(enum pipe_texture_target target); + +void ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, const struct ilo_rasterizer_state *rasterizer, int num_samples, enum pipe_format depth_format, - uint32_t *payload, unsigned payload_len) -{ - assert(payload_len == Elements(rasterizer->sf.payload)); - - if (rasterizer) { - const struct ilo_rasterizer_sf *sf = &rasterizer->sf; - - memcpy(payload, sf->payload, sizeof(sf->payload)); - if (num_samples > 1) - payload[1] |= sf->dw_msaa; - } - else { - payload[0] = 0; - payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0; - payload[2] = 0; - payload[3] = 0; - payload[4] = 0; - payload[5] = 0; - } - - if (dev->gen >= ILO_GEN(7)) { - int format; - - /* separate stencil */ - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: - format = GEN6_ZFORMAT_D16_UNORM; - break; - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - format = GEN6_ZFORMAT_D32_FLOAT; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; - break; - default: - /* FLOAT surface is assumed when there is no depth buffer */ - format = GEN6_ZFORMAT_D32_FLOAT; - break; - } - - payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; - } -} + uint32_t *payload, unsigned payload_len); -/** - * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. 
- */ -static inline void +void ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, const struct ilo_rasterizer_state *rasterizer, const struct ilo_shader_state *fs, - uint32_t *dw, int num_dwords) -{ - int output_count, vue_offset, vue_len; - const struct ilo_kernel_routing *routing; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - assert(num_dwords == 13); - - if (!fs) { - memset(dw, 0, sizeof(dw[0]) * num_dwords); - dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; - return; - } - - output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - assert(output_count <= 32); - - routing = ilo_shader_get_kernel_routing(fs); - - vue_offset = routing->source_skip; - assert(vue_offset % 2 == 0); - vue_offset /= 2; - - vue_len = (routing->source_len + 1) / 2; - if (!vue_len) - vue_len = 1; - - dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | - vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT | - vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; - if (routing->swizzle_enable) - dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; - - switch (rasterizer->state.sprite_coord_mode) { - case PIPE_SPRITE_COORD_UPPER_LEFT: - dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; - break; - case PIPE_SPRITE_COORD_LOWER_LEFT: - dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT; - break; - } - - STATIC_ASSERT(Elements(routing->swizzles) >= 16); - memcpy(&dw[1], routing->swizzles, 2 * 16); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 268: - * - * "This field (Point Sprite Texture Coordinate Enable) must be - * programmed to 0 when non-point primitives are rendered." - * - * TODO We do not check that yet. 
- */ - dw[9] = routing->point_sprite_enable; - - dw[10] = routing->const_interp_enable; - - /* WrapShortest enables */ - dw[11] = 0; - dw[12] = 0; -} - -static inline void -gen6_emit_MI_STORE_DATA_IMM(const struct ilo_dev_info *dev, - struct intel_bo *bo, uint32_t bo_offset, - uint64_t val, bool store_qword, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_MI(0x20); - const uint8_t cmd_len = (store_qword) ? 5 : 4; - /* must use GGTT on GEN6 as in PIPE_CONTROL */ - const uint32_t cmd_flags = (dev->gen == ILO_GEN(6)) ? (1 << 22) : 0; - const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; - const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - assert(bo_offset % ((store_qword) ? 8 : 4) == 0); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | cmd_flags | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain); - ilo_cp_write(cp, (uint32_t) val); - - if (store_qword) - ilo_cp_write(cp, (uint32_t) (val >> 32)); - else - assert(val == (uint64_t) ((uint32_t) val)); - - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MI_LOAD_REGISTER_IMM(const struct ilo_dev_info *dev, - uint32_t reg, uint32_t val, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_MI(0x22); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - assert(reg % 4 == 0); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, reg); - ilo_cp_write(cp, val); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MI_STORE_REGISTER_MEM(const struct ilo_dev_info *dev, - struct intel_bo *bo, uint32_t bo_offset, - uint32_t reg, struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_MI(0x24); - const uint8_t cmd_len = 3; - /* must use GGTT on GEN6 as in PIPE_CONTROL */ - const uint32_t cmd_flags = (dev->gen == ILO_GEN(6)) ? 
(1 << 22) : 0; - const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; - const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - assert(reg % 4 == 0 && bo_offset % 4 == 0); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | cmd_flags | (cmd_len - 2)); - ilo_cp_write(cp, reg); - ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MI_REPORT_PERF_COUNT(const struct ilo_dev_info *dev, - struct intel_bo *bo, uint32_t bo_offset, - uint32_t report_id, struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_MI(0x28); - const uint8_t cmd_len = 3; - const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; - const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - assert(bo_offset % 64 == 0); - - /* must use GGTT on GEN6 as in PIPE_CONTROL */ - if (dev->gen == ILO_GEN(6)) - bo_offset |= 0x1; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain); - ilo_cp_write(cp, report_id); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev, - struct intel_bo *general_state_bo, - struct intel_bo *surface_state_bo, - struct intel_bo *dynamic_state_bo, - struct intel_bo *indirect_object_bo, - struct intel_bo *instruction_bo, - uint32_t general_state_size, - uint32_t dynamic_state_size, - uint32_t indirect_object_size, - uint32_t instruction_size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01); - const uint8_t cmd_len = 10; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* 4K-page aligned */ - assert(((general_state_size | dynamic_state_size | - indirect_object_size | instruction_size) & 0xfff) == 0); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - - ilo_cp_write_bo(cp, 1, general_state_bo, - INTEL_DOMAIN_RENDER, - 0); - ilo_cp_write_bo(cp, 1, surface_state_bo, - 
INTEL_DOMAIN_SAMPLER, - 0); - ilo_cp_write_bo(cp, 1, dynamic_state_bo, - INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, - 0); - ilo_cp_write_bo(cp, 1, indirect_object_bo, - 0, - 0); - ilo_cp_write_bo(cp, 1, instruction_bo, - INTEL_DOMAIN_INSTRUCTION, - 0); - - if (general_state_size) { - ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo, - INTEL_DOMAIN_RENDER, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 1); - } - - if (dynamic_state_size) { - ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo, - INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 0xfffff000 + 1); - } - - if (indirect_object_size) { - ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo, - 0, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 0xfffff000 + 1); - } - - if (instruction_size) { - ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo, - INTEL_DOMAIN_INSTRUCTION, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 1); - } - - ilo_cp_end(cp); -} - -static inline void -gen6_emit_STATE_SIP(const struct ilo_dev_info *dev, - uint32_t sip, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, sip); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev, - bool enable, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b); - const uint8_t cmd_len = 1; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | enable); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev, - int pipeline, - struct ilo_cp *cp) -{ - const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04); - const uint8_t cmd_len = 1; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* 3D or 
media */ - assert(pipeline == 0x0 || pipeline == 0x1); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | pipeline); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev, - int max_threads, int num_urb_entries, - int urb_entry_size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00); - const uint8_t cmd_len = 8; - uint32_t dw2, dw4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - dw2 = (max_threads - 1) << 16 | - num_urb_entries << 8 | - 1 << 7 | /* Reset Gateway Timer */ - 1 << 6; /* Bypass Gateway Control */ - - dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */ - 480; /* CURBE Allocation Size */ - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* MBZ */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, 0); /* scoreboard */ - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev, - uint32_t buf, int size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - assert(buf % 32 == 0); - /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */ - size = align(size, 32); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); /* MBZ */ - ilo_cp_write(cp, size); - ilo_cp_write(cp, buf); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev, - uint32_t offset, int num_ids, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - assert(offset % 32 == 0); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); /* MBZ */ - /* every ID has 8 DWords */ - ilo_cp_write(cp, num_ids * 8 
* 4); - ilo_cp_write(cp, offset); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev, - int id, int byte, int thread_count, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03); - const uint8_t cmd_len = 2; - uint32_t dw1; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - dw1 = id << 16 | - byte << 8 | - thread_count; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev, - int thread_count_water_mark, - int barrier_mask, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04); - const uint8_t cmd_len = 2; - uint32_t dw1; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - dw1 = thread_count_water_mark << 16 | - barrier_mask; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - assert(!"MEDIA_OBJECT_WALKER unsupported"); -} - -static inline void -gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev, - uint32_t vs_binding_table, - uint32_t gs_binding_table, - uint32_t ps_binding_table, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED | - GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED | - GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED); - ilo_cp_write(cp, vs_binding_table); - ilo_cp_write(cp, gs_binding_table); - ilo_cp_write(cp, ps_binding_table); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t vs_sampler_state, - uint32_t gs_sampler_state, - uint32_t ps_sampler_state, - struct ilo_cp *cp) -{ - const 
uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - GEN6_PTR_SAMPLER_DW0_VS_CHANGED | - GEN6_PTR_SAMPLER_DW0_GS_CHANGED | - GEN6_PTR_SAMPLER_DW0_PS_CHANGED); - ilo_cp_write(cp, vs_sampler_state); - ilo_cp_write(cp, gs_sampler_state); - ilo_cp_write(cp, ps_sampler_state); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev, - int vs_total_size, int gs_total_size, - int vs_entry_size, int gs_entry_size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05); - const uint8_t cmd_len = 3; - const int row_size = 128; /* 1024 bits */ - int vs_alloc_size, gs_alloc_size; - int vs_num_entries, gs_num_entries; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - /* in 1024-bit URB rows */ - vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; - gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; - - /* the valid range is [1, 5] */ - if (!vs_alloc_size) - vs_alloc_size = 1; - if (!gs_alloc_size) - gs_alloc_size = 1; - assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); - - /* the valid range is [24, 256] in multiples of 4 */ - vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; - if (vs_num_entries > 256) - vs_num_entries = 256; - assert(vs_num_entries >= 24); - - /* the valid range is [0, 256] in multiples of 4 */ - gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; - if (gs_num_entries > 256) - gs_num_entries = 256; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | - vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT); - ilo_cp_write(cp, gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | - (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, 
- const struct ilo_ve_state *ve, - const struct ilo_vb_state *vb, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08); - uint8_t cmd_len; - unsigned hw_idx; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 82: - * - * "From 1 to 33 VBs can be specified..." - */ - assert(ve->vb_count <= 33); - - if (!ve->vb_count) - return; - - cmd_len = 1 + 4 * ve->vb_count; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - const unsigned instance_divisor = ve->instance_divisors[hw_idx]; - const unsigned pipe_idx = ve->vb_mapping[hw_idx]; - const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; - uint32_t dw; - - dw = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT; - - if (instance_divisor) - dw |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA; - else - dw |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA; - - if (dev->gen >= ILO_GEN(7)) - dw |= GEN7_VB_STATE_DW0_ADDR_MODIFIED; - - /* use null vb if there is no buffer or the stride is out of range */ - if (cso->buffer && cso->stride <= 2048) { - const struct ilo_buffer *buf = ilo_buffer(cso->buffer); - const uint32_t start_offset = cso->buffer_offset; - const uint32_t end_offset = buf->bo_size - 1; - - dw |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT; - - ilo_cp_write(cp, dw); - ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_write(cp, instance_divisor); - } - else { - dw |= 1 << 13; - - ilo_cp_write(cp, dw); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, instance_divisor); - } - } - - ilo_cp_end(cp); -} - -static inline void -ve_init_cso_with_components(const struct ilo_dev_info *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso) -{ - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - cso->payload[0] = GEN6_VE_STATE_DW0_VALID; - 
cso->payload[1] = - comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | - comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | - comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | - comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; -} - -static inline void -ve_set_cso_edgeflag(const struct ilo_dev_info *dev, - struct ilo_ve_cso *cso) -{ - int format; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "- This bit (Edge Flag Enable) must only be ENABLED on the last - * valid VERTEX_ELEMENT structure. - * - * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, - * and Component 1-3 Control must be set to VFCOMP_NOSTORE. - * - * - The Source Element Format must be set to the UINT format. - * - * - [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST primitives - * to some set of corresponding edge-flag-supported primitive - * types (e.g., POLYGONs) prior to submission to the 3D pipeline." - */ - - cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; - cso->payload[1] = - GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; - - /* - * Edge flags have format GEN6_FORMAT_R8_UINT when defined via - * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined - * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. - * - * Since all the hardware cares about is whether the flags are zero or not, - * we can treat them as GEN6_FORMAT_R32_UINT in the latter case. 
- */ - format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff; - if (format == GEN6_FORMAT_R32_FLOAT) { - STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1); - cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT); - } - else { - assert(format == GEN6_FORMAT_R8_UINT); - } -} - -static inline void -gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev, - const struct ilo_ve_state *ve, - bool last_velement_edgeflag, - bool prepend_generated_ids, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09); - uint8_t cmd_len; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 93: - * - * "Up to 34 (DevSNB+) vertex elements are supported." - */ - assert(ve->count + prepend_generated_ids <= 34); - - if (!ve->count && !prepend_generated_ids) { - struct ilo_ve_cso dummy; - - ve_init_cso_with_components(dev, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_1_FP, - &dummy); - - cmd_len = 3; - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, dummy.payload, 2); - ilo_cp_end(cp); - - return; - } - - cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - - if (prepend_generated_ids) { - struct ilo_ve_cso gen_ids; - - ve_init_cso_with_components(dev, - GEN6_VFCOMP_STORE_VID, - GEN6_VFCOMP_STORE_IID, - GEN6_VFCOMP_NOSTORE, - GEN6_VFCOMP_NOSTORE, - &gen_ids); - - ilo_cp_write_multi(cp, gen_ids.payload, 2); - } - - if (last_velement_edgeflag) { - struct ilo_ve_cso edgeflag; - - for (i = 0; i < ve->count - 1; i++) - ilo_cp_write_multi(cp, ve->cso[i].payload, 2); - - edgeflag = ve->cso[i]; - ve_set_cso_edgeflag(dev, &edgeflag); - ilo_cp_write_multi(cp, edgeflag.payload, 2); - } - else { - for (i = 0; i < ve->count; i++) - ilo_cp_write_multi(cp, ve->cso[i].payload, 2); - } - - ilo_cp_end(cp); -} - -static inline 
void -gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_ib_state *ib, - bool enable_cut_index, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a); - const uint8_t cmd_len = 3; - struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); - uint32_t start_offset, end_offset; - int format; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - if (!buf) - return; - - /* this is moved to the new 3DSTATE_VF */ - if (dev->gen >= ILO_GEN(7.5)) - assert(!enable_cut_index); - - switch (ib->hw_index_size) { - case 4: - format = GEN6_IB_DW0_FORMAT_DWORD; - break; - case 2: - format = GEN6_IB_DW0_FORMAT_WORD; - break; - case 1: - format = GEN6_IB_DW0_FORMAT_BYTE; - break; - default: - assert(!"unknown index size"); - format = GEN6_IB_DW0_FORMAT_BYTE; - break; - } - - /* - * set start_offset to 0 here and adjust pipe_draw_info::start with - * ib->draw_start_offset in 3DPRIMITIVE - */ - start_offset = 0; - end_offset = buf->bo_size; - - /* end_offset must also be aligned and is inclusive */ - end_offset -= (end_offset % ib->hw_index_size); - end_offset--; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - ((enable_cut_index) ? 
GEN6_IB_DW0_CUT_INDEX_ENABLE : 0) | - format); - ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t clip_viewport, - uint32_t sf_viewport, - uint32_t cc_viewport, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - GEN6_PTR_VP_DW0_CLIP_CHANGED | - GEN6_PTR_VP_DW0_SF_CHANGED | - GEN6_PTR_VP_DW0_CC_CHANGED); - ilo_cp_write(cp, clip_viewport); - ilo_cp_write(cp, sf_viewport); - ilo_cp_write(cp, cc_viewport); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t blend_state, - uint32_t depth_stencil_state, - uint32_t color_calc_state, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, blend_state | 1); - ilo_cp_write(cp, depth_stencil_state | 1); - ilo_cp_write(cp, color_calc_state | 1); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t scissor_rect, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, scissor_rect); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *vs, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10); - const uint8_t cmd_len = 6; - const struct ilo_shader_cso *cso; 
- uint32_t dw2, dw4, dw5; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - if (!vs) { - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - return; - } - - cso = ilo_shader_get_kernel_cso(vs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *gs, - const struct ilo_shader_state *vs, - int verts_per_prim, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); - const uint8_t cmd_len = 7; - uint32_t dw1, dw2, dw4, dw5, dw6; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - if (gs) { - const struct ilo_shader_cso *cso; - - dw1 = ilo_shader_get_kernel_offset(gs); - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - dw6 = cso->payload[3]; - } - else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { - struct ilo_shader_cso cso; - enum ilo_kernel_param param; - - switch (verts_per_prim) { - case 1: - param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; - break; - case 2: - param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; - break; - default: - param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; - break; - } - - dw1 = ilo_shader_get_kernel_offset(vs) + - ilo_shader_get_kernel_param(vs, param); - - /* cannot use VS's CSO */ - ilo_gpe_init_gs_cso_gen6(dev, vs, &cso); - dw2 = cso.payload[0]; - dw4 = cso.payload[1]; - dw5 = cso.payload[2]; - dw6 = cso.payload[3]; - } - else { - dw1 = 0; - dw2 = 0; - dw4 = 1 << 
GEN6_GS_DW4_URB_READ_LEN__SHIFT; - dw5 = GEN6_GS_DW5_STATISTICS; - dw6 = 0; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, dw6); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - bool enable_guardband, - int num_viewports, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12); - const uint8_t cmd_len = 4; - uint32_t dw1, dw2, dw3; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - if (rasterizer) { - int interps; - - dw1 = rasterizer->clip.payload[0]; - dw2 = rasterizer->clip.payload[1]; - dw3 = rasterizer->clip.payload[2]; - - if (enable_guardband && rasterizer->clip.can_enable_guardband) - dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; - - interps = (fs) ? ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; - - if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | - GEN6_INTERP_NONPERSPECTIVE_CENTROID | - GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) - dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; - - dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO | - (num_viewports - 1); - } - else { - dw1 = 0; - dw2 = 0; - dw3 = 0; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, dw3); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); - const uint8_t cmd_len = 20; - uint32_t payload_raster[6], payload_sbe[13]; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, - 1, PIPE_FORMAT_NONE, payload_raster, 
Elements(payload_raster)); - ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, - fs, payload_sbe, Elements(payload_sbe)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, payload_sbe[0]); - ilo_cp_write_multi(cp, payload_raster, 6); - ilo_cp_write_multi(cp, &payload_sbe[1], 12); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev, - const struct ilo_shader_state *fs, - int num_samplers, - const struct ilo_rasterizer_state *rasterizer, - bool dual_blend, bool cc_may_kill, - uint32_t hiz_op, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); - const uint8_t cmd_len = 9; - const int num_samples = 1; - const struct ilo_shader_cso *fs_cso; - uint32_t dw2, dw4, dw5, dw6; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - if (!fs) { - /* see brwCreateContext() */ - const int max_threads = (dev->gt == 2) ? 80 : 40; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, hiz_op); - /* honor the valid range even if dispatching is disabled */ - ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - - return; - } - - fs_cso = ilo_shader_get_kernel_cso(fs); - dw2 = fs_cso->payload[0]; - dw4 = fs_cso->payload[1]; - dw5 = fs_cso->payload[2]; - dw6 = fs_cso->payload[3]; - - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "This bit (Statistics Enable) must be disabled if either of these - * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve - * Enable or Depth Buffer Resolve Enable." 
- */ - assert(!hiz_op); - dw4 |= GEN6_WM_DW4_STATISTICS; - - if (cc_may_kill) - dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE; - - if (dual_blend) - dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND; - - dw5 |= rasterizer->wm.payload[0]; - - dw6 |= rasterizer->wm.payload[1]; - - if (num_samples > 1) { - dw6 |= rasterizer->wm.dw_msaa_rast | - rasterizer->wm.dw_msaa_disp; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, dw6); - ilo_cp_write(cp, 0); /* kernel 1 */ - ilo_cp_write(cp, 0); /* kernel 2 */ - ilo_cp_end(cp); -} - -static inline unsigned -gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, int max_read_length, - uint32_t *dw, int num_dwords) -{ - unsigned enabled = 0x0; - int total_read_length, i; - - assert(num_dwords == 4); - - total_read_length = 0; - for (i = 0; i < 4; i++) { - if (i < num_bufs && sizes[i]) { - /* in 256-bit units minus one */ - const int read_len = (sizes[i] + 31) / 32 - 1; - - assert(bufs[i] % 32 == 0); - assert(read_len < 32); - - enabled |= 1 << i; - dw[i] = bufs[i] | read_len; - - total_read_length += read_len + 1; - } - else { - dw[i] = 0; - } - } - - assert(total_read_length <= max_read_length); - - return enabled; -} - -static inline void -gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15); - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 138: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 32" - */ - 
buf_enabled = gen6_fill_3dstate_constant(dev, - bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); - ilo_cp_write(cp, buf_dw[0]); - ilo_cp_write(cp, buf_dw[1]); - ilo_cp_write(cp, buf_dw[2]); - ilo_cp_write(cp, buf_dw[3]); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16); - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 161: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" - */ - buf_enabled = gen6_fill_3dstate_constant(dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); - ilo_cp_write(cp, buf_dw[0]); - ilo_cp_write(cp, buf_dw[1]); - ilo_cp_write(cp, buf_dw[2]); - ilo_cp_write(cp, buf_dw[3]); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17); - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 287: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" - */ - buf_enabled = gen6_fill_3dstate_constant(dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); - 
ilo_cp_write(cp, buf_dw[0]); - ilo_cp_write(cp, buf_dw[1]); - ilo_cp_write(cp, buf_dw[2]); - ilo_cp_write(cp, buf_dw[3]); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, - unsigned sample_mask, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18); - const uint8_t cmd_len = 2; - const unsigned valid_mask = 0xf; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - sample_mask &= valid_mask; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, sample_mask); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev, - unsigned x, unsigned y, - unsigned width, unsigned height, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00); - const uint8_t cmd_len = 4; - unsigned xmax = x + width - 1; - unsigned ymax = y + height - 1; - int rect_limit; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - if (dev->gen >= ILO_GEN(7)) { - rect_limit = 16383; - } - else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 230: - * - * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) - * must be an even number" - */ - assert(y % 2 == 0); - - rect_limit = 8191; - } - - if (x > rect_limit) x = rect_limit; - if (y > rect_limit) y = rect_limit; - if (xmax > rect_limit) xmax = rect_limit; - if (ymax > rect_limit) ymax = rect_limit; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, y << 16 | x); - ilo_cp_write(cp, ymax << 16 | xmax); - - /* - * There is no need to set the origin. It is intended to support front - * buffer rendering. 
- */ - ilo_cp_write(cp, 0); - - ilo_cp_end(cp); -} - -static inline void -zs_align_surface(const struct ilo_dev_info *dev, - unsigned align_w, unsigned align_h, - struct ilo_zs_surface *zs) -{ - unsigned mask, shift_w, shift_h; - unsigned width, height; - uint32_t dw3; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - if (dev->gen >= ILO_GEN(7)) { - shift_w = 4; - shift_h = 18; - mask = 0x3fff; - } - else { - shift_w = 6; - shift_h = 19; - mask = 0x1fff; - } - - dw3 = zs->payload[2]; - - /* aligned width and height */ - width = align(((dw3 >> shift_w) & mask) + 1, align_w); - height = align(((dw3 >> shift_h) & mask) + 1, align_h); - - dw3 = (dw3 & ~((mask << shift_w) | (mask << shift_h))) | - (width - 1) << shift_w | - (height - 1) << shift_h; - - zs->payload[2] = dw3; -} - -static inline void -gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_zs_surface *zs, - struct ilo_cp *cp) -{ - const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? - ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05); - const uint8_t cmd_len = 7; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, zs->payload[0]); - ilo_cp_write_bo(cp, zs->payload[1], zs->bo, - INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_write(cp, zs->payload[2]); - ilo_cp_write(cp, zs->payload[3]); - ilo_cp_write(cp, zs->payload[4]); - ilo_cp_write(cp, zs->payload[5]); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev, - int x_offset, int y_offset, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - assert(x_offset >= 0 && x_offset <= 31); - assert(y_offset >= 0 && y_offset <= 31); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, x_offset << 8 | y_offset); - ilo_cp_end(cp); -} - -static inline void 
-gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev, - const struct pipe_poly_stipple *pattern, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07); - const uint8_t cmd_len = 33; - int i; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - assert(Elements(pattern->stipple) == 32); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - for (i = 0; i < 32; i++) - ilo_cp_write(cp, pattern->stipple[i]); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev, - unsigned pattern, unsigned factor, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08); - const uint8_t cmd_len = 3; - unsigned inverse; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - assert((pattern & 0xffff) == pattern); - assert(factor >= 1 && factor <= 256); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, pattern); - - if (dev->gen >= ILO_GEN(7)) { - /* in U1.16 */ - inverse = (unsigned) (65536.0f / factor); - ilo_cp_write(cp, inverse << 15 | factor); - } - else { - /* in U1.13 */ - inverse = (unsigned) (8192.0f / factor); - ilo_cp_write(cp, inverse << 16 | factor); - } - - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0 << 16 | 0); - ilo_cp_write(cp, 0 << 16 | 0); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev, - int index, unsigned svbi, - unsigned max_svbi, - bool load_vertex_count, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b); - const uint8_t cmd_len = 4; - uint32_t dw1; - - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(index >= 0 && index < 4); - - dw1 = index << 
GEN6_SVBI_DW1_INDEX__SHIFT; - if (load_vertex_count) - dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, svbi); - ilo_cp_write(cp, max_svbi); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev, - int num_samples, - const uint32_t *packed_sample_pos, - bool pixel_location_center, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d); - const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3; - uint32_t dw1, dw2, dw3; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - dw1 = (pixel_location_center) ? - GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; - - switch (num_samples) { - case 0: - case 1: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - case 4: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; - dw2 = packed_sample_pos[0]; - dw3 = 0; - break; - case 8: - assert(dev->gen >= ILO_GEN(7)); - dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; - dw2 = packed_sample_pos[0]; - dw3 = packed_sample_pos[1]; - break; - default: - assert(!"unsupported sample count"); - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - if (dev->gen >= ILO_GEN(7)) - ilo_cp_write(cp, dw3); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_zs_surface *zs, - struct ilo_cp *cp) -{ - const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? 
- ILO_GPE_CMD(0x3, 0x0, 0x06) : - ILO_GPE_CMD(0x3, 0x1, 0x0e); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - /* see ilo_gpe_init_zs_surface() */ - ilo_cp_write(cp, zs->payload[6]); - ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo, - INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_zs_surface *zs, - struct ilo_cp *cp) -{ - const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? - ILO_GPE_CMD(0x3, 0x0, 0x07) : - ILO_GPE_CMD(0x3, 0x1, 0x0f); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - /* see ilo_gpe_init_zs_surface() */ - ilo_cp_write(cp, zs->payload[8]); - ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo, - INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, - uint32_t clear_val, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - GEN6_CLEAR_PARAMS_DW0_VALID); - ilo_cp_write(cp, clear_val); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev, - uint32_t dw1, - struct intel_bo *bo, uint32_t bo_offset, - bool write_qword, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00); - const uint8_t cmd_len = (write_qword) ? 5 : 4; - const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; - const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - assert(bo_offset % ((write_qword) ? 
8 : 4) == 0); - - if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 73: - * - * "1 of the following must also be set (when CS stall is set): - * - * * Depth Cache Flush Enable ([0] of DW1) - * * Stall at Pixel Scoreboard ([1] of DW1) - * * Depth Stall ([13] of DW1) - * * Post-Sync Operation ([13] of DW1) - * * Render Target Cache Flush Enable ([12] of DW1) - * * Notify Enable ([8] of DW1)" - * - * From the Ivy Bridge PRM, volume 2 part 1, page 61: - * - * "One of the following must also be set (when CS stall is set): - * - * * Render Target Cache Flush Enable ([12] of DW1) - * * Depth Cache Flush Enable ([0] of DW1) - * * Stall at Pixel Scoreboard ([1] of DW1) - * * Depth Stall ([13] of DW1) - * * Post-Sync Operation ([13] of DW1)" - */ - uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | - GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH | - GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL | - GEN6_PIPE_CONTROL_DEPTH_STALL; - - /* post-sync op */ - bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM | - GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT | - GEN6_PIPE_CONTROL_WRITE_TIMESTAMP; - - if (dev->gen == ILO_GEN(6)) - bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE; - - assert(dw1 & bit_test); - } - - if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 73: - * - * "Following bits must be clear (when Depth Stall is set): - * - * * Render Target Cache Flush Enable ([12] of DW1) - * * Depth Cache Flush Enable ([0] of DW1)" - */ - assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | - GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH))); - } - - /* - * From the Sandy Bridge PRM, volume 1 part 3, page 19: - * - * "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM) - * and PIPE_CONTROL are not supported." - * - * The kernel will add the mapping automatically (when write domain is - * INTEL_DOMAIN_INSTRUCTION). 
- */ - if (dev->gen == ILO_GEN(6) && bo) - bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain); - ilo_cp_write(cp, 0); - if (write_qword) - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static inline void -gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); - const uint8_t cmd_len = 6; - const int prim = (rectlist) ? - GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? ib->draw_start_offset : 0); - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | - vb_access); - ilo_cp_write(cp, info->count); - ilo_cp_write(cp, vb_start); - ilo_cp_write(cp, info->instance_count); - ilo_cp_write(cp, info->start_instance); - ilo_cp_write(cp, info->index_bias); - ilo_cp_end(cp); -} - -static inline uint32_t -gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev, - const struct ilo_shader_state **cs, - uint32_t *sampler_state, - int *num_samplers, - uint32_t *binding_table_state, - int *num_surfaces, - int num_ids, - struct ilo_cp *cp) -{ - /* - * From the Sandy Bridge PRM, volume 2 part 2, page 34: - * - * "(Interface Descriptor Total Length) This field must have the same - * alignment as the Interface Descriptor Data Start Address. - * - * It must be DQWord (32-byte) aligned..." - * - * From the Sandy Bridge PRM, volume 2 part 2, page 35: - * - * "(Interface Descriptor Data Start Address) Specifies the 32-byte - * aligned address of the Interface Descriptor data." 
- */ - const int state_align = 32 / 4; - const int state_len = (32 / 4) * num_ids; - uint32_t state_offset, *dw; - int i; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA", - state_len, state_align, &state_offset); - - for (i = 0; i < num_ids; i++) { - dw[0] = ilo_shader_get_kernel_offset(cs[i]); - dw[1] = 1 << 18; /* SPF */ - dw[2] = sampler_state[i] | - (num_samplers[i] + 3) / 4 << 2; - dw[3] = binding_table_state[i] | - num_surfaces[i]; - dw[4] = 0 << 16 | /* CURBE Read Length */ - 0; /* CURBE Read Offset */ - dw[5] = 0; /* Barrier ID */ - dw[6] = 0; - dw[7] = 0; - - dw += 8; - } - - return state_offset; -} - -static inline uint32_t -gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 8 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 262: - * - * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is - * stored as an array of up to 16 elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - - dw += 8; - } - - return state_offset; -} - -static inline uint32_t -gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 4 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 193: - * - * "The viewport-related state is stored as an array of up to 16 - * elements..." - */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_gbx); - dw[1] = fui(vp->max_gbx); - dw[2] = fui(vp->min_gby); - dw[3] = fui(vp->max_gby); - - dw += 4; - } - - return state_offset; -} - -static inline uint32_t -gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 2 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 385: - * - * "The viewport state is stored as an array of up to 16 elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_z); - dw[1] = fui(vp->max_z); - - dw += 2; - } - - return state_offset; -} - -static inline uint32_t -gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev, - const struct pipe_stencil_ref *stencil_ref, - ubyte alpha_ref, - const struct pipe_blend_color *blend_color, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - const int state_len = 6; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE", - state_len, state_align, &state_offset); - - dw[0] = stencil_ref->ref_value[0] << 24 | - stencil_ref->ref_value[1] << 16 | - GEN6_CC_DW0_ALPHATEST_UNORM8; - dw[1] = alpha_ref; - dw[2] = fui(blend_color->color[0]); - dw[3] = fui(blend_color->color[1]); - dw[4] = fui(blend_color->color[2]); - dw[5] = fui(blend_color->color[3]); - - return state_offset; -} - -static inline uint32_t -gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev, - const struct ilo_blend_state *blend, - const struct ilo_fb_state *fb, - const struct ilo_dsa_state *dsa, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - int state_len; - uint32_t state_offset, *dw; - unsigned num_targets, i; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 376: - * - * "The blend state is stored as an array of up to 8 elements..." - */ - num_targets = fb->state.nr_cbufs; - assert(num_targets <= 8); - - if (!num_targets) { - if (!dsa->dw_alpha) - return 0; - /* to be able to reference alpha func */ - num_targets = 1; - } - - state_len = 2 * num_targets; - - dw = ilo_cp_steal_ptr(cp, "BLEND_STATE", - state_len, state_align, &state_offset); - - for (i = 0; i < num_targets; i++) { - const unsigned idx = (blend->independent_blend_enable) ? 
i : 0; - const struct ilo_blend_cso *cso = &blend->cso[idx]; - const int num_samples = fb->num_samples; - const struct util_format_description *format_desc = - (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ? - util_format_description(fb->state.cbufs[idx]->format) : NULL; - bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; - - rt_is_unorm = true; - rt_is_pure_integer = false; - rt_dst_alpha_forced_one = false; - - if (format_desc) { - int ch; - - switch (format_desc->format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - /* force alpha to one when the HW format has alpha */ - assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM) - == GEN6_FORMAT_B8G8R8A8_UNORM); - rt_dst_alpha_forced_one = true; - break; - default: - break; - } - - for (ch = 0; ch < 4; ch++) { - if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) - continue; - - if (format_desc->channel[ch].pure_integer) { - rt_is_unorm = false; - rt_is_pure_integer = true; - break; - } - - if (!format_desc->channel[ch].normalized || - format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) - rt_is_unorm = false; - } - } - - dw[0] = cso->payload[0]; - dw[1] = cso->payload[1]; - - if (!rt_is_pure_integer) { - if (rt_dst_alpha_forced_one) - dw[0] |= cso->dw_blend_dst_alpha_forced_one; - else - dw[0] |= cso->dw_blend; - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Logic Ops are only supported on *_UNORM surfaces (excluding - * _SRGB variants), otherwise Logic Ops must be DISABLED." - * - * Since logicop is ignored for non-UNORM color buffers, no special care - * is needed. - */ - if (rt_is_unorm) - dw[1] |= cso->dw_logicop; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 356: - * - * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage - * Dither both must be disabled." - * - * There is no such limitation on GEN7, or for AlphaToOne. But GL - * requires that anyway. 
- */ - if (num_samples > 1) - dw[1] |= cso->dw_alpha_mod; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 382: - * - * "Alpha Test can only be enabled if Pixel Shader outputs a float - * alpha value." - */ - if (!rt_is_pure_integer) - dw[1] |= dsa->dw_alpha; - - dw += 2; - } - - return state_offset; -} - -static inline uint32_t -gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev, - const struct ilo_dsa_state *dsa, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - const int state_len = 3; - uint32_t state_offset, *dw; - - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE", - state_len, state_align, &state_offset); - - dw[0] = dsa->payload[0]; - dw[1] = dsa->payload[1]; - dw[2] = dsa->payload[2]; - - return state_offset; -} - -static inline uint32_t -gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev, - const struct ilo_scissor_state *scissor, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 2 * num_viewports; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 263: - * - * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is - * stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT", - state_len, state_align, &state_offset); - - memcpy(dw, scissor->payload, state_len * 4); - - return state_offset; -} - -static inline uint32_t -gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev, - uint32_t *surface_states, - int num_surface_states, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = num_surface_states; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 69: - * - * "It is stored as an array of up to 256 elements..." 
- */ - assert(num_surface_states <= 256); - - if (!num_surface_states) - return 0; - - dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE", - state_len, state_align, &state_offset); - memcpy(dw, surface_states, - num_surface_states * sizeof(surface_states[0])); - - return state_offset; -} - -static inline uint32_t -gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev, - const struct ilo_view_surface *surf, - bool for_render, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6; - uint32_t state_offset; - uint32_t read_domains, write_domain; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - if (for_render) { - read_domains = INTEL_DOMAIN_RENDER; - write_domain = INTEL_DOMAIN_RENDER; - } - else { - read_domains = INTEL_DOMAIN_SAMPLER; - write_domain = 0; - } - - ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset); - - STATIC_ASSERT(Elements(surf->payload) >= 8); - - ilo_cp_write(cp, surf->payload[0]); - ilo_cp_write_bo(cp, surf->payload[1], - surf->bo, read_domains, write_domain); - ilo_cp_write(cp, surf->payload[2]); - ilo_cp_write(cp, surf->payload[3]); - ilo_cp_write(cp, surf->payload[4]); - ilo_cp_write(cp, surf->payload[5]); - - if (dev->gen >= ILO_GEN(7)) { - ilo_cp_write(cp, surf->payload[6]); - ilo_cp_write(cp, surf->payload[7]); - } - - ilo_cp_end(cp); - - return state_offset; -} - -static inline uint32_t -gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev, - const struct pipe_stream_output_target *so, - const struct pipe_stream_output_info *so_info, - int so_index, - struct ilo_cp *cp) -{ - struct ilo_buffer *buf = ilo_buffer(so->buffer); - unsigned bo_offset, struct_size; - enum pipe_format elem_format; - struct ilo_view_surface surf; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; - struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; - - switch (so_info->output[so_index].num_components) { - 
case 1: - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - case 2: - elem_format = PIPE_FORMAT_R32G32_FLOAT; - break; - case 3: - elem_format = PIPE_FORMAT_R32G32B32_FLOAT; - break; - case 4: - elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - break; - default: - assert(!"unexpected SO components length"); - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - } - - ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size, - struct_size, elem_format, false, true, &surf); - - return gen6_emit_SURFACE_STATE(dev, &surf, false, cp); -} - -static inline uint32_t -gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, - const struct ilo_sampler_cso * const *samplers, - const struct pipe_sampler_view * const *views, - const uint32_t *sampler_border_colors, - int num_samplers, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 4 * num_samplers; - uint32_t state_offset, *dw; - int i; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 101: - * - * "The sampler state is stored as an array of up to 16 elements..." - */ - assert(num_samplers <= 16); - - if (!num_samplers) - return 0; - - dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE", - state_len, state_align, &state_offset); - - for (i = 0; i < num_samplers; i++) { - const struct ilo_sampler_cso *sampler = samplers[i]; - const struct pipe_sampler_view *view = views[i]; - const uint32_t border_color = sampler_border_colors[i]; - uint32_t dw_filter, dw_wrap; - - /* there may be holes */ - if (!sampler || !view) { - /* disabled sampler */ - dw[0] = 1 << 31; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw += 4; - - continue; - } - - /* determine filter and wrap modes */ - switch (view->texture->target) { - case PIPE_TEXTURE_1D: - dw_filter = (sampler->anisotropic) ? 
- sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_1d; - break; - case PIPE_TEXTURE_3D: - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 103: - * - * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for - * surfaces of type SURFTYPE_3D." - */ - dw_filter = sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - case PIPE_TEXTURE_CUBE: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_cube; - break; - default: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - } - - dw[0] = sampler->payload[0]; - dw[1] = sampler->payload[1]; - assert(!(border_color & 0x1f)); - dw[2] = border_color; - dw[3] = sampler->payload[2]; - - dw[0] |= dw_filter; - - if (dev->gen >= ILO_GEN(7)) { - dw[3] |= dw_wrap; - } - else { - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 21: - * - * "[DevSNB] Errata: Incorrect behavior is observed in cases - * where the min and mag mode filters are different and - * SurfMinLOD is nonzero. The determination of MagMode uses the - * following equation instead of the one in the above - * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" - * - * As a way to work around that, we set Base to - * view->u.tex.first_level. - */ - dw[0] |= view->u.tex.first_level << 22; - - dw[1] |= dw_wrap; - } - - dw += 4; - } - - return state_offset; -} - -static inline uint32_t -gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, - const struct ilo_sampler_cso *sampler, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = (dev->gen >= ILO_GEN(7)) ? 
4 : 12; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", - state_len, state_align, &state_offset); - - /* see ilo_gpe_init_sampler_cso() */ - memcpy(dw, &sampler->payload[3], state_len * 4); - - return state_offset; -} - -static inline uint32_t -gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev, - int size, void **pcb, - struct ilo_cp *cp) -{ - /* - * For all VS, GS, FS, and CS push constant buffers, they must be aligned - * to 32 bytes, and their sizes are specified in 256-bit units. - */ - const int state_align = 32 / 4; - const int state_len = align(size, 32) / 4; - uint32_t state_offset; - char *buf; - - ILO_GPE_VALID_GEN(dev, 6, 7.5); - - buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER", - state_len, state_align, &state_offset); - - /* zero out the unused range */ - if (size < state_len * 4) - memset(&buf[size], 0, state_len * 4 - size); - - if (pcb) - *pcb = buf; - - return state_offset; -} + const struct ilo_shader_state *last_sh, + uint32_t *dw, int num_dwords); #endif /* ILO_GPE_GEN6_H */ diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 6d3397f2d..2a590be2d 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -25,15 +25,63 @@ * Chia-I Wu <olv@lunarg.com> */ -#include "genhw/genhw.h" #include "util/u_resource.h" +#include "brw_defines.h" +#include "intel_reg.h" +#include "ilo_cp.h" #include "ilo_format.h" #include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_gpe_gen7.h" -#define SET_FIELD(value, field) (((value) << field ## __SHIFT) & field ## __MASK) +static void +gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev, + struct ilo_cp *cp) +{ + assert(!"GPGPU_WALKER unsupported"); +} + +static void +gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, + uint32_t clear_val, + struct ilo_cp *cp) +{ + const uint32_t 
cmd = ILO_GPE_CMD(0x3, 0x0, 0x04); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, clear_val); + ilo_cp_write(cp, 1); + ilo_cp_end(cp); +} + +static void +gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev, + int subop, uint32_t pointer, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, pointer); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t color_calc_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp); +} void ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, @@ -43,7 +91,7 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, int start_grf, vue_read_len, max_threads; uint32_t dw2, dw4, dw5; - ILO_GPE_VALID_GEN(dev, 7, 7.5); + ILO_GPE_VALID_GEN(dev, 7, 7); start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); @@ -52,9 +100,6 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, vue_read_len = (vue_read_len + 1) / 2; switch (dev->gen) { - case ILO_GEN(7.5): - max_threads = (dev->gt >= 2) ? 256 : 70; - break; case ILO_GEN(7): max_threads = (dev->gt == 2) ? 128 : 36; break; @@ -63,16 +108,16 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, break; } - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 = (true) ? 
0 : GEN6_GS_FLOATING_POINT_MODE_ALT; - dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | - GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | - 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT | - start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT; + dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT | + GEN7_GS_INCLUDE_VERTEX_HANDLES | + 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | + start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; - dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT | - GEN7_GS_DW5_STATISTICS | - GEN7_GS_DW5_GS_ENABLE; + dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_ENABLE; STATIC_ASSERT(Elements(cso->payload) >= 3); cso->payload[0] = dw2; @@ -80,6 +125,74 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, cso->payload[2] = dw5; } +static void +gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + int num_samplers, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); + const uint8_t cmd_len = 7; + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + if (!gs) { + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); + return; + } + + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs)); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const 
struct pipe_surface *zs_surf, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); + const uint8_t cmd_len = 7; + const int num_samples = 1; + uint32_t payload[6]; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_gpe_gen6_fill_3dstate_sf_raster(dev, + rasterizer, num_samples, + (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE, + payload, Elements(payload)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write_multi(cp, payload, 6); + ilo_cp_end(cp); +} + void ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -87,25 +200,25 @@ ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev, { uint32_t dw1, dw2; - ILO_GPE_VALID_GEN(dev, 7, 7.5); + ILO_GPE_VALID_GEN(dev, 7, 7); - dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL | - GEN7_WM_DW1_AA_LINE_WIDTH_2_0 | - GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL; + dw1 = GEN7_WM_POSITION_ZW_PIXEL | + GEN7_WM_LINE_AA_WIDTH_2_0 | + GEN7_WM_MSRAST_OFF_PIXEL; /* same value as in 3DSTATE_SF */ if (state->line_smooth) - dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0; + dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0; if (state->poly_stipple_enable) - dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; + dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; if (state->line_stipple_enable) - dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; + dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; if (state->bottom_edge_rule) - dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; + dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT; - dw2 = GEN7_WM_DW2_MSDISPMODE_PERSAMPLE; + dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE; /* * assertion that makes sure @@ -115,12 +228,12 @@ ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev, * * is valid */ - STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 && - GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0); + STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 && + GEN7_WM_MSDISPMODE_PERSAMPLE == 0); wm->dw_msaa_rast = - (state->multisample) ? 
GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0; - wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL; + (state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0; + wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL; STATIC_ASSERT(Elements(wm->payload) >= 2); wm->payload[0] = dw1; @@ -136,40 +249,29 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, uint32_t dw2, dw4, dw5; uint32_t wm_interps, wm_dw1; - ILO_GPE_VALID_GEN(dev, 7, 7.5); + ILO_GPE_VALID_GEN(dev, 7, 7); start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); + /* see brwCreateContext() */ + max_threads = (dev->gt == 2) ? 172 : 48; - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT; - dw4 = GEN7_PS_DW4_POSOFFSET_NONE; + dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT | + GEN7_PS_POSOFFSET_NONE; - /* see brwCreateContext() */ - switch (dev->gen) { - case ILO_GEN(7.5): - max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (dev->gt == 2) ? 
172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) - dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; + if (false) + dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) - dw4 |= GEN7_PS_DW4_ATTR_ENABLE; + dw4 |= GEN7_PS_ATTRIBUTE_ENABLE; assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw4 |= GEN7_PS_DW4_8_PIXEL_DISPATCH; + dw4 |= GEN7_PS_8_DISPATCH_ENABLE; - dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; + dw5 = start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 | + 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 | + 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2; /* FS affects 3DSTATE_WM too */ wm_dw1 = 0; @@ -181,7 +283,7 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, * b) fs writes depth, or * c) fs or cc kills */ - wm_dw1 |= GEN7_WM_DW1_PS_ENABLE; + wm_dw1 |= GEN7_WM_DISPATCH_ENABLE; /* * From the Ivy Bridge PRM, volume 2 part 1, page 278: @@ -210,21 +312,21 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, * to ENABLE this bit due to ClipDistance clipping." 
*/ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - wm_dw1 |= GEN7_WM_DW1_PS_KILL; + wm_dw1 |= GEN7_WM_KILL_ENABLE; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - wm_dw1 |= GEN7_WM_DW1_PSCDEPTH_ON; + wm_dw1 |= GEN7_WM_PSCDEPTH_ON; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - wm_dw1 |= GEN7_WM_DW1_PS_USE_DEPTH; + wm_dw1 |= GEN7_WM_USES_SOURCE_DEPTH; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - wm_dw1 |= GEN7_WM_DW1_PS_USE_W; + wm_dw1 |= GEN7_WM_USES_SOURCE_W; wm_interps = ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); - wm_dw1 |= wm_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; + wm_dw1 |= wm_interps << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; STATIC_ASSERT(Elements(cso->payload) >= 4); cso->payload[0] = dw2; @@ -233,6 +335,934 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, cso->payload[3] = wm_dw1; } +static void +gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev, + const struct ilo_shader_state *fs, + const struct ilo_rasterizer_state *rasterizer, + bool cc_may_kill, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); + const uint8_t cmd_len = 3; + const int num_samples = 1; + uint32_t dw1, dw2; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + /* see ilo_gpe_init_rasterizer_wm() */ + dw1 = rasterizer->wm.payload[0]; + dw2 = rasterizer->wm.payload[1]; + + dw1 |= GEN7_WM_STATISTICS_ENABLE; + + if (false) { + dw1 |= GEN7_WM_DEPTH_CLEAR; + dw1 |= GEN7_WM_DEPTH_RESOLVE; + dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; + } + + if (fs) { + const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); + + dw1 |= fs_cso->payload[3]; + } + + if (cc_may_kill) { + dw1 |= GEN7_WM_DISPATCH_ENABLE | + GEN7_WM_KILL_ENABLE; + } + + if (num_samples > 1) { + dw1 |= rasterizer->wm.dw_msaa_rast; + dw2 |= rasterizer->wm.dw_msaa_disp; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, 
dw2); + ilo_cp_end(cp); +} + +static void +gen7_emit_3dstate_constant(const struct ilo_dev_info *dev, + int subop, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); + const uint8_t cmd_len = 7; + uint32_t dw[6]; + int total_read_length, i; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + /* VS, HS, DS, GS, and PS variants */ + assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18); + + assert(num_bufs <= 4); + + dw[0] = 0; + dw[1] = 0; + + total_read_length = 0; + for (i = 0; i < 4; i++) { + int read_len; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 112: + * + * "Constant buffers must be enabled in order from Constant Buffer 0 + * to Constant Buffer 3 within this command. For example, it is + * not allowed to enable Constant Buffer 1 by programming a + * non-zero value in the VS Constant Buffer 1 Read Length without a + * non-zero value in VS Constant Buffer 0 Read Length." + */ + if (i >= num_bufs || !sizes[i]) { + for (; i < 4; i++) { + assert(i >= num_bufs || !sizes[i]); + dw[2 + i] = 0; + } + break; + } + + /* read lengths are in 256-bit units */ + read_len = (sizes[i] + 31) / 32; + /* the lower 5 bits are used for memory object control state */ + assert(bufs[i] % 32 == 0); + + dw[i / 2] |= read_len << ((i % 2) ? 
16 : 0); + dw[2 + i] = bufs[i]; + + total_read_length += read_len; + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 113: + * + * "The sum of all four read length fields must be less than or equal + * to the size of 64" + */ + assert(total_read_length <= 64); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write_multi(cp, dw, 6); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp); +} + +static void +gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp); +} + +static void +gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp); +} + +static void +gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, + unsigned sample_mask, + int num_samples, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18); + const uint8_t cmd_len = 2; + const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 294: + * + * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field + * (Sample Mask) must be zero. + * + * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field + * must be zero." 
+ */ + sample_mask &= valid_mask; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, sample_mask); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp); +} + +static void +gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp); +} + +static void +gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *hs, + int num_samplers, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b); + const uint8_t cmd_len = 7; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + assert(!hs); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *ds, + int num_samplers, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d); + const uint8_t cmd_len = 6; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + assert(!ds); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); + +} + 
+static void +gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev, + unsigned buffer_mask, + int vertex_attrib_count, + bool rasterizer_discard, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e); + const uint8_t cmd_len = 3; + const bool enable = (buffer_mask != 0); + uint32_t dw1, dw2; + int read_len; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + if (!enable) { + dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT; + if (rasterizer_discard) + dw1 |= SO_RENDERING_DISABLE; + + dw2 = 0; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + ilo_cp_end(cp); + return; + } + + read_len = (vertex_attrib_count + 1) / 2; + if (!read_len) + read_len = 1; + + dw1 = SO_FUNCTION_ENABLE | + 0 << SO_RENDER_STREAM_SELECT_SHIFT | + SO_STATISTICS_ENABLE | + buffer_mask << 8; + + if (rasterizer_discard) + dw1 |= SO_RENDERING_DISABLE; + + /* API_OPENGL */ + if (true) + dw1 |= SO_REORDER_TRAILING; + + dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT | + 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT | + 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT | + 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT | + 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT | + 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT | + 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT | + (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + const struct ilo_shader_state *last_sh, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f); + const uint8_t cmd_len = 14; + uint32_t dw[13]; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, + fs, last_sh, dw, Elements(dw)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, 
cmd | (cmd_len - 2)); + ilo_cp_write_multi(cp, dw, 13); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *fs, + int num_samplers, bool dual_blend, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20); + const uint8_t cmd_len = 8; + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + if (!fs) { + /* see brwCreateContext() */ + const int max_threads = (dev->gt == 2) ? 172 : 48; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + /* GPU hangs if none of the dispatch enable bits is set */ + ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT | + GEN7_PS_8_DISPATCH_ENABLE); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); + + return; + } + + cso = ilo_shader_get_kernel_cso(fs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT; + + if (dual_blend) + dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs)); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_write(cp, 0); /* kernel 1 */ + ilo_cp_write(cp, 0); /* kernel 2 */ + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev, + uint32_t sf_clip_viewport, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp); +} + +static void +gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev, + uint32_t cc_viewport, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp); +} + +static void +gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info 
*dev, + uint32_t blend_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp); +} + +static void +gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t depth_stencil_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp); +} + +static void +gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp); +} + +static void +gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp); +} + +static void +gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp); +} + +static void +gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp); +} + +static void +gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp); +} + +static void +gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp); +} + +static void +gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp); +} + +static void +gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp); +} + +static void 
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp); +} + +static void +gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp); +} + +static void +gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, + int subop, int offset, int size, + int entry_size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); + const uint8_t cmd_len = 2; + const int row_size = 64; /* 512 bits */ + int alloc_size, num_entries, min_entries, max_entries; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + /* VS, HS, DS, and GS variants */ + assert(subop >= 0x30 && subop <= 0x33); + + /* in multiples of 8KB */ + assert(offset % 8192 == 0); + offset /= 8192; + + /* in multiple of 512-bit rows */ + alloc_size = (entry_size + row_size - 1) / row_size; + if (!alloc_size) + alloc_size = 1; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 34: + * + * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may + * cause performance to decrease due to banking in the URB. Element + * sizes of 16 to 20 should be programmed with six 512-bit URB rows." + */ + if (subop == 0x30 && alloc_size == 5) + alloc_size = 6; + + /* in multiples of 8 */ + num_entries = (size / row_size / alloc_size) & ~7; + + switch (subop) { + case 0x30: /* 3DSTATE_URB_VS */ + min_entries = 32; + max_entries = (dev->gt == 2) ? 704 : 512; + + assert(num_entries >= min_entries); + if (num_entries > max_entries) + num_entries = max_entries; + break; + case 0x31: /* 3DSTATE_URB_HS */ + max_entries = (dev->gt == 2) ? 64 : 32; + if (num_entries > max_entries) + num_entries = max_entries; + break; + case 0x32: /* 3DSTATE_URB_DS */ + if (num_entries) + assert(num_entries >= 138); + break; + case 0x33: /* 3DSTATE_URB_GS */ + max_entries = (dev->gt == 2) ? 
320 : 192; + if (num_entries > max_entries) + num_entries = max_entries; + break; + default: + break; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT | + (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | + num_entries); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp); +} + +static void +gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp); +} + +static void +gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp); +} + +static void +gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp); +} + +static void +gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev, + int subop, int offset, int size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop); + const uint8_t cmd_len = 2; + int end; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + /* VS, HS, DS, GS, and PS variants */ + assert(subop >= 0x12 && subop <= 0x16); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 68: + * + * "(A table that says the maximum size of each constant buffer is + * 16KB") + * + * From the Ivy Bridge PRM, volume 2 part 1, page 115: + * + * "The sum of the Constant Buffer Offset and the Constant Buffer Size + * may not exceed the maximum value of the Constant Buffer Size." + * + * Thus, the valid range of buffer end is [0KB, 16KB]. 
+ */ + end = (offset + size) / 1024; + if (end > 16) { + assert(!"invalid constant buffer end"); + end = 16; + } + + /* the valid range of buffer offset is [0KB, 15KB] */ + offset = (offset + 1023) / 1024; + if (offset > 15) { + assert(!"invalid constant buffer offset"); + offset = 15; + } + + if (offset > end) { + assert(!size); + offset = end; + } + + /* the valid range of buffer size is [0KB, 15KB] */ + size = end - offset; + if (size > 15) { + assert(!"invalid constant buffer size"); + size = 15; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT | + size); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp); +} + +static void +gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp); +} + +static void +gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp); +} + +static void +gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp); +} + +static void +gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp) +{ + gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp); +} + +static void +gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev, + const struct pipe_stream_output_info *so_info, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17); + uint16_t cmd_len; + int buffer_selects, num_entries, i; + 
uint16_t so_decls[128]; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + buffer_selects = 0; + num_entries = 0; + + if (so_info) { + int buffer_offsets[PIPE_MAX_SO_BUFFERS]; + + memset(buffer_offsets, 0, sizeof(buffer_offsets)); + + for (i = 0; i < so_info->num_outputs; i++) { + unsigned decl, buf, reg, mask; + + buf = so_info->output[i].output_buffer; + + /* pad with holes */ + assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); + while (buffer_offsets[buf] < so_info->output[i].dst_offset) { + int num_dwords; + + num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; + if (num_dwords > 4) + num_dwords = 4; + + decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT | + SO_DECL_HOLE_FLAG | + ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT; + + so_decls[num_entries++] = decl; + buffer_offsets[buf] += num_dwords; + } + + reg = so_info->output[i].register_index; + mask = ((1 << so_info->output[i].num_components) - 1) << + so_info->output[i].start_component; + + decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT | + reg << SO_DECL_REGISTER_INDEX_SHIFT | + mask << SO_DECL_COMPONENT_MASK_SHIFT; + + so_decls[num_entries++] = decl; + buffer_selects |= 1 << buf; + buffer_offsets[buf] += so_info->output[i].num_components; + } + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 201: + * + * "Errata: All 128 decls for all four streams must be included + * whenever this command is issued. The "Num Entries [n]" fields still + * contain the actual numbers of valid decls." + * + * Also note that "DWord Length" has 9 bits for this command, and the type + * of cmd_len is thus uint16_t. 
+ */ + cmd_len = 2 * 128 + 3; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT | + 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT | + 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT | + buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT); + ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT | + 0 << SO_NUM_ENTRIES_2_SHIFT | + 0 << SO_NUM_ENTRIES_1_SHIFT | + num_entries << SO_NUM_ENTRIES_0_SHIFT); + + for (i = 0; i < num_entries; i++) { + ilo_cp_write(cp, so_decls[i]); + ilo_cp_write(cp, 0); + } + for (; i < 128; i++) { + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + } + + ilo_cp_end(cp); +} + +static void +gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev, + int index, int base, int stride, + const struct pipe_stream_output_target *so_target, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18); + const uint8_t cmd_len = 4; + struct ilo_buffer *buf; + int end; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + if (!so_target || !so_target->buffer) { + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); + return; + } + + buf = ilo_buffer(so_target->buffer); + + /* DWord-aligned */ + assert(stride % 4 == 0 && base % 4 == 0); + assert(so_target->buffer_offset % 4 == 0); + + stride &= ~3; + base = (base + so_target->buffer_offset) & ~3; + end = (base + so_target->buffer_size) & ~3; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT | + stride); + ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_end(cp); +} + +static void +gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist, + struct 
ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); + const uint8_t cmd_len = 7; + const int prim = (rectlist) ? + _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); + const int vb_access = (info->indexed) ? + GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : + GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; + const uint32_t vb_start = info->start + + ((info->indexed) ? ib->draw_start_offset : 0); + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, vb_access | prim); + ilo_cp_write(cp, info->count); + ilo_cp_write(cp, vb_start); + ilo_cp_write(cp, info->instance_count); + ilo_cp_write(cp, info->start_instance); + ilo_cp_write(cp, info->index_bias); + ilo_cp_end(cp); +} + +static uint32_t +gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 64 / 4; + const int state_len = 16 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 270: + * + * "The viewport-specific state used by both the SF and CL units + * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each + * of which contains the DWords described below. The start of each + * element is spaced 16 DWords apart. The location of first element of + * the array, as specified by both Pointer to SF_VIEWPORT and Pointer + * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." 
+ */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT", + state_len, state_align, &state_offset); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + dw[8] = fui(vp->min_gbx); + dw[9] = fui(vp->max_gbx); + dw[10] = fui(vp->min_gby); + dw[11] = fui(vp->max_gby); + dw[12] = 0; + dw[13] = 0; + dw[14] = 0; + dw[15] = 0; + + dw += 16; + } + + return state_offset; +} + void ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev, unsigned width, unsigned height, @@ -241,7 +1271,7 @@ ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev, { uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 7, 7.5); + ILO_GPE_VALID_GEN(dev, 7, 7); /* * From the Ivy Bridge PRM, volume 4 part 1, page 62: @@ -277,16 +1307,16 @@ ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev, STATIC_ASSERT(Elements(surf->payload) >= 8); dw = surf->payload; - dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT | - GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT | - GEN6_TILING_X << 13; + dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT | + BRW_SURFACE_TILED << 13; dw[1] = 0; - dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_DW2_HEIGHT) | - SET_FIELD(width - 1, GEN7_SURFACE_DW2_WIDTH); + dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) | + SET_FIELD(width - 1, GEN7_SURFACE_WIDTH); - dw[3] = SET_FIELD(depth - 1, GEN7_SURFACE_DW3_DEPTH); + dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH); dw[4] = 0; dw[5] = level; @@ -314,12 +1344,12 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev, int surface_type, surface_format, num_entries; uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 7, 7.5); + ILO_GPE_VALID_GEN(dev, 7, 7); - 
surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER; + surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER; surface_format = (typed) ? - ilo_translate_color_format(elem_format) : GEN6_FORMAT_RAW; + ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW; num_entries = size / struct_size; /* see if there is enough space to fit another element */ @@ -392,17 +1422,17 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev, STATIC_ASSERT(Elements(surf->payload) >= 8); dw = surf->payload; - dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; + dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT | + surface_format << BRW_SURFACE_FORMAT_SHIFT; if (render_cache_rw) - dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; + dw[0] |= BRW_SURFACE_RC_READ_WRITE; dw[1] = offset; - dw[2] = SET_FIELD(height, GEN7_SURFACE_DW2_HEIGHT) | - SET_FIELD(width, GEN7_SURFACE_DW2_WIDTH); + dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) | + SET_FIELD(width, GEN7_SURFACE_WIDTH); - dw[3] = SET_FIELD(depth, GEN7_SURFACE_DW3_DEPTH) | + dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) | pitch; dw[4] = 0; @@ -411,13 +1441,6 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev, dw[6] = 0; dw[7] = 0; - if (dev->gen >= ILO_GEN(7.5)) { - dw[7] |= SET_FIELD(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | - SET_FIELD(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | - SET_FIELD(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | - SET_FIELD(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); - } - /* do not increment reference count */ surf->bo = buf->bo; } @@ -430,7 +1453,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, unsigned num_levels, unsigned first_layer, unsigned num_layers, - bool is_rt, bool offset_to_layer, + bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf) { int surface_type, surface_format; @@ -438,10 +1461,10 @@ ilo_gpe_init_view_surface_for_texture_gen7(const 
struct ilo_dev_info *dev, unsigned layer_offset, x_offset, y_offset; uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 7, 7.5); + ILO_GPE_VALID_GEN(dev, 7, 7); surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); - assert(surface_type != GEN6_SURFTYPE_BUFFER); + assert(surface_type != BRW_SURFACE_BUFFER); if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) format = PIPE_FORMAT_Z32_FLOAT; @@ -458,7 +1481,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, tex->base.depth0 : num_layers; pitch = tex->bo_stride; - if (surface_type == GEN6_SURFTYPE_CUBE) { + if (surface_type == BRW_SURFACE_CUBE) { /* * From the Ivy Bridge PRM, volume 4 part 1, page 70: * @@ -471,7 +1494,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, * restriction. */ if (is_rt) { - surface_type = GEN6_SURFTYPE_2D; + surface_type = BRW_SURFACE_2D; } else { assert(num_layers % 6 == 0); @@ -483,18 +1506,18 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); assert(first_layer < 2048 && num_layers <= 2048); switch (surface_type) { - case GEN6_SURFTYPE_1D: + case BRW_SURFACE_1D: assert(width <= 16384 && height == 1 && depth <= 2048); break; - case GEN6_SURFTYPE_2D: + case BRW_SURFACE_2D: assert(width <= 16384 && height <= 16384 && depth <= 2048); break; - case GEN6_SURFTYPE_3D: + case BRW_SURFACE_3D: assert(width <= 2048 && height <= 2048 && depth <= 2048); if (!is_rt) assert(first_layer == 0); break; - case GEN6_SURFTYPE_CUBE: + case BRW_SURFACE_CUBE: assert(width <= 16384 && height <= 16384 && depth <= 86); assert(width == height); if (is_rt) @@ -506,44 +1529,52 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, } if (is_rt) { + /* + * Compute the offset to the layer manually. + * + * For rendering, the hardware requires LOD to be the same for all + * render targets and the depth buffer. 
We need to compute the offset + * to the layer manually and always set LOD to 0. + */ + if (true) { + /* we lose the capability for layered rendering */ + assert(num_layers == 1); + + layer_offset = ilo_texture_get_slice_offset(tex, + first_level, first_layer, &x_offset, &y_offset); + + assert(x_offset % 4 == 0); + assert(y_offset % 2 == 0); + x_offset /= 4; + y_offset /= 2; + + /* derive the size for the LOD */ + width = u_minify(width, first_level); + height = u_minify(height, first_level); + if (surface_type == BRW_SURFACE_3D) + depth = u_minify(depth, first_level); + else + depth = 1; + + first_level = 0; + first_layer = 0; + lod = 0; + } + else { + layer_offset = 0; + x_offset = 0; + y_offset = 0; + } + assert(num_levels == 1); lod = first_level; } else { - lod = num_levels - 1; - } - - /* - * Offset to the layer. When rendering, the hardware requires LOD and - * Depth to be the same for all render targets and the depth buffer. We - * need to offset to the layer manually and always set LOD and Depth to 0. 
- */ - if (offset_to_layer) { - /* we lose the capability for layered rendering */ - assert(is_rt && num_layers == 1); - - layer_offset = ilo_texture_get_slice_offset(tex, - first_level, first_layer, &x_offset, &y_offset); - - assert(x_offset % 4 == 0); - assert(y_offset % 2 == 0); - x_offset /= 4; - y_offset /= 2; - - /* derive the size for the LOD */ - width = u_minify(width, first_level); - height = u_minify(height, first_level); - - first_level = 0; - first_layer = 0; - - lod = 0; - depth = 1; - } - else { layer_offset = 0; x_offset = 0; y_offset = 0; + + lod = num_levels - 1; } /* @@ -582,8 +1613,8 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, STATIC_ASSERT(Elements(surf->payload) >= 8); dw = surf->payload; - dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT | + dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT | + surface_format << BRW_SURFACE_FORMAT_SHIFT | ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13; /* @@ -597,36 +1628,36 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, * For non-3D sampler surfaces, resinfo (the sampler message) always * returns zero for the number of layers when this field is not set. 
*/ - if (surface_type != GEN6_SURFTYPE_3D) { + if (surface_type != BRW_SURFACE_3D) { if (util_resource_is_array_texture(&tex->base)) - dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY; + dw[0] |= GEN7_SURFACE_IS_ARRAY; else assert(depth == 1); } if (tex->valign_4) - dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; + dw[0] |= GEN7_SURFACE_VALIGN_4; if (tex->halign_8) - dw[0] |= GEN7_SURFACE_DW0_HALIGN_8; + dw[0] |= GEN7_SURFACE_HALIGN_8; if (tex->array_spacing_full) - dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; + dw[0] |= GEN7_SURFACE_ARYSPC_FULL; else - dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0; + dw[0] |= GEN7_SURFACE_ARYSPC_LOD0; - if (is_rt) - dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; + if (render_cache_rw) + dw[0] |= BRW_SURFACE_RC_READ_WRITE; - if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) - dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; + if (surface_type == BRW_SURFACE_CUBE && !is_rt) + dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES; dw[1] = layer_offset; - dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_DW2_HEIGHT) | - SET_FIELD(width - 1, GEN7_SURFACE_DW2_WIDTH); + dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) | + SET_FIELD(width - 1, GEN7_SURFACE_WIDTH); - dw[3] = SET_FIELD(depth - 1, GEN7_SURFACE_DW3_DEPTH) | + dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) | (pitch - 1); dw[4] = first_layer << 18 | @@ -639,34 +1670,270 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, */ if (tex->interleaved && tex->base.nr_samples > 1) { assert(!is_rt); - dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; + dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL; } else { - dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS; + dw[4] |= GEN7_SURFACE_MSFMT_MSS; } if (tex->base.nr_samples > 4) - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8; + dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8; else if (tex->base.nr_samples > 2) - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4; + dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4; else - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1; + dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1; - dw[5] 
= x_offset << GEN7_SURFACE_DW5_X_OFFSET__SHIFT | - y_offset << GEN7_SURFACE_DW5_Y_OFFSET__SHIFT | - SET_FIELD(first_level, GEN7_SURFACE_DW5_MIN_LOD) | + dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT | + y_offset << BRW_SURFACE_Y_OFFSET_SHIFT | + SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) | lod; dw[6] = 0; dw[7] = 0; - if (dev->gen >= ILO_GEN(7.5)) { - dw[7] |= SET_FIELD(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | - SET_FIELD(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | - SET_FIELD(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | - SET_FIELD(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); - } - /* do not increment reference count */ surf->bo = tex->bo; } + +static int +gen7_estimate_command_size(const struct ilo_dev_info *dev, + enum ilo_gpe_gen7_command cmd, + int arg) +{ + static const struct { + int header; + int body; + } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = { + [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 }, + [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 }, + [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 }, + [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 }, + [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 }, + [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 }, + [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 }, + [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 }, + [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 }, + [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 }, + [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 }, + [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 }, + [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 }, + 
[ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 }, + [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 }, + [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 }, + [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 }, + [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 }, + [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 }, + [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, }, + [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 }, + [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 }, + 
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 }, + [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 }, + [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 }, + [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 }, + [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 }, + }; + const int header = gen7_command_size_table[cmd].header; + const int body = gen7_command_size_table[cmd].body; + const int count = arg; + + ILO_GPE_VALID_GEN(dev, 7, 7); + assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT); + + return (likely(count)) ? header + body * count : 0; +} + +static int +gen7_estimate_state_size(const struct ilo_dev_info *dev, + enum ilo_gpe_gen7_state state, + int arg) +{ + static const struct { + int alignment; + int body; + bool is_array; + } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = { + [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true }, + [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true }, + [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true }, + [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false }, + [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true }, + [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false }, + [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true }, + [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true }, + [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false }, + [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true }, + [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false }, + [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true }, + }; + const int alignment = gen7_state_size_table[state].alignment; + const int body = gen7_state_size_table[state].body; + const bool is_array = gen7_state_size_table[state].is_array; + const int count = arg; + int estimate; + + ILO_GPE_VALID_GEN(dev, 7, 7); + assert(state < ILO_GPE_GEN7_STATE_COUNT); + + if (likely(count)) { + if (is_array) { + estimate = (alignment - 1) + body * count; + } + else { + 
estimate = (alignment - 1) + body; + /* all states are aligned */ + if (count > 1) + estimate += util_align_npot(body, alignment) * (count - 1); + } + } + else { + estimate = 0; + } + + return estimate; +} + +static void +gen7_init(struct ilo_gpe_gen7 *gen7) +{ + const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get(); + + gen7->estimate_command_size = gen7_estimate_command_size; + gen7->estimate_state_size = gen7_estimate_state_size; + +#define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name +#define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name + GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6); + GEN7_USE(gen7, STATE_SIP, gen6); + GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6); + GEN7_USE(gen7, PIPELINE_SELECT, gen6); + GEN7_USE(gen7, MEDIA_VFE_STATE, gen6); + GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6); + GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6); + GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6); + GEN7_SET(gen7, GPGPU_WALKER); + GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS); + GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6); + GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6); + GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6); + GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6); + GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6); + GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6); + GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS); + GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6); + GEN7_USE(gen7, 3DSTATE_VS, gen6); + GEN7_SET(gen7, 3DSTATE_GS); + GEN7_USE(gen7, 3DSTATE_CLIP, gen6); + GEN7_SET(gen7, 3DSTATE_SF); + GEN7_SET(gen7, 3DSTATE_WM); + GEN7_SET(gen7, 3DSTATE_CONSTANT_VS); + GEN7_SET(gen7, 3DSTATE_CONSTANT_GS); + GEN7_SET(gen7, 3DSTATE_CONSTANT_PS); + GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK); + GEN7_SET(gen7, 3DSTATE_CONSTANT_HS); + GEN7_SET(gen7, 3DSTATE_CONSTANT_DS); + GEN7_SET(gen7, 3DSTATE_HS); + GEN7_SET(gen7, 3DSTATE_TE); + GEN7_SET(gen7, 3DSTATE_DS); + GEN7_SET(gen7, 3DSTATE_STREAMOUT); + GEN7_SET(gen7, 3DSTATE_SBE); + GEN7_SET(gen7, 3DSTATE_PS); + 
GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); + GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC); + GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS); + GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS); + GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS); + GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS); + GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS); + GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS); + GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS); + GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS); + GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS); + GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS); + GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS); + GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS); + GEN7_SET(gen7, 3DSTATE_URB_VS); + GEN7_SET(gen7, 3DSTATE_URB_HS); + GEN7_SET(gen7, 3DSTATE_URB_DS); + GEN7_SET(gen7, 3DSTATE_URB_GS); + GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6); + GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6); + GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6); + GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6); + GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6); + GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6); + GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS); + GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS); + GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS); + GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS); + GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS); + GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST); + GEN7_SET(gen7, 3DSTATE_SO_BUFFER); + GEN7_USE(gen7, PIPE_CONTROL, gen6); + GEN7_SET(gen7, 3DPRIMITIVE); + GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6); + GEN7_SET(gen7, SF_CLIP_VIEWPORT); + GEN7_USE(gen7, CC_VIEWPORT, gen6); + GEN7_USE(gen7, COLOR_CALC_STATE, gen6); + GEN7_USE(gen7, BLEND_STATE, gen6); + GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6); + GEN7_USE(gen7, SCISSOR_RECT, gen6); + GEN7_USE(gen7, BINDING_TABLE_STATE, gen6); + GEN7_USE(gen7, SURFACE_STATE, gen6); + GEN7_USE(gen7, SAMPLER_STATE, gen6); + 
GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6); + GEN7_USE(gen7, push_constant_buffer, gen6); +#undef GEN7_USE +#undef GEN7_SET +} + +static struct ilo_gpe_gen7 gen7_gpe; + +const struct ilo_gpe_gen7 * +ilo_gpe_gen7_get(void) +{ + if (!gen7_gpe.estimate_command_size) + gen7_init(&gen7_gpe); + + return &gen7_gpe; +} diff --git a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h index 0816fd674..321201548 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/dist/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h @@ -28,1101 +28,466 @@ #ifndef ILO_GPE_GEN7_H #define ILO_GPE_GEN7_H -#include "intel_winsys.h" - #include "ilo_common.h" -#include "ilo_cp.h" -#include "ilo_resource.h" -#include "ilo_shader.h" #include "ilo_gpe_gen6.h" -static inline void -gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - assert(!"GPGPU_WALKER unsupported"); -} - -static inline void -gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, - uint32_t clear_val, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, clear_val); - ilo_cp_write(cp, 1); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_VF(const struct ilo_dev_info *dev, - bool enable_cut_index, - uint32_t cut_index, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0c); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 7.5, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - ((enable_cut_index) ? 
GEN75_VF_DW0_CUT_INDEX_ENABLE : 0)); - ilo_cp_write(cp, cut_index); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev, - int subop, uint32_t pointer, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, pointer); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t color_calc_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp); -} - -static inline void -gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *gs, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); - const uint8_t cmd_len = 7; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - if (!gs) { - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, GEN7_GS_DW5_STATISTICS); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - return; - } - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - enum pipe_format zs_format, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); - const 
uint8_t cmd_len = 7; - const int num_samples = 1; - uint32_t payload[6]; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - ilo_gpe_gen6_fill_3dstate_sf_raster(dev, - rasterizer, num_samples, zs_format, - payload, Elements(payload)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, payload, 6); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev, - const struct ilo_shader_state *fs, - const struct ilo_rasterizer_state *rasterizer, - bool cc_may_kill, uint32_t hiz_op, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); - const uint8_t cmd_len = 3; - const int num_samples = 1; - uint32_t dw1, dw2; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - /* see ilo_gpe_init_rasterizer_wm() */ - if (rasterizer) { - dw1 = rasterizer->wm.payload[0]; - dw2 = rasterizer->wm.payload[1]; - - assert(!hiz_op); - dw1 |= GEN7_WM_DW1_STATISTICS; - } - else { - dw1 = hiz_op; - dw2 = 0; - } - - if (fs) { - const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); - - dw1 |= fs_cso->payload[3]; - } - - if (cc_may_kill) - dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL; - - if (num_samples > 1) { - dw1 |= rasterizer->wm.dw_msaa_rast; - dw2 |= rasterizer->wm.dw_msaa_disp; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3dstate_constant(const struct ilo_dev_info *dev, - int subop, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); - const uint8_t cmd_len = 7; - uint32_t dw[6]; - int total_read_length, i; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18); - - assert(num_bufs <= 4); - - dw[0] = 0; - dw[1] = 0; - - total_read_length = 0; - for (i = 0; i < 4; i++) { - int read_len; - - /* - * 
From the Ivy Bridge PRM, volume 2 part 1, page 112: - * - * "Constant buffers must be enabled in order from Constant Buffer 0 - * to Constant Buffer 3 within this command. For example, it is - * not allowed to enable Constant Buffer 1 by programming a - * non-zero value in the VS Constant Buffer 1 Read Length without a - * non-zero value in VS Constant Buffer 0 Read Length." - */ - if (i >= num_bufs || !sizes[i]) { - for (; i < 4; i++) { - assert(i >= num_bufs || !sizes[i]); - dw[2 + i] = 0; - } - break; - } - - /* read lengths are in 256-bit units */ - read_len = (sizes[i] + 31) / 32; - /* the lower 5 bits are used for memory object control state */ - assert(bufs[i] % 32 == 0); - - dw[i / 2] |= read_len << ((i % 2) ? 16 : 0); - dw[2 + i] = bufs[i]; - - total_read_length += read_len; - } - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 113: - * - * "The sum of all four read length fields must be less than or equal - * to the size of 64" - */ - assert(total_read_length <= 64); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, dw, 6); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp); -} - -static inline void -gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp); -} - -static inline void -gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp); -} - -static inline void -gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, - unsigned sample_mask, - int num_samples, - struct ilo_cp *cp) -{ - const 
uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18); - const uint8_t cmd_len = 2; - const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 294: - * - * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field - * (Sample Mask) must be zero. - * - * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field - * must be zero." - */ - sample_mask &= valid_mask; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, sample_mask); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp); -} - -static inline void -gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp); -} - -static inline void -gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *hs, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b); - const uint8_t cmd_len = 7; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - assert(!hs); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static inline void 
-gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *ds, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d); - const uint8_t cmd_len = 6; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - assert(!ds); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - -} - -static inline void -gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev, - unsigned buffer_mask, - int vertex_attrib_count, - bool rasterizer_discard, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e); - const uint8_t cmd_len = 3; - const bool enable = (buffer_mask != 0); - uint32_t dw1, dw2; - int read_len; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - if (!enable) { - dw1 = 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT; - if (rasterizer_discard) - dw1 |= GEN7_SO_DW1_RENDER_DISABLE; - - dw2 = 0; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_end(cp); - return; - } - - read_len = (vertex_attrib_count + 1) / 2; - if (!read_len) - read_len = 1; - - dw1 = GEN7_SO_DW1_SO_ENABLE | - 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT | - GEN7_SO_DW1_STATISTICS | - buffer_mask << 8; - - if (rasterizer_discard) - dw1 |= GEN7_SO_DW1_RENDER_DISABLE; - - /* API_OPENGL */ - if (true) - dw1 |= GEN7_SO_DW1_REORDER_TRAILING; - - dw2 = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT | - 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT | - 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT | - 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT | - 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT | - 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT | - 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT | - (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - 
ilo_cp_write(cp, dw2); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f); - const uint8_t cmd_len = 14; - uint32_t dw[13]; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs, dw, Elements(dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, dw, 13); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *fs, - int num_samplers, bool dual_blend, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20); - const uint8_t cmd_len = 8; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - if (!fs) { - int max_threads; - - /* GPU hangs if none of the dispatch enable bits is set */ - dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH; - - /* see brwCreateContext() */ - switch (dev->gen) { - case ILO_GEN(7.5): - max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (dev->gt == 2) ? 
172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - - return; - } - - cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - if (dual_blend) - dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, 0); /* kernel 1 */ - ilo_cp_write(cp, 0); /* kernel 2 */ - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev, - uint32_t sf_clip_viewport, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp); -} - -static inline void -gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev, - uint32_t cc_viewport, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp); -} - -static inline void -gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t blend_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp); -} - -static inline void -gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t depth_stencil_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp); -} - -static inline void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x26, 
binding_table, cp); -} - -static inline void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp); -} - -static inline void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp); -} - -static inline void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp); -} - -static inline void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp); -} - -static inline void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp); -} - -static inline void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp); -} - -static inline void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp); -} - -static inline void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp); -} - -static inline void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp); -} - -static inline void -gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, 
- int subop, int offset, int size, - int entry_size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); - const uint8_t cmd_len = 2; - const int row_size = 64; /* 512 bits */ - int alloc_size, num_entries, min_entries, max_entries; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - /* VS, HS, DS, and GS variants */ - assert(subop >= 0x30 && subop <= 0x33); - - /* in multiples of 8KB */ - assert(offset % 8192 == 0); - offset /= 8192; - - /* in multiple of 512-bit rows */ - alloc_size = (entry_size + row_size - 1) / row_size; - if (!alloc_size) - alloc_size = 1; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 34: - * - * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may - * cause performance to decrease due to banking in the URB. Element - * sizes of 16 to 20 should be programmed with six 512-bit URB rows." - */ - if (subop == 0x30 && alloc_size == 5) - alloc_size = 6; - - /* in multiples of 8 */ - num_entries = (size / row_size / alloc_size) & ~7; - - switch (subop) { - case 0x30: /* 3DSTATE_URB_VS */ - min_entries = 32; - - switch (dev->gen) { - case ILO_GEN(7.5): - max_entries = (dev->gt >= 2) ? 1644 : 640; - break; - case ILO_GEN(7): - default: - max_entries = (dev->gt == 2) ? 704 : 512; - break; - } - - assert(num_entries >= min_entries); - if (num_entries > max_entries) - num_entries = max_entries; - break; - case 0x31: /* 3DSTATE_URB_HS */ - max_entries = (dev->gt == 2) ? 64 : 32; - if (num_entries > max_entries) - num_entries = max_entries; - break; - case 0x32: /* 3DSTATE_URB_DS */ - if (num_entries) - assert(num_entries >= 138); - break; - case 0x33: /* 3DSTATE_URB_GS */ - switch (dev->gen) { - case ILO_GEN(7.5): - max_entries = (dev->gt >= 2) ? 640 : 256; - break; - case ILO_GEN(7): - default: - max_entries = (dev->gt == 2) ? 
320 : 192; - break; - } - - if (num_entries > max_entries) - num_entries = max_entries; - break; - default: - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT | - (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT | - num_entries); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp); -} - -static inline void -gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp); -} - -static inline void -gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp); -} - -static inline void -gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp); -} - -static inline void -gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev, - int subop, int offset, int size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop); - const uint8_t cmd_len = 2; - int end; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= 0x12 && subop <= 0x16); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 68: - * - * "(A table that says the maximum size of each constant buffer is - * 16KB") - * - * From the Ivy Bridge PRM, volume 2 part 1, page 115: - * - * "The sum of the Constant Buffer Offset and the Constant Buffer Size - * may not exceed the maximum value of the Constant Buffer Size." - * - * Thus, the valid range of buffer end is [0KB, 16KB]. 
- */ - end = (offset + size) / 1024; - if (end > 16) { - assert(!"invalid constant buffer end"); - end = 16; - } - - /* the valid range of buffer offset is [0KB, 15KB] */ - offset = (offset + 1023) / 1024; - if (offset > 15) { - assert(!"invalid constant buffer offset"); - offset = 15; - } - - if (offset > end) { - assert(!size); - offset = end; - } - - /* the valid range of buffer size is [0KB, 15KB] */ - size = end - offset; - if (size > 15) { - assert(!"invalid constant buffer size"); - size = 15; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT | - size); - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp); -} - -static inline void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp); -} - -static inline void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp); -} - -static inline void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp); -} - -static inline void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp); -} - -static inline void -gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev, - const struct pipe_stream_output_info *so_info, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17); - uint16_t cmd_len; - 
int buffer_selects, num_entries, i; - uint16_t so_decls[128]; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - buffer_selects = 0; - num_entries = 0; - - if (so_info) { - int buffer_offsets[PIPE_MAX_SO_BUFFERS]; - - memset(buffer_offsets, 0, sizeof(buffer_offsets)); - - for (i = 0; i < so_info->num_outputs; i++) { - unsigned decl, buf, reg, mask; - - buf = so_info->output[i].output_buffer; - - /* pad with holes */ - assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); - while (buffer_offsets[buf] < so_info->output[i].dst_offset) { - int num_dwords; - - num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; - if (num_dwords > 4) - num_dwords = 4; - - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - GEN7_SO_DECL_HOLE_FLAG | - ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; - - so_decls[num_entries++] = decl; - buffer_offsets[buf] += num_dwords; - } - - reg = so_info->output[i].register_index; - mask = ((1 << so_info->output[i].num_components) - 1) << - so_info->output[i].start_component; - - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - reg << GEN7_SO_DECL_REG_INDEX__SHIFT | - mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; - - so_decls[num_entries++] = decl; - buffer_selects |= 1 << buf; - buffer_offsets[buf] += so_info->output[i].num_components; - } - } - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 201: - * - * "Errata: All 128 decls for all four streams must be included - * whenever this command is issued. The "Num Entries [n]" fields still - * contain the actual numbers of valid decls." - * - * Also note that "DWord Length" has 9 bits for this command, and the type - * of cmd_len is thus uint16_t. 
- */ - cmd_len = 2 * 128 + 3; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | - 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | - 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | - buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT); - ilo_cp_write(cp, 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | - 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | - 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | - num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT); - - for (i = 0; i < num_entries; i++) { - ilo_cp_write(cp, so_decls[i]); - ilo_cp_write(cp, 0); - } - for (; i < 128; i++) { - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - } - - ilo_cp_end(cp); -} - -static inline void -gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev, - int index, int base, int stride, - const struct pipe_stream_output_target *so_target, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18); - const uint8_t cmd_len = 4; - struct ilo_buffer *buf; - int end; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - if (!so_target || !so_target->buffer) { - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, index << GEN7_SO_BUF_DW1_INDEX__SHIFT); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - return; - } - - buf = ilo_buffer(so_target->buffer); - - /* DWord-aligned */ - assert(stride % 4 == 0 && base % 4 == 0); - assert(so_target->buffer_offset % 4 == 0); - - stride &= ~3; - base = (base + so_target->buffer_offset) & ~3; - end = (base + so_target->buffer_size) & ~3; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, index << GEN7_SO_BUF_DW1_INDEX__SHIFT | - stride); - ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_end(cp); -} - -static inline 
void -gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); - const uint8_t cmd_len = 7; - const int prim = (rectlist) ? - GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN7_3DPRIM_DW1_ACCESS_RANDOM : - GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? ib->draw_start_offset : 0); - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, vb_access | prim); - ilo_cp_write(cp, info->count); - ilo_cp_write(cp, vb_start); - ilo_cp_write(cp, info->instance_count); - ilo_cp_write(cp, info->start_instance); - ilo_cp_write(cp, info->index_bias); - ilo_cp_end(cp); -} - -static inline uint32_t -gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - const int state_len = 16 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 7, 7.5); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 270: - * - * "The viewport-specific state used by both the SF and CL units - * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each - * of which contains the DWords described below. The start of each - * element is spaced 16 DWords apart. The location of first element of - * the array, as specified by both Pointer to SF_VIEWPORT and Pointer - * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." 
- */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - dw[8] = fui(vp->min_gbx); - dw[9] = fui(vp->max_gbx); - dw[10] = fui(vp->min_gby); - dw[11] = fui(vp->max_gby); - dw[12] = 0; - dw[13] = 0; - dw[14] = 0; - dw[15] = 0; - - dw += 16; - } - - return state_offset; -} +/** + * Commands that GEN7 GPE could emit. + */ +enum ilo_gpe_gen7_command { + ILO_GPE_GEN7_STATE_BASE_ADDRESS, /* (0x0, 0x1, 0x01) */ + ILO_GPE_GEN7_STATE_SIP, /* (0x0, 0x1, 0x02) */ + ILO_GPE_GEN7_3DSTATE_VF_STATISTICS, /* (0x1, 0x0, 0x0b) */ + ILO_GPE_GEN7_PIPELINE_SELECT, /* (0x1, 0x1, 0x04) */ + ILO_GPE_GEN7_MEDIA_VFE_STATE, /* (0x2, 0x0, 0x00) */ + ILO_GPE_GEN7_MEDIA_CURBE_LOAD, /* (0x2, 0x0, 0x01) */ + ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, /* (0x2, 0x0, 0x02) */ + ILO_GPE_GEN7_MEDIA_STATE_FLUSH, /* (0x2, 0x0, 0x04) */ + ILO_GPE_GEN7_GPGPU_WALKER, /* (0x2, 0x1, 0x05) */ + ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS, /* (0x3, 0x0, 0x04) */ + ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER, /* (0x3, 0x0, 0x05) */ + ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER, /* (0x3, 0x0, 0x06) */ + ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER, /* (0x3, 0x0, 0x07) */ + ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS, /* (0x3, 0x0, 0x08) */ + ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS, /* (0x3, 0x0, 0x09) */ + ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER, /* (0x3, 0x0, 0x0a) */ + ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS, /* (0x3, 0x0, 0x0e) */ + ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS, /* (0x3, 0x0, 0x0f) */ + ILO_GPE_GEN7_3DSTATE_VS, /* (0x3, 0x0, 0x10) */ + ILO_GPE_GEN7_3DSTATE_GS, /* (0x3, 0x0, 0x11) */ + ILO_GPE_GEN7_3DSTATE_CLIP, /* (0x3, 0x0, 0x12) */ + ILO_GPE_GEN7_3DSTATE_SF, /* (0x3, 0x0, 0x13) */ + 
ILO_GPE_GEN7_3DSTATE_WM, /* (0x3, 0x0, 0x14) */ + ILO_GPE_GEN7_3DSTATE_CONSTANT_VS, /* (0x3, 0x0, 0x15) */ + ILO_GPE_GEN7_3DSTATE_CONSTANT_GS, /* (0x3, 0x0, 0x16) */ + ILO_GPE_GEN7_3DSTATE_CONSTANT_PS, /* (0x3, 0x0, 0x17) */ + ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK, /* (0x3, 0x0, 0x18) */ + ILO_GPE_GEN7_3DSTATE_CONSTANT_HS, /* (0x3, 0x0, 0x19) */ + ILO_GPE_GEN7_3DSTATE_CONSTANT_DS, /* (0x3, 0x0, 0x1a) */ + ILO_GPE_GEN7_3DSTATE_HS, /* (0x3, 0x0, 0x1b) */ + ILO_GPE_GEN7_3DSTATE_TE, /* (0x3, 0x0, 0x1c) */ + ILO_GPE_GEN7_3DSTATE_DS, /* (0x3, 0x0, 0x1d) */ + ILO_GPE_GEN7_3DSTATE_STREAMOUT, /* (0x3, 0x0, 0x1e) */ + ILO_GPE_GEN7_3DSTATE_SBE, /* (0x3, 0x0, 0x1f) */ + ILO_GPE_GEN7_3DSTATE_PS, /* (0x3, 0x0, 0x20) */ + ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, /* (0x3, 0x0, 0x21) */ + ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC, /* (0x3, 0x0, 0x23) */ + ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS, /* (0x3, 0x0, 0x24) */ + ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, /* (0x3, 0x0, 0x25) */ + ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, /* (0x3, 0x0, 0x26) */ + ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS, /* (0x3, 0x0, 0x27) */ + ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS, /* (0x3, 0x0, 0x28) */ + ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS, /* (0x3, 0x0, 0x29) */ + ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS, /* (0x3, 0x0, 0x2a) */ + ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, /* (0x3, 0x0, 0x2b) */ + ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS, /* (0x3, 0x0, 0x2c) */ + ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS, /* (0x3, 0x0, 0x2d) */ + ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS, /* (0x3, 0x0, 0x2e) */ + ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS, /* (0x3, 0x0, 0x2f) */ + ILO_GPE_GEN7_3DSTATE_URB_VS, /* (0x3, 0x0, 0x30) */ + ILO_GPE_GEN7_3DSTATE_URB_HS, /* (0x3, 0x0, 0x31) */ + ILO_GPE_GEN7_3DSTATE_URB_DS, /* (0x3, 0x0, 0x32) */ + ILO_GPE_GEN7_3DSTATE_URB_GS, /* (0x3, 0x0, 0x33) */ + ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE, /* 
(0x3, 0x1, 0x00) */ + ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET, /* (0x3, 0x1, 0x06) */ + ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN, /* (0x3, 0x1, 0x07) */ + ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE, /* (0x3, 0x1, 0x08) */ + ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS, /* (0x3, 0x1, 0x0a) */ + ILO_GPE_GEN7_3DSTATE_MULTISAMPLE, /* (0x3, 0x1, 0x0d) */ + ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, /* (0x3, 0x1, 0x12) */ + ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS, /* (0x3, 0x1, 0x13) */ + ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS, /* (0x3, 0x1, 0x14) */ + ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, /* (0x3, 0x1, 0x15) */ + ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, /* (0x3, 0x1, 0x16) */ + ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST, /* (0x3, 0x1, 0x17) */ + ILO_GPE_GEN7_3DSTATE_SO_BUFFER, /* (0x3, 0x1, 0x18) */ + ILO_GPE_GEN7_PIPE_CONTROL, /* (0x3, 0x2, 0x00) */ + ILO_GPE_GEN7_3DPRIMITIVE, /* (0x3, 0x3, 0x00) */ + + ILO_GPE_GEN7_COMMAND_COUNT, +}; + +/** + * Indirect states that GEN7 GPE could emit. + */ +enum ilo_gpe_gen7_state { + ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA, + ILO_GPE_GEN7_SF_CLIP_VIEWPORT, + ILO_GPE_GEN7_CC_VIEWPORT, + ILO_GPE_GEN7_COLOR_CALC_STATE, + ILO_GPE_GEN7_BLEND_STATE, + ILO_GPE_GEN7_DEPTH_STENCIL_STATE, + ILO_GPE_GEN7_SCISSOR_RECT, + ILO_GPE_GEN7_BINDING_TABLE_STATE, + ILO_GPE_GEN7_SURFACE_STATE, + ILO_GPE_GEN7_SAMPLER_STATE, + ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE, + ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER, + + ILO_GPE_GEN7_STATE_COUNT, +}; + +typedef ilo_gpe_gen6_STATE_BASE_ADDRESS ilo_gpe_gen7_STATE_BASE_ADDRESS; +typedef ilo_gpe_gen6_STATE_SIP ilo_gpe_gen7_STATE_SIP; +typedef ilo_gpe_gen6_3DSTATE_VF_STATISTICS ilo_gpe_gen7_3DSTATE_VF_STATISTICS; +typedef ilo_gpe_gen6_PIPELINE_SELECT ilo_gpe_gen7_PIPELINE_SELECT; +typedef ilo_gpe_gen6_MEDIA_VFE_STATE ilo_gpe_gen7_MEDIA_VFE_STATE; +typedef ilo_gpe_gen6_MEDIA_CURBE_LOAD ilo_gpe_gen7_MEDIA_CURBE_LOAD; +typedef ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD ilo_gpe_gen7_MEDIA_INTERFACE_DESCRIPTOR_LOAD; +typedef 
ilo_gpe_gen6_MEDIA_STATE_FLUSH ilo_gpe_gen7_MEDIA_STATE_FLUSH; + +typedef void +(*ilo_gpe_gen7_GPGPU_WALKER)(const struct ilo_dev_info *dev, + struct ilo_cp *cp); + +typedef ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS ilo_gpe_gen7_3DSTATE_CLEAR_PARAMS; +typedef ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_DEPTH_BUFFER; +typedef ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER ilo_gpe_gen7_3DSTATE_STENCIL_BUFFER; +typedef ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_HIER_DEPTH_BUFFER; +typedef ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS ilo_gpe_gen7_3DSTATE_VERTEX_BUFFERS; +typedef ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS ilo_gpe_gen7_3DSTATE_VERTEX_ELEMENTS; +typedef ilo_gpe_gen6_3DSTATE_INDEX_BUFFER ilo_gpe_gen7_3DSTATE_INDEX_BUFFER; + +typedef void +(*ilo_gpe_gen7_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t color_calc_state, + struct ilo_cp *cp); + +typedef ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS ilo_gpe_gen7_3DSTATE_SCISSOR_STATE_POINTERS; +typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS; + +typedef void +(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + int num_samplers, + struct ilo_cp *cp); + +typedef ilo_gpe_gen6_3DSTATE_CLIP ilo_gpe_gen7_3DSTATE_CLIP; + +typedef void +(*ilo_gpe_gen7_3DSTATE_SF)(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct pipe_surface *zs_surf, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_WM)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *fs, + const struct ilo_rasterizer_state *rasterizer, + bool cc_may_kill, + struct ilo_cp *cp); + +typedef ilo_gpe_gen6_3DSTATE_CONSTANT_VS ilo_gpe_gen7_3DSTATE_CONSTANT_VS; +typedef ilo_gpe_gen6_3DSTATE_CONSTANT_GS ilo_gpe_gen7_3DSTATE_CONSTANT_GS; +typedef ilo_gpe_gen6_3DSTATE_CONSTANT_PS ilo_gpe_gen7_3DSTATE_CONSTANT_PS; + +typedef void +(*ilo_gpe_gen7_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev, + unsigned sample_mask, + int 
num_samples, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_CONSTANT_HS)(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_CONSTANT_DS)(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_HS)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *hs, + int num_samplers, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_TE)(const struct ilo_dev_info *dev, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_DS)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *ds, + int num_samplers, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_STREAMOUT)(const struct ilo_dev_info *dev, + unsigned buffer_mask, + int vertex_attrib_count, + bool rasterizer_discard, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SBE)(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + const struct ilo_shader_state *last_sh, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_PS)(const struct ilo_dev_info *dev, + const struct ilo_shader_state *fs, + int num_samplers, bool dual_blend, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP)(const struct ilo_dev_info *dev, + uint32_t viewport, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC)(const struct ilo_dev_info *dev, + uint32_t viewport, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_BLEND_STATE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t blend, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS)(const struct ilo_dev_info *dev, + uint32_t depth_stencil, + struct ilo_cp *cp); + +typedef void 
+(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_VS)(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_HS)(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_DS)(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_GS)(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_PS)(const struct ilo_dev_info *dev, + uint32_t binding_table, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS)(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS)(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS)(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS)(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS)(const struct ilo_dev_info *dev, + uint32_t sampler_state, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_URB_VS)(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_URB_HS)(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_URB_DS)(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_URB_GS)(const struct ilo_dev_info *dev, + int offset, int size, int entry_size, 
+ struct ilo_cp *cp); + +typedef ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE ilo_gpe_gen7_3DSTATE_DRAWING_RECTANGLE; +typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_OFFSET; +typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_PATTERN; +typedef ilo_gpe_gen6_3DSTATE_LINE_STIPPLE ilo_gpe_gen7_3DSTATE_LINE_STIPPLE; +typedef ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS ilo_gpe_gen7_3DSTATE_AA_LINE_PARAMETERS; +typedef ilo_gpe_gen6_3DSTATE_MULTISAMPLE ilo_gpe_gen7_3DSTATE_MULTISAMPLE; + +typedef void +(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS)(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS)(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS)(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS)(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS)(const struct ilo_dev_info *dev, + int offset, int size, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SO_DECL_LIST)(const struct ilo_dev_info *dev, + const struct pipe_stream_output_info *so_info, + struct ilo_cp *cp); + +typedef void +(*ilo_gpe_gen7_3DSTATE_SO_BUFFER)(const struct ilo_dev_info *dev, + int index, int base, int stride, + const struct pipe_stream_output_target *so_target, + struct ilo_cp *cp); + +typedef ilo_gpe_gen6_PIPE_CONTROL ilo_gpe_gen7_PIPE_CONTROL; +typedef ilo_gpe_gen6_3DPRIMITIVE ilo_gpe_gen7_3DPRIMITIVE; +typedef ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA ilo_gpe_gen7_INTERFACE_DESCRIPTOR_DATA; + +typedef uint32_t +(*ilo_gpe_gen7_SF_CLIP_VIEWPORT)(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp); + 
+typedef ilo_gpe_gen6_CC_VIEWPORT ilo_gpe_gen7_CC_VIEWPORT; +typedef ilo_gpe_gen6_COLOR_CALC_STATE ilo_gpe_gen7_COLOR_CALC_STATE; +typedef ilo_gpe_gen6_BLEND_STATE ilo_gpe_gen7_BLEND_STATE; +typedef ilo_gpe_gen6_DEPTH_STENCIL_STATE ilo_gpe_gen7_DEPTH_STENCIL_STATE; +typedef ilo_gpe_gen6_SCISSOR_RECT ilo_gpe_gen7_SCISSOR_RECT; +typedef ilo_gpe_gen6_BINDING_TABLE_STATE ilo_gpe_gen7_BINDING_TABLE_STATE; +typedef ilo_gpe_gen6_SURFACE_STATE ilo_gpe_gen7_SURFACE_STATE; +typedef ilo_gpe_gen6_SAMPLER_STATE ilo_gpe_gen7_SAMPLER_STATE; +typedef ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE ilo_gpe_gen7_SAMPLER_BORDER_COLOR_STATE; +typedef ilo_gpe_gen6_push_constant_buffer ilo_gpe_gen7_push_constant_buffer; + +/** + * GEN7 graphics processing engine + * + * \see ilo_gpe_gen6 + */ +struct ilo_gpe_gen7 { + int (*estimate_command_size)(const struct ilo_dev_info *dev, + enum ilo_gpe_gen7_command cmd, + int arg); + + int (*estimate_state_size)(const struct ilo_dev_info *dev, + enum ilo_gpe_gen7_state state, + int arg); + +#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name emit_ ## name + GEN7_EMIT(STATE_BASE_ADDRESS); + GEN7_EMIT(STATE_SIP); + GEN7_EMIT(3DSTATE_VF_STATISTICS); + GEN7_EMIT(PIPELINE_SELECT); + GEN7_EMIT(MEDIA_VFE_STATE); + GEN7_EMIT(MEDIA_CURBE_LOAD); + GEN7_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD); + GEN7_EMIT(MEDIA_STATE_FLUSH); + GEN7_EMIT(GPGPU_WALKER); + GEN7_EMIT(3DSTATE_CLEAR_PARAMS); + GEN7_EMIT(3DSTATE_DEPTH_BUFFER); + GEN7_EMIT(3DSTATE_STENCIL_BUFFER); + GEN7_EMIT(3DSTATE_HIER_DEPTH_BUFFER); + GEN7_EMIT(3DSTATE_VERTEX_BUFFERS); + GEN7_EMIT(3DSTATE_VERTEX_ELEMENTS); + GEN7_EMIT(3DSTATE_INDEX_BUFFER); + GEN7_EMIT(3DSTATE_CC_STATE_POINTERS); + GEN7_EMIT(3DSTATE_SCISSOR_STATE_POINTERS); + GEN7_EMIT(3DSTATE_VS); + GEN7_EMIT(3DSTATE_GS); + GEN7_EMIT(3DSTATE_CLIP); + GEN7_EMIT(3DSTATE_SF); + GEN7_EMIT(3DSTATE_WM); + GEN7_EMIT(3DSTATE_CONSTANT_VS); + GEN7_EMIT(3DSTATE_CONSTANT_GS); + GEN7_EMIT(3DSTATE_CONSTANT_PS); + GEN7_EMIT(3DSTATE_SAMPLE_MASK); + 
GEN7_EMIT(3DSTATE_CONSTANT_HS); + GEN7_EMIT(3DSTATE_CONSTANT_DS); + GEN7_EMIT(3DSTATE_HS); + GEN7_EMIT(3DSTATE_TE); + GEN7_EMIT(3DSTATE_DS); + GEN7_EMIT(3DSTATE_STREAMOUT); + GEN7_EMIT(3DSTATE_SBE); + GEN7_EMIT(3DSTATE_PS); + GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); + GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC); + GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS); + GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS); + GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS); + GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS); + GEN7_EMIT(3DSTATE_URB_VS); + GEN7_EMIT(3DSTATE_URB_HS); + GEN7_EMIT(3DSTATE_URB_DS); + GEN7_EMIT(3DSTATE_URB_GS); + GEN7_EMIT(3DSTATE_DRAWING_RECTANGLE); + GEN7_EMIT(3DSTATE_POLY_STIPPLE_OFFSET); + GEN7_EMIT(3DSTATE_POLY_STIPPLE_PATTERN); + GEN7_EMIT(3DSTATE_LINE_STIPPLE); + GEN7_EMIT(3DSTATE_AA_LINE_PARAMETERS); + GEN7_EMIT(3DSTATE_MULTISAMPLE); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS); + GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS); + GEN7_EMIT(3DSTATE_SO_DECL_LIST); + GEN7_EMIT(3DSTATE_SO_BUFFER); + GEN7_EMIT(PIPE_CONTROL); + GEN7_EMIT(3DPRIMITIVE); + GEN7_EMIT(INTERFACE_DESCRIPTOR_DATA); + GEN7_EMIT(SF_CLIP_VIEWPORT); + GEN7_EMIT(CC_VIEWPORT); + GEN7_EMIT(COLOR_CALC_STATE); + GEN7_EMIT(BLEND_STATE); + GEN7_EMIT(DEPTH_STENCIL_STATE); + GEN7_EMIT(SCISSOR_RECT); + GEN7_EMIT(BINDING_TABLE_STATE); + GEN7_EMIT(SURFACE_STATE); + GEN7_EMIT(SAMPLER_STATE); + GEN7_EMIT(SAMPLER_BORDER_COLOR_STATE); + GEN7_EMIT(push_constant_buffer); +#undef GEN7_EMIT +}; 
+ +const struct ilo_gpe_gen7 * +ilo_gpe_gen7_get(void); #endif /* ILO_GPE_GEN7_H */ diff --git a/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h b/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h index 04c8854d2..fedd78c41 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h +++ b/dist/Mesa/src/gallium/drivers/ilo/include/brw_defines.h @@ -1,6 +1,6 @@ /* Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to develop this 3D driver. Permission is hereby granted, free of charge, to any person obtaining @@ -26,7 +26,7 @@ **********************************************************************/ /* * Authors: - * Keith Whitwell <keithw@vmware.com> + * Keith Whitwell <keith@tungstengraphics.com> */ #define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low)) @@ -1723,4 +1723,6 @@ enum brw_wm_barycentric_interp_mode { */ #define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27) +#include "intel_chipset.h" + #endif diff --git a/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h b/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h index 07f8d3aca..c322edfbd 100644 --- a/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h +++ b/dist/Mesa/src/gallium/drivers/ilo/include/brw_structs.h @@ -1,6 +1,6 @@ /* Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to develop this 3D driver. Permission is hereby granted, free of charge, to any person obtaining @@ -26,7 +26,7 @@ **********************************************************************/ /* * Authors: - * Keith Whitwell <keithw@vmware.com> + * Keith Whitwell <keith@tungstengraphics.com> */ |