author | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000
---|---|---
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000
commit | c9223eed3c16cd3e98a8f56dda953d8f299de0e3 (patch) |
tree | 53e2a1c3f13bcf6b4ed201d7bc135e7213c94ebe | /lib/mesa/src/gallium/drivers/svga
parent | 6e8f2d062ab9c198239b9283b2b7ed12f4ea17d8 (diff) |
Import Mesa 11.2.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/svga')
87 files changed, 23892 insertions(+), 3718 deletions(-)
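
The largest single addition in this import is the new VGPU10ShaderTokens.h header, which describes the DX10-level shader byte stream as packed 32-bit token unions. For orientation only, the sketch below (not part of the import) shows how two of those unions are typically filled in to produce a program header and a RET instruction. It declares small local mirrors of the unions so it compiles on its own, assumes `uint32` maps to `uint32_t` as in svga_types.h, and uses a hypothetical `emit_dword()` output helper; keep in mind that bit-field layout is compiler-dependent, so the real header is only meant for the toolchains the driver supports.

```c
/*
 * Illustrative sketch: packing a VGPU10 program-header token and a RET
 * opcode token, mirroring the union layouts declared in the imported
 * VGPU10ShaderTokens.h.  Field values follow the enums in that header
 * (VGPU10_VERTEX_SHADER = 1, VGPU10_OPCODE_RET = 62).
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t uint32;   /* stand-in for the svga_types.h typedef */

/* Local mirror of VGPU10ProgramToken. */
typedef union {
   struct {
      unsigned int minorVersion : 4;
      unsigned int majorVersion : 4;
      unsigned int : 8;
      unsigned int programType : 16;   /* VGPU10_PROGRAM_TYPE */
   };
   uint32 value;
} ProgramToken;

/* Local mirror of VGPU10OpcodeToken0, with the unused middle fields
 * collapsed into one anonymous 13-bit pad (same bit positions). */
typedef union {
   struct {
      unsigned int opcodeType : 11;        /* VGPU10_OPCODE_TYPE */
      unsigned int : 13;
      unsigned int instructionLength : 7;  /* length in dwords */
      unsigned int extended : 1;
   };
   uint32 value;
} OpcodeToken0;

/* Hypothetical sink for the token stream. */
static void
emit_dword(uint32 dw)
{
   printf("0x%08x\n", (unsigned) dw);
}

int
main(void)
{
   ProgramToken header = { .value = 0 };
   OpcodeToken0 ret = { .value = 0 };

   header.majorVersion = 4;    /* shader model 4.0 */
   header.minorVersion = 0;
   header.programType = 1;     /* VGPU10_VERTEX_SHADER */

   ret.opcodeType = 62;        /* VGPU10_OPCODE_RET */
   ret.instructionLength = 1;  /* RET occupies a single token */

   emit_dword(header.value);
   /* A full stream would also carry a length dword and declarations
    * (DCL_* opcodes) between the header and the instructions. */
   emit_dword(ret.value);
   return 0;
}
```
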
diff --git a/lib/mesa/src/gallium/drivers/svga/Makefile.sources b/lib/mesa/src/gallium/drivers/svga/Makefile.sources index 276e6a8e2..5c022f437 100644 --- a/lib/mesa/src/gallium/drivers/svga/Makefile.sources +++ b/lib/mesa/src/gallium/drivers/svga/Makefile.sources @@ -1,6 +1,7 @@ C_SOURCES := \ svga_cmd.c \ svga_cmd.h \ + svga_cmd_vgpu10.c \ svga_context.c \ svga_context.h \ svga_debug.h \ @@ -12,6 +13,8 @@ C_SOURCES := \ svga_format.c \ svga_format.h \ svga_hw_reg.h \ + svga_link.c \ + svga_link.h \ svga_pipe_blend.c \ svga_pipe_blit.c \ svga_pipe_clear.c \ @@ -20,10 +23,12 @@ C_SOURCES := \ svga_pipe_draw.c \ svga_pipe_flush.c \ svga_pipe_fs.c \ + svga_pipe_gs.c \ svga_pipe_misc.c \ svga_pipe_query.c \ svga_pipe_rasterizer.c \ svga_pipe_sampler.c \ + svga_pipe_streamout.c \ svga_pipe_vertex.c \ svga_pipe_vs.c \ svga_public.h \ @@ -44,15 +49,19 @@ C_SOURCES := \ svga_shader.c \ svga_shader.h \ svga_state.c \ + svga_state.h \ svga_state_constants.c \ svga_state_framebuffer.c \ svga_state_fs.c \ - svga_state.h \ + svga_state_gs.c \ svga_state_need_swtnl.c \ svga_state_rss.c \ + svga_state_sampler.c \ + svga_state_tgsi_transform.c \ svga_state_tss.c \ svga_state_vdecl.c \ svga_state_vs.c \ + svga_streamout.h \ svga_surface.c \ svga_surface.h \ svga_swtnl_backend.c \ @@ -65,6 +74,7 @@ C_SOURCES := \ svga_tgsi_emit.h \ svga_tgsi.h \ svga_tgsi_insn.c \ + svga_tgsi_vgpu10.c \ svga_winsys.h \ \ svgadump/svga_dump.c \ @@ -80,6 +90,7 @@ SVGA_H_FILES := \ include/svga3d_caps.h \ include/svga3d_cmd.h \ include/svga3d_devcaps.h \ + include/svga3d_dx.h \ include/svga3d_limits.h \ include/svga3d_reg.h \ include/svga3d_shaderdefs.h \ @@ -89,5 +100,6 @@ SVGA_H_FILES := \ include/svga_overlay.h \ include/svga_reg.h \ include/svga_types.h \ + include/VGPU10ShaderTokens.h \ include/vmware_pack_begin.h \ include/vmware_pack_end.h diff --git a/lib/mesa/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h b/lib/mesa/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h new file mode 100644 index 000000000..444023589 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h @@ -0,0 +1,489 @@ +/********************************************************** + * Copyright 2007-2015 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * VGPU10ShaderTokens.h -- + * + * VGPU10 shader token definitions. 
+ * + */ + +#ifndef VGPU10SHADERTOKENS_H +#define VGPU10SHADERTOKENS_H + +/* Shader limits */ +#define VGPU10_MAX_VS_INPUTS 16 +#define VGPU10_MAX_VS_OUTPUTS 16 +#define VGPU10_MAX_GS_INPUTS 16 +#define VGPU10_MAX_GS_OUTPUTS 32 +#define VGPU10_MAX_FS_INPUTS 32 +#define VGPU10_MAX_FS_OUTPUTS 8 +#define VGPU10_MAX_TEMPS 4096 +#define VGPU10_MAX_CONSTANT_BUFFERS 14 +#define VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT 4096 +#define VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT 4096 +#define VGPU10_MAX_SAMPLERS 16 +#define VGPU10_MAX_RESOURCES 128 +#define VGPU10_MIN_TEXEL_FETCH_OFFSET -8 +#define VGPU10_MAX_TEXEL_FETCH_OFFSET 7 + +typedef enum { + VGPU10_PIXEL_SHADER = 0, + VGPU10_VERTEX_SHADER = 1, + VGPU10_GEOMETRY_SHADER = 2 +} VGPU10_PROGRAM_TYPE; + +typedef union { + struct { + unsigned int minorVersion : 4; + unsigned int majorVersion : 4; + unsigned int : 8; + unsigned int programType : 16; /* VGPU10_PROGRAM_TYPE */ + }; + uint32 value; +} VGPU10ProgramToken; + + +typedef enum { + VGPU10_OPCODE_ADD = 0, + VGPU10_OPCODE_AND = 1, + VGPU10_OPCODE_BREAK = 2, + VGPU10_OPCODE_BREAKC = 3, + VGPU10_OPCODE_CALL = 4, + VGPU10_OPCODE_CALLC = 5, + VGPU10_OPCODE_CASE = 6, + VGPU10_OPCODE_CONTINUE = 7, + VGPU10_OPCODE_CONTINUEC = 8, + VGPU10_OPCODE_CUT = 9, + VGPU10_OPCODE_DEFAULT = 10, + VGPU10_OPCODE_DERIV_RTX = 11, + VGPU10_OPCODE_DERIV_RTY = 12, + VGPU10_OPCODE_DISCARD = 13, + VGPU10_OPCODE_DIV = 14, + VGPU10_OPCODE_DP2 = 15, + VGPU10_OPCODE_DP3 = 16, + VGPU10_OPCODE_DP4 = 17, + VGPU10_OPCODE_ELSE = 18, + VGPU10_OPCODE_EMIT = 19, + VGPU10_OPCODE_EMITTHENCUT = 20, + VGPU10_OPCODE_ENDIF = 21, + VGPU10_OPCODE_ENDLOOP = 22, + VGPU10_OPCODE_ENDSWITCH = 23, + VGPU10_OPCODE_EQ = 24, + VGPU10_OPCODE_EXP = 25, + VGPU10_OPCODE_FRC = 26, + VGPU10_OPCODE_FTOI = 27, + VGPU10_OPCODE_FTOU = 28, + VGPU10_OPCODE_GE = 29, + VGPU10_OPCODE_IADD = 30, + VGPU10_OPCODE_IF = 31, + VGPU10_OPCODE_IEQ = 32, + VGPU10_OPCODE_IGE = 33, + VGPU10_OPCODE_ILT = 34, + VGPU10_OPCODE_IMAD = 35, + VGPU10_OPCODE_IMAX = 36, + VGPU10_OPCODE_IMIN = 37, + VGPU10_OPCODE_IMUL = 38, + VGPU10_OPCODE_INE = 39, + VGPU10_OPCODE_INEG = 40, + VGPU10_OPCODE_ISHL = 41, + VGPU10_OPCODE_ISHR = 42, + VGPU10_OPCODE_ITOF = 43, + VGPU10_OPCODE_LABEL = 44, + VGPU10_OPCODE_LD = 45, + VGPU10_OPCODE_LD_MS = 46, + VGPU10_OPCODE_LOG = 47, + VGPU10_OPCODE_LOOP = 48, + VGPU10_OPCODE_LT = 49, + VGPU10_OPCODE_MAD = 50, + VGPU10_OPCODE_MIN = 51, + VGPU10_OPCODE_MAX = 52, + VGPU10_OPCODE_CUSTOMDATA = 53, + VGPU10_OPCODE_MOV = 54, + VGPU10_OPCODE_MOVC = 55, + VGPU10_OPCODE_MUL = 56, + VGPU10_OPCODE_NE = 57, + VGPU10_OPCODE_NOP = 58, + VGPU10_OPCODE_NOT = 59, + VGPU10_OPCODE_OR = 60, + VGPU10_OPCODE_RESINFO = 61, + VGPU10_OPCODE_RET = 62, + VGPU10_OPCODE_RETC = 63, + VGPU10_OPCODE_ROUND_NE = 64, + VGPU10_OPCODE_ROUND_NI = 65, + VGPU10_OPCODE_ROUND_PI = 66, + VGPU10_OPCODE_ROUND_Z = 67, + VGPU10_OPCODE_RSQ = 68, + VGPU10_OPCODE_SAMPLE = 69, + VGPU10_OPCODE_SAMPLE_C = 70, + VGPU10_OPCODE_SAMPLE_C_LZ = 71, + VGPU10_OPCODE_SAMPLE_L = 72, + VGPU10_OPCODE_SAMPLE_D = 73, + VGPU10_OPCODE_SAMPLE_B = 74, + VGPU10_OPCODE_SQRT = 75, + VGPU10_OPCODE_SWITCH = 76, + VGPU10_OPCODE_SINCOS = 77, + VGPU10_OPCODE_UDIV = 78, + VGPU10_OPCODE_ULT = 79, + VGPU10_OPCODE_UGE = 80, + VGPU10_OPCODE_UMUL = 81, + VGPU10_OPCODE_UMAD = 82, + VGPU10_OPCODE_UMAX = 83, + VGPU10_OPCODE_UMIN = 84, + VGPU10_OPCODE_USHR = 85, + VGPU10_OPCODE_UTOF = 86, + VGPU10_OPCODE_XOR = 87, + VGPU10_OPCODE_DCL_RESOURCE = 88, + VGPU10_OPCODE_DCL_CONSTANT_BUFFER = 89, + VGPU10_OPCODE_DCL_SAMPLER = 90, + 
VGPU10_OPCODE_DCL_INDEX_RANGE = 91, + VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY = 92, + VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE = 93, + VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT = 94, + VGPU10_OPCODE_DCL_INPUT = 95, + VGPU10_OPCODE_DCL_INPUT_SGV = 96, + VGPU10_OPCODE_DCL_INPUT_SIV = 97, + VGPU10_OPCODE_DCL_INPUT_PS = 98, + VGPU10_OPCODE_DCL_INPUT_PS_SGV = 99, + VGPU10_OPCODE_DCL_INPUT_PS_SIV = 100, + VGPU10_OPCODE_DCL_OUTPUT = 101, + VGPU10_OPCODE_DCL_OUTPUT_SGV = 102, + VGPU10_OPCODE_DCL_OUTPUT_SIV = 103, + VGPU10_OPCODE_DCL_TEMPS = 104, + VGPU10_OPCODE_DCL_INDEXABLE_TEMP = 105, + VGPU10_OPCODE_DCL_GLOBAL_FLAGS = 106, + VGPU10_OPCODE_IDIV = 107, + VGPU10_NUM_OPCODES /* Should be the last entry. */ +} VGPU10_OPCODE_TYPE; + +typedef enum { + VGPU10_INTERPOLATION_UNDEFINED = 0, + VGPU10_INTERPOLATION_CONSTANT = 1, + VGPU10_INTERPOLATION_LINEAR = 2, + VGPU10_INTERPOLATION_LINEAR_CENTROID = 3, + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE = 4, + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5, + VGPU10_INTERPOLATION_LINEAR_SAMPLE = 6, /* DX10.1 */ + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7 /* DX10.1 */ +} VGPU10_INTERPOLATION_MODE; + +typedef enum { + VGPU10_RESOURCE_DIMENSION_UNKNOWN = 0, + VGPU10_RESOURCE_DIMENSION_BUFFER = 1, + VGPU10_RESOURCE_DIMENSION_TEXTURE1D = 2, + VGPU10_RESOURCE_DIMENSION_TEXTURE2D = 3, + VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS = 4, + VGPU10_RESOURCE_DIMENSION_TEXTURE3D = 5, + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE = 6, + VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY = 7, + VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY = 8, + VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9, + VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10 +} VGPU10_RESOURCE_DIMENSION; + +typedef enum { + VGPU10_SAMPLER_MODE_DEFAULT = 0, + VGPU10_SAMPLER_MODE_COMPARISON = 1, + VGPU10_SAMPLER_MODE_MONO = 2 +} VGPU10_SAMPLER_MODE; + +typedef enum { + VGPU10_INSTRUCTION_TEST_ZERO = 0, + VGPU10_INSTRUCTION_TEST_NONZERO = 1 +} VGPU10_INSTRUCTION_TEST_BOOLEAN; + +typedef enum { + VGPU10_CB_IMMEDIATE_INDEXED = 0, + VGPU10_CB_DYNAMIC_INDEXED = 1 +} VGPU10_CB_ACCESS_PATTERN; + +typedef enum { + VGPU10_PRIMITIVE_UNDEFINED = 0, + VGPU10_PRIMITIVE_POINT = 1, + VGPU10_PRIMITIVE_LINE = 2, + VGPU10_PRIMITIVE_TRIANGLE = 3, + VGPU10_PRIMITIVE_LINE_ADJ = 6, + VGPU10_PRIMITIVE_TRIANGLE_ADJ = 7 +} VGPU10_PRIMITIVE; + +typedef enum { + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST = 1, + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST = 2, + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP = 3, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13 +} VGPU10_PRIMITIVE_TOPOLOGY; + +typedef enum { + VGPU10_CUSTOMDATA_COMMENT = 0, + VGPU10_CUSTOMDATA_DEBUGINFO = 1, + VGPU10_CUSTOMDATA_OPAQUE = 2, + VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER = 3 +} VGPU10_CUSTOMDATA_CLASS; + +typedef enum { + VGPU10_RESINFO_RETURN_FLOAT = 0, + VGPU10_RESINFO_RETURN_RCPFLOAT = 1, + VGPU10_RESINFO_RETURN_UINT = 2 +} VGPU10_RESINFO_RETURN_TYPE; + +typedef union { + struct { + unsigned int opcodeType : 11; /* VGPU10_OPCODE_TYPE */ + unsigned int interpolationMode : 4; /* VGPU10_INTERPOLATION_MODE */ + unsigned int : 3; + unsigned int testBoolean : 1; /* VGPU10_INSTRUCTION_TEST_BOOLEAN */ + unsigned int : 5; + unsigned int instructionLength : 7; + unsigned int extended : 1; + }; + struct { + 
unsigned int : 11; + unsigned int resourceDimension : 5; /* VGPU10_RESOURCE_DIMENSION */ + }; + struct { + unsigned int : 11; + unsigned int samplerMode : 4; /* VGPU10_SAMPLER_MODE */ + }; + struct { + unsigned int : 11; + unsigned int accessPattern : 1; /* VGPU10_CB_ACCESS_PATTERN */ + }; + struct { + unsigned int : 11; + unsigned int primitive : 6; /* VGPU10_PRIMITIVE */ + }; + struct { + unsigned int : 11; + unsigned int primitiveTopology : 6; /* VGPU10_PRIMITIVE_TOPOLOGY */ + }; + struct { + unsigned int : 11; + unsigned int customDataClass : 21; /* VGPU10_CUSTOMDATA_CLASS */ + }; + struct { + unsigned int : 11; + unsigned int resinfoReturnType : 2; /* VGPU10_RESINFO_RETURN_TYPE */ + unsigned int saturate : 1; + }; + struct { + unsigned int : 11; + unsigned int refactoringAllowed : 1; + }; + uint32 value; +} VGPU10OpcodeToken0; + + +typedef enum { + VGPU10_EXTENDED_OPCODE_EMPTY = 0, + VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS +} VGPU10_EXTENDED_OPCODE_TYPE; + +typedef union { + struct { + unsigned int opcodeType : 6; /* VGPU10_EXTENDED_OPCODE_TYPE */ + unsigned int : 3; + unsigned int offsetU : 4; /* Two's complement. */ + unsigned int offsetV : 4; /* Two's complement. */ + unsigned int offsetW : 4; /* Two's complement. */ + unsigned int : 10; + unsigned int extended : 1; + }; + uint32 value; +} VGPU10OpcodeToken1; + + +typedef enum { + VGPU10_OPERAND_0_COMPONENT = 0, + VGPU10_OPERAND_1_COMPONENT = 1, + VGPU10_OPERAND_4_COMPONENT = 2, + VGPU10_OPERAND_N_COMPONENT = 3 /* Unused for now. */ +} VGPU10_OPERAND_NUM_COMPONENTS; + +typedef enum { + VGPU10_OPERAND_4_COMPONENT_MASK_MODE = 0, + VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE = 1, + VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE = 2 +} VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE; + +#define VGPU10_OPERAND_4_COMPONENT_MASK_X 0x1 +#define VGPU10_OPERAND_4_COMPONENT_MASK_Y 0x2 +#define VGPU10_OPERAND_4_COMPONENT_MASK_Z 0x4 +#define VGPU10_OPERAND_4_COMPONENT_MASK_W 0x8 + +#define VGPU10_OPERAND_4_COMPONENT_MASK_XY (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_Y) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XZ (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_Z) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XW (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_YZ (VGPU10_OPERAND_4_COMPONENT_MASK_Y | VGPU10_OPERAND_4_COMPONENT_MASK_Z) +#define VGPU10_OPERAND_4_COMPONENT_MASK_YW (VGPU10_OPERAND_4_COMPONENT_MASK_Y | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_ZW (VGPU10_OPERAND_4_COMPONENT_MASK_Z | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZ (VGPU10_OPERAND_4_COMPONENT_MASK_XY | VGPU10_OPERAND_4_COMPONENT_MASK_Z) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XYW (VGPU10_OPERAND_4_COMPONENT_MASK_XY | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XZW (VGPU10_OPERAND_4_COMPONENT_MASK_XZ | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_YZW (VGPU10_OPERAND_4_COMPONENT_MASK_YZ | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZW (VGPU10_OPERAND_4_COMPONENT_MASK_XYZ | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_ALL VGPU10_OPERAND_4_COMPONENT_MASK_XYZW + +#define VGPU10_REGISTER_INDEX_FROM_SEMANTIC 0xffffffff + +typedef enum { + VGPU10_COMPONENT_X = 0, + VGPU10_COMPONENT_Y = 1, + VGPU10_COMPONENT_Z = 2, + VGPU10_COMPONENT_W = 3 +} VGPU10_COMPONENT_NAME; + +typedef enum { + 
VGPU10_OPERAND_TYPE_TEMP = 0, + VGPU10_OPERAND_TYPE_INPUT = 1, + VGPU10_OPERAND_TYPE_OUTPUT = 2, + VGPU10_OPERAND_TYPE_INDEXABLE_TEMP = 3, + VGPU10_OPERAND_TYPE_IMMEDIATE32 = 4, + VGPU10_OPERAND_TYPE_IMMEDIATE64 = 5, + VGPU10_OPERAND_TYPE_SAMPLER = 6, + VGPU10_OPERAND_TYPE_RESOURCE = 7, + VGPU10_OPERAND_TYPE_CONSTANT_BUFFER = 8, + VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9, + VGPU10_OPERAND_TYPE_LABEL = 10, + VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID = 11, + VGPU10_OPERAND_TYPE_OUTPUT_DEPTH = 12, + VGPU10_OPERAND_TYPE_NULL = 13, + VGPU10_OPERAND_TYPE_RASTERIZER = 14, /* DX10.1 */ + VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15 /* DX10.1 */ +} VGPU10_OPERAND_TYPE; + +typedef enum { + VGPU10_OPERAND_INDEX_0D = 0, + VGPU10_OPERAND_INDEX_1D = 1, + VGPU10_OPERAND_INDEX_2D = 2, + VGPU10_OPERAND_INDEX_3D = 3 +} VGPU10_OPERAND_INDEX_DIMENSION; + +typedef enum { + VGPU10_OPERAND_INDEX_IMMEDIATE32 = 0, + VGPU10_OPERAND_INDEX_IMMEDIATE64 = 1, + VGPU10_OPERAND_INDEX_RELATIVE = 2, + VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, + VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4 +} VGPU10_OPERAND_INDEX_REPRESENTATION; + +typedef union { + struct { + unsigned int numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */ + unsigned int selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */ + unsigned int mask : 4; /* D3D10_SB_OPERAND_4_COMPONENT_MASK_* */ + unsigned int : 4; + unsigned int operandType : 8; /* VGPU10_OPERAND_TYPE */ + unsigned int indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */ + unsigned int index0Representation : 3; /* VGPU10_OPERAND_INDEX_REPRESENTATION */ + unsigned int index1Representation : 3; /* VGPU10_OPERAND_INDEX_REPRESENTATION */ + unsigned int : 3; + unsigned int extended : 1; + }; + struct { + unsigned int : 4; + unsigned int swizzleX : 2; /* VGPU10_COMPONENT_NAME */ + unsigned int swizzleY : 2; /* VGPU10_COMPONENT_NAME */ + unsigned int swizzleZ : 2; /* VGPU10_COMPONENT_NAME */ + unsigned int swizzleW : 2; /* VGPU10_COMPONENT_NAME */ + }; + struct { + unsigned int : 4; + unsigned int selectMask : 2; /* VGPU10_COMPONENT_NAME */ + }; + uint32 value; +} VGPU10OperandToken0; + + +typedef enum { + VGPU10_EXTENDED_OPERAND_EMPTY = 0, + VGPU10_EXTENDED_OPERAND_MODIFIER = 1 +} VGPU10_EXTENDED_OPERAND_TYPE; + +typedef enum { + VGPU10_OPERAND_MODIFIER_NONE = 0, + VGPU10_OPERAND_MODIFIER_NEG = 1, + VGPU10_OPERAND_MODIFIER_ABS = 2, + VGPU10_OPERAND_MODIFIER_ABSNEG = 3 +} VGPU10_OPERAND_MODIFIER; + +typedef union { + struct { + unsigned int extendedOperandType : 6; /* VGPU10_EXTENDED_OPERAND_TYPE */ + unsigned int operandModifier : 8; /* VGPU10_OPERAND_MODIFIER */ + unsigned int : 17; + unsigned int extended : 1; + }; + uint32 value; +} VGPU10OperandToken1; + + +typedef enum { + VGPU10_RETURN_TYPE_UNORM = 1, + VGPU10_RETURN_TYPE_SNORM = 2, + VGPU10_RETURN_TYPE_SINT = 3, + VGPU10_RETURN_TYPE_UINT = 4, + VGPU10_RETURN_TYPE_FLOAT = 5, + VGPU10_RETURN_TYPE_MIXED = 6 +} VGPU10_RESOURCE_RETURN_TYPE; + +typedef union { + struct { + unsigned int component0 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + unsigned int component1 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + unsigned int component2 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + unsigned int component3 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + }; + uint32 value; +} VGPU10ResourceReturnTypeToken; + + +typedef enum { + VGPU10_NAME_UNDEFINED = 0, + VGPU10_NAME_POSITION = 1, + VGPU10_NAME_CLIP_DISTANCE = 2, + VGPU10_NAME_CULL_DISTANCE = 3, + VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX = 4, + VGPU10_NAME_VIEWPORT_ARRAY_INDEX = 5, + 
VGPU10_NAME_VERTEX_ID = 6, + VGPU10_NAME_PRIMITIVE_ID = 7, + VGPU10_NAME_INSTANCE_ID = 8, + VGPU10_NAME_IS_FRONT_FACE = 9, + VGPU10_NAME_SAMPLE_INDEX = 10, +} VGPU10_SYSTEM_NAME; + +typedef union { + struct { + unsigned int name : 16; /* VGPU10_SYSTEM_NAME */ + }; + uint32 value; +} VGPU10NameToken; + +#endif diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h index c6c8e3667..01c8ba790 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -111,4 +111,4 @@ SVGA3dCapsRecord; typedef uint32 SVGA3dCapPair[2]; -#endif // _SVGA3D_CAPS_H_ +#endif diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h index 8953bf05f..c843417e8 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -53,130 +53,227 @@ * and up. */ -#define SVGA_3D_CMD_LEGACY_BASE 1000 -#define SVGA_3D_CMD_BASE 1040 - -#define SVGA_3D_CMD_SURFACE_DEFINE 1040 -#define SVGA_3D_CMD_SURFACE_DESTROY 1041 -#define SVGA_3D_CMD_SURFACE_COPY 1042 -#define SVGA_3D_CMD_SURFACE_STRETCHBLT 1043 -#define SVGA_3D_CMD_SURFACE_DMA 1044 -#define SVGA_3D_CMD_CONTEXT_DEFINE 1045 -#define SVGA_3D_CMD_CONTEXT_DESTROY 1046 -#define SVGA_3D_CMD_SETTRANSFORM 1047 -#define SVGA_3D_CMD_SETZRANGE 1048 -#define SVGA_3D_CMD_SETRENDERSTATE 1049 -#define SVGA_3D_CMD_SETRENDERTARGET 1050 -#define SVGA_3D_CMD_SETTEXTURESTATE 1051 -#define SVGA_3D_CMD_SETMATERIAL 1052 -#define SVGA_3D_CMD_SETLIGHTDATA 1053 -#define SVGA_3D_CMD_SETLIGHTENABLED 1054 -#define SVGA_3D_CMD_SETVIEWPORT 1055 -#define SVGA_3D_CMD_SETCLIPPLANE 1056 -#define SVGA_3D_CMD_CLEAR 1057 -#define SVGA_3D_CMD_PRESENT 1058 -#define SVGA_3D_CMD_SHADER_DEFINE 1059 -#define SVGA_3D_CMD_SHADER_DESTROY 1060 -#define SVGA_3D_CMD_SET_SHADER 1061 -#define SVGA_3D_CMD_SET_SHADER_CONST 1062 -#define SVGA_3D_CMD_DRAW_PRIMITIVES 1063 -#define SVGA_3D_CMD_SETSCISSORRECT 1064 -#define SVGA_3D_CMD_BEGIN_QUERY 1065 -#define SVGA_3D_CMD_END_QUERY 1066 -#define SVGA_3D_CMD_WAIT_FOR_QUERY 1067 -#define SVGA_3D_CMD_PRESENT_READBACK 1068 -#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN 1069 -#define SVGA_3D_CMD_SURFACE_DEFINE_V2 1070 -#define SVGA_3D_CMD_GENERATE_MIPMAPS 1071 -#define SVGA_3D_CMD_VIDEO_CREATE_DECODER 1072 -#define SVGA_3D_CMD_VIDEO_DESTROY_DECODER 1073 -#define SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR 1074 -#define SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR 1075 -#define SVGA_3D_CMD_VIDEO_DECODE_START_FRAME 1076 -#define SVGA_3D_CMD_VIDEO_DECODE_RENDER 1077 -#define SVGA_3D_CMD_VIDEO_DECODE_END_FRAME 1078 -#define SVGA_3D_CMD_VIDEO_PROCESS_FRAME 1079 -#define SVGA_3D_CMD_ACTIVATE_SURFACE 1080 -#define SVGA_3D_CMD_DEACTIVATE_SURFACE 1081 -#define SVGA_3D_CMD_SCREEN_DMA 1082 -#define 
SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE 1083 -#define SVGA_3D_CMD_OPEN_CONTEXT_SURFACE 1084 - -#define SVGA_3D_CMD_LOGICOPS_BITBLT 1085 -#define SVGA_3D_CMD_LOGICOPS_TRANSBLT 1086 -#define SVGA_3D_CMD_LOGICOPS_STRETCHBLT 1087 -#define SVGA_3D_CMD_LOGICOPS_COLORFILL 1088 -#define SVGA_3D_CMD_LOGICOPS_ALPHABLEND 1089 -#define SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND 1090 - -#define SVGA_3D_CMD_SET_OTABLE_BASE 1091 -#define SVGA_3D_CMD_READBACK_OTABLE 1092 - -#define SVGA_3D_CMD_DEFINE_GB_MOB 1093 -#define SVGA_3D_CMD_DESTROY_GB_MOB 1094 -#define SVGA_3D_CMD_REDEFINE_GB_MOB 1095 -#define SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING 1096 - -#define SVGA_3D_CMD_DEFINE_GB_SURFACE 1097 -#define SVGA_3D_CMD_DESTROY_GB_SURFACE 1098 -#define SVGA_3D_CMD_BIND_GB_SURFACE 1099 -#define SVGA_3D_CMD_COND_BIND_GB_SURFACE 1100 -#define SVGA_3D_CMD_UPDATE_GB_IMAGE 1101 -#define SVGA_3D_CMD_UPDATE_GB_SURFACE 1102 -#define SVGA_3D_CMD_READBACK_GB_IMAGE 1103 -#define SVGA_3D_CMD_READBACK_GB_SURFACE 1104 -#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE 1105 -#define SVGA_3D_CMD_INVALIDATE_GB_SURFACE 1106 - -#define SVGA_3D_CMD_DEFINE_GB_CONTEXT 1107 -#define SVGA_3D_CMD_DESTROY_GB_CONTEXT 1108 -#define SVGA_3D_CMD_BIND_GB_CONTEXT 1109 -#define SVGA_3D_CMD_READBACK_GB_CONTEXT 1110 -#define SVGA_3D_CMD_INVALIDATE_GB_CONTEXT 1111 - -#define SVGA_3D_CMD_DEFINE_GB_SHADER 1112 -#define SVGA_3D_CMD_DESTROY_GB_SHADER 1113 -#define SVGA_3D_CMD_BIND_GB_SHADER 1114 - -#define SVGA_3D_CMD_BIND_SHADERCONSTS 1115 - -#define SVGA_3D_CMD_BEGIN_GB_QUERY 1116 -#define SVGA_3D_CMD_END_GB_QUERY 1117 -#define SVGA_3D_CMD_WAIT_FOR_GB_QUERY 1118 - -#define SVGA_3D_CMD_NOP 1119 - -#define SVGA_3D_CMD_ENABLE_GART 1120 -#define SVGA_3D_CMD_DISABLE_GART 1121 -#define SVGA_3D_CMD_MAP_MOB_INTO_GART 1122 -#define SVGA_3D_CMD_UNMAP_GART_RANGE 1123 - -#define SVGA_3D_CMD_DEFINE_GB_SCREENTARGET 1124 -#define SVGA_3D_CMD_DESTROY_GB_SCREENTARGET 1125 -#define SVGA_3D_CMD_BIND_GB_SCREENTARGET 1126 -#define SVGA_3D_CMD_UPDATE_GB_SCREENTARGET 1127 - -#define SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL 1128 -#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL 1129 - -#define SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE 1130 - -#define SVGA_3D_CMD_GB_SCREEN_DMA 1131 -#define SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH 1132 -#define SVGA_3D_CMD_GB_MOB_FENCE 1133 -#define SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 1134 -#define SVGA_3D_CMD_DEFINE_GB_MOB64 1135 -#define SVGA_3D_CMD_REDEFINE_GB_MOB64 1136 -#define SVGA_3D_CMD_NOP_ERROR 1137 - -#define SVGA_3D_CMD_RESERVED1 1138 -#define SVGA_3D_CMD_RESERVED2 1139 -#define SVGA_3D_CMD_RESERVED3 1140 -#define SVGA_3D_CMD_RESERVED4 1141 -#define SVGA_3D_CMD_RESERVED5 1142 - -#define SVGA_3D_CMD_MAX 1203 -#define SVGA_3D_CMD_FUTURE_MAX 3000 +typedef enum { + SVGA_3D_CMD_LEGACY_BASE = 1000, + SVGA_3D_CMD_BASE = 1040, + + SVGA_3D_CMD_SURFACE_DEFINE = 1040, + SVGA_3D_CMD_SURFACE_DESTROY = 1041, + SVGA_3D_CMD_SURFACE_COPY = 1042, + SVGA_3D_CMD_SURFACE_STRETCHBLT = 1043, + SVGA_3D_CMD_SURFACE_DMA = 1044, + SVGA_3D_CMD_CONTEXT_DEFINE = 1045, + SVGA_3D_CMD_CONTEXT_DESTROY = 1046, + SVGA_3D_CMD_SETTRANSFORM = 1047, + SVGA_3D_CMD_SETZRANGE = 1048, + SVGA_3D_CMD_SETRENDERSTATE = 1049, + SVGA_3D_CMD_SETRENDERTARGET = 1050, + SVGA_3D_CMD_SETTEXTURESTATE = 1051, + SVGA_3D_CMD_SETMATERIAL = 1052, + SVGA_3D_CMD_SETLIGHTDATA = 1053, + SVGA_3D_CMD_SETLIGHTENABLED = 1054, + SVGA_3D_CMD_SETVIEWPORT = 1055, + SVGA_3D_CMD_SETCLIPPLANE = 1056, + SVGA_3D_CMD_CLEAR = 1057, + SVGA_3D_CMD_PRESENT = 1058, + SVGA_3D_CMD_SHADER_DEFINE = 1059, + SVGA_3D_CMD_SHADER_DESTROY = 1060, + 
SVGA_3D_CMD_SET_SHADER = 1061, + SVGA_3D_CMD_SET_SHADER_CONST = 1062, + SVGA_3D_CMD_DRAW_PRIMITIVES = 1063, + SVGA_3D_CMD_SETSCISSORRECT = 1064, + SVGA_3D_CMD_BEGIN_QUERY = 1065, + SVGA_3D_CMD_END_QUERY = 1066, + SVGA_3D_CMD_WAIT_FOR_QUERY = 1067, + SVGA_3D_CMD_PRESENT_READBACK = 1068, + SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN = 1069, + SVGA_3D_CMD_SURFACE_DEFINE_V2 = 1070, + SVGA_3D_CMD_GENERATE_MIPMAPS = 1071, + SVGA_3D_CMD_VIDEO_CREATE_DECODER = 1072, + SVGA_3D_CMD_VIDEO_DESTROY_DECODER = 1073, + SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR = 1074, + SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR = 1075, + SVGA_3D_CMD_VIDEO_DECODE_START_FRAME = 1076, + SVGA_3D_CMD_VIDEO_DECODE_RENDER = 1077, + SVGA_3D_CMD_VIDEO_DECODE_END_FRAME = 1078, + SVGA_3D_CMD_VIDEO_PROCESS_FRAME = 1079, + SVGA_3D_CMD_ACTIVATE_SURFACE = 1080, + SVGA_3D_CMD_DEACTIVATE_SURFACE = 1081, + SVGA_3D_CMD_SCREEN_DMA = 1082, + SVGA_3D_CMD_DEAD1 = 1083, + SVGA_3D_CMD_DEAD2 = 1084, + + SVGA_3D_CMD_LOGICOPS_BITBLT = 1085, + SVGA_3D_CMD_LOGICOPS_TRANSBLT = 1086, + SVGA_3D_CMD_LOGICOPS_STRETCHBLT = 1087, + SVGA_3D_CMD_LOGICOPS_COLORFILL = 1088, + SVGA_3D_CMD_LOGICOPS_ALPHABLEND = 1089, + SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND = 1090, + + SVGA_3D_CMD_SET_OTABLE_BASE = 1091, + SVGA_3D_CMD_READBACK_OTABLE = 1092, + + SVGA_3D_CMD_DEFINE_GB_MOB = 1093, + SVGA_3D_CMD_DESTROY_GB_MOB = 1094, + SVGA_3D_CMD_DEAD3 = 1095, + SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING = 1096, + + SVGA_3D_CMD_DEFINE_GB_SURFACE = 1097, + SVGA_3D_CMD_DESTROY_GB_SURFACE = 1098, + SVGA_3D_CMD_BIND_GB_SURFACE = 1099, + SVGA_3D_CMD_COND_BIND_GB_SURFACE = 1100, + SVGA_3D_CMD_UPDATE_GB_IMAGE = 1101, + SVGA_3D_CMD_UPDATE_GB_SURFACE = 1102, + SVGA_3D_CMD_READBACK_GB_IMAGE = 1103, + SVGA_3D_CMD_READBACK_GB_SURFACE = 1104, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE = 1105, + SVGA_3D_CMD_INVALIDATE_GB_SURFACE = 1106, + + SVGA_3D_CMD_DEFINE_GB_CONTEXT = 1107, + SVGA_3D_CMD_DESTROY_GB_CONTEXT = 1108, + SVGA_3D_CMD_BIND_GB_CONTEXT = 1109, + SVGA_3D_CMD_READBACK_GB_CONTEXT = 1110, + SVGA_3D_CMD_INVALIDATE_GB_CONTEXT = 1111, + + SVGA_3D_CMD_DEFINE_GB_SHADER = 1112, + SVGA_3D_CMD_DESTROY_GB_SHADER = 1113, + SVGA_3D_CMD_BIND_GB_SHADER = 1114, + + SVGA_3D_CMD_SET_OTABLE_BASE64 = 1115, + + SVGA_3D_CMD_BEGIN_GB_QUERY = 1116, + SVGA_3D_CMD_END_GB_QUERY = 1117, + SVGA_3D_CMD_WAIT_FOR_GB_QUERY = 1118, + + SVGA_3D_CMD_NOP = 1119, + + SVGA_3D_CMD_ENABLE_GART = 1120, + SVGA_3D_CMD_DISABLE_GART = 1121, + SVGA_3D_CMD_MAP_MOB_INTO_GART = 1122, + SVGA_3D_CMD_UNMAP_GART_RANGE = 1123, + + SVGA_3D_CMD_DEFINE_GB_SCREENTARGET = 1124, + SVGA_3D_CMD_DESTROY_GB_SCREENTARGET = 1125, + SVGA_3D_CMD_BIND_GB_SCREENTARGET = 1126, + SVGA_3D_CMD_UPDATE_GB_SCREENTARGET = 1127, + + SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL = 1128, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL = 1129, + + SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE = 1130, + + SVGA_3D_CMD_GB_SCREEN_DMA = 1131, + SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH = 1132, + SVGA_3D_CMD_GB_MOB_FENCE = 1133, + SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 = 1134, + SVGA_3D_CMD_DEFINE_GB_MOB64 = 1135, + SVGA_3D_CMD_REDEFINE_GB_MOB64 = 1136, + SVGA_3D_CMD_NOP_ERROR = 1137, + + SVGA_3D_CMD_SET_VERTEX_STREAMS = 1138, + SVGA_3D_CMD_SET_VERTEX_DECLS = 1139, + SVGA_3D_CMD_SET_VERTEX_DIVISORS = 1140, + SVGA_3D_CMD_DRAW = 1141, + SVGA_3D_CMD_DRAW_INDEXED = 1142, + + /* + * DX10 Commands + */ + SVGA_3D_CMD_DX_MIN = 1143, + SVGA_3D_CMD_DX_DEFINE_CONTEXT = 1143, + SVGA_3D_CMD_DX_DESTROY_CONTEXT = 1144, + SVGA_3D_CMD_DX_BIND_CONTEXT = 1145, + SVGA_3D_CMD_DX_READBACK_CONTEXT = 1146, + SVGA_3D_CMD_DX_INVALIDATE_CONTEXT = 1147, + 
SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER = 1148, + SVGA_3D_CMD_DX_SET_SHADER_RESOURCES = 1149, + SVGA_3D_CMD_DX_SET_SHADER = 1150, + SVGA_3D_CMD_DX_SET_SAMPLERS = 1151, + SVGA_3D_CMD_DX_DRAW = 1152, + SVGA_3D_CMD_DX_DRAW_INDEXED = 1153, + SVGA_3D_CMD_DX_DRAW_INSTANCED = 1154, + SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED = 1155, + SVGA_3D_CMD_DX_DRAW_AUTO = 1156, + SVGA_3D_CMD_DX_SET_INPUT_LAYOUT = 1157, + SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS = 1158, + SVGA_3D_CMD_DX_SET_INDEX_BUFFER = 1159, + SVGA_3D_CMD_DX_SET_TOPOLOGY = 1160, + SVGA_3D_CMD_DX_SET_RENDERTARGETS = 1161, + SVGA_3D_CMD_DX_SET_BLEND_STATE = 1162, + SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE = 1163, + SVGA_3D_CMD_DX_SET_RASTERIZER_STATE = 1164, + SVGA_3D_CMD_DX_DEFINE_QUERY = 1165, + SVGA_3D_CMD_DX_DESTROY_QUERY = 1166, + SVGA_3D_CMD_DX_BIND_QUERY = 1167, + SVGA_3D_CMD_DX_SET_QUERY_OFFSET = 1168, + SVGA_3D_CMD_DX_BEGIN_QUERY = 1169, + SVGA_3D_CMD_DX_END_QUERY = 1170, + SVGA_3D_CMD_DX_READBACK_QUERY = 1171, + SVGA_3D_CMD_DX_SET_PREDICATION = 1172, + SVGA_3D_CMD_DX_SET_SOTARGETS = 1173, + SVGA_3D_CMD_DX_SET_VIEWPORTS = 1174, + SVGA_3D_CMD_DX_SET_SCISSORRECTS = 1175, + SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW = 1176, + SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW = 1177, + SVGA_3D_CMD_DX_PRED_COPY_REGION = 1178, + SVGA_3D_CMD_DX_PRED_COPY = 1179, + SVGA_3D_CMD_DX_STRETCHBLT = 1180, + SVGA_3D_CMD_DX_GENMIPS = 1181, + SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE = 1182, + SVGA_3D_CMD_DX_READBACK_SUBRESOURCE = 1183, + SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE = 1184, + SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW = 1185, + SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW = 1186, + SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW = 1187, + SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW = 1188, + SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW = 1189, + SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW = 1190, + SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT = 1191, + SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT = 1192, + SVGA_3D_CMD_DX_DEFINE_BLEND_STATE = 1193, + SVGA_3D_CMD_DX_DESTROY_BLEND_STATE = 1194, + SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE = 1195, + SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE = 1196, + SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE = 1197, + SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE = 1198, + SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE = 1199, + SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE = 1200, + SVGA_3D_CMD_DX_DEFINE_SHADER = 1201, + SVGA_3D_CMD_DX_DESTROY_SHADER = 1202, + SVGA_3D_CMD_DX_BIND_SHADER = 1203, + SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT = 1204, + SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT = 1205, + SVGA_3D_CMD_DX_SET_STREAMOUTPUT = 1206, + SVGA_3D_CMD_DX_SET_COTABLE = 1207, + SVGA_3D_CMD_DX_READBACK_COTABLE = 1208, + SVGA_3D_CMD_DX_BUFFER_COPY = 1209, + SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER = 1210, + SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK = 1211, + SVGA_3D_CMD_DX_MOVE_QUERY = 1212, + SVGA_3D_CMD_DX_BIND_ALL_QUERY = 1213, + SVGA_3D_CMD_DX_READBACK_ALL_QUERY = 1214, + SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER = 1215, + SVGA_3D_CMD_DX_MOB_FENCE_64 = 1216, + SVGA_3D_CMD_DX_BIND_ALL_SHADER = 1217, + SVGA_3D_CMD_DX_HINT = 1218, + SVGA_3D_CMD_DX_BUFFER_UPDATE = 1219, + SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET = 1220, + SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET = 1221, + SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET = 1222, + + /* + * Reserve some IDs to be used for the DX11 shader types. 
+ */ + SVGA_3D_CMD_DX_RESERVED1 = 1223, + SVGA_3D_CMD_DX_RESERVED2 = 1224, + SVGA_3D_CMD_DX_RESERVED3 = 1225, + + SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER = 1226, + + SVGA_3D_CMD_DX_MAX = 1227, + SVGA_3D_CMD_MAX = 1227, + SVGA_3D_CMD_FUTURE_MAX = 3000 +} SVGAFifo3dCmdId; /* * FIFO command format definitions: @@ -194,54 +291,6 @@ struct { #include "vmware_pack_end.h" SVGA3dCmdHeader; -typedef enum { - SVGA3D_SURFACE_CUBEMAP = (1 << 0), - - /* - * HINT flags are not enforced by the device but are useful for - * performance. - */ - SVGA3D_SURFACE_HINT_STATIC = (1 << 1), - SVGA3D_SURFACE_HINT_DYNAMIC = (1 << 2), - SVGA3D_SURFACE_HINT_INDEXBUFFER = (1 << 3), - SVGA3D_SURFACE_HINT_VERTEXBUFFER = (1 << 4), - SVGA3D_SURFACE_HINT_TEXTURE = (1 << 5), - SVGA3D_SURFACE_HINT_RENDERTARGET = (1 << 6), - SVGA3D_SURFACE_HINT_DEPTHSTENCIL = (1 << 7), - SVGA3D_SURFACE_HINT_WRITEONLY = (1 << 8), - SVGA3D_SURFACE_MASKABLE_ANTIALIAS = (1 << 9), - SVGA3D_SURFACE_AUTOGENMIPMAPS = (1 << 10), - SVGA3D_SURFACE_DECODE_RENDERTARGET = (1 << 11), - - /* - * Is this surface using a base-level pitch for it's mob backing? - * - * This flag is not intended to be set by guest-drivers, but is instead - * set by the device when the surface is bound to a mob with a specified - * pitch. - */ - SVGA3D_SURFACE_MOB_PITCH = (1 << 12), - - SVGA3D_SURFACE_INACTIVE = (1 << 13), - SVGA3D_SURFACE_HINT_RT_LOCKABLE = (1 << 14), - SVGA3D_SURFACE_VOLUME = (1 << 15), - - /* - * Required to be set on a surface to bind it to a screen target. - */ - SVGA3D_SURFACE_SCREENTARGET = (1 << 16), - - SVGA3D_SURFACE_RESERVED1 = (1 << 17), - SVGA3D_SURFACE_1D = (1 << 18), - SVGA3D_SURFACE_ARRAY = (1 << 19), - -} SVGA3dSurfaceFlags; - -#define SVGA3D_SURFACE_HB_DISALLOWED_MASK (SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_MOB_PITCH | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT) - typedef #include "vmware_pack_begin.h" struct { @@ -669,6 +718,128 @@ SVGA3dCmdDrawPrimitives; /* SVGA_3D_CMD_DRAWPRIMITIVES */ typedef #include "vmware_pack_begin.h" struct { + uint32 cid; + + uint32 primitiveCount; /* How many primitives to render */ + uint32 startVertexLocation; /* Which vertex do we start rendering at. */ + + uint8 primitiveType; /* SVGA3dPrimitiveType */ + uint8 padding[3]; +} +#include "vmware_pack_end.h" +SVGA3dCmdDraw; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + + uint8 primitiveType; /* SVGA3dPrimitiveType */ + + uint32 indexBufferSid; /* Valid index buffer sid. */ + uint32 indexBufferOffset; /* Byte offset into the vertex buffer, almost */ + /* always 0 for DX9 guests, non-zero for OpenGL */ + /* guests. We can't represent non-multiple of */ + /* stride offsets in D3D9Renderer... */ + uint8 indexBufferStride; /* Allowable values = 1, 2, or 4 */ + + int32 baseVertexLocation; /* Bias applied to the index when selecting a */ + /* vertex from the streams, may be negative */ + + uint32 primitiveCount; /* How many primitives to render */ + uint32 pad0; + uint16 pad1; +} +#include "vmware_pack_end.h" +SVGA3dCmdDrawIndexed; + +typedef +#include "vmware_pack_begin.h" +struct { + /* + * Describe a vertex array's data type, and define how it is to be + * used by the fixed function pipeline or the vertex shader. It + * isn't useful to have two VertexDecls with the same + * VertexArrayIdentity in one draw call. 
+ */ + uint16 streamOffset; + uint8 stream; + uint8 type; /* SVGA3dDeclType */ + uint8 method; /* SVGA3dDeclMethod */ + uint8 usage; /* SVGA3dDeclUsage */ + uint8 usageIndex; + uint8 padding; + +} +#include "vmware_pack_end.h" +SVGA3dVertexElement; + +/* + * Should the vertex element respect the stream value? The high bit of the + * stream should be set to indicate that the stream should be respected. If + * the high bit is not set, the stream will be ignored and replaced by the index + * of the position of the currently considered vertex element. + * + * All guests should set this bit and correctly specify the stream going + * forward. + */ +#define SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM (1 << 7) + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + + uint32 numElements; + + /* + * Followed by numElements SVGA3dVertexElement structures. + * + * If numElements < SVGA3D_MAX_VERTEX_ARRAYS, the remaining elements + * are cleared and will not be used by following draws. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdSetVertexDecls; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 sid; + uint32 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dVertexStream; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + + uint32 numStreams; + /* + * Followed by numStream SVGA3dVertexStream structures. + * + * If numStreams < SVGA3D_MAX_VERTEX_ARRAYS, the remaining streams + * are cleared and will not be used by following draws. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdSetVertexStreams; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + uint32 numDivisors; +} +#include "vmware_pack_end.h" +SVGA3dCmdSetVertexDivisors; + +typedef +#include "vmware_pack_begin.h" +struct { uint32 stage; SVGA3dTextureStateName name; union { @@ -989,38 +1160,6 @@ struct SVGA3dCmdScreenDMA { SVGA3dCmdScreenDMA; /* SVGA_3D_CMD_SCREEN_DMA */ /* - * Set Unity Surface Cookie - * - * Associates the supplied cookie with the surface id for use with - * Unity. This cookie is a hint from guest to host, there is no way - * for the guest to readback the cookie and the host is free to drop - * the cookie association at will. The default value for the cookie - * on all surfaces is 0. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdSetUnitySurfaceCookie { - uint32 sid; - uint64 cookie; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetUnitySurfaceCookie; /* SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE */ - -/* - * Open a context-specific surface in a non-context-specific manner. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdOpenContextSurface { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdOpenContextSurface; /* SVGA_3D_CMD_OPEN_CONTEXT_SURFACE */ - - -/* * Logic ops */ @@ -1139,8 +1278,8 @@ struct SVGA3dCmdLogicOpsClearTypeBlend { uint32 gamma; uint32 color; uint32 color2; - int alphaOffsetX; - int alphaOffsetY; + int32 alphaOffsetX; + int32 alphaOffsetY; /* Followed by variable number of SVGA3dBox structures */ } #include "vmware_pack_end.h" @@ -1151,12 +1290,80 @@ SVGA3dCmdLogicOpsClearTypeBlend; /* SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND */ * Guest-backed objects definitions. 
*/ +typedef +#include "vmware_pack_begin.h" +struct { + SVGAMobFormat ptDepth; + uint32 sizeInBytes; + PPN64 base; +} +#include "vmware_pack_end.h" +SVGAOTableMobEntry; +#define SVGA3D_OTABLE_MOB_ENTRY_SIZE (sizeof(SVGAOTableMobEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceFormat format; + SVGA3dSurfaceFlags surfaceFlags; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + SVGAMobId mobid; + uint32 arraySize; + uint32 mobPitch; + uint32 pad[5]; +} +#include "vmware_pack_end.h" +SVGAOTableSurfaceEntry; +#define SVGA3D_OTABLE_SURFACE_ENTRY_SIZE (sizeof(SVGAOTableSurfaceEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGAOTableContextEntry; +#define SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE (sizeof(SVGAOTableContextEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dShaderType type; + uint32 sizeInBytes; + uint32 offsetInBytes; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGAOTableShaderEntry; +#define SVGA3D_OTABLE_SHADER_ENTRY_SIZE (sizeof(SVGAOTableShaderEntry)) + #define SVGA_STFLAG_PRIMARY (1 << 0) typedef uint32 SVGAScreenTargetFlags; typedef #include "vmware_pack_begin.h" struct { + SVGA3dSurfaceImageId image; + uint32 width; + uint32 height; + int32 xRoot; + int32 yRoot; + SVGAScreenTargetFlags flags; + uint32 dpi; + uint32 pad[7]; +} +#include "vmware_pack_end.h" +SVGAOTableScreenTargetEntry; +#define SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE \ + (sizeof(SVGAOTableScreenTargetEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { float value[4]; } #include "vmware_pack_end.h" @@ -1178,6 +1385,209 @@ struct { #include "vmware_pack_end.h" SVGA3dShaderConstBool; +typedef +#include "vmware_pack_begin.h" +struct { + uint16 streamOffset; + uint8 stream; + uint8 type; + uint8 methodUsage; + uint8 usageIndex; +} +#include "vmware_pack_end.h" +SVGAGBVertexElement; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 sid; + uint16 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGAGBVertexStream; +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dRect viewport; + SVGA3dRect scissorRect; + SVGA3dZRange zRange; + + SVGA3dSurfaceImageId renderTargets[SVGA3D_RT_MAX]; + SVGAGBVertexElement decl1[4]; + + uint32 renderStates[SVGA3D_RS_MAX]; + SVGAGBVertexElement decl2[18]; + uint32 pad0[2]; + + struct { + SVGA3dFace face; + SVGA3dMaterial material; + } material; + + float clipPlanes[SVGA3D_NUM_CLIPPLANES][4]; + float matrices[SVGA3D_TRANSFORM_MAX][16]; + + SVGA3dBool lightEnabled[SVGA3D_NUM_LIGHTS]; + SVGA3dLightData lightData[SVGA3D_NUM_LIGHTS]; + + /* + * Shaders currently bound + */ + uint32 shaders[SVGA3D_NUM_SHADERTYPE_PREDX]; + SVGAGBVertexElement decl3[10]; + uint32 pad1[3]; + + uint32 occQueryActive; + uint32 occQueryValue; + + /* + * Int/Bool Shader constants + */ + SVGA3dShaderConstInt pShaderIValues[SVGA3D_CONSTINTREG_MAX]; + SVGA3dShaderConstInt vShaderIValues[SVGA3D_CONSTINTREG_MAX]; + uint16 pShaderBValues; + uint16 vShaderBValues; + + + SVGAGBVertexStream streams[SVGA3D_MAX_VERTEX_ARRAYS]; + SVGA3dVertexDivisor divisors[SVGA3D_MAX_VERTEX_ARRAYS]; + uint32 numVertexDecls; + uint32 numVertexStreams; + uint32 numVertexDivisors; + uint32 pad2[30]; + + /* + * Texture Stages + * + * SVGA3D_TS_INVALID through SVGA3D_TS_CONSTANT are in the + * textureStages array. + * SVGA3D_TS_COLOR_KEY is in tsColorKey. 
+ */ + uint32 tsColorKey[SVGA3D_NUM_TEXTURE_UNITS]; + uint32 textureStages[SVGA3D_NUM_TEXTURE_UNITS][SVGA3D_TS_CONSTANT + 1]; + uint32 tsColorKeyEnable[SVGA3D_NUM_TEXTURE_UNITS]; + + /* + * Float Shader constants. + */ + SVGA3dShaderConstFloat pShaderFValues[SVGA3D_CONSTREG_MAX]; + SVGA3dShaderConstFloat vShaderFValues[SVGA3D_CONSTREG_MAX]; +} +#include "vmware_pack_end.h" +SVGAGBContextData; +#define SVGA3D_CONTEXT_DATA_SIZE (sizeof(SVGAGBContextData)) + +/* + * SVGA3dCmdSetOTableBase -- + * + * This command allows the guest to specify the base PPN of the + * specified object table. + */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGAOTableType type; + PPN baseAddress; + uint32 sizeInBytes; + uint32 validSizeInBytes; + SVGAMobFormat ptDepth; +} +#include "vmware_pack_end.h" +SVGA3dCmdSetOTableBase; /* SVGA_3D_CMD_SET_OTABLE_BASE */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGAOTableType type; + PPN64 baseAddress; + uint32 sizeInBytes; + uint32 validSizeInBytes; + SVGAMobFormat ptDepth; +} +#include "vmware_pack_end.h" +SVGA3dCmdSetOTableBase64; /* SVGA_3D_CMD_SET_OTABLE_BASE64 */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGAOTableType type; +} +#include "vmware_pack_end.h" +SVGA3dCmdReadbackOTable; /* SVGA_3D_CMD_READBACK_OTABLE */ + +/* + * Define a memory object (Mob) in the OTable. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDefineGBMob { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN base; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDefineGBMob; /* SVGA_3D_CMD_DEFINE_GB_MOB */ + + +/* + * Destroys an object in the OTable. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDestroyGBMob { + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDestroyGBMob; /* SVGA_3D_CMD_DESTROY_GB_MOB */ + + +/* + * Define a memory object (Mob) in the OTable with a PPN64 base. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDefineGBMob64 { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN64 base; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDefineGBMob64; /* SVGA_3D_CMD_DEFINE_GB_MOB64 */ + +/* + * Redefine an object in the OTable with PPN64 base. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdRedefineGBMob64 { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN64 base; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdRedefineGBMob64; /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */ + +/* + * Notification that the page tables have been modified. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdUpdateGBMobMapping { + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdUpdateGBMobMapping; /* SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING */ + /* * Define a guest-backed surface. 
*/ @@ -1243,7 +1653,7 @@ SVGA3dCmdBindGBSurfaceWithPitch; /* SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH */ typedef #include "vmware_pack_begin.h" -struct{ +struct SVGA3dCmdCondBindGBSurface { uint32 sid; SVGAMobId testMobid; SVGAMobId mobid; @@ -1477,18 +1887,6 @@ struct SVGA3dCmdDestroyGBShader { #include "vmware_pack_end.h" SVGA3dCmdDestroyGBShader; /* SVGA_3D_CMD_DESTROY_GB_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdBindGBShaderConsts { - uint32 cid; - SVGA3dShaderType shaderType; - SVGA3dShaderConstType shaderConstType; - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdBindGBShaderConsts; /* SVGA_3D_CMD_BIND_SHADERCONSTS */ - typedef #include "vmware_pack_begin.h" struct { @@ -1553,7 +1951,7 @@ typedef #include "vmware_pack_begin.h" struct { SVGAMobId mobid; - uint32 fbOffset; + uint32 mustBeZero; uint32 initialized; } #include "vmware_pack_end.h" @@ -1649,6 +2047,6 @@ struct { uint32 mobOffset; } #include "vmware_pack_end.h" -SVGA3dCmdGBMobFence; /* SVGA_3D_CMD_GB_MOB_FENCE*/ +SVGA3dCmdGBMobFence; /* SVGA_3D_CMD_GB_MOB_FENCE */ -#endif // _SVGA3D_CMD_H_ +#endif /* _SVGA3D_CMD_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h index 915f3c757..ade210b41 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -223,9 +223,230 @@ typedef enum { */ SVGA3D_DEVCAP_TS_COLOR_KEY = 93, /* boolean */ + /* + * Deprecated. + */ + SVGA3D_DEVCAP_DEAD2 = 94, + + /* + * Does the device support the DX commands? + */ + SVGA3D_DEVCAP_DX = 95, + + /* + * What is the maximum size of a texture array? + * + * (Even if this cap is zero, cubemaps are still allowed.) + */ + SVGA3D_DEVCAP_MAX_TEXTURE_ARRAY_SIZE = 96, + + /* + * What is the maximum number of vertex buffers that can + * be used in the DXContext inputAssembly? + */ + SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS = 97, + + /* + * What is the maximum number of constant buffers + * that can be expected to work correctly with a + * DX context? + */ + SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS = 98, + + /* + * Does the device support provoking vertex control? + * If zero, the first vertex will always be the provoking vertex. 
+ */ + SVGA3D_DEVCAP_DX_PROVOKING_VERTEX = 99, + + SVGA3D_DEVCAP_DXFMT_X8R8G8B8 = 100, + SVGA3D_DEVCAP_DXFMT_A8R8G8B8 = 101, + SVGA3D_DEVCAP_DXFMT_R5G6B5 = 102, + SVGA3D_DEVCAP_DXFMT_X1R5G5B5 = 103, + SVGA3D_DEVCAP_DXFMT_A1R5G5B5 = 104, + SVGA3D_DEVCAP_DXFMT_A4R4G4B4 = 105, + SVGA3D_DEVCAP_DXFMT_Z_D32 = 106, + SVGA3D_DEVCAP_DXFMT_Z_D16 = 107, + SVGA3D_DEVCAP_DXFMT_Z_D24S8 = 108, + SVGA3D_DEVCAP_DXFMT_Z_D15S1 = 109, + SVGA3D_DEVCAP_DXFMT_LUMINANCE8 = 110, + SVGA3D_DEVCAP_DXFMT_LUMINANCE4_ALPHA4 = 111, + SVGA3D_DEVCAP_DXFMT_LUMINANCE16 = 112, + SVGA3D_DEVCAP_DXFMT_LUMINANCE8_ALPHA8 = 113, + SVGA3D_DEVCAP_DXFMT_DXT1 = 114, + SVGA3D_DEVCAP_DXFMT_DXT2 = 115, + SVGA3D_DEVCAP_DXFMT_DXT3 = 116, + SVGA3D_DEVCAP_DXFMT_DXT4 = 117, + SVGA3D_DEVCAP_DXFMT_DXT5 = 118, + SVGA3D_DEVCAP_DXFMT_BUMPU8V8 = 119, + SVGA3D_DEVCAP_DXFMT_BUMPL6V5U5 = 120, + SVGA3D_DEVCAP_DXFMT_BUMPX8L8V8U8 = 121, + SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD1 = 122, + SVGA3D_DEVCAP_DXFMT_ARGB_S10E5 = 123, + SVGA3D_DEVCAP_DXFMT_ARGB_S23E8 = 124, + SVGA3D_DEVCAP_DXFMT_A2R10G10B10 = 125, + SVGA3D_DEVCAP_DXFMT_V8U8 = 126, + SVGA3D_DEVCAP_DXFMT_Q8W8V8U8 = 127, + SVGA3D_DEVCAP_DXFMT_CxV8U8 = 128, + SVGA3D_DEVCAP_DXFMT_X8L8V8U8 = 129, + SVGA3D_DEVCAP_DXFMT_A2W10V10U10 = 130, + SVGA3D_DEVCAP_DXFMT_ALPHA8 = 131, + SVGA3D_DEVCAP_DXFMT_R_S10E5 = 132, + SVGA3D_DEVCAP_DXFMT_R_S23E8 = 133, + SVGA3D_DEVCAP_DXFMT_RG_S10E5 = 134, + SVGA3D_DEVCAP_DXFMT_RG_S23E8 = 135, + SVGA3D_DEVCAP_DXFMT_BUFFER = 136, + SVGA3D_DEVCAP_DXFMT_Z_D24X8 = 137, + SVGA3D_DEVCAP_DXFMT_V16U16 = 138, + SVGA3D_DEVCAP_DXFMT_G16R16 = 139, + SVGA3D_DEVCAP_DXFMT_A16B16G16R16 = 140, + SVGA3D_DEVCAP_DXFMT_UYVY = 141, + SVGA3D_DEVCAP_DXFMT_YUY2 = 142, + SVGA3D_DEVCAP_DXFMT_NV12 = 143, + SVGA3D_DEVCAP_DXFMT_AYUV = 144, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS = 145, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT = 146, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT = 147, + SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS = 148, + SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT = 149, + SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT = 150, + SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT = 151, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS = 152, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT = 153, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM = 154, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT = 155, + SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS = 156, + SVGA3D_DEVCAP_DXFMT_R32G32_UINT = 157, + SVGA3D_DEVCAP_DXFMT_R32G32_SINT = 158, + SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS = 159, + SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT = 160, + SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS = 161, + SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT = 162, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS = 163, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT = 164, + SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT = 165, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS = 166, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM = 167, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB = 168, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT = 169, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT = 170, + SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS = 171, + SVGA3D_DEVCAP_DXFMT_R16G16_UINT = 172, + SVGA3D_DEVCAP_DXFMT_R16G16_SINT = 173, + SVGA3D_DEVCAP_DXFMT_R32_TYPELESS = 174, + SVGA3D_DEVCAP_DXFMT_D32_FLOAT = 175, + SVGA3D_DEVCAP_DXFMT_R32_UINT = 176, + SVGA3D_DEVCAP_DXFMT_R32_SINT = 177, + SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS = 178, + SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT = 179, + SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS = 180, + SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT = 181, + SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS = 182, + SVGA3D_DEVCAP_DXFMT_R8G8_UNORM = 183, + SVGA3D_DEVCAP_DXFMT_R8G8_UINT = 
184, + SVGA3D_DEVCAP_DXFMT_R8G8_SINT = 185, + SVGA3D_DEVCAP_DXFMT_R16_TYPELESS = 186, + SVGA3D_DEVCAP_DXFMT_R16_UNORM = 187, + SVGA3D_DEVCAP_DXFMT_R16_UINT = 188, + SVGA3D_DEVCAP_DXFMT_R16_SNORM = 189, + SVGA3D_DEVCAP_DXFMT_R16_SINT = 190, + SVGA3D_DEVCAP_DXFMT_R8_TYPELESS = 191, + SVGA3D_DEVCAP_DXFMT_R8_UNORM = 192, + SVGA3D_DEVCAP_DXFMT_R8_UINT = 193, + SVGA3D_DEVCAP_DXFMT_R8_SNORM = 194, + SVGA3D_DEVCAP_DXFMT_R8_SINT = 195, + SVGA3D_DEVCAP_DXFMT_P8 = 196, + SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP = 197, + SVGA3D_DEVCAP_DXFMT_R8G8_B8G8_UNORM = 198, + SVGA3D_DEVCAP_DXFMT_G8R8_G8B8_UNORM = 199, + SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS = 200, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB = 201, + SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS = 202, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB = 203, + SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS = 204, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB = 205, + SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS = 206, + SVGA3D_DEVCAP_DXFMT_ATI1 = 207, + SVGA3D_DEVCAP_DXFMT_BC4_SNORM = 208, + SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS = 209, + SVGA3D_DEVCAP_DXFMT_ATI2 = 210, + SVGA3D_DEVCAP_DXFMT_BC5_SNORM = 211, + SVGA3D_DEVCAP_DXFMT_R10G10B10_XR_BIAS_A2_UNORM = 212, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS = 213, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB = 214, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS = 215, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB = 216, + SVGA3D_DEVCAP_DXFMT_Z_DF16 = 217, + SVGA3D_DEVCAP_DXFMT_Z_DF24 = 218, + SVGA3D_DEVCAP_DXFMT_Z_D24S8_INT = 219, + SVGA3D_DEVCAP_DXFMT_YV12 = 220, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT = 221, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT = 222, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM = 223, + SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT = 224, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM = 225, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM = 226, + SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT = 227, + SVGA3D_DEVCAP_DXFMT_R16G16_UNORM = 228, + SVGA3D_DEVCAP_DXFMT_R16G16_SNORM = 229, + SVGA3D_DEVCAP_DXFMT_R32_FLOAT = 230, + SVGA3D_DEVCAP_DXFMT_R8G8_SNORM = 231, + SVGA3D_DEVCAP_DXFMT_R16_FLOAT = 232, + SVGA3D_DEVCAP_DXFMT_D16_UNORM = 233, + SVGA3D_DEVCAP_DXFMT_A8_UNORM = 234, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM = 235, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM = 236, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM = 237, + SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM = 238, + SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM = 239, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM = 240, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM = 241, + SVGA3D_DEVCAP_DXFMT_BC4_UNORM = 242, + SVGA3D_DEVCAP_DXFMT_BC5_UNORM = 243, + SVGA3D_DEVCAP_MAX /* This must be the last index. */ } SVGA3dDevCapIndex; +/* + * Bit definitions for DXFMT devcaps + * + * + * SUPPORTED: Can the format be defined? + * SHADER_SAMPLE: Can the format be sampled from a shader? + * COLOR_RENDERTARGET: Can the format be a color render target? + * DEPTH_RENDERTARGET: Can the format be a depth render target? + * BLENDABLE: Is the format blendable? + * MIPS: Does the format support mip levels? + * ARRAY: Does the format support texture arrays? + * VOLUME: Does the format support having volume? + * MULTISAMPLE_2: Does the format support 2x multisample? + * MULTISAMPLE_4: Does the format support 4x multisample? + * MULTISAMPLE_8: Does the format support 8x multisample? 
+ */ +#define SVGA3D_DXFMT_SUPPORTED (1 << 0) +#define SVGA3D_DXFMT_SHADER_SAMPLE (1 << 1) +#define SVGA3D_DXFMT_COLOR_RENDERTARGET (1 << 2) +#define SVGA3D_DXFMT_DEPTH_RENDERTARGET (1 << 3) +#define SVGA3D_DXFMT_BLENDABLE (1 << 4) +#define SVGA3D_DXFMT_MIPS (1 << 5) +#define SVGA3D_DXFMT_ARRAY (1 << 6) +#define SVGA3D_DXFMT_VOLUME (1 << 7) +#define SVGA3D_DXFMT_DX_VERTEX_BUFFER (1 << 8) +#define SVGADX_DXFMT_MULTISAMPLE_2 (1 << 9) +#define SVGADX_DXFMT_MULTISAMPLE_4 (1 << 10) +#define SVGADX_DXFMT_MULTISAMPLE_8 (1 << 11) +#define SVGADX_DXFMT_MAX (1 << 12) + +/* + * Convenience mask for any multisample capability. + * + * The multisample bits imply both load and render capability. + */ +#define SVGA3D_DXFMT_MULTISAMPLE ( \ + SVGADX_DXFMT_MULTISAMPLE_2 | \ + SVGADX_DXFMT_MULTISAMPLE_4 | \ + SVGADX_DXFMT_MULTISAMPLE_8 ) + typedef union { Bool b; uint32 u; @@ -233,4 +454,4 @@ typedef union { float f; } SVGA3dDevCapResult; -#endif // _SVGA3D_DEVCAPS_H_ +#endif /* _SVGA3D_DEVCAPS_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_dx.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_dx.h new file mode 100644 index 000000000..fce2b0422 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_dx.h @@ -0,0 +1,1521 @@ +/********************************************************** + * Copyright 2007-2015 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_dx.h -- + * + * SVGA 3d hardware definitions for DX10 support. 
+ */ + +#ifndef _SVGA3D_DX_H_ +#define _SVGA3D_DX_H_ + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "svga3d_limits.h" + +#define SVGA3D_INPUT_MIN 0 +#define SVGA3D_INPUT_PER_VERTEX_DATA 0 +#define SVGA3D_INPUT_PER_INSTANCE_DATA 1 +#define SVGA3D_INPUT_MAX 2 +typedef uint32 SVGA3dInputClassification; + +#define SVGA3D_RESOURCE_TYPE_MIN 1 +#define SVGA3D_RESOURCE_BUFFER 1 +#define SVGA3D_RESOURCE_TEXTURE1D 2 +#define SVGA3D_RESOURCE_TEXTURE2D 3 +#define SVGA3D_RESOURCE_TEXTURE3D 4 +#define SVGA3D_RESOURCE_TEXTURECUBE 5 +#define SVGA3D_RESOURCE_TYPE_DX10_MAX 6 +#define SVGA3D_RESOURCE_BUFFEREX 6 +#define SVGA3D_RESOURCE_TYPE_MAX 7 +typedef uint32 SVGA3dResourceType; + +#define SVGA3D_DEPTH_WRITE_MASK_ZERO 0 +#define SVGA3D_DEPTH_WRITE_MASK_ALL 1 +typedef uint8 SVGA3dDepthWriteMask; + +#define SVGA3D_FILTER_MIP_LINEAR (1 << 0) +#define SVGA3D_FILTER_MAG_LINEAR (1 << 2) +#define SVGA3D_FILTER_MIN_LINEAR (1 << 4) +#define SVGA3D_FILTER_ANISOTROPIC (1 << 6) +#define SVGA3D_FILTER_COMPARE (1 << 7) +typedef uint32 SVGA3dFilter; + +#define SVGA3D_CULL_INVALID 0 +#define SVGA3D_CULL_MIN 1 +#define SVGA3D_CULL_NONE 1 +#define SVGA3D_CULL_FRONT 2 +#define SVGA3D_CULL_BACK 3 +#define SVGA3D_CULL_MAX 4 +typedef uint8 SVGA3dCullMode; + +#define SVGA3D_COMPARISON_INVALID 0 +#define SVGA3D_COMPARISON_MIN 1 +#define SVGA3D_COMPARISON_NEVER 1 +#define SVGA3D_COMPARISON_LESS 2 +#define SVGA3D_COMPARISON_EQUAL 3 +#define SVGA3D_COMPARISON_LESS_EQUAL 4 +#define SVGA3D_COMPARISON_GREATER 5 +#define SVGA3D_COMPARISON_NOT_EQUAL 6 +#define SVGA3D_COMPARISON_GREATER_EQUAL 7 +#define SVGA3D_COMPARISON_ALWAYS 8 +#define SVGA3D_COMPARISON_MAX 9 +typedef uint8 SVGA3dComparisonFunc; + +#define SVGA3D_DX_MAX_VERTEXBUFFERS 32 +#define SVGA3D_DX_MAX_VERTEXINPUTREGISTERS 16 +#define SVGA3D_DX_MAX_SOTARGETS 4 +#define SVGA3D_DX_MAX_SRVIEWS 128 +#define SVGA3D_DX_MAX_CONSTBUFFERS 16 +#define SVGA3D_DX_MAX_SAMPLERS 16 + +/* Id limits */ +static const uint32 SVGA3dBlendObjectCountPerContext = 4096; +static const uint32 SVGA3dDepthStencilObjectCountPerContext = 4096; + +typedef uint32 SVGA3dSurfaceId; +typedef uint32 SVGA3dShaderResourceViewId; +typedef uint32 SVGA3dRenderTargetViewId; +typedef uint32 SVGA3dDepthStencilViewId; + +typedef uint32 SVGA3dShaderId; +typedef uint32 SVGA3dElementLayoutId; +typedef uint32 SVGA3dSamplerId; +typedef uint32 SVGA3dBlendStateId; +typedef uint32 SVGA3dDepthStencilStateId; +typedef uint32 SVGA3dRasterizerStateId; +typedef uint32 SVGA3dQueryId; +typedef uint32 SVGA3dStreamOutputId; + +typedef union { + struct { + float r; + float g; + float b; + float a; + }; + + float value[4]; +} SVGA3dRGBAFloat; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGAOTableDXContextEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineContext; /* SVGA_3D_CMD_DX_DEFINE_CONTEXT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyContext; /* SVGA_3D_CMD_DX_DESTROY_CONTEXT */ + +/* + * Bind a DX context. + * + * validContents should be set to 0 for new contexts, + * and 1 if this is an old context which is getting paged + * back on to the device. 
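As a hedged sketch of how the bind-context command body (the struct defined just below) might be filled in, with validContents chosen per the rule above; the helper name is an assumption and the actual submission path through svga_cmd.c is not shown:

    #include <string.h>
    #include "svga3d_reg.h"

    /* Sketch: fill a DX bind-context command in caller-provided memory.
     * 'is_new' selects validContents = 0 (fresh context) vs. 1 (an old
     * context being paged back onto the device). */
    static void
    fill_dx_bind_context(SVGA3dCmdDXBindContext *cmd,
                         uint32 cid, SVGAMobId mobid, int is_new)
    {
       memset(cmd, 0, sizeof *cmd);
       cmd->cid = cid;
       cmd->mobid = mobid;
       cmd->validContents = is_new ? 0 : 1;
    }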
+ * + * For new contexts, it is recommended that the driver + * issue commands to initialize all interesting state + * prior to rendering. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindContext { + uint32 cid; + SVGAMobId mobid; + uint32 validContents; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindContext; /* SVGA_3D_CMD_DX_BIND_CONTEXT */ + +/* + * Readback a DX context. + * (Request that the device flush the contents back into guest memory.) + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackContext; /* SVGA_3D_CMD_DX_READBACK_CONTEXT */ + +/* + * Invalidate a guest-backed context. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXInvalidateContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXInvalidateContext; /* SVGA_3D_CMD_DX_INVALIDATE_CONTEXT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dReplyFormatData { + uint32 formatSupport; + uint32 msaa2xQualityLevels:5; + uint32 msaa4xQualityLevels:5; + uint32 msaa8xQualityLevels:5; + uint32 msaa16xQualityLevels:5; + uint32 msaa32xQualityLevels:5; + uint32 pad:7; +} +#include "vmware_pack_end.h" +SVGA3dReplyFormatData; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetSingleConstantBuffer { + uint32 slot; + SVGA3dShaderType type; + SVGA3dSurfaceId sid; + uint32 offsetInBytes; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetSingleConstantBuffer; +/* SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetShaderResources { + uint32 startView; + SVGA3dShaderType type; + + /* + * Followed by a variable number of SVGA3dShaderResourceViewId's. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetShaderResources; /* SVGA_3D_CMD_DX_SET_SHADER_RESOURCES */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetShader { + SVGA3dShaderId shaderId; + SVGA3dShaderType type; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetShader; /* SVGA_3D_CMD_DX_SET_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetSamplers { + uint32 startSampler; + SVGA3dShaderType type; + + /* + * Followed by a variable number of SVGA3dSamplerId's. 
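Many of these commands share the same layout: a fixed header followed by a variable number of IDs. A minimal sketch of encoding the SetSamplers body (whose definition closes just below) into caller-provided storage; the helper is an assumption, the destination is assumed to be large enough for the header plus count IDs, and buffer reservation/submission happen elsewhere:

    #include <string.h>
    #include "svga3d_reg.h"

    /* Sketch: write a DX SetSamplers command body followed by 'count'
     * SVGA3dSamplerId values.  Returns the number of bytes written. */
    static uint32
    encode_dx_set_samplers(void *dst, uint32 startSampler,
                           SVGA3dShaderType type,
                           const SVGA3dSamplerId *ids, uint32 count)
    {
       SVGA3dCmdDXSetSamplers *cmd = (SVGA3dCmdDXSetSamplers *) dst;
       SVGA3dSamplerId *out = (SVGA3dSamplerId *) (cmd + 1);

       cmd->startSampler = startSampler;
       cmd->type = type;
       memcpy(out, ids, count * sizeof *ids);

       return sizeof *cmd + count * sizeof *ids;
    }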
+ */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetSamplers; /* SVGA_3D_CMD_DX_SET_SAMPLERS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDraw { + uint32 vertexCount; + uint32 startVertexLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDraw; /* SVGA_3D_CMD_DX_DRAW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawIndexed { + uint32 indexCount; + uint32 startIndexLocation; + int32 baseVertexLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawIndexed; /* SVGA_3D_CMD_DX_DRAW_INDEXED */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawInstanced { + uint32 vertexCountPerInstance; + uint32 instanceCount; + uint32 startVertexLocation; + uint32 startInstanceLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawInstanced; /* SVGA_3D_CMD_DX_DRAW_INSTANCED */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawIndexedInstanced { + uint32 indexCountPerInstance; + uint32 instanceCount; + uint32 startIndexLocation; + int32 baseVertexLocation; + uint32 startInstanceLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawIndexedInstanced; /* SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawAuto { + uint32 pad0; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawAuto; /* SVGA_3D_CMD_DX_DRAW_AUTO */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetInputLayout { + SVGA3dElementLayoutId elementLayoutId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetInputLayout; /* SVGA_3D_CMD_DX_SET_INPUT_LAYOUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dVertexBuffer { + SVGA3dSurfaceId sid; + uint32 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dVertexBuffer; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetVertexBuffers { + uint32 startBuffer; + /* Followed by a variable number of SVGA3dVertexBuffer's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetVertexBuffers; /* SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetIndexBuffer { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetIndexBuffer; /* SVGA_3D_CMD_DX_SET_INDEX_BUFFER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetTopology { + SVGA3dPrimitiveType topology; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetTopology; /* SVGA_3D_CMD_DX_SET_TOPOLOGY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetRenderTargets { + SVGA3dDepthStencilViewId depthStencilViewId; + /* Followed by a variable number of SVGA3dRenderTargetViewId's. 
*/ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetRenderTargets; /* SVGA_3D_CMD_DX_SET_RENDERTARGETS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetBlendState { + SVGA3dBlendStateId blendId; + float blendFactor[4]; + uint32 sampleMask; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetBlendState; /* SVGA_3D_CMD_DX_SET_BLEND_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; + uint32 stencilRef; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetDepthStencilState; /* SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetRasterizerState { + SVGA3dRasterizerStateId rasterizerId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetRasterizerState; /* SVGA_3D_CMD_DX_SET_RASTERIZER_STATE */ + +#define SVGA3D_DXQUERY_FLAG_PREDICATEHINT (1 << 0) +typedef uint32 SVGA3dDXQueryFlags; + +/* + * The SVGADXQueryDeviceState and SVGADXQueryDeviceBits are used by the device + * to track query state transitions, but are not intended to be used by the + * driver. + */ +#define SVGADX_QDSTATE_INVALID ((uint8)-1) /* Query has no state */ +#define SVGADX_QDSTATE_MIN 0 +#define SVGADX_QDSTATE_IDLE 0 /* Query hasn't started yet */ +#define SVGADX_QDSTATE_ACTIVE 1 /* Query is actively gathering data */ +#define SVGADX_QDSTATE_PENDING 2 /* Query is waiting for results */ +#define SVGADX_QDSTATE_FINISHED 3 /* Query has completed */ +#define SVGADX_QDSTATE_MAX 4 +typedef uint8 SVGADXQueryDeviceState; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dQueryTypeUint8 type; + uint16 pad0; + SVGADXQueryDeviceState state; + SVGA3dDXQueryFlags flags; + SVGAMobId mobid; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGACOTableDXQueryEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineQuery { + SVGA3dQueryId queryId; + SVGA3dQueryType type; + SVGA3dDXQueryFlags flags; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineQuery; /* SVGA_3D_CMD_DX_DEFINE_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyQuery; /* SVGA_3D_CMD_DX_DESTROY_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindQuery { + SVGA3dQueryId queryId; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindQuery; /* SVGA_3D_CMD_DX_BIND_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetQueryOffset { + SVGA3dQueryId queryId; + uint32 mobOffset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetQueryOffset; /* SVGA_3D_CMD_DX_SET_QUERY_OFFSET */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBeginQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBeginQuery; /* SVGA_3D_CMD_DX_QUERY_BEGIN */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXEndQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXEndQuery; /* SVGA_3D_CMD_DX_QUERY_END */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackQuery; /* SVGA_3D_CMD_DX_READBACK_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXMoveQuery { + SVGA3dQueryId queryId; + SVGAMobId mobid; + uint32 mobOffset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXMoveQuery; /* SVGA_3D_CMD_DX_MOVE_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindAllQuery { + uint32 
cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindAllQuery; /* SVGA_3D_CMD_DX_BIND_ALL_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackAllQuery { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackAllQuery; /* SVGA_3D_CMD_DX_READBACK_ALL_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetPredication { + SVGA3dQueryId queryId; + uint32 predicateValue; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetPredication; /* SVGA_3D_CMD_DX_SET_PREDICATION */ + +typedef +#include "vmware_pack_begin.h" +struct MKS3dDXSOState { + uint32 offset; /* Starting offset */ + uint32 intOffset; /* Internal offset */ + uint32 vertexCount; /* vertices written */ + uint32 sizeInBytes; /* max bytes to write */ +} +#include "vmware_pack_end.h" +SVGA3dDXSOState; + +/* Set the offset field to this value to append SO values to the buffer */ +#define SVGA3D_DX_SO_OFFSET_APPEND ((uint32) ~0u) + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dSoTarget { + SVGA3dSurfaceId sid; + uint32 offset; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dSoTarget; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetSOTargets { + uint32 pad0; + /* Followed by a variable number of SVGA3dSOTarget's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetSOTargets; /* SVGA_3D_CMD_DX_SET_SOTARGETS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dViewport +{ + float x; + float y; + float width; + float height; + float minDepth; + float maxDepth; +} +#include "vmware_pack_end.h" +SVGA3dViewport; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetViewports { + uint32 pad0; + /* Followed by a variable number of SVGA3dViewport's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetViewports; /* SVGA_3D_CMD_DX_SET_VIEWPORTS */ + +#define SVGA3D_DX_MAX_VIEWPORTS 16 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetScissorRects { + uint32 pad0; + /* Followed by a variable number of SVGASignedRect's. 
*/ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetScissorRects; /* SVGA_3D_CMD_DX_SET_SCISSORRECTS */ + +#define SVGA3D_DX_MAX_SCISSORRECTS 16 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXClearRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; + SVGA3dRGBAFloat rgba; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXClearRenderTargetView; /* SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXClearDepthStencilView { + uint16 flags; + uint16 stencil; + SVGA3dDepthStencilViewId depthStencilViewId; + float depth; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXClearDepthStencilView; /* SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXPredCopyRegion { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dCopyBox box; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXPredCopyRegion; +/* SVGA_3D_CMD_DX_PRED_COPY_REGION */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXPredCopy { + SVGA3dSurfaceId dstSid; + SVGA3dSurfaceId srcSid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXPredCopy; /* SVGA_3D_CMD_DX_PRED_COPY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBufferCopy { + SVGA3dSurfaceId dest; + SVGA3dSurfaceId src; + uint32 destX; + uint32 srcX; + uint32 width; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBufferCopy; +/* SVGA_3D_CMD_DX_BUFFER_COPY */ + +typedef uint32 SVGA3dDXStretchBltMode; +#define SVGADX_STRETCHBLT_LINEAR (1 << 0) +#define SVGADX_STRETCHBLT_FORCE_SRC_SRGB (1 << 1) + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXStretchBlt { + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dSurfaceId dstSid; + uint32 destSubResource; + SVGA3dBox boxSrc; + SVGA3dBox boxDest; + SVGA3dDXStretchBltMode mode; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXStretchBlt; /* SVGA_3D_CMD_DX_STRETCHBLT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXGenMips { + SVGA3dShaderResourceViewId shaderResourceViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXGenMips; /* SVGA_3D_CMD_DX_GENMIPS */ + +/* + * Defines a resource/DX surface. Resources share the surfaceId namespace. + * + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDefineGBSurface_v2 { + uint32 sid; + SVGA3dSurfaceFlags surfaceFlags; + SVGA3dSurfaceFormat format; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + uint32 arraySize; + uint32 pad; +} +#include "vmware_pack_end.h" +SVGA3dCmdDefineGBSurface_v2; /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 */ + +/* + * Update a sub-resource in a guest-backed resource. + * (Inform the device that the guest-contents have been updated.) + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXUpdateSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; + SVGA3dBox box; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXUpdateSubResource; /* SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE */ + +/* + * Readback a subresource in a guest-backed resource. + * (Request the device to flush the dirty contents into the guest.) + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackSubResource; /* SVGA_3D_CMD_DX_READBACK_SUBRESOURCE */ + +/* + * Invalidate an image in a guest-backed surface. + * (Notify the device that the contents can be lost.) 
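The update/readback/invalidate commands above each operate on one subresource plus an optional box. A minimal sketch of filling the update-subresource command defined above for a dirty region; the helper is an assumption, and SVGA3dBox (from svga3d_types.h, not shown in this diff) is assumed to carry an x/y/z origin and w/h/d extent:

    #include <string.h>
    #include "svga3d_reg.h"

    /* Sketch: describe a dirty box of a subresource that the device
     * should re-read from guest memory.  Submission is not shown. */
    static void
    fill_dx_update_subresource(SVGA3dCmdDXUpdateSubResource *cmd,
                               SVGA3dSurfaceId sid, uint32 subResource,
                               uint32 x, uint32 y, uint32 z,
                               uint32 w, uint32 h, uint32 d)
    {
       memset(cmd, 0, sizeof *cmd);
       cmd->sid = sid;
       cmd->subResource = subResource;
       cmd->box.x = x;   /* origin, assumed SVGA3dBox layout */
       cmd->box.y = y;
       cmd->box.z = z;
       cmd->box.w = w;   /* extent */
       cmd->box.h = h;
       cmd->box.d = d;
    }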
+ */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXInvalidateSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXInvalidateSubResource; /* SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE */ + + +/* + * Raw byte wise transfer from a buffer surface into another surface + * of the requested box. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXTransferFromBuffer { + SVGA3dSurfaceId srcSid; + uint32 srcOffset; + uint32 srcPitch; + uint32 srcSlicePitch; + SVGA3dSurfaceId destSid; + uint32 destSubResource; + SVGA3dBox destBox; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXTransferFromBuffer; /* SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER */ + + +/* + * Raw byte wise transfer from a buffer surface into another surface + * of the requested box. Supported if SVGA3D_DEVCAP_DXCONTEXT is set. + * The context is implied from the command buffer header. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXPredTransferFromBuffer { + SVGA3dSurfaceId srcSid; + uint32 srcOffset; + uint32 srcPitch; + uint32 srcSlicePitch; + SVGA3dSurfaceId destSid; + uint32 destSubResource; + SVGA3dBox destBox; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXPredTransferFromBuffer; +/* SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER */ + + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSurfaceCopyAndReadback { + SVGA3dSurfaceId srcSid; + SVGA3dSurfaceId destSid; + SVGA3dCopyBox box; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSurfaceCopyAndReadback; +/* SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXHint { + uint32 hintId; + + /* + * Followed by variable sized data depending on the hintId. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXHint; +/* SVGA_3D_CMD_DX_HINT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBufferUpdate { + SVGA3dSurfaceId sid; + uint32 x; + uint32 width; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBufferUpdate; +/* SVGA_3D_CMD_DX_BUFFER_UPDATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetConstantBufferOffset { + uint32 slot; + uint32 offsetInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetConstantBufferOffset; + +typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetVSConstantBufferOffset; +/* SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET */ + +typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetPSConstantBufferOffset; +/* SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET */ + +typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetGSConstantBufferOffset; +/* SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET */ + + +typedef +#include "vmware_pack_begin.h" +struct { + union { + struct { + uint32 firstElement; + uint32 numElements; + uint32 pad0; + uint32 pad1; + } buffer; + struct { + uint32 mostDetailedMip; + uint32 firstArraySlice; + uint32 mipLevels; + uint32 arraySize; + } tex; + struct { + uint32 firstElement; + uint32 numElements; + uint32 flags; + uint32 pad0; + } bufferex; + }; +} +#include "vmware_pack_end.h" +SVGA3dShaderResourceViewDesc; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + SVGA3dShaderResourceViewDesc desc; + uint32 pad; +} +#include "vmware_pack_end.h" +SVGACOTableDXSRViewEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineShaderResourceView { + SVGA3dShaderResourceViewId shaderResourceViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType 
resourceDimension; + + SVGA3dShaderResourceViewDesc desc; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineShaderResourceView; +/* SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyShaderResourceView { + SVGA3dShaderResourceViewId shaderResourceViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyShaderResourceView; +/* SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dRenderTargetViewDesc { + union { + struct { + uint32 firstElement; + uint32 numElements; + } buffer; + struct { + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + } tex; /* 1d, 2d, cube */ + struct { + uint32 mipSlice; + uint32 firstW; + uint32 wSize; + } tex3D; + }; +} +#include "vmware_pack_end.h" +SVGA3dRenderTargetViewDesc; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + SVGA3dRenderTargetViewDesc desc; + uint32 pad[2]; +} +#include "vmware_pack_end.h" +SVGACOTableDXRTViewEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + + SVGA3dRenderTargetViewDesc desc; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineRenderTargetView; +/* SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyRenderTargetView; +/* SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW */ + +/* + */ +#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_DEPTH 0x01 +#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_STENCIL 0x02 +#define SVGA3D_DXDSVIEW_CREATE_FLAG_MASK 0x03 +typedef uint8 SVGA3DCreateDSViewFlags; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + SVGA3DCreateDSViewFlags flags; + uint8 pad0; + uint16 pad1; + uint32 pad2; +} +#include "vmware_pack_end.h" +SVGACOTableDXDSViewEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineDepthStencilView { + SVGA3dDepthStencilViewId depthStencilViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + SVGA3DCreateDSViewFlags flags; + uint8 pad0; + uint16 pad1; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineDepthStencilView; +/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyDepthStencilView { + SVGA3dDepthStencilViewId depthStencilViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyDepthStencilView; +/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dInputElementDesc { + uint32 inputSlot; + uint32 alignedByteOffset; + SVGA3dSurfaceFormat format; + SVGA3dInputClassification inputSlotClass; + uint32 instanceDataStepRate; + uint32 inputRegister; +} +#include "vmware_pack_end.h" +SVGA3dInputElementDesc; + +typedef +#include "vmware_pack_begin.h" +struct { + /* + * XXX: How many of these can there be? 
+ */ + uint32 elid; + uint32 numDescs; + SVGA3dInputElementDesc desc[32]; + uint32 pad[62]; +} +#include "vmware_pack_end.h" +SVGACOTableDXElementLayoutEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineElementLayout { + SVGA3dElementLayoutId elementLayoutId; + /* Followed by a variable number of SVGA3dInputElementDesc's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineElementLayout; +/* SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyElementLayout { + SVGA3dElementLayoutId elementLayoutId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyElementLayout; +/* SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT */ + + +#define SVGA3D_DX_MAX_RENDER_TARGETS 8 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dDXBlendStatePerRT { + uint8 blendEnable; + uint8 srcBlend; + uint8 destBlend; + uint8 blendOp; + uint8 srcBlendAlpha; + uint8 destBlendAlpha; + uint8 blendOpAlpha; + uint8 renderTargetWriteMask; + uint8 logicOpEnable; + uint8 logicOp; + uint16 pad0; +} +#include "vmware_pack_end.h" +SVGA3dDXBlendStatePerRT; + +typedef +#include "vmware_pack_begin.h" +struct { + uint8 alphaToCoverageEnable; + uint8 independentBlendEnable; + uint16 pad0; + SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; + uint32 pad1[7]; +} +#include "vmware_pack_end.h" +SVGACOTableDXBlendStateEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineBlendState { + SVGA3dBlendStateId blendId; + uint8 alphaToCoverageEnable; + uint8 independentBlendEnable; + uint16 pad0; + SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineBlendState; /* SVGA_3D_CMD_DX_DEFINE_BLEND_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyBlendState { + SVGA3dBlendStateId blendId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyBlendState; /* SVGA_3D_CMD_DX_DESTROY_BLEND_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct { + uint8 depthEnable; + SVGA3dDepthWriteMask depthWriteMask; + SVGA3dComparisonFunc depthFunc; + uint8 stencilEnable; + uint8 frontEnable; + uint8 backEnable; + uint8 stencilReadMask; + uint8 stencilWriteMask; + + uint8 frontStencilFailOp; + uint8 frontStencilDepthFailOp; + uint8 frontStencilPassOp; + SVGA3dComparisonFunc frontStencilFunc; + + uint8 backStencilFailOp; + uint8 backStencilDepthFailOp; + uint8 backStencilPassOp; + SVGA3dComparisonFunc backStencilFunc; +} +#include "vmware_pack_end.h" +SVGACOTableDXDepthStencilEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; + + uint8 depthEnable; + SVGA3dDepthWriteMask depthWriteMask; + SVGA3dComparisonFunc depthFunc; + uint8 stencilEnable; + uint8 frontEnable; + uint8 backEnable; + uint8 stencilReadMask; + uint8 stencilWriteMask; + + uint8 frontStencilFailOp; + uint8 frontStencilDepthFailOp; + uint8 frontStencilPassOp; + SVGA3dComparisonFunc frontStencilFunc; + + uint8 backStencilFailOp; + uint8 backStencilDepthFailOp; + uint8 backStencilPassOp; + SVGA3dComparisonFunc backStencilFunc; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineDepthStencilState; +/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyDepthStencilState; +/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE */ + +typedef +#include 
"vmware_pack_begin.h" +struct { + uint8 fillMode; + SVGA3dCullMode cullMode; + uint8 frontCounterClockwise; + uint8 provokingVertexLast; + int32 depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + uint8 depthClipEnable; + uint8 scissorEnable; + uint8 multisampleEnable; + uint8 antialiasedLineEnable; + float lineWidth; + uint8 lineStippleEnable; + uint8 lineStippleFactor; + uint16 lineStipplePattern; + uint32 forcedSampleCount; +} +#include "vmware_pack_end.h" +SVGACOTableDXRasterizerStateEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineRasterizerState { + SVGA3dRasterizerStateId rasterizerId; + + uint8 fillMode; + SVGA3dCullMode cullMode; + uint8 frontCounterClockwise; + uint8 provokingVertexLast; + int32 depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + uint8 depthClipEnable; + uint8 scissorEnable; + uint8 multisampleEnable; + uint8 antialiasedLineEnable; + float lineWidth; + uint8 lineStippleEnable; + uint8 lineStippleFactor; + uint16 lineStipplePattern; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineRasterizerState; +/* SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyRasterizerState { + SVGA3dRasterizerStateId rasterizerId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyRasterizerState; +/* SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dFilter filter; + uint8 addressU; + uint8 addressV; + uint8 addressW; + uint8 pad0; + float mipLODBias; + uint8 maxAnisotropy; + SVGA3dComparisonFunc comparisonFunc; + uint16 pad1; + SVGA3dRGBAFloat borderColor; + float minLOD; + float maxLOD; + uint32 pad2[6]; +} +#include "vmware_pack_end.h" +SVGACOTableDXSamplerEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineSamplerState { + SVGA3dSamplerId samplerId; + SVGA3dFilter filter; + uint8 addressU; + uint8 addressV; + uint8 addressW; + uint8 pad0; + float mipLODBias; + uint8 maxAnisotropy; + SVGA3dComparisonFunc comparisonFunc; + uint16 pad1; + SVGA3dRGBAFloat borderColor; + float minLOD; + float maxLOD; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineSamplerState; /* SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroySamplerState { + SVGA3dSamplerId samplerId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroySamplerState; /* SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineShader { + SVGA3dShaderId shaderId; + SVGA3dShaderType type; + uint32 sizeInBytes; /* Number of bytes of shader text. 
*/ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineShader; /* SVGA_3D_CMD_DX_DEFINE_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGACOTableDXShaderEntry { + SVGA3dShaderType type; + uint32 sizeInBytes; + uint32 offsetInBytes; + SVGAMobId mobid; + uint32 pad[4]; +} +#include "vmware_pack_end.h" +SVGACOTableDXShaderEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyShader { + SVGA3dShaderId shaderId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyShader; /* SVGA_3D_CMD_DX_DESTROY_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindShader { + uint32 cid; + uint32 shid; + SVGAMobId mobid; + uint32 offsetInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindShader; /* SVGA_3D_CMD_DX_BIND_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindAllShader { + uint32 cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindAllShader; /* SVGA_3D_CMD_DX_BIND_ALL_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXCondBindAllShader { + uint32 cid; + SVGAMobId testMobid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXCondBindAllShader; /* SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER */ + +/* + * The maximum number of streamout decl's in each streamout entry. + */ +#define SVGA3D_MAX_STREAMOUT_DECLS 64 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dStreamOutputDeclarationEntry { + uint32 outputSlot; + uint32 registerIndex; + uint8 registerMask; + uint8 pad0; + uint16 pad1; + uint32 stream; +} +#include "vmware_pack_end.h" +SVGA3dStreamOutputDeclarationEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGAOTableStreamOutputEntry { + uint32 numOutputStreamEntries; + SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]; + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; + uint32 rasterizedStream; + uint32 pad[250]; +} +#include "vmware_pack_end.h" +SVGACOTableDXStreamOutputEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineStreamOutput { + SVGA3dStreamOutputId soid; + uint32 numOutputStreamEntries; + SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]; + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; + uint32 rasterizedStream; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineStreamOutput; /* SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyStreamOutput { + SVGA3dStreamOutputId soid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyStreamOutput; /* SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetStreamOutput { + SVGA3dStreamOutputId soid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetStreamOutput; /* SVGA_3D_CMD_DX_SET_STREAMOUTPUT */ + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 value; + uint32 mobId; + uint32 mobOffset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXMobFence64; /* SVGA_3D_CMD_DX_MOB_FENCE_64 */ + +/* + * SVGA3dCmdSetCOTable -- + * + * This command allows the guest to bind a mob to a context-object table. 
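A hedged sketch of filling the SetCOTable command defined immediately below; the helper name is an assumption, SVGACOTableType comes from elsewhere in these headers, and validSizeInBytes is assumed to describe how much of the table already holds valid entries (0 for a freshly allocated table):

    #include "svga3d_reg.h"

    /* Sketch: bind a mob as the context-object table of the given type. */
    static void
    fill_dx_set_cotable(SVGA3dCmdDXSetCOTable *cmd, uint32 cid,
                        uint32 mobid, SVGACOTableType type,
                        uint32 valid_bytes)
    {
       cmd->cid = cid;
       cmd->mobid = mobid;
       cmd->type = type;
       cmd->validSizeInBytes = valid_bytes;
    }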
+ */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetCOTable { + uint32 cid; + uint32 mobid; + SVGACOTableType type; + uint32 validSizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetCOTable; /* SVGA_3D_CMD_DX_SET_COTABLE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackCOTable { + uint32 cid; + SVGACOTableType type; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackCOTable; /* SVGA_3D_CMD_DX_READBACK_COTABLE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCOTableData { + uint32 mobid; +} +#include "vmware_pack_end.h" +SVGA3dCOTableData; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dBufferBinding { + uint32 bufferId; + uint32 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dBufferBinding; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dConstantBufferBinding { + uint32 sid; + uint32 offsetInBytes; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dConstantBufferBinding; + +typedef +#include "vmware_pack_begin.h" +struct SVGADXInputAssemblyMobFormat { + uint32 layoutId; + SVGA3dBufferBinding vertexBuffers[SVGA3D_DX_MAX_VERTEXBUFFERS]; + uint32 indexBufferSid; + uint32 pad; + uint32 indexBufferOffset; + uint32 indexBufferFormat; + uint32 topology; +} +#include "vmware_pack_end.h" +SVGADXInputAssemblyMobFormat; + +typedef +#include "vmware_pack_begin.h" +struct SVGADXContextMobFormat { + SVGADXInputAssemblyMobFormat inputAssembly; + + struct { + uint32 blendStateId; + uint32 blendFactor[4]; + uint32 sampleMask; + uint32 depthStencilStateId; + uint32 stencilRef; + uint32 rasterizerStateId; + uint32 depthStencilViewId; + uint32 renderTargetViewIds[SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS]; + uint32 unorderedAccessViewIds[SVGA3D_MAX_UAVIEWS]; + } renderState; + + struct { + uint32 targets[SVGA3D_DX_MAX_SOTARGETS]; + uint32 soid; + } streamOut; + uint32 pad0[11]; + + uint8 numViewports; + uint8 numScissorRects; + uint16 pad1[1]; + + uint32 pad2[3]; + + SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS]; + uint32 pad3[32]; + + SVGASignedRect scissorRects[SVGA3D_DX_MAX_SCISSORRECTS]; + uint32 pad4[64]; + + struct { + uint32 queryID; + uint32 value; + } predication; + uint32 pad5[2]; + + struct { + uint32 shaderId; + SVGA3dConstantBufferBinding constantBuffers[SVGA3D_DX_MAX_CONSTBUFFERS]; + uint32 shaderResources[SVGA3D_DX_MAX_SRVIEWS]; + uint32 samplers[SVGA3D_DX_MAX_SAMPLERS]; + } shaderState[SVGA3D_NUM_SHADERTYPE]; + uint32 pad6[26]; + + SVGA3dQueryId queryID[SVGA3D_MAX_QUERY]; + + SVGA3dCOTableData cotables[SVGA_COTABLE_MAX]; + uint32 pad7[380]; +} +#include "vmware_pack_end.h" +SVGADXContextMobFormat; + +#endif /* _SVGA3D_DX_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h index 367e8cf7a..a1c36877a 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -41,6 +41,7 @@ #define SVGA3D_NUM_CLIPPLANES 6 #define SVGA3D_MAX_RENDER_TARGETS 8 #define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS (SVGA3D_MAX_RENDER_TARGETS) +#define SVGA3D_MAX_UAVIEWS 8 #define SVGA3D_MAX_CONTEXT_IDS 256 #define SVGA3D_MAX_SURFACE_IDS (32 * 1024) @@ -56,9 +57,6 @@ #define SVGA3D_NUM_TEXTURE_UNITS 32 #define SVGA3D_NUM_LIGHTS 8 -#define SVGA3D_MAX_VIDEODECODERS 8 -#define SVGA3D_MAX_VIDEOPROCESSORS 8 -#define SVGA3D_MAX_VIDEODECODER_FRAMES 400 /* * Maximum size in dwords of shader text the SVGA device will allow. @@ -98,4 +96,4 @@ */ #define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32 -#endif // _SVGA3D_LIMITS_H_ +#endif /* _SVGA3D_LIMITS_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h index 01705f314..b44ce648f 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -43,6 +43,7 @@ #include "svga3d_types.h" #include "svga3d_limits.h" #include "svga3d_cmd.h" +#include "svga3d_dx.h" #include "svga3d_devcaps.h" diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h index ce5475b6f..efa358b54 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h @@ -1,27 +1,29 @@ -/********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. +/************************************************************************** * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: + * Copyright © 1998-2015 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. * - **********************************************************/ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ /* * svga3d_surfacedefs.h -- @@ -53,645 +55,851 @@ */ enum svga3d_block_desc { - SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */ - SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with red channel - data */ - SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel - data */ - SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video - U and V */ - SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel - data */ - SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel - data */ - SVGA3DBLOCKDESC_STENCIL = 1 << 1, /* Block with a stencil - channel */ - SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with blue channel - data */ - SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel - data */ - SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel - data */ - SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance - data */ - SVGA3DBLOCKDESC_DEPTH = 1 << 2, /* Block with depth channel */ - SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha - channel */ - SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel - data */ - SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of - data */ - SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of - data depending on the - compression method used */ - SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE - floating point - representation in - all channels */ - SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store - data. */ - SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */ - SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */ - SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */ - SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */ - SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV, - e.g., NV12. */ - SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate - Y, U, V, e.g., YV12. 
*/ - - SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN, - SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG | - SVGA3DBLOCKDESC_BLUE, - SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U | - SVGA3DBLOCKDESC_V, - SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_LUMINANCE, - SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_W, - SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U | - SVGA3DBLOCKDESC_V | - SVGA3DBLOCKDESC_W | - SVGA3DBLOCKDESC_Q, - SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_IEEE_FP, - SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP | - SVGA3DBLOCKDESC_GREEN, - SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP | - SVGA3DBLOCKDESC_BLUE, - SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | - SVGA3DBLOCKDESC_STENCIL, - SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO | - SVGA3DBLOCKDESC_Y, - SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_Y | - SVGA3DBLOCKDESC_U_VIDEO | - SVGA3DBLOCKDESC_V_VIDEO, - SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_EXP, - SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_2PLANAR_YUV, - SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_3PLANAR_YUV, -}; -/* - * SVGA3dSurfaceDesc describes the actual pixel data. - * - * This structure provides the following information: - * 1. Block description. - * 2. Dimensions of a block in the surface. - * 3. Size of block in bytes. - * 4. Bit depth of the pixel data. - * 5. Channel bit depths and masks (if applicable). 
- */ -#define SVGA3D_CHANNEL_DEF(type) \ - struct { \ - union { \ - type blue; \ - type u; \ - type uv_video; \ - type u_video; \ - }; \ - union { \ - type green; \ - type v; \ - type stencil; \ - type v_video; \ - }; \ - union { \ - type red; \ - type w; \ - type luminance; \ - type y; \ - type depth; \ - type data; \ - }; \ - union { \ - type alpha; \ - type q; \ - type exp; \ - }; \ - } - -struct svga3d_surface_desc { - enum svga3d_block_desc block_desc; - SVGA3dSize block_size; - uint32 bytes_per_block; - uint32 pitch_bytes_per_block; - - struct { - uint32 total; - SVGA3D_CHANNEL_DEF(uint8); - } bit_depth; - - struct { - SVGA3D_CHANNEL_DEF(uint8); - } bit_offset; + SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */ + SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with red channel data */ + SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel data */ + SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel data */ + SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel data */ + SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with blue channel data */ + SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel data */ + SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel data */ + SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance data */ + SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha channel */ + SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel data */ + SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of data */ + SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of data depending + on the compression method used */ + SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE floating point + representation in all channels */ + SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video U and V */ + SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store data. */ + SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */ + SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */ + SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */ + SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */ + SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV, e.g., NV12. */ + SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate Y, U, V, e.g., YV12. 
*/ + SVGA3DBLOCKDESC_DEPTH = 1 << 15, /* Block with depth channel */ + SVGA3DBLOCKDESC_STENCIL = 1 << 16, /* Block with a stencil channel */ + + SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN, + SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG | + SVGA3DBLOCKDESC_BLUE, + SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U | + SVGA3DBLOCKDESC_V, + SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV | + SVGA3DBLOCKDESC_LUMINANCE, + SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV | + SVGA3DBLOCKDESC_W, + SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U | + SVGA3DBLOCKDESC_V | + SVGA3DBLOCKDESC_W | + SVGA3DBLOCKDESC_Q, + SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_IEEE_FP, + SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP | + SVGA3DBLOCKDESC_GREEN, + SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP | + SVGA3DBLOCKDESC_BLUE, + SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | + SVGA3DBLOCKDESC_STENCIL, + SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO | + SVGA3DBLOCKDESC_Y, + SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_Y | + SVGA3DBLOCKDESC_U_VIDEO | + SVGA3DBLOCKDESC_V_VIDEO, + SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB | + SVGA3DBLOCKDESC_EXP, + SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_2PLANAR_YUV, + SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_3PLANAR_YUV, }; -static const struct svga3d_surface_desc svga3d_surface_descs[] = { - {SVGA3DBLOCKDESC_NONE, - {1, 1, 1}, 0, 0, {0, {{0}, {0}, {0}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_FORMAT_INVALID */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_X8R8G8B8 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_A8R8G8B8 */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 2, 2, {16, {{5}, {6}, {5}, {0} } }, - {{{0}, {5}, {11}, {0} } } }, /* SVGA3D_R5G6B5 */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 2, 2, {15, {{5}, {5}, {5}, {0} } }, - {{{0}, {5}, {10}, {0} } } }, /* SVGA3D_X1R5G5B5 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 2, 2, {16, {{5}, {5}, {5}, {1} } }, - {{{0}, {5}, {10}, {15} } } }, /* SVGA3D_A1R5G5B5 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 2, 2, {16, {{4}, {4}, {4}, {4} } }, - {{{0}, {4}, {8}, {12} } } }, /* SVGA3D_A4R4G4B4 */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_D32 */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_D16 */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24S8 */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 2, 2, {16, {{0}, {1}, {15}, {0} } }, - {{{0}, {15}, {0}, {0} } } }, /* SVGA3D_Z_D15S1 */ - - {SVGA3DBLOCKDESC_LUMINANCE, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_LUMINANCE8 */ - - {SVGA3DBLOCKDESC_LA, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {4}, {4} 
} }, - {{{0}, {0}, {0}, {4} } } }, /* SVGA3D_LUMINANCE4_ALPHA4 */ - - {SVGA3DBLOCKDESC_LUMINANCE, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_LUMINANCE16 */ - - {SVGA3DBLOCKDESC_LA, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {8}, {8} } }, - {{{0}, {0}, {0}, {8} } } }, /* SVGA3D_LUMINANCE8_ALPHA8 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT1 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT2 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT3 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT4 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT5 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {8}, {8} } }, - {{{0}, {0}, {0}, {8} } } }, /* SVGA3D_BUMPU8V8 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 2, 2, {16, {{5}, {5}, {6}, {0} } }, - {{{11}, {6}, {0}, {0} } } }, /* SVGA3D_BUMPL6V5U5 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {0} } }, - {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_BUMPX8L8V8U8 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 3, 3, {24, {{8}, {8}, {8}, {0} } }, - {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_BUMPL8V8U8 */ - - {SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_ARGB_S10E5 */ - - {SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_ARGB_S23E8 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_A2R10G10B10 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{8}, {8}, {0}, {0} } }, - {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_V8U8 */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{24}, {16}, {8}, {0} } } }, /* SVGA3D_Q8W8V8U8 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{8}, {8}, {0}, {0} } }, - {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_CxV8U8 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_X8L8V8U8 */ - - {SVGA3DBLOCKDESC_UVWA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_A2W10V10U10 */ - - {SVGA3DBLOCKDESC_ALPHA, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {0}, {8} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_ALPHA8 */ - - {SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R_S10E5 */ - - {SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R_S23E8 */ - - {SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_RG_S10E5 */ - - {SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_RG_S23E8 */ - - {SVGA3DBLOCKDESC_BUFFER, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BUFFER */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24X8 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 4, 4, {32, {{16}, {16}, 
{0}, {0} } }, - {{{16}, {0}, {0}, {0} } } }, /* SVGA3D_V16U16 */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {0}, {16}, {0} } } }, /* SVGA3D_G16R16 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_A16B16G16R16 */ - - {SVGA3DBLOCKDESC_YUV, - {1, 1, 1}, 2, 2, {16, {{8}, {0}, {8}, {0} } }, - {{{0}, {0}, {8}, {0} } } }, /* SVGA3D_UYVY */ - - {SVGA3DBLOCKDESC_YUV, - {1, 1, 1}, 2, 2, {16, {{8}, {0}, {8}, {0} } }, - {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_YUY2 */ - - {SVGA3DBLOCKDESC_NV12, - {2, 2, 1}, 6, 2, {48, {{0}, {0}, {48}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_NV12 */ - - {SVGA3DBLOCKDESC_AYUV, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_AYUV */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_UINT */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_SINT */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_TYPELESS */ - - {SVGA3DBLOCKDESC_RGB_FP, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_FLOAT */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_UINT */ - - {SVGA3DBLOCKDESC_UVW, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_SINT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_UINT */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_SNORM */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_SINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_TYPELESS */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_UINT */ - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_SINT */ +typedef struct SVGA3dChannelDef { + union { + uint8 blue; + uint8 u; + uint8 uv_video; + uint8 u_video; + }; + union { + uint8 green; + uint8 v; + uint8 stencil; + uint8 v_video; + }; + union { + uint8 red; + uint8 w; + uint8 luminance; + uint8 y; + uint8 depth; + uint8 data; + }; + union { + uint8 alpha; + uint8 q; + uint8 exp; + }; +} SVGA3dChannelDef; - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 8, 8, {64, {{0}, {8}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G8X24_TYPELESS */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 8, 8, {64, {{0}, {8}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_D32_FLOAT_S8X24_UINT */ - - {SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 8, 8, {64, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } 
} }, /* SVGA3D_R32_FLOAT_X8_X24_TYPELESS */ - - {SVGA3DBLOCKDESC_GREEN, - {1, 1, 1}, 8, 8, {64, {{0}, {8}, {0}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_X32_TYPELESS_G8X24_UINT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10A2_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10A2_UINT */ - - {SVGA3DBLOCKDESC_RGB_FP, - {1, 1, 1}, 4, 4, {32, {{10}, {11}, {11}, {0} } }, - {{{0}, {10}, {21}, {0} } } }, /* SVGA3D_R11G11B10_FLOAT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UNORM */ - - {SVGA3DBLOCKDESC_RGBA_SRGB, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UINT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_SINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_TYPELESS */ - - {SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_UINT */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_SINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_TYPELESS */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_D32_FLOAT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_UINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_SINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_R24G8_TYPELESS */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_D24_UNORM_S8_UINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {24}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R24_UNORM_X8_TYPELESS */ - - {SVGA3DBLOCKDESC_GREEN, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {0}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_X24_TYPELESS_G8_UINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_TYPELESS */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_UNORM */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_UINT */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_SINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_TYPELESS */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* 
SVGA3D_R16_UNORM */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_UINT */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_SNORM */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_SINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_TYPELESS */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_UNORM */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_UINT */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_SNORM */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_SINT */ - - {SVGA3DBLOCKDESC_RED, - {8, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R1_UNORM */ - - {SVGA3DBLOCKDESC_RGBE, - {1, 1, 1}, 4, 4, {32, {{9}, {9}, {9}, {5} } }, - {{{18}, {9}, {0}, {27} } } }, /* SVGA3D_R9G9B9E5_SHAREDEXP */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_B8G8_UNORM */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_G8R8_G8B8_UNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC1_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED_SRGB, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC1_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC2_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED_SRGB, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC2_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC3_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED_SRGB, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC3_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_UNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_SNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_UNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_SNORM */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10_XR_BIAS_A2_UNORM */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8A8_TYPELESS 
*/ - - {SVGA3DBLOCKDESC_RGBA_SRGB, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8A8_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8X8_TYPELESS */ - - {SVGA3DBLOCKDESC_RGB_SRGB, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8X8_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_DF16 */ +struct svga3d_surface_desc { + SVGA3dSurfaceFormat format; + enum svga3d_block_desc block_desc; - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_DF24 */ + SVGA3dSize block_size; + uint32 bytes_per_block; + uint32 pitch_bytes_per_block; - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24S8_INT */ + uint32 totalBitDepth; + SVGA3dChannelDef bitDepth; + SVGA3dChannelDef bitOffset; +}; - {SVGA3DBLOCKDESC_YV12, - {2, 2, 1}, 6, 2, {48, {{0}, {0}, {48}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_YV12 */ +static const struct svga3d_surface_desc svga3d_surface_descs[] = { + {SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE, + {1, 1, 1}, 0, 0, + 0, {{0}, {0}, {0}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 2, 2, + 16, {{5}, {6}, {5}, {0}}, + {{0}, {5}, {11}, {0}}}, + + {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 2, 2, + 15, {{5}, {5}, {5}, {0}}, + {{0}, {5}, {10}, {0}}}, + + {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 2, 2, + 16, {{5}, {5}, {5}, {1}}, + {{0}, {5}, {10}, {15}}}, + + {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 2, 2, + 16, {{4}, {4}, {4}, {4}}, + {{0}, {4}, {8}, {12}}}, + + {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 2, 2, + 16, {{0}, {1}, {15}, {0}}, + {{0}, {15}, {0}, {0}}}, + + {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_LUMINANCE, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA, + {1 , 1, 1}, 1, 1, + 8, {{0}, {0}, {4}, {4}}, + {{0}, {0}, {0}, {4}}}, + + {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_LUMINANCE, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {8}, {8}}, + {{0}, {0}, {0}, {8}}}, + + {SVGA3D_DXT1, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT2, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT3, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT4, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT5, 
SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {8}, {8}}, + {{0}, {0}, {0}, {8}}}, + + {SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL, + {1, 1, 1}, 2, 2, + 16, {{5}, {5}, {6}, {0}}, + {{11}, {6}, {0}, {0}}}, + + {SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {0}}, + {{16}, {8}, {0}, {0}}}, + + {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_UVL, + {0, 0, 0}, 0, 0, + 0, {{0}, {0}, {0}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{8}, {8}, {0}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{24}, {16}, {8}, {0}}}, + + {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{8}, {8}, {0}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{16}, {8}, {0}, {0}}}, + + {SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_ALPHA, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {0}, {8}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 4, 4, + 32, {{16}, {16}, {0}, {0}}, + {{16}, {0}, {0}, {0}}}, + + {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {0}, {16}, {0}}}, + + {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV, + {1, 1, 1}, 2, 2, + 16, {{8}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}}, + + {SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV, + {1, 1, 1}, 2, 2, + 16, {{8}, {0}, {8}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_NV12, SVGA3DBLOCKDESC_NV12, + {2, 2, 1}, 6, 2, + 48, {{0}, {0}, {48}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, 
{32}, {0}, {96}}}, + + {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_UVW, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 8, 8, + 64, {{0}, {8}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 8, 8, + 64, {{0}, {8}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32_FLOAT_X8X24_TYPELESS, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 8, 8, + 64, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_X32_TYPELESS_G8X24_UINT, SVGA3DBLOCKDESC_GREEN, + {1, 1, 1}, 8, 8, + 64, {{0}, {8}, {0}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP, + {1, 1, 1}, 4, 4, + 32, {{10}, {11}, {11}, {0}}, + {{0}, {10}, {21}, {0}}}, + + {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, 
{0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_R24_UNORM_X8_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {24}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_X24_TYPELESS_G8_UINT, SVGA3DBLOCKDESC_GREEN, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {0}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_P8, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGBE, + {1, 1, 1}, 4, 4, + 32, {{9}, {9}, {9}, {5}}, + {{18}, {9}, {0}, {27}}}, + + {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC3_UNORM_SRGB, 
SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_ATI1, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_ATI2, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_SRGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_YV12, SVGA3DBLOCKDESC_YV12, + {2, 2, 1}, 6, 2, + 48, {{0}, {0}, {48}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{24}, {16}, {8}, {0}}}, + + {SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {0}, {16}, {0}}}, + + {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{16}, {16}, {0}, {0}}, + {{16}, {0}, {0}, {0}}}, + + {SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{8}, {8}, {0}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 2, 
2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_ALPHA, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {0}, {8}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 2, 2, + 16, {{5}, {6}, {5}, {0}}, + {{0}, {5}, {11}, {0}}}, + + {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 2, 2, + 16, {{5}, {5}, {5}, {1}}, + {{0}, {5}, {10}, {15}}}, + + {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, }; @@ -704,6 +912,16 @@ static inline uint32 clamped_umul32(uint32 a, uint32 b) return (tmp > (uint64_t) ((uint32) -1)) ? (uint32) -1 : tmp; } +static inline uint32 clamped_uadd32(uint32 a, uint32 b) +{ + uint32 c = a + b; + if (c < a || c < b) { + return MAX_UINT32; + } + return c; +} + + static inline const struct svga3d_surface_desc * svga3dsurface_get_desc(SVGA3dSurfaceFormat format) { @@ -828,7 +1046,7 @@ static inline uint32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, SVGA3dSize baseLevelSize, uint32 numMipLevels, - uint32 face, + uint32 layer, uint32 mip) { @@ -853,7 +1071,7 @@ svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, } } - offset = mipChainBytes * face + mipChainBytesToLevel; + offset = mipChainBytes * layer + mipChainBytesToLevel; return offset; } @@ -863,7 +1081,7 @@ static inline uint32 svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format, SVGA3dSize base_level_size, uint32 num_mip_levels, - bool cubemap) + uint32 num_layers) { const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format); uint64_t total_size = 0; @@ -876,8 +1094,7 @@ svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format, &size, 0); } - if (cubemap) - total_size *= SVGA3D_MAX_SURFACE_FACES; + total_size *= num_layers; return (total_size > (uint64_t) MAX_UINT32) ? MAX_UINT32 : (uint32) total_size; diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h index fc4a6b95a..de711c388 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. 
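The surfacedefs hunk above replaces the bool cubemap parameter of svga3dsurface_get_serialized_size() with an explicit num_layers count and adds the overflow-clamping clamped_uadd32() helper. A minimal caller-side sketch of the new parameter, assuming a cube map is stored as six layers (one per face); the wrapper name is invented for illustration:

   static uint32
   example_cubemap_serialized_size(SVGA3dSurfaceFormat format,
                                   SVGA3dSize base_level_size,
                                   uint32 num_mip_levels)
   {
      /* Previously: cubemap = true.  Now the layer count is explicit and
       * the helper clamps the result to MAX_UINT32 on overflow. */
      return svga3dsurface_get_serialized_size(format, base_level_size,
                                               num_mip_levels, 6);
   }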
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -43,10 +43,6 @@ */ #define SVGA3D_INVALID_ID ((uint32)-1) -#define SVGA3D_INVALID_CID SVGA3D_INVALID_ID -#define SVGA3D_INVALID_SID SVGA3D_INVALID_ID -#define SVGA3D_INVALID_SHID SVGA3D_INVALID_ID - typedef uint32 SVGA3dBool; /* 32-bit Bool definition */ typedef uint32 SVGA3dColor; /* a, r, g, b */ @@ -116,13 +112,7 @@ SVGA3dPoint; /* * Surface formats. - * - * If you modify this list, be sure to keep GLUtil.c in sync. It - * includes the internal format definition of each surface in - * GLUtil_ConvertSurfaceFormat, and it contains a table of - * human-readable names in GLUtil_GetFormatName. */ - typedef enum SVGA3dSurfaceFormat { SVGA3D_FORMAT_INVALID = 0, @@ -155,7 +145,7 @@ typedef enum SVGA3dSurfaceFormat { SVGA3D_BUMPU8V8 = 20, SVGA3D_BUMPL6V5U5 = 21, SVGA3D_BUMPX8L8V8U8 = 22, - SVGA3D_BUMPL8V8U8 = 23, + SVGA3D_FORMAT_DEAD1 = 23, SVGA3D_ARGB_S10E5 = 24, /* 16-bit floating-point ARGB */ SVGA3D_ARGB_S23E8 = 25, /* 32-bit floating-point ARGB */ @@ -271,7 +261,7 @@ typedef enum SVGA3dSurfaceFormat { SVGA3D_B8G8R8X8_TYPELESS = 116, SVGA3D_B8G8R8X8_UNORM_SRGB = 117, - /* Advanced D3D9 depth formats. */ + /* Advanced depth formats. */ SVGA3D_Z_DF16 = 118, SVGA3D_Z_DF24 = 119, SVGA3D_Z_D24S8_INT = 120, @@ -306,13 +296,157 @@ typedef enum SVGA3dSurfaceFormat { SVGA3D_FORMAT_MAX } SVGA3dSurfaceFormat; +typedef uint32 SVGA3dSurfaceFlags; +#define SVGA3D_SURFACE_CUBEMAP (1 << 0) + +/* + * HINT flags are not enforced by the device but are useful for + * performance. + */ +#define SVGA3D_SURFACE_HINT_STATIC (1 << 1) +#define SVGA3D_SURFACE_HINT_DYNAMIC (1 << 2) +#define SVGA3D_SURFACE_HINT_INDEXBUFFER (1 << 3) +#define SVGA3D_SURFACE_HINT_VERTEXBUFFER (1 << 4) +#define SVGA3D_SURFACE_HINT_TEXTURE (1 << 5) +#define SVGA3D_SURFACE_HINT_RENDERTARGET (1 << 6) +#define SVGA3D_SURFACE_HINT_DEPTHSTENCIL (1 << 7) +#define SVGA3D_SURFACE_HINT_WRITEONLY (1 << 8) +#define SVGA3D_SURFACE_MASKABLE_ANTIALIAS (1 << 9) +#define SVGA3D_SURFACE_AUTOGENMIPMAPS (1 << 10) +#define SVGA3D_SURFACE_DECODE_RENDERTARGET (1 << 11) + +/* + * Is this surface using a base-level pitch for it's mob backing? + * + * This flag is not intended to be set by guest-drivers, but is instead + * set by the device when the surface is bound to a mob with a specified + * pitch. + */ +#define SVGA3D_SURFACE_MOB_PITCH (1 << 12) + +#define SVGA3D_SURFACE_INACTIVE (1 << 13) +#define SVGA3D_SURFACE_HINT_RT_LOCKABLE (1 << 14) +#define SVGA3D_SURFACE_VOLUME (1 << 15) + +/* + * Required to be set on a surface to bind it to a screen target. + */ +#define SVGA3D_SURFACE_SCREENTARGET (1 << 16) + +/* + * Align images in the guest-backing mob to 16-bytes. + */ +#define SVGA3D_SURFACE_ALIGN16 (1 << 17) + +#define SVGA3D_SURFACE_1D (1 << 18) +#define SVGA3D_SURFACE_ARRAY (1 << 19) + +/* + * Bind flags. + * These are enforced for any surface defined with DefineGBSurface_v2. + */ +#define SVGA3D_SURFACE_BIND_VERTEX_BUFFER (1 << 20) +#define SVGA3D_SURFACE_BIND_INDEX_BUFFER (1 << 21) +#define SVGA3D_SURFACE_BIND_CONSTANT_BUFFER (1 << 22) +#define SVGA3D_SURFACE_BIND_SHADER_RESOURCE (1 << 23) +#define SVGA3D_SURFACE_BIND_RENDER_TARGET (1 << 24) +#define SVGA3D_SURFACE_BIND_DEPTH_STENCIL (1 << 25) +#define SVGA3D_SURFACE_BIND_STREAM_OUTPUT (1 << 26) + +/* + * The STAGING flags notes that the surface will not be used directly by the + * drawing pipeline, i.e. that it will not be bound to any bind point. 
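The new bind and staging surface flags above come with a rule stated in the comment: no bind flags may be set on a staging surface. A minimal sketch of that check, assuming the SVGA3D_SURFACE_STAGING_MASK and SVGA3D_SURFACE_BIND_MASK convenience masks defined further down in this hunk; the function name is invented for illustration:

   static inline bool
   example_surface_flags_valid(SVGA3dSurfaceFlags flags)
   {
      /* Staging surfaces must not be bound to any pipeline bind point. */
      if ((flags & SVGA3D_SURFACE_STAGING_MASK) &&
          (flags & SVGA3D_SURFACE_BIND_MASK))
         return false;
      return true;
   }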
+ * Staging surfaces may be used by copy operations to move data in and out + * of other surfaces. No bind flags may be set on surfaces with this flag. + * + * The HINT_INDIRECT_UPDATE flag suggests that the surface will receive + * updates indirectly, i.e. the surface will not be updated directly, but + * will receive copies from staging surfaces. + */ +#define SVGA3D_SURFACE_STAGING_UPLOAD (1 << 27) +#define SVGA3D_SURFACE_STAGING_DOWNLOAD (1 << 28) +#define SVGA3D_SURFACE_HINT_INDIRECT_UPDATE (1 << 29) + +/* + * Setting this flag allow this surface to be used with the + * SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command. It is only valid for + * buffer surfaces, and no bind flags are allowed to be set on surfaces + * with this flag. + */ +#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (1 << 30) /* - * These are really the D3DFORMAT_OP defines from the wdk. We need - * them so that we can query the host for what the supported surface - * operations are (when we're using the D3D backend, in particular), - * and so we can send those operations to the guest. + * Marker for the last defined bit in SVGA3dSurfaceFlags. */ +#define SVGA3D_SURFACE_FLAG_MAX (1 << 31) + +#define SVGA3D_SURFACE_HB_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_MOB_PITCH | \ + SVGA3D_SURFACE_SCREENTARGET | \ + SVGA3D_SURFACE_ALIGN16 | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_2D_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_CUBEMAP | \ + SVGA3D_SURFACE_MASKABLE_ANTIALIAS | \ + SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_DECODE_RENDERTARGET | \ + SVGA3D_SURFACE_VOLUME | \ + SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_SCREENTARGET_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_CUBEMAP | \ + SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_DECODE_RENDERTARGET | \ + SVGA3D_SURFACE_VOLUME | \ + SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_INACTIVE | \ + SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_DX_ONLY_MASK \ + ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_STAGING_MASK \ + ( SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD \ + ) + +#define SVGA3D_SURFACE_BIND_MASK \ + ( SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \ + SVGA3D_SURFACE_BIND_RENDER_TARGET | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT \ + ) + typedef enum { SVGA3DFORMAT_OP_TEXTURE = 0x00000001, SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002, @@ -656,25 +790,27 @@ union { SVGA3dLinePattern; typedef enum { - SVGA3D_BLENDOP_INVALID = 0, - SVGA3D_BLENDOP_MIN = 1, - SVGA3D_BLENDOP_ZERO = 1, - 
SVGA3D_BLENDOP_ONE = 2, - SVGA3D_BLENDOP_SRCCOLOR = 3, - SVGA3D_BLENDOP_INVSRCCOLOR = 4, - SVGA3D_BLENDOP_SRCALPHA = 5, - SVGA3D_BLENDOP_INVSRCALPHA = 6, - SVGA3D_BLENDOP_DESTALPHA = 7, - SVGA3D_BLENDOP_INVDESTALPHA = 8, - SVGA3D_BLENDOP_DESTCOLOR = 9, - SVGA3D_BLENDOP_INVDESTCOLOR = 10, - SVGA3D_BLENDOP_SRCALPHASAT = 11, - SVGA3D_BLENDOP_BLENDFACTOR = 12, - SVGA3D_BLENDOP_INVBLENDFACTOR = 13, - SVGA3D_BLENDOP_SRC1COLOR = 14, - SVGA3D_BLENDOP_INVSRC1COLOR = 15, - SVGA3D_BLENDOP_SRC1ALPHA = 16, - SVGA3D_BLENDOP_INVSRC1ALPHA = 17, + SVGA3D_BLENDOP_INVALID = 0, + SVGA3D_BLENDOP_MIN = 1, + SVGA3D_BLENDOP_ZERO = 1, + SVGA3D_BLENDOP_ONE = 2, + SVGA3D_BLENDOP_SRCCOLOR = 3, + SVGA3D_BLENDOP_INVSRCCOLOR = 4, + SVGA3D_BLENDOP_SRCALPHA = 5, + SVGA3D_BLENDOP_INVSRCALPHA = 6, + SVGA3D_BLENDOP_DESTALPHA = 7, + SVGA3D_BLENDOP_INVDESTALPHA = 8, + SVGA3D_BLENDOP_DESTCOLOR = 9, + SVGA3D_BLENDOP_INVDESTCOLOR = 10, + SVGA3D_BLENDOP_SRCALPHASAT = 11, + SVGA3D_BLENDOP_BLENDFACTOR = 12, + SVGA3D_BLENDOP_INVBLENDFACTOR = 13, + SVGA3D_BLENDOP_SRC1COLOR = 14, + SVGA3D_BLENDOP_INVSRC1COLOR = 15, + SVGA3D_BLENDOP_SRC1ALPHA = 16, + SVGA3D_BLENDOP_INVSRC1ALPHA = 17, + SVGA3D_BLENDOP_BLENDFACTORALPHA = 18, + SVGA3D_BLENDOP_INVBLENDFACTORALPHA = 19, SVGA3D_BLENDOP_MAX } SVGA3dBlendOp; @@ -690,6 +826,27 @@ typedef enum { } SVGA3dBlendEquation; typedef enum { + SVGA3D_DX11_LOGICOP_MIN = 0, + SVGA3D_DX11_LOGICOP_CLEAR = 0, + SVGA3D_DX11_LOGICOP_SET = 1, + SVGA3D_DX11_LOGICOP_COPY = 2, + SVGA3D_DX11_LOGICOP_COPY_INVERTED = 3, + SVGA3D_DX11_LOGICOP_NOOP = 4, + SVGA3D_DX11_LOGICOP_INVERT = 5, + SVGA3D_DX11_LOGICOP_AND = 6, + SVGA3D_DX11_LOGICOP_NAND = 7, + SVGA3D_DX11_LOGICOP_OR = 8, + SVGA3D_DX11_LOGICOP_NOR = 9, + SVGA3D_DX11_LOGICOP_XOR = 10, + SVGA3D_DX11_LOGICOP_EQUIV = 11, + SVGA3D_DX11_LOGICOP_AND_REVERSE = 12, + SVGA3D_DX11_LOGICOP_AND_INVERTED = 13, + SVGA3D_DX11_LOGICOP_OR_REVERSE = 14, + SVGA3D_DX11_LOGICOP_OR_INVERTED = 15, + SVGA3D_DX11_LOGICOP_MAX +} SVGA3dDX11LogicOp; + +typedef enum { SVGA3D_FRONTWINDING_INVALID = 0, SVGA3D_FRONTWINDING_CW = 1, SVGA3D_FRONTWINDING_CCW = 2, @@ -952,10 +1109,10 @@ typedef enum { SVGA3D_TEX_FILTER_NEAREST = 1, SVGA3D_TEX_FILTER_LINEAR = 2, SVGA3D_TEX_FILTER_ANISOTROPIC = 3, - SVGA3D_TEX_FILTER_FLATCUBIC = 4, // Deprecated, not implemented - SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, // Deprecated, not implemented - SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, // Not currently implemented - SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, // Not currently implemented + SVGA3D_TEX_FILTER_FLATCUBIC = 4, /* Deprecated, not implemented */ + SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, /* Deprecated, not implemented */ + SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, /* Not currently implemented */ + SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, /* Not currently implemented */ SVGA3D_TEX_FILTER_MAX } SVGA3dTextureFilter; @@ -1013,19 +1170,19 @@ typedef enum { typedef enum { SVGA3D_DECLUSAGE_POSITION = 0, - SVGA3D_DECLUSAGE_BLENDWEIGHT, // 1 - SVGA3D_DECLUSAGE_BLENDINDICES, // 2 - SVGA3D_DECLUSAGE_NORMAL, // 3 - SVGA3D_DECLUSAGE_PSIZE, // 4 - SVGA3D_DECLUSAGE_TEXCOORD, // 5 - SVGA3D_DECLUSAGE_TANGENT, // 6 - SVGA3D_DECLUSAGE_BINORMAL, // 7 - SVGA3D_DECLUSAGE_TESSFACTOR, // 8 - SVGA3D_DECLUSAGE_POSITIONT, // 9 - SVGA3D_DECLUSAGE_COLOR, // 10 - SVGA3D_DECLUSAGE_FOG, // 11 - SVGA3D_DECLUSAGE_DEPTH, // 12 - SVGA3D_DECLUSAGE_SAMPLE, // 13 + SVGA3D_DECLUSAGE_BLENDWEIGHT, + SVGA3D_DECLUSAGE_BLENDINDICES, + SVGA3D_DECLUSAGE_NORMAL, + SVGA3D_DECLUSAGE_PSIZE, + SVGA3D_DECLUSAGE_TEXCOORD, + SVGA3D_DECLUSAGE_TANGENT, + SVGA3D_DECLUSAGE_BINORMAL, + 
SVGA3D_DECLUSAGE_TESSFACTOR, + SVGA3D_DECLUSAGE_POSITIONT, + SVGA3D_DECLUSAGE_COLOR, + SVGA3D_DECLUSAGE_FOG, + SVGA3D_DECLUSAGE_DEPTH, + SVGA3D_DECLUSAGE_SAMPLE, SVGA3D_DECLUSAGE_MAX } SVGA3dDeclUsage; @@ -1033,10 +1190,11 @@ typedef enum { SVGA3D_DECLMETHOD_DEFAULT = 0, SVGA3D_DECLMETHOD_PARTIALU, SVGA3D_DECLMETHOD_PARTIALV, - SVGA3D_DECLMETHOD_CROSSUV, // Normal + SVGA3D_DECLMETHOD_CROSSUV, /* Normal */ SVGA3D_DECLMETHOD_UV, - SVGA3D_DECLMETHOD_LOOKUP, // Lookup a displacement map - SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map + SVGA3D_DECLMETHOD_LOOKUP, /* Lookup a displacement map */ + SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement */ + /* map */ } SVGA3dDeclMethod; typedef enum { @@ -1162,17 +1320,23 @@ typedef enum { SVGA3D_SHADERTYPE_MIN = 1, SVGA3D_SHADERTYPE_VS = 1, SVGA3D_SHADERTYPE_PS = 2, - SVGA3D_SHADERTYPE_MAX = 3, SVGA3D_SHADERTYPE_PREDX_MAX = 3, SVGA3D_SHADERTYPE_GS = 3, - SVGA3D_SHADERTYPE_DX_MAX = 4, + SVGA3D_SHADERTYPE_DX10_MAX = 4, + SVGA3D_SHADERTYPE_HS = 4, + SVGA3D_SHADERTYPE_DS = 5, + SVGA3D_SHADERTYPE_CS = 6, + SVGA3D_SHADERTYPE_MAX = 7 } SVGA3dShaderType; #define SVGA3D_NUM_SHADERTYPE_PREDX \ (SVGA3D_SHADERTYPE_PREDX_MAX - SVGA3D_SHADERTYPE_MIN) -#define SVGA3D_NUM_SHADERTYPE_DX \ - (SVGA3D_SHADERTYPE_DX_MAX - SVGA3D_SHADERTYPE_MIN) +#define SVGA3D_NUM_SHADERTYPE_DX10 \ + (SVGA3D_SHADERTYPE_DX10_MAX - SVGA3D_SHADERTYPE_MIN) + +#define SVGA3D_NUM_SHADERTYPE \ + (SVGA3D_SHADERTYPE_MAX - SVGA3D_SHADERTYPE_MIN) typedef enum { SVGA3D_CONST_TYPE_MIN = 0, @@ -1196,33 +1360,151 @@ typedef enum { } SVGA3dStretchBltMode; typedef enum { - SVGA3D_QUERYTYPE_INVALID = ((uint32)-1), + SVGA3D_QUERYTYPE_INVALID = ((uint8)-1), SVGA3D_QUERYTYPE_MIN = 0, SVGA3D_QUERYTYPE_OCCLUSION = 0, - SVGA3D_QUERYTYPE_EVENT = 1, - SVGA3D_QUERYTYPE_TIMESTAMP = 2, - SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 3, - SVGA3D_QUERYTYPE_PIPELINESTATS = 4, - SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 5, - SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 6, - SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 7, - SVGA3D_QUERYTYPE_OCCLUSION64 = 8, + SVGA3D_QUERYTYPE_TIMESTAMP = 1, + SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 2, + SVGA3D_QUERYTYPE_PIPELINESTATS = 3, + SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 4, + SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 5, + SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 6, + SVGA3D_QUERYTYPE_OCCLUSION64 = 7, + SVGA3D_QUERYTYPE_DX10_MAX = 8, + SVGA3D_QUERYTYPE_SOSTATS_STREAM0 = 8, + SVGA3D_QUERYTYPE_SOSTATS_STREAM1 = 9, + SVGA3D_QUERYTYPE_SOSTATS_STREAM2 = 10, + SVGA3D_QUERYTYPE_SOSTATS_STREAM3 = 11, + SVGA3D_QUERYTYPE_SOP_STREAM0 = 12, + SVGA3D_QUERYTYPE_SOP_STREAM1 = 13, + SVGA3D_QUERYTYPE_SOP_STREAM2 = 14, + SVGA3D_QUERYTYPE_SOP_STREAM3 = 15, SVGA3D_QUERYTYPE_MAX } SVGA3dQueryType; +typedef uint8 SVGA3dQueryTypeUint8; + #define SVGA3D_NUM_QUERYTYPE (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN) /* * This is the maximum number of queries per context that can be active * simultaneously between a beginQuery and endQuery. 
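The shader type enum above gains hull, domain and compute shader entries, and the count macros are reworked so SVGA3D_NUM_SHADERTYPE covers them all. Since SVGA3D_SHADERTYPE_MIN is 1, per-shader-type tables are indexed with an offset; a minimal sketch, with the array and function names invented for illustration:

   static uint32 example_shaders_emitted[SVGA3D_NUM_SHADERTYPE];

   static inline void
   example_note_shader(SVGA3dShaderType type)
   {
      assert(type >= SVGA3D_SHADERTYPE_MIN && type < SVGA3D_SHADERTYPE_MAX);
      example_shaders_emitted[type - SVGA3D_SHADERTYPE_MIN]++;
   }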
*/ -#define SVGA3D_MAX_QUERY_PER_CONTEXT 64 +#define SVGA3D_MAX_QUERY 64 + +/* + * Query result buffer formats + */ +typedef +#include "vmware_pack_begin.h" +struct { + uint32 samplesRendered; +} +#include "vmware_pack_end.h" +SVGADXOcclusionQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 passed; +} +#include "vmware_pack_end.h" +SVGADXEventQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 timestamp; +} +#include "vmware_pack_end.h" +SVGADXTimestampQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 realFrequency; + uint32 disjoint; +} +#include "vmware_pack_end.h" +SVGADXTimestampDisjointQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 inputAssemblyVertices; + uint64 inputAssemblyPrimitives; + uint64 vertexShaderInvocations; + uint64 geometryShaderInvocations; + uint64 geometryShaderPrimitives; + uint64 clipperInvocations; + uint64 clipperPrimitives; + uint64 pixelShaderInvocations; + uint64 hullShaderInvocations; + uint64 domainShaderInvocations; + uint64 computeShaderInvocations; +} +#include "vmware_pack_end.h" +SVGADXPipelineStatisticsQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 anySamplesRendered; +} +#include "vmware_pack_end.h" +SVGADXOcclusionPredicateQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 numPrimitivesWritten; + uint64 numPrimitivesRequired; +} +#include "vmware_pack_end.h" +SVGADXStreamOutStatisticsQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 overflowed; +} +#include "vmware_pack_end.h" +SVGADXStreamOutPredicateQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 samplesRendered; +} +#include "vmware_pack_end.h" +SVGADXOcclusion64QueryResult; + +/* + * SVGADXQueryResultUnion is not intended for use in the protocol, but is + * very helpful when working with queries generically. + */ +typedef +#include "vmware_pack_begin.h" +union SVGADXQueryResultUnion { + SVGADXOcclusionQueryResult occ; + SVGADXEventQueryResult event; + SVGADXTimestampQueryResult ts; + SVGADXTimestampDisjointQueryResult tsDisjoint; + SVGADXPipelineStatisticsQueryResult pipelineStats; + SVGADXOcclusionPredicateQueryResult occPred; + SVGADXStreamOutStatisticsQueryResult soStats; + SVGADXStreamOutPredicateQueryResult soPred; + SVGADXOcclusion64QueryResult occ64; +} +#include "vmware_pack_end.h" +SVGADXQueryResultUnion; + typedef enum { - SVGA3D_QUERYSTATE_PENDING = 0, /* Waiting on the host (set by guest) */ - SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully (set by host) */ - SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully (set by host) */ - SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (For guest use only) */ + SVGA3D_QUERYSTATE_PENDING = 0, /* Query is not finished yet */ + SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully */ + SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully */ + SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (guest only) */ } SVGA3dQueryState; typedef enum { @@ -1249,9 +1531,9 @@ typedef struct { union { struct { - uint16 function; // SVGA3dFogFunction - uint8 type; // SVGA3dFogType - uint8 base; // SVGA3dFogBase + uint16 function; /* SVGA3dFogFunction */ + uint8 type; /* SVGA3dFogType */ + uint8 base; /* SVGA3dFogBase */ }; uint32 uintValue; }; @@ -1287,8 +1569,47 @@ SVGA3dSize; /* * Guest-backed objects definitions. 
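As its comment notes, SVGADXQueryResultUnion above is meant for handling query results generically rather than on the wire. A minimal sketch of such a reader, restricted to the occlusion-style results defined in this hunk; the function name is invented for illustration:

   static uint64
   example_query_samples(SVGA3dQueryType type,
                         const SVGADXQueryResultUnion *result)
   {
      switch (type) {
      case SVGA3D_QUERYTYPE_OCCLUSION:
         return result->occ.samplesRendered;
      case SVGA3D_QUERYTYPE_OCCLUSION64:
         return result->occ64.samplesRendered;
      case SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE:
         return result->occPred.anySamplesRendered;
      default:
         return 0;
      }
   }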
*/ +typedef enum { + SVGA_OTABLE_MOB = 0, + SVGA_OTABLE_MIN = 0, + SVGA_OTABLE_SURFACE = 1, + SVGA_OTABLE_CONTEXT = 2, + SVGA_OTABLE_SHADER = 3, + SVGA_OTABLE_SCREENTARGET = 4, + + SVGA_OTABLE_DX9_MAX = 5, -typedef uint32 SVGAMobId; + SVGA_OTABLE_DXCONTEXT = 5, + SVGA_OTABLE_MAX = 6 +} SVGAOTableType; + +/* + * Deprecated. + */ +#define SVGA_OTABLE_COUNT 4 + +typedef enum { + SVGA_COTABLE_MIN = 0, + SVGA_COTABLE_RTVIEW = 0, + SVGA_COTABLE_DSVIEW = 1, + SVGA_COTABLE_SRVIEW = 2, + SVGA_COTABLE_ELEMENTLAYOUT = 3, + SVGA_COTABLE_BLENDSTATE = 4, + SVGA_COTABLE_DEPTHSTENCIL = 5, + SVGA_COTABLE_RASTERIZERSTATE = 6, + SVGA_COTABLE_SAMPLER = 7, + SVGA_COTABLE_STREAMOUTPUT = 8, + SVGA_COTABLE_DXQUERY = 9, + SVGA_COTABLE_DXSHADER = 10, + SVGA_COTABLE_DX10_MAX = 11, + SVGA_COTABLE_UAVIEW = 11, + SVGA_COTABLE_MAX +} SVGACOTableType; + +/* + * The largest size (number of entries) allowed in a COTable. + */ +#define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2) typedef enum SVGAMobFormat { SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID, @@ -1300,7 +1621,11 @@ typedef enum SVGAMobFormat { SVGA3D_MOBFMT_PTDEPTH64_0 = 4, SVGA3D_MOBFMT_PTDEPTH64_1 = 5, SVGA3D_MOBFMT_PTDEPTH64_2 = 6, + SVGA3D_MOBFMT_PREDX_MAX = 7, + SVGA3D_MOBFMT_EMPTY = 7, SVGA3D_MOBFMT_MAX, } SVGAMobFormat; -#endif // _SVGA3D_TYPES_H_ +#define SVGA3D_MOB_EMPTY_BASE 1 + +#endif /* _SVGA3D_TYPES_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h b/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h index 9d44c4704..884b1d1fb 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -75,7 +75,7 @@ */ #define SVGA_ESCAPE_VMWARE_HINT 0x00030000 -#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 // Deprecated +#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 /* Deprecated */ typedef struct { diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h b/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h index ccbf7912e..161c3de7b 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -38,9 +38,9 @@ * Video formats we support */ -#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2' -#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2' -#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y' +#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */ +#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */ +#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */ typedef enum { SVGA_OVERLAY_FORMAT_INVALID = 0, @@ -68,7 +68,7 @@ struct SVGAEscapeVideoSetRegs { uint32 streamId; } header; - // May include zero or more items. + /* May include zero or more items. 
*/ struct { uint32 registerId; uint32 value; @@ -134,12 +134,12 @@ struct { */ static inline Bool -VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN - uint32 *width, // IN / OUT - uint32 *height, // IN / OUT - uint32 *size, // OUT - uint32 *pitches, // OUT (optional) - uint32 *offsets) // OUT (optional) +VMwareVideoGetAttributes(const SVGAOverlayFormat format, /* IN */ + uint32 *width, /* IN / OUT */ + uint32 *height, /* IN / OUT */ + uint32 *size, /* OUT */ + uint32 *pitches, /* OUT (optional) */ + uint32 *offsets) /* OUT (optional) */ { int tmp; @@ -196,4 +196,4 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN return TRUE; } -#endif // _SVGA_OVERLAY_H_ +#endif /* _SVGA_OVERLAY_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h b/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h index e75b442f9..2661eef03 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -43,6 +43,8 @@ typedef enum { SVGA_REG_ENABLE_HIDE = (1 << 1), } SvgaRegEnable; +typedef uint32 SVGAMobId; + /* * Arbitrary and meaningless limits. Please ignore these when writing * new drivers. @@ -490,7 +492,7 @@ typedef struct SVGAGMRImageFormat { struct { uint32 bitsPerPixel : 8; uint32 colorDepth : 8; - uint32 reserved : 16; // Must be zero + uint32 reserved : 16; /* Must be zero */ }; uint32 value; @@ -533,7 +535,7 @@ typedef struct SVGAColorBGRX { uint32 b : 8; uint32 g : 8; uint32 r : 8; - uint32 x : 8; // Unused + uint32 x : 8; /* Unused */ }; uint32 value; @@ -605,24 +607,35 @@ struct { * SVGA_CAP_COMMAND_BUFFERS -- * Enable register based command buffer submission. * + * SVGA_CAP_DEAD1 -- + * This cap was incorrectly used by old drivers and should not be + * reused. + * + * SVGA_CAP_CMD_BUFFERS_2 -- + * Enable support for the prepend command buffer submision + * registers. SVGA_REG_CMD_PREPEND_LOW and + * SVGA_REG_CMD_PREPEND_HIGH. + * * SVGA_CAP_GBOBJECTS -- * Enable guest-backed objects and surfaces. * + * SVGA_CAP_CMD_BUFFERS_3 -- + * Enable support for command buffers in a mob. 
*/ #define SVGA_CAP_NONE 0x00000000 #define SVGA_CAP_RECT_COPY 0x00000002 #define SVGA_CAP_CURSOR 0x00000020 -#define SVGA_CAP_CURSOR_BYPASS 0x00000040 // Legacy (Use Cursor Bypass 3 instead) -#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 // Legacy (Use Cursor Bypass 3 instead) +#define SVGA_CAP_CURSOR_BYPASS 0x00000040 +#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 #define SVGA_CAP_8BIT_EMULATION 0x00000100 #define SVGA_CAP_ALPHA_CURSOR 0x00000200 #define SVGA_CAP_3D 0x00004000 #define SVGA_CAP_EXTENDED_FIFO 0x00008000 -#define SVGA_CAP_MULTIMON 0x00010000 // Legacy multi-monitor support +#define SVGA_CAP_MULTIMON 0x00010000 #define SVGA_CAP_PITCHLOCK 0x00020000 #define SVGA_CAP_IRQMASK 0x00040000 -#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 // Legacy multi-monitor support +#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 #define SVGA_CAP_GMR 0x00100000 #define SVGA_CAP_TRACES 0x00200000 #define SVGA_CAP_GMR2 0x00400000 @@ -631,6 +644,9 @@ struct { #define SVGA_CAP_DEAD1 0x02000000 #define SVGA_CAP_CMD_BUFFERS_2 0x04000000 #define SVGA_CAP_GBOBJECTS 0x08000000 +#define SVGA_CAP_CMD_BUFFERS_3 0x10000000 + +#define SVGA_CAP_CMD_RESERVED 0x80000000 /* @@ -698,7 +714,7 @@ enum { SVGA_FIFO_CAPABILITIES = 4, SVGA_FIFO_FLAGS, - // Valid with SVGA_FIFO_CAP_FENCE: + /* Valid with SVGA_FIFO_CAP_FENCE: */ SVGA_FIFO_FENCE, /* @@ -710,20 +726,20 @@ enum { * These in block 3a, the VMX currently considers mandatory for the * extended FIFO. */ - - // Valid if exists (i.e. if extended FIFO enabled): + + /* Valid if exists (i.e. if extended FIFO enabled): */ SVGA_FIFO_3D_HWVERSION, /* See SVGA3dHardwareVersion in svga3d_reg.h */ - // Valid with SVGA_FIFO_CAP_PITCHLOCK: + /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */ SVGA_FIFO_PITCHLOCK, - // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: + /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */ SVGA_FIFO_CURSOR_ON, /* Cursor bypass 3 show/hide register */ SVGA_FIFO_CURSOR_X, /* Cursor bypass 3 x register */ SVGA_FIFO_CURSOR_Y, /* Cursor bypass 3 y register */ SVGA_FIFO_CURSOR_COUNT, /* Incremented when any of the other 3 change */ SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */ - // Valid with SVGA_FIFO_CAP_RESERVE: + /* Valid with SVGA_FIFO_CAP_RESERVE: */ SVGA_FIFO_RESERVED, /* Bytes past NEXT_CMD with real contents */ /* @@ -789,7 +805,7 @@ enum { * sets SVGA_FIFO_MIN high enough to leave room for them. 
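The capability bits above are plain masks over the device's 32-bit capability word, so feature checks reduce to a bitwise test. A minimal sketch, assuming the caps value has already been read from the device (not shown in this diff); the helper name is invented for illustration:

   static inline bool
   example_device_has_gb_objects(uint32 caps)
   {
      /* The same pattern applies to SVGA_CAP_CMD_BUFFERS_3 and the rest. */
      return (caps & SVGA_CAP_GBOBJECTS) != 0;
   }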
*/ - // Valid if register exists: + /* Valid if register exists: */ SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */ SVGA_FIFO_FENCE_GOAL, /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */ SVGA_FIFO_BUSY, /* See "FIFO Synchronization Registers" */ @@ -1046,7 +1062,7 @@ enum { #define SVGA_FIFO_FLAG_NONE 0 #define SVGA_FIFO_FLAG_ACCELFRONT (1<<0) -#define SVGA_FIFO_FLAG_RESERVED (1<<31) // Internal use only +#define SVGA_FIFO_FLAG_RESERVED (1<<31) /* Internal use only */ /* * FIFO reservation sentinel value @@ -1079,22 +1095,23 @@ enum { SVGA_VIDEO_DATA_OFFSET, SVGA_VIDEO_FORMAT, SVGA_VIDEO_COLORKEY, - SVGA_VIDEO_SIZE, // Deprecated + SVGA_VIDEO_SIZE, /* Deprecated */ SVGA_VIDEO_WIDTH, SVGA_VIDEO_HEIGHT, SVGA_VIDEO_SRC_X, SVGA_VIDEO_SRC_Y, SVGA_VIDEO_SRC_WIDTH, SVGA_VIDEO_SRC_HEIGHT, - SVGA_VIDEO_DST_X, // Signed int32 - SVGA_VIDEO_DST_Y, // Signed int32 + SVGA_VIDEO_DST_X, /* Signed int32 */ + SVGA_VIDEO_DST_Y, /* Signed int32 */ SVGA_VIDEO_DST_WIDTH, SVGA_VIDEO_DST_HEIGHT, SVGA_VIDEO_PITCH_1, SVGA_VIDEO_PITCH_2, SVGA_VIDEO_PITCH_3, - SVGA_VIDEO_DATA_GMRID, // Optional, defaults to SVGA_GMR_FRAMEBUFFER - SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID) + SVGA_VIDEO_DATA_GMRID, /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */ + SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords */ + /* (SVGA_ID_INVALID) */ SVGA_VIDEO_NUM_REGS }; @@ -1180,10 +1197,10 @@ typedef struct SVGADisplayTopology { * value of zero means no cloning should happen. */ -#define SVGA_SCREEN_MUST_BE_SET (1 << 0) // Must be set or results undefined -#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET // Deprecated -#define SVGA_SCREEN_IS_PRIMARY (1 << 1) // Guest considers this screen to be 'primary' -#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) // Guest is running a fullscreen app here +#define SVGA_SCREEN_MUST_BE_SET (1 << 0) +#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */ +#define SVGA_SCREEN_IS_PRIMARY (1 << 1) +#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) /* * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2. When the screen is @@ -1207,7 +1224,7 @@ typedef struct SVGADisplayTopology { typedef struct { - uint32 structSize; // sizeof(SVGAScreenObject) + uint32 structSize; /* sizeof(SVGAScreenObject) */ uint32 id; uint32 flags; struct { @@ -1224,6 +1241,13 @@ struct { * with SVGA_FIFO_CAP_SCREEN_OBJECT. */ SVGAGuestImage backingStore; + + /* + * The cloneCount field is treated as a hint from the guest that + * the user wants this display to be cloned, cloneCount times. + * + * A value of zero means no cloning should happen. + */ uint32 cloneCount; } SVGAScreenObject; @@ -1238,7 +1262,7 @@ struct { * Note the holes in the command ID numbers: These commands have been * deprecated, and the old IDs must not be reused. * - * Command IDs from 1000 to 1999 are reserved for use by the SVGA3D + * Command IDs from 1000 to 2999 are reserved for use by the SVGA3D * protocol. * * Each command's parameters are described by the comments and @@ -1267,6 +1291,8 @@ typedef enum { SVGA_CMD_REMAP_GMR2 = 42, SVGA_CMD_DEAD = 43, SVGA_CMD_DEAD_2 = 44, + SVGA_CMD_NOP = 45, + SVGA_CMD_NOP_ERROR = 46, SVGA_CMD_MAX } SVGAFifoCmdId; @@ -1372,13 +1398,13 @@ struct { typedef struct { - uint32 id; // Reserved, must be zero. + uint32 id; /* Reserved, must be zero. 
*/ uint32 hotspotX; uint32 hotspotY; uint32 width; uint32 height; - uint32 andMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL - uint32 xorMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL + uint32 andMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */ + uint32 xorMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */ /* * Followed by scanline data for AND mask, then XOR mask. * Each scanline is padded to a 32-bit boundary. @@ -1401,7 +1427,7 @@ struct { typedef struct { - uint32 id; // Reserved, must be zero. + uint32 id; /* Reserved, must be zero. */ uint32 hotspotX; uint32 hotspotY; uint32 width; @@ -1449,12 +1475,12 @@ struct { typedef struct { - uint32 color; // In the same format as the GFB + uint32 color; /* In the same format as the GFB */ uint32 x; uint32 y; uint32 width; uint32 height; - uint32 rop; // Must be SVGA_ROP_COPY + uint32 rop; /* Must be SVGA_ROP_COPY */ } SVGAFifoCmdFrontRopFill; @@ -1526,7 +1552,7 @@ struct { typedef struct { - SVGAScreenObject screen; // Variable-length according to version + SVGAScreenObject screen; /* Variable-length according to version */ } SVGAFifoCmdDefineScreen; @@ -1807,8 +1833,8 @@ typedef struct { uint32 gmrId; SVGARemapGMR2Flags flags; - uint32 offsetPages; // offset in pages to begin remap - uint32 numPages; // number of pages to remap + uint32 offsetPages; /* offset in pages to begin remap */ + uint32 numPages; /* number of pages to remap */ /* * Followed by additional data depending on SVGARemapGMR2Flags. * @@ -1823,7 +1849,7 @@ struct { /* * Size of SVGA device memory such as frame buffer and FIFO. */ -#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) // bytes +#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) /* bytes */ #define SVGA_VRAM_MIN_SIZE_3D (16 * 1024 * 1024) #define SVGA_VRAM_MAX_SIZE (128 * 1024 * 1024) #define SVGA_MEMORY_SIZE_MAX (1024 * 1024 * 1024) @@ -1832,7 +1858,7 @@ struct { #define SVGA_GRAPHICS_MEMORY_KB_MAX (2 * 1024 * 1024) #define SVGA_GRAPHICS_MEMORY_KB_DEFAULT (256 * 1024) -#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) // 64 MB +#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) /* 64 MB */ /* * To simplify autoDetect display configuration, support a minimum of @@ -1848,7 +1874,7 @@ struct { #define SVGA_VRAM_SIZE (4 * 1024 * 1024) #define SVGA_VRAM_SIZE_3D (64 * 1024 * 1024) #define SVGA_FIFO_SIZE (256 * 1024) -#define SVGA_FIFO_SIZE_3D (516 * 1024) // Bump to 516KB to workaround WDDM driver issue (see bug# 744318) +#define SVGA_FIFO_SIZE_3D (516 * 1024) #define SVGA_MEMORY_SIZE_DEFAULT (160 * 1024 * 1024) #define SVGA_AUTODETECT_DEFAULT FALSE #else diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c index b27183217..e45b3e72a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c @@ -66,7 +66,7 @@ surface_to_surfaceid(struct svga_winsys_context *swc, // IN if (surface) { struct svga_surface *s = svga_surface(surface); swc->surface_relocation(swc, &id->sid, NULL, s->handle, flags); - id->face = s->real_face; /* faces have the same order */ + id->face = s->real_layer; /* faces have the same order */ id->mipmap = s->real_level; } else { @@ -337,7 +337,7 @@ SVGA3D_DefineSurface2D(struct svga_winsys_context *swc, // IN mipSizes[0].height = height; mipSizes[0].depth = 1; - swc->commit(swc);; + swc->commit(swc); return PIPE_OK; } @@ -372,7 +372,7 @@ SVGA3D_DestroySurface(struct svga_winsys_context *swc, swc->surface_relocation(swc, &cmd->sid, NULL, sid, SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); - 
swc->commit(swc);; + swc->commit(swc); return PIPE_OK; } @@ -460,7 +460,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, swc->surface_relocation(swc, &cmd->host.sid, NULL, texture->handle, surface_flags); - cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ + cmd->host.face = st->slice; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ cmd->host.mipmap = st->base.level; cmd->transfer = transfer; @@ -473,6 +473,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, pSuffix->flags = flags; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -543,6 +544,7 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc, pSuffix->flags = flags; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -842,6 +844,8 @@ SVGA3D_SetShader(struct svga_winsys_context *swc, { SVGA3dCmdSetShader *cmd; + assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS); + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SET_SHADER, sizeof *cmd, 0); @@ -1014,6 +1018,8 @@ SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, *decls = declArray; *ranges = rangeArray; + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + return PIPE_OK; } @@ -1382,10 +1388,10 @@ SVGA3D_BeginGBQuery(struct svga_winsys_context *swc, SVGA_3D_CMD_BEGIN_GB_QUERY, sizeof *cmd, 1); - if(!cmd) + if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; swc->commit(swc); @@ -1462,10 +1468,10 @@ SVGA3D_EndGBQuery(struct svga_winsys_context *swc, SVGA_3D_CMD_END_GB_QUERY, sizeof *cmd, 2); - if(!cmd) + if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer, @@ -1549,10 +1555,10 @@ SVGA3D_WaitForGBQuery(struct svga_winsys_context *swc, SVGA_3D_CMD_WAIT_FOR_GB_QUERY, sizeof *cmd, 2); - if(!cmd) + if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer, @@ -1615,36 +1621,6 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc, enum pipe_error -SVGA3D_DefineGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader, - SVGA3dShaderType type, - uint32 sizeInBytes) -{ - SVGA3dCmdDefineGBShader *cmd; - - assert(sizeInBytes % 4 == 0); - assert(type == SVGA3D_SHADERTYPE_VS || - type == SVGA3D_SHADERTYPE_PS); - - cmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DEFINE_GB_SHADER, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader); - cmd->type = type; - cmd->sizeInBytes = sizeInBytes; - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error SVGA3D_BindGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader) { @@ -1658,7 +1634,7 @@ SVGA3D_BindGBShader(struct svga_winsys_context *swc, return PIPE_ERROR_OUT_OF_MEMORY; swc->shader_relocation(swc, &cmd->shid, &cmd->mobid, - &cmd->offsetInBytes, gbshader); + &cmd->offsetInBytes, gbshader, 0); swc->commit(swc); @@ -1672,6 +1648,8 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader) { SVGA3dCmdSetShader *cmd; + + assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS); cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SET_SHADER, @@ -1680,10 +1658,10 @@ SVGA3D_SetGBShader(struct svga_winsys_context 
*swc, if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; if (gbshader) - swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader); + swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0); else cmd->shid = SVGA_ID_INVALID; swc->commit(swc); @@ -1692,27 +1670,6 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc, } -enum pipe_error -SVGA3D_DestroyGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader) -{ - SVGA3dCmdDestroyGBShader *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DESTROY_GB_SHADER, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader); - - swc->commit(swc); - - return PIPE_OK; -} - - /** * \param flags mask of SVGA_RELOC_READ / _WRITE */ @@ -1738,89 +1695,6 @@ SVGA3D_BindGBSurface(struct svga_winsys_context *swc, } -enum pipe_error -SVGA3D_DefineGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdDefineGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DEFINE_GB_CONTEXT, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error -SVGA3D_DestroyGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdDestroyGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DESTROY_GB_CONTEXT, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error -SVGA3D_BindGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdBindGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_BIND_GB_CONTEXT, - sizeof *cmd, - 2); /* two relocations */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - swc->context_relocation(swc, &cmd->mobid); - cmd->validContents = 0; /* XXX pass as a parameter? */ - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error -SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdInvalidateGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_INVALIDATE_GB_CONTEXT, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - - swc->commit(swc); - - return PIPE_OK; -} - - - /** * Update an image in a guest-backed surface. * (Inform the device that the guest-contents have been updated.) 
@@ -1848,6 +1722,7 @@ SVGA3D_UpdateGBImage(struct svga_winsys_context *swc, cmd->box = *box; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1874,6 +1749,7 @@ SVGA3D_UpdateGBSurface(struct svga_winsys_context *swc, SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1903,6 +1779,7 @@ SVGA3D_ReadbackGBImage(struct svga_winsys_context *swc, cmd->image.mipmap = mipLevel; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1929,6 +1806,7 @@ SVGA3D_ReadbackGBSurface(struct svga_winsys_context *swc, SVGA_RELOC_READ | SVGA_RELOC_INTERNAL); swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1957,6 +1835,7 @@ SVGA3D_ReadbackGBImagePartial(struct svga_winsys_context *swc, cmd->invertBox = invertBox; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h index 6f658bf3a..26e4690e6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h @@ -47,6 +47,7 @@ struct svga_winsys_context; struct svga_winsys_buffer; struct svga_winsys_surface; struct svga_winsys_gb_shader; +struct svga_winsys_gb_query; /* @@ -229,12 +230,6 @@ SVGA3D_SetShader(struct svga_winsys_context *swc, */ enum pipe_error -SVGA3D_DefineGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader, - SVGA3dShaderType type, - uint32 sizeInBytes); - -enum pipe_error SVGA3D_BindGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader); @@ -244,26 +239,10 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader); enum pipe_error -SVGA3D_DestroyGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader); - -enum pipe_error SVGA3D_BindGBSurface(struct svga_winsys_context *swc, struct svga_winsys_surface *surface); enum pipe_error -SVGA3D_DefineGBContext(struct svga_winsys_context *swc); - -enum pipe_error -SVGA3D_DestroyGBContext(struct svga_winsys_context *swc); - -enum pipe_error -SVGA3D_BindGBContext(struct svga_winsys_context *swc); - -enum pipe_error -SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc); - -enum pipe_error SVGA3D_UpdateGBImage(struct svga_winsys_context *swc, struct svga_winsys_surface *surface, const SVGA3dBox *box, @@ -327,4 +306,340 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc, SVGA3dQueryType type, struct svga_winsys_buffer *buffer); + + +/* + * VGPU10 commands + */ + +enum pipe_error +SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + uint32 dstSubResource, + struct svga_winsys_surface *srcSurf, + uint32 srcSubResource, + const SVGA3dCopyBox *box); + +enum pipe_error +SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + struct svga_winsys_surface *srcSurf); + +enum pipe_error +SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc, + unsigned count, const SVGA3dViewport *viewports); + +enum pipe_error +SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId); + +enum pipe_error +SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc, + SVGA3dShaderType type, + uint32 startView, + unsigned count, + const SVGA3dShaderResourceViewId ids[], 
+ struct svga_winsys_surface **views); + +enum pipe_error +SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc, + unsigned count, + uint32 startSampler, + SVGA3dShaderType type, + const SVGA3dSamplerId *samplerIds); + +enum pipe_error +SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc, + unsigned color_count, + struct pipe_surface **color_surfs, + struct pipe_surface *depth_stencil_surf); + +enum pipe_error +SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + const float *blendFactor, uint32 sampleMask); + +enum pipe_error +SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint32 stencilRef); + +enum pipe_error +SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId); + +enum pipe_error +SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 predicateValue); + +enum pipe_error +SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc, + unsigned count, const SVGA3dSoTarget *targets, + struct svga_winsys_surface **surfaces); + +enum pipe_error +SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc, + unsigned count, + const SVGASignedRect *rects); + +enum pipe_error +SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid); + +enum pipe_error +SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc, + uint32 vertexCount, uint32 startVertexLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc, + uint32 indexCount, uint32 startIndexLocation, + int32 baseVertexLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc, + uint32 vertexCountPerInstance, + uint32 instanceCount, + uint32 startVertexLocation, + uint32 startInstanceLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc, + uint32 indexCountPerInstance, + uint32 instanceCount, + uint32 startIndexLocation, + int32 baseVertexLocation, + uint32 startInstanceLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc); + +enum pipe_error +SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + SVGA3dQueryType type, + SVGA3dDXQueryFlags flags); + +enum pipe_error +SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc, + struct svga_winsys_gb_query *gbQuery, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 mobOffset); + +enum pipe_error +SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc, + struct pipe_surface *color_surf, + const float *rgba); + +enum pipe_error +SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc, + struct pipe_surface *ds_surf, + uint16 flags, uint16 stencil, float depth); + +enum pipe_error +SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *surf, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dShaderResourceViewDesc *desc); + +enum 
pipe_error +SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId); + +enum pipe_error +SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc); + +enum pipe_error +SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId); + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc); + + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId); + +enum pipe_error +SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc, + unsigned count, + SVGA3dElementLayoutId elementLayoutId, + const SVGA3dInputElementDesc *elements); + +enum pipe_error +SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId); + +enum pipe_error +SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + uint8 alphaToCoverageEnable, + uint8 independentBlendEnable, + const SVGA3dDXBlendStatePerRT *perRT); + +enum pipe_error +SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId); + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint8 depthEnable, + SVGA3dDepthWriteMask depthWriteMask, + SVGA3dComparisonFunc depthFunc, + uint8 stencilEnable, + uint8 frontEnable, + uint8 backEnable, + uint8 stencilReadMask, + uint8 stencilWriteMask, + uint8 frontStencilFailOp, + uint8 frontStencilDepthFailOp, + uint8 frontStencilPassOp, + SVGA3dComparisonFunc frontStencilFunc, + uint8 backStencilFailOp, + uint8 backStencilDepthFailOp, + uint8 backStencilPassOp, + SVGA3dComparisonFunc backStencilFunc); + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId); + +enum pipe_error +SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId, + uint8 fillMode, + SVGA3dCullMode cullMode, + uint8 frontCounterClockwise, + int32 depthBias, + float depthBiasClamp, + float slopeScaledDepthBias, + uint8 depthClipEnable, + uint8 scissorEnable, + uint8 multisampleEnable, + uint8 antialiasedLineEnable, + float lineWidth, + uint8 lineStippleEnable, + uint8 lineStippleFactor, + uint16 lineStipplePattern, + uint8 provokingVertexLast); + +enum pipe_error +SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId); + +enum pipe_error +SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId, + SVGA3dFilter filter, + uint8 addressU, + uint8 addressV, + uint8 addressW, + float mipLODBias, + uint8 maxAnisotropy, + uint8 comparisonFunc, + SVGA3dRGBAFloat borderColor, + float minLOD, + float maxLOD); + +enum pipe_error +SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId); + +enum pipe_error +SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc, 
+ SVGA3dShaderId shaderId); + +enum pipe_error +SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId, + SVGA3dShaderType type, + uint32 sizeInBytes); + +enum pipe_error +SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid, + uint32 numOutputStreamEntries, + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS], + const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]); + +enum pipe_error +SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid); + +enum pipe_error +SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + unsigned subResource); + +enum pipe_error +SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId); + +enum pipe_error +SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc, + unsigned count, + uint32 startBuffer, + const SVGA3dVertexBuffer *bufferInfo, + struct svga_winsys_surface **surfaces); + +enum pipe_error +SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc, + SVGA3dPrimitiveType topology); + +enum pipe_error +SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc, + struct svga_winsys_surface *indexes, + SVGA3dSurfaceFormat format, uint32 offset); + +enum pipe_error +SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc, + unsigned slot, + SVGA3dShaderType type, + struct svga_winsys_surface *surface, + uint32 offsetInBytes, + uint32 sizeInBytes); + +enum pipe_error +SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + const SVGA3dBox *box, + unsigned subResource); + +enum pipe_error +SVGA3D_vgpu10_GenMips(struct svga_winsys_context *swc, + const SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *view); #endif /* __SVGA3D_H__ */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/lib/mesa/src/gallium/drivers/svga/svga_cmd_vgpu10.c new file mode 100644 index 000000000..99c9add17 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd_vgpu10.c @@ -0,0 +1,1316 @@ +/********************************************************** + * Copyright 2008-2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +/** + * @file svga_cmd_vgpu10.c + * + * Command construction utility for the vgpu10 SVGA3D protocol. + * + * \author Mingcheng Chen + * \author Brian Paul + */ + + +#include "svga_winsys.h" +#include "svga_resource_buffer.h" +#include "svga_resource_texture.h" +#include "svga_surface.h" +#include "svga_cmd.h" + + +/** + * Emit a surface relocation for RenderTargetViewId + */ +static void +view_relocation(struct svga_winsys_context *swc, // IN + struct pipe_surface *surface, // IN + SVGA3dRenderTargetViewId *id, // OUT + unsigned flags) +{ + if (surface) { + struct svga_surface *s = svga_surface(surface); + assert(s->handle); + swc->surface_relocation(swc, id, NULL, s->handle, flags); + } + else { + swc->surface_relocation(swc, id, NULL, NULL, flags); + } +} + + +/** + * Emit a surface relocation for a ResourceId. + */ +static void +surface_to_resourceid(struct svga_winsys_context *swc, // IN + struct svga_winsys_surface *surface, // IN + SVGA3dSurfaceId *sid, // OUT + unsigned flags) // IN +{ + if (surface) { + swc->surface_relocation(swc, sid, NULL, surface, flags); + } + else { + swc->surface_relocation(swc, sid, NULL, NULL, flags); + } +} + + +#define SVGA3D_CREATE_COMMAND(CommandName, CommandCode) \ +SVGA3dCmdDX##CommandName *cmd; \ +{ \ + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \ + sizeof(SVGA3dCmdDX##CommandName), 0); \ + if (!cmd) \ + return PIPE_ERROR_OUT_OF_MEMORY; \ +} + +#define SVGA3D_CREATE_CMD_COUNT(CommandName, CommandCode, ElementClassName) \ +SVGA3dCmdDX##CommandName *cmd; \ +{ \ + assert(count > 0); \ + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \ + sizeof(SVGA3dCmdDX##CommandName) + \ + count * sizeof(ElementClassName), 0); \ + if (!cmd) \ + return PIPE_ERROR_OUT_OF_MEMORY; \ +} + +#define SVGA3D_COPY_BASIC(VariableName) \ +{ \ + cmd->VariableName = VariableName; \ +} + +#define SVGA3D_COPY_BASIC_2(VariableName1, VariableName2) \ +{ \ + SVGA3D_COPY_BASIC(VariableName1); \ + SVGA3D_COPY_BASIC(VariableName2); \ +} + +#define SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3) \ +{ \ + SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \ + SVGA3D_COPY_BASIC(VariableName3); \ +} + +#define SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \ + VariableName4) \ +{ \ + SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \ + SVGA3D_COPY_BASIC_2(VariableName3, VariableName4); \ +} + +#define SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5) \ +{\ + SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \ + SVGA3D_COPY_BASIC_2(VariableName4, VariableName5); \ +} + +#define SVGA3D_COPY_BASIC_6(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6) \ +{\ + SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \ + SVGA3D_COPY_BASIC_3(VariableName4, VariableName5, VariableName6); \ +} + +#define SVGA3D_COPY_BASIC_7(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6, \ + VariableName7) \ +{\ + SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \ + VariableName4); \ + SVGA3D_COPY_BASIC_3(VariableName5, VariableName6, VariableName7); \ +} + +#define SVGA3D_COPY_BASIC_8(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6, \ + VariableName7, VariableName8) \ +{\ + SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \ + VariableName4); \ + 
SVGA3D_COPY_BASIC_4(VariableName5, VariableName6, VariableName7, \ + VariableName8); \ +} + +#define SVGA3D_COPY_BASIC_9(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6, \ + VariableName7, VariableName8, VariableName9) \ +{\ + SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5); \ + SVGA3D_COPY_BASIC_4(VariableName6, VariableName7, VariableName8, \ + VariableName9); \ +} + + +enum pipe_error +SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + uint32 dstSubResource, + struct svga_winsys_surface *srcSurf, + uint32 srcSubResource, + const SVGA3dCopyBox *box) +{ + SVGA3dCmdDXPredCopyRegion *cmd = + SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_PRED_COPY_REGION, + sizeof(SVGA3dCmdDXPredCopyRegion), + 2); /* two relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE); + swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ); + cmd->dstSubResource = dstSubResource; + cmd->srcSubResource = srcSubResource; + cmd->box = *box; + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + struct svga_winsys_surface *srcSurf) +{ + SVGA3dCmdDXPredCopy *cmd = + SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_PRED_COPY, + sizeof(SVGA3dCmdDXPredCopy), + 2); /* two relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE); + swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ); + + swc->commit(swc); + + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc, + unsigned count, + const SVGA3dViewport *viewports) +{ + SVGA3D_CREATE_CMD_COUNT(SetViewports, SET_VIEWPORTS, SVGA3dViewport); + + memcpy(cmd + 1, viewports, count * sizeof(SVGA3dViewport)); + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId) +{ + SVGA3dCmdDXSetShader *cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_SHADER, + sizeof *cmd, + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->shader_relocation(swc, &cmd->shaderId, NULL, NULL, gbshader, 0); + + cmd->type = type; + cmd->shaderId = shaderId; + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc, + SVGA3dShaderType type, + uint32 startView, + unsigned count, + const SVGA3dShaderResourceViewId ids[], + struct svga_winsys_surface **views) +{ + SVGA3dCmdDXSetShaderResources *cmd; + SVGA3dShaderResourceViewId *cmd_ids; + unsigned i; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_SHADER_RESOURCES, + sizeof(SVGA3dCmdDXSetShaderResources) + + count * sizeof(SVGA3dShaderResourceViewId), + count); /* 'count' relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + + cmd->type = type; + cmd->startView = startView; + + cmd_ids = (SVGA3dShaderResourceViewId *) (cmd + 1); + for (i = 0; i < count; i++) { + swc->surface_relocation(swc, cmd_ids + i, NULL, views[i], + SVGA_RELOC_READ); + cmd_ids[i] = ids[i]; + } + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc, + unsigned 
count, + uint32 startSampler, + SVGA3dShaderType type, + const SVGA3dSamplerId *samplerIds) +{ + SVGA3D_CREATE_CMD_COUNT(SetSamplers, SET_SAMPLERS, SVGA3dSamplerId); + + SVGA3D_COPY_BASIC_2(startSampler, type); + memcpy(cmd + 1, samplerIds, count * sizeof(SVGA3dSamplerId)); + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc, + struct pipe_surface *color_surf, + const float *rgba) +{ + SVGA3dCmdDXClearRenderTargetView *cmd; + struct svga_surface *ss = svga_surface(color_surf); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW, + sizeof(SVGA3dCmdDXClearRenderTargetView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + + /* NOTE: The following is pretty tricky. We need to emit a view/surface + * relocation and we have to provide a pointer to an ID which lies in + * the bounds of the command space which we just allocated. However, + * we then need to overwrite it with the original RenderTargetViewId. + */ + view_relocation(swc, color_surf, &cmd->renderTargetViewId, + SVGA_RELOC_WRITE); + cmd->renderTargetViewId = ss->view_id; + + COPY_4V(cmd->rgba.value, rgba); + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc, + unsigned color_count, + struct pipe_surface **color_surfs, + struct pipe_surface *depth_stencil_surf) +{ + const unsigned surf_count = color_count + 1; + SVGA3dCmdDXSetRenderTargets *cmd; + SVGA3dRenderTargetViewId *ctarget; + struct svga_surface *ss; + unsigned i; + + assert(surf_count > 0); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_RENDERTARGETS, + sizeof(SVGA3dCmdDXSetRenderTargets) + + color_count * sizeof(SVGA3dRenderTargetViewId), + surf_count); /* 'surf_count' relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* NOTE: See earlier comment about the tricky handling of the ViewIds. 
+ */ + + /* Depth / Stencil buffer */ + if (depth_stencil_surf) { + ss = svga_surface(depth_stencil_surf); + view_relocation(swc, depth_stencil_surf, &cmd->depthStencilViewId, + SVGA_RELOC_WRITE); + cmd->depthStencilViewId = ss->view_id; + } + else { + /* no depth/stencil buffer - still need a relocation */ + view_relocation(swc, NULL, &cmd->depthStencilViewId, + SVGA_RELOC_WRITE); + cmd->depthStencilViewId = SVGA3D_INVALID_ID; + } + + /* Color buffers */ + ctarget = (SVGA3dRenderTargetViewId *) &cmd[1]; + for (i = 0; i < color_count; i++) { + if (color_surfs[i]) { + ss = svga_surface(color_surfs[i]); + view_relocation(swc, color_surfs[i], ctarget + i, SVGA_RELOC_WRITE); + ctarget[i] = ss->view_id; + } + else { + view_relocation(swc, NULL, ctarget + i, SVGA_RELOC_WRITE); + ctarget[i] = SVGA3D_INVALID_ID; + } + } + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + const float *blendFactor, + uint32 sampleMask) +{ + SVGA3D_CREATE_COMMAND(SetBlendState, SET_BLEND_STATE); + + SVGA3D_COPY_BASIC_2(blendId, sampleMask); + memcpy(cmd->blendFactor, blendFactor, sizeof(float) * 4); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint32 stencilRef) +{ + SVGA3D_CREATE_COMMAND(SetDepthStencilState, SET_DEPTHSTENCIL_STATE); + + SVGA3D_COPY_BASIC_2(depthStencilId, stencilRef); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId) +{ + SVGA3D_CREATE_COMMAND(SetRasterizerState, SET_RASTERIZER_STATE); + + cmd->rasterizerId = rasterizerId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 predicateValue) +{ + SVGA3dCmdDXSetPredication *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_PREDICATION, + sizeof *cmd, 0); + + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->queryId = queryId; + cmd->predicateValue = predicateValue; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc, + unsigned count, + const SVGA3dSoTarget *targets, + struct svga_winsys_surface **surfaces) +{ + SVGA3dCmdDXSetSOTargets *cmd; + SVGA3dSoTarget *sot; + unsigned i; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_SOTARGETS, + sizeof(SVGA3dCmdDXSetSOTargets) + + count * sizeof(SVGA3dSoTarget), + count); + + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->pad0 = 0; + sot = (SVGA3dSoTarget *)(cmd + 1); + for (i = 0; i < count; i++, sot++) { + if (surfaces[i]) { + sot->offset = targets[i].offset; + sot->sizeInBytes = targets[i].sizeInBytes; + swc->surface_relocation(swc, &sot->sid, NULL, surfaces[i], + SVGA_RELOC_WRITE); + } + else { + sot->offset = 0; + sot->sizeInBytes = ~0u; + swc->surface_relocation(swc, &sot->sid, NULL, NULL, + SVGA_RELOC_WRITE); + } + } + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc, + unsigned count, + const SVGASignedRect *rects) +{ + SVGA3dCmdDXSetScissorRects *cmd; + + assert(count > 0); + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SCISSORRECTS, + sizeof(SVGA3dCmdDXSetScissorRects) + + count * sizeof(SVGASignedRect), + 0); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(cmd + 1, 
rects, count * sizeof(SVGASignedRect)); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid) +{ + SVGA3D_CREATE_COMMAND(SetStreamOutput, SET_STREAMOUTPUT); + + cmd->soid = soid; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc, + uint32 vertexCount, + uint32 startVertexLocation) +{ + SVGA3D_CREATE_COMMAND(Draw, DRAW); + + SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc, + uint32 indexCount, + uint32 startIndexLocation, + int32 baseVertexLocation) +{ + SVGA3D_CREATE_COMMAND(DrawIndexed, DRAW_INDEXED); + + SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation, + baseVertexLocation); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc, + uint32 vertexCountPerInstance, + uint32 instanceCount, + uint32 startVertexLocation, + uint32 startInstanceLocation) +{ + SVGA3D_CREATE_COMMAND(DrawInstanced, DRAW_INSTANCED); + + SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount, + startVertexLocation, startInstanceLocation); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc, + uint32 indexCountPerInstance, + uint32 instanceCount, + uint32 startIndexLocation, + int32 baseVertexLocation, + uint32 startInstanceLocation) +{ + SVGA3D_CREATE_COMMAND(DrawIndexedInstanced, DRAW_INDEXED_INSTANCED); + + SVGA3D_COPY_BASIC_5(indexCountPerInstance, instanceCount, + startIndexLocation, baseVertexLocation, + startInstanceLocation); + + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc) +{ + SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + SVGA3dQueryType type, + SVGA3dDXQueryFlags flags) +{ + SVGA3D_CREATE_COMMAND(DefineQuery, DEFINE_QUERY); + + SVGA3D_COPY_BASIC_3(queryId, type, flags); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId) +{ + SVGA3D_CREATE_COMMAND(DestroyQuery, DESTROY_QUERY); + + cmd->queryId = queryId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc, + struct svga_winsys_gb_query *gbQuery, + SVGA3dQueryId queryId) +{ + SVGA3dCmdDXBindQuery *cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_BIND_QUERY, + sizeof *cmd, + 1); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->queryId = queryId; + swc->query_relocation(swc, &cmd->mobid, gbQuery); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 mobOffset) +{ + SVGA3D_CREATE_COMMAND(SetQueryOffset, SET_QUERY_OFFSET); + SVGA3D_COPY_BASIC_2(queryId, mobOffset); + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId) +{ + 
SVGA3D_CREATE_COMMAND(BeginQuery, BEGIN_QUERY); + cmd->queryId = queryId; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId) +{ + SVGA3D_CREATE_COMMAND(EndQuery, END_QUERY); + cmd->queryId = queryId; + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc, + struct pipe_surface *ds_surf, + uint16 flags, + uint16 stencil, + float depth) +{ + SVGA3dCmdDXClearDepthStencilView *cmd; + struct svga_surface *ss = svga_surface(ds_surf); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW, + sizeof(SVGA3dCmdDXClearDepthStencilView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* NOTE: The following is pretty tricky. We need to emit a view/surface + * relocation and we have to provide a pointer to an ID which lies in + * the bounds of the command space which we just allocated. However, + * we then need to overwrite it with the original DepthStencilViewId. + */ + view_relocation(swc, ds_surf, &cmd->depthStencilViewId, + SVGA_RELOC_WRITE); + cmd->depthStencilViewId = ss->view_id; + cmd->flags = flags; + cmd->stencil = stencil; + cmd->depth = depth; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dShaderResourceViewDesc *desc) +{ + SVGA3dCmdDXDefineShaderResourceView *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW, + sizeof(SVGA3dCmdDXDefineShaderResourceView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA3D_COPY_BASIC_3(shaderResourceViewId, format, resourceDimension); + + swc->surface_relocation(swc, &cmd->sid, NULL, surface, + SVGA_RELOC_READ); + + cmd->desc = *desc; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId) +{ + SVGA3D_CREATE_COMMAND(DestroyShaderResourceView, + DESTROY_SHADERRESOURCE_VIEW); + + cmd->shaderResourceViewId = shaderResourceViewId; + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc) +{ + SVGA3dCmdDXDefineRenderTargetView *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW, + sizeof(SVGA3dCmdDXDefineRenderTargetView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA3D_COPY_BASIC_3(renderTargetViewId, format, resourceDimension); + cmd->desc = *desc; + + surface_to_resourceid(swc, surface, + &cmd->sid, + SVGA_RELOC_READ | SVGA_RELOC_WRITE); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId) +{ + SVGA3D_CREATE_COMMAND(DestroyRenderTargetView, DESTROY_RENDERTARGET_VIEW); + + cmd->renderTargetViewId = renderTargetViewId; + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilView(struct 
svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc) +{ + SVGA3dCmdDXDefineDepthStencilView *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW, + sizeof(SVGA3dCmdDXDefineDepthStencilView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA3D_COPY_BASIC_3(depthStencilViewId, format, resourceDimension); + cmd->mipSlice = desc->tex.mipSlice; + cmd->firstArraySlice = desc->tex.firstArraySlice; + cmd->arraySize = desc->tex.arraySize; + + surface_to_resourceid(swc, surface, + &cmd->sid, + SVGA_RELOC_READ | SVGA_RELOC_WRITE); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId) +{ + SVGA3D_CREATE_COMMAND(DestroyDepthStencilView, DESTROY_DEPTHSTENCIL_VIEW); + + cmd->depthStencilViewId = depthStencilViewId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc, + unsigned count, + SVGA3dElementLayoutId elementLayoutId, + const SVGA3dInputElementDesc *elements) +{ + SVGA3dCmdDXDefineElementLayout *cmd; + unsigned i; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT, + sizeof(SVGA3dCmdDXDefineElementLayout) + + count * sizeof(SVGA3dInputElementDesc), 0); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* check that all offsets are multiples of four */ + for (i = 0; i < count; i++) { + assert(elements[i].alignedByteOffset % 4 == 0); + } + (void) i; /* silence unused var in release build */ + + cmd->elementLayoutId = elementLayoutId; + memcpy(cmd + 1, elements, count * sizeof(SVGA3dInputElementDesc)); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId) +{ + SVGA3D_CREATE_COMMAND(DestroyElementLayout, DESTROY_ELEMENTLAYOUT); + + cmd->elementLayoutId = elementLayoutId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + uint8 alphaToCoverageEnable, + uint8 independentBlendEnable, + const SVGA3dDXBlendStatePerRT *perRT) +{ + SVGA3D_CREATE_COMMAND(DefineBlendState, DEFINE_BLEND_STATE); + + cmd->blendId = blendId; + cmd->alphaToCoverageEnable = alphaToCoverageEnable; + cmd->independentBlendEnable = independentBlendEnable; + memcpy(cmd->perRT, perRT, sizeof(cmd->perRT)); + cmd->pad0 = 0; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId) +{ + SVGA3D_CREATE_COMMAND(DestroyBlendState, DESTROY_BLEND_STATE); + + cmd->blendId = blendId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint8 depthEnable, + SVGA3dDepthWriteMask depthWriteMask, + SVGA3dComparisonFunc depthFunc, + uint8 stencilEnable, + uint8 frontEnable, + uint8 backEnable, + uint8 stencilReadMask, + uint8 stencilWriteMask, + uint8 frontStencilFailOp, + uint8 frontStencilDepthFailOp, + uint8 frontStencilPassOp, + SVGA3dComparisonFunc frontStencilFunc, + uint8 backStencilFailOp, + uint8 backStencilDepthFailOp, + uint8 backStencilPassOp, + 
SVGA3dComparisonFunc backStencilFunc) +{ + SVGA3D_CREATE_COMMAND(DefineDepthStencilState, DEFINE_DEPTHSTENCIL_STATE); + + SVGA3D_COPY_BASIC_9(depthStencilId, depthEnable, + depthWriteMask, depthFunc, + stencilEnable, frontEnable, + backEnable, stencilReadMask, + stencilWriteMask); + SVGA3D_COPY_BASIC_8(frontStencilFailOp, frontStencilDepthFailOp, + frontStencilPassOp, frontStencilFunc, + backStencilFailOp, backStencilDepthFailOp, + backStencilPassOp, backStencilFunc); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId) +{ + SVGA3D_CREATE_COMMAND(DestroyDepthStencilState, + DESTROY_DEPTHSTENCIL_STATE); + + cmd->depthStencilId = depthStencilId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId, + uint8 fillMode, + SVGA3dCullMode cullMode, + uint8 frontCounterClockwise, + int32 depthBias, + float depthBiasClamp, + float slopeScaledDepthBias, + uint8 depthClipEnable, + uint8 scissorEnable, + uint8 multisampleEnable, + uint8 antialiasedLineEnable, + float lineWidth, + uint8 lineStippleEnable, + uint8 lineStippleFactor, + uint16 lineStipplePattern, + uint8 provokingVertexLast) +{ + SVGA3D_CREATE_COMMAND(DefineRasterizerState, DEFINE_RASTERIZER_STATE); + + SVGA3D_COPY_BASIC_5(rasterizerId, fillMode, + cullMode, frontCounterClockwise, + depthBias); + SVGA3D_COPY_BASIC_6(depthBiasClamp, slopeScaledDepthBias, + depthClipEnable, scissorEnable, + multisampleEnable, antialiasedLineEnable); + cmd->lineWidth = lineWidth; + cmd->lineStippleEnable = lineStippleEnable; + cmd->lineStippleFactor = lineStippleFactor; + cmd->lineStipplePattern = lineStipplePattern; + cmd->provokingVertexLast = provokingVertexLast; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId) +{ + SVGA3D_CREATE_COMMAND(DestroyRasterizerState, DESTROY_RASTERIZER_STATE); + + cmd->rasterizerId = rasterizerId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId, + SVGA3dFilter filter, + uint8 addressU, + uint8 addressV, + uint8 addressW, + float mipLODBias, + uint8 maxAnisotropy, + uint8 comparisonFunc, + SVGA3dRGBAFloat borderColor, + float minLOD, + float maxLOD) +{ + SVGA3D_CREATE_COMMAND(DefineSamplerState, DEFINE_SAMPLER_STATE); + + SVGA3D_COPY_BASIC_6(samplerId, filter, + addressU, addressV, + addressW, mipLODBias); + SVGA3D_COPY_BASIC_5(maxAnisotropy, comparisonFunc, + borderColor, minLOD, + maxLOD); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId) +{ + SVGA3D_CREATE_COMMAND(DestroySamplerState, DESTROY_SAMPLER_STATE); + + cmd->samplerId = samplerId; + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId, + SVGA3dShaderType type, + uint32 sizeInBytes) +{ + SVGA3dCmdHeader *header; + SVGA3dCmdDXDefineShader *dcmd; + SVGA3dCmdDXBindShader *bcmd; + unsigned totalSize = 2 * sizeof(*header) + + sizeof(*dcmd) + sizeof(*bcmd); + + /* Make sure there is room for both commands */ + header = swc->reserve(swc, totalSize, 2); + if 
(!header) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* DXDefineShader command */ + header->id = SVGA_3D_CMD_DX_DEFINE_SHADER; + header->size = sizeof(*dcmd); + dcmd = (SVGA3dCmdDXDefineShader *)(header + 1); + dcmd->shaderId = shaderId; + dcmd->type = type; + dcmd->sizeInBytes = sizeInBytes; + + /* DXBindShader command */ + header = (SVGA3dCmdHeader *)(dcmd + 1); + + header->id = SVGA_3D_CMD_DX_BIND_SHADER; + header->size = sizeof(*bcmd); + bcmd = (SVGA3dCmdDXBindShader *)(header + 1); + + bcmd->cid = swc->cid; + swc->shader_relocation(swc, NULL, &bcmd->mobid, + &bcmd->offsetInBytes, gbshader, 0); + + bcmd->shid = shaderId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc, + SVGA3dShaderId shaderId) +{ + SVGA3D_CREATE_COMMAND(DestroyShader, DESTROY_SHADER); + + cmd->shaderId = shaderId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid, + uint32 numOutputStreamEntries, + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS], + const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]) +{ + unsigned i; + SVGA3D_CREATE_COMMAND(DefineStreamOutput, DEFINE_STREAMOUTPUT); + + cmd->soid = soid; + cmd->numOutputStreamEntries = numOutputStreamEntries; + + for (i = 0; i < Elements(cmd->streamOutputStrideInBytes); i++) + cmd->streamOutputStrideInBytes[i] = streamOutputStrideInBytes[i]; + + memcpy(cmd->decl, decl, + sizeof(SVGA3dStreamOutputDeclarationEntry) + * SVGA3D_MAX_STREAMOUT_DECLS); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid) +{ + SVGA3D_CREATE_COMMAND(DestroyStreamOutput, DESTROY_STREAMOUTPUT); + + cmd->soid = soid; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId) +{ + SVGA3D_CREATE_COMMAND(SetInputLayout, SET_INPUT_LAYOUT); + + cmd->elementLayoutId = elementLayoutId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc, + unsigned count, + uint32 startBuffer, + const SVGA3dVertexBuffer *bufferInfo, + struct svga_winsys_surface **surfaces) +{ + SVGA3dCmdDXSetVertexBuffers *cmd; + SVGA3dVertexBuffer *bufs; + unsigned i; + + assert(count > 0); + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS, + sizeof(SVGA3dCmdDXSetVertexBuffers) + + count * sizeof(SVGA3dVertexBuffer), + count); /* 'count' relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->startBuffer = startBuffer; + + bufs = (SVGA3dVertexBuffer *) &cmd[1]; + for (i = 0; i < count; i++) { + bufs[i].stride = bufferInfo[i].stride; + bufs[i].offset = bufferInfo[i].offset; + assert(bufs[i].stride % 4 == 0); + assert(bufs[i].offset % 4 == 0); + swc->surface_relocation(swc, &bufs[i].sid, NULL, surfaces[i], + SVGA_RELOC_READ); + } + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc, + SVGA3dPrimitiveType topology) +{ + SVGA3D_CREATE_COMMAND(SetTopology, SET_TOPOLOGY); + + cmd->topology = topology; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc, + struct svga_winsys_surface *indexes, + SVGA3dSurfaceFormat format, + uint32 offset) +{ + SVGA3dCmdDXSetIndexBuffer *cmd; + + cmd = 
SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_INDEX_BUFFER, + sizeof(SVGA3dCmdDXSetIndexBuffer), + 1); /* one relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, NULL, indexes, SVGA_RELOC_READ); + SVGA3D_COPY_BASIC_2(format, offset); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc, + unsigned slot, + SVGA3dShaderType type, + struct svga_winsys_surface *surface, + uint32 offsetInBytes, + uint32 sizeInBytes) +{ + SVGA3dCmdDXSetSingleConstantBuffer *cmd; + + assert(offsetInBytes % 256 == 0); + if (!surface) + assert(sizeInBytes == 0); + else + assert(sizeInBytes > 0); + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER, + sizeof(SVGA3dCmdDXSetSingleConstantBuffer), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->slot = slot; + cmd->type = type; + swc->surface_relocation(swc, &cmd->sid, NULL, surface, SVGA_RELOC_READ); + cmd->offsetInBytes = offsetInBytes; + cmd->sizeInBytes = sizeInBytes; + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + unsigned subResource) +{ + SVGA3dCmdDXReadbackSubResource *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_READBACK_SUBRESOURCE, + sizeof(SVGA3dCmdDXReadbackSubResource), + 1); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, NULL, surface, + SVGA_RELOC_READ | SVGA_RELOC_INTERNAL); + cmd->subResource = subResource; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + const SVGA3dBox *box, + unsigned subResource) +{ + SVGA3dCmdDXUpdateSubResource *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE, + sizeof(SVGA3dCmdDXUpdateSubResource), + 1); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, NULL, surface, + SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); + cmd->subResource = subResource; + cmd->box = *box; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_GenMips(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *view) +{ + SVGA3dCmdDXGenMips *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_GENMIPS, + sizeof(SVGA3dCmdDXGenMips), 1); + + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->shaderResourceViewId, NULL, view, + SVGA_RELOC_WRITE); + cmd->shaderResourceViewId = shaderResourceViewId; + + swc->commit(swc); + return PIPE_OK; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.c b/lib/mesa/src/gallium/drivers/svga/svga_context.c index 0ffff9c50..b10eb45e5 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_context.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_context.c @@ -30,6 +30,7 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_bitmask.h" +#include "util/u_upload_mgr.h" #include "svga_context.h" #include "svga_screen.h" @@ -42,6 +43,9 @@ #include "svga_draw.h" #include "svga_debug.h" #include "svga_state.h" +#include "svga_winsys.h" + +#define CONST0_UPLOAD_DEFAULT_SIZE 65536 DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE) DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE); @@ -53,27 +57,67 @@ 
DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE", static void svga_destroy( struct pipe_context *pipe ) { struct svga_context *svga = svga_context( pipe ); - struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws; - unsigned shader; + unsigned shader, i; + + /* free any alternate rasterizer states used for point sprite */ + for (i = 0; i < Elements(svga->rasterizer_no_cull); i++) { + if (svga->rasterizer_no_cull[i]) { + pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]); + } + } + + /* free polygon stipple state */ + if (svga->polygon_stipple.sampler) { + pipe->delete_sampler_state(pipe, svga->polygon_stipple.sampler); + } + if (svga->polygon_stipple.sampler_view) { + pipe->sampler_view_destroy(pipe, + &svga->polygon_stipple.sampler_view->base); + } + pipe_resource_reference(&svga->polygon_stipple.texture, NULL); + + /* free HW constant buffers */ + for (shader = 0; shader < Elements(svga->state.hw_draw.constbuf); shader++) { + pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], NULL); + } + + pipe->delete_blend_state(pipe, svga->noop_blend); + + /* free query gb object */ + if (svga->gb_query) { + pipe->destroy_query(pipe, NULL); + svga->gb_query = NULL; + } util_blitter_destroy(svga->blitter); svga_cleanup_framebuffer( svga ); svga_cleanup_tss_binding( svga ); - svga_hwtnl_destroy( svga->hwtnl ); - svga_cleanup_vertex_state(svga); - svga->swc->destroy(svga->swc); - svga_destroy_swtnl( svga ); + svga_hwtnl_destroy( svga->hwtnl ); - util_bitmask_destroy( svga->shader_id_bm ); + svga->swc->destroy(svga->swc); + util_bitmask_destroy(svga->blend_object_id_bm); + util_bitmask_destroy(svga->ds_object_id_bm); + util_bitmask_destroy(svga->input_element_object_id_bm); + util_bitmask_destroy(svga->rast_object_id_bm); + util_bitmask_destroy(svga->sampler_object_id_bm); + util_bitmask_destroy(svga->sampler_view_id_bm); + util_bitmask_destroy(svga->shader_id_bm); + util_bitmask_destroy(svga->surface_view_id_bm); + util_bitmask_destroy(svga->stream_output_id_bm); + util_bitmask_destroy(svga->query_id_bm); + u_upload_destroy(svga->const0_upload); + + /* free user's constant buffers */ for (shader = 0; shader < PIPE_SHADER_TYPES; ++shader) { - pipe_resource_reference( &svga->curr.cbufs[shader].buffer, NULL ); - sws->surface_reference(sws, &svga->state.hw_draw.hw_cb[shader], NULL); + for (i = 0; i < Elements(svga->curr.constbufs[shader]); ++i) { + pipe_resource_reference(&svga->curr.constbufs[shader][i].buffer, NULL); + } } FREE( svga ); @@ -81,16 +125,16 @@ static void svga_destroy( struct pipe_context *pipe ) -struct pipe_context *svga_context_create( struct pipe_screen *screen, - void *priv ) +struct pipe_context *svga_context_create(struct pipe_screen *screen, + void *priv, unsigned flags) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_context *svga = NULL; enum pipe_error ret; svga = CALLOC_STRUCT(svga_context); - if (svga == NULL) - goto no_svga; + if (!svga) + goto cleanup; LIST_INITHEAD(&svga->dirty_buffers); @@ -100,8 +144,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga->pipe.clear = svga_clear; svga->swc = svgascreen->sws->context_create(svgascreen->sws); - if(!svga->swc) - goto no_swc; + if (!svga->swc) + goto cleanup; svga_init_resource_functions(svga); svga_init_blend_functions(svga); @@ -114,11 +158,15 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga_init_sampler_functions(svga); svga_init_fs_functions(svga); svga_init_vs_functions(svga); + 
svga_init_gs_functions(svga); svga_init_vertex_functions(svga); svga_init_constbuffer_functions(svga); svga_init_query_functions(svga); svga_init_surface_functions(svga); + svga_init_stream_output_functions(svga); + /* init misc state */ + svga->curr.sample_mask = ~0; /* debug */ svga->debug.no_swtnl = debug_get_option_no_swtnl(); @@ -128,21 +176,54 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga->debug.no_line_width = debug_get_option_no_line_width(); svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple(); - svga->shader_id_bm = util_bitmask_create(); - if (svga->shader_id_bm == NULL) - goto no_shader_bm; + if (!(svga->blend_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->ds_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->input_element_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->rast_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->sampler_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->sampler_view_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->shader_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->surface_view_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->stream_output_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->query_id_bm = util_bitmask_create())) + goto cleanup; svga->hwtnl = svga_hwtnl_create(svga); if (svga->hwtnl == NULL) - goto no_hwtnl; + goto cleanup; if (!svga_init_swtnl(svga)) - goto no_swtnl; + goto cleanup; ret = svga_emit_initial_state( svga ); if (ret != PIPE_OK) - goto no_state; - + goto cleanup; + + svga->const0_upload = u_upload_create(&svga->pipe, + CONST0_UPLOAD_DEFAULT_SIZE, + PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STREAM); + if (!svga->const0_upload) + goto cleanup; + /* Avoid shortcircuiting state with initial value of zero. */ memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear)); @@ -151,24 +232,64 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw)); memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views)); + memset(&svga->state.hw_draw.num_sampler_views, 0, + sizeof(svga->state.hw_draw.num_sampler_views)); svga->state.hw_draw.num_views = 0; - memset(&svga->state.hw_draw.hw_cb, 0x0, sizeof(svga->state.hw_draw.hw_cb)); + + /* Initialize the shader pointers */ + svga->state.hw_draw.vs = NULL; + svga->state.hw_draw.gs = NULL; + svga->state.hw_draw.fs = NULL; + memset(svga->state.hw_draw.constbuf, 0, + sizeof(svga->state.hw_draw.constbuf)); + memset(svga->state.hw_draw.default_constbuf_size, 0, + sizeof(svga->state.hw_draw.default_constbuf_size)); + memset(svga->state.hw_draw.enabled_constbufs, 0, + sizeof(svga->state.hw_draw.enabled_constbufs)); + + /* Create a no-operation blend state which we will bind whenever the + * requested blend state is impossible (e.g. due to having an integer + * render target attached). + * + * XXX: We will probably actually need 16 of these, one for each possible + * RGBA color mask (4 bits). Then, we would bind the one with a color mask + * matching the blend state it is replacing. + */ + { + struct pipe_blend_state noop_tmpl = {0}; + unsigned i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + // Set the color mask to all-ones. Later this may change. 
+ noop_tmpl.rt[i].colormask = PIPE_MASK_RGBA; + } + svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl); + } svga->dirty = ~0; return &svga->pipe; -no_state: +cleanup: svga_destroy_swtnl(svga); -no_swtnl: - svga_hwtnl_destroy( svga->hwtnl ); -no_hwtnl: - util_bitmask_destroy( svga->shader_id_bm ); -no_shader_bm: - svga->swc->destroy(svga->swc); -no_swc: + + if (svga->const0_upload) + u_upload_destroy(svga->const0_upload); + if (svga->hwtnl) + svga_hwtnl_destroy(svga->hwtnl); + if (svga->swc) + svga->swc->destroy(svga->swc); + util_bitmask_destroy(svga->blend_object_id_bm); + util_bitmask_destroy(svga->ds_object_id_bm); + util_bitmask_destroy(svga->input_element_object_id_bm); + util_bitmask_destroy(svga->rast_object_id_bm); + util_bitmask_destroy(svga->sampler_object_id_bm); + util_bitmask_destroy(svga->sampler_view_id_bm); + util_bitmask_destroy(svga->shader_id_bm); + util_bitmask_destroy(svga->surface_view_id_bm); + util_bitmask_destroy(svga->stream_output_id_bm); + util_bitmask_destroy(svga->query_id_bm); FREE(svga); -no_svga: return NULL; } @@ -190,16 +311,26 @@ void svga_context_flush( struct svga_context *svga, */ svga->swc->flush(svga->swc, &fence); + svga->hud.num_flushes++; + svga_screen_cache_flush(svgascreen, fence); /* To force the re-emission of rendertargets and texture sampler bindings on * the next command buffer. */ - svga->rebind.rendertargets = TRUE; - svga->rebind.texture_samplers = TRUE; + svga->rebind.flags.rendertargets = TRUE; + svga->rebind.flags.texture_samplers = TRUE; + if (svga_have_gb_objects(svga)) { - svga->rebind.vs = TRUE; - svga->rebind.fs = TRUE; + + svga->rebind.flags.constbufs = TRUE; + svga->rebind.flags.vs = TRUE; + svga->rebind.flags.fs = TRUE; + svga->rebind.flags.gs = TRUE; + + if (svga_need_to_rebind_resources(svga)) { + svga->rebind.flags.query = TRUE; + } } if (SVGA_DEBUG & DEBUG_SYNC) { @@ -208,13 +339,33 @@ void svga_context_flush( struct svga_context *svga, PIPE_TIMEOUT_INFINITE); } - if(pfence) + if (pfence) svgascreen->sws->fence_reference(svgascreen->sws, pfence, fence); svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL); } +/** + * Flush pending commands and wait for completion with a fence. + */ +void +svga_context_finish(struct svga_context *svga) +{ + struct pipe_screen *screen = svga->pipe.screen; + struct pipe_fence_handle *fence = NULL; + + svga_context_flush(svga, &fence); + svga->pipe.screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE); + screen->fence_reference(screen, &fence, NULL); +} + + +/** + * Emit pending drawing commands to the command buffer. + * If the command buffer overflows, we flush it and retry. 
+ * \sa svga_hwtnl_flush() + */ void svga_hwtnl_flush_retry( struct svga_context *svga ) { enum pipe_error ret = PIPE_OK; @@ -225,7 +376,7 @@ void svga_hwtnl_flush_retry( struct svga_context *svga ) ret = svga_hwtnl_flush( svga->hwtnl ); } - assert(ret == 0); + assert(ret == PIPE_OK); } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.h b/lib/mesa/src/gallium/drivers/svga/svga_context.h index 71f038df8..f1a2041b6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_context.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_context.h @@ -38,17 +38,44 @@ #include "svga_screen.h" #include "svga_state.h" -#include "svga_tgsi.h" #include "svga_winsys.h" #include "svga_hw_reg.h" #include "svga3d_shaderdefs.h" /** Non-GPU queries for gallium HUD */ -#define SVGA_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) -#define SVGA_QUERY_FALLBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 1) -#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 2) +/* per-frame counters */ +#define SVGA_QUERY_NUM_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) +#define SVGA_QUERY_NUM_FALLBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 1) +#define SVGA_QUERY_NUM_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 2) +#define SVGA_QUERY_NUM_VALIDATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 3) +#define SVGA_QUERY_MAP_BUFFER_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 4) +#define SVGA_QUERY_NUM_RESOURCES_MAPPED (PIPE_QUERY_DRIVER_SPECIFIC + 5) +#define SVGA_QUERY_NUM_BYTES_UPLOADED (PIPE_QUERY_DRIVER_SPECIFIC + 6) + +/* running total counters */ +#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 7) +#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 8) +#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 11) +#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 12) +/*SVGA_QUERY_MAX has to be last because it is size of an array*/ +#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 13) + +/** + * Maximum supported number of constant buffers per shader + */ +#define SVGA_MAX_CONST_BUFS 14 + +/** + * Maximum constant buffer size that can be set in the + * DXSetSingleConstantBuffer command is + * DX10 constant buffer element count * 4 4-bytes components + */ +#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int)) +#define CONST0_UPLOAD_ALIGNMENT 256 struct draw_vertex_shader; struct draw_fragment_shader; @@ -57,49 +84,16 @@ struct SVGACmdMemory; struct util_bitmask; -struct svga_shader -{ - const struct tgsi_token *tokens; - - struct tgsi_shader_info info; - - /** Head of linked list of variants */ - struct svga_shader_variant *variants; - - unsigned id; /**< for debugging only */ -}; - - -struct svga_fragment_shader -{ - struct svga_shader base; - - struct draw_fragment_shader *draw_shader; - - /** Mask of which generic varying variables are read by this shader */ - unsigned generic_inputs; - /** Table mapping original TGSI generic indexes to low integers */ - int8_t generic_remap_table[MAX_GENERIC_VARYING]; -}; - - -struct svga_vertex_shader -{ - struct svga_shader base; - - struct draw_vertex_shader *draw_shader; -}; - - struct svga_cache_context; struct svga_tracked_state; struct svga_blend_state { + unsigned need_white_fragments:1; + unsigned independent_blend_enable:1; + unsigned alpha_to_coverage:1; + unsigned blend_color_alpha:1; /**< set blend color to alpha value */ - boolean need_white_fragments; - - /* Should be per-render-target: - */ + /** Per-render target state */ 
struct { uint8_t writemask; @@ -112,8 +106,9 @@ struct svga_blend_state { uint8_t srcblend_alpha; uint8_t dstblend_alpha; uint8_t blendeq_alpha; + } rt[PIPE_MAX_COLOR_BUFS]; - } rt[1]; + SVGA3dBlendStateId id; /**< vgpu10 */ }; struct svga_depth_stencil_state { @@ -139,6 +134,8 @@ struct svga_depth_stencil_state { unsigned stencil_writemask:8; float alpharef; + + SVGA3dDepthStencilStateId id; /**< vgpu10 */ }; #define SVGA_UNFILLED_DISABLE 0 @@ -167,11 +164,13 @@ struct svga_rasterizer_state { float pointsize; float linewidth; - unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */ + unsigned hw_fillmode:2; /* PIPE_POLYGON_MODE_x */ /** Which prims do we need help for? Bitmask of (1 << PIPE_PRIM_x) flags */ unsigned need_pipeline:16; + SVGA3dRasterizerStateId id; /**< vgpu10 */ + /** For debugging: */ const char* need_pipeline_tris_str; const char* need_pipeline_lines_str; @@ -195,15 +194,45 @@ struct svga_sampler_state { unsigned min_lod; unsigned view_min_lod; unsigned view_max_lod; + + SVGA3dSamplerId id; }; + +struct svga_pipe_sampler_view +{ + struct pipe_sampler_view base; + + SVGA3dShaderResourceViewId id; +}; + + +static inline struct svga_pipe_sampler_view * +svga_pipe_sampler_view(struct pipe_sampler_view *v) +{ + return (struct svga_pipe_sampler_view *) v; +} + + struct svga_velems_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */ - unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */ - unsigned adjust_attrib_w_1; /* bitmask of attrs needing w = 1 */ + + /** Bitmasks indicating which attributes need format conversion */ + unsigned adjust_attrib_range; /**< range adjustment */ + unsigned attrib_is_pure_int; /**< pure int */ + unsigned adjust_attrib_w_1; /**< set w = 1 */ + unsigned adjust_attrib_itof; /**< int->float */ + unsigned adjust_attrib_utof; /**< uint->float */ + unsigned attrib_is_bgra; /**< R / B swizzling */ + unsigned attrib_puint_to_snorm; /**< 10_10_10_2 packed uint -> snorm */ + unsigned attrib_puint_to_uscaled; /**< 10_10_10_2 packed uint -> uscaled */ + unsigned attrib_puint_to_sscaled; /**< 10_10_10_2 packed uint -> sscaled */ + boolean need_swvfetch; + + SVGA3dElementLayoutId id; /**< VGPU10 */ }; /* Use to calculate differences between state emitted to hardware and @@ -214,16 +243,22 @@ struct svga_state const struct svga_blend_state *blend; const struct svga_depth_stencil_state *depth; const struct svga_rasterizer_state *rast; - const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; const struct svga_velems_state *velems; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; /* or texture ID's? */ + struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture ID's? */ struct svga_fragment_shader *fs; struct svga_vertex_shader *vs; + struct svga_geometry_shader *user_gs; /* user-specified GS */ + struct svga_geometry_shader *gs; /* derived GS */ struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer ib; - struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES]; + /** Constant buffers for each shader. + * The size should probably always match with that of + * svga_shader_emitter_v10.num_shader_consts. 
+ */ + struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; struct pipe_framebuffer_state framebuffer; float depthscale; @@ -240,8 +275,8 @@ struct svga_state struct pipe_clip_state clip; struct pipe_viewport_state viewport; - unsigned num_samplers; - unsigned num_sampler_views; + unsigned num_samplers[PIPE_SHADER_TYPES]; + unsigned num_sampler_views[PIPE_SHADER_TYPES]; unsigned num_vertex_buffers; unsigned reduced_prim; @@ -249,6 +284,8 @@ struct svga_state unsigned flag_1d; unsigned flag_srgb; } tex_flags; + + unsigned sample_mask; }; struct svga_prescale { @@ -262,9 +299,7 @@ struct svga_prescale { */ struct svga_hw_clear_state { - struct { - unsigned x,y,w,h; - } viewport; + SVGA3dRect viewport; struct { float zmin, zmax; @@ -280,7 +315,7 @@ struct svga_hw_view_state struct svga_sampler_view *v; unsigned min_lod; unsigned max_lod; - int dirty; + boolean dirty; }; /* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW ) @@ -291,16 +326,41 @@ struct svga_hw_draw_state unsigned ts[SVGA3D_PIXEL_SAMPLERREG_MAX][SVGA3D_TS_MAX]; float cb[PIPE_SHADER_TYPES][SVGA3D_CONSTREG_MAX][4]; - /** - * For guest backed shader constants only. - */ - struct svga_winsys_surface *hw_cb[PIPE_SHADER_TYPES]; - struct svga_shader_variant *fs; struct svga_shader_variant *vs; + struct svga_shader_variant *gs; struct svga_hw_view_state views[PIPE_MAX_SAMPLERS]; - unsigned num_views; + struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; + + /* Bitmask of enabled constant bufffers */ + unsigned enabled_constbufs[PIPE_SHADER_TYPES]; + + /* VGPU10 HW state (used to prevent emitting redundant state) */ + SVGA3dDepthStencilStateId depth_stencil_id; + unsigned stencil_ref; + SVGA3dBlendStateId blend_id; + float blend_factor[4]; + unsigned blend_sample_mask; + SVGA3dRasterizerStateId rasterizer_id; + SVGA3dElementLayoutId layout_id; + SVGA3dPrimitiveType topology; + + /** Vertex buffer state */ + SVGA3dVertexBuffer vbuffers[PIPE_MAX_ATTRIBS]; + struct svga_winsys_surface *vbuffer_handles[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers; + + struct svga_winsys_surface *ib; /**< index buffer for drawing */ + SVGA3dSurfaceFormat ib_format; + unsigned ib_offset; + + unsigned num_samplers[PIPE_SHADER_TYPES]; + SVGA3dSamplerId samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + + /* used for rebinding */ + unsigned num_sampler_views[PIPE_SHADER_TYPES]; + unsigned default_constbuf_size[PIPE_SHADER_TYPES]; }; @@ -326,12 +386,14 @@ struct svga_sw_state struct svga_hw_queue; struct svga_query; +struct svga_qmem_alloc_entry; struct svga_context { struct pipe_context pipe; struct svga_winsys_context *swc; struct blitter_context *blitter; + struct u_upload_mgr *const0_upload; struct { boolean no_swtnl; @@ -345,6 +407,9 @@ struct svga_context boolean no_line_width; boolean force_hw_line_stipple; + + /** To report perf/conformance/etc issues to the state tracker */ + struct pipe_debug_callback callback; } debug; struct { @@ -355,12 +420,42 @@ struct svga_context boolean new_vdecl; } swtnl; + /* Bitmask of blend state objects IDs */ + struct util_bitmask *blend_object_id_bm; + + /* Bitmask of depth/stencil state objects IDs */ + struct util_bitmask *ds_object_id_bm; + + /* Bitmaks of input element object IDs */ + struct util_bitmask *input_element_object_id_bm; + + /* Bitmask of rasterizer object IDs */ + struct util_bitmask *rast_object_id_bm; + + /* Bitmask of sampler state objects IDs */ + struct util_bitmask *sampler_object_id_bm; + + /* Bitmask of sampler view IDs */ + struct util_bitmask *sampler_view_id_bm; + 
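The util_bitmask fields above back the per-context id allocators for the new VGPU10 state objects (blend, depth/stencil, input layouts, rasterizer, samplers, views, shaders, stream output, queries). A small sketch of the intended allocate/release usage, assuming the util_bitmask_add()/util_bitmask_clear() helpers from util/u_bitmask.h; the wrapper names below are illustrative only:

   /* Hand out the lowest unused blend-state id when a state object is
    * created; the matching util_bitmask_clear() releases it on delete.
    */
   static SVGA3dBlendStateId
   svga_alloc_blend_id(struct svga_context *svga)
   {
      return util_bitmask_add(svga->blend_object_id_bm);
   }

   static void
   svga_release_blend_id(struct svga_context *svga, SVGA3dBlendStateId id)
   {
      util_bitmask_clear(svga->blend_object_id_bm, id);
   }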
/* Bitmask of used shader IDs */ struct util_bitmask *shader_id_bm; + /* Bitmask of used surface view IDs */ + struct util_bitmask *surface_view_id_bm; + + /* Bitmask of used stream output IDs */ + struct util_bitmask *stream_output_id_bm; + + /* Bitmask of used query IDs */ + struct util_bitmask *query_id_bm; + struct { unsigned dirty[SVGA_STATE_MAX]; + /** bitmasks of which const buffers are changed */ + unsigned dirty_constbufs[PIPE_SHADER_TYPES]; + unsigned texture_timestamp; /* @@ -373,30 +468,72 @@ struct svga_context struct svga_state curr; /* state from the state tracker */ unsigned dirty; /* statechanges since last update_state() */ - struct { - unsigned rendertargets:1; - unsigned texture_samplers:1; - unsigned vs:1; - unsigned fs:1; + union { + struct { + unsigned rendertargets:1; + unsigned texture_samplers:1; + unsigned constbufs:1; + unsigned vs:1; + unsigned fs:1; + unsigned gs:1; + unsigned query:1; + } flags; + unsigned val; } rebind; struct svga_hwtnl *hwtnl; - /** The occlusion query currently in progress */ - struct svga_query *sq; + /** Queries states */ + struct svga_winsys_gb_query *gb_query; /**< gb query object, one per context */ + unsigned gb_query_len; /**< gb query object size */ + struct util_bitmask *gb_query_alloc_mask; /**< gb query object allocation mask */ + struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX]; + /**< query mem block mapping */ + struct svga_query *sq[SVGA_QUERY_MAX]; /**< queries currently in progress */ /** List of buffers with queued transfers */ struct list_head dirty_buffers; - /** performance / info queries */ - uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */ - uint64_t num_fallbacks; /**< SVGA_QUERY_FALLBACKS */ + /** performance / info queries for HUD */ + struct { + uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */ + uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */ + uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */ + uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */ + uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */ + uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */ + uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */ + uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */ + uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */ + uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */ + uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */ + } hud; + + /** The currently bound stream output targets */ + unsigned num_so_targets; + struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS]; + struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS]; + struct svga_stream_output *current_so; + + /** A blend state with blending disabled, for falling back to when blending + * is illegal (e.g. 
an integer texture is bound) + */ + struct svga_blend_state *noop_blend; + + struct { + struct pipe_resource *texture; + struct svga_pipe_sampler_view *sampler_view; + void *sampler; + } polygon_stipple; + + /** Alternate rasterizer states created for point sprite */ + struct svga_rasterizer_state *rasterizer_no_cull[2]; }; /* A flag for each state_tracker state object: */ #define SVGA_NEW_BLEND 0x1 -#define SVGA_NEW_DEPTH_STENCIL 0x2 +#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2 #define SVGA_NEW_RAST 0x4 #define SVGA_NEW_SAMPLER 0x8 #define SVGA_NEW_TEXTURE 0x10 @@ -422,7 +559,9 @@ struct svga_context #define SVGA_NEW_VS_VARIANT 0x1000000 #define SVGA_NEW_TEXTURE_FLAGS 0x4000000 #define SVGA_NEW_STENCIL_REF 0x8000000 - +#define SVGA_NEW_GS 0x10000000 +#define SVGA_NEW_GS_CONST_BUFFER 0x20000000 +#define SVGA_NEW_GS_VARIANT 0x40000000 @@ -457,11 +596,13 @@ void svga_init_rasterizer_functions( struct svga_context *svga ); void svga_init_sampler_functions( struct svga_context *svga ); void svga_init_fs_functions( struct svga_context *svga ); void svga_init_vs_functions( struct svga_context *svga ); +void svga_init_gs_functions( struct svga_context *svga ); void svga_init_vertex_functions( struct svga_context *svga ); void svga_init_constbuffer_functions( struct svga_context *svga ); void svga_init_draw_functions( struct svga_context *svga ); void svga_init_query_functions( struct svga_context *svga ); void svga_init_surface_functions(struct svga_context *svga); +void svga_init_stream_output_functions( struct svga_context *svga ); void svga_cleanup_vertex_state( struct svga_context *svga ); void svga_cleanup_tss_binding( struct svga_context *svga ); @@ -470,6 +611,8 @@ void svga_cleanup_framebuffer( struct svga_context *svga ); void svga_context_flush( struct svga_context *svga, struct pipe_fence_handle **pfence ); +void svga_context_finish(struct svga_context *svga); + void svga_hwtnl_flush_retry( struct svga_context *svga ); void svga_hwtnl_flush_buffer( struct svga_context *svga, struct pipe_resource *buffer ); @@ -478,7 +621,7 @@ void svga_surfaces_flush(struct svga_context *svga); struct pipe_context * svga_context_create(struct pipe_screen *screen, - void *priv); + void *priv, unsigned flags); /*********************************************************************** @@ -504,5 +647,22 @@ svga_have_gb_dma(const struct svga_context *svga) return svga_screen(svga->pipe.screen)->sws->have_gb_dma; } +static inline boolean +svga_have_vgpu10(const struct svga_context *svga) +{ + return svga_screen(svga->pipe.screen)->sws->have_vgpu10; +} + +static inline boolean +svga_need_to_rebind_resources(const struct svga_context *svga) +{ + return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources; +} + +static inline boolean +svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2) +{ + return memcmp(r1, r2, sizeof(*r1)) == 0; +} #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_debug.h b/lib/mesa/src/gallium/drivers/svga/svga_debug.h index 82c9b602d..039f79d4e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_debug.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_debug.h @@ -29,21 +29,22 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#define DEBUG_DMA 0x1 -#define DEBUG_TGSI 0x4 -#define DEBUG_PIPE 0x8 -#define DEBUG_STATE 0x10 -#define DEBUG_SCREEN 0x20 -#define DEBUG_TEX 0x40 -#define DEBUG_SWTNL 0x80 -#define DEBUG_CONSTS 0x100 -#define DEBUG_VIEWPORT 0x200 -#define DEBUG_VIEWS 0x400 -#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */ 
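These debug categories are plain bits in the SVGA_DEBUG mask (the re-tabulated values, plus the new DEBUG_STREAMOUT bit, continue just below). A category is normally exercised through the SVGA_DBG() macro from this header, or by testing the mask directly; the message text here is only an example:

   SVGA_DBG(DEBUG_STREAMOUT, "binding %u stream output targets\n",
            svga->num_so_targets);

   if (SVGA_DEBUG & DEBUG_STREAMOUT) {
      /* more expensive, streamout-specific diagnostics can be gated here */
   }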
-#define DEBUG_FLUSH 0x1000 /* flush after every draw */ -#define DEBUG_SYNC 0x2000 /* sync after every flush */ -#define DEBUG_QUERY 0x4000 -#define DEBUG_CACHE 0x8000 +#define DEBUG_DMA 0x1 +#define DEBUG_TGSI 0x4 +#define DEBUG_PIPE 0x8 +#define DEBUG_STATE 0x10 +#define DEBUG_SCREEN 0x20 +#define DEBUG_TEX 0x40 +#define DEBUG_SWTNL 0x80 +#define DEBUG_CONSTS 0x100 +#define DEBUG_VIEWPORT 0x200 +#define DEBUG_VIEWS 0x400 +#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */ +#define DEBUG_FLUSH 0x1000 /* flush after every draw */ +#define DEBUG_SYNC 0x2000 /* sync after every flush */ +#define DEBUG_QUERY 0x4000 +#define DEBUG_CACHE 0x8000 +#define DEBUG_STREAMOUT 0x10000 #ifdef DEBUG extern int SVGA_DEBUG; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_draw.c index 366a2dccd..80526ed4d 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw.c @@ -26,17 +26,19 @@ #include "pipe/p_compiler.h" #include "util/u_inlines.h" #include "pipe/p_defines.h" +#include "util/u_helpers.h" #include "util/u_memory.h" #include "util/u_math.h" -#include "util/u_upload_mgr.h" #include "svga_context.h" #include "svga_draw.h" #include "svga_draw_private.h" #include "svga_debug.h" #include "svga_screen.h" +#include "svga_resource.h" #include "svga_resource_buffer.h" #include "svga_resource_texture.h" +#include "svga_shader.h" #include "svga_surface.h" #include "svga_winsys.h" #include "svga_cmd.h" @@ -46,7 +48,7 @@ struct svga_hwtnl * svga_hwtnl_create(struct svga_context *svga) { struct svga_hwtnl *hwtnl = CALLOC_STRUCT(svga_hwtnl); - if (hwtnl == NULL) + if (!hwtnl) goto fail; hwtnl->svga = svga; @@ -71,8 +73,8 @@ svga_hwtnl_destroy(struct svga_hwtnl *hwtnl) } } - for (i = 0; i < hwtnl->cmd.vdecl_count; i++) - pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL); + for (i = 0; i < hwtnl->cmd.vbuf_count; i++) + pipe_resource_reference(&hwtnl->cmd.vbufs[i].buffer, NULL); for (i = 0; i < hwtnl->cmd.prim_count; i++) pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL); @@ -85,45 +87,55 @@ void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl, boolean flatshade, boolean flatshade_first) { - hwtnl->hw_pv = PV_FIRST; + struct svga_screen *svgascreen = svga_screen(hwtnl->svga->pipe.screen); + + /* User-specified PV */ hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST; + + /* Device supported PV */ + if (svgascreen->haveProvokingVertex) { + /* use the mode specified by the user */ + hwtnl->hw_pv = hwtnl->api_pv; + } + else { + /* the device only support first provoking vertex */ + hwtnl->hw_pv = PV_FIRST; + } } void -svga_hwtnl_set_unfilled(struct svga_hwtnl *hwtnl, unsigned mode) +svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode) { hwtnl->api_fillmode = mode; } void -svga_hwtnl_reset_vdecl(struct svga_hwtnl *hwtnl, unsigned count) +svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl, + unsigned count, + const SVGA3dVertexDecl * decls, + const unsigned *buffer_indexes, + SVGA3dElementLayoutId layout_id) { - unsigned i; - assert(hwtnl->cmd.prim_count == 0); - - for (i = count; i < hwtnl->cmd.vdecl_count; i++) { - pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL); - } - hwtnl->cmd.vdecl_count = count; + hwtnl->cmd.vdecl_layout_id = layout_id; + memcpy(hwtnl->cmd.vdecl, decls, count * sizeof(*decls)); + memcpy(hwtnl->cmd.vdecl_buffer_index, buffer_indexes, + count * sizeof(unsigned)); } +/** + * Specify vertex buffers for hardware drawing. 
+ */ void -svga_hwtnl_vdecl(struct svga_hwtnl *hwtnl, - unsigned i, - const SVGA3dVertexDecl * decl, struct pipe_resource *vb) +svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl, + unsigned count, struct pipe_vertex_buffer *buffers) { - assert(hwtnl->cmd.prim_count == 0); - - assert(i < hwtnl->cmd.vdecl_count); - - hwtnl->cmd.vdecl[i] = *decl; - - pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], vb); + util_set_vertex_buffers_count(hwtnl->cmd.vbufs, + &hwtnl->cmd.vbuf_count, buffers, 0, count); } @@ -145,8 +157,8 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl, return FALSE; } - for (i = 0; i < hwtnl->cmd.vdecl_count; ++i) { - if (hwtnl->cmd.vdecl_vb[i] == buffer) { + for (i = 0; i < hwtnl->cmd.vbuf_count; ++i) { + if (hwtnl->cmd.vbufs[i].buffer == buffer) { return TRUE; } } @@ -161,116 +173,463 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl, } -enum pipe_error -svga_hwtnl_flush(struct svga_hwtnl *hwtnl) +static enum pipe_error +draw_vgpu9(struct svga_hwtnl *hwtnl) { struct svga_winsys_context *swc = hwtnl->cmd.swc; struct svga_context *svga = hwtnl->svga; enum pipe_error ret; + struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; + struct svga_winsys_surface *ib_handle[QSZ]; + struct svga_winsys_surface *handle; + SVGA3dVertexDecl *vdecl; + SVGA3dPrimitiveRange *prim; + unsigned i; - if (hwtnl->cmd.prim_count) { - struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; - struct svga_winsys_surface *ib_handle[QSZ]; - struct svga_winsys_surface *handle; - SVGA3dVertexDecl *vdecl; - SVGA3dPrimitiveRange *prim; - unsigned i; + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + unsigned j = hwtnl->cmd.vdecl_buffer_index[i]; + handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer); + if (!handle) + return PIPE_ERROR_OUT_OF_MEMORY; - for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { - assert(!svga_buffer_is_user_buffer(hwtnl->cmd.vdecl_vb[i])); - handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); - if (handle == NULL) + vb_handle[i] = handle; + } + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + if (hwtnl->cmd.prim_ib[i]) { + handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); + if (!handle) return PIPE_ERROR_OUT_OF_MEMORY; + } + else + handle = NULL; + + ib_handle[i] = handle; + } + + if (svga->rebind.flags.rendertargets) { + ret = svga_reemit_framebuffer_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + + if (svga->rebind.flags.texture_samplers) { + ret = svga_reemit_tss_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + + if (svga->rebind.flags.vs) { + ret = svga_reemit_vs_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + + if (svga->rebind.flags.fs) { + ret = svga_reemit_fs_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } - vb_handle[i] = handle; + SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", + svga->curr.framebuffer.cbufs[0] ? 
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, + hwtnl->cmd.prim_count); + + ret = SVGA3D_BeginDrawPrimitives(swc, + &vdecl, + hwtnl->cmd.vdecl_count, + &prim, hwtnl->cmd.prim_count); + if (ret != PIPE_OK) + return ret; + + memcpy(vdecl, + hwtnl->cmd.vdecl, + hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]); + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + /* check for 4-byte alignment */ + assert(vdecl[i].array.offset % 4 == 0); + assert(vdecl[i].array.stride % 4 == 0); + + /* Given rangeHint is considered to be relative to indexBias, and + * indexBias varies per primitive, we cannot accurately supply an + * rangeHint when emitting more than one primitive per draw command. + */ + if (hwtnl->cmd.prim_count == 1) { + vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0]; + vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1; } + else { + vdecl[i].rangeHint.first = 0; + vdecl[i].rangeHint.last = 0; + } + + swc->surface_relocation(swc, + &vdecl[i].array.surfaceId, + NULL, vb_handle[i], SVGA_RELOC_READ); + } + + memcpy(prim, + hwtnl->cmd.prim, hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]); + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + swc->surface_relocation(swc, + &prim[i].indexArray.surfaceId, + NULL, ib_handle[i], SVGA_RELOC_READ); + pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL); + } + + SVGA_FIFOCommitAll(swc); + + hwtnl->cmd.prim_count = 0; + + return PIPE_OK; +} + + +static SVGA3dSurfaceFormat +xlate_index_format(unsigned indexWidth) +{ + if (indexWidth == 2) { + return SVGA3D_R16_UINT; + } + else if (indexWidth == 4) { + return SVGA3D_R32_UINT; + } + else { + assert(!"Bad indexWidth"); + return SVGA3D_R32_UINT; + } +} + - for (i = 0; i < hwtnl->cmd.prim_count; i++) { - if (hwtnl->cmd.prim_ib[i]) { - assert(!svga_buffer_is_user_buffer(hwtnl->cmd.prim_ib[i])); - handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); - if (handle == NULL) - return PIPE_ERROR_OUT_OF_MEMORY; +static enum pipe_error +validate_sampler_resources(struct svga_context *svga) +{ + unsigned shader; + + assert(svga_have_vgpu10(svga)); + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + unsigned count = svga->curr.num_sampler_views[shader]; + unsigned i; + struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; + enum pipe_error ret; + + /* + * Reference bound sampler resources to ensure pending updates are + * noticed by the device. 
+ */ + for (i = 0; i < count; i++) { + struct svga_pipe_sampler_view *sv = + svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); + + if (sv) { + if (sv->base.texture->target == PIPE_BUFFER) { + surfaces[i] = svga_buffer_handle(svga, sv->base.texture); + } + else { + surfaces[i] = svga_texture(sv->base.texture)->handle; + } } else { - handle = NULL; + surfaces[i] = NULL; } + } + + if (shader == PIPE_SHADER_FRAGMENT && + svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_pipe_sampler_view *sv = + svga->polygon_stipple.sampler_view; - ib_handle[i] = handle; + assert(sv); + surfaces[unit] = svga_texture(sv->base.texture)->handle; + count = MAX2(count, unit+1); } - if (svga->rebind.rendertargets) { - ret = svga_reemit_framebuffer_bindings(svga); - if (ret != PIPE_OK) { - return ret; + /* rebind the shader resources if needed */ + if (svga->rebind.flags.texture_samplers) { + for (i = 0; i < count; i++) { + if (surfaces[i]) { + ret = svga->swc->resource_rebind(svga->swc, + surfaces[i], + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } } } + } + svga->rebind.flags.texture_samplers = FALSE; - if (svga->rebind.texture_samplers) { - ret = svga_reemit_tss_bindings(svga); - if (ret != PIPE_OK) { - return ret; + return PIPE_OK; +} + + +static enum pipe_error +validate_constant_buffers(struct svga_context *svga) +{ + unsigned shader; + + assert(svga_have_vgpu10(svga)); + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + enum pipe_error ret; + struct svga_buffer *buffer; + struct svga_winsys_surface *handle; + unsigned enabled_constbufs; + + /* Rebind the default constant buffer if needed */ + if (svga->rebind.flags.constbufs) { + buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]); + if (buffer) { + ret = svga->swc->resource_rebind(svga->swc, + buffer->handle, + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; } } - if (svga->rebind.vs) { - ret = svga_reemit_vs_bindings(svga); - if (ret != PIPE_OK) { - return ret; + /* + * Reference other bound constant buffers to ensure pending updates are + * noticed by the device. 
+ */ + enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u; + while (enabled_constbufs) { + unsigned i = u_bit_scan(&enabled_constbufs); + buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer); + if (buffer) { + handle = svga_buffer_handle(svga, &buffer->b.b); + + if (svga->rebind.flags.constbufs) { + ret = svga->swc->resource_rebind(svga->swc, + handle, + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } } } + } + svga->rebind.flags.constbufs = FALSE; - if (svga->rebind.fs) { - ret = svga_reemit_fs_bindings(svga); - if (ret != PIPE_OK) { - return ret; - } + return PIPE_OK; +} + + +static enum pipe_error +draw_vgpu10(struct svga_hwtnl *hwtnl, + const SVGA3dPrimitiveRange *range, + unsigned vcount, + unsigned min_index, + unsigned max_index, struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count) +{ + struct svga_context *svga = hwtnl->svga; + struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; + struct svga_winsys_surface *ib_handle; + const unsigned vbuf_count = hwtnl->cmd.vbuf_count; + enum pipe_error ret; + unsigned i; + + assert(svga_have_vgpu10(svga)); + assert(hwtnl->cmd.prim_count == 0); + + /* We need to reemit all the current resource bindings along with the Draw + * command to be sure that the referenced resources are available for the + * Draw command, just in case the surfaces associated with the resources + * are paged out. + */ + if (svga->rebind.val) { + ret = svga_rebind_framebuffer_bindings(svga); + if (ret != PIPE_OK) + return ret; + + ret = svga_rebind_shaders(svga); + if (ret != PIPE_OK) + return ret; + } + + ret = validate_sampler_resources(svga); + if (ret != PIPE_OK) + return ret; + + ret = validate_constant_buffers(svga); + if (ret != PIPE_OK) + return ret; + + /* Get handle for each referenced vertex buffer */ + for (i = 0; i < vbuf_count; i++) { + struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer); + + if (sbuf) { + assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER); + vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b); + if (vb_handle[i] == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; } + else { + vb_handle[i] = NULL; + } + } + + /* Get handles for the index buffers */ + if (ib) { + struct svga_buffer *sbuf = svga_buffer(ib); + + assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER); + (void) sbuf; /* silence unused var warning */ - SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", - svga->curr.framebuffer.cbufs[0] ? - svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, - hwtnl->cmd.prim_count); + ib_handle = svga_buffer_handle(svga, ib); + if (!ib_handle) + return PIPE_ERROR_OUT_OF_MEMORY; + } + else { + ib_handle = NULL; + } - ret = SVGA3D_BeginDrawPrimitives(swc, &vdecl, hwtnl->cmd.vdecl_count, - &prim, hwtnl->cmd.prim_count); + /* setup vertex attribute input layout */ + if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) { + ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, + hwtnl->cmd.vdecl_layout_id); if (ret != PIPE_OK) return ret; - memcpy(vdecl, hwtnl->cmd.vdecl, - hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]); + svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id; + } - for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { - /* Given rangeHint is considered to be relative to indexBias, and - * indexBias varies per primitive, we cannot accurately supply an - * rangeHint when emitting more than one primitive per draw command. 
+ /* setup vertex buffers */ + { + SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS]; + + for (i = 0; i < vbuf_count; i++) { + buffers[i].stride = hwtnl->cmd.vbufs[i].stride; + buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset; + } + if (vbuf_count > 0) { + /* If we haven't yet emitted a drawing command or if any + * vertex buffer state is changing, issue that state now. */ - if (hwtnl->cmd.prim_count == 1) { - vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0]; - vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1; + if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) || + vbuf_count != svga->state.hw_draw.num_vbuffers || + memcmp(buffers, svga->state.hw_draw.vbuffers, + vbuf_count * sizeof(buffers[0])) || + memcmp(vb_handle, svga->state.hw_draw.vbuffer_handles, + vbuf_count * sizeof(vb_handle[0]))) { + ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count, + 0, /* startBuffer */ + buffers, vb_handle); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_draw.num_vbuffers = vbuf_count; + memcpy(svga->state.hw_draw.vbuffers, buffers, + vbuf_count * sizeof(buffers[0])); + memcpy(svga->state.hw_draw.vbuffer_handles, vb_handle, + vbuf_count * sizeof(vb_handle[0])); } - else { - vdecl[i].rangeHint.first = 0; - vdecl[i].rangeHint.last = 0; - } - - swc->surface_relocation(swc, &vdecl[i].array.surfaceId, NULL, - vb_handle[i], SVGA_RELOC_READ); } + } - memcpy(prim, hwtnl->cmd.prim, - hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]); + /* Set primitive type (line, tri, etc) */ + if (svga->state.hw_draw.topology != range->primType) { + ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType); + if (ret != PIPE_OK) + return ret; - for (i = 0; i < hwtnl->cmd.prim_count; i++) { - swc->surface_relocation(swc, &prim[i].indexArray.surfaceId, NULL, - ib_handle[i], SVGA_RELOC_READ); - pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL); + svga->state.hw_draw.topology = range->primType; + } + + if (ib_handle) { + /* indexed drawing */ + SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth); + + /* setup index buffer */ + if (ib_handle != svga->state.hw_draw.ib || + indexFormat != svga->state.hw_draw.ib_format || + range->indexArray.offset != svga->state.hw_draw.ib_offset) { + ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle, + indexFormat, + range->indexArray.offset); + if (ret != PIPE_OK) + return ret; + svga->state.hw_draw.ib = ib_handle; + svga->state.hw_draw.ib_format = indexFormat; + svga->state.hw_draw.ib_offset = range->indexArray.offset; } - SVGA_FIFOCommitAll(swc); - hwtnl->cmd.prim_count = 0; + if (instance_count > 1) { + ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc, + vcount, + instance_count, + 0, /* startIndexLocation */ + range->indexBias, + start_instance); + if (ret != PIPE_OK) + return ret; + } + else { + /* non-instanced drawing */ + ret = SVGA3D_vgpu10_DrawIndexed(svga->swc, + vcount, + 0, /* startIndexLocation */ + range->indexBias); + if (ret != PIPE_OK) + return ret; + } + } + else { + /* non-indexed drawing */ + if (instance_count > 1) { + ret = SVGA3D_vgpu10_DrawInstanced(svga->swc, + vcount, + instance_count, + range->indexBias, + start_instance); + if (ret != PIPE_OK) + return ret; + } + else { + /* non-instanced */ + ret = SVGA3D_vgpu10_Draw(svga->swc, + vcount, + range->indexBias); + if (ret != PIPE_OK) + return ret; + } } + hwtnl->cmd.prim_count = 0; + + return PIPE_OK; +} + + + +/** + * Emit any pending drawing commands to the command buffer. 
+ * When we receive VGPU9 drawing commands we accumulate them and don't + * immediately emit them into the command buffer. + * This function needs to be called before we change state that could + * effect those pending draws. + */ +enum pipe_error +svga_hwtnl_flush(struct svga_hwtnl *hwtnl) +{ + if (!svga_have_vgpu10(hwtnl->svga) && hwtnl->cmd.prim_count) { + /* we only queue up primitive for VGPU9 */ + return draw_vgpu9(hwtnl); + } return PIPE_OK; } @@ -298,18 +657,28 @@ check_draw_params(struct svga_hwtnl *hwtnl, { unsigned i; + assert(!svga_have_vgpu10(hwtnl->svga)); + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { - struct pipe_resource *vb = hwtnl->cmd.vdecl_vb[i]; - unsigned size = vb ? vb->width0 : 0; + unsigned j = hwtnl->cmd.vdecl_buffer_index[i]; + const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j]; + unsigned size = vb->buffer ? vb->buffer->width0 : 0; unsigned offset = hwtnl->cmd.vdecl[i].array.offset; unsigned stride = hwtnl->cmd.vdecl[i].array.stride; int index_bias = (int) range->indexBias + hwtnl->index_bias; unsigned width; + if (size == 0) + continue; + assert(vb); assert(size); assert(offset < size); assert(min_index <= max_index); + (void) width; + (void) stride; + (void) offset; + (void) size; switch (hwtnl->cmd.vdecl[i].identity.type) { case SVGA3D_DECLTYPE_FLOAT1: @@ -390,6 +759,9 @@ check_draw_params(struct svga_hwtnl *hwtnl, assert(size); assert(offset < size); assert(stride); + (void) size; + (void) offset; + (void) stride; switch (range->primType) { case SVGA3D_PRIMITIVE_POINTLIST: @@ -421,33 +793,57 @@ check_draw_params(struct svga_hwtnl *hwtnl, } +/** + * All drawing filters down into this function, either directly + * on the hardware path or after doing software vertex processing. + */ enum pipe_error svga_hwtnl_prim(struct svga_hwtnl *hwtnl, const SVGA3dPrimitiveRange * range, + unsigned vcount, unsigned min_index, - unsigned max_index, struct pipe_resource *ib) + unsigned max_index, struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count) { enum pipe_error ret = PIPE_OK; + if (svga_have_vgpu10(hwtnl->svga)) { + /* draw immediately */ + ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib, + start_instance, instance_count); + if (ret != PIPE_OK) { + svga_context_flush(hwtnl->svga, NULL); + ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib, + start_instance, instance_count); + assert(ret == PIPE_OK); + } + } + else { + /* batch up drawing commands */ #ifdef DEBUG - check_draw_params(hwtnl, range, min_index, max_index, ib); + check_draw_params(hwtnl, range, min_index, max_index, ib); + assert(start_instance == 0); + assert(instance_count <= 1); +#else + (void) check_draw_params; #endif - if (hwtnl->cmd.prim_count + 1 >= QSZ) { - ret = svga_hwtnl_flush(hwtnl); - if (ret != PIPE_OK) - return ret; - } + if (hwtnl->cmd.prim_count + 1 >= QSZ) { + ret = svga_hwtnl_flush(hwtnl); + if (ret != PIPE_OK) + return ret; + } - /* min/max indices are relative to bias */ - hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index; - hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; + /* min/max indices are relative to bias */ + hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index; + hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; - hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; - hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; + hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; - 
pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); - hwtnl->cmd.prim_count++; + pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); + hwtnl->cmd.prim_count++; + } return ret; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw.h b/lib/mesa/src/gallium/drivers/svga/svga_draw.h index 1db79cd91..af8ecabcb 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw.h @@ -35,54 +35,50 @@ struct svga_winsys_context; struct svga_screen; struct svga_context; struct pipe_resource; +struct u_upload_mgr; -struct svga_hwtnl * -svga_hwtnl_create(struct svga_context *svga); +struct svga_hwtnl *svga_hwtnl_create(struct svga_context *svga); -void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl ); +void svga_hwtnl_destroy(struct svga_hwtnl *hwtnl); -void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl, - boolean flatshade, - boolean flatshade_first ); +void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl, + boolean flatshade, boolean flatshade_first); -void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl, - unsigned mode ); +void svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode); -void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl, - unsigned i, - const SVGA3dVertexDecl *decl, - struct pipe_resource *vb); +void +svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl, + unsigned count, + const SVGA3dVertexDecl * decls, + const unsigned *buffer_indexes, + SVGA3dElementLayoutId layoutId); -void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl, - unsigned count ); +void +svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl, + unsigned count, struct pipe_vertex_buffer *buffers); - -enum pipe_error -svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, - unsigned prim, - unsigned start, - unsigned count); +enum pipe_error +svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count); enum pipe_error -svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, - struct pipe_resource *indexBuffer, - unsigned index_size, - int index_bias, - unsigned min_index, - unsigned max_index, - unsigned prim, - unsigned start, - unsigned count ); +svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, + struct pipe_resource *indexBuffer, + unsigned index_size, + int index_bias, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count); boolean -svga_hwtnl_is_buffer_referred( struct svga_hwtnl *hwtnl, - struct pipe_resource *buffer ); +svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl, + struct pipe_resource *buffer); -enum pipe_error -svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); +enum pipe_error svga_hwtnl_flush(struct svga_hwtnl *hwtnl); -void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, - int index_bias); +void svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias); #endif /* SVGA_DRAW_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c index d4d77200f..7d8293067 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c @@ -26,12 +26,14 @@ #include "svga_cmd.h" #include "util/u_inlines.h" +#include "util/u_prim.h" #include "indices/u_indices.h" #include "svga_hw_reg.h" #include "svga_draw.h" #include "svga_draw_private.h" #include "svga_context.h" +#include "svga_shader.h" #define DBG 
0 @@ -49,13 +51,13 @@ generate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *dst = NULL; void *dst_map = NULL; - dst = pipe_buffer_create(pipe->screen, - PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size); - if (dst == NULL) + dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_IMMUTABLE, size); + if (!dst) goto fail; dst_map = pipe_buffer_map(pipe, dst, PIPE_TRANSFER_WRITE, &transfer); - if (dst_map == NULL) + if (!dst_map) goto fail; generate(0, nr, dst_map); @@ -168,7 +170,8 @@ retrieve_or_generate_indices(struct svga_hwtnl *hwtnl, static enum pipe_error simple_draw_arrays(struct svga_hwtnl *hwtnl, - unsigned prim, unsigned start, unsigned count) + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count) { SVGA3dPrimitiveRange range; unsigned hw_prim; @@ -191,17 +194,47 @@ simple_draw_arrays(struct svga_hwtnl *hwtnl, * looking at those numbers knows to adjust them by * range.indexBias. */ - return svga_hwtnl_prim(hwtnl, &range, 0, count - 1, NULL); + return svga_hwtnl_prim(hwtnl, &range, count, + 0, count - 1, NULL, + start_instance, instance_count); } enum pipe_error svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, - unsigned prim, unsigned start, unsigned count) + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count) { - unsigned gen_prim, gen_size, gen_nr, gen_type; + unsigned gen_prim, gen_size, gen_nr; + enum indices_mode gen_type; u_generate_func gen_func; enum pipe_error ret = PIPE_OK; + unsigned api_pv = hwtnl->api_pv; + struct svga_context *svga = hwtnl->svga; + + if (svga->curr.rast->templ.flatshade && + svga->state.hw_draw.fs->constant_color_output) { + /* The fragment color is a constant, not per-vertex so the whole + * primitive will be the same color (except for possible blending). + * We can ignore the current provoking vertex state and use whatever + * the hardware wants. + */ + api_pv = hwtnl->hw_pv; + + if (hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL) { + /* Do some simple primitive conversions to avoid index buffer + * generation below. Note that polygons and quads are not directly + * supported by the svga device. Also note, we can only do this + * for flat/constant-colored rendering because of provoking vertex. 
+ */ + if (prim == PIPE_PRIM_POLYGON) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + else if (prim == PIPE_PRIM_QUADS && count == 4) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + } + } if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && prim >= PIPE_PRIM_TRIANGLES) { @@ -222,13 +255,14 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, prim, start, count, - hwtnl->api_pv, + api_pv, hwtnl->hw_pv, &gen_prim, &gen_size, &gen_nr, &gen_func); } if (gen_type == U_GENERATE_LINEAR) { - return simple_draw_arrays(hwtnl, gen_prim, start, count); + return simple_draw_arrays(hwtnl, gen_prim, start, count, + start_instance, instance_count); } else { struct pipe_resource *gen_buf = NULL; @@ -244,14 +278,19 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, if (ret != PIPE_OK) goto done; + pipe_debug_message(&svga->debug.callback, PERF_INFO, + "generating temporary index buffer for drawing %s", + u_prim_name(prim)); + ret = svga_hwtnl_simple_draw_range_elements(hwtnl, gen_buf, gen_size, start, 0, count - 1, - gen_prim, 0, gen_nr); - + gen_prim, 0, gen_nr, + start_instance, + instance_count); if (ret != PIPE_OK) goto done; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c index 038500a35..74bfebda1 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c @@ -25,6 +25,7 @@ #include "util/u_inlines.h" #include "util/u_prim.h" +#include "util/u_upload_mgr.h" #include "indices/u_indices.h" #include "svga_cmd.h" @@ -45,7 +46,7 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src, struct pipe_context *pipe = &hwtnl->svga->pipe; struct pipe_transfer *src_transfer = NULL; struct pipe_transfer *dst_transfer = NULL; - unsigned size; + unsigned size = index_size * nr; const void *src_map = NULL; struct pipe_resource *dst = NULL; void *dst_map = NULL; @@ -59,15 +60,15 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src, dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size); - if (dst == NULL) + if (!dst) goto fail; src_map = pipe_buffer_map(pipe, src, PIPE_TRANSFER_READ, &src_transfer); - if (src_map == NULL) + if (!src_map) goto fail; dst_map = pipe_buffer_map(pipe, dst, PIPE_TRANSFER_WRITE, &dst_transfer); - if (dst_map == NULL) + if (!dst_map) goto fail; translate((const char *) src_map + offset, 0, 0, nr, 0, dst_map); @@ -98,7 +99,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, unsigned index_size, int index_bias, unsigned min_index, unsigned max_index, unsigned prim, unsigned start, - unsigned count) + unsigned count, + unsigned start_instance, + unsigned instance_count) { SVGA3dPrimitiveRange range; unsigned hw_prim; @@ -109,12 +112,6 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, if (hw_count == 0) return PIPE_OK; /* nothing to draw */ - /* We should never see user-space buffers in the driver. The vbuf - * module should have converted them into real buffers. 
- */ - if (index_buffer) - assert(!svga_buffer_is_user_buffer(index_buffer)); - range.primType = hw_prim; range.primitiveCount = hw_count; range.indexArray.offset = index_offset; @@ -122,7 +119,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, range.indexWidth = index_size; range.indexBias = index_bias; - return svga_hwtnl_prim(hwtnl, &range, min_index, max_index, index_buffer); + return svga_hwtnl_prim(hwtnl, &range, count, + min_index, max_index, index_buffer, + start_instance, instance_count); } @@ -131,9 +130,11 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, struct pipe_resource *index_buffer, unsigned index_size, int index_bias, unsigned min_index, unsigned max_index, - unsigned prim, unsigned start, unsigned count) + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count) { - unsigned gen_prim, gen_size, gen_nr, gen_type; + unsigned gen_prim, gen_size, gen_nr; + enum indices_mode gen_type; u_translate_func gen_func; enum pipe_error ret = PIPE_OK; @@ -165,7 +166,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, index_bias, min_index, max_index, - gen_prim, start, count); + gen_prim, start, count, + start_instance, + instance_count); } else { struct pipe_resource *gen_buf = NULL; @@ -190,7 +193,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, index_bias, min_index, max_index, - gen_prim, 0, gen_nr); + gen_prim, 0, gen_nr, + start_instance, + instance_count); if (ret != PIPE_OK) goto done; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h index 9ab87e825..c8217422c 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h @@ -46,7 +46,11 @@ static const unsigned svga_hw_prims = (1 << PIPE_PRIM_LINE_STRIP) | (1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) | - (1 << PIPE_PRIM_TRIANGLE_FAN)); + (1 << PIPE_PRIM_TRIANGLE_FAN) | + (1 << PIPE_PRIM_LINES_ADJACENCY) | + (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) | + (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) | + (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY)); /** @@ -57,8 +61,8 @@ static const unsigned svga_hw_prims = * PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON. We convert * those to other types of primitives with index/translation code. 
*/ -static inline unsigned -svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count) +static inline SVGA3dPrimitiveType +svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count) { switch (mode) { case PIPE_PRIM_POINTS: @@ -85,6 +89,22 @@ svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count) *prim_count = vcount - 2; return SVGA3D_PRIMITIVE_TRIANGLEFAN; + case PIPE_PRIM_LINES_ADJACENCY: + *prim_count = vcount / 4; + return SVGA3D_PRIMITIVE_LINELIST_ADJ; + + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + *prim_count = vcount - 3; + return SVGA3D_PRIMITIVE_LINESTRIP_ADJ; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + *prim_count = vcount / 6; + return SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ; + + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + *prim_count = vcount / 2 - 2 ; + return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ; + default: assert(0); *prim_count = 0; @@ -110,13 +130,19 @@ struct index_cache { struct draw_cmd { struct svga_winsys_context *swc; + /* vertex layout info */ SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX]; - struct pipe_resource *vdecl_vb[SVGA3D_INPUTREG_MAX]; unsigned vdecl_count; + SVGA3dElementLayoutId vdecl_layout_id; + unsigned vdecl_buffer_index[SVGA3D_INPUTREG_MAX]; + + /* vertex buffer info */ + struct pipe_vertex_buffer vbufs[SVGA3D_INPUTREG_MAX]; + unsigned vbuf_count; SVGA3dPrimitiveRange prim[QSZ]; struct pipe_resource *prim_ib[QSZ]; - unsigned prim_count; + unsigned prim_count; /**< number of primitives for this draw */ unsigned min_index[QSZ]; unsigned max_index[QSZ]; }; @@ -158,9 +184,11 @@ struct svga_hwtnl { enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, const SVGA3dPrimitiveRange *range, + unsigned vcount, unsigned min_index, unsigned max_index, - struct pipe_resource *ib ); + struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count); enum pipe_error svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, @@ -171,7 +199,9 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, unsigned max_index, unsigned prim, unsigned start, - unsigned count ); + unsigned count, + unsigned start_instance, + unsigned instance_count); #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.c b/lib/mesa/src/gallium/drivers/svga/svga_format.c index 8c1b161e6..0186736c8 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_format.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_format.c @@ -26,6 +26,7 @@ #include "pipe/p_format.h" #include "util/u_debug.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "svga_winsys.h" @@ -33,6 +34,347 @@ #include "svga_format.h" +/** Describes mapping from gallium formats to SVGA vertex/pixel formats */ +struct vgpu10_format_entry +{ + enum pipe_format pformat; + SVGA3dSurfaceFormat vertex_format; + SVGA3dSurfaceFormat pixel_format; + unsigned flags; +}; + + +static const struct vgpu10_format_entry format_conversion_table[] = +{ + /* Gallium format SVGA3D vertex format SVGA3D pixel format Flags */ + { PIPE_FORMAT_NONE, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B8G8R8A8_UNORM, SVGA3D_B8G8R8A8_UNORM, SVGA3D_B8G8R8A8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_B8G8R8X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_A8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B5G5R5A1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_B4G4R4A4_UNORM, 
SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B5G6R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_I8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_UYVY, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_YUYV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D16_UNORM, 0 }, + { PIPE_FORMAT_Z32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_Z32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, 0 }, + { PIPE_FORMAT_Z24_UNORM_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 }, + { PIPE_FORMAT_S8_UINT_Z24_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_Z24X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 }, + { PIPE_FORMAT_X8Z24_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64A64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32A32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_USCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32G32_USCALED, SVGA3D_R32G32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32_USCALED, SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32A32_USCALED, SVGA3D_R32G32B32A32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32A32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_SSCALED, SVGA3D_R32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R32G32_SSCALED, SVGA3D_R32G32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32_SSCALED, SVGA3D_R32G32B32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32A32_SSCALED, SVGA3D_R32G32B32A32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { 
PIPE_FORMAT_R16_UNORM, SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16_UNORM, SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16B16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R16_USCALED, SVGA3D_R16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16G16_USCALED, SVGA3D_R16G16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16_USCALED, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16A16_USCALED, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16_SNORM, SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, 0 }, + { PIPE_FORMAT_R16G16_SNORM, SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, 0 }, + { PIPE_FORMAT_R16G16B16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, 0 }, + { PIPE_FORMAT_R16_SSCALED, SVGA3D_R16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R16G16_SSCALED, SVGA3D_R16G16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16_SSCALED, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16A16_SSCALED, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8_UNORM, SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R8G8_UNORM, SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R8G8B8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_X8B8G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_USCALED, SVGA3D_R8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R8G8_USCALED, SVGA3D_R8G8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8_USCALED, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8A8_USCALED, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { 73, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_SNORM, SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, 0 }, + { PIPE_FORMAT_R8G8_SNORM, SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, 0 }, + { PIPE_FORMAT_R8G8B8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, 0 }, + { 78, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 79, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 80, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 81, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_SSCALED, SVGA3D_R8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8G8_SSCALED, SVGA3D_R8G8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8_SSCALED, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8A8_SSCALED, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { 86, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32_FIXED, SVGA3D_FORMAT_INVALID, 
SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32A32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16_FLOAT, SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16B16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_L8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8B8G8R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B8G8R8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8A8_UNORM_SRGB, TF_GEN_MIPS }, + { PIPE_FORMAT_B8G8R8X8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM_SRGB, TF_GEN_MIPS }, + { PIPE_FORMAT_A8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, TF_GEN_MIPS }, + { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM, 0 }, + { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM, 0 }, + { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM_SRGB, 0 }, + { PIPE_FORMAT_RGTC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_UNORM, 0 }, + { PIPE_FORMAT_RGTC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_SNORM, 0 }, + { PIPE_FORMAT_RGTC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, 0 }, + { PIPE_FORMAT_RGTC2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_SNORM, 0 }, + { PIPE_FORMAT_R8G8_B8G8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8SG8SB8UX8U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R5SG5SB6U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B5G5R5X1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10A2_USCALED, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_USCALED }, + { PIPE_FORMAT_R11G11B10_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_R11G11B10_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R9G9B9E5_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_R9G9B9E5_SHAREDEXP, 0 }, + { PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT_S8X24_UINT, 0 }, + { PIPE_FORMAT_R1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10X2_USCALED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10X2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B10G10R10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA }, 
+ { PIPE_FORMAT_R10SG10SB10SA2U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8Bx_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B4G4R4X4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X24S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X32_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B2G3R3_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_YV12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_YV16, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_IYUV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_NV12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_NV21, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A4R4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10A2_SSCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_SSCALED }, + { PIPE_FORMAT_R10G10B10A2_SNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_SNORM }, + { PIPE_FORMAT_B10G10R10A2_USCALED, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_USCALED }, + { PIPE_FORMAT_B10G10R10A2_SSCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, 
VF_BGRA | VF_PUINT_TO_SSCALED }, + { PIPE_FORMAT_B10G10R10A2_SNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SNORM }, + { PIPE_FORMAT_R8_UINT, SVGA3D_R8_UINT, SVGA3D_R8_UINT, 0 }, + { PIPE_FORMAT_R8G8_UINT, SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, 0 }, + { PIPE_FORMAT_R8G8B8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, 0 }, + { PIPE_FORMAT_R8_SINT, SVGA3D_R8_SINT, SVGA3D_R8_SINT, 0 }, + { PIPE_FORMAT_R8G8_SINT, SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, 0 }, + { PIPE_FORMAT_R8G8B8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, 0 }, + { PIPE_FORMAT_R16_UINT, SVGA3D_R16_UINT, SVGA3D_R16_UINT, 0 }, + { PIPE_FORMAT_R16G16_UINT, SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, 0 }, + { PIPE_FORMAT_R16G16B16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, 0 }, + { PIPE_FORMAT_R16_SINT, SVGA3D_R16_SINT, SVGA3D_R16_SINT, 0 }, + { PIPE_FORMAT_R16G16_SINT, SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, 0 }, + { PIPE_FORMAT_R16G16B16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, 0 }, + { PIPE_FORMAT_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, 0 }, + { PIPE_FORMAT_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, 0 }, + { PIPE_FORMAT_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, 0 }, + { PIPE_FORMAT_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, 0 }, + { PIPE_FORMAT_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, 0 }, + { PIPE_FORMAT_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, 0 }, + { PIPE_FORMAT_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, 0 }, + { PIPE_FORMAT_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, 0 }, + { PIPE_FORMAT_A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32A32_UINT, SVGA3D_FORMAT_INVALID, 
SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B10G10R10A2_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC1_RGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8_R8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_B8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B10G10R10X2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32X32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32X32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32X32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32A32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, 0 }, + { PIPE_FORMAT_B5G6R5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_RGBA_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_RGB_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_RGB_UFLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 
PIPE_FORMAT_G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G16R16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G16R16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8B8G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_SRGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RGB8A1, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_SRGB8A1, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RGBA8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_SRGBA8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_R11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_R11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RG11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RG11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_4x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_4x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, +}; + + +/** + * Translate a gallium vertex format to a vgpu10 vertex format. 
+ * Also, return any special vertex format flags. + */ +void +svga_translate_vertex_format_vgpu10(enum pipe_format format, + SVGA3dSurfaceFormat *svga_format, + unsigned *vf_flags) +{ + assert(format < Elements(format_conversion_table)); + if (format >= Elements(format_conversion_table)) { + format = PIPE_FORMAT_NONE; + } + *svga_format = format_conversion_table[format].vertex_format; + *vf_flags = format_conversion_table[format].flags; +} + + /* * Translate from gallium format to SVGA3D format. */ @@ -41,8 +383,16 @@ svga_translate_format(struct svga_screen *ss, enum pipe_format format, unsigned bind) { - switch(format) { + if (ss->sws->have_vgpu10) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { + return format_conversion_table[format].vertex_format; + } + else { + return format_conversion_table[format].pixel_format; + } + } + switch(format) { case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_A8R8G8B8; case PIPE_FORMAT_B8G8R8X8_UNORM: @@ -70,10 +420,13 @@ svga_translate_format(struct svga_screen *ss, return SVGA3D_A16B16G16R16; case PIPE_FORMAT_Z16_UNORM: + assert(!ss->sws->have_vgpu10); return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.z16 : SVGA3D_Z_D16; case PIPE_FORMAT_S8_UINT_Z24_UNORM: + assert(!ss->sws->have_vgpu10); return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.s8z24 : SVGA3D_Z_D24S8; case PIPE_FORMAT_X8Z24_UNORM: + assert(!ss->sws->have_vgpu10); return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.x8z24 : SVGA3D_Z_D24X8; case PIPE_FORMAT_A8_UNORM: @@ -116,12 +469,17 @@ svga_translate_format(struct svga_screen *ss, * Format capability description entry. */ struct format_cap { + const char *name; + SVGA3dSurfaceFormat format; /* * Capability index corresponding to the format. */ - SVGA3dDevCapIndex index; + SVGA3dDevCapIndex devcap; + + /* size of each pixel/block */ + unsigned block_width, block_height, block_bytes; /* * Mask of supported SVGA3dFormatOp operations, to be inferred when the @@ -134,413 +492,1367 @@ struct format_cap { /* * Format capability description table. * - * Ordererd by increasing SVGA3dSurfaceFormat value, but with gaps. + * Ordered by increasing SVGA3dSurfaceFormat value, but with gaps. + * + * Note: there are some special cases below where we set devcap=0 and + * avoid querying the host. In particular, depth/stencil formats which + * can be rendered to and sampled from. For example, the gallium format + * PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT + * for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling. + * If we want to query if a format supports both rendering and sampling the + * host will tell us no for SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D16_UNORM and + * SVGA3D_R24_UNORM_X8_TYPELESS. So we override the host query for those + * formats and report that both can do rendering and sampling. 
*/ static const struct format_cap format_cap_table[] = { { + "SVGA3D_FORMAT_INVALID", + SVGA3D_FORMAT_INVALID, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_X8R8G8B8", SVGA3D_X8R8G8B8, SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_DISPLAYMODE | - SVGA3DFORMAT_OP_3DACCELERATION | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A8R8G8B8", SVGA3D_A8R8G8B8, SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_R5G6B5", SVGA3D_R5G6B5, SVGA3D_DEVCAP_SURFACEFMT_R5G6B5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_DISPLAYMODE | - SVGA3DFORMAT_OP_3DACCELERATION | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_X1R5G5B5", SVGA3D_X1R5G5B5, SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A1R5G5B5", SVGA3D_A1R5G5B5, SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A4R4G4B4", SVGA3D_A4R4G4B4, SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, - /* - * SVGA3D_Z_D32 is not yet supported, and has no corresponding - * SVGA3D_DEVCAP_xxx. - */ { + /* + * SVGA3D_Z_D32 is not yet supported, and has no corresponding + * SVGA3D_DEVCAP_xxx. 
+ */ + "SVGA3D_Z_D32", + SVGA3D_Z_D32, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_Z_D16", SVGA3D_Z_D16, SVGA3D_DEVCAP_SURFACEFMT_Z_D16, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 2, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_Z_D24S8", SVGA3D_Z_D24S8, SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 4, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_Z_D15S1", SVGA3D_Z_D15S1, SVGA3D_DEVCAP_MAX, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 2, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_LUMINANCE8", SVGA3D_LUMINANCE8, SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8, + 1, 1, 1, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { - SVGA3D_LUMINANCE8_ALPHA8, - SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8, - SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + /* + * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding + * SVGA3D_DEVCAP_xxx. + */ + "SVGA3D_LUMINANCE4_ALPHA4", + SVGA3D_LUMINANCE4_ALPHA4, 0, 0, 0, 0, 0 }, - /* - * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding - * SVGA3D_DEVCAP_xxx. - */ { + "SVGA3D_LUMINANCE16", SVGA3D_LUMINANCE16, SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { + "SVGA3D_LUMINANCE8_ALPHA8", + SVGA3D_LUMINANCE8_ALPHA8, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE + }, + { + "SVGA3D_DXT1", SVGA3D_DXT1, SVGA3D_DEVCAP_SURFACEFMT_DXT1, + 4, 4, 8, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT2", SVGA3D_DXT2, SVGA3D_DEVCAP_SURFACEFMT_DXT2, + 4, 4, 8, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT3", SVGA3D_DXT3, SVGA3D_DEVCAP_SURFACEFMT_DXT3, + 4, 4, 16, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT4", SVGA3D_DXT4, SVGA3D_DEVCAP_SURFACEFMT_DXT4, + 4, 4, 16, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT5", SVGA3D_DXT5, SVGA3D_DEVCAP_SURFACEFMT_DXT5, + 4, 4, 8, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_BUMPU8V8", SVGA3D_BUMPU8V8, SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE + }, + { + /* + * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding + * SVGA3D_DEVCAP_xxx. + */ + "SVGA3D_BUMPL6V5U5", + SVGA3D_BUMPL6V5U5, 0, 0, 0, 0, 0 }, - /* - * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. 
- */ { + "SVGA3D_BUMPX8L8V8U8", SVGA3D_BUMPX8L8V8U8, SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, - /* - * SVGA3D_BUMPL8V8U8 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead. - */ { + "SVGA3D_FORMAT_DEAD1", + SVGA3D_FORMAT_DEAD1, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_ARGB_S10E5", SVGA3D_ARGB_S10E5, SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_ARGB_S23E8", SVGA3D_ARGB_S23E8, SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A2R10G10B10", SVGA3D_A2R10G10B10, SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, - /* - * SVGA3D_V8U8 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead. - */ { + /* + * SVGA3D_V8U8 is unsupported; it has no corresponding + * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead. + */ + "SVGA3D_V8U8", + SVGA3D_V8U8, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_Q8W8V8U8", SVGA3D_Q8W8V8U8, SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_CxV8U8", SVGA3D_CxV8U8, SVGA3D_DEVCAP_SURFACEFMT_CxV8U8, - SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE + }, + { + /* + * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding + * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead. + */ + "SVGA3D_X8L8V8U8", + SVGA3D_X8L8V8U8, 0, 0, 0, 0, 0 }, - /* - * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead. 
- */ { + "SVGA3D_A2W10V10U10", SVGA3D_A2W10V10U10, SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10, - SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE }, { + "SVGA3D_ALPHA8", SVGA3D_ALPHA8, SVGA3D_DEVCAP_SURFACEFMT_ALPHA8, + 1, 1, 1, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { + "SVGA3D_R_S10E5", SVGA3D_R_S10E5, SVGA3D_DEVCAP_SURFACEFMT_R_S10E5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_R_S23E8", SVGA3D_R_S23E8, SVGA3D_DEVCAP_SURFACEFMT_R_S23E8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_RG_S10E5", SVGA3D_RG_S10E5, SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_RG_S23E8", SVGA3D_RG_S23E8, SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, - /* - * SVGA3D_BUFFER is a placeholder format for index/vertex buffers. - */ { + /* + * SVGA3D_BUFFER is a placeholder format for index/vertex buffers. 
+ */ + "SVGA3D_BUFFER", + SVGA3D_BUFFER, 0, 1, 1, 1, 0 + }, + { + "SVGA3D_Z_D24X8", SVGA3D_Z_D24X8, SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 4, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_V16U16", SVGA3D_V16U16, SVGA3D_DEVCAP_SURFACEFMT_V16U16, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { + "SVGA3D_G16R16", SVGA3D_G16R16, SVGA3D_DEVCAP_SURFACEFMT_G16R16, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A16B16G16R16", SVGA3D_A16B16G16R16, SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16, + 1, 1, 8, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_UYVY", SVGA3D_UYVY, SVGA3D_DEVCAP_SURFACEFMT_UYVY, + 0, 0, 0, 0 }, { + "SVGA3D_YUY2", SVGA3D_YUY2, SVGA3D_DEVCAP_SURFACEFMT_YUY2, + 0, 0, 0, 0 }, { + "SVGA3D_NV12", SVGA3D_NV12, SVGA3D_DEVCAP_SURFACEFMT_NV12, + 0, 0, 0, 0 }, { + "SVGA3D_AYUV", SVGA3D_AYUV, SVGA3D_DEVCAP_SURFACEFMT_AYUV, + 0, 0, 0, 0 }, { + "SVGA3D_R32G32B32A32_TYPELESS", + SVGA3D_R32G32B32A32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32A32_UINT", + SVGA3D_R32G32B32A32_UINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32A32_SINT", + SVGA3D_R32G32B32A32_SINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_TYPELESS", + SVGA3D_R32G32B32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_FLOAT", + SVGA3D_R32G32B32_FLOAT, + SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_UINT", + SVGA3D_R32G32B32_UINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_SINT", + SVGA3D_R32G32B32_SINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_TYPELESS", + SVGA3D_R16G16B16A16_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_UINT", + SVGA3D_R16G16B16A16_UINT, + 
SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_SNORM", + SVGA3D_R16G16B16A16_SNORM, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_SINT", + SVGA3D_R16G16B16A16_SINT, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_TYPELESS", + SVGA3D_R32G32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_UINT", + SVGA3D_R32G32_UINT, + SVGA3D_DEVCAP_DXFMT_R32G32_UINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_SINT", + SVGA3D_R32G32_SINT, + SVGA3D_DEVCAP_DXFMT_R32G32_SINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G8X24_TYPELESS", + SVGA3D_R32G8X24_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_D32_FLOAT_S8X24_UINT", + SVGA3D_D32_FLOAT_S8X24_UINT, + 0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT*/ + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_R32_FLOAT_X8X24_TYPELESS", + SVGA3D_R32_FLOAT_X8X24_TYPELESS, + 0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS*/ + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_X32_TYPELESS_G8X24_UINT", + SVGA3D_X32_TYPELESS_G8X24_UINT, + SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R10G10B10A2_TYPELESS", + SVGA3D_R10G10B10A2_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R10G10B10A2_UINT", + SVGA3D_R10G10B10A2_UINT, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R11G11B10_FLOAT", + SVGA3D_R11G11B10_FLOAT, + SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_TYPELESS", + SVGA3D_R8G8B8A8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + 
SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_UNORM", + SVGA3D_R8G8B8A8_UNORM, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_UNORM_SRGB", + SVGA3D_R8G8B8A8_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_UINT", + SVGA3D_R8G8B8A8_UINT, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_SINT", + SVGA3D_R8G8B8A8_SINT, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_TYPELESS", + SVGA3D_R16G16_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_UINT", + SVGA3D_R16G16_UINT, + SVGA3D_DEVCAP_DXFMT_R16G16_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_SINT", + SVGA3D_R16G16_SINT, + SVGA3D_DEVCAP_DXFMT_R16G16_SINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32_TYPELESS", + SVGA3D_R32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_D32_FLOAT", + SVGA3D_D32_FLOAT, + 0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_R32_UINT", + SVGA3D_R32_UINT, + SVGA3D_DEVCAP_DXFMT_R32_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32_SINT", + SVGA3D_R32_SINT, + SVGA3D_DEVCAP_DXFMT_R32_SINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R24G8_TYPELESS", + SVGA3D_R24G8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_D24_UNORM_S8_UINT", + SVGA3D_D24_UNORM_S8_UINT, + 0, /*SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_R24_UNORM_X8_TYPELESS", + SVGA3D_R24_UNORM_X8_TYPELESS, + 0, 
/*SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_X24_TYPELESS_G8_UINT", + SVGA3D_X24_TYPELESS_G8_UINT, + SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_R8G8_TYPELESS", + SVGA3D_R8G8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_UNORM", + SVGA3D_R8G8_UNORM, + SVGA3D_DEVCAP_DXFMT_R8G8_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_UINT", + SVGA3D_R8G8_UINT, + SVGA3D_DEVCAP_DXFMT_R8G8_UINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_SINT", + SVGA3D_R8G8_SINT, + SVGA3D_DEVCAP_DXFMT_R8G8_SINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_TYPELESS", + SVGA3D_R16_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R16_TYPELESS, + 1, 1, 2, + SVGA3DFORMAT_OP_ZSTENCIL | + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_UNORM", + SVGA3D_R16_UNORM, + SVGA3D_DEVCAP_DXFMT_R16_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_UINT", + SVGA3D_R16_UINT, + SVGA3D_DEVCAP_DXFMT_R16_UINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_SNORM", + SVGA3D_R16_SNORM, + SVGA3D_DEVCAP_DXFMT_R16_SNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_SINT", + SVGA3D_R16_SINT, + SVGA3D_DEVCAP_DXFMT_R16_SINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_TYPELESS", + SVGA3D_R8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R8_TYPELESS, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_UNORM", + SVGA3D_R8_UNORM, + SVGA3D_DEVCAP_DXFMT_R8_UNORM, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_UINT", + SVGA3D_R8_UINT, + SVGA3D_DEVCAP_DXFMT_R8_UINT, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_SNORM", + SVGA3D_R8_SNORM, + SVGA3D_DEVCAP_DXFMT_R8_SNORM, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_SINT", + SVGA3D_R8_SINT, + SVGA3D_DEVCAP_DXFMT_R8_SINT, + 1, 1, 1, + 
SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_P8", + SVGA3D_P8, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_R9G9B9E5_SHAREDEXP", + SVGA3D_R9G9B9E5_SHAREDEXP, + SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_B8G8_UNORM", + SVGA3D_R8G8_B8G8_UNORM, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_G8R8_G8B8_UNORM", + SVGA3D_G8R8_G8B8_UNORM, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_BC1_TYPELESS", + SVGA3D_BC1_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC1_UNORM_SRGB", + SVGA3D_BC1_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC2_TYPELESS", + SVGA3D_BC2_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC2_UNORM_SRGB", + SVGA3D_BC2_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC3_TYPELESS", + SVGA3D_BC3_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC3_UNORM_SRGB", + SVGA3D_BC3_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC4_TYPELESS", + SVGA3D_BC4_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_ATI1", + SVGA3D_ATI1, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_BC4_SNORM", + SVGA3D_BC4_SNORM, + SVGA3D_DEVCAP_DXFMT_BC4_SNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC5_TYPELESS", + SVGA3D_BC5_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_ATI2", + SVGA3D_ATI2, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_BC5_SNORM", + SVGA3D_BC5_SNORM, + SVGA3D_DEVCAP_DXFMT_BC5_SNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_R10G10B10_XR_BIAS_A2_UNORM", + SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_B8G8R8A8_TYPELESS", + SVGA3D_B8G8R8A8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8A8_UNORM_SRGB", + SVGA3D_B8G8R8A8_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8X8_TYPELESS", + SVGA3D_B8G8R8X8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8X8_UNORM_SRGB", + SVGA3D_B8G8R8X8_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_Z_DF16", SVGA3D_Z_DF16, SVGA3D_DEVCAP_SURFACEFMT_Z_DF16, + 1, 1, 2, 0 }, { + "SVGA3D_Z_DF24", SVGA3D_Z_DF24, 
SVGA3D_DEVCAP_SURFACEFMT_Z_DF24, + 1, 1, 4, 0 }, { + "SVGA3D_Z_D24S8_INT", SVGA3D_Z_D24S8_INT, SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT, - 0 + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL }, + { + "SVGA3D_YV12", + SVGA3D_YV12, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_R32G32B32A32_FLOAT", + SVGA3D_R32G32B32A32_FLOAT, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_FLOAT", + SVGA3D_R16G16B16A16_FLOAT, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_UNORM", + SVGA3D_R16G16B16A16_UNORM, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_FLOAT", + SVGA3D_R32G32_FLOAT, + SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R10G10B10A2_UNORM", + SVGA3D_R10G10B10A2_UNORM, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_SNORM", + SVGA3D_R8G8B8A8_SNORM, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_FLOAT", + SVGA3D_R16G16_FLOAT, + SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_UNORM", + SVGA3D_R16G16_UNORM, + SVGA3D_DEVCAP_DXFMT_R16G16_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_SNORM", + SVGA3D_R16G16_SNORM, + SVGA3D_DEVCAP_DXFMT_R16G16_SNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + /* Special case: no devcap / report sampler, render target and + * depth/stencil ability + */ + "SVGA3D_R32_FLOAT", + SVGA3D_R32_FLOAT, + 0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_R8G8_SNORM", + SVGA3D_R8G8_SNORM, + SVGA3D_DEVCAP_DXFMT_R8G8_SNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_FLOAT", + SVGA3D_R16_FLOAT, + SVGA3D_DEVCAP_DXFMT_R16_FLOAT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_D16_UNORM", + SVGA3D_D16_UNORM, + 0, /*SVGA3D_DEVCAP_DXFMT_D16_UNORM*/ + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + 
SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_A8_UNORM", + SVGA3D_A8_UNORM, + SVGA3D_DEVCAP_DXFMT_A8_UNORM, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_BC1_UNORM", + SVGA3D_BC1_UNORM, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC2_UNORM", + SVGA3D_BC2_UNORM, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC3_UNORM", + SVGA3D_BC3_UNORM, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_B5G6R5_UNORM", + SVGA3D_B5G6R5_UNORM, + SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_DISPLAYMODE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B5G5R5A1_UNORM", + SVGA3D_B5G5R5A1_UNORM, + SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_DISPLAYMODE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8A8_UNORM", + SVGA3D_B8G8R8A8_UNORM, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8X8_UNORM", + SVGA3D_B8G8R8X8_UNORM, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_DISPLAYMODE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_BC4_UNORM", + SVGA3D_BC4_UNORM, + SVGA3D_DEVCAP_DXFMT_BC4_UNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC5_UNORM", + SVGA3D_BC5_UNORM, + SVGA3D_DEVCAP_DXFMT_BC5_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + } }; +/** + * Debug only: + * 1. check that format_cap_table[i] matches the i-th SVGA3D format. + * 2. check that format_conversion_table[i].pformat == i. + */ +static void +check_format_tables(void) +{ + static boolean first_call = TRUE; + + if (first_call) { + unsigned i; + + STATIC_ASSERT(Elements(format_cap_table) == SVGA3D_FORMAT_MAX); + for (i = 0; i < Elements(format_cap_table); i++) { + assert(format_cap_table[i].format == i); + } + + STATIC_ASSERT(Elements(format_conversion_table) == PIPE_FORMAT_COUNT); + for (i = 0; i < Elements(format_conversion_table); i++) { + assert(format_conversion_table[i].pformat == i); + } + + first_call = FALSE; + } +} + + /* * Get format capabilities from the host. 
It takes in consideration * deprecated/unsupported formats, and formats which are implicitely assumed to @@ -551,181 +1863,333 @@ svga_get_format_cap(struct svga_screen *ss, SVGA3dSurfaceFormat format, SVGA3dSurfaceFormatCaps *caps) { + struct svga_winsys_screen *sws = ss->sws; + SVGA3dDevCapResult result; const struct format_cap *entry; - for (entry = format_cap_table; entry < format_cap_table + Elements(format_cap_table); ++entry) { - if (entry->format == format) { - struct svga_winsys_screen *sws = ss->sws; - SVGA3dDevCapResult result; +#ifdef DEBUG + check_format_tables(); +#else + (void) check_format_tables; +#endif - if (sws->get_cap(sws, entry->index, &result)) { - /* Explicitly advertised format */ - caps->value = result.u; - } else { - /* Implicitly advertised format -- use default caps */ - caps->value = entry->defaultOperations; - } + assert(format < Elements(format_cap_table)); + entry = &format_cap_table[format]; + assert(entry->format == format); - return; + if (entry->devcap && sws->get_cap(sws, entry->devcap, &result)) { + /* Explicitly advertised format */ + if (entry->devcap > SVGA3D_DEVCAP_DX) { + /* Translate DX/VGPU10 format cap to VGPU9 cap */ + caps->value = 0; + if (result.u & SVGA3D_DXFMT_COLOR_RENDERTARGET) + caps->value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET; + if (!(result.u & SVGA3D_DXFMT_BLENDABLE)) + caps->value |= SVGA3DFORMAT_OP_NOALPHABLEND; + if (result.u & SVGA3D_DXFMT_DEPTH_RENDERTARGET) + caps->value |= SVGA3DFORMAT_OP_ZSTENCIL; + if (result.u & SVGA3D_DXFMT_SHADER_SAMPLE) + caps->value |= (SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE); + if (result.u & SVGA3D_DXFMT_VOLUME) + caps->value |= SVGA3DFORMAT_OP_VOLUMETEXTURE; + } + else { + /* Return VGPU9 format cap as-is */ + caps->value = result.u; } - } - /* Unsupported format */ - caps->value = 0; + } else { + /* Implicitly advertised format -- use default caps */ + caps->value = entry->defaultOperations; + } } -/** - * Return block size and bytes per block for the given SVGA3D format. - * block_width and block_height are one for uncompressed formats and - * greater than one for compressed formats. - * Note: we don't handle formats that are unsupported, according to - * the format_cap_table above. - */ void svga_format_size(SVGA3dSurfaceFormat format, unsigned *block_width, unsigned *block_height, unsigned *bytes_per_block) { - *block_width = *block_height = 1; + assert(format < Elements(format_cap_table)); + *block_width = format_cap_table[format].block_width; + *block_height = format_cap_table[format].block_height; + *bytes_per_block = format_cap_table[format].block_bytes; + /* Make sure the the table entry was valid */ + if (*block_width == 0) + debug_printf("Bad table entry for %s\n", svga_format_name(format)); + assert(*block_width); + assert(*block_height); + assert(*bytes_per_block); +} + + +const char * +svga_format_name(SVGA3dSurfaceFormat format) +{ + assert(format < Elements(format_cap_table)); + return format_cap_table[format].name; +} + +/** + * Is the given SVGA3dSurfaceFormat a signed or unsigned integer color format? 
+ */ +boolean +svga_format_is_integer(SVGA3dSurfaceFormat format) +{ switch (format) { - case SVGA3D_X8R8G8B8: - case SVGA3D_A8R8G8B8: - *bytes_per_block = 4; - return; - - case SVGA3D_R5G6B5: - case SVGA3D_X1R5G5B5: - case SVGA3D_A1R5G5B5: - case SVGA3D_A4R4G4B4: - *bytes_per_block = 2; - return; - - case SVGA3D_Z_D32: - *bytes_per_block = 4; - return; - - case SVGA3D_Z_D16: - *bytes_per_block = 2; - return; - - case SVGA3D_Z_D24S8: - *bytes_per_block = 4; - return; - - case SVGA3D_Z_D15S1: - *bytes_per_block = 2; - return; - - case SVGA3D_LUMINANCE8: - case SVGA3D_LUMINANCE4_ALPHA4: - *bytes_per_block = 1; - return; - - case SVGA3D_LUMINANCE16: - case SVGA3D_LUMINANCE8_ALPHA8: - *bytes_per_block = 2; - return; - - case SVGA3D_DXT1: - case SVGA3D_DXT2: - *block_width = *block_height = 4; - *bytes_per_block = 8; - return; - - case SVGA3D_DXT3: - case SVGA3D_DXT4: - case SVGA3D_DXT5: - *block_width = *block_height = 4; - *bytes_per_block = 16; - return; - - case SVGA3D_BUMPU8V8: - case SVGA3D_BUMPL6V5U5: - *bytes_per_block = 2; - return; - - case SVGA3D_BUMPX8L8V8U8: - *bytes_per_block = 4; - return; - - case SVGA3D_ARGB_S10E5: - *bytes_per_block = 8; - return; - - case SVGA3D_ARGB_S23E8: - *bytes_per_block = 16; - return; - - case SVGA3D_A2R10G10B10: - *bytes_per_block = 4; - return; - - case SVGA3D_Q8W8V8U8: - *bytes_per_block = 4; - return; - - case SVGA3D_CxV8U8: - *bytes_per_block = 2; - return; - - case SVGA3D_X8L8V8U8: - case SVGA3D_A2W10V10U10: - *bytes_per_block = 4; - return; - - case SVGA3D_ALPHA8: - *bytes_per_block = 1; - return; - - case SVGA3D_R_S10E5: - *bytes_per_block = 2; - return; - case SVGA3D_R_S23E8: - *bytes_per_block = 4; - return; - case SVGA3D_RG_S10E5: - *bytes_per_block = 4; - return; - case SVGA3D_RG_S23E8: - *bytes_per_block = 8; - return; - - case SVGA3D_BUFFER: - *bytes_per_block = 1; - return; - - case SVGA3D_Z_D24X8: - *bytes_per_block = 4; - return; - - case SVGA3D_V16U16: - *bytes_per_block = 4; - return; - - case SVGA3D_G16R16: - *bytes_per_block = 4; - return; - - case SVGA3D_A16B16G16R16: - *bytes_per_block = 8; - return; - - case SVGA3D_Z_DF16: - *bytes_per_block = 2; - return; - case SVGA3D_Z_DF24: - *bytes_per_block = 4; - return; - case SVGA3D_Z_D24S8_INT: - *bytes_per_block = 4; - return; + case SVGA3D_R32G32B32A32_SINT: + case SVGA3D_R32G32B32_SINT: + case SVGA3D_R32G32_SINT: + case SVGA3D_R32_SINT: + case SVGA3D_R16G16B16A16_SINT: + case SVGA3D_R16G16_SINT: + case SVGA3D_R16_SINT: + case SVGA3D_R8G8B8A8_SINT: + case SVGA3D_R8G8_SINT: + case SVGA3D_R8_SINT: + case SVGA3D_R32G32B32A32_UINT: + case SVGA3D_R32G32B32_UINT: + case SVGA3D_R32G32_UINT: + case SVGA3D_R32_UINT: + case SVGA3D_R16G16B16A16_UINT: + case SVGA3D_R16G16_UINT: + case SVGA3D_R16_UINT: + case SVGA3D_R8G8B8A8_UINT: + case SVGA3D_R8G8_UINT: + case SVGA3D_R8_UINT: + case SVGA3D_R10G10B10A2_UINT: + return TRUE; + default: + return FALSE; + } +} +boolean +svga_format_support_gen_mips(enum pipe_format format) +{ + assert(format < Elements(format_conversion_table)); + return ((format_conversion_table[format].flags & TF_GEN_MIPS) > 0); +} + + +/** + * Given a texture format, return the expected data type returned from + * the texture sampler. For example, UNORM8 formats return floating point + * values while SINT formats returned signed integer values. + * Note: this function could be moved into the gallum u_format.[ch] code + * if it's useful to anyone else. 
+ */ +enum tgsi_return_type +svga_get_texture_datatype(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + enum tgsi_return_type t; + + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ) { + if (util_format_is_depth_or_stencil(format)) { + t = TGSI_RETURN_TYPE_FLOAT; /* XXX revisit this */ + } + else if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { + t = TGSI_RETURN_TYPE_FLOAT; + } + else if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { + t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_UNORM : TGSI_RETURN_TYPE_UINT; + } + else if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_SNORM : TGSI_RETURN_TYPE_SINT; + } + else { + assert(!"Unexpected channel type in svga_get_texture_datatype()"); + t = TGSI_RETURN_TYPE_FLOAT; + } + } + else { + /* compressed format, shared exponent format, etc. */ + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_ETC1_RGB8: + t = TGSI_RETURN_TYPE_UNORM; + break; + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC2_SNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + t = TGSI_RETURN_TYPE_SNORM; + break; + case PIPE_FORMAT_R11G11B10_FLOAT: + case PIPE_FORMAT_R9G9B9E5_FLOAT: + t = TGSI_RETURN_TYPE_FLOAT; + break; + default: + assert(!"Unexpected channel type in svga_get_texture_datatype()"); + t = TGSI_RETURN_TYPE_FLOAT; + } + } + + return t; +} + + +/** + * Given an svga context, return true iff there are currently any integer color + * buffers attached to the framebuffer. + */ +boolean +svga_has_any_integer_cbufs(const struct svga_context *svga) +{ + unsigned i; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + struct pipe_surface *cbuf = svga->curr.framebuffer.cbufs[i]; + + if (cbuf && util_format_is_pure_integer(cbuf->format)) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Given an SVGA format, return the corresponding typeless format. + * If there is no typeless format, return the format unchanged. 
+ */ +SVGA3dSurfaceFormat +svga_typeless_format(SVGA3dSurfaceFormat format) +{ + switch (format) { + case SVGA3D_R32G32B32A32_UINT: + case SVGA3D_R32G32B32A32_SINT: + case SVGA3D_R32G32B32A32_FLOAT: + return SVGA3D_R32G32B32A32_TYPELESS; + case SVGA3D_R32G32B32_FLOAT: + case SVGA3D_R32G32B32_UINT: + case SVGA3D_R32G32B32_SINT: + return SVGA3D_R32G32B32_TYPELESS; + case SVGA3D_R16G16B16A16_UINT: + case SVGA3D_R16G16B16A16_UNORM: + case SVGA3D_R16G16B16A16_SNORM: + case SVGA3D_R16G16B16A16_SINT: + case SVGA3D_R16G16B16A16_FLOAT: + return SVGA3D_R16G16B16A16_TYPELESS; + case SVGA3D_R32G32_UINT: + case SVGA3D_R32G32_SINT: + case SVGA3D_R32G32_FLOAT: + return SVGA3D_R32G32_TYPELESS; + case SVGA3D_D32_FLOAT_S8X24_UINT: + return SVGA3D_R32G8X24_TYPELESS; + case SVGA3D_X32_TYPELESS_G8X24_UINT: + return SVGA3D_R32_FLOAT_X8X24_TYPELESS; + case SVGA3D_R10G10B10A2_UINT: + case SVGA3D_R10G10B10A2_UNORM: + return SVGA3D_R10G10B10A2_TYPELESS; + case SVGA3D_R8G8B8A8_UNORM: + case SVGA3D_R8G8B8A8_SNORM: + case SVGA3D_R8G8B8A8_UNORM_SRGB: + case SVGA3D_R8G8B8A8_UINT: + case SVGA3D_R8G8B8A8_SINT: + return SVGA3D_R8G8B8A8_TYPELESS; + case SVGA3D_R16G16_UINT: + case SVGA3D_R16G16_SINT: + case SVGA3D_R16G16_UNORM: + case SVGA3D_R16G16_SNORM: + case SVGA3D_R16G16_FLOAT: + return SVGA3D_R16G16_TYPELESS; + case SVGA3D_D32_FLOAT: + case SVGA3D_R32_FLOAT: + case SVGA3D_R32_UINT: + case SVGA3D_R32_SINT: + return SVGA3D_R32_TYPELESS; + case SVGA3D_D24_UNORM_S8_UINT: + return SVGA3D_R24G8_TYPELESS; + case SVGA3D_X24_TYPELESS_G8_UINT: + return SVGA3D_R24_UNORM_X8_TYPELESS; + case SVGA3D_R8G8_UNORM: + case SVGA3D_R8G8_SNORM: + case SVGA3D_R8G8_UINT: + case SVGA3D_R8G8_SINT: + return SVGA3D_R8G8_TYPELESS; + case SVGA3D_D16_UNORM: + case SVGA3D_R16_UNORM: + case SVGA3D_R16_UINT: + case SVGA3D_R16_SNORM: + case SVGA3D_R16_SINT: + case SVGA3D_R16_FLOAT: + return SVGA3D_R16_TYPELESS; + case SVGA3D_R8_UNORM: + case SVGA3D_R8_UINT: + case SVGA3D_R8_SNORM: + case SVGA3D_R8_SINT: + return SVGA3D_R8_TYPELESS; + case SVGA3D_B8G8R8A8_UNORM_SRGB: + case SVGA3D_B8G8R8A8_UNORM: + return SVGA3D_B8G8R8A8_TYPELESS; + case SVGA3D_B8G8R8X8_UNORM_SRGB: + case SVGA3D_B8G8R8X8_UNORM: + return SVGA3D_B8G8R8X8_TYPELESS; + case SVGA3D_BC1_UNORM: + case SVGA3D_BC1_UNORM_SRGB: + return SVGA3D_BC1_TYPELESS; + case SVGA3D_BC2_UNORM: + case SVGA3D_BC2_UNORM_SRGB: + return SVGA3D_BC2_TYPELESS; + case SVGA3D_BC3_UNORM: + case SVGA3D_BC3_UNORM_SRGB: + return SVGA3D_BC3_TYPELESS; + case SVGA3D_BC4_UNORM: + case SVGA3D_BC4_SNORM: + return SVGA3D_BC4_TYPELESS; + case SVGA3D_BC5_UNORM: + case SVGA3D_BC5_SNORM: + return SVGA3D_BC5_TYPELESS; + + /* Special cases (no corresponding _TYPELESS formats) */ + case SVGA3D_A8_UNORM: + case SVGA3D_B5G5R5A1_UNORM: + case SVGA3D_B5G6R5_UNORM: + case SVGA3D_R11G11B10_FLOAT: + case SVGA3D_R9G9B9E5_SHAREDEXP: + return format; + default: + debug_printf("Unexpected format %s in %s\n", + svga_format_name(format), __FUNCTION__); + return format; + } +} + + +/** + * Given a surface format, return the corresponding format to use for + * a texture sampler. In most cases, it's the format unchanged, but there + * are some special cases. 
+ */ +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format) +{ + switch (format) { + case SVGA3D_D16_UNORM: + return SVGA3D_R16_UNORM; + case SVGA3D_D24_UNORM_S8_UINT: + return SVGA3D_R24_UNORM_X8_TYPELESS; + case SVGA3D_D32_FLOAT: + return SVGA3D_R32_FLOAT; + case SVGA3D_D32_FLOAT_S8X24_UINT: + return SVGA3D_R32_FLOAT_X8X24_TYPELESS; default: - debug_printf("format %u\n", (unsigned) format); - assert(!"unexpected format in svga_format_size()"); - *bytes_per_block = 4; + return format; } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.h b/lib/mesa/src/gallium/drivers/svga/svga_format.h index 94c867acf..630a86a49 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_format.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_format.h @@ -28,6 +28,7 @@ #include "pipe/p_format.h" +#include "svga_context.h" #include "svga_types.h" #include "svga_reg.h" #include "svga3d_reg.h" @@ -36,6 +37,31 @@ struct svga_screen; +/** + * Vertex format flags. These are used to specify that some vertex formats + * need extra processing/conversion in the vertex shader. For example, + * setting the W component to 1, or swapping R/B, or converting packed uint + * types to signed int/snorm. + */ +#define VF_ADJUST_RANGE (1 << 0) +#define VF_W_TO_1 (1 << 1) +#define VF_U_TO_F_CAST (1 << 2) /* convert uint to float */ +#define VF_I_TO_F_CAST (1 << 3) /* convert sint to float */ +#define VF_BGRA (1 << 4) /* swap R/B */ +#define VF_PUINT_TO_SNORM (1 << 5) /* 10_10_10_2 to snorm */ +#define VF_PUINT_TO_USCALED (1 << 6) /* 10_10_10_2 to uscaled */ +#define VF_PUINT_TO_SSCALED (1 << 7) /* 10_10_10_2 to sscaled */ + +/** + * Texture format flags. + */ +#define TF_GEN_MIPS (1 << 8) /* supports hw generate mipmap */ + +void +svga_translate_vertex_format_vgpu10(enum pipe_format format, + SVGA3dSurfaceFormat *svga_format, + unsigned *vf_flags); + enum SVGA3dSurfaceFormat svga_translate_format(struct svga_screen *ss, enum pipe_format format, @@ -52,5 +78,30 @@ svga_format_size(SVGA3dSurfaceFormat format, unsigned *block_height, unsigned *bytes_per_block); +const char * +svga_format_name(SVGA3dSurfaceFormat format); + +boolean +svga_format_is_integer(SVGA3dSurfaceFormat format); + +boolean +svga_format_support_gen_mips(enum pipe_format format); + +enum tgsi_return_type +svga_get_texture_datatype(enum pipe_format format); + + +// XXX: Move this to svga_context? +boolean +svga_has_any_integer_cbufs(const struct svga_context *svga); + + +SVGA3dSurfaceFormat +svga_typeless_format(SVGA3dSurfaceFormat format); + + +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format); + #endif /* SVGA_FORMAT_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_link.c b/lib/mesa/src/gallium/drivers/svga/svga_link.c new file mode 100644 index 000000000..f3e524d38 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_link.c @@ -0,0 +1,120 @@ +/*/ + * Copyright 2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "svga_context.h" +#include "svga_link.h" + +#include "tgsi/tgsi_strings.h" + + +#define INVALID_INDEX 255 + + +/** + * Examine input and output shaders info to link outputs from the + * output shader to inputs from the input shader. + * Basically, we'll remap input shader's input slots to new numbers + * based on semantic name/index of the outputs from the output shader. + */ +void +svga_link_shaders(const struct tgsi_shader_info *outshader_info, + const struct tgsi_shader_info *inshader_info, + struct shader_linkage *linkage) +{ + unsigned i, free_slot; + + for (i = 0; i < Elements(linkage->input_map); i++) { + linkage->input_map[i] = INVALID_INDEX; + } + + /* Assign input slots for input shader inputs. + * Basically, we want to use the same index for the output shader's outputs + * and the input shader's inputs that should be linked together. + * We'll modify the input shader's inputs to match the output shader. + */ + assert(inshader_info->num_inputs <= + Elements(inshader_info->input_semantic_name)); + + /* free register index that can be used for built-in varyings */ + free_slot = outshader_info->num_outputs + 1; + + for (i = 0; i < inshader_info->num_inputs; i++) { + unsigned sem_name = inshader_info->input_semantic_name[i]; + unsigned sem_index = inshader_info->input_semantic_index[i]; + unsigned j; + /** + * Get the clip distance inputs from the output shader's + * clip distance shadow copy. 
+ */ + if (sem_name == TGSI_SEMANTIC_CLIPDIST) { + linkage->input_map[i] = outshader_info->num_outputs + 1 + sem_index; + /* make sure free_slot includes this extra output */ + free_slot = MAX2(free_slot, linkage->input_map[i] + 1); + } + else { + /* search output shader outputs for same item */ + for (j = 0; j < outshader_info->num_outputs; j++) { + assert(j < Elements(outshader_info->output_semantic_name)); + if (outshader_info->output_semantic_name[j] == sem_name && + outshader_info->output_semantic_index[j] == sem_index) { + linkage->input_map[i] = j; + break; + } + } + } + } + + linkage->num_inputs = inshader_info->num_inputs; + + /* Things like the front-face register are handled here */ + for (i = 0; i < inshader_info->num_inputs; i++) { + if (linkage->input_map[i] == INVALID_INDEX) { + unsigned j = free_slot++; + linkage->input_map[i] = j; + } + } + + /* Debug */ + if (0) { + unsigned reg = 0; + for (i = 0; i < linkage->num_inputs; i++) { + + assert(linkage->input_map[i] != INVALID_INDEX); + + debug_printf("input shader input[%d] slot %u %s %u %s\n", + i, + linkage->input_map[i], + tgsi_semantic_names[inshader_info->input_semantic_name[i]], + inshader_info->input_semantic_index[i], + tgsi_interpolate_names[inshader_info->input_interpolate[i]]); + + /* make sure no repeating register index */ + if (reg & 1 << linkage->input_map[i]) { + assert(0); + } + reg |= 1 << linkage->input_map[i]; + } + } +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_link.h b/lib/mesa/src/gallium/drivers/svga/svga_link.h new file mode 100644 index 000000000..724c61194 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_link.h @@ -0,0 +1,20 @@ + +#ifndef SVGA_LINK_H +#define SVGA_LINK_H + +#include "pipe/p_defines.h" + +struct svga_context; + +struct shader_linkage +{ + unsigned num_inputs; + ubyte input_map[PIPE_MAX_SHADER_INPUTS]; +}; + +void +svga_link_shaders(const struct tgsi_shader_info *outshader_info, + const struct tgsi_shader_info *inshader_info, + struct shader_linkage *linkage); + +#endif /* SVGA_LINK_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c index 2890516c0..0af80cd42 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c @@ -27,14 +27,15 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "svga_context.h" - #include "svga_hw_reg.h" +#include "svga_cmd.h" static inline unsigned -svga_translate_blend_factor(unsigned factor) +svga_translate_blend_factor(const struct svga_context *svga, unsigned factor) { switch (factor) { case PIPE_BLENDFACTOR_ZERO: return SVGA3D_BLENDOP_ZERO; @@ -50,8 +51,21 @@ svga_translate_blend_factor(unsigned factor) case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT; case PIPE_BLENDFACTOR_CONST_COLOR: return SVGA3D_BLENDOP_BLENDFACTOR; case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? 
*/ + case PIPE_BLENDFACTOR_CONST_ALPHA: + if (svga_have_vgpu10(svga)) + return SVGA3D_BLENDOP_BLENDFACTORALPHA; + else + return SVGA3D_BLENDOP_BLENDFACTOR; /* as close as we can get */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if (svga_have_vgpu10(svga)) + return SVGA3D_BLENDOP_INVBLENDFACTORALPHA; + else + return SVGA3D_BLENDOP_INVBLENDFACTOR; /* as close as we can get */ + case PIPE_BLENDFACTOR_SRC1_COLOR: return SVGA3D_BLENDOP_SRC1COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return SVGA3D_BLENDOP_INVSRC1COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: return SVGA3D_BLENDOP_SRC1ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return SVGA3D_BLENDOP_INVSRC1ALPHA; + case 0: return SVGA3D_BLENDOP_ONE; default: assert(0); return SVGA3D_BLENDOP_ZERO; @@ -74,18 +88,64 @@ svga_translate_blend_func(unsigned mode) } +/** + * Define a vgpu10 blend state object for the given + * svga blend state. + */ +static void +define_blend_state_object(struct svga_context *svga, + struct svga_blend_state *bs) +{ + SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; + unsigned try; + int i; + + assert(svga_have_vgpu10(svga)); + + bs->id = util_bitmask_add(svga->blend_object_id_bm); + + for (i = 0; i < SVGA3D_DX_MAX_RENDER_TARGETS; i++) { + perRT[i].blendEnable = bs->rt[i].blend_enable; + perRT[i].srcBlend = bs->rt[i].srcblend; + perRT[i].destBlend = bs->rt[i].dstblend; + perRT[i].blendOp = bs->rt[i].blendeq; + perRT[i].srcBlendAlpha = bs->rt[i].srcblend_alpha; + perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha; + perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha; + perRT[i].renderTargetWriteMask = bs->rt[i].writemask; + perRT[i].logicOpEnable = 0; + perRT[i].logicOp = SVGA3D_LOGICOP_COPY; + assert(perRT[i].srcBlend == perRT[0].srcBlend); + } + + /* Loop in case command buffer is full and we need to flush and retry */ + for (try = 0; try < 2; try++) { + enum pipe_error ret; + + ret = SVGA3D_vgpu10_DefineBlendState(svga->swc, + bs->id, + bs->alpha_to_coverage, + bs->independent_blend_enable, + perRT); + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *templ) { + struct svga_context *svga = svga_context(pipe); struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state ); unsigned i; - /* Fill in the per-rendertarget blend state. We currently only - * have one rendertarget. + * support independent blend enable and colormask per render target. */ - for (i = 0; i < 1; i++) { + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* No way to set this in SVGA3D, and no way to correctly implement it on * top of D3D9 API. Instead we try to simulate with various blend modes. 
*/ @@ -107,6 +167,9 @@ svga_create_blend_state(struct pipe_context *pipe, break; case PIPE_LOGICOP_COPY: blend->rt[i].blend_enable = FALSE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; break; case PIPE_LOGICOP_COPY_INVERTED: blend->rt[i].blend_enable = TRUE; @@ -169,38 +232,110 @@ svga_create_blend_state(struct pipe_context *pipe, case PIPE_LOGICOP_EQUIV: /* Fill these in with plausible values */ blend->rt[i].blend_enable = FALSE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; break; default: assert(0); break; } + blend->rt[i].srcblend_alpha = blend->rt[i].srcblend; + blend->rt[i].dstblend_alpha = blend->rt[i].dstblend; + blend->rt[i].blendeq_alpha = blend->rt[i].blendeq; + + if (templ->logicop_func == PIPE_LOGICOP_XOR) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "XOR logicop mode has limited support"); + } + else if (templ->logicop_func != PIPE_LOGICOP_COPY) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "general logicops are not supported"); + } } else { - blend->rt[i].blend_enable = templ->rt[0].blend_enable; - - if (templ->rt[0].blend_enable) { - blend->rt[i].srcblend = svga_translate_blend_factor(templ->rt[0].rgb_src_factor); - blend->rt[i].dstblend = svga_translate_blend_factor(templ->rt[0].rgb_dst_factor); - blend->rt[i].blendeq = svga_translate_blend_func(templ->rt[0].rgb_func); - blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_src_factor); - blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_dst_factor); - blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->rt[0].alpha_func); + /* Note: the vgpu10 device does not yet support independent + * blend terms per render target. Target[0] always specifies the + * blending terms. 
+ */ + if (templ->independent_blend_enable || templ->rt[0].blend_enable) { + /* always use the 0th target's blending terms for now */ + blend->rt[i].srcblend = + svga_translate_blend_factor(svga, templ->rt[0].rgb_src_factor); + blend->rt[i].dstblend = + svga_translate_blend_factor(svga, templ->rt[0].rgb_dst_factor); + blend->rt[i].blendeq = + svga_translate_blend_func(templ->rt[0].rgb_func); + blend->rt[i].srcblend_alpha = + svga_translate_blend_factor(svga, templ->rt[0].alpha_src_factor); + blend->rt[i].dstblend_alpha = + svga_translate_blend_factor(svga, templ->rt[0].alpha_dst_factor); + blend->rt[i].blendeq_alpha = + svga_translate_blend_func(templ->rt[0].alpha_func); if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend || blend->rt[i].dstblend_alpha != blend->rt[i].dstblend || - blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) - { + blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) { blend->rt[i].separate_alpha_blend_enable = TRUE; } } + else { + /* disabled - default blend terms */ + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD; + } + + if (templ->independent_blend_enable) { + blend->rt[i].blend_enable = templ->rt[i].blend_enable; + } + else { + blend->rt[i].blend_enable = templ->rt[0].blend_enable; + } } - blend->rt[i].writemask = templ->rt[0].colormask; + /* Some GL blend modes are not supported by the VGPU9 device (there's + * no equivalent of PIPE_BLENDFACTOR_[INV_]CONST_ALPHA). + * When we set this flag, we copy the constant blend alpha value + * to the R, G, B components. + * This works as long as the src/dst RGB blend factors doesn't use + * PIPE_BLENDFACTOR_CONST_COLOR and PIPE_BLENDFACTOR_CONST_ALPHA + * at the same time. There's no work-around for that. 
+ */ + if (!svga_have_vgpu10(svga)) { + if (templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_CONST_ALPHA || + templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_CONST_ALPHA || + templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA || + templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA) { + blend->blend_color_alpha = TRUE; + } + } + + if (templ->independent_blend_enable) { + blend->rt[i].writemask = templ->rt[i].colormask; + } + else { + blend->rt[i].writemask = templ->rt[0].colormask; + } + } + + blend->independent_blend_enable = templ->independent_blend_enable; + + blend->alpha_to_coverage = templ->alpha_to_coverage; + + if (svga_have_vgpu10(svga)) { + define_blend_state_object(svga, blend); } + svga->hud.num_state_objects++; + return blend; } + static void svga_bind_blend_state(struct pipe_context *pipe, void *blend) { @@ -210,10 +345,32 @@ static void svga_bind_blend_state(struct pipe_context *pipe, svga->dirty |= SVGA_NEW_BLEND; } - -static void svga_delete_blend_state(struct pipe_context *pipe, void *blend) +static void svga_delete_blend_state(struct pipe_context *pipe, + void *blend) { + struct svga_context *svga = svga_context(pipe); + struct svga_blend_state *bs = + (struct svga_blend_state *) blend; + + if (bs->id != SVGA3D_INVALID_ID) { + enum pipe_error ret; + + ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id); + assert(ret == PIPE_OK); + } + + if (bs->id == svga->state.hw_draw.blend_id) + svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->blend_object_id_bm, bs->id); + bs->id = SVGA3D_INVALID_ID; + } + FREE(blend); + svga->hud.num_state_objects--; } static void svga_set_blend_color( struct pipe_context *pipe, @@ -235,6 +392,3 @@ void svga_init_blend_functions( struct svga_context *svga ) svga->pipe.set_blend_color = svga_set_blend_color; } - - - diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c index dbb9f4b51..2b34f9640 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c @@ -29,6 +29,7 @@ #include "svga_cmd.h" #include "svga_surface.h" +//#include "util/u_blit_sw.h" #include "util/u_format.h" #include "util/u_surface.h" @@ -159,7 +160,8 @@ static void svga_blit(struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct pipe_blit_info info = *blit_info; - if (info.src.resource->nr_samples > 1 && + if (!svga_have_vgpu10(svga) && + info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1 && !util_format_is_depth_or_stencil(info.src.resource->format) && !util_format_is_pure_integer(info.src.resource->format)) { @@ -171,12 +173,8 @@ static void svga_blit(struct pipe_context *pipe, return; /* done */ } - if (info.mask & PIPE_MASK_S) { - debug_printf("svga: cannot blit stencil, skipping\n"); - info.mask &= ~PIPE_MASK_S; - } - - if (!util_blitter_is_blit_supported(svga->blitter, &info)) { + if ((info.mask & PIPE_MASK_S) || + !util_blitter_is_blit_supported(svga->blitter, &info)) { debug_printf("svga: blit unsupported %s -> %s\n", util_format_short_name(info.src.resource->format), util_format_short_name(info.dst.resource->format)); @@ -188,9 +186,9 @@ static void svga_blit(struct pipe_context *pipe, util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb); util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems); 
util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs); - /*util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);*/ - /*util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, - (struct pipe_stream_output_target**)svga->so_targets);*/ + util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs); + util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, + (struct pipe_stream_output_target**)svga->so_targets); util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast); util_blitter_save_viewport(svga->blitter, &svga->curr.viewport); util_blitter_save_scissor(svga->blitter, &svga->curr.scissor); @@ -199,14 +197,14 @@ static void svga_blit(struct pipe_context *pipe, util_blitter_save_depth_stencil_alpha(svga->blitter, (void*)svga->curr.depth); util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref); - /*util_blitter_save_sample_mask(svga->blitter, svga->sample_mask);*/ + util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask); util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer); util_blitter_save_fragment_sampler_states(svga->blitter, - svga->curr.num_samplers, - (void**)svga->curr.sampler); + svga->curr.num_samplers[PIPE_SHADER_FRAGMENT], + (void**)svga->curr.sampler[PIPE_SHADER_FRAGMENT]); util_blitter_save_fragment_sampler_views(svga->blitter, - svga->curr.num_sampler_views, - svga->curr.sampler_views); + svga->curr.num_sampler_views[PIPE_SHADER_FRAGMENT], + svga->curr.sampler_views[PIPE_SHADER_FRAGMENT]); /*util_blitter_save_render_condition(svga->blitter, svga->render_cond_query, svga->render_cond_cond, svga->render_cond_mode);*/ util_blitter_blit(svga->blitter, &info); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c index c4edced9b..c874726b6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c @@ -34,6 +34,78 @@ #include "svga_surface.h" +/** + * Clear the whole color buffer(s) by drawing a quad. For VGPU10 we use + * this when clearing integer render targets. We'll also clear the + * depth and/or stencil buffers if the clear_buffers mask specifies them. 
+ */ +static void +clear_buffers_with_quad(struct svga_context *svga, + unsigned clear_buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + const struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + + util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb); + util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems); + util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs); + util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs); + util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, + (struct pipe_stream_output_target**)svga->so_targets); + util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast); + util_blitter_save_viewport(svga->blitter, &svga->curr.viewport); + util_blitter_save_scissor(svga->blitter, &svga->curr.scissor); + util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs); + util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend); + util_blitter_save_depth_stencil_alpha(svga->blitter, + (void*)svga->curr.depth); + util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref); + util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask); + + util_blitter_clear(svga->blitter, + fb->width, fb->height, + 1, /* num_layers */ + clear_buffers, color, + depth, stencil); +} + + +/** + * Check if any of the color buffers are integer buffers. + */ +static boolean +is_integer_target(struct pipe_framebuffer_state *fb, unsigned buffers) +{ + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + if ((buffers & (PIPE_CLEAR_COLOR0 << i)) && + fb->cbufs[i] && + util_format_is_pure_integer(fb->cbufs[i]->format)) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Check if the integer values in the clear color can be represented + * by floats. If so, we can use the VGPU10 ClearRenderTargetView command. + * Otherwise, we need to clear with a quad. 
+ */ +static boolean +ints_fit_in_floats(const union pipe_color_union *color) +{ + const int max = 1 << 24; + return (color->i[0] <= max && + color->i[1] <= max && + color->i[2] <= max && + color->i[3] <= max); +} + + static enum pipe_error try_clear(struct svga_context *svga, unsigned buffers, @@ -52,7 +124,7 @@ try_clear(struct svga_context *svga, if (ret != PIPE_OK) return ret; - if (svga->rebind.rendertargets) { + if (svga->rebind.flags.rendertargets) { ret = svga_reemit_framebuffer_bindings(svga); if (ret != PIPE_OK) { return ret; @@ -71,29 +143,72 @@ try_clear(struct svga_context *svga, if (buffers & PIPE_CLEAR_DEPTH) flags |= SVGA3D_CLEAR_DEPTH; - if ((svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) && - (buffers & PIPE_CLEAR_STENCIL)) + if (buffers & PIPE_CLEAR_STENCIL) flags |= SVGA3D_CLEAR_STENCIL; rect.w = MAX2(rect.w, fb->zsbuf->width); rect.h = MAX2(rect.h, fb->zsbuf->height); } - if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { + if (!svga_have_vgpu10(svga) && + !svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) { restore_viewport = TRUE; ret = SVGA3D_SetViewport(svga->swc, &rect); if (ret != PIPE_OK) return ret; } - ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil, - rect.x, rect.y, rect.w, rect.h); - if (ret != PIPE_OK) - return ret; + if (svga_have_vgpu10(svga)) { + if (flags & SVGA3D_CLEAR_COLOR) { + unsigned i; + + if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) { + clear_buffers_with_quad(svga, buffers, color, depth, stencil); + /* We also cleared depth/stencil, so that's done */ + flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL); + } + else { + struct pipe_surface *rtv; + + /* Issue VGPU10 Clear commands */ + for (i = 0; i < fb->nr_cbufs; i++) { + if ((fb->cbufs[i] == NULL) || + !(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + rtv = svga_validate_surface_view(svga, + svga_surface(fb->cbufs[i])); + if (!rtv) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, + rtv, color->f); + if (ret != PIPE_OK) + return ret; + } + } + } + if (flags & (SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL)) { + struct pipe_surface *dsv = + svga_validate_surface_view(svga, svga_surface(fb->zsbuf)); + if (!dsv) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, flags, + stencil, (float) depth); + if (ret != PIPE_OK) + return ret; + } + } + else { + ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil, + rect.x, rect.y, rect.w, rect.h); + if (ret != PIPE_OK) + return ret; + } if (restore_viewport) { - memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect); - ret = SVGA3D_SetViewport(svga->swc, &rect); + ret = SVGA3D_SetViewport(svga->swc, &svga->state.hw_clear.viewport); } return ret; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c index c32b66d41..8150879ea 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c @@ -48,28 +48,46 @@ static void svga_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_constant_buffer *cb) { + struct svga_screen *svgascreen = svga_screen(pipe->screen); struct svga_context *svga = svga_context(pipe); struct pipe_resource *buf = cb ? 
cb->buffer : NULL; - - if (cb && cb->user_buffer) { - buf = svga_user_buffer_create(pipe->screen, - (void *) cb->user_buffer, - cb->buffer_size, - PIPE_BIND_CONSTANT_BUFFER); + unsigned buffer_size = 0; + + if (cb) { + buffer_size = cb->buffer_size; + if (cb->user_buffer) { + buf = svga_user_buffer_create(pipe->screen, + (void *) cb->user_buffer, + cb->buffer_size, + PIPE_BIND_CONSTANT_BUFFER); + } } assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); + assert(index < Elements(svga->curr.constbufs[shader])); + assert(index < svgascreen->max_const_buffers); + (void) svgascreen; + + pipe_resource_reference(&svga->curr.constbufs[shader][index].buffer, buf); + + /* Make sure the constant buffer size to be updated is within the + * limit supported by the device. + */ + svga->curr.constbufs[shader][index].buffer_size = + MIN2(buffer_size, SVGA_MAX_CONST_BUF_SIZE); - pipe_resource_reference(&svga->curr.cbufs[shader].buffer, buf); - svga->curr.cbufs[shader].buffer_size = cb ? cb->buffer_size : 0; - svga->curr.cbufs[shader].buffer_offset = cb ? cb->buffer_offset : 0; - svga->curr.cbufs[shader].user_buffer = NULL; /* not used */ + svga->curr.constbufs[shader][index].buffer_offset = cb ? cb->buffer_offset : 0; + svga->curr.constbufs[shader][index].user_buffer = NULL; /* not used */ if (shader == PIPE_SHADER_FRAGMENT) svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; - else + else if (shader == PIPE_SHADER_VERTEX) svga->dirty |= SVGA_NEW_VS_CONST_BUFFER; + else + svga->dirty |= SVGA_NEW_GS_CONST_BUFFER; + + /* update bitmask of dirty const buffers */ + svga->state.dirty_constbufs[shader] |= (1 << index); if (cb && cb->user_buffer) { pipe_resource_reference(&buf, NULL); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c index 8db21fd74..d84ed1df4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -23,13 +23,15 @@ * **********************************************************/ -#include "util/u_inlines.h" #include "pipe/p_defines.h" +#include "util/u_bitmask.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "svga_context.h" #include "svga_hw_reg.h" +#include "svga_cmd.h" static inline unsigned @@ -69,10 +71,67 @@ svga_translate_stencil_op(unsigned op) } +/** + * Define a vgpu10 depth/stencil state object for the given + * svga depth/stencil state. + */ +static void +define_depth_stencil_state_object(struct svga_context *svga, + struct svga_depth_stencil_state *ds) +{ + unsigned try; + + assert(svga_have_vgpu10(svga)); + + ds->id = util_bitmask_add(svga->ds_object_id_bm); + + /* spot check that these comparision tokens are the same */ + assert(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER); + assert(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS); + assert(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL); + + /* Loop in case command buffer is full and we need to flush and retry */ + for (try = 0; try < 2; try++) { + enum pipe_error ret; + + /* Note: we use the ds->stencil[0].enabled value for both the front + * and back-face enables. If single-side stencil is used, we'll have + * set the back state the same as the front state. 
+ */ + ret = SVGA3D_vgpu10_DefineDepthStencilState(svga->swc, + ds->id, + /* depth/Z */ + ds->zenable, + ds->zwriteenable, + ds->zfunc, + /* Stencil */ + ds->stencil[0].enabled, /*f|b*/ + ds->stencil[0].enabled, /*f*/ + ds->stencil[0].enabled, /*b*/ + ds->stencil_mask, + ds->stencil_writemask, + /* front stencil */ + ds->stencil[0].fail, + ds->stencil[0].zfail, + ds->stencil[0].pass, + ds->stencil[0].func, + /* back stencil */ + ds->stencil[1].fail, + ds->stencil[1].zfail, + ds->stencil[1].pass, + ds->stencil[1].func); + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *templ) { + struct svga_context *svga = svga_context(pipe); struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state ); /* Don't try to figure out CW/CCW correspondence with @@ -92,10 +151,18 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, ds->stencil_mask = templ->stencil[0].valuemask & 0xff; ds->stencil_writemask = templ->stencil[0].writemask & 0xff; } + else { + ds->stencil[0].func = SVGA3D_CMP_ALWAYS; + ds->stencil[0].fail = SVGA3D_STENCILOP_KEEP; + ds->stencil[0].zfail = SVGA3D_STENCILOP_KEEP; + ds->stencil[0].pass = SVGA3D_STENCILOP_KEEP; + } ds->stencil[1].enabled = templ->stencil[1].enabled; if (templ->stencil[1].enabled) { + assert(templ->stencil[0].enabled); + /* two-sided stencil */ ds->stencil[1].func = svga_translate_compare_func(templ->stencil[1].func); ds->stencil[1].fail = svga_translate_stencil_op(templ->stencil[1].fail_op); ds->stencil[1].zfail = svga_translate_stencil_op(templ->stencil[1].zfail_op); @@ -104,6 +171,13 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, ds->stencil_mask = templ->stencil[1].valuemask & 0xff; ds->stencil_writemask = templ->stencil[1].writemask & 0xff; } + else { + /* back face state is same as front-face state */ + ds->stencil[1].func = ds->stencil[0].func; + ds->stencil[1].fail = ds->stencil[0].fail; + ds->stencil[1].zfail = ds->stencil[0].zfail; + ds->stencil[1].pass = ds->stencil[0].pass; + } ds->zenable = templ->depth.enabled; @@ -111,12 +185,24 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, ds->zfunc = svga_translate_compare_func(templ->depth.func); ds->zwriteenable = templ->depth.writemask; } + else { + ds->zfunc = SVGA3D_CMP_ALWAYS; + } ds->alphatestenable = templ->alpha.enabled; if (ds->alphatestenable) { ds->alphafunc = svga_translate_compare_func(templ->alpha.func); ds->alpharef = templ->alpha.ref_value; } + else { + ds->alphafunc = SVGA3D_CMP_ALWAYS; + } + + if (svga_have_vgpu10(svga)) { + define_depth_stencil_state_object(svga, ds); + } + + svga->hud.num_state_objects++; return ds; } @@ -126,14 +212,45 @@ static void svga_bind_depth_stencil_state(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); + if (svga_have_vgpu10(svga)) { + /* flush any previously queued drawing before changing state */ + svga_hwtnl_flush_retry(svga); + } + svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil; - svga->dirty |= SVGA_NEW_DEPTH_STENCIL; + svga->dirty |= SVGA_NEW_DEPTH_STENCIL_ALPHA; } static void svga_delete_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { + struct svga_context *svga = svga_context(pipe); + struct svga_depth_stencil_state *ds = + (struct svga_depth_stencil_state *) depth_stencil; + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + assert(ds->id != 
SVGA3D_INVALID_ID); + + ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id); + assert(ret == PIPE_OK); + } + + if (ds->id == svga->state.hw_draw.depth_stencil_id) + svga->state.hw_draw.depth_stencil_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->ds_object_id_bm, ds->id); + ds->id = SVGA3D_INVALID_ID; + } + FREE(depth_stencil); + svga->hud.num_state_objects--; } @@ -142,6 +259,11 @@ static void svga_set_stencil_ref( struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); + if (svga_have_vgpu10(svga)) { + /* flush any previously queued drawing before changing state */ + svga_hwtnl_flush_retry(svga); + } + svga->curr.stencil_ref = *stencil_ref; svga->dirty |= SVGA_NEW_STENCIL_REF; @@ -151,6 +273,11 @@ static void svga_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { + struct svga_context *svga = svga_context(pipe); + + svga->curr.sample_mask = sample_mask; + + svga->dirty |= SVGA_NEW_BLEND; /* See emit_rss_vgpu10() */ } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c index 87f6b3d71..50ebb53df 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c @@ -27,7 +27,9 @@ #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_prim.h" +#include "util/u_prim_restart.h" #include "util/u_time.h" +#include "util/u_upload_mgr.h" #include "indices/u_indices.h" #include "svga_hw_reg.h" @@ -35,12 +37,12 @@ #include "svga_context.h" #include "svga_screen.h" #include "svga_draw.h" +#include "svga_shader.h" #include "svga_state.h" #include "svga_swtnl.h" #include "svga_debug.h" #include "svga_resource_buffer.h" - static enum pipe_error retry_draw_range_elements( struct svga_context *svga, struct pipe_resource *index_buffer, @@ -51,26 +53,31 @@ retry_draw_range_elements( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count, boolean do_retry ) { enum pipe_error ret = PIPE_OK; - svga_hwtnl_set_unfilled( svga->hwtnl, - svga->curr.rast->hw_unfilled ); - - svga_hwtnl_set_flatshade( svga->hwtnl, - svga->curr.rast->templ.flatshade, - svga->curr.rast->templ.flatshade_first ); + svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret != PIPE_OK) goto retry; + /** determine if flatshade is to be used after svga_update_state() + * in case the fragment shader is changed. 
+ */ + svga_hwtnl_set_flatshade(svga->hwtnl, + svga->curr.rast->templ.flatshade || + svga->state.hw_draw.fs->uses_flat_interp, + svga->curr.rast->templ.flatshade_first); + ret = svga_hwtnl_draw_range_elements( svga->hwtnl, index_buffer, index_size, index_bias, min_index, max_index, - prim, start, count ); + prim, start, count, + start_instance, instance_count); if (ret != PIPE_OK) goto retry; @@ -85,7 +92,7 @@ retry: index_buffer, index_size, index_bias, min_index, max_index, prim, start, count, - instance_count, FALSE ); + start_instance, instance_count, FALSE ); } return ret; @@ -94,27 +101,28 @@ retry: static enum pipe_error retry_draw_arrays( struct svga_context *svga, - unsigned prim, - unsigned start, - unsigned count, - unsigned instance_count, + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count, boolean do_retry ) { enum pipe_error ret; - svga_hwtnl_set_unfilled( svga->hwtnl, - svga->curr.rast->hw_unfilled ); - - svga_hwtnl_set_flatshade( svga->hwtnl, - svga->curr.rast->templ.flatshade, - svga->curr.rast->templ.flatshade_first ); + svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret != PIPE_OK) goto retry; - ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim, - start, count ); + /** determine if flatshade is to be used after svga_update_state() + * in case the fragment shader is changed. + */ + svga_hwtnl_set_flatshade(svga->hwtnl, + svga->curr.rast->templ.flatshade || + svga->state.hw_draw.fs->uses_flat_interp, + svga->curr.rast->templ.flatshade_first); + + ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count, + start_instance, instance_count); if (ret != PIPE_OK) goto retry; @@ -125,18 +133,41 @@ retry: { svga_context_flush( svga, NULL ); - return retry_draw_arrays( svga, - prim, - start, - count, - instance_count, - FALSE ); + return retry_draw_arrays(svga, prim, start, count, + start_instance, instance_count, + FALSE ); } return ret; } +/** + * Determine if we need to implement primitive restart with a fallback + * path which breaks the original primitive into sub-primitive at the + * restart indexes. 
+ */ +static boolean +need_fallback_prim_restart(const struct svga_context *svga, + const struct pipe_draw_info *info) +{ + if (info->primitive_restart && info->indexed) { + if (!svga_have_vgpu10(svga)) + return TRUE; + else if (!svga->state.sw.need_swtnl) { + if (svga->curr.ib.index_size == 1) + return TRUE; /* no device support for 1-byte indexes */ + else if (svga->curr.ib.index_size == 2) + return info->restart_index != 0xffff; + else + return info->restart_index != 0xffffffff; + } + } + + return FALSE; +} + + static void svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { @@ -146,9 +177,10 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) enum pipe_error ret = 0; boolean needed_swtnl; - svga->num_draw_calls++; /* for SVGA_QUERY_DRAW_CALLS */ + svga->hud.num_draw_calls++; /* for SVGA_QUERY_NUM_DRAW_CALLS */ - if (!u_trim_pipe_prim( info->mode, &count )) + if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES && + svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK) return; /* @@ -165,6 +197,17 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE; } + if (need_fallback_prim_restart(svga, info)) { + enum pipe_error r; + r = util_draw_vbo_without_prim_restart(pipe, &svga->curr.ib, info); + assert(r == PIPE_OK); + (void) r; + return; + } + + if (!u_trim_pipe_prim( info->mode, &count )) + return; + needed_swtnl = svga->state.sw.need_swtnl; svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL ); @@ -176,7 +219,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) #endif if (svga->state.sw.need_swtnl) { - svga->num_fallbacks++; /* for SVGA_QUERY_FALLBACKS */ + svga->hud.num_fallbacks++; /* for SVGA_QUERY_NUM_FALLBACKS */ if (!needed_swtnl) { /* * We're switching from HW to SW TNL. 
SW TNL will require mapping all @@ -208,17 +251,15 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->max_index, info->mode, info->start + offset, - info->count, + count, + info->start_instance, info->instance_count, TRUE ); } else { - ret = retry_draw_arrays( svga, - info->mode, - info->start, - info->count, - info->instance_count, - TRUE ); + ret = retry_draw_arrays(svga, info->mode, info->start, count, + info->start_instance, info->instance_count, + TRUE); } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c index d593c7816..8e0af12d2 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c @@ -24,6 +24,7 @@ **********************************************************/ #include "pipe/p_defines.h" +#include "util/u_debug_image.h" #include "util/u_string.h" #include "svga_screen.h" #include "svga_surface.h" diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c index 75299c50d..4a9b3c96a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c @@ -31,7 +31,6 @@ #include "draw/draw_context.h" #include "svga_context.h" -#include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" #include "svga_debug.h" @@ -63,12 +62,6 @@ svga_create_fs_state(struct pipe_context *pipe, fs->draw_shader = draw_create_fragment_shader(svga->swtnl.draw, templ); - if (SVGA_DEBUG & DEBUG_TGSI || 0) { - debug_printf("%s id: %u, inputs: %u, outputs: %u\n", - __FUNCTION__, fs->base.id, - fs->base.info.num_inputs, fs->base.info.num_outputs); - } - return fs; } @@ -94,20 +87,30 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader) svga_hwtnl_flush_retry(svga); + assert(fs->base.parent == NULL); + draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader); for (variant = fs->base.variants; variant; variant = tmp) { tmp = variant->next; - ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); - (void) ret; /* PIPE_ERROR_ not handled yet */ - - /* - * Remove stale references to this variant to ensure a new variant on the - * same address will be detected as a change. - */ - if (variant == svga->state.hw_draw.fs) + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.fs) { + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); + assert(ret == PIPE_OK); + } svga->state.hw_draw.fs = NULL; + } + + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + assert(ret == PIPE_OK); + } } FREE((void *)fs->base.tokens); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_gs.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_gs.c new file mode 100644 index 000000000..d614e9d6c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_gs.c @@ -0,0 +1,142 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_text.h" + +#include "svga_context.h" +#include "svga_cmd.h" +#include "svga_debug.h" +#include "svga_shader.h" +#include "svga_streamout.h" + +static void * +svga_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_geometry_shader *gs = CALLOC_STRUCT(svga_geometry_shader); + + if (!gs) + return NULL; + + gs->base.tokens = tgsi_dup_tokens(templ->tokens); + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(gs->base.tokens, &gs->base.info); + + gs->draw_shader = draw_create_geometry_shader(svga->swtnl.draw, templ); + + gs->base.id = svga->debug.shader_id++; + + gs->generic_outputs = svga_get_generic_outputs_mask(&gs->base.info); + + /* check for any stream output declarations */ + if (templ->stream_output.num_outputs) { + gs->base.stream_output = svga_create_stream_output(svga, &gs->base, + &templ->stream_output); + } + + return gs; +} + + +static void +svga_bind_gs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader; + struct svga_context *svga = svga_context(pipe); + + svga->curr.user_gs = gs; + svga->dirty |= SVGA_NEW_GS; +} + + +static void +svga_delete_gs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader; + struct svga_geometry_shader *next_gs; + struct svga_shader_variant *variant, *tmp; + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + /* Start deletion from the original geometry shader state */ + if (gs->base.parent != NULL) + gs = (struct svga_geometry_shader *)gs->base.parent; + + /* Free the list of geometry shaders */ + while (gs) { + next_gs = (struct svga_geometry_shader *)gs->base.next; + + if (gs->base.stream_output != NULL) + svga_delete_stream_output(svga, gs->base.stream_output); + + draw_delete_geometry_shader(svga->swtnl.draw, gs->draw_shader); + + for (variant = gs->base.variants; variant; variant = tmp) { + tmp = variant->next; + + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.gs) { + ret = 
svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); + assert(ret == PIPE_OK); + } + svga->state.hw_draw.gs = NULL; + } + + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, + variant); + assert(ret == PIPE_OK); + } + } + + FREE((void *)gs->base.tokens); + FREE(gs); + gs = next_gs; + } +} + + +void +svga_init_gs_functions(struct svga_context *svga) +{ + svga->pipe.create_gs_state = svga_create_gs_state; + svga->pipe.bind_gs_state = svga_bind_gs_state; + svga->pipe.delete_gs_state = svga_delete_gs_state; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c index 1df32a13b..af9356d7c 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c @@ -27,6 +27,7 @@ #include "util/u_framebuffer.h" #include "util/u_inlines.h" +#include "util/u_pstipple.h" #include "svga_context.h" #include "svga_screen.h" @@ -46,10 +47,37 @@ static void svga_set_scissor_states( struct pipe_context *pipe, } -static void svga_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) +static void +svga_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) { - /* overridden by the draw module */ + struct svga_context *svga = svga_context(pipe); + + /* release old texture */ + pipe_resource_reference(&svga->polygon_stipple.texture, NULL); + + /* release old sampler view */ + if (svga->polygon_stipple.sampler_view) { + pipe->sampler_view_destroy(pipe, + &svga->polygon_stipple.sampler_view->base); + } + + /* create new stipple texture */ + svga->polygon_stipple.texture = + util_pstipple_create_stipple_texture(pipe, stipple->stipple); + + /* create new sampler view */ + svga->polygon_stipple.sampler_view = + (struct svga_pipe_sampler_view *) + util_pstipple_create_sampler_view(pipe, + svga->polygon_stipple.texture); + + /* allocate sampler state, if first time */ + if (!svga->polygon_stipple.sampler) { + svga->polygon_stipple.sampler = util_pstipple_create_sampler(pipe); + } + + svga->dirty |= SVGA_NEW_STIPPLE; } @@ -83,6 +111,11 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, boolean propagate = FALSE; unsigned i; + /* make sure any pending drawing calls are flushed before changing + * the framebuffer state + */ + svga_hwtnl_flush_retry(svga); + dst->width = fb->width; dst->height = fb->height; dst->nr_cbufs = fb->nr_cbufs; @@ -99,9 +132,6 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, } if (propagate) { - /* make sure that drawing calls comes before propagation calls */ - svga_hwtnl_flush_retry( svga ); - for (i = 0; i < dst->nr_cbufs; i++) { struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL; if (dst->cbufs[i] && dst->cbufs[i] != s) @@ -109,13 +139,30 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, } } - /* XXX: Actually the virtual hardware may support rendertargets with - * different size, depending on the host API and driver, but since we cannot - * know that make no such assumption here. 
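/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  Nearly every command emitted in these hunks follows the same
 * idiom: queue the command, and if the command buffer is full (anything
 * other than PIPE_OK comes back), flush it and try exactly once more.
 * svga_set_shader(), svga_destroy_shader_variant() and the various
 * DefineQuery and DestroyQuery style calls all open-code it.  A generic
 * helper built on hypothetical callbacks would look roughly like this:
 */
#include <assert.h>

enum demo_error { DEMO_OK = 0, DEMO_ERROR_FULL = -1 };

typedef enum demo_error (*demo_emit_fn)(void *context, void *arg);
typedef void (*demo_flush_fn)(void *context);

static enum demo_error
demo_emit_with_retry(void *context, demo_emit_fn emit, void *arg,
                     demo_flush_fn flush)
{
   enum demo_error ret = emit(context, arg);
   if (ret != DEMO_OK) {
      /* Command buffer was presumably full: flush and retry once. */
      flush(context);
      ret = emit(context, arg);
      assert(ret == DEMO_OK);
   }
   return ret;
}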
*/ - for(i = 0; i < fb->nr_cbufs; ++i) { - if (fb->zsbuf && fb->cbufs[i]) { - assert(fb->zsbuf->width == fb->cbufs[i]->width); - assert(fb->zsbuf->height == fb->cbufs[i]->height); + /* Check that all surfaces are the same size. + * Actually, the virtual hardware may support rendertargets with + * different size, depending on the host API and driver, + */ + { + int width = 0, height = 0; + if (fb->zsbuf) { + width = fb->zsbuf->width; + height = fb->zsbuf->height; + } + for (i = 0; i < fb->nr_cbufs; ++i) { + if (fb->cbufs[i]) { + if (width && height) { + if (fb->cbufs[i]->width != width || + fb->cbufs[i]->height != height) { + debug_warning("Mixed-size color and depth/stencil surfaces " + "may not work properly"); + } + } + else { + width = fb->cbufs[i]->width; + height = fb->cbufs[i]->height; + } + } } } @@ -197,6 +244,22 @@ static void svga_set_viewport_states( struct pipe_context *pipe, } +/** + * Called by state tracker to specify a callback function the driver + * can use to report info back to the state tracker. + */ +static void +svga_set_debug_callback(struct pipe_context *pipe, + const struct pipe_debug_callback *cb) +{ + struct svga_context *svga = svga_context(pipe); + + if (cb) + svga->debug.callback = *cb; + else + memset(&svga->debug.callback, 0, sizeof(svga->debug.callback)); +} + void svga_init_misc_functions( struct svga_context *svga ) { @@ -205,6 +268,7 @@ void svga_init_misc_functions( struct svga_context *svga ) svga->pipe.set_framebuffer_state = svga_set_framebuffer_state; svga->pipe.set_clip_state = svga_set_clip_state; svga->pipe.set_viewport_states = svga_set_viewport_states; + svga->pipe.set_debug_callback = svga_set_debug_callback; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c index 208a2cd14..255494a5d 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. + * Copyright 2008-2015 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -25,6 +25,8 @@ #include "pipe/p_state.h" #include "pipe/p_context.h" + +#include "util/u_bitmask.h" #include "util/u_memory.h" #include "svga_cmd.h" @@ -42,16 +44,26 @@ struct pipe_query { int dummy; }; - struct svga_query { struct pipe_query base; unsigned type; /**< PIPE_QUERY_x or SVGA_QUERY_x */ SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */ + unsigned id; /** Per-context query identifier */ + + struct pipe_fence_handle *fence; + /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */ + + /* For VGPU9 */ struct svga_winsys_buffer *hwbuf; volatile SVGA3dQueryResult *queryResult; - struct pipe_fence_handle *fence; + + /** For VGPU10 */ + struct svga_winsys_gb_query *gb_query; + SVGA3dDXQueryFlags flags; + unsigned offset; /**< offset to the gb_query memory */ + struct pipe_query *predicate; /** The associated query that can be used for predicate */ /** For non-GPU SVGA_QUERY_x queries */ uint64_t begin_count, end_count; @@ -72,54 +84,655 @@ svga_get_query_result(struct pipe_context *pipe, boolean wait, union pipe_query_result *result); +static enum pipe_error +define_query_vgpu9(struct svga_context *svga, + struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + + sq->hwbuf = svga_winsys_buffer_create(svga, 1, + SVGA_BUFFER_USAGE_PINNED, + sizeof *sq->queryResult); + if (!sq->hwbuf) + return PIPE_ERROR_OUT_OF_MEMORY; + + sq->queryResult = (SVGA3dQueryResult *) + sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE); + if (!sq->queryResult) { + sws->buffer_destroy(sws, sq->hwbuf); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + sq->queryResult->totalSize = sizeof *sq->queryResult; + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + + /* We request the buffer to be pinned and assume it is always mapped. + * The reason is that we don't want to wait for fences when checking the + * query status. + */ + sws->buffer_unmap(sws, sq->hwbuf); + + return PIPE_OK; +} + +static enum pipe_error +begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + + if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { + /* The application doesn't care for the pending query result. + * We cannot let go of the existing buffer and just get a new one + * because its storage may be reused for other purposes and clobbered + * by the host when it determines the query result. So the only + * option here is to wait for the existing query's result -- not a + * big deal, given that no sane application would do this. + */ + uint64_t result; + svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result); + assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); + } + + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + sws->fence_reference(sws, &sq->fence, NULL); + + ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); + } + return ret; +} + +static enum pipe_error +end_query_vgpu9(struct svga_context *svga, struct svga_query *sq) +{ + enum pipe_error ret = PIPE_OK; + + /* Set to PENDING before sending EndQuery. 
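/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The VGPU9 query path above keeps a tiny state machine in a
 * pinned, permanently mapped buffer: begin resets the result word to NEW,
 * end flips it to PENDING before the EndQuery command is queued, and the
 * host later rewrites it to SUCCEEDED or FAILED along with the result.
 * Reading the result just polls that word, blocking on a fence when the
 * caller asked for a synchronous read.  A standalone model (names invented
 * here; a busy-wait stands in for the fence wait):
 */
#include <stdbool.h>
#include <stdint.h>

enum demo_qstate { DEMO_NEW, DEMO_PENDING, DEMO_SUCCEEDED, DEMO_FAILED };

struct demo_q {
   volatile enum demo_qstate state;   /* rewritten by the "host" */
   volatile uint32_t result32;
};

static void demo_q_begin(struct demo_q *q)
{
   q->state = DEMO_NEW;
}

static void demo_q_end(struct demo_q *q)
{
   q->state = DEMO_PENDING;   /* set before the end command is queued */
   /* ...queue EndQuery; the host fills result32 and updates state... */
}

/* Returns true and fills *result once the host has finished the query. */
static bool demo_q_result(struct demo_q *q, bool wait, uint64_t *result)
{
   if (q->state == DEMO_PENDING) {
      if (!wait)
         return false;
      /* the real driver waits on a fence here rather than spinning */
      while (q->state == DEMO_PENDING)
         ;
   }
   *result = q->result32;
   return true;
}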
*/ + sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; + + ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); + } + return ret; +} + +static boolean +get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, + boolean wait, uint64_t *result) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret; + SVGA3dQueryState state; + + if (!sq->fence) { + /* The query status won't be updated by the host unless + * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause + * a synchronous wait on the host. + */ + ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); + } + assert (ret == PIPE_OK); + svga_context_flush(svga, &sq->fence); + assert(sq->fence); + } + + state = sq->queryResult->state; + if (state == SVGA3D_QUERYSTATE_PENDING) { + if (!wait) + return FALSE; + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + state = sq->queryResult->state; + } + + assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || + state == SVGA3D_QUERYSTATE_FAILED); + + *result = (uint64_t)sq->queryResult->result32; + return TRUE; +} + + +/** + * VGPU10 + * + * There is one query mob allocated for each context to be shared by all + * query types. The mob is used to hold queries's state and result. Since + * each query result type is of different length, to ease the query allocation + * management, the mob is divided into memory blocks. Each memory block + * will hold queries of the same type. Multiple memory blocks can be allocated + * for a particular query type. + * + * Currently each memory block is of 184 bytes. We support up to 128 + * memory blocks. The query memory size is arbitrary right now. + * Each occlusion query takes about 8 bytes. One memory block can accomodate + * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion + * queries. That seems reasonable for now. If we think this limit is + * not enough, we can increase the limit or try to grow the mob in runtime. + * Note, SVGA device does not impose one mob per context for queries, + * we could allocate multiple mobs for queries; however, wddm KMD does not + * currently support that. + * + * Also note that the GL guest driver does not issue any of the + * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery. 
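/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The comment above describes carving one per-context query
 * MOB into fixed-size memory blocks, each block dedicated to a single
 * query type and subdivided into equally sized slots.  A heavily
 * simplified standalone model of that two-level allocator, using plain
 * bitmasks instead of util_bitmask and an invented block count:
 */
#include <stdint.h>

#define DEMO_BLOCK_SIZE  184       /* bytes per block, as in the comment */
#define DEMO_NUM_BLOCKS  16        /* the real code allows up to 128 */

struct demo_block {
   int      in_use;                /* block handed out to some query type? */
   unsigned query_size;            /* slot size used within this block */
   uint32_t slot_mask;             /* bit i set means slot i is allocated */
};

static struct demo_block demo_blocks[DEMO_NUM_BLOCKS];

/* Grab a free block for queries of 'query_size' bytes; -1 if none left. */
static int demo_alloc_block(unsigned query_size)
{
   int i;
   for (i = 0; i < DEMO_NUM_BLOCKS; i++) {
      if (!demo_blocks[i].in_use) {
         demo_blocks[i].in_use = 1;
         demo_blocks[i].query_size = query_size;
         demo_blocks[i].slot_mask = 0;
         return i;
      }
   }
   return -1;
}

/* Grab a slot in block 'b'; returns a byte offset into the MOB, or -1. */
static int demo_alloc_slot(int b)
{
   struct demo_block *blk = &demo_blocks[b];
   unsigned max_slots = DEMO_BLOCK_SIZE / blk->query_size;
   unsigned s;
   for (s = 0; s < max_slots && s < 32; s++) {
      if (!(blk->slot_mask & (1u << s))) {
         blk->slot_mask |= 1u << s;
         return (int)(b * DEMO_BLOCK_SIZE + s * blk->query_size);
      }
   }
   return -1;                      /* block full: caller grabs another one */
}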
+ */ +#define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2) +#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE) + +struct svga_qmem_alloc_entry +{ + unsigned start_offset; /* start offset of the memory block */ + unsigned block_index; /* block index of the memory block */ + unsigned query_size; /* query size in this memory block */ + unsigned nquery; /* number of queries allocated */ + struct util_bitmask *alloc_mask; /* allocation mask */ + struct svga_qmem_alloc_entry *next; /* next memory block */ +}; + + +/** + * Allocate a memory block from the query object memory + * \return -1 if out of memory, else index of the query memory block + */ +static int +allocate_query_block(struct svga_context *svga) +{ + int index; + unsigned offset; + + /* Find the next available query block */ + index = util_bitmask_add(svga->gb_query_alloc_mask); + + if (index == UTIL_BITMASK_INVALID_INDEX) + return -1; + + offset = index * SVGA_QUERY_MEM_BLOCK_SIZE; + if (offset >= svga->gb_query_len) { + unsigned i; + + /** + * All the memory blocks are allocated, lets see if there is + * any empty memory block around that can be freed up. + */ + index = -1; + for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) { + struct svga_qmem_alloc_entry *alloc_entry; + struct svga_qmem_alloc_entry *prev_alloc_entry = NULL; + + alloc_entry = svga->gb_query_map[i]; + while (alloc_entry && index == -1) { + if (alloc_entry->nquery == 0) { + /* This memory block is empty, it can be recycled. */ + if (prev_alloc_entry) { + prev_alloc_entry->next = alloc_entry->next; + } else { + svga->gb_query_map[i] = alloc_entry->next; + } + index = alloc_entry->block_index; + } else { + prev_alloc_entry = alloc_entry; + alloc_entry = alloc_entry->next; + } + } + } + } + + return index; +} + +/** + * Allocate a slot in the specified memory block. + * All slots in this memory block are of the same size. + * + * \return -1 if out of memory, else index of the query slot + */ +static int +allocate_query_slot(struct svga_context *svga, + struct svga_qmem_alloc_entry *alloc) +{ + int index; + unsigned offset; + + /* Find the next available slot */ + index = util_bitmask_add(alloc->alloc_mask); + + if (index == UTIL_BITMASK_INVALID_INDEX) + return -1; + + offset = index * alloc->query_size; + if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE) + return -1; + + alloc->nquery++; + + return index; +} + +/** + * Deallocate the specified slot in the memory block. + * If all slots are freed up, then deallocate the memory block + * as well, so it can be allocated for other query type + */ +static void +deallocate_query_slot(struct svga_context *svga, + struct svga_qmem_alloc_entry *alloc, + unsigned index) +{ + assert(index != UTIL_BITMASK_INVALID_INDEX); + + util_bitmask_clear(alloc->alloc_mask, index); + alloc->nquery--; + + /** + * Don't worry about deallocating the empty memory block here. + * The empty memory block will be recycled when no more memory block + * can be allocated. 
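/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  As the comment above says, freeing the last slot does not
 * immediately release the memory block; empty blocks are reclaimed lazily,
 * only when the allocator runs out of fresh blocks and sweeps the per-type
 * lists for one with no live queries.  A minimal model of that sweep
 * (invented names, singly linked list as in the driver):
 */
struct demo_qblock {
   unsigned nquery;                  /* number of live queries in the block */
   int block_index;
   struct demo_qblock *next;         /* per-query-type list */
};

/* Unlink and return the index of the first empty block, or -1 if none. */
static int demo_reclaim_empty_block(struct demo_qblock **list_head)
{
   struct demo_qblock **link = list_head;
   while (*link) {
      struct demo_qblock *blk = *link;
      if (blk->nquery == 0) {
         *link = blk->next;          /* caller reuses blk->block_index */
         return blk->block_index;
      }
      link = &blk->next;
   }
   return -1;
}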
+ */ +} + +static struct svga_qmem_alloc_entry * +allocate_query_block_entry(struct svga_context *svga, + unsigned len) +{ + struct svga_qmem_alloc_entry *alloc_entry; + int block_index = -1; + + block_index = allocate_query_block(svga); + if (block_index == -1) + return NULL; + alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry); + if (!alloc_entry) + return NULL; + + alloc_entry->block_index = block_index; + alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE; + alloc_entry->nquery = 0; + alloc_entry->alloc_mask = util_bitmask_create(); + alloc_entry->next = NULL; + alloc_entry->query_size = len; + + return alloc_entry; +} + +/** + * Allocate a memory slot for a query of the specified type. + * It will first search through the memory blocks that are allocated + * for the query type. If no memory slot is available, it will try + * to allocate another memory block within the query object memory for + * this query type. + */ +static int +allocate_query(struct svga_context *svga, + SVGA3dQueryType type, + unsigned len) +{ + struct svga_qmem_alloc_entry *alloc_entry; + int slot_index = -1; + unsigned offset; + + assert(type < SVGA_QUERY_MAX); + + alloc_entry = svga->gb_query_map[type]; + + if (!alloc_entry) { + /** + * No query memory block has been allocated for this query type, + * allocate one now + */ + alloc_entry = allocate_query_block_entry(svga, len); + if (!alloc_entry) + return -1; + svga->gb_query_map[type] = alloc_entry; + } + + /* Allocate a slot within the memory block allocated for this query type */ + slot_index = allocate_query_slot(svga, alloc_entry); + + if (slot_index == -1) { + /* This query memory block is full, allocate another one */ + alloc_entry = allocate_query_block_entry(svga, len); + if (!alloc_entry) + return -1; + alloc_entry->next = svga->gb_query_map[type]; + svga->gb_query_map[type] = alloc_entry; + slot_index = allocate_query_slot(svga, alloc_entry); + } + + assert(slot_index != -1); + offset = slot_index * len + alloc_entry->start_offset; + + return offset; +} + + +/** + * Deallocate memory slot allocated for the specified query + */ +static void +deallocate_query(struct svga_context *svga, + struct svga_query *sq) +{ + struct svga_qmem_alloc_entry *alloc_entry; + unsigned slot_index; + unsigned offset = sq->offset; + + alloc_entry = svga->gb_query_map[sq->svga_type]; + + while (alloc_entry) { + if (offset >= alloc_entry->start_offset && + offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) { + + /* The slot belongs to this memory block, deallocate it */ + slot_index = (offset - alloc_entry->start_offset) / + alloc_entry->query_size; + deallocate_query_slot(svga, alloc_entry, slot_index); + alloc_entry = NULL; + } else { + alloc_entry = alloc_entry->next; + } + } +} + + +/** + * Destroy the gb query object and all the related query structures + */ +static void +destroy_gb_query_obj(struct svga_context *svga) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + unsigned i; + + for (i = 0; i < SVGA_QUERY_MAX; i++) { + struct svga_qmem_alloc_entry *alloc_entry, *next; + alloc_entry = svga->gb_query_map[i]; + while (alloc_entry) { + next = alloc_entry->next; + util_bitmask_destroy(alloc_entry->alloc_mask); + FREE(alloc_entry); + alloc_entry = next; + } + svga->gb_query_map[i] = NULL; + } + + if (svga->gb_query) + sws->query_destroy(sws, svga->gb_query); + svga->gb_query = NULL; + + util_bitmask_destroy(svga->gb_query_alloc_mask); +} + +/** + * Define query and create the gb query object if it is not already 
created. + * There is only one gb query object per context which will be shared by + * queries of all types. + */ +static enum pipe_error +define_query_vgpu10(struct svga_context *svga, + struct svga_query *sq, int resultLen) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + int qlen; + enum pipe_error ret = PIPE_OK; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + if (svga->gb_query == NULL) { + /* Create a gb query object */ + svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE); + if (!svga->gb_query) + return PIPE_ERROR_OUT_OF_MEMORY; + svga->gb_query_len = SVGA_QUERY_MEM_SIZE; + memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map)); + svga->gb_query_alloc_mask = util_bitmask_create(); + + /* Bind the query object to the context */ + if (svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_SET) != PIPE_OK) { + svga_context_flush(svga, NULL); + svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_SET); + } + } + + sq->gb_query = svga->gb_query; + + /* Allocate an integer ID for this query */ + sq->id = util_bitmask_add(svga->query_id_bm); + if (sq->id == UTIL_BITMASK_INVALID_INDEX) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* Find a slot for this query in the gb object */ + qlen = resultLen + sizeof(SVGA3dQueryState); + sq->offset = allocate_query(svga, sq->svga_type, qlen); + if (sq->offset == -1) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n", + sq->svga_type, sq->id, sq->offset); + + /** + * Send SVGA3D commands to define the query + */ + ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); + } + if (ret != PIPE_OK) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); + } + assert(ret == PIPE_OK); + + ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); + } + assert(ret == PIPE_OK); + + return PIPE_OK; +} + +static enum pipe_error +destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + enum pipe_error ret; + + ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id); + + /* Deallocate the memory slot allocated for this query */ + deallocate_query(svga, sq); + + return ret; +} + + +/** + * Rebind queryies to the context. 
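/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  define_query_vgpu10() above does three things for every new
 * query: lazily create and bind the single per-context query object, hand
 * the query an integer id, then carve out a result offset and tell the
 * device about the (id, type, offset) triple via DefineQuery, BindQuery
 * and SetQueryOffset.  The outline below restates that ordering with
 * stubbed-out, purely hypothetical helpers:
 */
#include <stdbool.h>

struct demo_ctx { void *gb_query; };
struct demo_query10 { unsigned id; unsigned offset; int type; };

/* Stand-ins for the winsys and command-buffer calls. */
static void *demo_create_query_mob(void) { static int mob; return &mob; }
static void demo_bind_query_mob(struct demo_ctx *c) { (void) c; }
static unsigned demo_alloc_id(void) { static unsigned next; return next++; }
static int demo_alloc_offset(int type, unsigned len) { (void) type; return (int) len; }
static void demo_emit_define(unsigned id, int type) { (void) id; (void) type; }
static void demo_emit_bind(void *mob, unsigned id) { (void) mob; (void) id; }
static void demo_emit_set_offset(unsigned id, unsigned off) { (void) id; (void) off; }

static bool
demo_define_query(struct demo_ctx *ctx, struct demo_query10 *q,
                  unsigned result_len)
{
   int off;

   if (!ctx->gb_query) {
      /* One query object per context, shared by all query types. */
      ctx->gb_query = demo_create_query_mob();
      demo_bind_query_mob(ctx);
   }

   q->id = demo_alloc_id();
   off = demo_alloc_offset(q->type, result_len);
   if (off < 0)
      return false;
   q->offset = (unsigned) off;

   demo_emit_define(q->id, q->type);        /* DXDefineQuery    */
   demo_emit_bind(ctx->gb_query, q->id);    /* DXBindQuery      */
   demo_emit_set_offset(q->id, q->offset);  /* DXSetQueryOffset */
   return true;
}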
+ */ +static void +rebind_vgpu10_query(struct svga_context *svga) +{ + if (svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF) != PIPE_OK) { + svga_context_flush(svga, NULL); + svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF); + } + + svga->rebind.flags.query = FALSE; +} + + +static enum pipe_error +begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + int status = 0; + + sws->fence_reference(sws, &sq->fence, NULL); + + /* Initialize the query state to NEW */ + status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW); + if (status) + return PIPE_ERROR; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + /* Send the BeginQuery command to the device */ + ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); + } + return ret; +} + +static enum pipe_error +end_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); + } + + /* Finish fence is copied here from get_query_result_vgpu10. This helps + * with cases where svga_begin_query might be called again before + * svga_get_query_result, such as GL_TIME_ELAPSED. + */ + if (!sq->fence) { + svga_context_flush(svga, &sq->fence); + } + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + + return ret; +} + +static boolean +get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq, + boolean wait, void *result, int resultLen) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + SVGA3dQueryState queryState; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); + + if (queryState == SVGA3D_QUERYSTATE_PENDING) { + if (!wait) + return FALSE; + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); + } + + assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED || + queryState == SVGA3D_QUERYSTATE_FAILED); + + return TRUE; +} static struct pipe_query * svga_create_query(struct pipe_context *pipe, unsigned query_type, unsigned index) { - struct svga_context *svga = svga_context( pipe ); - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_context *svga = svga_context(pipe); struct svga_query *sq; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(query_type < SVGA_QUERY_MAX); sq = CALLOC_STRUCT(svga_query); if (!sq) - goto no_sq; + goto fail; + + /* Allocate an integer ID for the query */ + sq->id = util_bitmask_add(svga->query_id_bm); + if (sq->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; + + SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__, + query_type, sq, sq->id); switch (query_type) { case PIPE_QUERY_OCCLUSION_COUNTER: sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION; + if (svga_have_vgpu10(svga)) { + define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult)); - 
sq->hwbuf = svga_winsys_buffer_create(svga, 1, - SVGA_BUFFER_USAGE_PINNED, - sizeof *sq->queryResult); - if (!sq->hwbuf) { - debug_printf("svga: failed to alloc query object!\n"); - goto no_hwbuf; - } + /** + * In OpenGL, occlusion counter query can be used in conditional + * rendering; however, in DX10, only OCCLUSION_PREDICATE query can + * be used for predication. Hence, we need to create an occlusion + * predicate query along with the occlusion counter query. So when + * the occlusion counter query is used for predication, the associated + * query of occlusion predicate type will be used + * in the SetPredication command. + */ + sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index); - sq->queryResult = (SVGA3dQueryResult *) - sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE); - if (!sq->queryResult) { - debug_printf("svga: failed to map query object!\n"); - goto no_query_result; + } else { + define_query_vgpu9(svga, sq); } - - sq->queryResult->totalSize = sizeof *sq->queryResult; - sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; - - /* We request the buffer to be pinned and assume it is always mapped. - * The reason is that we don't want to wait for fences when checking the - * query status. - */ - sws->buffer_unmap(sws, sq->hwbuf); break; - case SVGA_QUERY_DRAW_CALLS: - case SVGA_QUERY_FALLBACKS: + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE; + define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult)); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS; + define_query_vgpu10(svga, sq, + sizeof(SVGADXStreamOutStatisticsQueryResult)); + break; + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP; + define_query_vgpu10(svga, sq, + sizeof(SVGADXTimestampQueryResult)); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_MAP_BUFFER_TIME: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: break; default: assert(!"unexpected query type in svga_create_query()"); @@ -129,39 +742,75 @@ svga_create_query(struct pipe_context *pipe, return &sq->base; -no_query_result: - sws->buffer_destroy(sws, sq->hwbuf); -no_hwbuf: +fail: FREE(sq); -no_sq: return NULL; } - static void svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + struct svga_query *sq; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + if (!q) { + destroy_gb_query_obj(svga); + return; + } + + sq = svga_query(q); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - sws->buffer_destroy(sws, sq->hwbuf); - sq->hwbuf = NULL; + if (svga_have_vgpu10(svga)) { + /* make sure to also destroy any associated 
predicate query */ + if (sq->predicate) + svga_destroy_query(pipe, sq->predicate); + destroy_query_vgpu10(svga, sq); + } else { + sws->buffer_destroy(sws, sq->hwbuf); + } sws->fence_reference(sws, &sq->fence, NULL); break; - case SVGA_QUERY_DRAW_CALLS: - case SVGA_QUERY_FALLBACKS: + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + destroy_query_vgpu10(svga, sq); + sws->fence_reference(sws, &sq->fence, NULL); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + destroy_query_vgpu10(svga, sq); + sws->fence_reference(sws, &sq->fence, NULL); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_MAP_BUFFER_TIME: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: /* nothing */ break; default: assert(!"svga: unexpected query type in svga_destroy_query()"); } + /* Free the query id */ + util_bitmask_clear(svga->query_id_bm, sq->id); + FREE(sq); } @@ -169,13 +818,15 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) static boolean svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_context *svga = svga_context( pipe ); - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); enum pipe_error ret; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(sq); + assert(sq->type < SVGA_QUERY_MAX); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); /* Need to flush out buffered drawing commands so that they don't * get counted in the query results. @@ -184,44 +835,69 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - assert(!svga->sq); - if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { - /* The application doesn't care for the pending query result. - * We cannot let go of the existing buffer and just get a new one - * because its storage may be reused for other purposes and clobbered - * by the host when it determines the query result. So the only - * option here is to wait for the existing query's result -- not a - * big deal, given that no sane application would do this. 
- */ - uint64_t result; - svga_get_query_result(pipe, q, TRUE, (void*)&result); - assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); - } - - sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; - sws->fence_reference(sws, &sq->fence, NULL); - - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - assert(ret == PIPE_OK); + if (svga_have_vgpu10(svga)) { + ret = begin_query_vgpu10(svga, sq); + /* also need to start the associated occlusion predicate query */ + if (sq->predicate) { + enum pipe_error status; + status = begin_query_vgpu10(svga, svga_query(sq->predicate)); + assert(status == PIPE_OK); + (void) status; + } + } else { + ret = begin_query_vgpu9(svga, sq); } - - svga->sq = sq; + assert(ret == PIPE_OK); + (void) ret; + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + ret = begin_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + ret = begin_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + sq->begin_count = svga->hud.num_draw_calls; + break; + case SVGA_QUERY_NUM_FALLBACKS: + sq->begin_count = svga->hud.num_fallbacks; break; - case SVGA_QUERY_DRAW_CALLS: - sq->begin_count = svga->num_draw_calls; + case SVGA_QUERY_NUM_FLUSHES: + sq->begin_count = svga->hud.num_flushes; break; - case SVGA_QUERY_FALLBACKS: - sq->begin_count = svga->num_fallbacks; + case SVGA_QUERY_NUM_VALIDATIONS: + sq->begin_count = svga->hud.num_validations; + break; + case SVGA_QUERY_MAP_BUFFER_TIME: + sq->begin_count = svga->hud.map_buffer_time; + break; + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + sq->begin_count = svga->hud.num_resources_mapped; + break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->begin_count = svga->hud.num_bytes_uploaded; break; case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: /* nothing */ break; default: assert(!"unexpected query type in svga_begin_query()"); } + + svga->sq[sq->type] = sq; + return true; } @@ -229,48 +905,91 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) static void svga_end_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_context *svga = svga_context( pipe ); - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); enum pipe_error ret; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(sq); + assert(sq->type < SVGA_QUERY_MAX); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); + + if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq) + svga_begin_query(pipe, q); svga_hwtnl_flush_retry(svga); + assert(svga->sq[sq->type] == sq); + switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - assert(svga->sq == sq); - - /* Set to PENDING before sending EndQuery. 
*/ - sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; - - ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf); - assert(ret == PIPE_OK); + if (svga_have_vgpu10(svga)) { + ret = end_query_vgpu10(svga, sq); + /* also need to end the associated occlusion predicate query */ + if (sq->predicate) { + enum pipe_error status; + status = end_query_vgpu10(svga, svga_query(sq->predicate)); + assert(status == PIPE_OK); + (void) status; + } + } else { + ret = end_query_vgpu9(svga, sq); } - + assert(ret == PIPE_OK); + (void) ret; /* TODO: Delay flushing. We don't really need to flush here, just ensure * that there is one flush before svga_get_query_result attempts to get * the result. */ svga_context_flush(svga, NULL); - - svga->sq = NULL; break; - case SVGA_QUERY_DRAW_CALLS: - sq->end_count = svga->num_draw_calls; + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + ret = end_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); break; - case SVGA_QUERY_FALLBACKS: - sq->end_count = svga->num_fallbacks; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + ret = end_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + sq->end_count = svga->hud.num_draw_calls; + break; + case SVGA_QUERY_NUM_FALLBACKS: + sq->end_count = svga->hud.num_fallbacks; + break; + case SVGA_QUERY_NUM_FLUSHES: + sq->end_count = svga->hud.num_flushes; + break; + case SVGA_QUERY_NUM_VALIDATIONS: + sq->end_count = svga->hud.num_validations; + break; + case SVGA_QUERY_MAP_BUFFER_TIME: + sq->end_count = svga->hud.map_buffer_time; + break; + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + sq->end_count = svga->hud.num_resources_mapped; + break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->end_count = svga->hud.num_bytes_uploaded; break; case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: /* nothing */ break; default: assert(!"unexpected query type in svga_end_query()"); } + svga->sq[sq->type] = NULL; } @@ -280,64 +999,175 @@ svga_get_query_result(struct pipe_context *pipe, boolean wait, union pipe_query_result *vresult) { - struct svga_context *svga = svga_context( pipe ); - struct svga_screen *svgascreen = svga_screen( pipe->screen ); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_query *sq = svga_query( q ); - SVGA3dQueryState state; - uint64_t *result = (uint64_t *) vresult; + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); + uint64_t *result = (uint64_t *)vresult; + boolean ret = TRUE; - SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__); + assert(sq); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n", + __FUNCTION__, sq, sq->id, wait); switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - /* The query status won't be updated by the host unless - * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause - * a synchronous wait on the host. 
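/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The SVGA_QUERY_NUM_* cases above are pure software counters
 * kept in the context's "hud" bookkeeping: begin and end just snapshot a
 * running total and the reported value is the difference, so these queries
 * never touch the device.  Minimal model (invented names):
 */
#include <stdint.h>

struct demo_hud {
   uint64_t num_draw_calls;          /* running total, bumped at each draw */
};

struct demo_counter_query {
   uint64_t begin_count, end_count;
};

static void demo_counter_begin(struct demo_counter_query *q,
                               const struct demo_hud *hud)
{
   q->begin_count = hud->num_draw_calls;
}

static void demo_counter_end(struct demo_counter_query *q,
                             const struct demo_hud *hud)
{
   q->end_count = hud->num_draw_calls;
}

static uint64_t demo_counter_result(const struct demo_counter_query *q)
{
   return q->end_count - q->begin_count;
}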
- */ - if (!sq->fence) { - enum pipe_error ret; - - ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf); - assert(ret == PIPE_OK); - } - - svga_context_flush(svga, &sq->fence); - - assert(sq->fence); + if (svga_have_vgpu10(svga)) { + SVGADXOcclusionQueryResult occResult; + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&occResult, sizeof(occResult)); + *result = (uint64_t)occResult.samplesRendered; + } else { + ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result); } + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: { + SVGADXOcclusionPredicateQueryResult occResult; + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&occResult, sizeof(occResult)); + vresult->b = occResult.anySamplesRendered != 0; + break; + } + case PIPE_QUERY_SO_STATISTICS: { + SVGADXStreamOutStatisticsQueryResult sResult; + struct pipe_query_data_so_statistics *pResult = + (struct pipe_query_data_so_statistics *)vresult; - state = sq->queryResult->state; - if (state == SVGA3D_QUERYSTATE_PENDING) { - if (!wait) - return FALSE; - sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); - state = sq->queryResult->state; - } + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof(sResult)); + pResult->num_primitives_written = sResult.numPrimitivesWritten; + pResult->primitives_storage_needed = sResult.numPrimitivesRequired; + break; + } + case PIPE_QUERY_TIMESTAMP: { + SVGADXTimestampQueryResult sResult; - assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || - state == SVGA3D_QUERYSTATE_FAILED); + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof(sResult)); + *result = (uint64_t)sResult.timestamp; + break; + } + case PIPE_QUERY_PRIMITIVES_GENERATED: { + SVGADXStreamOutStatisticsQueryResult sResult; - *result = (uint64_t) sq->queryResult->result32; + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof sResult); + *result = (uint64_t)sResult.numPrimitivesRequired; break; - case SVGA_QUERY_DRAW_CALLS: - /* fall-through */ - case SVGA_QUERY_FALLBACKS: + } + case PIPE_QUERY_PRIMITIVES_EMITTED: { + SVGADXStreamOutStatisticsQueryResult sResult; + + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof sResult); + *result = (uint64_t)sResult.numPrimitivesWritten; + break; + } + /* These are per-frame counters */ + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_MAP_BUFFER_TIME: vresult->u64 = sq->end_count - sq->begin_count; break; + /* These are running total counters */ case SVGA_QUERY_MEMORY_USED: - vresult->u64 = svgascreen->total_resource_bytes; + vresult->u64 = svgascreen->hud.total_resource_bytes; + break; + case SVGA_QUERY_NUM_SHADERS: + vresult->u64 = svga->hud.num_shaders; + break; + case SVGA_QUERY_NUM_RESOURCES: + vresult->u64 = svgascreen->hud.num_resources; + break; + case SVGA_QUERY_NUM_STATE_OBJECTS: + vresult->u64 = svga->hud.num_state_objects; + break; + case SVGA_QUERY_NUM_SURFACE_VIEWS: + vresult->u64 = svga->hud.num_surface_views; + break; + case SVGA_QUERY_NUM_GENERATE_MIPMAP: + vresult->u64 = svga->hud.num_generate_mipmap; break; 
default: assert(!"unexpected query type in svga_get_query_result"); } - SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result); + SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult)); - return TRUE; + return ret; +} + +static void +svga_render_condition(struct pipe_context *pipe, struct pipe_query *q, + boolean condition, uint mode) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + struct svga_query *sq = svga_query(q); + SVGA3dQueryId queryId; + enum pipe_error ret; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + assert(svga_have_vgpu10(svga)); + if (sq == NULL) { + queryId = SVGA3D_INVALID_ID; + } + else { + assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION || + sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE); + + if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) { + assert(sq->predicate); + /** + * For conditional rendering, make sure to use the associated + * predicate query. + */ + sq = svga_query(sq->predicate); + } + queryId = sq->id; + + if ((mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) { + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + } + } + + ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition); + } +} + + +/* + * This function is a workaround because we lack the ability to query + * renderer's time synchornously. + */ +static uint64_t +svga_get_timestamp(struct pipe_context *pipe) +{ + struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0); + union pipe_query_result result; + + svga_begin_query(pipe, q); + svga_end_query(pipe,q); + svga_get_query_result(pipe, q, TRUE, &result); + svga_destroy_query(pipe, q); + + return result.u64; } @@ -349,4 +1179,6 @@ svga_init_query_functions(struct svga_context *svga) svga->pipe.begin_query = svga_begin_query; svga->pipe.end_query = svga_end_query; svga->pipe.get_query_result = svga_get_query_result; + svga->pipe.render_condition = svga_render_condition; + svga->pipe.get_timestamp = svga_get_timestamp; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 356898a86..8e0db5395 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -23,16 +23,18 @@ * **********************************************************/ +#include "pipe/p_defines.h" #include "draw/draw_context.h" +#include "util/u_bitmask.h" #include "util/u_inlines.h" -#include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "svga_cmd.h" #include "svga_context.h" +#include "svga_hw_reg.h" #include "svga_screen.h" -#include "svga_hw_reg.h" /* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW. 
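/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The svga_get_timestamp() workaround a few hunks above has to
 * answer a synchronous "what time is it" request with only an asynchronous
 * query mechanism, so it spins up a throwaway TIMESTAMP query, blocks on
 * its result and tears it down again.  Generic shape of that trick, with
 * trivially stubbed, hypothetical query hooks standing in for the real
 * pipe_context entry points:
 */
#include <stdint.h>

struct demo_pipe { uint64_t fake_clock; };

static void *demo_tsq_create(struct demo_pipe *p) { static int q; (void) p; return &q; }
static void demo_tsq_begin(struct demo_pipe *p, void *q) { (void) p; (void) q; }
static void demo_tsq_end(struct demo_pipe *p, void *q) { p->fake_clock++; (void) q; }
static uint64_t demo_tsq_result_blocking(struct demo_pipe *p, void *q)
{ (void) q; return p->fake_clock; }
static void demo_tsq_destroy(struct demo_pipe *p, void *q) { (void) p; (void) q; }

static uint64_t demo_get_timestamp(struct demo_pipe *p)
{
   void *q = demo_tsq_create(p);
   uint64_t result;

   demo_tsq_begin(p, q);                      /* empty begin/end interval */
   demo_tsq_end(p, q);
   result = demo_tsq_result_blocking(p, q);   /* wait == TRUE */
   demo_tsq_destroy(p, q);
   return result;
}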
*/ @@ -61,6 +63,96 @@ static SVGA3dShadeMode svga_translate_flatshade( unsigned mode ) } +static unsigned +translate_fill_mode(unsigned fill) +{ + switch (fill) { + case PIPE_POLYGON_MODE_POINT: + return SVGA3D_FILLMODE_POINT; + case PIPE_POLYGON_MODE_LINE: + return SVGA3D_FILLMODE_LINE; + case PIPE_POLYGON_MODE_FILL: + return SVGA3D_FILLMODE_FILL; + default: + assert(!"Bad fill mode"); + return SVGA3D_FILLMODE_FILL; + } +} + + +static unsigned +translate_cull_mode(unsigned cull) +{ + switch (cull) { + case PIPE_FACE_NONE: + return SVGA3D_CULL_NONE; + case PIPE_FACE_FRONT: + return SVGA3D_CULL_FRONT; + case PIPE_FACE_BACK: + return SVGA3D_CULL_BACK; + case PIPE_FACE_FRONT_AND_BACK: + /* NOTE: we simply no-op polygon drawing in svga_draw_vbo() */ + return SVGA3D_CULL_NONE; + default: + assert(!"Bad cull mode"); + return SVGA3D_CULL_NONE; + } +} + + +static void +define_rasterizer_object(struct svga_context *svga, + struct svga_rasterizer_state *rast) +{ + unsigned fill_mode = translate_fill_mode(rast->templ.fill_front); + unsigned cull_mode = translate_cull_mode(rast->templ.cull_face); + int depth_bias = rast->templ.offset_units; + float slope_scaled_depth_bias = rast->templ.offset_scale; + float depth_bias_clamp = 0.0; /* XXX fix me */ + unsigned try; + const float line_width = rast->templ.line_width > 0.0f ? + rast->templ.line_width : 1.0f; + const uint8 line_factor = rast->templ.line_stipple_enable ? + rast->templ.line_stipple_factor : 0; + const uint16 line_pattern = rast->templ.line_stipple_enable ? + rast->templ.line_stipple_pattern : 0; + + rast->id = util_bitmask_add(svga->rast_object_id_bm); + + if (rast->templ.fill_front != rast->templ.fill_back) { + /* The VGPU10 device can't handle different front/back fill modes. + * We'll handle that with a swtnl/draw fallback. But we need to + * make sure we always fill triangles in that case. + */ + fill_mode = SVGA3D_FILLMODE_FILL; + } + + for (try = 0; try < 2; try++) { + enum pipe_error ret = + SVGA3D_vgpu10_DefineRasterizerState(svga->swc, + rast->id, + fill_mode, + cull_mode, + rast->templ.front_ccw, + depth_bias, + depth_bias_clamp, + slope_scaled_depth_bias, + rast->templ.depth_clip, + rast->templ.scissor, + rast->templ.multisample, + rast->templ.line_smooth, + line_width, + rast->templ.line_stipple_enable, + line_factor, + line_pattern, + !rast->templ.flatshade_first); + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) @@ -92,17 +184,24 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->antialiasedlineenable = templ->line_smooth; rast->lastpixel = templ->line_last_pixel; rast->pointsprite = templ->sprite_coord_enable != 0x0; - rast->pointsize = templ->point_size; - rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + + if (templ->point_smooth) { + /* For smooth points we need to generate fragments for at least + * a 2x2 region. Otherwise the quad we draw may be too small and + * we may generate no fragments at all. 
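/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  translate_cull_mode() above maps PIPE_FACE_FRONT_AND_BACK to
 * "cull nothing" and relies on the svga_draw_vbo() hunk earlier in this
 * diff to simply skip triangle draws when both faces are culled.  The two
 * halves of that arrangement, in isolation (invented enum names):
 */
#include <stdbool.h>

enum demo_face { DEMO_FACE_NONE, DEMO_FACE_FRONT, DEMO_FACE_BACK,
                 DEMO_FACE_FRONT_AND_BACK };
enum demo_cull { DEMO_CULL_NONE, DEMO_CULL_FRONT, DEMO_CULL_BACK };

static enum demo_cull demo_translate_cull(enum demo_face cull_face)
{
   switch (cull_face) {
   case DEMO_FACE_FRONT:
      return DEMO_CULL_FRONT;
   case DEMO_FACE_BACK:
      return DEMO_CULL_BACK;
   case DEMO_FACE_FRONT_AND_BACK:
      /* handled by skipping the draw instead of by the rasterizer */
      return DEMO_CULL_NONE;
   default:
      return DEMO_CULL_NONE;
   }
}

/* The matching draw-time check: filled primitives have nothing to draw. */
static bool demo_skip_triangle_draw(enum demo_face cull_face, bool is_triangles)
{
   return is_triangles && cull_face == DEMO_FACE_FRONT_AND_BACK;
}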
+ */ + rast->pointsize = MAX2(2.0f, templ->point_size); + } + else { + rast->pointsize = templ->point_size; + } + + rast->hw_fillmode = PIPE_POLYGON_MODE_FILL; /* Use swtnl + decomposition implement these: */ - if (templ->poly_stipple_enable) { - rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; - rast->need_pipeline_tris_str = "poly stipple"; - } - if (screen->maxLineWidth > 1.0F) { + if (templ->line_width <= screen->maxLineWidth) { /* pass line width to device */ rast->linewidth = MAX2(1.0F, templ->line_width); } @@ -129,7 +228,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } } - if (templ->point_smooth) { + if (!svga_have_vgpu10(svga) && templ->point_smooth) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; rast->need_pipeline_points_str = "smooth points"; } @@ -231,13 +330,13 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->depthbias = templ->offset_units; } - rast->hw_unfilled = fill; + rast->hw_fillmode = fill; } if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { /* Turn off stuff which will get done in the draw module: */ - rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + rast->hw_fillmode = PIPE_POLYGON_MODE_FILL; rast->slopescaledepthbias = 0; rast->depthbias = 0; } @@ -249,6 +348,17 @@ svga_create_rasterizer_state(struct pipe_context *pipe, debug_printf(" tris: %s \n", rast->need_pipeline_tris_str); } + if (svga_have_vgpu10(svga)) { + define_rasterizer_object(svga, rast); + } + + if (templ->poly_smooth) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "GL_POLYGON_SMOOTH not supported"); + } + + svga->hud.num_state_objects++; + return rast; } @@ -258,18 +368,41 @@ static void svga_bind_rasterizer_state( struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state; + if (!raster || + !svga->curr.rast || + raster->templ.poly_stipple_enable != + svga->curr.rast->templ.poly_stipple_enable) { + svga->dirty |= SVGA_NEW_STIPPLE; + } - draw_set_rasterizer_state(svga->swtnl.draw, raster ? 
&raster->templ : NULL, - state); svga->curr.rast = raster; svga->dirty |= SVGA_NEW_RAST; } -static void svga_delete_rasterizer_state(struct pipe_context *pipe, - void *raster) +static void +svga_delete_rasterizer_state(struct pipe_context *pipe, void *state) { - FREE(raster); + struct svga_context *svga = svga_context(pipe); + struct svga_rasterizer_state *raster = + (struct svga_rasterizer_state *) state; + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret = + SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); + } + + if (raster->id == svga->state.hw_draw.rasterizer_id) + svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->rast_object_id_bm, raster->id); + } + + FREE(state); + svga->hud.num_state_objects--; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c index effd490dd..3e778f0a0 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -23,17 +23,21 @@ * **********************************************************/ -#include "util/u_inlines.h" #include "pipe/p_defines.h" +#include "util/u_bitmask.h" #include "util/u_format.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "svga_context.h" +#include "svga_cmd.h" +#include "svga_debug.h" #include "svga_resource_texture.h" +#include "svga_surface.h" +#include "svga_sampler_view.h" -#include "svga_debug.h" static inline unsigned translate_wrap_mode(unsigned wrap) @@ -91,6 +95,126 @@ static inline unsigned translate_mip_filter( unsigned filter ) } } + +static uint8 +translate_comparison_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return SVGA3D_COMPARISON_NEVER; + case PIPE_FUNC_LESS: + return SVGA3D_COMPARISON_LESS; + case PIPE_FUNC_EQUAL: + return SVGA3D_COMPARISON_EQUAL; + case PIPE_FUNC_LEQUAL: + return SVGA3D_COMPARISON_LESS_EQUAL; + case PIPE_FUNC_GREATER: + return SVGA3D_COMPARISON_GREATER; + case PIPE_FUNC_NOTEQUAL: + return SVGA3D_COMPARISON_NOT_EQUAL; + case PIPE_FUNC_GEQUAL: + return SVGA3D_COMPARISON_GREATER_EQUAL; + case PIPE_FUNC_ALWAYS: + return SVGA3D_COMPARISON_ALWAYS; + default: + assert(!"Invalid comparison function"); + return SVGA3D_COMPARISON_ALWAYS; + } +} + + +/** + * Translate filtering state to vgpu10 format. + */ +static SVGA3dFilter +translate_filter_mode(unsigned img_filter, + unsigned min_filter, + unsigned mag_filter, + boolean anisotropic, + boolean compare) +{ + SVGA3dFilter mode = 0; + + if (img_filter == PIPE_TEX_FILTER_LINEAR) + mode |= SVGA3D_FILTER_MIP_LINEAR; + if (min_filter == PIPE_TEX_FILTER_LINEAR) + mode |= SVGA3D_FILTER_MIN_LINEAR; + if (mag_filter == PIPE_TEX_FILTER_LINEAR) + mode |= SVGA3D_FILTER_MAG_LINEAR; + if (anisotropic) + mode |= SVGA3D_FILTER_ANISOTROPIC; + if (compare) + mode |= SVGA3D_FILTER_COMPARE; + + return mode; +} + + +/** + * Define a vgpu10 sampler state. 
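/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  translate_filter_mode() above folds gallium's separate
 * min/mag/mip filter settings plus the anisotropic and shadow-compare
 * flags into a single device filter bitfield.  Standalone model with
 * invented bit values:
 */
#include <stdbool.h>
#include <stdint.h>

#define DEMO_FILTER_MIP_LINEAR   (1u << 0)
#define DEMO_FILTER_MIN_LINEAR   (1u << 1)
#define DEMO_FILTER_MAG_LINEAR   (1u << 2)
#define DEMO_FILTER_ANISOTROPIC  (1u << 3)
#define DEMO_FILTER_COMPARE      (1u << 4)

enum demo_tex_filter { DEMO_TEX_NEAREST, DEMO_TEX_LINEAR };

static uint32_t
demo_translate_filter(enum demo_tex_filter mip,
                      enum demo_tex_filter min,
                      enum demo_tex_filter mag,
                      bool anisotropic, bool compare)
{
   uint32_t mode = 0;

   if (mip == DEMO_TEX_LINEAR)
      mode |= DEMO_FILTER_MIP_LINEAR;
   if (min == DEMO_TEX_LINEAR)
      mode |= DEMO_FILTER_MIN_LINEAR;
   if (mag == DEMO_TEX_LINEAR)
      mode |= DEMO_FILTER_MAG_LINEAR;
   if (anisotropic)
      mode |= DEMO_FILTER_ANISOTROPIC;
   if (compare)
      mode |= DEMO_FILTER_COMPARE;
   return mode;
}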
+ */ +static void +define_sampler_state_object(struct svga_context *svga, + struct svga_sampler_state *ss, + const struct pipe_sampler_state *ps) +{ + uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */ + boolean anisotropic; + uint8 compare_func; + SVGA3dFilter filter; + SVGA3dRGBAFloat bcolor; + unsigned try; + float min_lod, max_lod; + + assert(svga_have_vgpu10(svga)); + + anisotropic = ss->aniso_level > 1.0f; + + filter = translate_filter_mode(ps->min_mip_filter, + ps->min_img_filter, + ps->mag_img_filter, + anisotropic, + ss->compare_mode); + + compare_func = translate_comparison_func(ss->compare_func); + + COPY_4V(bcolor.value, ps->border_color.f); + + ss->id = util_bitmask_add(svga->sampler_object_id_bm); + + assert(ps->min_lod <= ps->max_lod); + + if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* just use the base level image */ + min_lod = max_lod = 0.0f; + } + else { + min_lod = ps->min_lod; + max_lod = ps->max_lod; + } + + /* Loop in case command buffer is full and we need to flush and retry */ + for (try = 0; try < 2; try++) { + enum pipe_error ret = + SVGA3D_vgpu10_DefineSamplerState(svga->swc, + ss->id, + filter, + ss->addressu, + ss->addressv, + ss->addressw, + ss->lod_bias, /* float */ + max_aniso, + compare_func, + bcolor, + min_lod, /* float */ + max_lod); /* float */ + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) @@ -141,10 +265,16 @@ svga_create_sampler_state(struct pipe_context *pipe, } } + if (svga_have_vgpu10(svga)) { + define_sampler_state_object(svga, cso, sampler); + } + SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n", cso->min_lod, cso->view_min_lod, cso->view_max_lod, cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? 
"SVGA3D_TEX_FILTER_NONE" : "SOMETHING"); + svga->hud.num_state_objects++; + return cso; } @@ -157,23 +287,31 @@ svga_bind_sampler_states(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); unsigned i; + boolean any_change = FALSE; assert(shader < PIPE_SHADER_TYPES); assert(start + num <= PIPE_MAX_SAMPLERS); - /* we only support fragment shader samplers at this time */ - if (shader != PIPE_SHADER_FRAGMENT) + /* Pre-VGPU10 only supports FS textures */ + if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT) return; - for (i = 0; i < num; i++) - svga->curr.sampler[start + i] = samplers[i]; + for (i = 0; i < num; i++) { + if (svga->curr.sampler[shader][start + i] != samplers[i]) + any_change = TRUE; + svga->curr.sampler[shader][start + i] = samplers[i]; + } + + if (!any_change) { + return; + } /* find highest non-null sampler[] entry */ { - unsigned j = MAX2(svga->curr.num_samplers, start + num); - while (j > 0 && svga->curr.sampler[j - 1] == NULL) + unsigned j = MAX2(svga->curr.num_samplers[shader], start + num); + while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL) j--; - svga->curr.num_samplers = j; + svga->curr.num_samplers[shader] = j; } svga->dirty |= SVGA_NEW_SAMPLER; @@ -183,7 +321,24 @@ svga_bind_sampler_states(struct pipe_context *pipe, static void svga_delete_sampler_state(struct pipe_context *pipe, void *sampler) { + struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler; + struct svga_context *svga = svga_context(pipe); + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id); + } + util_bitmask_clear(svga->sampler_object_id_bm, ss->id); + } + FREE(sampler); + svga->hud.num_state_objects--; } @@ -192,17 +347,21 @@ svga_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) { - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); - - if (view) { - *view = *templ; - view->reference.count = 1; - view->texture = NULL; - pipe_resource_reference(&view->texture, texture); - view->context = pipe; + struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view); + + if (!sv) { + return NULL; } - return view; + sv->base = *templ; + sv->base.reference.count = 1; + sv->base.texture = NULL; + pipe_resource_reference(&sv->base.texture, texture); + + sv->base.context = pipe; + sv->id = SVGA3D_INVALID_ID; + + return &sv->base; } @@ -210,8 +369,37 @@ static void svga_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { - pipe_resource_reference(&view->texture, NULL); - FREE(view); + struct svga_context *svga = svga_context(pipe); + struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view); + + if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) { + if (view->context != pipe) { + /* The SVGA3D device will generate an error (and on Linux, cause + * us to abort) if we try to destroy a shader resource view from + * a context other than the one it was created with. Skip the + * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler + * view for now. This should only sometimes happen when a shared + * texture is deleted. + */ + _debug_printf("context mismatch in %s\n", __func__); + } + else { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); /* XXX is this needed? 
*/ + + ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); + } + util_bitmask_clear(svga->sampler_view_id_bm, sv->id); + } + } + + pipe_resource_reference(&sv->base.texture, NULL); + + FREE(sv); } static void @@ -225,23 +413,25 @@ svga_set_sampler_views(struct pipe_context *pipe, unsigned flag_1d = 0; unsigned flag_srgb = 0; uint i; + boolean any_change = FALSE; assert(shader < PIPE_SHADER_TYPES); - assert(start + num <= Elements(svga->curr.sampler_views)); + assert(start + num <= Elements(svga->curr.sampler_views[shader])); - /* we only support fragment shader sampler views at this time */ - if (shader != PIPE_SHADER_FRAGMENT) + /* Pre-VGPU10 only supports FS textures */ + if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT) return; for (i = 0; i < num; i++) { - if (svga->curr.sampler_views[start + i] != views[i]) { + if (svga->curr.sampler_views[shader][start + i] != views[i]) { /* Note: we're using pipe_sampler_view_release() here to work around * a possible crash when the old view belongs to another context that * was already destroyed. */ - pipe_sampler_view_release(pipe, &svga->curr.sampler_views[start + i]); - pipe_sampler_view_reference(&svga->curr.sampler_views[start + i], + pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]); + pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i], views[i]); + any_change = TRUE; } if (!views[i]) @@ -254,12 +444,16 @@ svga_set_sampler_views(struct pipe_context *pipe, flag_1d |= 1 << (start + i); } + if (!any_change) { + return; + } + /* find highest non-null sampler_views[] entry */ { - unsigned j = MAX2(svga->curr.num_sampler_views, start + num); - while (j > 0 && svga->curr.sampler_views[j - 1] == NULL) + unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num); + while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL) j--; - svga->curr.num_sampler_views = j; + svga->curr.num_sampler_views[shader] = j; } svga->dirty |= SVGA_NEW_TEXTURE_BINDING; @@ -270,7 +464,31 @@ svga_set_sampler_views(struct pipe_context *pipe, svga->dirty |= SVGA_NEW_TEXTURE_FLAGS; svga->curr.tex_flags.flag_1d = flag_1d; svga->curr.tex_flags.flag_srgb = flag_srgb; - } + } + + /* Check if any of the sampler view resources collide with the framebuffer + * color buffers or depth stencil resource. If so, enable the NEW_FRAME_BUFFER + * dirty bit so that emit_framebuffer can be invoked to create backed view + * for the conflicted surface view. + */ + for (i = 0; i < svga->curr.framebuffer.nr_cbufs; i++) { + if (svga->curr.framebuffer.cbufs[i]) { + struct svga_surface *s = svga_surface(svga->curr.framebuffer.cbufs[i]); + if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { + svga->dirty |= SVGA_NEW_FRAME_BUFFER; + break; + } + } + } + + if (svga->curr.framebuffer.zsbuf) { + struct svga_surface *s = svga_surface(svga->curr.framebuffer.zsbuf); + if (s) { + if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { + svga->dirty |= SVGA_NEW_FRAME_BUFFER; + } + } + } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c new file mode 100644 index 000000000..3f443c44e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c @@ -0,0 +1,320 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_memory.h" +#include "util/u_bitmask.h" + +#include "svga_cmd.h" +#include "svga_context.h" +#include "svga_resource_buffer.h" +#include "svga_shader.h" +#include "svga_debug.h" +#include "svga_streamout.h" + +struct svga_stream_output_target { + struct pipe_stream_output_target base; +}; + +/** cast wrapper */ +static inline struct svga_stream_output_target * +svga_stream_output_target(struct pipe_stream_output_target *s) +{ + return (struct svga_stream_output_target *)s; +} + +struct svga_stream_output * +svga_create_stream_output(struct svga_context *svga, + struct svga_shader *shader, + const struct pipe_stream_output_info *info) +{ + struct svga_stream_output *streamout; + SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS]; + unsigned strides[SVGA3D_DX_MAX_SOTARGETS]; + unsigned i; + enum pipe_error ret; + unsigned id; + + assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS); + + /* Gallium utility creates shaders with stream output. + * For non-DX10, just return NULL. + */ + if (!svga_have_vgpu10(svga)) + return NULL; + + assert(info->num_outputs <= SVGA3D_MAX_STREAMOUT_DECLS); + + /* Allocate an integer ID for the stream output */ + id = util_bitmask_add(svga->stream_output_id_bm); + if (id == UTIL_BITMASK_INVALID_INDEX) { + return NULL; + } + + /* Allocate the streamout data structure */ + streamout = CALLOC_STRUCT(svga_stream_output); + + if (!streamout) + return NULL; + + streamout->info = *info; + streamout->id = id; + streamout->pos_out_index = -1; + + SVGA_DBG(DEBUG_STREAMOUT, "%s, num_outputs=%d id=%d\n", __FUNCTION__, + info->num_outputs, id); + + /* init whole decls and stride arrays to zero to avoid garbage values */ + memset(decls, 0, sizeof(decls)); + memset(strides, 0, sizeof(strides)); + + for (i = 0; i < info->num_outputs; i++) { + unsigned reg_idx = info->output[i].register_index; + unsigned buf_idx = info->output[i].output_buffer; + const unsigned sem_name = shader->info.output_semantic_name[reg_idx]; + + assert(buf_idx <= PIPE_MAX_SO_BUFFERS); + + if (sem_name == TGSI_SEMANTIC_POSITION) { + /** + * Check if streaming out POSITION. If so, replace the + * register index with the index for NON_ADJUSTED POSITION. 
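/*
 * A minimal sketch (not from the diff itself) of the registerMask computation
 * used in the stream-output declarations below: the mask selects which of the
 * four register components an output writes, derived from the
 * start_component/num_components fields of pipe_stream_output_info.
 */
static unsigned
so_register_mask(unsigned start_component, unsigned num_components)
{
   /* e.g. a vec3 at component 0 gives 0x7; a lone .w component gives 0x8 */
   return ((1u << num_components) - 1u) << start_component;
}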
+ */ + decls[i].registerIndex = shader->info.num_outputs; + + /* Save this output index, so we can tell later if this stream output + * includes an output of a vertex position + */ + streamout->pos_out_index = i; + } + else if (sem_name == TGSI_SEMANTIC_CLIPDIST) { + /** + * Use the shadow copy for clip distance because + * CLIPDIST instruction is only emitted for enabled clip planes. + * It's valid to write to ClipDistance variable for non-enabled + * clip planes. + */ + decls[i].registerIndex = shader->info.num_outputs + 1 + + shader->info.output_semantic_index[reg_idx]; + } + else { + decls[i].registerIndex = reg_idx; + } + + decls[i].outputSlot = buf_idx; + decls[i].registerMask = + ((1 << info->output[i].num_components) - 1) + << info->output[i].start_component; + + SVGA_DBG(DEBUG_STREAMOUT, "%d slot=%d regIdx=%d regMask=0x%x\n", + i, decls[i].outputSlot, decls[i].registerIndex, + decls[i].registerMask); + + strides[buf_idx] = info->stride[buf_idx] * sizeof(float); + } + + ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id, + info->num_outputs, + strides, + decls); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id, + info->num_outputs, + strides, + decls); + if (ret != PIPE_OK) { + util_bitmask_clear(svga->stream_output_id_bm, id); + FREE(streamout); + streamout = NULL; + } + } + return streamout; +} + +enum pipe_error +svga_set_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout) +{ + enum pipe_error ret = PIPE_OK; + unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID; + + if (!svga_have_vgpu10(svga)) { + return PIPE_OK; + } + + SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __FUNCTION__, + streamout, id); + + if (svga->current_so != streamout) { + /* Save current SO state */ + svga->current_so = streamout; + + ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id); + } + } + + return ret; +} + +void +svga_delete_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout) +{ + enum pipe_error ret; + + SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout); + + assert(svga_have_vgpu10(svga)); + assert(streamout != NULL); + + ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id); + } + + /* Release the ID */ + util_bitmask_clear(svga->stream_output_id_bm, streamout->id); + + /* Free streamout structure */ + FREE(streamout); +} + +static struct pipe_stream_output_target * +svga_create_stream_output_target(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_stream_output_target *sot; + + SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__, + buffer_offset, buffer_size); + + assert(svga_have_vgpu10(svga)); + (void) svga; + + sot = CALLOC_STRUCT(svga_stream_output_target); + if (!sot) + return NULL; + + pipe_reference_init(&sot->base.reference, 1); + pipe_resource_reference(&sot->base.buffer, buffer); + sot->base.context = pipe; + sot->base.buffer = buffer; + sot->base.buffer_offset = buffer_offset; + sot->base.buffer_size = buffer_size; + + return &sot->base; +} + +static void +svga_destroy_stream_output_target(struct pipe_context *pipe, + struct 
pipe_stream_output_target *target) +{ + struct svga_stream_output_target *sot = svga_stream_output_target(target); + + SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__); + + pipe_resource_reference(&sot->base.buffer, NULL); + FREE(sot); +} + +static void +svga_set_stream_output_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct svga_context *svga = svga_context(pipe); + struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS]; + enum pipe_error ret; + unsigned i; + unsigned num_so_targets; + + SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__, + num_targets); + + assert(svga_have_vgpu10(svga)); + + /* Mark the streamout buffers as dirty so that we'll issue readbacks + * before mapping. + */ + for (i = 0; i < svga->num_so_targets; i++) { + struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer); + sbuf->dirty = TRUE; + } + + assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS); + + for (i = 0; i < num_targets; i++) { + struct svga_stream_output_target *sot + = svga_stream_output_target(targets[i]); + struct svga_buffer *sbuf = svga_buffer(sot->base.buffer); + unsigned size; + + assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT); + (void) sbuf; + + svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer); + svga->so_targets[i] = &sot->base; + soBindings[i].offset = sot->base.buffer_offset; + + /* The size cannot extend beyond the end of the buffer. Clamp it. */ + size = MIN2(sot->base.buffer_size, + sot->base.buffer->width0 - sot->base.buffer_offset); + + soBindings[i].sizeInBytes = size; + } + + /* unbind any previously bound stream output buffers */ + for (; i < svga->num_so_targets; i++) { + svga->so_surfaces[i] = NULL; + svga->so_targets[i] = NULL; + } + + num_so_targets = MAX2(svga->num_so_targets, num_targets); + ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets, + soBindings, svga->so_surfaces); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets, + soBindings, svga->so_surfaces); + } + + svga->num_so_targets = num_targets; +} + +void +svga_init_stream_output_functions(struct svga_context *svga) +{ + svga->pipe.create_stream_output_target = svga_create_stream_output_target; + svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target; + svga->pipe.set_stream_output_targets = svga_set_stream_output_targets; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c index faf77f3ab..b932c568f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -23,17 +23,21 @@ * **********************************************************/ +#include "pipe/p_defines.h" +#include "util/u_bitmask.h" +#include "util/u_format.h" #include "util/u_helpers.h" #include "util/u_inlines.h" -#include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" -#include "svga_screen.h" -#include "svga_resource_buffer.h" #include "svga_context.h" +#include "svga_cmd.h" +#include "svga_format.h" +#include "svga_resource_buffer.h" +#include "svga_screen.h" static void svga_set_vertex_buffers(struct pipe_context *pipe, @@ -55,25 +59,33 @@ static void svga_set_index_buffer(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); - if (ib) { - 
pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer); - memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib)); - } - else { - pipe_resource_reference(&svga->curr.ib.buffer, NULL); - memset(&svga->curr.ib, 0, sizeof(svga->curr.ib)); - } + util_set_index_buffer(&svga->curr.ib, ib); +} + - /* TODO make this more like a state */ +/** + * Does the given vertex attrib format need range adjustment in the VS? + * Range adjustment scales and biases values from [0,1] to [-1,1]. + * This lets us avoid the swtnl path. + */ +static boolean +attrib_needs_range_adjustment(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8_SNORM: + return TRUE; + default: + return FALSE; + } } /** - * Given a gallium vertex element format, return the corresponding SVGA3D - * format. Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats. + * Given a gallium vertex element format, return the corresponding + * SVGA3dDeclType. */ static SVGA3dDeclType -translate_vertex_format(enum pipe_format format) +translate_vertex_format_to_decltype(enum pipe_format format) { switch (format) { case PIPE_FORMAT_R32_FLOAT: return SVGA3D_DECLTYPE_FLOAT1; @@ -94,10 +106,10 @@ translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; - /* See attrib_needs_adjustment() and attrib_needs_w_to_1() below */ + /* See attrib_needs_adjustment() and attrib_needs_w_to_1() above */ case PIPE_FORMAT_R8G8B8_SNORM: return SVGA3D_DECLTYPE_UBYTE4N; - /* See attrib_needs_w_to_1() below */ + /* See attrib_needs_w_to_1() above */ case PIPE_FORMAT_R16G16B16_SNORM: return SVGA3D_DECLTYPE_SHORT4N; case PIPE_FORMAT_R16G16B16_UNORM: return SVGA3D_DECLTYPE_USHORT4N; case PIPE_FORMAT_R8G8B8_UNORM: return SVGA3D_DECLTYPE_UBYTE4N; @@ -111,38 +123,121 @@ translate_vertex_format(enum pipe_format format) } -/** - * Does the given vertex attrib format need range adjustment in the VS? - * Range adjustment scales and biases values from [0,1] to [-1,1]. - * This lets us avoid the swtnl path. 
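/*
 * A minimal sketch of the "range adjustment" the comment above describes:
 * values fetched as unsigned-normalized [0,1] are scaled and biased to
 * [-1,1] in the vertex shader (v * 2 - 1), which is how a format such as
 * PIPE_FORMAT_R8G8B8_SNORM can be fetched as SVGA3D_DECLTYPE_UBYTE4N
 * without falling back to the software TNL path.
 */
static float
range_adjust(float unorm_value)
{
   return unorm_value * 2.0f - 1.0f;   /* 0.0 -> -1.0, 0.5 -> 0.0, 1.0 -> 1.0 */
}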
- */ -static boolean -attrib_needs_range_adjustment(enum pipe_format format) +static void +define_input_element_object(struct svga_context *svga, + struct svga_velems_state *velems) { - switch (format) { - case PIPE_FORMAT_R8G8B8_SNORM: - return TRUE; - default: - return FALSE; + SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS]; + enum pipe_error ret; + unsigned i; + + assert(velems->count <= PIPE_MAX_ATTRIBS); + assert(svga_have_vgpu10(svga)); + + for (i = 0; i < velems->count; i++) { + const struct pipe_vertex_element *elem = velems->velem + i; + SVGA3dSurfaceFormat svga_format; + unsigned vf_flags; + + svga_translate_vertex_format_vgpu10(elem->src_format, + &svga_format, &vf_flags); + + velems->decl_type[i] = + translate_vertex_format_to_decltype(elem->src_format); + elements[i].inputSlot = elem->vertex_buffer_index; + elements[i].alignedByteOffset = elem->src_offset; + elements[i].format = svga_format; + + if (elem->instance_divisor) { + elements[i].inputSlotClass = SVGA3D_INPUT_PER_INSTANCE_DATA; + elements[i].instanceDataStepRate = elem->instance_divisor; + } + else { + elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA; + elements[i].instanceDataStepRate = 0; + } + elements[i].inputRegister = i; + + if (elements[i].format == SVGA3D_FORMAT_INVALID) { + velems->need_swvfetch = TRUE; + } + + if (util_format_is_pure_integer(elem->src_format)) { + velems->attrib_is_pure_int |= (1 << i); + } + + if (vf_flags & VF_W_TO_1) { + velems->adjust_attrib_w_1 |= (1 << i); + } + + if (vf_flags & VF_U_TO_F_CAST) { + velems->adjust_attrib_utof |= (1 << i); + } + else if (vf_flags & VF_I_TO_F_CAST) { + velems->adjust_attrib_itof |= (1 << i); + } + + if (vf_flags & VF_BGRA) { + velems->attrib_is_bgra |= (1 << i); + } + + if (vf_flags & VF_PUINT_TO_SNORM) { + velems->attrib_puint_to_snorm |= (1 << i); + } + else if (vf_flags & VF_PUINT_TO_USCALED) { + velems->attrib_puint_to_uscaled |= (1 << i); + } + else if (vf_flags & VF_PUINT_TO_SSCALED) { + velems->attrib_puint_to_sscaled |= (1 << i); + } + } + + velems->id = util_bitmask_add(svga->input_element_object_id_bm); + + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count, + velems->id, elements); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count, + velems->id, elements); + assert(ret == PIPE_OK); } } /** - * Does the given vertex attrib format need to have the W component set - * to one in the VS? + * Translate the vertex element types to SVGA3dDeclType and check + * for VS-based vertex attribute adjustments. 
*/ -static boolean -attrib_needs_w_to_1(enum pipe_format format) +static void +translate_vertex_decls(struct svga_context *svga, + struct svga_velems_state *velems) { - switch (format) { - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16_UNORM: - return TRUE; - default: - return FALSE; + unsigned i; + + assert(!svga_have_vgpu10(svga)); + + for (i = 0; i < velems->count; i++) { + const enum pipe_format f = velems->velem[i].src_format; + SVGA3dSurfaceFormat svga_format; + unsigned vf_flags; + + svga_translate_vertex_format_vgpu10(f, &svga_format, &vf_flags); + + velems->decl_type[i] = translate_vertex_format_to_decltype(f); + if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) { + /* Unsupported format - use software fetch */ + velems->need_swvfetch = TRUE; + } + + /* Check for VS-based adjustments */ + if (attrib_needs_range_adjustment(f)) { + velems->adjust_attrib_range |= (1 << i); + } + + if (vf_flags & VF_W_TO_1) { + velems->adjust_attrib_w_1 |= (1 << i); + } } } @@ -152,54 +247,78 @@ svga_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) { + struct svga_context *svga = svga_context(pipe); struct svga_velems_state *velems; + assert(count <= PIPE_MAX_ATTRIBS); velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state)); if (velems) { - unsigned i; - velems->count = count; memcpy(velems->velem, attribs, sizeof(*attribs) * count); velems->need_swvfetch = FALSE; velems->adjust_attrib_range = 0x0; + velems->attrib_is_pure_int = 0x0; velems->adjust_attrib_w_1 = 0x0; - - /* Translate Gallium vertex format to SVGA3dDeclType */ - for (i = 0; i < count; i++) { - enum pipe_format f = attribs[i].src_format; - velems->decl_type[i] = translate_vertex_format(f); - if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) { - /* Unsupported format - use software fetch */ - velems->need_swvfetch = TRUE; - break; - } - - if (attrib_needs_range_adjustment(f)) { - velems->adjust_attrib_range |= (1 << i); - } - if (attrib_needs_w_to_1(f)) { - velems->adjust_attrib_w_1 |= (1 << i); - } + velems->adjust_attrib_itof = 0x0; + velems->adjust_attrib_utof = 0x0; + velems->attrib_is_bgra = 0x0; + velems->attrib_puint_to_snorm = 0x0; + velems->attrib_puint_to_uscaled = 0x0; + velems->attrib_puint_to_sscaled = 0x0; + + if (svga_have_vgpu10(svga)) { + define_input_element_object(svga, velems); + } + else { + translate_vertex_decls(svga, velems); } } + + svga->hud.num_state_objects++; + return velems; } -static void svga_bind_vertex_elements_state(struct pipe_context *pipe, - void *velems) + +static void +svga_bind_vertex_elements_state(struct pipe_context *pipe, void *state) { struct svga_context *svga = svga_context(pipe); - struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems; + struct svga_velems_state *velems = (struct svga_velems_state *) state; - svga->curr.velems = svga_velems; + svga->curr.velems = velems; svga->dirty |= SVGA_NEW_VELEMENT; } -static void svga_delete_vertex_elements_state(struct pipe_context *pipe, - void *velems) + +static void +svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state) { + struct svga_context *svga = svga_context(pipe); + struct svga_velems_state *velems = (struct svga_velems_state *) state; + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id); + if (ret != PIPE_OK) { + 
svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id); + assert(ret == PIPE_OK); + } + + if (velems->id == svga->state.hw_draw.layout_id) + svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->input_element_object_id_bm, velems->id); + velems->id = SVGA3D_INVALID_ID; + } + FREE(velems); + svga->hud.num_state_objects--; } void svga_cleanup_vertex_state( struct svga_context *svga ) @@ -219,5 +338,3 @@ void svga_init_vertex_functions( struct svga_context *svga ) svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; } - - diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c index c3ac663b4..630f49078 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c @@ -32,11 +32,11 @@ #include "tgsi/tgsi_text.h" #include "svga_context.h" -#include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" #include "svga_debug.h" #include "svga_shader.h" +#include "svga_streamout.h" /** @@ -100,6 +100,7 @@ svga_create_vs_state(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader); + if (!vs) return NULL; @@ -123,10 +124,12 @@ svga_create_vs_state(struct pipe_context *pipe, vs->base.id = svga->debug.shader_id++; - if (SVGA_DEBUG & DEBUG_TGSI || 0) { - debug_printf("%s id: %u, inputs: %u, outputs: %u\n", - __FUNCTION__, vs->base.id, - vs->base.info.num_inputs, vs->base.info.num_outputs); + vs->generic_outputs = svga_get_generic_outputs_mask(&vs->base.info); + + /* check for any stream output declarations */ + if (templ->stream_output.num_outputs) { + vs->base.stream_output = svga_create_stream_output(svga, &vs->base, + &templ->stream_output); } return vs; @@ -139,6 +142,17 @@ svga_bind_vs_state(struct pipe_context *pipe, void *shader) struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; struct svga_context *svga = svga_context(pipe); + if (vs == svga->curr.vs) + return; + + /* If the currently bound vertex shader has a generated geometry shader, + * then unbind the geometry shader before binding a new vertex shader. + * We need to unbind the geometry shader here because there is no + * pipe_shader associated with the generated geometry shader. + */ + if (svga->curr.vs != NULL && svga->curr.vs->gs != NULL) + svga->pipe.bind_gs_state(&svga->pipe, NULL); + svga->curr.vs = vs; svga->dirty |= SVGA_NEW_VS; } @@ -154,20 +168,40 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader) svga_hwtnl_flush_retry(svga); + assert(vs->base.parent == NULL); + + /* Check if there is a generated geometry shader to go with this + * vertex shader. If there is, then delete the geometry shader as well. + */ + if (vs->gs != NULL) { + svga->pipe.delete_gs_state(&svga->pipe, vs->gs); + } + + if (vs->base.stream_output != NULL) + svga_delete_stream_output(svga, vs->base.stream_output); + draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader); for (variant = vs->base.variants; variant; variant = tmp) { tmp = variant->next; - ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); - (void) ret; /* PIPE_ERROR_ not handled yet */ - - /* - * Remove stale references to this variant to ensure a new variant on the - * same address will be detected as a change. 
- */ - if (variant == svga->state.hw_draw.vs) + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.vs) { + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); + assert(ret == PIPE_OK); + } svga->state.hw_draw.vs = NULL; + } + + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + assert(ret == PIPE_OK); + } } FREE((void *)vs->base.tokens); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource.c b/lib/mesa/src/gallium/drivers/svga/svga_resource.c index b295b44ea..1c3bcd67a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource.c @@ -69,18 +69,21 @@ svga_can_create_resource(struct pipe_screen *screen, struct svga_winsys_screen *sws = svgascreen->sws; SVGA3dSurfaceFormat format; SVGA3dSize base_level_size; - uint32 numFaces; uint32 numMipLevels; + uint32 arraySize; if (res->target == PIPE_BUFFER) { format = SVGA3D_BUFFER; base_level_size.width = res->width0; base_level_size.height = 1; base_level_size.depth = 1; - numFaces = 1; numMipLevels = 1; + arraySize = 1; } else { + if (res->target == PIPE_TEXTURE_CUBE) + assert(res->array_size == 6); + format = svga_translate_format(svgascreen, res->format, res->bind); if (format == SVGA3D_FORMAT_INVALID) return FALSE; @@ -88,12 +91,12 @@ svga_can_create_resource(struct pipe_screen *screen, base_level_size.width = res->width0; base_level_size.height = res->height0; base_level_size.depth = res->depth0; - numFaces = (res->target == PIPE_TEXTURE_CUBE) ? 
6 : 1; numMipLevels = res->last_level + 1; + arraySize = res->array_size; } return sws->surface_can_create(sws, format, base_level_size, - numFaces, numMipLevels); + arraySize, numMipLevels); } @@ -104,6 +107,12 @@ svga_init_resource_functions(struct svga_context *svga) svga->pipe.transfer_flush_region = u_transfer_flush_region_vtbl; svga->pipe.transfer_unmap = u_transfer_unmap_vtbl; svga->pipe.transfer_inline_write = u_transfer_inline_write_vtbl; + + if (svga_have_vgpu10(svga)) { + svga->pipe.generate_mipmap = svga_texture_generate_mipmap; + } else { + svga->pipe.generate_mipmap = NULL; + } } void diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c index 13f85cddb..a8ffcc7f6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c @@ -29,6 +29,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "os/os_thread.h" +#include "os/os_time.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_resource.h" @@ -48,7 +49,8 @@ static inline boolean svga_buffer_needs_hw_storage(unsigned usage) { - return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER); + return (usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT)) != 0; } @@ -76,9 +78,15 @@ svga_buffer_transfer_map(struct pipe_context *pipe, struct svga_buffer *sbuf = svga_buffer(resource); struct pipe_transfer *transfer; uint8_t *map; + int64_t begin = os_time_get(); + + assert(box->y == 0); + assert(box->z == 0); + assert(box->height == 1); + assert(box->depth == 1); transfer = CALLOC_STRUCT(pipe_transfer); - if (transfer == NULL) { + if (!transfer) { return NULL; } @@ -87,6 +95,26 @@ svga_buffer_transfer_map(struct pipe_context *pipe, transfer->usage = usage; transfer->box = *box; + if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) { + /* Only need to test for vgpu10 since only vgpu10 features (streamout, + * buffer copy) can modify buffers on the device. 
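/*
 * A minimal sketch of the issue-then-retry idiom used throughout these hunks
 * (the readback just below, DefineStreamOutput, DestroySamplerState, and
 * others): if a command fails because the command buffer is full, flush once
 * and retry.  The callback type here is hypothetical; the driver simply
 * repeats the call inline instead.
 */
typedef enum pipe_error (*svga_emit_fn)(struct svga_context *svga, void *arg);

static enum pipe_error
emit_with_retry(struct svga_context *svga, svga_emit_fn emit, void *arg)
{
   enum pipe_error ret = emit(svga, arg);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);   /* make room in the command buffer */
      ret = emit(svga, arg);
   }
   return ret;
}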
+ */ + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + assert(sbuf->handle); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); + assert(ret == PIPE_OK); + } + + svga_context_finish(svga); + + sbuf->dirty = FALSE; + } + } + if (usage & PIPE_TRANSFER_WRITE) { if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { /* @@ -222,7 +250,9 @@ svga_buffer_transfer_map(struct pipe_context *pipe, } else { FREE(transfer); } - + + svga->hud.map_buffer_time += (os_time_get() - begin); + return map; } @@ -254,9 +284,9 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe, struct svga_screen *ss = svga_screen(pipe->screen); struct svga_context *svga = svga_context(pipe); struct svga_buffer *sbuf = svga_buffer(transfer->resource); - + pipe_mutex_lock(ss->swc_mutex); - + assert(sbuf->map.count); if (sbuf->map.count) { --sbuf->map.count; @@ -275,7 +305,7 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe, */ SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); - + sbuf->dma.flags.discard = TRUE; svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0); @@ -295,28 +325,31 @@ svga_buffer_destroy( struct pipe_screen *screen, struct svga_buffer *sbuf = svga_buffer( buf ); assert(!p_atomic_read(&buf->reference.count)); - + assert(!sbuf->dma.pending); - if(sbuf->handle) + if (sbuf->handle) svga_buffer_destroy_host_surface(ss, sbuf); - - if(sbuf->uploaded.buffer) + + if (sbuf->uploaded.buffer) pipe_resource_reference(&sbuf->uploaded.buffer, NULL); - if(sbuf->hwbuf) + if (sbuf->hwbuf) svga_buffer_destroy_hw_storage(ss, sbuf); - - if(sbuf->swbuf && !sbuf->user) + + if (sbuf->swbuf && !sbuf->user) align_free(sbuf->swbuf); - - ss->total_resource_bytes -= sbuf->size; + + ss->hud.total_resource_bytes -= sbuf->size; + assert(ss->hud.num_resources > 0); + if (ss->hud.num_resources > 0) + ss->hud.num_resources--; FREE(sbuf); } -struct u_resource_vtbl svga_buffer_vtbl = +struct u_resource_vtbl svga_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ svga_buffer_destroy, /* resource_destroy */ @@ -334,33 +367,65 @@ svga_buffer_create(struct pipe_screen *screen, { struct svga_screen *ss = svga_screen(screen); struct svga_buffer *sbuf; - + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto error1; - + sbuf->b.b = *template; sbuf->b.vtbl = &svga_buffer_vtbl; pipe_reference_init(&sbuf->b.b.reference, 1); sbuf->b.b.screen = screen; + sbuf->bind_flags = template->bind; + + if (template->bind & PIPE_BIND_CONSTANT_BUFFER) { + /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER + * flag set. + */ + if (ss->sws->have_vgpu10) { + sbuf->bind_flags = PIPE_BIND_CONSTANT_BUFFER; + + /* Constant buffer size needs to be in multiples of 16. */ + sbuf->b.b.width0 = align(sbuf->b.b.width0, 16); + } + } + + if (svga_buffer_needs_hw_storage(template->bind)) { + + /* If the buffer will be used for vertex/index/stream data, set all + * the flags so that the buffer will be accepted for all those uses. + * Note that the PIPE_BIND_ flags we get from the state tracker are + * just a hint about how the buffer may be used. And OpenGL buffer + * object may be used for many different things. + */ + if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER)) { + /* Not a constant buffer. The buffer may be used for vertex data, + * indexes or stream-out. 
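/*
 * A minimal sketch of the 16-byte rounding applied to constant-buffer sizes
 * above; Mesa's align() helper performs the same round-up.
 */
static unsigned
align_to_16(unsigned size_in_bytes)
{
   return (size_in_bytes + 15u) & ~15u;   /* 20 -> 32, 16 -> 16 */
}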
+ */ + sbuf->bind_flags |= (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER); + if (ss->sws->have_vgpu10) + sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT; + } - if(svga_buffer_needs_hw_storage(template->bind)) { - if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) + if (svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) goto error2; } else { - sbuf->swbuf = align_malloc(template->width0, 64); - if(!sbuf->swbuf) + sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64); + if (!sbuf->swbuf) goto error2; } - + debug_reference(&sbuf->b.b.reference, (debug_reference_descriptor)debug_describe_resource, 0); - sbuf->size = util_resource_size(template); - ss->total_resource_bytes += sbuf->size; + sbuf->size = util_resource_size(&sbuf->b.b); + ss->hud.total_resource_bytes += sbuf->size; + + ss->hud.num_resources++; - return &sbuf->b.b; + return &sbuf->b.b; error2: FREE(sbuf); @@ -368,6 +433,7 @@ error1: return NULL; } + struct pipe_resource * svga_user_buffer_create(struct pipe_screen *screen, void *ptr, @@ -375,11 +441,12 @@ svga_user_buffer_create(struct pipe_screen *screen, unsigned bind) { struct svga_buffer *sbuf; - + struct svga_screen *ss = svga_screen(screen); + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto no_sbuf; - + pipe_reference_init(&sbuf->b.b.reference, 1); sbuf->b.vtbl = &svga_buffer_vtbl; sbuf->b.b.screen = screen; @@ -391,13 +458,16 @@ svga_user_buffer_create(struct pipe_screen *screen, sbuf->b.b.depth0 = 1; sbuf->b.b.array_size = 1; + sbuf->bind_flags = bind; sbuf->swbuf = ptr; sbuf->user = TRUE; debug_reference(&sbuf->b.b.reference, (debug_reference_descriptor)debug_describe_resource, 0); - - return &sbuf->b.b; + + ss->hud.num_resources++; + + return &sbuf->b.b; no_sbuf: return NULL; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h index e838beb66..0591f8960 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h @@ -65,6 +65,9 @@ struct svga_buffer { struct u_resource b; + /** This is a superset of b.b.bind */ + unsigned bind_flags; + /** * Regular (non DMA'able) memory. * @@ -187,6 +190,8 @@ struct svga_buffer struct list_head head; unsigned size; /**< Approximate size in bytes */ + + boolean dirty; /**< Need to do a readback before mapping? 
*/ }; @@ -248,6 +253,9 @@ svga_buffer_hw_storage_map(struct svga_context *svga, unsigned flags, boolean *retry) { struct svga_winsys_screen *sws = svga_buffer_winsys_screen(sbuf); + + svga->hud.num_resources_mapped++; + if (sws->have_gb_objects) { return svga->swc->surface_map(svga->swc, sbuf->handle, flags, retry); } else { diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 5686531f9..7f7ceab0a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -149,10 +149,22 @@ svga_buffer_create_host_surface(struct svga_screen *ss, sbuf->key.flags = 0; sbuf->key.format = SVGA3D_BUFFER; - if (sbuf->b.b.bind & PIPE_BIND_VERTEX_BUFFER) + if (sbuf->bind_flags & PIPE_BIND_VERTEX_BUFFER) { sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER; - if (sbuf->b.b.bind & PIPE_BIND_INDEX_BUFFER) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_VERTEX_BUFFER; + } + if (sbuf->bind_flags & PIPE_BIND_INDEX_BUFFER) { sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER; + sbuf->key.flags |= SVGA3D_SURFACE_BIND_INDEX_BUFFER; + } + if (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_CONSTANT_BUFFER; + + if (sbuf->bind_flags & PIPE_BIND_STREAM_OUTPUT) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_STREAM_OUTPUT; + + if (sbuf->bind_flags & PIPE_BIND_SAMPLER_VIEW) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE; sbuf->key.size.width = sbuf->b.b.width0; sbuf->key.size.height = 1; @@ -161,10 +173,12 @@ svga_buffer_create_host_surface(struct svga_screen *ss, sbuf->key.numFaces = 1; sbuf->key.numMipLevels = 1; sbuf->key.cachable = 1; + sbuf->key.arraySize = 1; SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->b.b.width0); - sbuf->handle = svga_screen_surface_create(ss, &sbuf->key); + sbuf->handle = svga_screen_surface_create(ss, sbuf->b.b.bind, + sbuf->b.b.usage, &sbuf->key); if (!sbuf->handle) return PIPE_ERROR_OUT_OF_MEMORY; @@ -203,79 +217,89 @@ svga_buffer_upload_gb_command(struct svga_context *svga, struct svga_buffer *sbuf) { struct svga_winsys_context *swc = svga->swc; - SVGA3dCmdUpdateGBImage *cmd; - struct svga_3d_update_gb_image *ccmd = NULL; + SVGA3dCmdUpdateGBImage *update_cmd; + struct svga_3d_update_gb_image *whole_update_cmd = NULL; uint32 numBoxes = sbuf->map.num_ranges; struct pipe_resource *dummy; - unsigned int i; + unsigned i; assert(numBoxes); assert(sbuf->dma.updates == NULL); if (sbuf->dma.flags.discard) { struct svga_3d_invalidate_gb_image *cicmd = NULL; - SVGA3dCmdInvalidateGBImage *icmd; + SVGA3dCmdInvalidateGBImage *invalidate_cmd; + const unsigned total_commands_size = + sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd); /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by * 'numBoxes' UPDATE_GB_IMAGE commands. Allocate all at once rather * than with separate commands because we need to properly deal with * filling the command buffer. 
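/*
 * A minimal sketch (reusing the command struct names from this hunk) of the
 * reservation size for the discard path: one INVALIDATE_GB_IMAGE body
 * followed by numBoxes full header-plus-body UPDATE_GB_IMAGE commands,
 * matching the total_commands_size computed in this hunk.
 */
static size_t
discard_upload_reserve_size(unsigned numBoxes)
{
   return sizeof(SVGA3dCmdInvalidateGBImage)
        + numBoxes * sizeof(struct svga_3d_update_gb_image);
}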
*/ - icmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_INVALIDATE_GB_IMAGE, - sizeof *icmd + numBoxes * sizeof *ccmd, - 2); - if (!icmd) + invalidate_cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE, + total_commands_size, 1 + numBoxes); + if (!invalidate_cmd) return PIPE_ERROR_OUT_OF_MEMORY; - cicmd = container_of(icmd, cicmd, body); - cicmd->header.size = sizeof *icmd; - swc->surface_relocation(swc, &icmd->image.sid, NULL, sbuf->handle, + cicmd = container_of(invalidate_cmd, cicmd, body); + cicmd->header.size = sizeof(*invalidate_cmd); + swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL, sbuf->handle, (SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL | SVGA_RELOC_DMA)); - icmd->image.face = 0; - icmd->image.mipmap = 0; + invalidate_cmd->image.face = 0; + invalidate_cmd->image.mipmap = 0; + /* The whole_update_command is a SVGA3dCmdHeader plus the + * SVGA3dCmdUpdateGBImage command. + */ + whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1]; /* initialize the first UPDATE_GB_IMAGE command */ - ccmd = (struct svga_3d_update_gb_image *) &icmd[1]; - ccmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd = &ccmd->body; + whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; + update_cmd = &whole_update_cmd->body; } else { /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */ - cmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_UPDATE_GB_IMAGE, - sizeof *cmd + (numBoxes - 1) * sizeof *ccmd, - 1); - if (!cmd) + const unsigned total_commands_size = + sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd); + + update_cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_UPDATE_GB_IMAGE, + total_commands_size, numBoxes); + if (!update_cmd) return PIPE_ERROR_OUT_OF_MEMORY; - ccmd = container_of(cmd, ccmd, body); + /* The whole_update_command is a SVGA3dCmdHeader plus the + * SVGA3dCmdUpdateGBImage command. + */ + whole_update_cmd = container_of(update_cmd, whole_update_cmd, body); } /* Init the first UPDATE_GB_IMAGE command */ - ccmd->header.size = sizeof *cmd; - swc->surface_relocation(swc, &cmd->image.sid, NULL, sbuf->handle, + whole_update_cmd->header.size = sizeof(*update_cmd); + swc->surface_relocation(swc, &update_cmd->image.sid, NULL, sbuf->handle, SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); - cmd->image.face = 0; - cmd->image.mipmap = 0; + update_cmd->image.face = 0; + update_cmd->image.mipmap = 0; /* Save pointer to the first UPDATE_GB_IMAGE command so that we can * fill in the box info below. */ - sbuf->dma.updates = ccmd; + sbuf->dma.updates = whole_update_cmd; /* - * Copy the relocation info, face and mipmap to all - * subsequent commands. NOTE: For winsyses that actually - * patch the image.sid member at flush time, this will fail - * miserably. For those we need to add as many relocations - * as there are copy boxes. + * Copy the face, mipmap, etc. info to all subsequent commands. + * Also do the surface relocation for each subsequent command. 
*/ - for (i = 1; i < numBoxes; ++i) { - memcpy(++ccmd, sbuf->dma.updates, sizeof *ccmd); + whole_update_cmd++; + memcpy(whole_update_cmd, sbuf->dma.updates, sizeof(*whole_update_cmd)); + + swc->surface_relocation(swc, &whole_update_cmd->body.image.sid, NULL, + sbuf->handle, + SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); } /* Increment reference count */ @@ -284,6 +308,7 @@ svga_buffer_upload_gb_command(struct svga_context *svga, pipe_resource_reference(&dummy, &sbuf->b.b); SVGA_FIFOCommitAll(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; sbuf->dma.flags.discard = FALSE; return PIPE_OK; @@ -357,6 +382,7 @@ svga_buffer_upload_command(struct svga_context *svga, SVGA_FIFOCommitAll(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; sbuf->dma.flags.discard = FALSE; return PIPE_OK; @@ -405,6 +431,8 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } else { @@ -430,6 +458,8 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c index 64fd245c0..3f754c4d5 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c @@ -29,6 +29,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "os/os_thread.h" +#include "os/os_time.h" #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_math.h" @@ -46,12 +47,6 @@ #include "svga_debug.h" -/* XXX: This isn't a real hardware flag, but just a hack for kernel to - * know about primary surfaces. Find a better way to accomplish this. - */ -#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) - - static void svga_transfer_dma_band(struct svga_context *svga, struct svga_transfer *st, @@ -59,10 +54,10 @@ svga_transfer_dma_band(struct svga_context *svga, unsigned y, unsigned h, unsigned srcy, SVGA3dSurfaceDMAFlags flags) { - struct svga_texture *texture = svga_texture(st->base.resource); + struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCopyBox box; enum pipe_error ret; - + assert(!st->use_direct_map); box.x = st->base.box.x; @@ -75,28 +70,23 @@ svga_transfer_dma_band(struct svga_context *svga, box.srcy = srcy; box.srcz = 0; - if (st->base.resource->target == PIPE_TEXTURE_CUBE) { - st->face = st->base.box.z; - box.z = 0; - } - else - st->face = 0; - - SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n", - transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", - texture->handle, - st->face, - st->base.box.x, - y, - box.z, - st->base.box.x + st->base.box.width, - y + h, - box.z + 1, - util_format_get_blocksize(texture->b.b.format) * 8 / - (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format))); + SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - " + "(%u, %u, %u), %ubpp\n", + transfer == SVGA3D_WRITE_HOST_VRAM ? 
"to" : "from", + texture->handle, + st->slice, + st->base.box.x, + y, + box.z, + st->base.box.x + st->base.box.width, + y + h, + box.z + 1, + util_format_get_blocksize(texture->b.b.format) * 8 / + (util_format_get_blockwidth(texture->b.b.format) + * util_format_get_blockheight(texture->b.b.format))); ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); - if(ret != PIPE_OK) { + if (ret != PIPE_OK) { svga_context_flush(svga, NULL); ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); assert(ret == PIPE_OK); @@ -110,7 +100,7 @@ svga_transfer_dma(struct svga_context *svga, SVGA3dTransferType transfer, SVGA3dSurfaceDMAFlags flags) { - struct svga_texture *texture = svga_texture(st->base.resource); + struct svga_texture *texture = svga_texture(st->base.resource); struct svga_screen *screen = svga_screen(texture->b.b.screen); struct svga_winsys_screen *sws = screen->sws; struct pipe_fence_handle *fence = NULL; @@ -126,14 +116,13 @@ svga_transfer_dma(struct svga_context *svga, */ svga_surfaces_flush( svga ); - if(!st->swbuf) { + if (!st->swbuf) { /* Do the DMA transfer in a single go */ - svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0, flags); - if(transfer == SVGA3D_READ_HOST_VRAM) { + if (transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); sws->fence_reference(sws, &fence, NULL); @@ -141,10 +130,13 @@ svga_transfer_dma(struct svga_context *svga, } else { int y, h, srcy; - unsigned blockheight = util_format_get_blockheight(st->base.resource->format); + unsigned blockheight = + util_format_get_blockheight(st->base.resource->format); + h = st->hw_nblocksy * blockheight; srcy = 0; - for(y = 0; y < st->base.box.height; y += h) { + + for (y = 0; y < st->base.box.height; y += h) { unsigned offset, length; void *hw, *sw; @@ -158,7 +150,7 @@ svga_transfer_dma(struct svga_context *svga, offset = y * st->base.stride / blockheight; length = h * st->base.stride / blockheight; - sw = (uint8_t *)st->swbuf + offset; + sw = (uint8_t *) st->swbuf + offset; if (transfer == SVGA3D_WRITE_HOST_VRAM) { unsigned usage = PIPE_TRANSFER_WRITE; @@ -184,16 +176,15 @@ svga_transfer_dma(struct svga_context *svga, * Prevent the texture contents to be discarded on the next band * upload. 
*/ - flags.discard = FALSE; - if(transfer == SVGA3D_READ_HOST_VRAM) { + if (transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ); assert(hw); - if(hw) { + if (hw) { memcpy(sw, hw, length); sws->buffer_unmap(sws, st->hwbuf); } @@ -203,19 +194,22 @@ svga_transfer_dma(struct svga_context *svga, } -static boolean +static boolean svga_texture_get_handle(struct pipe_screen *screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) + struct pipe_resource *texture, + struct winsys_handle *whandle) { struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen); unsigned stride; assert(svga_texture(texture)->key.cachable == 0); svga_texture(texture)->key.cachable = 0; + stride = util_format_get_nblocksx(texture->format, texture->width0) * util_format_get_blocksize(texture->format); - return sws->surface_get_handle(sws, svga_texture(texture)->handle, stride, whandle); + + return sws->surface_get_handle(sws, svga_texture(texture)->handle, + stride, whandle); } @@ -236,10 +230,15 @@ svga_texture_destroy(struct pipe_screen *screen, SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle); svga_screen_surface_destroy(ss, &tex->key, &tex->handle); - ss->total_resource_bytes -= tex->size; + ss->hud.total_resource_bytes -= tex->size; + FREE(tex->defined); FREE(tex->rendered_to); FREE(tex); + + assert(ss->hud.num_resources > 0); + if (ss->hud.num_resources > 0) + ss->hud.num_resources--; } @@ -274,10 +273,43 @@ need_tex_readback(struct pipe_transfer *transfer) } +static enum pipe_error +readback_image_vgpu9(struct svga_context *svga, + struct svga_winsys_surface *surf, + unsigned slice, + unsigned level) +{ + enum pipe_error ret; + + ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level); + } + return ret; +} + + +static enum pipe_error +readback_image_vgpu10(struct svga_context *svga, + struct svga_winsys_surface *surf, + unsigned slice, + unsigned level, + unsigned numMipLevels) +{ + enum pipe_error ret; + unsigned subResource; + + subResource = slice * numMipLevels + level; + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource); + } + return ret; +} + -/* XXX: Still implementing this as if it was a screen function, but - * can now modify it to queue transfers on the context. 
- */ static void * svga_texture_transfer_map(struct pipe_context *pipe, struct pipe_resource *texture, @@ -289,11 +321,14 @@ svga_texture_transfer_map(struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; + struct svga_texture *tex = svga_texture(texture); struct svga_transfer *st; unsigned nblocksx, nblocksy; boolean use_direct_map = svga_have_gb_objects(svga) && !svga_have_gb_dma(svga); unsigned d; + void *returnVal; + int64_t begin = os_time_get(); /* We can't map texture storage directly unless we have GB objects */ if (usage & PIPE_TRANSFER_MAP_DIRECTLY) { @@ -326,25 +361,40 @@ svga_texture_transfer_map(struct pipe_context *pipe, } pipe_resource_reference(&st->base.resource, texture); + st->base.level = level; st->base.usage = usage; st->base.box = *box; st->base.stride = nblocksx*util_format_get_blocksize(texture->format); st->base.layer_stride = st->base.stride * nblocksy; + switch (tex->b.b.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + st->slice = st->base.box.z; + st->base.box.z = 0; /* so we don't apply double offsets below */ + break; + default: + st->slice = 0; + break; + } + + if (usage & PIPE_TRANSFER_WRITE) { + /* record texture upload for HUD */ + svga->hud.num_bytes_uploaded += + nblocksx * nblocksy * d * util_format_get_blocksize(texture->format); + } + if (!use_direct_map) { /* Use a DMA buffer */ st->hw_nblocksy = nblocksy; - st->hwbuf = svga_winsys_buffer_create(svga, - 1, - 0, - st->hw_nblocksy * st->base.stride * d); + st->hwbuf = svga_winsys_buffer_create(svga, 1, 0, + st->hw_nblocksy * st->base.stride * d); while(!st->hwbuf && (st->hw_nblocksy /= 2)) { - st->hwbuf = svga_winsys_buffer_create(svga, - 1, - 0, - st->hw_nblocksy * st->base.stride * d); + st->hwbuf = svga_winsys_buffer_create(svga, 1, 0, + st->hw_nblocksy * st->base.stride * d); } if (!st->hwbuf) { @@ -352,8 +402,8 @@ svga_texture_transfer_map(struct pipe_context *pipe, return NULL; } - if(st->hw_nblocksy < nblocksy) { - /* We couldn't allocate a hardware buffer big enough for the transfer, + if (st->hw_nblocksy < nblocksy) { + /* We couldn't allocate a hardware buffer big enough for the transfer, * so allocate regular malloc memory instead */ if (0) { debug_printf("%s: failed to allocate %u KB of DMA, " @@ -379,45 +429,27 @@ svga_texture_transfer_map(struct pipe_context *pipe, } } else { struct pipe_transfer *transfer = &st->base; - struct svga_texture *tex = svga_texture(transfer->resource); struct svga_winsys_surface *surf = tex->handle; - unsigned face; - - assert(surf); - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - face = transfer->box.z; - } else { - face = 0; + if (!surf) { + FREE(st); + return NULL; } if (need_tex_readback(transfer)) { - SVGA3dBox box; enum pipe_error ret; - box.x = transfer->box.x; - box.y = transfer->box.y; - box.w = transfer->box.width; - box.h = transfer->box.height; - box.d = transfer->box.depth; - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - box.z = 0; - } - else { - box.z = transfer->box.z; - } - - (void) box; /* not used at this time */ - svga_surfaces_flush(svga); - ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level); + if (svga_have_vgpu10(svga)) { + ret = readback_image_vgpu10(svga, surf, st->slice, transfer->level, + tex->b.b.last_level + 1); + } else { + ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level); + } - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); 
- ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level); - assert(ret == PIPE_OK); - } + assert(ret == PIPE_OK); + (void) ret; svga_context_flush(svga, NULL); @@ -425,7 +457,7 @@ svga_texture_transfer_map(struct pipe_context *pipe, * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified * we could potentially clear the flag for all faces/layers/mips. */ - svga_clear_texture_rendered_to(tex, face, transfer->level); + svga_clear_texture_rendered_to(tex, st->slice, transfer->level); } else { assert(transfer->usage & PIPE_TRANSFER_WRITE); @@ -445,23 +477,21 @@ svga_texture_transfer_map(struct pipe_context *pipe, * Begin mapping code */ if (st->swbuf) { - return st->swbuf; + returnVal = st->swbuf; } else if (!st->use_direct_map) { - return sws->buffer_map(sws, st->hwbuf, usage); + returnVal = sws->buffer_map(sws, st->hwbuf, usage); } else { - struct svga_screen *screen = svga_screen(svga->pipe.screen); - SVGA3dSurfaceFormat format; SVGA3dSize baseLevelSize; struct svga_texture *tex = svga_texture(texture); struct svga_winsys_surface *surf = tex->handle; uint8_t *map; boolean retry; - unsigned face, offset, mip_width, mip_height; - unsigned xoffset = box->x; - unsigned yoffset = box->y; - unsigned zoffset = box->z; + unsigned offset, mip_width, mip_height; + unsigned xoffset = st->base.box.x; + unsigned yoffset = st->base.box.y; + unsigned zoffset = st->base.box.z; map = svga->swc->surface_map(svga->swc, surf, usage, &retry); if (map == NULL && retry) { @@ -476,7 +506,7 @@ svga_texture_transfer_map(struct pipe_context *pipe, /* * Make sure we return NULL if the map fails */ - if (map == NULL) { + if (!map) { FREE(st); return map; } @@ -484,21 +514,13 @@ svga_texture_transfer_map(struct pipe_context *pipe, /** * Compute the offset to the specific texture slice in the buffer. */ - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - face = zoffset; - zoffset = 0; - } else { - face = 0; - } - - format = svga_translate_format(screen, tex->b.b.format, 0); baseLevelSize.width = tex->b.b.width0; baseLevelSize.height = tex->b.b.height0; baseLevelSize.depth = tex->b.b.depth0; - offset = svga3dsurface_get_image_offset(format, baseLevelSize, + offset = svga3dsurface_get_image_offset(tex->key.format, baseLevelSize, tex->b.b.last_level + 1, /* numMips */ - face, level); + st->slice, level); if (level > 0) { assert(offset > 0); } @@ -506,11 +528,16 @@ svga_texture_transfer_map(struct pipe_context *pipe, mip_width = u_minify(tex->b.b.width0, level); mip_height = u_minify(tex->b.b.height0, level); - offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height, + offset += svga3dsurface_get_pixel_offset(tex->key.format, + mip_width, mip_height, xoffset, yoffset, zoffset); - - return (void *) (map + offset); + returnVal = (void *) (map + offset); } + + svga->hud.map_buffer_time += (os_time_get() - begin); + svga->hud.num_resources_mapped++; + + return returnVal; } @@ -541,9 +568,45 @@ svga_texture_surface_unmap(struct svga_context *svga, } -/* XXX: Still implementing this as if it was a screen function, but - * can now modify it to queue transfers on the context. 
- */ +static enum pipe_error +update_image_vgpu9(struct svga_context *svga, + struct svga_winsys_surface *surf, + const SVGA3dBox *box, + unsigned slice, + unsigned level) +{ + enum pipe_error ret; + + ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level); + } + return ret; +} + + +static enum pipe_error +update_image_vgpu10(struct svga_context *svga, + struct svga_winsys_surface *surf, + const SVGA3dBox *box, + unsigned slice, + unsigned level, + unsigned numMipLevels) +{ + enum pipe_error ret; + unsigned subResource; + + subResource = slice * numMipLevels + level; + ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource); + } + return ret; +} + + static void svga_texture_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) @@ -579,26 +642,25 @@ svga_texture_transfer_unmap(struct pipe_context *pipe, } else if (transfer->usage & PIPE_TRANSFER_WRITE) { struct svga_winsys_surface *surf = svga_texture(transfer->resource)->handle; - unsigned face; SVGA3dBox box; enum pipe_error ret; assert(svga_have_gb_objects(svga)); /* update the effected region */ - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - face = transfer->box.z; - } else { - face = 0; - } - box.x = transfer->box.x; box.y = transfer->box.y; - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { + switch (tex->b.b.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_2D_ARRAY: box.z = 0; - } - else { + break; + case PIPE_TEXTURE_1D_ARRAY: + box.y = box.z = 0; + break; + default: box.z = transfer->box.z; + break; } box.w = transfer->box.width; box.h = transfer->box.height; @@ -610,18 +672,21 @@ svga_texture_transfer_unmap(struct pipe_context *pipe, box.x, box.y, box.z, box.w, box.h, box.d); - ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level); - assert(ret == PIPE_OK); + if (svga_have_vgpu10(svga)) { + ret = update_image_vgpu10(svga, surf, &box, st->slice, transfer->level, + tex->b.b.last_level + 1); + } else { + ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level); } + + assert(ret == PIPE_OK); + (void) ret; } ss->texture_timestamp++; svga_age_texture_view(tex, transfer->level); if (transfer->resource->target == PIPE_TEXTURE_CUBE) - svga_define_texture_level(tex, transfer->box.z, transfer->level); + svga_define_texture_level(tex, st->slice, transfer->level); else svga_define_texture_level(tex, 0, transfer->level); @@ -635,7 +700,18 @@ svga_texture_transfer_unmap(struct pipe_context *pipe, } -struct u_resource_vtbl svga_texture_vtbl = +/** + * Does format store depth values? 
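/*
 * Illustrative sketch (not part of the imported Mesa diff): VGPU10 commands
 * such as SVGA3D_vgpu10_UpdateSubResource() address a single mip image by a
 * flat subresource index.  As update_image_vgpu10() above computes, all mip
 * levels of array slice 0 come first, then slice 1, and so on:
 * subResource = slice * numMipLevels + level.  The helpers below simply
 * restate that arithmetic.
 */
static inline unsigned
subresource_index(unsigned slice, unsigned level, unsigned num_mip_levels)
{
   return slice * num_mip_levels + level;
}

static inline void
subresource_decompose(unsigned sub_resource, unsigned num_mip_levels,
                      unsigned *slice, unsigned *level)
{
   *slice = sub_resource / num_mip_levels;
   *level = sub_resource % num_mip_levels;
}

/* Example: for a texture with last_level == 9 (10 mip levels), level 2 of
 * array slice 3 is subresource 3 * 10 + 2 == 32. */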
+ */ +static inline boolean +format_has_depth(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + return util_format_has_depth(desc); +} + + +struct u_resource_vtbl svga_texture_vtbl = { svga_texture_get_handle, /* get_handle */ svga_texture_destroy, /* resource_destroy */ @@ -651,57 +727,119 @@ svga_texture_create(struct pipe_screen *screen, const struct pipe_resource *template) { struct svga_screen *svgascreen = svga_screen(screen); - struct svga_texture *tex = CALLOC_STRUCT(svga_texture); + struct svga_texture *tex; + unsigned bindings = template->bind; - if (!tex) - goto error1; + assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS); + if (template->last_level >= SVGA_MAX_TEXTURE_LEVELS) { + return NULL; + } + + tex = CALLOC_STRUCT(svga_texture); + if (!tex) { + return NULL; + } + + tex->defined = CALLOC(template->depth0 * template->array_size, + sizeof(tex->defined[0])); + if (!tex->defined) { + FREE(tex); + return NULL; + } + + tex->rendered_to = CALLOC(template->depth0 * template->array_size, + sizeof(tex->rendered_to[0])); + if (!tex->rendered_to) { + FREE(tex->defined); + FREE(tex); + return NULL; + } tex->b.b = *template; tex->b.vtbl = &svga_texture_vtbl; pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; - assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS); - if(template->last_level >= SVGA_MAX_TEXTURE_LEVELS) - goto error2; - tex->key.flags = 0; tex->key.size.width = template->width0; tex->key.size.height = template->height0; tex->key.size.depth = template->depth0; + tex->key.arraySize = 1; + tex->key.numFaces = 1; + tex->key.sampleCount = template->nr_samples; - if(template->target == PIPE_TEXTURE_CUBE) { - tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; - tex->key.numFaces = 6; - } - else { - tex->key.numFaces = 1; + if (template->nr_samples > 1) { + tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS; } - if (template->target == PIPE_TEXTURE_3D) { - tex->key.flags |= SVGA3D_SURFACE_VOLUME; + if (svgascreen->sws->have_vgpu10) { + switch (template->target) { + case PIPE_TEXTURE_1D: + tex->key.flags |= SVGA3D_SURFACE_1D; + break; + case PIPE_TEXTURE_1D_ARRAY: + tex->key.flags |= SVGA3D_SURFACE_1D; + /* fall-through */ + case PIPE_TEXTURE_2D_ARRAY: + tex->key.flags |= SVGA3D_SURFACE_ARRAY; + tex->key.arraySize = template->array_size; + break; + case PIPE_TEXTURE_3D: + tex->key.flags |= SVGA3D_SURFACE_VOLUME; + break; + case PIPE_TEXTURE_CUBE: + tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY); + tex->key.numFaces = 6; + break; + default: + break; + } + } + else { + switch (template->target) { + case PIPE_TEXTURE_3D: + tex->key.flags |= SVGA3D_SURFACE_VOLUME; + break; + case PIPE_TEXTURE_CUBE: + tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; + tex->key.numFaces = 6; + break; + default: + break; + } } tex->key.cachable = 1; - if (template->bind & PIPE_BIND_SAMPLER_VIEW) + if (bindings & PIPE_BIND_SAMPLER_VIEW) { tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; + tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE; + + if (!(bindings & PIPE_BIND_RENDER_TARGET)) { + /* Also check if the format is renderable */ + if (screen->is_format_supported(screen, template->format, + template->target, + template->nr_samples, + PIPE_BIND_RENDER_TARGET)) { + bindings |= PIPE_BIND_RENDER_TARGET; + } + } + } - if (template->bind & PIPE_BIND_DISPLAY_TARGET) { + if (bindings & PIPE_BIND_DISPLAY_TARGET) { tex->key.cachable = 0; } - if (template->bind & PIPE_BIND_SHARED) { + if (bindings & PIPE_BIND_SHARED) { 
tex->key.cachable = 0; } - if (template->bind & (PIPE_BIND_SCANOUT | - PIPE_BIND_CURSOR)) { - tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT; + if (bindings & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) { + tex->key.scanout = 1; tex->key.cachable = 0; } - /* + /* * Note: Previously we never passed the * SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot * know beforehand whether a texture will be used as a rendertarget or not @@ -712,23 +850,55 @@ svga_texture_create(struct pipe_screen *screen, * (XA for example) uses it accurately and certain device versions * relies on it in certain situations to render correctly. */ - if((template->bind & PIPE_BIND_RENDER_TARGET) && - !util_format_is_s3tc(template->format)) + if ((bindings & PIPE_BIND_RENDER_TARGET) && + !util_format_is_s3tc(template->format)) { tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; - - if(template->bind & PIPE_BIND_DEPTH_STENCIL) + tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET; + } + + if (bindings & PIPE_BIND_DEPTH_STENCIL) { tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL; - + tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL; + } + tex->key.numMipLevels = template->last_level + 1; - - tex->key.format = svga_translate_format(svgascreen, template->format, template->bind); - if(tex->key.format == SVGA3D_FORMAT_INVALID) - goto error2; + + tex->key.format = svga_translate_format(svgascreen, template->format, + bindings); + if (tex->key.format == SVGA3D_FORMAT_INVALID) { + FREE(tex->defined); + FREE(tex->rendered_to); + FREE(tex); + return NULL; + } + + /* Use typeless formats for sRGB and depth resources. Typeless + * formats can be reinterpreted as other formats. For example, + * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as + * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM. 
+ */ + if (svgascreen->sws->have_vgpu10 && + (util_format_is_srgb(template->format) || + format_has_depth(template->format))) { + SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format); + if (0) { + debug_printf("Convert resource type %s -> %s (bind 0x%x)\n", + svga_format_name(tex->key.format), + svga_format_name(typeless), + bindings); + } + tex->key.format = typeless; + } SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle); - tex->handle = svga_screen_surface_create(svgascreen, &tex->key); - if (!tex->handle) - goto error2; + tex->handle = svga_screen_surface_create(svgascreen, bindings, + tex->b.b.usage, &tex->key); + if (!tex->handle) { + FREE(tex->defined); + FREE(tex->rendered_to); + FREE(tex); + return NULL; + } SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); @@ -736,20 +906,10 @@ svga_texture_create(struct pipe_screen *screen, (debug_reference_descriptor)debug_describe_resource, 0); tex->size = util_resource_size(template); - svgascreen->total_resource_bytes += tex->size; - - tex->rendered_to = CALLOC(template->depth0 * template->array_size, - sizeof(tex->rendered_to[0])); - if (!tex->rendered_to) - goto error2; + svgascreen->hud.total_resource_bytes += tex->size; + svgascreen->hud.num_resources++; return &tex->b.b; - -error2: - FREE(tex->rendered_to); - FREE(tex); -error1: - return NULL; } @@ -759,6 +919,7 @@ svga_texture_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle) { struct svga_winsys_screen *sws = svga_winsys_screen(screen); + struct svga_screen *ss = svga_screen(screen); struct svga_winsys_surface *srf; struct svga_texture *tex; enum SVGA3dSurfaceFormat format = 0; @@ -777,16 +938,28 @@ svga_texture_from_handle(struct pipe_screen *screen, if (!srf) return NULL; - if (svga_translate_format(svga_screen(screen), template->format, template->bind) != format) { - unsigned f1 = svga_translate_format(svga_screen(screen), template->format, template->bind); + if (svga_translate_format(svga_screen(screen), template->format, + template->bind) != format) { + unsigned f1 = svga_translate_format(svga_screen(screen), + template->format, template->bind); unsigned f2 = format; - /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */ - if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up. 
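/*
 * Illustrative sketch (not part of the imported Mesa diff): on VGPU10 the
 * resource above is created with a typeless surface format whenever its
 * gallium format is sRGB or contains depth, so that views created later can
 * reinterpret the same storage (the comment above gives
 * SVGA3D_R8G8B8A8_UNORM_TYPELESS viewed as either _UNORM or _UNORM_SRGB as
 * the example).  The enum and helper below are hypothetical stand-ins for
 * svga_typeless_format(), covering only that one format family.
 */
enum sketch_format {
   SKETCH_R8G8B8A8_UNORM,
   SKETCH_R8G8B8A8_UNORM_SRGB,
   SKETCH_R8G8B8A8_UNORM_TYPELESS
};

static enum sketch_format
sketch_typeless_format(enum sketch_format f)
{
   switch (f) {
   case SKETCH_R8G8B8A8_UNORM:
   case SKETCH_R8G8B8A8_UNORM_SRGB:
      return SKETCH_R8G8B8A8_UNORM_TYPELESS;  /* shared typeless parent */
   default:
      return f;                               /* already typeless here */
   }
}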
+ */ + if (f1 == SVGA3D_B8G8R8A8_UNORM) + f1 = SVGA3D_A8R8G8B8; + if (f1 == SVGA3D_B8G8R8X8_UNORM) + f1 = SVGA3D_X8R8G8B8; + + if ( !( (f1 == f2) || + (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_B8G8R8X8_UNORM) || (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) || + (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_B8G8R8A8_UNORM) || (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) || (f1 == SVGA3D_Z_DF24 && f2 == SVGA3D_Z_D24S8_INT) ) ) { - debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2); + debug_printf("%s wrong format %s != %s\n", __FUNCTION__, + svga_format_name(f1), svga_format_name(f2)); return NULL; } } @@ -795,6 +968,13 @@ svga_texture_from_handle(struct pipe_screen *screen, if (!tex) return NULL; + tex->defined = CALLOC(template->depth0 * template->array_size, + sizeof(tex->defined[0])); + if (!tex->defined) { + FREE(tex); + return NULL; + } + tex->b.b = *template; tex->b.vtbl = &svga_texture_vtbl; pipe_reference_init(&tex->b.b.reference, 1); @@ -803,9 +983,71 @@ svga_texture_from_handle(struct pipe_screen *screen, SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf); tex->key.cachable = 0; + tex->key.format = format; tex->handle = srf; tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0])); + tex->imported = TRUE; + + ss->hud.num_resources++; return &tex->b.b; } + +boolean +svga_texture_generate_mipmap(struct pipe_context *pipe, + struct pipe_resource *pt, + enum pipe_format format, + unsigned base_level, + unsigned last_level, + unsigned first_layer, + unsigned last_layer) +{ + struct pipe_sampler_view templ, *psv; + struct svga_pipe_sampler_view *sv; + struct svga_context *svga = svga_context(pipe); + struct svga_texture *tex = svga_texture(pt); + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + /* Only support 2D texture for now */ + if (pt->target != PIPE_TEXTURE_2D) + return FALSE; + + /* Fallback to the mipmap generation utility for those formats that + * do not support hw generate mipmap + */ + if (!svga_format_support_gen_mips(format)) + return FALSE; + + /* Make sure the texture surface was created with + * SVGA3D_SURFACE_BIND_RENDER_TARGET + */ + if (!tex->handle || !(tex->key.flags & SVGA3D_SURFACE_BIND_RENDER_TARGET)) + return FALSE; + + templ.format = format; + templ.u.tex.first_layer = first_layer; + templ.u.tex.last_layer = last_layer; + templ.u.tex.first_level = base_level; + templ.u.tex.last_level = last_level; + + psv = pipe->create_sampler_view(pipe, pt, &templ); + if (psv == NULL) + return FALSE; + + sv = svga_pipe_sampler_view(psv); + svga_validate_pipe_sampler_view(svga, sv); + + ret = SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle); + } + pipe_sampler_view_reference(&psv, NULL); + + svga->hud.num_generate_mipmap++; + + return TRUE; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h index 19dadfb88..99ba33b26 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h @@ -51,7 +51,7 @@ struct svga_texture { struct u_resource b; - boolean defined[6][SVGA_MAX_TEXTURE_LEVELS]; + ushort *defined; struct svga_sampler_view *cached_view; @@ -77,6 +77,12 @@ struct svga_texture */ struct svga_winsys_surface *handle; + /** + * Whether the host side surface is imported and not created by this + * driver. 
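/*
 * Illustrative sketch (not part of the imported Mesa diff): command emission
 * throughout this patch uses the same "try, flush, retry once" idiom seen in
 * svga_texture_generate_mipmap() above (and in update_image_vgpu9/10): if
 * the first attempt fails, typically because the command buffer is full, the
 * context is flushed and the command is issued again.  emit_cmd() below is a
 * hypothetical callback standing in for a specific SVGA3D_*() entry point.
 */
enum sketch_status { SKETCH_OK = 0, SKETCH_AGAIN = -1 };

typedef enum sketch_status (*emit_fn)(void *ctx);
typedef void (*flush_fn)(void *ctx);

static enum sketch_status
emit_with_retry(void *ctx, emit_fn emit_cmd, flush_fn flush)
{
   enum sketch_status ret = emit_cmd(ctx);
   if (ret != SKETCH_OK) {
      flush(ctx);            /* make room in the command buffer */
      ret = emit_cmd(ctx);   /* second attempt is expected to succeed */
   }
   return ret;
}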
+ */ + boolean imported; + unsigned size; /**< Approximate size in bytes */ /** array indexed by cube face or 3D/array slice, one bit per mipmap level */ @@ -91,7 +97,7 @@ struct svga_transfer { struct pipe_transfer base; - unsigned face; + unsigned slice; /**< array slice or cube face */ struct svga_winsys_buffer *hwbuf; @@ -135,29 +141,6 @@ svga_age_texture_view(struct svga_texture *tex, unsigned level) } -/** - * Mark the given texture face/level as being defined. - */ -static inline void -svga_define_texture_level(struct svga_texture *tex, - unsigned face,unsigned level) -{ - assert(face < Elements(tex->defined)); - assert(level < Elements(tex->defined[0])); - tex->defined[face][level] = TRUE; -} - - -static inline bool -svga_is_texture_level_defined(const struct svga_texture *tex, - unsigned face, unsigned level) -{ - assert(face < Elements(tex->defined)); - assert(level < Elements(tex->defined[0])); - return tex->defined[face][level]; -} - - /** For debugging, check that face and level are legal */ static inline void check_face_level(const struct svga_texture *tex, @@ -177,6 +160,27 @@ check_face_level(const struct svga_texture *tex, } +/** + * Mark the given texture face/level as being defined. + */ +static inline void +svga_define_texture_level(struct svga_texture *tex, + unsigned face,unsigned level) +{ + check_face_level(tex, face, level); + tex->defined[face] |= 1 << level; +} + + +static inline bool +svga_is_texture_level_defined(const struct svga_texture *tex, + unsigned face, unsigned level) +{ + check_face_level(tex, face, level); + return (tex->defined[face] & (1 << level)) != 0; +} + + static inline void svga_set_texture_rendered_to(struct svga_texture *tex, unsigned face, unsigned level) @@ -213,7 +217,14 @@ svga_texture_from_handle(struct pipe_screen * screen, const struct pipe_resource *template, struct winsys_handle *whandle); - +boolean +svga_texture_generate_mipmap(struct pipe_context *pipe, + struct pipe_resource *pt, + enum pipe_format format, + unsigned base_level, + unsigned last_level, + unsigned first_layer, + unsigned last_layer); #endif /* SVGA_TEXTURE_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c index 55dc49f2d..9c33a79db 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c @@ -48,9 +48,11 @@ svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv) { char res[128]; debug_describe_resource(res, sv->texture); - util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod); + util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", + res, sv->min_lod, sv->max_lod); } + struct svga_sampler_view * svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_resource *pt, @@ -58,16 +60,17 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_texture *tex = svga_texture(pt); + struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE; - SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, PIPE_BIND_SAMPLER_VIEW); + SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, + PIPE_BIND_SAMPLER_VIEW); boolean view = TRUE; assert(pt); assert(min_lod <= max_lod); assert(max_lod <= pt->last_level); - + assert(!svga_have_vgpu10(svga)); /* Is a view needed */ { 
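/*
 * Illustrative sketch (not part of the imported Mesa diff): the header
 * change above replaces the boolean defined[6][SVGA_MAX_TEXTURE_LEVELS]
 * matrix with one dynamically allocated ushort per face/slice, one bit per
 * mip level (see svga_define_texture_level() and
 * svga_is_texture_level_defined()).  The standalone helpers below restate
 * that bit bookkeeping.
 */
#include <assert.h>

typedef unsigned short level_mask;   /* one bit per mip level, as above */

static void
mark_level_defined(level_mask *slices, unsigned slice, unsigned level)
{
   assert(level < 8 * sizeof(level_mask));
   slices[slice] |= (level_mask)(1u << level);
}

static int
is_level_defined(const level_mask *slices, unsigned slice, unsigned level)
{
   assert(level < 8 * sizeof(level_mask));
   return (slices[slice] & (1u << level)) != 0;
}

/* Usage: an array or 3D texture keeps depth0 * array_size such masks,
 * matching the CALLOC(template->depth0 * template->array_size, ...) calls
 * in svga_texture_create() above. */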
@@ -143,17 +146,20 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->age = tex->age; - sv->handle = svga_texture_view_surface(svga, tex, flags, format, + sv->handle = svga_texture_view_surface(svga, tex, + PIPE_BIND_SAMPLER_VIEW, + flags, format, min_lod, max_lod - min_lod + 1, - -1, -1, + -1, 1, -1, &sv->key); if (!sv->handle) { sv->key.cachable = 0; sv->handle = tex->handle; debug_reference(&sv->reference, - (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + (debug_reference_descriptor) + svga_debug_describe_sampler_view, 0); return sv; } @@ -162,13 +168,16 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pipe_mutex_unlock(ss->tex_mutex); debug_reference(&sv->reference, - (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + (debug_reference_descriptor) + svga_debug_describe_sampler_view, 0); return sv; } + void -svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v) +svga_validate_sampler_view(struct svga_context *svga, + struct svga_sampler_view *v) { struct svga_texture *tex = svga_texture(v->texture); unsigned numFaces; @@ -177,13 +186,14 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * unsigned k; assert(svga); + assert(!svga_have_vgpu10(svga)); if (v->handle == tex->handle) return; age = tex->age; - if(tex->b.b.target == PIPE_TEXTURE_CUBE) + if (tex->b.b.target == PIPE_TEXTURE_CUBE) numFaces = 6; else numFaces = 1; @@ -204,12 +214,13 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * v->age = age; } + void svga_destroy_sampler_view_priv(struct svga_sampler_view *v) { struct svga_texture *tex = svga_texture(v->texture); - if(v->handle != tex->handle) { + if (v->handle != tex->handle) { struct svga_screen *ss = svga_screen(v->texture->screen); SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); svga_screen_surface_destroy(ss, &v->key, &v->handle); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h index 7f14323f8..15f2313c4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h @@ -35,7 +35,9 @@ struct pipe_context; struct pipe_screen; struct svga_context; +struct svga_pipe_sampler_view; struct svga_winsys_surface; +struct svga_surface; enum SVGA3dSurfaceFormat; @@ -97,5 +99,13 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_ *ptr = v; } +boolean +svga_check_sampler_view_resource_collision(struct svga_context *svga, + struct svga_winsys_surface *res, + unsigned shader); + +enum pipe_error +svga_validate_pipe_sampler_view(struct svga_context *svga, + struct svga_pipe_sampler_view *sv); #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen.c b/lib/mesa/src/gallium/drivers/svga/svga_screen.c index 6539971e0..c9abd49ec 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.c @@ -34,31 +34,37 @@ #include "svga_context.h" #include "svga_format.h" #include "svga_screen.h" +#include "svga_tgsi.h" #include "svga_resource_texture.h" #include "svga_resource.h" #include "svga_debug.h" #include "svga3d_shaderdefs.h" +#include "VGPU10ShaderTokens.h" +/* NOTE: this constant may get moved into a svga3d*.h header file */ +#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024) #ifdef DEBUG int SVGA_DEBUG = 0; static const struct debug_named_value svga_debug_flags[] = { - { "dma", 
DEBUG_DMA, NULL }, - { "tgsi", DEBUG_TGSI, NULL }, - { "pipe", DEBUG_PIPE, NULL }, - { "state", DEBUG_STATE, NULL }, - { "screen", DEBUG_SCREEN, NULL }, - { "tex", DEBUG_TEX, NULL }, - { "swtnl", DEBUG_SWTNL, NULL }, - { "const", DEBUG_CONSTS, NULL }, - { "viewport", DEBUG_VIEWPORT, NULL }, - { "views", DEBUG_VIEWS, NULL }, - { "perf", DEBUG_PERF, NULL }, - { "flush", DEBUG_FLUSH, NULL }, - { "sync", DEBUG_SYNC, NULL }, - { "cache", DEBUG_CACHE, NULL }, + { "dma", DEBUG_DMA, NULL }, + { "tgsi", DEBUG_TGSI, NULL }, + { "pipe", DEBUG_PIPE, NULL }, + { "state", DEBUG_STATE, NULL }, + { "screen", DEBUG_SCREEN, NULL }, + { "tex", DEBUG_TEX, NULL }, + { "swtnl", DEBUG_SWTNL, NULL }, + { "const", DEBUG_CONSTS, NULL }, + { "viewport", DEBUG_VIEWPORT, NULL }, + { "views", DEBUG_VIEWS, NULL }, + { "perf", DEBUG_PERF, NULL }, + { "flush", DEBUG_FLUSH, NULL }, + { "sync", DEBUG_SYNC, NULL }, + { "cache", DEBUG_CACHE, NULL }, + { "streamout", DEBUG_STREAMOUT, NULL }, + { "query", DEBUG_QUERY, NULL }, DEBUG_NAMED_VALUE_END }; #endif @@ -80,18 +86,52 @@ svga_get_name( struct pipe_screen *pscreen ) */ build = "build: DEBUG;"; mutex = "mutex: " PIPE_ATOMIC ";"; -#ifdef HAVE_LLVM - llvm = "LLVM;"; -#endif #else build = "build: RELEASE;"; #endif +#ifdef HAVE_LLVM + llvm = "LLVM;"; +#endif util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm); return name; } +/** Helper for querying float-valued device cap */ +static float +get_float_cap(struct svga_winsys_screen *sws, unsigned cap, float defaultVal) +{ + SVGA3dDevCapResult result; + if (sws->get_cap(sws, cap, &result)) + return result.f; + else + return defaultVal; +} + + +/** Helper for querying uint-valued device cap */ +static unsigned +get_uint_cap(struct svga_winsys_screen *sws, unsigned cap, unsigned defaultVal) +{ + SVGA3dDevCapResult result; + if (sws->get_cap(sws, cap, &result)) + return result.u; + else + return defaultVal; +} + + +/** Helper for querying boolean-valued device cap */ +static boolean +get_bool_cap(struct svga_winsys_screen *sws, unsigned cap, boolean defaultVal) +{ + SVGA3dDevCapResult result; + if (sws->get_cap(sws, cap, &result)) + return result.b; + else + return defaultVal; +} static float @@ -99,7 +139,6 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_winsys_screen *sws = svgascreen->sws; - SVGA3dDevCapResult result; switch (param) { case PIPE_CAPF_MAX_LINE_WIDTH: @@ -113,12 +152,11 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param) return svgascreen->maxPointSize; case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, &result)) - return 4.0f; - return (float) result.u; + return (float) get_uint_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, 4); case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: return 15.0; + case PIPE_CAPF_GUARD_BAND_LEFT: case PIPE_CAPF_GUARD_BAND_TOP: case PIPE_CAPF_GUARD_BAND_RIGHT: @@ -145,7 +183,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - return 0; + /* + * "In virtually every OpenGL implementation and hardware, + * GL_MAX_DUAL_SOURCE_DRAW_BUFFERS is 1" + * http://www.opengl.org/wiki/Blending + */ + return sws->have_vgpu10 ? 
1 : 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -158,6 +201,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_QUERY_TIME_ELAPSED: return 0; + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return sws->have_vgpu10; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: @@ -170,7 +215,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_USER_CONSTANT_BUFFERS: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: - return 16; + return 256; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: { @@ -199,17 +244,20 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return MIN2(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS), 12 /* 2048x2048 */); + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */ return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: return 1; case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 0; + return sws->have_vgpu10; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: return 0; case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 1; + return !sws->have_vgpu10; case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: return 1; /* The color outputs of vertex shaders are not clamped */ @@ -222,7 +270,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; /* expected for GL_ARB_framebuffer_object */ case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 120; + return sws->have_vgpu10 ? 330 : 120; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return 0; @@ -230,54 +278,75 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SM3: return 1; - /* Unsupported features */ - case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_SEAMLESS_CUBE_MAP: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_FAKE_SW_MSAA: + return sws->have_vgpu10; + + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return sws->have_vgpu10 ? SVGA3D_DX_MAX_SOTARGETS : 0; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + return sws->have_vgpu10 ? 4 : 0; + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return 0; + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return svgascreen->ms_samples ? 1 : 0; + + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return SVGA3D_DX_MAX_RESOURCE_SIZE; + case PIPE_CAP_MIN_TEXEL_OFFSET: + return sws->have_vgpu10 ? VGPU10_MIN_TEXEL_FETCH_OFFSET : 0; case PIPE_CAP_MAX_TEXEL_OFFSET: + return sws->have_vgpu10 ? 
VGPU10_MAX_TEXEL_FETCH_OFFSET : 0; + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: - case PIPE_CAP_CONDITIONAL_RENDER: - case PIPE_CAP_TEXTURE_BARRIER: - case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: - case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return 0; + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + return sws->have_vgpu10 ? 256 : 0; case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return sws->have_vgpu10 ? 1024 : 0; + + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; /* may be a sw fallback, depending on restart index */ + + case PIPE_CAP_GENERATE_MIPMAP: + return sws->have_vgpu10; + + /* Unsupported features */ + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_MAX_VERTEX_STREAMS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_COMPUTE: case PIPE_CAP_START_INSTANCE: - case PIPE_CAP_QUERY_TIMESTAMP: - case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_CUBE_MAP_ARRAY: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: - case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: @@ -285,11 +354,19 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return 64; + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: - return 1; + return 1; /* need 4-byte alignment for all offsets and strides */ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; case PIPE_CAP_MAX_VIEWPORTS: @@ -313,6 +390,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; } @@ -320,11 +407,16 @@ svga_get_param(struct pipe_screen *screen, enum 
pipe_cap param) return 0; } -static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) + +static int +vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, + enum pipe_shader_cap param) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_winsys_screen *sws = svgascreen->sws; - SVGA3dDevCapResult result; + unsigned val; + + assert(!sws->have_vgpu10); switch (shader) { @@ -347,9 +439,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return 1; case PIPE_SHADER_CAP_MAX_TEMPS: - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, &result)) - return 32; - return MIN2(result.u, SVGA3D_TEMPREG_MAX); + val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, 32); + return MIN2(val, SVGA3D_TEMPREG_MAX); case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: /* * Although PS 3.0 has some addressing abilities it can only represent @@ -377,11 +468,15 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -394,9 +489,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, &result)) - return 512; - return result.u; + return get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, + 512); case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: /* XXX: until we have vertex texture support */ @@ -412,9 +506,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return 1; case PIPE_SHADER_CAP_MAX_TEMPS: - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, &result)) - return 32; - return MIN2(result.u, SVGA3D_TEMPREG_MAX); + val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, 32); + return MIN2(val, SVGA3D_TEMPREG_MAX); case PIPE_SHADER_CAP_MAX_PREDS: return 1; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -437,11 +530,15 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -463,8 +560,108 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en } +static int +vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader, + enum pipe_shader_cap param) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_winsys_screen *sws = svgascreen->sws; + + 
assert(sws->have_vgpu10); + (void) sws; /* silence unused var warnings in non-debug builds */ + + /* Only VS, GS, FS supported */ + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_GEOMETRY && + shader != PIPE_SHADER_FRAGMENT) { + return 0; + } + + /* NOTE: we do not query the device for any caps/limits at this time */ + + /* Generally the same limits for vertex, geometry and fragment shaders */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 64 * 1024; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 64; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VGPU10_MAX_FS_INPUTS; + else if (shader == PIPE_SHADER_GEOMETRY) + return VGPU10_MAX_GS_INPUTS; + else + return VGPU10_MAX_VS_INPUTS; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VGPU10_MAX_FS_OUTPUTS; + else if (shader == PIPE_SHADER_GEOMETRY) + return VGPU10_MAX_GS_OUTPUTS; + else + return VGPU10_MAX_VS_OUTPUTS; + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]); + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return svgascreen->max_const_buffers; + case PIPE_SHADER_CAP_MAX_TEMPS: + return VGPU10_MAX_TEMPS; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return TRUE; /* XXX verify */ + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: + return TRUE; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + return SVGA3D_DX_MAX_SAMPLERS; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_DOUBLES: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + default: + debug_printf("Unexpected vgpu10 shader query %u\n", param); + return 0; + } + return 0; +} + + +static int +svga_get_shader_param(struct pipe_screen *screen, unsigned shader, + enum pipe_shader_cap param) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_winsys_screen *sws = svgascreen->sws; + if (sws->have_vgpu10) { + return vgpu10_get_shader_param(screen, shader, param); + } + else { + return vgpu9_get_shader_param(screen, shader, param); + } +} + + /** - * Implemnt pipe_screen::is_format_supported(). + * Implement pipe_screen::is_format_supported(). * \param bindings bitmask of PIPE_BIND_x flags */ static boolean @@ -482,7 +679,12 @@ svga_is_format_supported( struct pipe_screen *screen, assert(bindings); if (sample_count > 1) { - return FALSE; + /* In ms_samples, if bit N is set it means that we support + * multisample with N+1 samples per pixel. 
+ */ + if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) { + return FALSE; + } } svga_format = svga_translate_format(ss, format, bindings); @@ -490,6 +692,22 @@ svga_is_format_supported( struct pipe_screen *screen, return FALSE; } + /* we don't support sRGB rendering into display targets */ + if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) { + return FALSE; + } + + /* + * For VGPU10 vertex formats, skip querying host capabilities + */ + + if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) { + SVGA3dSurfaceFormat svga_format; + unsigned flags; + svga_translate_vertex_format_vgpu10(format, &svga_format, &flags); + return svga_format != SVGA3D_FORMAT_INVALID; + } + /* * Override host capabilities, so that we end up with the same * visuals for all virtual hardware implementations. @@ -502,6 +720,12 @@ svga_is_format_supported( struct pipe_screen *screen, case SVGA3D_R5G6B5: break; + /* VGPU10 formats */ + case SVGA3D_B8G8R8A8_UNORM: + case SVGA3D_B8G8R8X8_UNORM: + case SVGA3D_B5G6R5_UNORM: + break; + /* Often unsupported/problematic. This means we end up with the same * visuals for all virtual hardware implementations. */ @@ -520,22 +744,32 @@ svga_is_format_supported( struct pipe_screen *screen, svga_get_format_cap(ss, svga_format, &caps); + if (bindings & PIPE_BIND_RENDER_TARGET) { + /* Check that the color surface is blendable, unless it's an + * integer format. + */ + if (!svga_format_is_integer(svga_format) && + (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) { + return FALSE; + } + } + mask.value = 0; if (bindings & PIPE_BIND_RENDER_TARGET) { - mask.offscreenRenderTarget = 1; + mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET; } if (bindings & PIPE_BIND_DEPTH_STENCIL) { - mask.zStencil = 1; + mask.value |= SVGA3DFORMAT_OP_ZSTENCIL; } if (bindings & PIPE_BIND_SAMPLER_VIEW) { - mask.texture = 1; + mask.value |= SVGA3DFORMAT_OP_TEXTURE; } if (target == PIPE_TEXTURE_CUBE) { - mask.cubeTexture = 1; + mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE; } - if (target == PIPE_TEXTURE_3D) { - mask.volumeTexture = 1; + else if (target == PIPE_TEXTURE_3D) { + mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE; } return (caps.value & mask.value) == mask.value; @@ -574,11 +808,41 @@ svga_get_driver_query_info(struct pipe_screen *screen, unsigned index, struct pipe_driver_query_info *info) { +#define QUERY(NAME, ENUM, UNITS) \ + {NAME, ENUM, {0}, UNITS, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, 0, 0x0} + static const struct pipe_driver_query_info queries[] = { - {"draw-calls", SVGA_QUERY_DRAW_CALLS, {0}}, - {"fallbacks", SVGA_QUERY_FALLBACKS, {0}}, - {"memory-used", SVGA_QUERY_MEMORY_USED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES} + /* per-frame counters */ + QUERY("num-draw-calls", SVGA_QUERY_NUM_DRAW_CALLS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-fallbacks", SVGA_QUERY_NUM_FALLBACKS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-flushes", SVGA_QUERY_NUM_FLUSHES, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-validations", SVGA_QUERY_NUM_VALIDATIONS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, + PIPE_DRIVER_QUERY_TYPE_MICROSECONDS), + QUERY("num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, + PIPE_DRIVER_QUERY_TYPE_BYTES), + + /* running total counters */ + QUERY("memory-used", SVGA_QUERY_MEMORY_USED, + PIPE_DRIVER_QUERY_TYPE_BYTES), + QUERY("num-shaders", SVGA_QUERY_NUM_SHADERS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + 
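/*
 * Illustrative sketch (not part of the imported Mesa diff): as the comment
 * in svga_is_format_supported() above explains, ms_samples is a bitmask in
 * which a set bit N means "N+1 samples per pixel is supported".  The helper
 * below restates just that sample-count test; the real code still goes on
 * to check the per-format capabilities afterwards.
 */
static int
sample_count_supported(unsigned ms_samples_mask, unsigned sample_count)
{
   if (sample_count <= 1)
      return 1;   /* single-sampled surfaces are always allowed */
   return (ms_samples_mask & (1u << (sample_count - 1))) != 0;
}

/* Example: a mask of 0x0a (bits 1 and 3 set) advertises 2x and 4x MSAA. */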
QUERY("num-resources", SVGA_QUERY_NUM_RESOURCES, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-state-objects", SVGA_QUERY_NUM_STATE_OBJECTS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-surface-views", SVGA_QUERY_NUM_SURFACE_VIEWS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-generate-mipmap", SVGA_QUERY_NUM_GENERATE_MIPMAP, + PIPE_DRIVER_QUERY_TYPE_UINT64), }; +#undef QUERY if (!info) return Elements(queries); @@ -615,8 +879,6 @@ svga_screen_create(struct svga_winsys_screen *sws) { struct svga_screen *svgascreen; struct pipe_screen *screen; - SVGA3dDevCapResult result; - boolean use_vs30, use_ps30; #ifdef DEBUG SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 ); @@ -646,6 +908,7 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->get_param = svga_get_param; screen->get_shader_param = svga_get_shader_param; screen->get_paramf = svga_get_paramf; + screen->get_timestamp = NULL; screen->is_format_supported = svga_is_format_supported; screen->context_create = svga_context_create; screen->fence_reference = svga_fence_reference; @@ -661,18 +924,6 @@ svga_screen_create(struct svga_winsys_screen *sws) svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1; } - use_ps30 = - sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && - result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE; - - use_vs30 = - sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) && - result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE; - - /* we require Shader model 3.0 or later */ - if (!use_ps30 || !use_vs30) - goto error2; - /* * The D16, D24X8, and D24S8 formats always do an implicit shadow compare * when sampled from, where as the DF16, DF24, and D24S8_INT do not. So @@ -720,46 +971,77 @@ svga_screen_create(struct svga_winsys_screen *sws) /* Query device caps */ - if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, &result)) - svgascreen->haveLineStipple = FALSE; - else - svgascreen->haveLineStipple = result.u; + if (sws->have_vgpu10) { + svgascreen->haveProvokingVertex + = get_bool_cap(sws, SVGA3D_DEVCAP_DX_PROVOKING_VERTEX, FALSE); + svgascreen->haveLineSmooth = TRUE; + svgascreen->maxPointSize = 80.0F; + svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS; + + /* Multisample samples per pixel */ + svgascreen->ms_samples = + get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0); + + /* Maximum number of constant buffers */ + svgascreen->max_const_buffers = + get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1); + assert(svgascreen->max_const_buffers <= SVGA_MAX_CONST_BUFS); + } + else { + /* VGPU9 */ + unsigned vs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, + SVGA3DVSVERSION_NONE); + unsigned fs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, + SVGA3DPSVERSION_NONE); + + /* we require Shader model 3.0 or later */ + if (fs_ver < SVGA3DPSVERSION_30 || vs_ver < SVGA3DVSVERSION_30) { + goto error2; + } - if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_AA, &result)) - svgascreen->haveLineSmooth = FALSE; - else - svgascreen->haveLineSmooth = result.u; + svgascreen->haveProvokingVertex = FALSE; - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, &result)) - svgascreen->maxLineWidth = 1.0F; - else - svgascreen->maxLineWidth = result.f; + svgascreen->haveLineSmooth = + get_bool_cap(sws, SVGA3D_DEVCAP_LINE_AA, FALSE); - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, &result)) - svgascreen->maxLineWidthAA = 1.0F; - else - svgascreen->maxLineWidthAA = result.f; + svgascreen->maxPointSize = + get_float_cap(sws, 
SVGA3D_DEVCAP_MAX_POINT_SIZE, 1.0f); + /* Keep this to a reasonable size to avoid failures in conform/pntaa.c */ + svgascreen->maxPointSize = MIN2(svgascreen->maxPointSize, 80.0f); + + /* The SVGA3D device always supports 4 targets at this time, regardless + * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return. + */ + svgascreen->max_color_buffers = 4; + + /* Only support one constant buffer + */ + svgascreen->max_const_buffers = 1; - if (0) + /* No multisampling */ + svgascreen->ms_samples = 0; + } + + /* common VGPU9 / VGPU10 caps */ + svgascreen->haveLineStipple = + get_bool_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, FALSE); + + svgascreen->maxLineWidth = + get_float_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, 1.0f); + + svgascreen->maxLineWidthAA = + get_float_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, 1.0f); + + if (0) { + debug_printf("svga: haveProvokingVertex %u\n", + svgascreen->haveProvokingVertex); debug_printf("svga: haveLineStip %u " "haveLineSmooth %u maxLineWidth %f\n", svgascreen->haveLineStipple, svgascreen->haveLineSmooth, svgascreen->maxLineWidth); - - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, &result)) { - svgascreen->maxPointSize = 1.0F; - } else { - /* Keep this to a reasonable size to avoid failures in - * conform/pntaa.c: - */ - svgascreen->maxPointSize = MIN2(result.f, 80.0f); + debug_printf("svga: maxPointSize %g\n", svgascreen->maxPointSize); } - /* The SVGA3D device always supports 4 targets at this time, regardless - * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return. - */ - svgascreen->max_color_buffers = 4; - pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen.h b/lib/mesa/src/gallium/drivers/svga/svga_screen.h index ea1e743df..98b56b2a6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.h @@ -1,4 +1,4 @@ -/********************************************************** + /********************************************************** * Copyright 2008-2009 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person @@ -48,10 +48,13 @@ struct svga_screen SVGA3dHardwareVersion hw_version; /** Device caps */ + boolean haveProvokingVertex; boolean haveLineStipple, haveLineSmooth; float maxLineWidth, maxLineWidthAA; float maxPointSize; unsigned max_color_buffers; + unsigned max_const_buffers; + unsigned ms_samples; struct { boolean force_level_surface_view; @@ -69,6 +72,7 @@ struct svga_screen /* which formats to translate depth formats into */ struct { enum SVGA3dSurfaceFormat z16; + /* note gallium order */ enum SVGA3dSurfaceFormat x8z24; enum SVGA3dSurfaceFormat s8z24; @@ -76,8 +80,12 @@ struct svga_screen struct svga_host_surface_cache cache; - /** Memory used by all resources (buffers and surfaces) */ - uint64_t total_resource_bytes; + /** HUD counters */ + struct { + /** Memory used by all resources (buffers and surfaces) */ + uint64_t total_resource_bytes; + uint64_t num_resources; + } hud; }; #ifndef DEBUG diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c index 3c765394a..5b4412957 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c @@ -115,8 +115,14 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, assert(entry->handle); + /* If the key matches and the fence is signalled (the surface is no + * longer needed) the lookup was successful. We found a surface that + * can be reused. + * We unlink the surface from the cache entry and we add the entry to + * the 'empty' list. + */ if (memcmp(&entry->key, key, sizeof *key) == 0 && - sws->fence_signalled(sws, entry->fence, 0) == 0) { + sws->fence_signalled(sws, entry->fence, 0) == 0) { unsigned surf_size; assert(sws->surface_is_flushed(sws, entry->handle)); @@ -124,10 +130,13 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, handle = entry->handle; /* Reference is transfered here. */ entry->handle = NULL; + /* Remove from hash table */ LIST_DEL(&entry->bucket_head); + /* remove from LRU list */ LIST_DEL(&entry->head); + /* Add the cache entry (but not the surface!) to the empty list */ LIST_ADD(&entry->head, &cache->empty); /* update the cache size */ @@ -195,7 +204,8 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen, /** - * Transfers a handle reference. + * Add a surface to the cache. This is done when the driver deletes + * the surface. Note: transfers a handle reference. */ static void svga_screen_cache_add(struct svga_screen *svgascreen, @@ -207,17 +217,17 @@ svga_screen_cache_add(struct svga_screen *svgascreen, struct svga_host_surface_cache_entry *entry = NULL; struct svga_winsys_surface *handle = *p_handle; unsigned surf_size; - + assert(key->cachable); if (!handle) return; - + surf_size = surface_size(key); *p_handle = NULL; pipe_mutex_lock(cache->mutex); - + if (surf_size >= SVGA_HOST_SURFACE_CACHE_BYTES) { /* this surface is too large to cache, just free it */ sws->surface_reference(sws, &handle, NULL); @@ -245,10 +255,13 @@ svga_screen_cache_add(struct svga_screen *svgascreen, } if (!LIST_IS_EMPTY(&cache->empty)) { - /* use the first empty entry */ + /* An empty entry has no surface associated with it. + * Use the first empty entry. 
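/*
 * Illustrative sketch (not part of the imported Mesa diff): as the comments
 * added to svga_screen_cache_lookup() above describe, a cached host surface
 * is only handed back when its key matches exactly and its fence has
 * already signalled, i.e. the GPU no longer references it (the winsys
 * fence_signalled() call returns 0 once the fence has signalled, hence the
 * "== 0" test).  The predicate below restates that condition with
 * hypothetical types.
 */
#include <string.h>

struct sketch_cache_entry {
   int key[8];            /* stands in for svga_host_surface_cache_key */
   int fence_signalled;   /* 0 once the winsys fence has signalled */
};

static int
sketch_can_reuse(const struct sketch_cache_entry *entry, const int key[8])
{
   return memcmp(entry->key, key, sizeof entry->key) == 0 &&
          entry->fence_signalled == 0;
}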
+ */ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head); + /* Remove from LRU list */ LIST_DEL(&entry->head); } else if (!LIST_IS_EMPTY(&cache->unused)) { @@ -262,12 +275,15 @@ svga_screen_cache_add(struct svga_screen *svgascreen, sws->surface_reference(sws, &entry->handle, NULL); + /* Remove from hash table */ LIST_DEL(&entry->bucket_head); + /* Remove from LRU list */ LIST_DEL(&entry->head); } if (entry) { + assert(entry->handle == NULL); entry->handle = handle; memcpy(&entry->key, key, sizeof entry->key); @@ -304,6 +320,7 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, pipe_mutex_lock(cache->mutex); + /* Loop over entries in the validated list */ curr = cache->validated.next; next = curr->next; while (curr != &cache->validated) { @@ -312,12 +329,15 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, assert(entry->handle); if (sws->surface_is_flushed(sws, entry->handle)) { + /* remove entry from LRU list */ LIST_DEL(&entry->head); svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence); + /* Add entry to the unused list */ LIST_ADD(&entry->head, &cache->unused); + /* Add entry to the hash table bucket */ bucket = svga_screen_cache_bucket(&entry->key); LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]); } @@ -388,9 +408,12 @@ svga_screen_cache_init(struct svga_screen *svgascreen) * Allocate a new host-side surface. If the surface is marked as cachable, * first try re-using a surface in the cache of freed surfaces. Otherwise, * allocate a new surface. + * \param bind_flags bitmask of PIPE_BIND_x flags + * \param usage one of PIPE_USAGE_x values */ struct svga_winsys_surface * svga_screen_surface_create(struct svga_screen *svgascreen, + unsigned bind_flags, unsigned usage, struct svga_host_surface_cache_key *key) { struct svga_winsys_screen *sws = svgascreen->sws; @@ -398,17 +421,20 @@ svga_screen_surface_create(struct svga_screen *svgascreen, boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable; SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, - "%s sz %dx%dx%d mips %d faces %d cachable %d\n", + "%s sz %dx%dx%d mips %d faces %d arraySize %d cachable %d\n", __FUNCTION__, key->size.width, key->size.height, key->size.depth, key->numMipLevels, key->numFaces, + key->arraySize, key->cachable); if (cachable) { if (key->format == SVGA3D_BUFFER) { + SVGA3dSurfaceFlags hint_flag; + /* For buffers, round the buffer size up to the nearest power * of two to increase the probability of cache hits. Keep * texture surface dimensions unchanged. @@ -417,15 +443,33 @@ svga_screen_surface_create(struct svga_screen *svgascreen, while (size < key->size.width) size <<= 1; key->size.width = size; - /* Since we're reusing buffers we're effectively transforming all - * of them into dynamic buffers. - * - * It would be nice to not cache long lived static buffers. But there - * is no way to detect the long lived from short lived ones yet. A - * good heuristic would be buffer size. - */ - key->flags &= ~SVGA3D_SURFACE_HINT_STATIC; - key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC; + + /* Determine whether the buffer is static or dynamic. + * This is a bit of a heuristic which can be tuned as needed. + */ + if (usage == PIPE_USAGE_DEFAULT || + usage == PIPE_USAGE_IMMUTABLE) { + hint_flag = SVGA3D_SURFACE_HINT_STATIC; + } + else if (bind_flags & PIPE_BIND_INDEX_BUFFER) { + /* Index buffers don't change too often. Mark them as static. 
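/*
 * Illustrative sketch (not part of the imported Mesa diff):
 * svga_screen_surface_create() above rounds SVGA3D_BUFFER widths up to the
 * nearest power of two so that buffers of similar size share cache entries
 * and host surfaces get reused more often; the initial value of 'size' is
 * outside the visible hunk, so the sketch simply starts at 1.
 */
static unsigned
round_up_pow2(unsigned width)
{
   unsigned size = 1;
   while (size < width)
      size <<= 1;          /* same doubling loop as the code above */
   return size;
}

/* Example: widths 1025..2048 all map to 2048, so they can end up reusing
 * the same cached host buffer. */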
+ */ + hint_flag = SVGA3D_SURFACE_HINT_STATIC; + } + else { + /* Since we're reusing buffers we're effectively transforming all + * of them into dynamic buffers. + * + * It would be nice to not cache long lived static buffers. But there + * is no way to detect the long lived from short lived ones yet. A + * good heuristic would be buffer size. + */ + hint_flag = SVGA3D_SURFACE_HINT_DYNAMIC; + } + + key->flags &= ~(SVGA3D_SURFACE_HINT_STATIC | + SVGA3D_SURFACE_HINT_DYNAMIC); + key->flags |= hint_flag; } handle = svga_screen_cache_lookup(svgascreen, key); @@ -436,24 +480,32 @@ svga_screen_surface_create(struct svga_screen *svgascreen, key->size.width); else SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, - "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, + "reuse sid %p sz %dx%dx%d mips %d faces %d arraySize %d\n", handle, key->size.width, key->size.height, key->size.depth, key->numMipLevels, - key->numFaces); + key->numFaces, + key->arraySize); } } if (!handle) { + unsigned usage = 0; + + if (!key->cachable) + usage |= SVGA_SURFACE_USAGE_SHARED; + if (key->scanout) + usage |= SVGA_SURFACE_USAGE_SCANOUT; + handle = sws->surface_create(sws, key->flags, key->format, - key->cachable ? - 0 : SVGA_SURFACE_USAGE_SHARED, + usage, key->size, - key->numFaces, - key->numMipLevels); + key->numFaces * key->arraySize, + key->numMipLevels, + key->sampleCount); if (handle) SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, " CREATE sid %p sz %dx%dx%d\n", diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h index 56ac62b39..424eb2c5a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h @@ -62,9 +62,12 @@ struct svga_host_surface_cache_key SVGA3dSurfaceFlags flags; SVGA3dSurfaceFormat format; SVGA3dSize size; - uint32_t numFaces:24; - uint32_t numMipLevels:7; + uint32_t numFaces:3; + uint32_t arraySize:16; + uint32_t numMipLevels:6; uint32_t cachable:1; /* False if this is a shared surface */ + uint32_t sampleCount:5; + uint32_t scanout:1; }; @@ -137,6 +140,7 @@ svga_screen_cache_init(struct svga_screen *svgascreen); struct svga_winsys_surface * svga_screen_surface_create(struct svga_screen *svgascreen, + unsigned bind_flags, unsigned usage, struct svga_host_surface_cache_key *key); void diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.c b/lib/mesa/src/gallium/drivers/svga/svga_shader.c index 46efa07df..5c99e16d9 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_shader.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.c @@ -27,14 +27,318 @@ #include "util/u_memory.h" #include "svga_context.h" #include "svga_cmd.h" +#include "svga_format.h" #include "svga_shader.h" +/** + * This bit isn't really used anywhere. It only serves to help + * generate a unique "signature" for the vertex shader output bitmask. + * Shader input/output signatures are used to resolve shader linking + * issues. + */ +#define FOG_GENERIC_BIT (((uint64_t) 1) << 63) + + +/** + * Use the shader info to generate a bitmask indicating which generic + * inputs are used by the shader. A set bit indicates that GENERIC[i] + * is used. 
+ */ +uint64_t +svga_get_generic_inputs_mask(const struct tgsi_shader_info *info) +{ + unsigned i; + uint64_t mask = 0x0; + + for (i = 0; i < info->num_inputs; i++) { + if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + unsigned j = info->input_semantic_index[i]; + assert(j < sizeof(mask) * 8); + mask |= ((uint64_t) 1) << j; + } + } + + return mask; +} + + +/** + * Scan shader info to return a bitmask of written outputs. + */ +uint64_t +svga_get_generic_outputs_mask(const struct tgsi_shader_info *info) +{ + unsigned i; + uint64_t mask = 0x0; + + for (i = 0; i < info->num_outputs; i++) { + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_GENERIC: + { + unsigned j = info->output_semantic_index[i]; + assert(j < sizeof(mask) * 8); + mask |= ((uint64_t) 1) << j; + } + break; + case TGSI_SEMANTIC_FOG: + mask |= FOG_GENERIC_BIT; + break; + } + } + + return mask; +} + + + +/** + * Given a mask of used generic variables (as returned by the above functions) + * fill in a table which maps those indexes to small integers. + * This table is used by the remap_generic_index() function in + * svga_tgsi_decl_sm30.c + * Example: if generics_mask = binary(1010) it means that GENERIC[1] and + * GENERIC[3] are used. The remap_table will contain: + * table[1] = 0; + * table[3] = 1; + * The remaining table entries will be filled in with the next unused + * generic index (in this example, 2). + */ +void +svga_remap_generics(uint64_t generics_mask, + int8_t remap_table[MAX_GENERIC_VARYING]) +{ + /* Note texcoord[0] is reserved so start at 1 */ + unsigned count = 1, i; + + for (i = 0; i < MAX_GENERIC_VARYING; i++) { + remap_table[i] = -1; + } + + /* for each bit set in generic_mask */ + while (generics_mask) { + unsigned index = ffsll(generics_mask) - 1; + remap_table[index] = count++; + generics_mask &= ~((uint64_t) 1 << index); + } +} + + +/** + * Use the generic remap table to map a TGSI generic varying variable + * index to a small integer. If the remapping table doesn't have a + * valid value for the given index (the table entry is -1) it means + * the fragment shader doesn't use that VS output. Just allocate + * the next free value in that case. Alternately, we could cull + * VS instructions that write to register, or replace the register + * with a dummy temp register. + * XXX TODO: we should do one of the later as it would save precious + * texcoord registers. + */ +int +svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], + int generic_index) +{ + assert(generic_index < MAX_GENERIC_VARYING); + + if (generic_index >= MAX_GENERIC_VARYING) { + /* just don't return a random/garbage value */ + generic_index = MAX_GENERIC_VARYING - 1; + } + + if (remap_table[generic_index] == -1) { + /* This is a VS output that has no matching PS input. Find a + * free index. + */ + int i, max = 0; + for (i = 0; i < MAX_GENERIC_VARYING; i++) { + max = MAX2(max, remap_table[i]); + } + remap_table[generic_index] = max + 1; + } + + return remap_table[generic_index]; +} + + +/** + * Initialize the shader-neutral fields of svga_compile_key from context + * state. This is basically the texture-related state. 
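The generic-varying helpers above (input/output masks plus the remap table) are typically used together; a minimal usage sketch, assuming the svga_fragment_shader fields declared later in this patch (illustrative only, not patch code):

   /* Collect the GENERIC[] indices the fragment shader reads, then build
    * the table that maps those sparse indices onto consecutive slots. */
   uint64_t used = svga_get_generic_inputs_mask(&fs->base.info);
   svga_remap_generics(used, fs->generic_remap_table);
   /* Entries for unused indices stay -1; svga_remap_generic_index() later
    * assigns a free slot on demand for VS outputs with no matching FS input. */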
+ */ +void +svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, + struct svga_compile_key *key) +{ + unsigned i, idx = 0; + + assert(shader < Elements(svga->curr.num_sampler_views)); + + for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) { + struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; + if (view) { + assert(svga->curr.sampler[shader][i]); + assert(view->texture); + assert(view->texture->target < (1 << 4)); /* texture_target:4 */ + + key->tex[i].texture_target = view->texture->target; + + /* 1D/2D array textures with one slice are treated as non-arrays + * by the SVGA3D device. Convert the texture type here so that + * we emit the right TEX/SAMPLE instruction in the shader. + */ + if (view->texture->array_size == 1) { + if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) { + key->tex[i].texture_target = PIPE_TEXTURE_1D; + } + else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) { + key->tex[i].texture_target = PIPE_TEXTURE_2D; + } + } + + key->tex[i].texture_msaa = view->texture->nr_samples > 1; + if (!svga->curr.sampler[shader][i]->normalized_coords) { + assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */ + key->tex[i].width_height_idx = idx++; + key->tex[i].unnormalized = TRUE; + ++key->num_unnormalized_coords; + } + + key->tex[i].swizzle_r = view->swizzle_r; + key->tex[i].swizzle_g = view->swizzle_g; + key->tex[i].swizzle_b = view->swizzle_b; + key->tex[i].swizzle_a = view->swizzle_a; + + key->tex[i].return_type = svga_get_texture_datatype(view->format); + } + } + key->num_textures = svga->curr.num_sampler_views[shader]; +} + + +/** Search for a compiled shader variant with the same compile key */ +struct svga_shader_variant * +svga_search_shader_key(const struct svga_shader *shader, + const struct svga_compile_key *key) +{ + struct svga_shader_variant *variant = shader->variants; + + assert(key); + + for ( ; variant; variant = variant->next) { + if (svga_compile_keys_equal(key, &variant->key)) + return variant; + } + return NULL; +} + +/** Search for a shader with the same token key */ +struct svga_shader * +svga_search_shader_token_key(struct svga_shader *pshader, + const struct svga_token_key *key) +{ + struct svga_shader *shader = pshader; + + assert(key); + + for ( ; shader; shader = shader->next) { + if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0) + return shader; + } + return NULL; +} + +/** + * Helper function to define a gb shader for non-vgpu10 device + */ +static enum pipe_error +define_gb_shader_vgpu9(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant, + unsigned codeLen) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret; + + /** + * Create gb memory for the shader and upload the shader code. + * Kernel module will allocate an id for the shader and issue + * the DefineGBShader command. + */ + variant->gb_shader = sws->shader_create(sws, type, + variant->tokens, codeLen); + + if (!variant->gb_shader) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader); + + return ret; +} + +/** + * Helper function to define a gb shader for vgpu10 device + */ +static enum pipe_error +define_gb_shader_vgpu10(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant, + unsigned codeLen) +{ + struct svga_winsys_context *swc = svga->swc; + enum pipe_error ret; + + /** + * Shaders in VGPU10 enabled device reside in the device COTable. 
+ * SVGA driver will allocate an integer ID for the shader and + * issue DXDefineShader and DXBindShader commands. + */ + variant->id = util_bitmask_add(svga->shader_id_bm); + if (variant->id == UTIL_BITMASK_INVALID_INDEX) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + + /* Create gb memory for the shader and upload the shader code */ + variant->gb_shader = swc->shader_create(swc, + variant->id, type, + variant->tokens, codeLen); + + if (!variant->gb_shader) { + /* Free the shader ID */ + assert(variant->id != UTIL_BITMASK_INVALID_INDEX); + goto fail_no_allocation; + } + + /** + * Since we don't want to do any flush within state emission to avoid + * partial state in a command buffer, it's important to make sure that + * there is enough room to send both the DXDefineShader & DXBindShader + * commands in the same command buffer. So let's send both + * commands in one command reservation. If it fails, we'll undo + * the shader creation and return an error. + */ + ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader, + variant->id, type, codeLen); + + if (ret != PIPE_OK) + goto fail; + + return PIPE_OK; + +fail: + swc->shader_destroy(swc, variant->gb_shader); + variant->gb_shader = NULL; + +fail_no_allocation: + util_bitmask_clear(svga->shader_id_bm, variant->id); + variant->id = UTIL_BITMASK_INVALID_INDEX; + + return PIPE_ERROR_OUT_OF_MEMORY; +} /** * Issue the SVGA3D commands to define a new shader. - * \param result contains the shader tokens, etc. The result->id field will - * be set here. + * \param variant contains the shader tokens, etc. The result->id field will + * be set here. */ enum pipe_error svga_define_shader(struct svga_context *svga, @@ -42,27 +346,17 @@ svga_define_shader(struct svga_context *svga, struct svga_shader_variant *variant) { unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]); + enum pipe_error ret; - if (svga_have_gb_objects(svga)) { - struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret; + variant->id = UTIL_BITMASK_INVALID_INDEX; - variant->gb_shader = sws->shader_create(sws, type, - variant->tokens, codeLen); - if (!variant->gb_shader) - return PIPE_ERROR_OUT_OF_MEMORY; - - ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader); - if (ret != PIPE_OK) { - sws->shader_destroy(sws, variant->gb_shader); - variant->gb_shader = NULL; - } - - return ret; + if (svga_have_gb_objects(svga)) { + if (svga_have_vgpu10(svga)) + return define_gb_shader_vgpu10(svga, type, variant, codeLen); + else + return define_gb_shader_vgpu9(svga, type, variant, codeLen); } else { - enum pipe_error ret; - /* Allocate an integer ID for the shader */ variant->id = util_bitmask_add(svga->shader_id_bm); if (variant->id == UTIL_BITMASK_INVALID_INDEX) { @@ -80,14 +374,53 @@ svga_define_shader(struct svga_context *svga, assert(variant->id != UTIL_BITMASK_INVALID_INDEX); util_bitmask_clear(svga->shader_id_bm, variant->id); variant->id = UTIL_BITMASK_INVALID_INDEX; - return ret; } } - return PIPE_OK; + return ret; +} + + +/** + * Issue the SVGA3D commands to set/bind a shader. + * \param result the shader to bind. + */ +enum pipe_error +svga_set_shader(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant) +{ + enum pipe_error ret; + unsigned id = variant ? variant->id : SVGA3D_INVALID_ID; + + assert(type == SVGA3D_SHADERTYPE_VS || + type == SVGA3D_SHADERTYPE_GS || + type == SVGA3D_SHADERTYPE_PS); + + if (svga_have_gb_objects(svga)) { + struct svga_winsys_gb_shader *gbshader = + variant ? 
variant->gb_shader : NULL; + + if (svga_have_vgpu10(svga)) + ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id); + else + ret = SVGA3D_SetGBShader(svga->swc, type, gbshader); + } + else { + ret = SVGA3D_SetShader(svga->swc, type, id); + } + + return ret; } +struct svga_shader_variant * +svga_new_shader_variant(struct svga_context *svga) +{ + svga->hud.num_shaders++; + return CALLOC_STRUCT(svga_shader_variant); +} + enum pipe_error svga_destroy_shader_variant(struct svga_context *svga, @@ -96,32 +429,94 @@ svga_destroy_shader_variant(struct svga_context *svga, { enum pipe_error ret = PIPE_OK; - if (svga_have_gb_objects(svga)) { - struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - - sws->shader_destroy(sws, variant->gb_shader); + if (svga_have_gb_objects(svga) && variant->gb_shader) { + if (svga_have_vgpu10(svga)) { + struct svga_winsys_context *swc = svga->swc; + swc->shader_destroy(swc, variant->gb_shader); + ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); + if (ret != PIPE_OK) { + /* flush and try again */ + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); + } + util_bitmask_clear(svga->shader_id_bm, variant->id); + } + else { + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + sws->shader_destroy(sws, variant->gb_shader); + } variant->gb_shader = NULL; - goto end; } - - /* first try */ - if (variant->id != UTIL_BITMASK_INVALID_INDEX) { - ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); - - if (ret != PIPE_OK) { - /* flush and try again */ - svga_context_flush(svga, NULL); - + else { + if (variant->id != UTIL_BITMASK_INVALID_INDEX) { ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); - assert(ret == PIPE_OK); + if (ret != PIPE_OK) { + /* flush and try again */ + svga_context_flush(svga, NULL); + ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); + assert(ret == PIPE_OK); + } + util_bitmask_clear(svga->shader_id_bm, variant->id); } - - util_bitmask_clear(svga->shader_id_bm, variant->id); } -end: FREE((unsigned *)variant->tokens); FREE(variant); + svga->hud.num_shaders--; + return ret; } + +/* + * Rebind shaders. + * Called at the beginning of every new command buffer to ensure that + * shaders are properly paged-in. Instead of sending the SetShader + * command, this function sends a private allocation command to + * page in a shader. This avoids emitting redundant state to the device + * just to page in a resource. + */ +enum pipe_error +svga_rebind_shaders(struct svga_context *svga) +{ + struct svga_winsys_context *swc = svga->swc; + struct svga_hw_draw_state *hw = &svga->state.hw_draw; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + /** + * If the underlying winsys layer does not need resource rebinding, + * just clear the rebind flags and return. 
+ */ + if (swc->resource_rebind == NULL) { + svga->rebind.flags.vs = 0; + svga->rebind.flags.gs = 0; + svga->rebind.flags.fs = 0; + + return PIPE_OK; + } + + if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.vs = 0; + + if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.gs = 0; + + if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.fs = 0; + + return PIPE_OK; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.h b/lib/mesa/src/gallium/drivers/svga/svga_shader.h index 5102159b9..f49fdb46d 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_shader.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.h @@ -27,8 +27,244 @@ #define SVGA_SHADER_H #include "svga3d_reg.h" +#include "svga_context.h" +#include "svga_streamout.h" -struct svga_shader_variant; + +/** + * We use a 64-bit mask to keep track of the generic indexes. + * This is the maximum semantic index for a TGSI GENERIC[i] register. + */ +#define MAX_GENERIC_VARYING 64 + + +struct svga_context; + + +struct svga_compile_key +{ + /* vertex shader only */ + struct { + uint64_t fs_generic_inputs; + unsigned passthrough:1; + unsigned need_prescale:1; + unsigned undo_viewport:1; + unsigned allow_psiz:1; + /** The following are all 32-bit bitmasks (per VS input) */ + unsigned adjust_attrib_range; + unsigned attrib_is_pure_int; + unsigned adjust_attrib_w_1; + unsigned adjust_attrib_itof; + unsigned adjust_attrib_utof; + unsigned attrib_is_bgra; + unsigned attrib_puint_to_snorm; + unsigned attrib_puint_to_uscaled; + unsigned attrib_puint_to_sscaled; + } vs; + + /* geometry shader only */ + struct { + uint64_t vs_generic_outputs; + unsigned need_prescale:1; + unsigned writes_psize:1; + unsigned wide_point:1; + } gs; + + /* fragment shader only */ + struct { + uint64_t vs_generic_outputs; + uint64_t gs_generic_outputs; + unsigned light_twoside:1; + unsigned front_ccw:1; + unsigned white_fragments:1; + unsigned flatshade:1; + unsigned pstipple:1; + unsigned alpha_func:4; /**< SVGA3D_CMP_x */ + unsigned write_color0_to_n_cbufs:4; + unsigned aa_point:1; + int aa_point_coord_index; + float alpha_ref; + } fs; + + /* any shader type */ + int8_t generic_remap_table[MAX_GENERIC_VARYING]; + unsigned num_textures:8; + unsigned num_unnormalized_coords:8; + unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES; + unsigned sprite_origin_lower_left:1; + unsigned sprite_coord_enable; + struct { + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned unnormalized:1; + unsigned width_height_idx:5; /**< texture unit */ + unsigned texture_target:4; /**< PIPE_TEXTURE_x */ + unsigned texture_msaa:1; /**< A multisample texture? */ + unsigned sprite_texgen:1; + unsigned swizzle_r:3; + unsigned swizzle_g:3; + unsigned swizzle_b:3; + unsigned swizzle_a:3; + unsigned return_type:3; /**< TGSI_RETURN_TYPE_x */ + } tex[PIPE_MAX_SAMPLERS]; + /* Note: svga_compile_keys_equal() depends on the variable-size + * tex[] array being at the end of this structure. 
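Because tex[] is the trailing member of svga_compile_key, comparing two keys only needs the bytes up to the last used sampler entry; a minimal illustration of the size computation performed by svga_compile_keys_equal() further below (offsetof needs <stddef.h>; illustrative only):

   /* Meaningful size of a key that uses num_textures sampler entries:
    * everything before tex[] plus the used portion of tex[]. */
   size_t key_size = offsetof(struct svga_compile_key, tex)
                   + a->num_textures * sizeof a->tex[0];
   /* identical to: (const char *) &a->tex[a->num_textures] - (const char *) a */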
+ */ +}; + +/* A key for a variant of token string of a shader */ +struct svga_token_key { + struct { + unsigned sprite_coord_enable:24; + unsigned sprite_origin_upper_left:1; + unsigned point_pos_stream_out:1; + unsigned writes_psize:1; + unsigned aa_point:1; + } gs; +}; + +/** + * A single TGSI shader may be compiled into different variants of + * SVGA3D shaders depending on the compile key. Each user shader + * will have a linked list of these variants. + */ +struct svga_shader_variant +{ + const struct svga_shader *shader; + + /** Parameters used to generate this variant */ + struct svga_compile_key key; + + /* Compiled shader tokens: + */ + const unsigned *tokens; + unsigned nr_tokens; + + /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE, + * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY. + */ + unsigned id; + + /** Start of extra constants (number of float[4] constants) */ + unsigned extra_const_start; + + /* GB object buffer containing the bytecode */ + struct svga_winsys_gb_shader *gb_shader; + + boolean uses_flat_interp; /** TRUE if flat interpolation qualifier is + * applied to any of the varyings. + */ + + /** Is the color output just a constant value? (fragment shader only) */ + boolean constant_color_output; + + /** For FS-based polygon stipple */ + unsigned pstipple_sampler_unit; + + /** Next variant */ + struct svga_shader_variant *next; +}; + + +struct svga_shader +{ + const struct tgsi_token *tokens; + struct svga_token_key token_key; /* token key for the token string */ + struct tgsi_shader_info info; + + /* List of shaders with tokens derived from the same token string */ + struct svga_shader *next; + struct svga_shader *parent; /* shader with the original token string */ + + struct svga_stream_output *stream_output; + + /** Head of linked list of compiled variants */ + struct svga_shader_variant *variants; + + unsigned id; /**< for debugging only */ +}; + + +struct svga_fragment_shader +{ + struct svga_shader base; + + struct draw_fragment_shader *draw_shader; + + /** Mask of which generic varying variables are read by this shader */ + uint64_t generic_inputs; + + /** Table mapping original TGSI generic indexes to low integers */ + int8_t generic_remap_table[MAX_GENERIC_VARYING]; +}; + + +struct svga_vertex_shader +{ + struct svga_shader base; + + struct draw_vertex_shader *draw_shader; + + /** Mask of which generic varying variables are written by this shader */ + uint64_t generic_outputs; + + /** Generated geometry shader that goes with this vertex shader */ + struct svga_geometry_shader *gs; +}; + + +struct svga_geometry_shader +{ + struct svga_shader base; + + struct draw_geometry_shader *draw_shader; + + /** Table mapping original TGSI generic indexes to low integers */ + int8_t generic_remap_table[MAX_GENERIC_VARYING]; + uint64_t generic_outputs; + + unsigned aa_point_coord_index; /* generic index for aa point coord */ + + unsigned wide_point:1; /* set if the shader emulates wide point */ +}; + + +static inline boolean +svga_compile_keys_equal(const struct svga_compile_key *a, + const struct svga_compile_key *b) +{ + unsigned key_size = + (const char *) &a->tex[a->num_textures] - (const char *) a; + + return memcmp(a, b, key_size) == 0; +} + + +uint64_t +svga_get_generic_inputs_mask(const struct tgsi_shader_info *info); + +uint64_t +svga_get_generic_outputs_mask(const struct tgsi_shader_info *info); + +void +svga_remap_generics(uint64_t generics_mask, + int8_t remap_table[MAX_GENERIC_VARYING]); + +int +svga_remap_generic_index(int8_t 
remap_table[MAX_GENERIC_VARYING], + int generic_index); + +void +svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, + struct svga_compile_key *key); + +struct svga_shader_variant * +svga_search_shader_key(const struct svga_shader *shader, + const struct svga_compile_key *key); + +struct svga_shader * +svga_search_shader_token_key(struct svga_shader *shader, + const struct svga_token_key *key); enum pipe_error svga_define_shader(struct svga_context *svga, @@ -36,10 +272,20 @@ svga_define_shader(struct svga_context *svga, struct svga_shader_variant *variant); enum pipe_error +svga_set_shader(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant); + +struct svga_shader_variant * +svga_new_shader_variant(struct svga_context *svga); + +enum pipe_error svga_destroy_shader_variant(struct svga_context *svga, SVGA3dShaderType type, struct svga_shader_variant *variant); +enum pipe_error +svga_rebind_shaders(struct svga_context *svga); /** * Check if a shader's bytecode exceeds the device limits. @@ -62,4 +308,40 @@ svga_shader_too_large(const struct svga_context *svga, } +/** + * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* + */ +static inline SVGA3dShaderType +svga_shader_type(unsigned shader) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + return SVGA3D_SHADERTYPE_VS; + case PIPE_SHADER_GEOMETRY: + return SVGA3D_SHADERTYPE_GS; + case PIPE_SHADER_FRAGMENT: + return SVGA3D_SHADERTYPE_PS; + default: + assert(!"Invalid shader type"); + return SVGA3D_SHADERTYPE_VS; + } +} + + +/** Does the current VS have stream output? */ +static inline boolean +svga_have_vs_streamout(const struct svga_context *svga) +{ + return svga->curr.vs != NULL && svga->curr.vs->base.stream_output != NULL; +} + + +/** Does the current GS have stream output? 
*/ +static inline boolean +svga_have_gs_streamout(const struct svga_context *svga) +{ + return svga->curr.gs != NULL && svga->curr.gs->base.stream_output != NULL; +} + + #endif /* SVGA_SHADER_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state.c b/lib/mesa/src/gallium/drivers/svga/svga_state.c index b0bc867f6..4479a2712 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state.c @@ -23,6 +23,7 @@ * **********************************************************/ +#include "util/u_bitmask.h" #include "util/u_debug.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -63,14 +64,19 @@ static const struct svga_tracked_state *hw_clear_state[] = */ static const struct svga_tracked_state *hw_draw_state[] = { + &svga_need_tgsi_transform, &svga_hw_fs, + &svga_hw_gs, &svga_hw_vs, &svga_hw_rss, - &svga_hw_tss, - &svga_hw_tss_binding, + &svga_hw_sampler, /* VGPU10 */ + &svga_hw_sampler_bindings, /* VGPU10 */ + &svga_hw_tss, /* pre-VGPU10 */ + &svga_hw_tss_binding, /* pre-VGPU10 */ &svga_hw_clip_planes, &svga_hw_vdecl, &svga_hw_fs_constants, + &svga_hw_gs_constants, &svga_hw_vs_constants, NULL }; @@ -123,7 +129,11 @@ update_state(struct svga_context *svga, const struct svga_tracked_state *atoms[], unsigned *state) { +#ifdef DEBUG boolean debug = TRUE; +#else + boolean debug = FALSE; +#endif enum pipe_error ret = PIPE_OK; unsigned i; @@ -219,6 +229,9 @@ svga_update_state(struct svga_context *svga, unsigned max_level) svga->state.dirty[i] |= svga->dirty; svga->dirty = 0; + + svga->hud.num_validations++; + return PIPE_OK; } @@ -255,23 +268,55 @@ do { \ */ enum pipe_error svga_emit_initial_state( struct svga_context *svga ) { - SVGA3dRenderState *rs; - unsigned count = 0; - const unsigned COUNT = 2; - enum pipe_error ret; - - ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT ); - if (ret != PIPE_OK) + if (svga_have_vgpu10(svga)) { + SVGA3dRasterizerStateId id = util_bitmask_add(svga->rast_object_id_bm); + enum pipe_error ret; + + /* XXX preliminary code */ + ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc, + id, + SVGA3D_FILLMODE_FILL, + SVGA3D_CULL_NONE, + 1, /* frontCounterClockwise */ + 0, /* depthBias */ + 0.0f, /* depthBiasClamp */ + 0.0f, /* slopeScaledDepthBiasClamp */ + 0, /* depthClampEnable */ + 0, /* scissorEnable */ + 0, /* multisampleEnable */ + 0, /* aalineEnable */ + 1.0f, /* lineWidth */ + 0, /* lineStippleEnable */ + 0, /* lineStippleFactor */ + 0, /* lineStipplePattern */ + 0); /* provokingVertexLast */ + + + assert(ret == PIPE_OK); + + ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, id); return ret; + } + else { + SVGA3dRenderState *rs; + unsigned count = 0; + const unsigned COUNT = 2; + enum pipe_error ret; - /* Always use D3D style coordinate space as this is the only one - * which is implemented on all backends. - */ - EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED ); - EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW ); - - assert( COUNT == count ); - SVGA_FIFOCommitAll( svga->swc ); + ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT ); + if (ret != PIPE_OK) + return ret; - return PIPE_OK; + /* Always use D3D style coordinate space as this is the only one + * which is implemented on all backends. 
+ */ + EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, + SVGA3D_COORDINATE_LEFTHANDED ); + EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW ); + + assert( COUNT == count ); + SVGA_FIFOCommitAll( svga->swc ); + + return PIPE_OK; + } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state.h b/lib/mesa/src/gallium/drivers/svga/svga_state.h index 3325626a4..04b20e161 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_state.h @@ -57,14 +57,20 @@ extern struct svga_tracked_state svga_hw_framebuffer; /* HW_DRAW */ +extern struct svga_tracked_state svga_need_tgsi_transform; extern struct svga_tracked_state svga_hw_vs; extern struct svga_tracked_state svga_hw_fs; +extern struct svga_tracked_state svga_hw_gs; extern struct svga_tracked_state svga_hw_rss; +extern struct svga_tracked_state svga_hw_pstipple; +extern struct svga_tracked_state svga_hw_sampler; +extern struct svga_tracked_state svga_hw_sampler_bindings; extern struct svga_tracked_state svga_hw_tss; extern struct svga_tracked_state svga_hw_tss_binding; extern struct svga_tracked_state svga_hw_clip_planes; extern struct svga_tracked_state svga_hw_vdecl; extern struct svga_tracked_state svga_hw_fs_constants; +extern struct svga_tracked_state svga_hw_gs_constants; extern struct svga_tracked_state svga_hw_vs_constants; /* SWTNL_DRAW @@ -93,10 +99,14 @@ enum pipe_error svga_emit_initial_state( struct svga_context *svga ); enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga ); +enum pipe_error svga_rebind_framebuffer_bindings( struct svga_context *svga ); + enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga ); enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga); enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga); +enum pipe_error svga_reemit_gs_bindings(struct svga_context *svga); + #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c index 1e1fbb099..8ab169308 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c @@ -1,3 +1,4 @@ + /********************************************************** * Copyright 2008-2009 VMware, Inc. All rights reserved. * @@ -23,9 +24,11 @@ * **********************************************************/ +#include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "pipe/p_defines.h" +#include "util/u_upload_mgr.h" #include "svga_screen.h" #include "svga_context.h" @@ -34,6 +37,7 @@ #include "svga_tgsi.h" #include "svga_debug.h" #include "svga_resource_buffer.h" +#include "svga_shader.h" #include "svga_hw_reg.h" @@ -52,65 +56,135 @@ /** Guest-backed surface constant buffers must be this size */ #define GB_CONSTBUF_SIZE (SVGA3D_CONSTREG_MAX) + /** - * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* + * Emit any extra shader-type-independent shader constants into the buffer + * pointed to by 'dest'. 
+ * \return number of float[4] constants put into the 'dest' buffer */ static unsigned -svga_shader_type(unsigned shader) +svga_get_extra_constants_common(struct svga_context *svga, + const struct svga_shader_variant *variant, + unsigned shader, float *dest) { - switch (shader) { - case PIPE_SHADER_VERTEX: - return SVGA3D_SHADERTYPE_VS; - case PIPE_SHADER_FRAGMENT: - return SVGA3D_SHADERTYPE_PS; - default: - assert(!"Unexpected shader type"); - return SVGA3D_SHADERTYPE_VS; + uint32_t *dest_u = (uint32_t *) dest; // uint version of dest + unsigned i; + unsigned count = 0; + + for (i = 0; i < variant->key.num_textures; i++) { + struct pipe_sampler_view *sv = svga->curr.sampler_views[shader][i]; + if (sv) { + struct pipe_resource *tex = sv->texture; + /* Scaling factors needed for handling unnormalized texture coordinates + * for texture rectangles. + */ + if (variant->key.tex[i].unnormalized) { + /* debug/sanity check */ + assert(variant->key.tex[i].width_height_idx == count); + + *dest++ = 1.0 / (float)tex->width0; + *dest++ = 1.0 / (float)tex->height0; + *dest++ = 1.0; + *dest++ = 1.0; + + count++; + } + + /* Store the sizes for texture buffers. + */ + if (tex->target == PIPE_BUFFER) { + unsigned bytes_per_element = util_format_get_blocksize(sv->format); + *dest_u++ = tex->width0 / bytes_per_element; + *dest_u++ = 1; + *dest_u++ = 1; + *dest_u++ = 1; + + count++; + } + } } + + return count; } /** * Emit any extra fragment shader constants into the buffer pointed * to by 'dest'. - * In particular, these would be the scaling factors needed for handling - * unnormalized texture coordinates for texture rectangles. * \return number of float[4] constants put into the dest buffer */ static unsigned svga_get_extra_fs_constants(struct svga_context *svga, float *dest) { const struct svga_shader_variant *variant = svga->state.hw_draw.fs; - const struct svga_fs_compile_key *key = &variant->key.fkey; unsigned count = 0; - /* SVGA_NEW_VS_VARIANT - */ - if (key->num_unnormalized_coords) { - unsigned i; + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_FRAGMENT, dest); - for (i = 0; i < key->num_textures; i++) { - if (key->tex[i].unnormalized) { - struct pipe_resource *tex = svga->curr.sampler_views[i]->texture; + assert(count <= MAX_EXTRA_CONSTS); - /* debug/sanity check */ - assert(key->tex[i].width_height_idx == count); + return count; +} - *dest++ = 1.0 / (float)tex->width0; - *dest++ = 1.0 / (float)tex->height0; - *dest++ = 1.0; - *dest++ = 1.0; +/** + * Emit extra constants needed for prescale computation into the + * the buffer pointed to by '*dest'. The updated buffer pointer + * will be returned in 'dest'. + */ +static unsigned +svga_get_prescale_constants(struct svga_context *svga, float **dest) +{ + memcpy(*dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float)); + *dest += 4; - count++; - } - } - } + memcpy(*dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float)); + *dest += 4; - assert(count <= MAX_EXTRA_CONSTS); + return 2; +} - return count; +/** + * Emit extra constants needed for point sprite emulation. 
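For orientation, the prescale helper above emits two consecutive float[4] registers; an illustrative summary, not patch code:

   /* svga_get_prescale_constants() writes, in order:
    *   register N+0: hw_clear.prescale.scale[0..3]
    *   register N+1: hw_clear.prescale.translate[0..3]
    * and returns 2, the number of float[4] registers it consumed, which the
    * caller adds to its running constant count. */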
+ */ +static unsigned +svga_get_pt_sprite_constants(struct svga_context *svga, float **dest) +{ + struct svga_screen *screen = svga_screen(svga->pipe.screen); + float *dst = *dest; + + dst[0] = 1.0 / (svga->curr.viewport.scale[0] * 2); + dst[1] = 1.0 / (svga->curr.viewport.scale[1] * 2); + dst[2] = svga->curr.rast->pointsize; + dst[3] = screen->maxPointSize; + *dest = *dest + 4; + return 1; } +/** + * Emit user-defined clip plane coefficients into the buffer pointed to + * by '*dest'. The updated buffer pointer will be returned in 'dest'. + */ +static unsigned +svga_get_clip_plane_constants(struct svga_context *svga, + const struct svga_shader_variant *variant, + float **dest) +{ + unsigned count = 0; + + /* SVGA_NEW_CLIP */ + if (svga_have_vgpu10(svga)) { + /* append user-defined clip plane coefficients onto constant buffer */ + unsigned clip_planes = variant->key.clip_plane_enable; + while (clip_planes) { + int i = u_bit_scan(&clip_planes); + COPY_4V(*dest, svga->curr.clip.ucp[i]); + *dest += 4; + count += 1; + } + } + return count; +} /** * Emit any extra vertex shader constants into the buffer pointed @@ -124,26 +198,71 @@ static unsigned svga_get_extra_vs_constants(struct svga_context *svga, float *dest) { const struct svga_shader_variant *variant = svga->state.hw_draw.vs; - const struct svga_vs_compile_key *key = &variant->key.vkey; unsigned count = 0; /* SVGA_NEW_VS_VARIANT */ - if (key->need_prescale) { - memcpy(dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float)); - dest += 4; + if (variant->key.vs.need_prescale) { + count += svga_get_prescale_constants(svga, &dest); + } - memcpy(dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float)); + if (variant->key.vs.undo_viewport) { + /* Used to convert window coords back to NDC coords */ + dest[0] = 1.0f / svga->curr.viewport.scale[0]; + dest[1] = 1.0f / svga->curr.viewport.scale[1]; + dest[2] = -svga->curr.viewport.translate[0]; + dest[3] = -svga->curr.viewport.translate[1]; dest += 4; - - count = 2; + count += 1; } + /* SVGA_NEW_CLIP */ + count += svga_get_clip_plane_constants(svga, variant, &dest); + + /* common constants */ + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_VERTEX, dest); + assert(count <= MAX_EXTRA_CONSTS); return count; } +/** + * Emit any extra geometry shader constants into the buffer pointed + * to by 'dest'. + */ +static unsigned +svga_get_extra_gs_constants(struct svga_context *svga, float *dest) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.gs; + unsigned count = 0; + + /* SVGA_NEW_GS_VARIANT + */ + + /* Constants for point sprite + * These are used in the transformed gs that supports point sprite. + * They need to be added before the prescale constants. + */ + if (variant->key.gs.wide_point) { + count += svga_get_pt_sprite_constants(svga, &dest); + } + + if (variant->key.gs.need_prescale) { + count += svga_get_prescale_constants(svga, &dest); + } + + /* SVGA_NEW_CLIP */ + count += svga_get_clip_plane_constants(svga, variant, &dest); + + /* common constants */ + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_GEOMETRY, dest); + + assert(count <= MAX_EXTRA_CONSTS); + return count; +} /** * Check and emit one shader constant register. 
@@ -159,6 +278,7 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i, assert(shader < PIPE_SHADER_TYPES); assert(i < SVGA3D_CONSTREG_MAX); + assert(!svga_have_vgpu10(svga)); if (memcmp(svga->state.hw_draw.cb[shader][i], value, 4 * sizeof(float)) != 0) { @@ -202,6 +322,10 @@ emit_const_range(struct svga_context *svga, unsigned i, j; enum pipe_error ret; + assert(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_FRAGMENT); + assert(!svga_have_vgpu10(svga)); + #ifdef DEBUG if (offset + count > SVGA3D_CONSTREG_MAX) { debug_printf("svga: too many constants (offset %u + count %u = %u (max = %u))\n", @@ -307,10 +431,12 @@ emit_const_range(struct svga_context *svga, /** * Emit all the constants in a constant buffer for a shader stage. + * On VGPU10, emit_consts_vgpu10 is used instead. */ static enum pipe_error -emit_consts(struct svga_context *svga, unsigned shader) +emit_consts_vgpu9(struct svga_context *svga, unsigned shader) { + const struct pipe_constant_buffer *cbuf; struct svga_screen *ss = svga_screen(svga->pipe.screen); struct pipe_transfer *transfer = NULL; unsigned count; @@ -320,85 +446,318 @@ emit_consts(struct svga_context *svga, unsigned shader) const unsigned offset = 0; assert(shader < PIPE_SHADER_TYPES); + assert(!svga_have_vgpu10(svga)); + /* Only one constant buffer per shader is supported before VGPU10. + * This is only an approximate check against that. + */ + assert(svga->curr.constbufs[shader][1].buffer == NULL); - if (svga->curr.cbufs[shader].buffer == NULL) - goto done; + cbuf = &svga->curr.constbufs[shader][0]; - data = (const float (*)[4])pipe_buffer_map(&svga->pipe, - svga->curr.cbufs[shader].buffer, - PIPE_TRANSFER_READ, - &transfer); - if (data == NULL) { - ret = PIPE_ERROR_OUT_OF_MEMORY; - goto done; - } + if (svga->curr.constbufs[shader][0].buffer) { + /* emit user-provided constants */ + data = (const float (*)[4]) + pipe_buffer_map(&svga->pipe, svga->curr.constbufs[shader][0].buffer, + PIPE_TRANSFER_READ, &transfer); + if (!data) { + return PIPE_ERROR_OUT_OF_MEMORY; + } - /* sanity check */ - assert(svga->curr.cbufs[shader].buffer->width0 >= - svga->curr.cbufs[shader].buffer_size); + /* sanity check */ + assert(cbuf->buffer->width0 >= + cbuf->buffer_size); - /* Use/apply the constant buffer size and offsets here */ - count = svga->curr.cbufs[shader].buffer_size / (4 * sizeof(float)); - data += svga->curr.cbufs[shader].buffer_offset / (4 * sizeof(float)); + /* Use/apply the constant buffer size and offsets here */ + count = cbuf->buffer_size / (4 * sizeof(float)); + data += cbuf->buffer_offset / (4 * sizeof(float)); - if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { - ret = emit_const_range( svga, shader, offset, count, data ); - if (ret != PIPE_OK) { - goto done; + if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { + ret = emit_const_range( svga, shader, offset, count, data ); } - } else { - for (i = 0; i < count; i++) { - ret = emit_const( svga, shader, offset + i, data[i] ); - if (ret != PIPE_OK) { - goto done; + else { + for (i = 0; i < count; i++) { + ret = emit_const( svga, shader, offset + i, data[i] ); + if (ret != PIPE_OK) { + break; + } } } - } -done: - if (data) pipe_buffer_unmap(&svga->pipe, transfer); + if (ret != PIPE_OK) { + return ret; + } + } + + /* emit extra shader constants */ + { + const struct svga_shader_variant *variant = NULL; + unsigned offset; + float extras[MAX_EXTRA_CONSTS][4]; + unsigned count, i; + + switch (shader) { + case PIPE_SHADER_VERTEX: + variant = svga->state.hw_draw.vs; + count = 
svga_get_extra_vs_constants(svga, (float *) extras); + break; + case PIPE_SHADER_FRAGMENT: + variant = svga->state.hw_draw.fs; + count = svga_get_extra_fs_constants(svga, (float *) extras); + break; + default: + assert(!"Unexpected shader type"); + count = 0; + } + + assert(variant); + offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + assert(count <= Elements(extras)); + + if (count > 0) { + if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { + ret = emit_const_range(svga, shader, offset, count, + (const float (*) [4])extras); + } + else { + for (i = 0; i < count; i++) { + ret = emit_const(svga, shader, offset + i, extras[i]); + if (ret != PIPE_OK) + return ret; + } + } + } + } + return ret; } + static enum pipe_error -emit_fs_consts(struct svga_context *svga, unsigned dirty) +emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader) { - struct svga_screen *ss = svga_screen(svga->pipe.screen); - const struct svga_shader_variant *variant = svga->state.hw_draw.fs; + const struct pipe_constant_buffer *cbuf; + struct pipe_resource *dst_buffer = NULL; enum pipe_error ret = PIPE_OK; + struct pipe_transfer *src_transfer; + struct svga_winsys_surface *dst_handle; + float extras[MAX_EXTRA_CONSTS][4]; + unsigned extra_count, extra_size, extra_offset; + unsigned new_buf_size; + void *src_map = NULL, *dst_map; + unsigned offset; + const struct svga_shader_variant *variant; - /* SVGA_NEW_FS_VARIANT - */ - if (variant == NULL) + assert(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_FRAGMENT); + + cbuf = &svga->curr.constbufs[shader][0]; + + switch (shader) { + case PIPE_SHADER_VERTEX: + variant = svga->state.hw_draw.vs; + extra_count = svga_get_extra_vs_constants(svga, (float *) extras); + break; + case PIPE_SHADER_FRAGMENT: + variant = svga->state.hw_draw.fs; + extra_count = svga_get_extra_fs_constants(svga, (float *) extras); + break; + case PIPE_SHADER_GEOMETRY: + variant = svga->state.hw_draw.gs; + extra_count = svga_get_extra_gs_constants(svga, (float *) extras); + break; + default: + assert(!"Unexpected shader type"); + /* Don't return an error code since we don't want to keep re-trying + * this function and getting stuck in an infinite loop. + */ return PIPE_OK; + } - /* SVGA_NEW_FS_CONST_BUFFER + assert(variant); + + /* Compute extra constants size and offset in bytes */ + extra_size = extra_count * 4 * sizeof(float); + extra_offset = 4 * sizeof(float) * variant->extra_const_start; + + if (cbuf->buffer_size + extra_size == 0) + return PIPE_OK; /* nothing to do */ + + /* Typically, the cbuf->buffer here is a user-space buffer so mapping + * it is really cheap. If we ever get real HW buffers for constants + * we should void mapping and instead use a ResourceCopy command. + */ + if (cbuf->buffer_size > 0) { + src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer, + cbuf->buffer_offset, cbuf->buffer_size, + PIPE_TRANSFER_READ, &src_transfer); + assert(src_map); + if (!src_map) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + } + + /* The new/dest buffer's size must be large enough to hold the original, + * user-specified constants, plus the extra constants. + * The size of the original constant buffer _should_ agree with what the + * shader is expecting, but it might not (it's not enforced anywhere by + * gallium). + */ + new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size; + + /* According to the DX10 spec, the constant buffer size must be + * in multiples of 16. 
+ */ + new_buf_size = align(new_buf_size, 16); + + u_upload_alloc(svga->const0_upload, 0, new_buf_size, + CONST0_UPLOAD_ALIGNMENT, &offset, + &dst_buffer, &dst_map); + if (!dst_map) { + if (src_map) + pipe_buffer_unmap(&svga->pipe, src_transfer); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + if (src_map) { + memcpy(dst_map, src_map, cbuf->buffer_size); + pipe_buffer_unmap(&svga->pipe, src_transfer); + } + + if (extra_size) { + assert(extra_offset + extra_size <= new_buf_size); + memcpy((char *) dst_map + extra_offset, extras, extra_size); + } + u_upload_unmap(svga->const0_upload); + + /* Issue the SetSingleConstantBuffer command */ + dst_handle = svga_buffer_handle(svga, dst_buffer); + if (!dst_handle) { + pipe_resource_reference(&dst_buffer, NULL); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + assert(new_buf_size % 16 == 0); + ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, + 0, /* index */ + svga_shader_type(shader), + dst_handle, + offset, + new_buf_size); + + if (ret != PIPE_OK) { + pipe_resource_reference(&dst_buffer, NULL); + return ret; + } + + /* Save this const buffer until it's replaced in the future. + * Otherwise, all references to the buffer will go away after the + * command buffer is submitted, it'll get recycled and we will have + * incorrect constant buffer bindings. */ - ret = emit_consts( svga, PIPE_SHADER_FRAGMENT ); - if (ret != PIPE_OK) + pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer); + + svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size; + + pipe_resource_reference(&dst_buffer, NULL); + + return ret; +} + + +static enum pipe_error +emit_consts_vgpu10(struct svga_context *svga, unsigned shader) +{ + enum pipe_error ret; + unsigned dirty_constbufs; + unsigned enabled_constbufs; + + /* Emit 0th constant buffer (with extra constants) */ + ret = emit_constbuf_vgpu10(svga, shader); + if (ret != PIPE_OK) { return ret; + } - /* emit extra shader constants */ - { - unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; - float extras[MAX_EXTRA_CONSTS][4]; - unsigned count, i; + enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u; - count = svga_get_extra_fs_constants(svga, (float *) extras); + /* Emit other constant buffers (UBOs) */ + dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u; - if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { - ret = emit_const_range(svga, PIPE_SHADER_FRAGMENT, offset, count, - (const float (*) [4])extras); - } else { - for (i = 0; i < count; i++) { - ret = emit_const(svga, PIPE_SHADER_FRAGMENT, offset + i, extras[i]); - if (ret != PIPE_OK) - return ret; + while (dirty_constbufs) { + unsigned index = u_bit_scan(&dirty_constbufs); + unsigned offset = svga->curr.constbufs[shader][index].buffer_offset; + unsigned size = svga->curr.constbufs[shader][index].buffer_size; + struct svga_buffer *buffer = + svga_buffer(svga->curr.constbufs[shader][index].buffer); + struct svga_winsys_surface *handle; + + if (buffer) { + handle = svga_buffer_handle(svga, &buffer->b.b); + enabled_constbufs |= 1 << index; + } + else { + handle = NULL; + enabled_constbufs &= ~(1 << index); + assert(offset == 0); + assert(size == 0); + } + + if (size % 16 != 0) { + /* GL's buffer range sizes can be any number of bytes but the + * SVGA3D device requires a multiple of 16 bytes. 
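A quick arithmetic illustration of the 16-byte size rule handled here and in the UBO loop below (assumes Mesa's align() round-up helper; illustrative only):

   unsigned size = 100;
   unsigned rounded_up   = align(size, 16);   /* 112: used when it still fits in the buffer */
   unsigned rounded_down = size & ~15u;       /*  96: fallback that avoids reading past the end */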
+ */ + const unsigned total_size = buffer->b.b.width0; + + if (offset + align(size, 16) <= total_size) { + /* round up size to multiple of 16 */ + size = align(size, 16); + } + else { + /* round down to mulitple of 16 (this may cause rendering problems + * but should avoid a device error). + */ + size &= ~15; } } + + assert(size % 16 == 0); + ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, + index, + svga_shader_type(shader), + handle, + offset, + size); + if (ret != PIPE_OK) + return ret; + } + + svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs; + svga->state.dirty_constbufs[shader] = 0; + + return ret; +} + +static enum pipe_error +emit_fs_consts(struct svga_context *svga, unsigned dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.fs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_FS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_FS_CONST_BUFFER + */ + if (svga_have_vgpu10(svga)) { + ret = emit_consts_vgpu10(svga, PIPE_SHADER_FRAGMENT); + } + else { + ret = emit_consts_vgpu9(svga, PIPE_SHADER_FRAGMENT); } return ret; @@ -419,40 +778,21 @@ struct svga_tracked_state svga_hw_fs_constants = static enum pipe_error emit_vs_consts(struct svga_context *svga, unsigned dirty) { - struct svga_screen *ss = svga_screen(svga->pipe.screen); const struct svga_shader_variant *variant = svga->state.hw_draw.vs; enum pipe_error ret = PIPE_OK; /* SVGA_NEW_VS_VARIANT */ - if (variant == NULL) + if (!variant) return PIPE_OK; /* SVGA_NEW_VS_CONST_BUFFER */ - ret = emit_consts( svga, PIPE_SHADER_VERTEX ); - if (ret != PIPE_OK) - return ret; - - /* emit extra shader constants */ - { - unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; - float extras[MAX_EXTRA_CONSTS][4]; - unsigned count, i; - - count = svga_get_extra_vs_constants(svga, (float *) extras); - assert(count <= Elements(extras)); - - if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { - ret = emit_const_range(svga, PIPE_SHADER_VERTEX, offset, count, - (const float (*) [4]) extras); - } else { - for (i = 0; i < count; i++) { - ret = emit_const(svga, PIPE_SHADER_VERTEX, offset + i, extras[i]); - if (ret != PIPE_OK) - return ret; - } - } + if (svga_have_vgpu10(svga)) { + ret = emit_consts_vgpu10(svga, PIPE_SHADER_VERTEX); + } + else { + ret = emit_consts_vgpu9(svga, PIPE_SHADER_VERTEX); } return ret; @@ -467,3 +807,42 @@ struct svga_tracked_state svga_hw_vs_constants = SVGA_NEW_VS_VARIANT), emit_vs_consts }; + + +static enum pipe_error +emit_gs_consts(struct svga_context *svga, unsigned dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.gs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_GS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_GS_CONST_BUFFER + */ + if (svga_have_vgpu10(svga)) { + /** + * If only the rasterizer state has changed and the current geometry + * shader does not emit wide points, then there is no reason to + * re-emit the GS constants, so skip it. 
+ */ + if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point) + return PIPE_OK; + + ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY); + } + + return ret; +} + + +struct svga_tracked_state svga_hw_gs_constants = +{ + "hw gs params", + (SVGA_NEW_GS_CONST_BUFFER | + SVGA_NEW_RAST | + SVGA_NEW_GS_VARIANT), + emit_gs_consts +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c index 1c174da31..4b0400bf8 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -26,12 +26,14 @@ #include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_format.h" #include "svga_context.h" #include "svga_state.h" #include "svga_cmd.h" #include "svga_debug.h" #include "svga_screen.h" +#include "svga_surface.h" /* @@ -46,30 +48,26 @@ #define MAX_RT_PER_BATCH 8 -/*********************************************************************** - * Hardware state update - */ - static enum pipe_error -emit_framebuffer( struct svga_context *svga, - unsigned dirty ) +emit_fb_vgpu9(struct svga_context *svga) { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; - boolean reemit = svga->rebind.rendertargets; + boolean reemit = svga->rebind.flags.rendertargets; unsigned i; enum pipe_error ret; + assert(!svga_have_vgpu10(svga)); + /* * We need to reemit non-null surface bindings, even when they are not * dirty, to ensure that the resources are paged in. */ for (i = 0; i < svgascreen->max_color_buffers; i++) { - if (curr->cbufs[i] != hw->cbufs[i] || - (reemit && hw->cbufs[i])) { + if ((curr->cbufs[i] != hw->cbufs[i]) || (reemit && hw->cbufs[i])) { if (svga->curr.nr_fbs++ > MAX_RT_PER_BATCH) return PIPE_ERROR_OUT_OF_MEMORY; @@ -82,14 +80,13 @@ emit_framebuffer( struct svga_context *svga, } } - if (curr->zsbuf != hw->zsbuf || - (reemit && hw->zsbuf)) { + if ((curr->zsbuf != hw->zsbuf) || (reemit && hw->zsbuf)) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf); if (ret != PIPE_OK) return ret; if (curr->zsbuf && - curr->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) { + util_format_is_depth_and_stencil(curr->zsbuf->format)) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf); if (ret != PIPE_OK) @@ -104,8 +101,6 @@ emit_framebuffer( struct svga_context *svga, pipe_surface_reference(&hw->zsbuf, curr->zsbuf); } - svga->rebind.rendertargets = FALSE; - return PIPE_OK; } @@ -118,15 +113,15 @@ emit_framebuffer( struct svga_context *svga, * Called at the beginning of every new command buffer to ensure that * non-dirty rendertargets are properly paged-in. 
*/ -enum pipe_error -svga_reemit_framebuffer_bindings(struct svga_context *svga) +static enum pipe_error +svga_reemit_framebuffer_bindings_vgpu9(struct svga_context *svga) { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; unsigned i; enum pipe_error ret; - assert(svga->rebind.rendertargets); + assert(!svga_have_vgpu10(svga)); for (i = 0; i < svgascreen->max_color_buffers; i++) { if (hw->cbufs[i]) { @@ -145,7 +140,7 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga) } if (hw->zsbuf && - hw->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) { + util_format_is_depth_and_stencil(hw->zsbuf->format)) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf); if (ret != PIPE_OK) { return ret; @@ -159,7 +154,161 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga) } } - svga->rebind.rendertargets = FALSE; + return PIPE_OK; +} + + + +static enum pipe_error +emit_fb_vgpu10(struct svga_context *svga) +{ + const struct svga_screen *ss = svga_screen(svga->pipe.screen); + struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS]; + struct pipe_surface *dsv; + struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + const unsigned num_color = MAX2(curr->nr_cbufs, hw->nr_cbufs); + unsigned i; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + /* Setup render targets array. Note that we loop over the max of the + * number of previously bound buffers and the new buffers to unbind + * any previously bound buffers when the new number of buffers is less + * than the old number of buffers. + */ + for (i = 0; i < num_color; i++) { + if (curr->cbufs[i]) { + rtv[i] = svga_validate_surface_view(svga, + svga_surface(curr->cbufs[i])); + if (rtv[i] == NULL) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + + assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID); + } + else { + rtv[i] = NULL; + } + } + + /* Setup depth stencil view */ + if (curr->zsbuf) { + dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf)); + if (!dsv) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + } + else { + dsv = NULL; + } + + ret = SVGA3D_vgpu10_SetRenderTargets(svga->swc, num_color, rtv, dsv); + if (ret != PIPE_OK) + return ret; + + for (i = 0; i < ss->max_color_buffers; i++) { + if (hw->cbufs[i] != curr->cbufs[i]) { + /* propagate the backed view surface before unbinding it */ + if (hw->cbufs[i] && svga_surface(hw->cbufs[i])->backed) { + svga_propagate_surface(svga, + &svga_surface(hw->cbufs[i])->backed->base); + } + pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]); + } + } + hw->nr_cbufs = curr->nr_cbufs; + + if (hw->zsbuf != curr->zsbuf) { + /* propagate the backed view surface before unbinding it */ + if (hw->zsbuf && svga_surface(hw->zsbuf)->backed) { + svga_propagate_surface(svga, &svga_surface(hw->zsbuf)->backed->base); + } + pipe_surface_reference(&hw->zsbuf, curr->zsbuf); + } + + return ret; +} + + +static enum pipe_error +emit_framebuffer(struct svga_context *svga, unsigned dirty) +{ + if (svga_have_vgpu10(svga)) { + return emit_fb_vgpu10(svga); + } + else { + return emit_fb_vgpu9(svga); + } +} + + +/* + * Rebind rendertargets. + * + * Similar to emit_framebuffer, but without any state checking/update. + * + * Called at the beginning of every new command buffer to ensure that + * non-dirty rendertargets are properly paged-in. 
+ */ +enum pipe_error +svga_reemit_framebuffer_bindings(struct svga_context *svga) +{ + enum pipe_error ret; + + assert(svga->rebind.flags.rendertargets); + + if (svga_have_vgpu10(svga)) { + ret = emit_fb_vgpu10(svga); + } + else { + ret = svga_reemit_framebuffer_bindings_vgpu9(svga); + } + + svga->rebind.flags.rendertargets = FALSE; + + return ret; +} + + +/* + * Send a private allocation command to page in rendertargets resource. + */ +enum pipe_error +svga_rebind_framebuffer_bindings(struct svga_context *svga) +{ + const struct svga_screen *ss = svga_screen(svga->pipe.screen); + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + unsigned i; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + if (!svga->rebind.flags.rendertargets) + return PIPE_OK; + + for (i = 0; i < ss->max_color_buffers; i++) { + if (hw->cbufs[i]) { + ret = svga->swc->resource_rebind(svga->swc, + svga_surface(hw->cbufs[i])->handle, + NULL, + SVGA_RELOC_WRITE); + if (ret != PIPE_OK) + return ret; + } + } + + if (hw->zsbuf) { + ret = svga->swc->resource_rebind(svga->swc, + svga_surface(hw->zsbuf)->handle, + NULL, + SVGA_RELOC_WRITE); + if (ret != PIPE_OK) + return ret; + } + + svga->rebind.flags.rendertargets = 0; return PIPE_OK; } @@ -202,6 +351,7 @@ emit_viewport( struct svga_context *svga, float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1]; float fw = viewport->scale[0] * 2.0f; float fh = flip * viewport->scale[1] * 2.0f; + boolean emit_vgpu10_viewport = FALSE; memset( &prescale, 0, sizeof(prescale) ); @@ -225,7 +375,16 @@ emit_viewport( struct svga_context *svga, prescale.translate[1] = 0; prescale.translate[2] = 0; prescale.translate[3] = 0; - prescale.enabled = TRUE; + + /* Enable prescale to adjust vertex positions to match + VGPU10 convention only if rasterization is enabled. + */ + if (svga->curr.rast->templ.rasterizer_discard) { + degenerate = TRUE; + goto out; + } else { + prescale.enabled = TRUE; + } if (fw < 0) { prescale.scale[0] *= -1.0f; @@ -235,7 +394,14 @@ emit_viewport( struct svga_context *svga, } if (fh < 0.0) { - prescale.translate[1] = fh - 1.0f + fy * 2.0f; + if (svga_have_vgpu10(svga)) { + /* floating point viewport params below */ + prescale.translate[1] = fh + fy * 2.0f; + } + else { + /* integer viewport params below */ + prescale.translate[1] = fh - 1.0f + fy * 2.0f; + } fh = -fh; fy -= fh; prescale.scale[1] = -1.0f; @@ -321,19 +487,31 @@ emit_viewport( struct svga_context *svga, float adjust_x = 0.0; float adjust_y = 0.0; - switch (svga->curr.reduced_prim) { - case PIPE_PRIM_POINTS: - adjust_x = -0.375; - adjust_y = -0.75; - break; - case PIPE_PRIM_LINES: - adjust_x = -0.5; - adjust_y = 0; - break; - case PIPE_PRIM_TRIANGLES: - adjust_x = -0.5; - adjust_y = -0.5; - break; + if (svga_have_vgpu10(svga)) { + /* Normally, we don't have to do any sub-pixel coordinate + * adjustments for VGPU10. But when we draw wide points with + * a GS we need an X adjustment in order to be conformant. 
+ */ + if (svga->curr.reduced_prim == PIPE_PRIM_POINTS && + svga->curr.rast->pointsize > 1.0f) { + adjust_x = 0.5; + } + } + else { + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + adjust_x = -0.375; + adjust_y = -0.75; + break; + case PIPE_PRIM_LINES: + adjust_x = -0.5; + adjust_y = 0; + break; + case PIPE_PRIM_TRIANGLES: + adjust_x = -0.5; + adjust_y = -0.5; + break; + } } if (invertY) @@ -360,6 +538,17 @@ emit_viewport( struct svga_context *svga, prescale.scale[2] = -prescale.scale[2]; } + /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale. + * zmin can be set to -1 when viewport->scale[2] is set to 1 and + * viewport->translate[2] is set to 0 in the blit code. + */ + if (range_min < 0.0f) { + range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2]; + range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2]; + prescale.scale[2] *= 2.0f; + prescale.translate[2] -= 0.5f; + } + if (prescale.enabled) { float H[2]; float J[2]; @@ -428,21 +617,49 @@ out: prescale.enabled = FALSE; } - if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { - ret = SVGA3D_SetViewport(svga->swc, &rect); - if(ret != PIPE_OK) - return ret; + if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) { + if (svga_have_vgpu10(svga)) { + emit_vgpu10_viewport = TRUE; + } + else { + ret = SVGA3D_SetViewport(svga->swc, &rect); + if (ret != PIPE_OK) + return ret; - memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect)); - assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport)); + svga->state.hw_clear.viewport = rect; + } } if (svga->state.hw_clear.depthrange.zmin != range_min || - svga->state.hw_clear.depthrange.zmax != range_max) { - ret = SVGA3D_SetZRange(svga->swc, range_min, range_max ); - if(ret != PIPE_OK) + svga->state.hw_clear.depthrange.zmax != range_max) + { + if (svga_have_vgpu10(svga)) { + emit_vgpu10_viewport = TRUE; + } + else { + ret = SVGA3D_SetZRange(svga->swc, range_min, range_max ); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_clear.depthrange.zmin = range_min; + svga->state.hw_clear.depthrange.zmax = range_max; + } + } + + if (emit_vgpu10_viewport) { + SVGA3dViewport vp; + vp.x = (float) rect.x; + vp.y = (float) rect.y; + vp.width = (float) rect.w; + vp.height = (float) rect.h; + vp.minDepth = range_min; + vp.maxDepth = range_max; + ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp); + if (ret != PIPE_OK) return ret; + svga->state.hw_clear.viewport = rect; + svga->state.hw_clear.depthrange.zmin = range_min; svga->state.hw_clear.depthrange.zmax = range_max; } @@ -475,14 +692,27 @@ emit_scissor_rect( struct svga_context *svga, unsigned dirty ) { const struct pipe_scissor_state *scissor = &svga->curr.scissor; - SVGA3dRect rect; - rect.x = scissor->minx; - rect.y = scissor->miny; - rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */ - rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */ + if (svga_have_vgpu10(svga)) { + SVGASignedRect rect; + + rect.left = scissor->minx; + rect.top = scissor->miny; + rect.right = scissor->maxx; + rect.bottom = scissor->maxy; + + return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect); + } + else { + SVGA3dRect rect; - return SVGA3D_SetScissorRect(svga->swc, &rect); + rect.x = scissor->minx; + rect.y = scissor->miny; + rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */ + rect.h = scissor->maxy - scissor->miny; /* + 1 ?? 
*/ + + return SVGA3D_SetScissorRect(svga->swc, &rect); + } } @@ -527,9 +757,15 @@ emit_clip_planes( struct svga_context *svga, plane[2] = 2.0f * c; plane[3] = d - c; - ret = SVGA3D_SetClipPlane(svga->swc, i, plane); - if(ret != PIPE_OK) - return ret; + if (svga_have_vgpu10(svga)) { + //debug_printf("XXX emit DX10 clip plane\n"); + ret = PIPE_OK; + } + else { + ret = SVGA3D_SetClipPlane(svga->swc, i, plane); + if (ret != PIPE_OK) + return ret; + } } return PIPE_OK; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c index 8cdce742b..bac91669b 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c @@ -36,43 +36,12 @@ #include "svga_shader.h" #include "svga_resource_texture.h" #include "svga_tgsi.h" +#include "svga_format.h" #include "svga_hw_reg.h" -static inline int -compare_fs_keys(const struct svga_fs_compile_key *a, - const struct svga_fs_compile_key *b) -{ - unsigned keysize_a = svga_fs_key_size( a ); - unsigned keysize_b = svga_fs_key_size( b ); - - if (keysize_a != keysize_b) { - return (int)(keysize_a - keysize_b); - } - return memcmp( a, b, keysize_a ); -} - - -/** Search for a fragment shader variant */ -static struct svga_shader_variant * -search_fs_key(const struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key) -{ - struct svga_shader_variant *variant = fs->base.variants; - - assert(key); - - for ( ; variant; variant = variant->next) { - if (compare_fs_keys( key, &variant->key.fkey ) == 0) - return variant; - } - - return NULL; -} - - /** * If we fail to compile a fragment shader (because it uses too many * registers, for example) we'll use a dummy/fallback shader that @@ -111,13 +80,30 @@ get_dummy_fragment_shader(void) } +static struct svga_shader_variant * +translate_fragment_program(struct svga_context *svga, + const struct svga_fragment_shader *fs, + const struct svga_compile_key *key) +{ + if (svga_have_vgpu10(svga)) { + return svga_tgsi_vgpu10_translate(svga, &fs->base, key, + PIPE_SHADER_FRAGMENT); + } + else { + return svga_tgsi_vgpu9_translate(svga, &fs->base, key, + PIPE_SHADER_FRAGMENT); + } +} + + /** * Replace the given shader's instruction with a simple constant-color * shader. We use this when normal shader translation fails. 
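
The comment above, together with get_dummy_fragment_shader() whose body this hunk elides, refers to a constant-color fallback shader. The sketch below shows roughly how such a fallback is built with the tgsi_ureg helpers; it is written from memory of that API, so treat the calls as an approximation rather than a quote of this file.

#include "tgsi/tgsi_ureg.h"

/* Approximate constant-color fragment shader: MOV a red immediate into
 * the COLOR output.  Illustrative only, not copied from the driver. */
static const struct tgsi_token *
toy_dummy_fragment_shader(void)
{
   static const float red[4] = { 1.0f, 0.0f, 0.0f, 0.0f };
   struct ureg_program *ureg;
   const struct tgsi_token *tokens;
   unsigned num_tokens;

   ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!ureg)
      return NULL;

   ureg_MOV(ureg,
            ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0),
            ureg_DECL_immediate(ureg, red, 4));
   ureg_END(ureg);

   tokens = ureg_get_tokens(ureg, &num_tokens);
   ureg_destroy(ureg);
   return tokens;
}
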
*/ static struct svga_shader_variant * -get_compiled_dummy_shader(struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key) +get_compiled_dummy_shader(struct svga_context *svga, + struct svga_fragment_shader *fs, + const struct svga_compile_key *key) { const struct tgsi_token *dummy = get_dummy_fragment_shader(); struct svga_shader_variant *variant; @@ -129,7 +115,7 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs, FREE((void *) fs->base.tokens); fs->base.tokens = dummy; - variant = svga_translate_fragment_program(fs, key); + variant = translate_fragment_program(svga, fs, key); return variant; } @@ -140,52 +126,47 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs, static enum pipe_error compile_fs(struct svga_context *svga, struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key, + const struct svga_compile_key *key, struct svga_shader_variant **out_variant) { struct svga_shader_variant *variant; enum pipe_error ret = PIPE_ERROR; - variant = svga_translate_fragment_program( fs, key ); + variant = translate_fragment_program(svga, fs, key); if (variant == NULL) { debug_printf("Failed to compile fragment shader," " using dummy shader instead.\n"); - variant = get_compiled_dummy_shader(fs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + variant = get_compiled_dummy_shader(svga, fs, key); } - - if (svga_shader_too_large(svga, variant)) { + else if (svga_shader_too_large(svga, variant)) { /* too big, use dummy shader */ - debug_printf("Shader too large (%lu bytes)," + debug_printf("Shader too large (%u bytes)," " using dummy shader instead.\n", - (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0])); - variant = get_compiled_dummy_shader(fs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + (unsigned) (variant->nr_tokens + * sizeof(variant->tokens[0]))); + /* Free the too-large variant */ + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + /* Use simple pass-through shader instead */ + variant = get_compiled_dummy_shader(svga, fs, key); + } + + if (!variant) { + return PIPE_ERROR; } ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_PS, variant); - if (ret != PIPE_OK) - goto fail; + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + return ret; + } *out_variant = variant; - /* insert variants at head of linked list */ + /* insert variant at head of linked list */ variant->next = fs->base.variants; fs->base.variants = variant; return PIPE_OK; - -fail: - if (variant) { - svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); - } - return ret; } @@ -197,23 +178,45 @@ fail: static enum pipe_error make_fs_key(const struct svga_context *svga, struct svga_fragment_shader *fs, - struct svga_fs_compile_key *key) + struct svga_compile_key *key) { + const unsigned shader = PIPE_SHADER_FRAGMENT; unsigned i; - int idx = 0; memset(key, 0, sizeof *key); + memcpy(key->generic_remap_table, fs->generic_remap_table, + sizeof(fs->generic_remap_table)); + + /* SVGA_NEW_GS, SVGA_NEW_VS + */ + if (svga->curr.gs) { + key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs; + } else { + key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs; + } + /* Only need fragment shader fixup for twoside lighting if doing * hwtnl. Otherwise the draw module does the whole job for us. 
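
compile_fs() and emit_hw_fs() in this hunk implement the usual shader-variant cache: look the compile key up in the shader's variant list, compile on a miss, then push the new variant onto the head of the list. The same idea boiled down to a self-contained sketch, with simplified stand-in types rather than the driver's structs.

#include <stdlib.h>
#include <string.h>

struct toy_key { unsigned flags; unsigned num_textures; };

struct toy_variant {
   struct toy_key key;
   struct toy_variant *next;
   /* translated machine code would hang off here */
};

struct toy_shader { struct toy_variant *variants; };

static struct toy_variant *
toy_search_variant(const struct toy_shader *sh, const struct toy_key *key)
{
   struct toy_variant *v;
   for (v = sh->variants; v; v = v->next) {
      if (memcmp(&v->key, key, sizeof *key) == 0)
         return v;
   }
   return NULL;
}

static struct toy_variant *
toy_get_variant(struct toy_shader *sh, const struct toy_key *key)
{
   struct toy_variant *v = toy_search_variant(sh, key);
   if (!v) {
      v = calloc(1, sizeof *v);      /* "compile" on a cache miss */
      if (!v)
         return NULL;
      v->key = *key;
      v->next = sh->variants;        /* insert at head, like the driver */
      sh->variants = v;
   }
   return v;
}
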
* * SVGA_NEW_SWTNL */ if (!svga->state.sw.need_swtnl) { - /* SVGA_NEW_RAST + /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ - key->light_twoside = svga->curr.rast->templ.light_twoside; - key->front_ccw = svga->curr.rast->templ.front_ccw; + key->fs.light_twoside = svga->curr.rast->templ.light_twoside; + key->fs.front_ccw = svga->curr.rast->templ.front_ccw; + key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable && + svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES); + key->fs.aa_point = (svga->curr.rast->templ.point_smooth && + svga->curr.reduced_prim == PIPE_PRIM_POINTS && + (svga->curr.rast->pointsize > 1.0 || + svga->curr.vs->base.info.writes_psize)); + if (key->fs.aa_point) { + assert(svga->curr.gs != NULL); + assert(svga->curr.gs->aa_point_coord_index != -1); + key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index; + } } /* The blend workaround for simulating logicop xor behaviour @@ -231,7 +234,7 @@ make_fs_key(const struct svga_context *svga, * SVGA_NEW_BLEND */ if (svga->curr.blend->need_white_fragments) { - key->white_fragments = 1; + key->fs.white_fragments = 1; } #ifdef DEBUG @@ -241,22 +244,23 @@ make_fs_key(const struct svga_context *svga, */ { static boolean warned = FALSE; - unsigned i, n = MAX2(svga->curr.num_sampler_views, - svga->curr.num_samplers); + unsigned i, n = MAX2(svga->curr.num_sampler_views[shader], + svga->curr.num_samplers[shader]); /* Only warn once to prevent too much debug output */ if (!warned) { - if (svga->curr.num_sampler_views != svga->curr.num_samplers) { + if (svga->curr.num_sampler_views[shader] != + svga->curr.num_samplers[shader]) { debug_printf("svga: mismatched number of sampler views (%u) " "vs. samplers (%u)\n", - svga->curr.num_sampler_views, - svga->curr.num_samplers); + svga->curr.num_sampler_views[shader], + svga->curr.num_samplers[shader]); } for (i = 0; i < n; i++) { - if ((svga->curr.sampler_views[i] == NULL) != - (svga->curr.sampler[i] == NULL)) + if ((svga->curr.sampler_views[shader][i] == NULL) != + (svga->curr.sampler[shader][i] == NULL)) debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n", - i, svga->curr.sampler_views[i], - i, svga->curr.sampler[i]); + i, svga->curr.sampler_views[shader][i], + i, svga->curr.sampler[shader][i]); } warned = TRUE; } @@ -268,68 +272,62 @@ make_fs_key(const struct svga_context *svga, * * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ - for (i = 0; i < svga->curr.num_sampler_views; i++) { - if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) { - assert(svga->curr.sampler_views[i]->texture); - key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target; - if (!svga->curr.sampler[i]->normalized_coords) { - key->tex[i].width_height_idx = idx++; - key->tex[i].unnormalized = TRUE; - ++key->num_unnormalized_coords; - } - - key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r; - key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g; - key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b; - key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a; - } - } - key->num_textures = svga->curr.num_sampler_views; - - idx = 0; - for (i = 0; i < svga->curr.num_samplers; ++i) { - if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) { - struct pipe_resource *tex = svga->curr.sampler_views[i]->texture; - struct svga_texture *stex = svga_texture(tex); - SVGA3dSurfaceFormat format = stex->key.format; - - if (format == SVGA3D_Z_D16 || - format == SVGA3D_Z_D24X8 || - format == SVGA3D_Z_D24S8) { - /* If we're sampling from a SVGA3D_Z_D16, 
SVGA3D_Z_D24X8, - * or SVGA3D_Z_D24S8 surface, we'll automatically get - * shadow comparison. But we only get LEQUAL mode. - * Set TEX_COMPARE_NONE here so we don't emit the extra FS - * code for shadow comparison. - */ - key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; - key->tex[i].compare_func = PIPE_FUNC_NEVER; - /* These depth formats _only_ support comparison mode and - * not ordinary sampling so warn if the later is expected. - */ - if (svga->curr.sampler[i]->compare_mode != - PIPE_TEX_COMPARE_R_TO_TEXTURE) { - debug_warn_once("Unsupported shadow compare mode"); - } - /* The only supported comparison mode is LEQUAL */ - if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) { - debug_warn_once("Unsupported shadow compare function"); + svga_init_shader_key_common(svga, shader, key); + + for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { + struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; + const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; + if (view) { + struct pipe_resource *tex = view->texture; + if (tex->target != PIPE_BUFFER) { + struct svga_texture *stex = svga_texture(tex); + SVGA3dSurfaceFormat format = stex->key.format; + + if (!svga_have_vgpu10(svga) && + (format == SVGA3D_Z_D16 || + format == SVGA3D_Z_D24X8 || + format == SVGA3D_Z_D24S8)) { + /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8, + * or SVGA3D_Z_D24S8 surface, we'll automatically get + * shadow comparison. But we only get LEQUAL mode. + * Set TEX_COMPARE_NONE here so we don't emit the extra FS + * code for shadow comparison. + */ + key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; + key->tex[i].compare_func = PIPE_FUNC_NEVER; + /* These depth formats _only_ support comparison mode and + * not ordinary sampling so warn if the later is expected. + */ + if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) { + debug_warn_once("Unsupported shadow compare mode"); + } + /* The shader translation code can emit code to + * handle ALWAYS and NEVER compare functions + */ + else if (sampler->compare_func == PIPE_FUNC_ALWAYS || + sampler->compare_func == PIPE_FUNC_NEVER) { + key->tex[i].compare_mode = sampler->compare_mode; + key->tex[i].compare_func = sampler->compare_func; + } + else if (sampler->compare_func != PIPE_FUNC_LEQUAL) { + debug_warn_once("Unsupported shadow compare function"); + } + } + else { + /* For other texture formats, just use the compare func/mode + * as-is. Should be no-ops for color textures. For depth + * textures, we do not get automatic depth compare. We have + * to do it ourselves in the shader. And we don't get PCF. + */ + key->tex[i].compare_mode = sampler->compare_mode; + key->tex[i].compare_func = sampler->compare_func; } - } - else { - /* For other texture formats, just use the compare func/mode - * as-is. Should be no-ops for color textures. For depth - * textures, we do not get automatic depth compare. We have - * to do it ourselves in the shader. And we don't get PCF. 
- */ - key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode; - key->tex[i].compare_func = svga->curr.sampler[i]->compare_func; } } } /* sprite coord gen state */ - for (i = 0; i < svga->curr.num_samplers; ++i) { + for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { key->tex[i].sprite_texgen = svga->curr.rast->templ.sprite_coord_enable & (1 << i); } @@ -337,10 +335,25 @@ make_fs_key(const struct svga_context *svga, key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + key->fs.flatshade = svga->curr.rast->templ.flatshade; + + /* SVGA_NEW_DEPTH_STENCIL_ALPHA */ + if (svga_have_vgpu10(svga)) { + /* Alpha testing is not supported in integer-valued render targets. */ + if (svga_has_any_integer_cbufs(svga)) { + key->fs.alpha_func = SVGA3D_CMP_ALWAYS; + key->fs.alpha_ref = 0; + } + else { + key->fs.alpha_func = svga->curr.depth->alphafunc; + key->fs.alpha_ref = svga->curr.depth->alpharef; + } + } + /* SVGA_NEW_FRAME_BUFFER */ if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { /* Replicate color0 output to N colorbuffers */ - key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; + key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; } return PIPE_OK; @@ -355,18 +368,32 @@ svga_reemit_fs_bindings(struct svga_context *svga) { enum pipe_error ret; - assert(svga->rebind.fs); + assert(svga->rebind.flags.fs); assert(svga_have_gb_objects(svga)); if (!svga->state.hw_draw.fs) return PIPE_OK; - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, - svga->state.hw_draw.fs->gb_shader); + if (!svga_need_to_rebind_resources(svga)) { + ret = svga->swc->resource_rebind(svga->swc, NULL, + svga->state.hw_draw.fs->gb_shader, + SVGA_RELOC_READ); + goto out; + } + + if (svga_have_vgpu10(svga)) + ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, + svga->state.hw_draw.fs->gb_shader, + svga->state.hw_draw.fs->id); + else + ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, + svga->state.hw_draw.fs->gb_shader); + + out: if (ret != PIPE_OK) return ret; - svga->rebind.fs = FALSE; + svga->rebind.flags.fs = FALSE; return PIPE_OK; } @@ -378,7 +405,7 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) struct svga_shader_variant *variant = NULL; enum pipe_error ret = PIPE_OK; struct svga_fragment_shader *fs = svga->curr.fs; - struct svga_fs_compile_key key; + struct svga_compile_key key; /* SVGA_NEW_BLEND * SVGA_NEW_TEXTURE_BINDING @@ -386,14 +413,16 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) * SVGA_NEW_NEED_SWTNL * SVGA_NEW_SAMPLER * SVGA_NEW_FRAME_BUFFER + * SVGA_NEW_DEPTH_STENCIL_ALPHA + * SVGA_NEW_VS */ - ret = make_fs_key( svga, fs, &key ); + ret = make_fs_key(svga, fs, &key); if (ret != PIPE_OK) return ret; - variant = search_fs_key( fs, &key ); + variant = svga_search_shader_key(&fs->base, &key); if (!variant) { - ret = compile_fs( svga, fs, &key, &variant ); + ret = compile_fs(svga, fs, &key, &variant); if (ret != PIPE_OK) return ret; } @@ -401,22 +430,14 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) assert(variant); if (variant != svga->state.hw_draw.fs) { - if (svga_have_gb_objects(svga)) { - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, - variant->gb_shader); - if (ret != PIPE_OK) - return ret; + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant); + if (ret != PIPE_OK) + return ret; - svga->rebind.fs = FALSE; - } - else { - ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id); - if (ret != PIPE_OK) - return ret; - 
} + svga->rebind.flags.fs = FALSE; svga->dirty |= SVGA_NEW_FS_VARIANT; - svga->state.hw_draw.fs = variant; + svga->state.hw_draw.fs = variant; } return PIPE_OK; @@ -426,11 +447,16 @@ struct svga_tracked_state svga_hw_fs = { "fragment shader (hwtnl)", (SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_VS | SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_NEED_SWTNL | SVGA_NEW_RAST | + SVGA_NEW_STIPPLE | + SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_SAMPLER | SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_BLEND), emit_hw_fs }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c new file mode 100644 index 000000000..618bec248 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c @@ -0,0 +1,255 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "translate/translate.h" +#include "tgsi/tgsi_ureg.h" + +#include "svga_context.h" +#include "svga_cmd.h" +#include "svga_shader.h" +#include "svga_tgsi.h" +#include "svga_streamout.h" +#include "svga_format.h" + +/** + * If we fail to compile a geometry shader we'll use a dummy/fallback shader + * that simply emits the incoming vertices. + */ +static const struct tgsi_token * +get_dummy_geometry_shader(void) +{ + //XXX + return NULL; +} + + +static struct svga_shader_variant * +translate_geometry_program(struct svga_context *svga, + const struct svga_geometry_shader *gs, + const struct svga_compile_key *key) +{ + assert(svga_have_vgpu10(svga)); + return svga_tgsi_vgpu10_translate(svga, &gs->base, key, + PIPE_SHADER_GEOMETRY); +} + + +/** + * Translate TGSI shader into an svga shader variant. 
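
svga_hw_fs above (and svga_hw_gs later in this diff) are entries in the driver's table of tracked states: a name, a dirty-bit mask and an emit callback. The loop below is only a schematic of how such a table is typically walked; the toy_* types are simplified and this is not the driver's actual update function.

typedef int toy_error;               /* 0 == OK, loosely mirrors enum pipe_error */

struct toy_context;                  /* opaque here */

struct toy_tracked_state {
   const char *name;
   unsigned dirty;                   /* e.g. SVGA_NEW_FS | SVGA_NEW_RAST | ... */
   toy_error (*update)(struct toy_context *ctx, unsigned dirty);
};

static toy_error
toy_update_state(struct toy_context *ctx,
                 const struct toy_tracked_state *const *atoms,   /* NULL-terminated */
                 unsigned dirty)
{
   unsigned i;
   for (i = 0; atoms[i]; i++) {
      if (atoms[i]->dirty & dirty) {
         toy_error ret = atoms[i]->update(ctx, dirty);
         if (ret != 0)
            return ret;
      }
   }
   return 0;
}
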
+ */ +static enum pipe_error +compile_gs(struct svga_context *svga, + struct svga_geometry_shader *gs, + const struct svga_compile_key *key, + struct svga_shader_variant **out_variant) +{ + struct svga_shader_variant *variant; + enum pipe_error ret = PIPE_ERROR; + + variant = translate_geometry_program(svga, gs, key); + if (!variant) { + /* some problem during translation, try the dummy shader */ + const struct tgsi_token *dummy = get_dummy_geometry_shader(); + if (!dummy) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + debug_printf("Failed to compile geometry shader, using dummy shader instead.\n"); + FREE((void *) gs->base.tokens); + gs->base.tokens = dummy; + variant = translate_geometry_program(svga, gs, key); + if (!variant) { + return PIPE_ERROR; + } + } + + ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_GS, variant); + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant); + return ret; + } + + *out_variant = variant; + + return PIPE_OK; +} + + +static void +make_gs_key(struct svga_context *svga, struct svga_compile_key *key) +{ + struct svga_geometry_shader *gs = svga->curr.gs; + + memset(key, 0, sizeof *key); + + /* + * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER + */ + svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, key); + + memcpy(key->generic_remap_table, gs->generic_remap_table, + sizeof(gs->generic_remap_table)); + + key->gs.vs_generic_outputs = svga->curr.vs->generic_outputs; + + key->gs.need_prescale = svga->state.hw_clear.prescale.enabled; + + key->gs.writes_psize = gs->base.info.writes_psize; + key->gs.wide_point = gs->wide_point; + key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable; + key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode + == PIPE_SPRITE_COORD_LOWER_LEFT); + + /* SVGA_NEW_RAST */ + key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; +} + + +/** + * svga_reemit_gs_bindings - Reemit the geometry shader bindings + */ +enum pipe_error +svga_reemit_gs_bindings(struct svga_context *svga) +{ + enum pipe_error ret; + struct svga_winsys_gb_shader *gbshader = NULL; + SVGA3dShaderId shaderId = SVGA3D_INVALID_ID; + + assert(svga->rebind.flags.gs); + assert(svga_have_gb_objects(svga)); + + /* Geometry Shader is only supported in vgpu10 */ + assert(svga_have_vgpu10(svga)); + + if (svga->state.hw_draw.gs) { + gbshader = svga->state.hw_draw.gs->gb_shader; + shaderId = svga->state.hw_draw.gs->id; + } + + if (!svga_need_to_rebind_resources(svga)) { + ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader, + SVGA_RELOC_READ); + goto out; + } + + ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_GS, + gbshader, shaderId); + + out: + if (ret != PIPE_OK) + return ret; + + svga->rebind.flags.gs = FALSE; + return PIPE_OK; +} + +static enum pipe_error +emit_hw_gs(struct svga_context *svga, unsigned dirty) +{ + struct svga_shader_variant *variant; + struct svga_geometry_shader *gs = svga->curr.gs; + enum pipe_error ret = PIPE_OK; + struct svga_compile_key key; + + /* If there's a user-defined GS, we should have a pointer to a derived + * GS. This should have been resolved in update_tgsi_transform(). + */ + if (svga->curr.user_gs) + assert(svga->curr.gs); + + if (!gs) { + if (svga->state.hw_draw.gs != NULL) { + + /** The previous geometry shader is made inactive. + * Needs to unbind the geometry shader. 
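
svga_reemit_fs_bindings() earlier and svga_reemit_gs_bindings() above share one pattern: when the new command buffer only needs its resources paged in, a lightweight resource_rebind reference is enough, otherwise the full SetShader bind is re-sent. A distilled sketch of that decision; the toy_* names stand in for the winsys plumbing and are not driver API.

enum toy_err { TOY_OK, TOY_FAIL };

struct toy_shader_binding {
   void *gb_shader;                  /* guest-backed shader resource */
   unsigned shader_id;
};

static enum toy_err
toy_resource_rebind(void *res)       /* page-in only */
{
   (void) res;
   return TOY_OK;
}

static enum toy_err
toy_send_set_shader(void *res, unsigned id)   /* full bind command */
{
   (void) res;
   (void) id;
   return TOY_OK;
}

static enum toy_err
toy_reemit_shader(const struct toy_shader_binding *b, int need_full_rebind)
{
   if (!b->gb_shader)
      return TOY_OK;                 /* nothing bound, nothing to re-emit */

   if (!need_full_rebind)
      return toy_resource_rebind(b->gb_shader);

   return toy_send_set_shader(b->gb_shader, b->shader_id);
}
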
+ */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); + svga->state.hw_draw.gs = NULL; + } + return ret; + } + + /* If there is stream output info for this geometry shader, then use + * it instead of the one from the vertex shader. + */ + if (svga_have_gs_streamout(svga)) { + svga_set_stream_output(svga, gs->base.stream_output); + } + else if (!svga_have_vs_streamout(svga)) { + /* turn off stream out */ + svga_set_stream_output(svga, NULL); + } + + /* SVGA_NEW_NEED_SWTNL */ + if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) { + /* No geometry shader is needed */ + variant = NULL; + } + else { + make_gs_key(svga, &key); + + /* See if we already have a GS variant that matches the key */ + variant = svga_search_shader_key(&gs->base, &key); + + if (!variant) { + ret = compile_gs(svga, gs, &key, &variant); + if (ret != PIPE_OK) + return ret; + + /* insert the new variant at head of linked list */ + assert(variant); + variant->next = gs->base.variants; + gs->base.variants = variant; + } + } + + if (variant != svga->state.hw_draw.gs) { + /* Bind the new variant */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, variant); + if (ret != PIPE_OK) + return ret; + + svga->rebind.flags.gs = FALSE; + svga->dirty |= SVGA_NEW_GS_VARIANT; + svga->state.hw_draw.gs = variant; + } + + return PIPE_OK; +} + +struct svga_tracked_state svga_hw_gs = +{ + "geometry shader (hwtnl)", + (SVGA_NEW_VS | + SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER | + SVGA_NEW_RAST | + SVGA_NEW_NEED_SWTNL), + emit_hw_gs +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c index cac39d62f..b07c62da4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -26,6 +26,7 @@ #include "util/u_inlines.h" #include "pipe/p_state.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_state.h" #include "svga_debug.h" #include "svga_hw_reg.h" @@ -61,6 +62,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) { boolean need_pipeline = FALSE; struct svga_vertex_shader *vs = svga->curr.vs; + const char *reason = ""; /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ @@ -75,6 +77,20 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) svga->curr.rast->need_pipeline_lines_str, svga->curr.rast->need_pipeline_points_str); need_pipeline = TRUE; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + reason = svga->curr.rast->need_pipeline_points_str; + break; + case PIPE_PRIM_LINES: + reason = svga->curr.rast->need_pipeline_lines_str; + break; + case PIPE_PRIM_TRIANGLES: + reason = svga->curr.rast->need_pipeline_tris_str; + break; + default: + assert(!"Unexpected reduced prim type"); + } } /* EDGEFLAGS @@ -82,6 +98,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) if (vs && vs->base.info.writes_edgeflag) { SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__); need_pipeline = TRUE; + reason = "edge flags"; } /* SVGA_NEW_FS, SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE @@ -91,7 +108,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) unsigned generic_inputs = svga->curr.fs ? 
svga->curr.fs->generic_inputs : 0; - if (sprite_coord_gen && + if (!svga_have_vgpu10(svga) && sprite_coord_gen && (generic_inputs & ~sprite_coord_gen)) { /* The fragment shader is using some generic inputs that are * not being replaced by auto-generated point/sprite coords (and @@ -103,6 +120,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) * point stage. */ need_pipeline = TRUE; + reason = "point sprite coordinate generation"; } } @@ -115,6 +133,12 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) if (0 && svga->state.sw.need_pipeline) debug_printf("sw.need_pipeline = %d\n", svga->state.sw.need_pipeline); + if (svga->state.sw.need_pipeline) { + assert(reason); + pipe_debug_message(&svga->debug.callback, FALLBACK, + "Using semi-fallback for %s", reason); + } + return PIPE_OK; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c index ebb98373e..d43894d71 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c @@ -23,16 +23,20 @@ * **********************************************************/ +#include "pipe/p_defines.h" +#include "util/u_bitmask.h" #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" -#include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "svga_context.h" #include "svga_screen.h" #include "svga_state.h" #include "svga_cmd.h" +#include "svga_format.h" +#include "svga_shader.h" struct rs_queue { @@ -77,7 +81,7 @@ svga_queue_rs( struct rs_queue *q, * the "to" state. */ static enum pipe_error -emit_rss(struct svga_context *svga, unsigned dirty) +emit_rss_vgpu9(struct svga_context *svga, unsigned dirty) { struct svga_screen *screen = svga_screen(svga->pipe.screen); struct rs_queue queue; @@ -85,7 +89,7 @@ emit_rss(struct svga_context *svga, unsigned dirty) queue.rs_count = 0; - if (dirty & SVGA_NEW_BLEND) { + if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) { const struct svga_blend_state *curr = svga->curr.blend; EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail ); @@ -119,7 +123,7 @@ emit_rss(struct svga_context *svga, unsigned dirty) EMIT_RS( svga, color, BLENDCOLOR, fail ); } - if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) { + if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_RAST)) { const struct svga_depth_stencil_state *curr = svga->curr.depth; const struct svga_rasterizer_state *rast = svga->curr.rast; @@ -300,6 +304,151 @@ fail: return PIPE_ERROR_OUT_OF_MEMORY; } +/** Returns a non-culling rasterizer state object to be used with + * point sprite. + */ +static struct svga_rasterizer_state * +get_no_cull_rasterizer_state(struct svga_context *svga) +{ + const struct svga_rasterizer_state *r = svga->curr.rast; + unsigned int aa_point = r->templ.point_smooth; + + if (!svga->rasterizer_no_cull[aa_point]) { + struct pipe_rasterizer_state rast; + + memset(&rast, 0, sizeof(rast)); + rast.flatshade = 1; + rast.front_ccw = 1; + rast.point_smooth = r->templ.point_smooth; + + /* All rasterizer states have the same half_pixel_center, + * bottom_edge_rule and clip_halfz values since they are + * constant for a context. If we ever implement + * GL_ARB_clip_control, the clip_halfz field would have to be observed. 
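
emit_rss_vgpu9() above (through EMIT_RS) and the EMIT_TS macros near the end of this diff rely on the same idea: keep a software shadow of every hardware state value, queue only the values that changed, and flush the queue as one command. A stripped-down sketch of that pattern with toy_* stand-ins for the real structures:

#define TOY_NUM_STATES 16

struct toy_queue {
   struct { unsigned name, value; } item[TOY_NUM_STATES];
   unsigned count;
};

struct toy_hw_shadow {
   unsigned rs[TOY_NUM_STATES];      /* last value sent to the device */
};

static void
toy_emit_rs(struct toy_hw_shadow *hw, struct toy_queue *q,
            unsigned name, unsigned value)
{
   if (hw->rs[name] != value) {
      q->item[q->count].name = name;
      q->item[q->count].value = value;
      q->count++;
      hw->rs[name] = value;
   }
}

/* After all toy_emit_rs() calls, a single SVGA3D_BeginSetRenderState()-style
 * command would carry the q->count queued entries to the device. */
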
+ */ + rast.half_pixel_center = r->templ.half_pixel_center; + rast.bottom_edge_rule = r->templ.bottom_edge_rule; + rast.clip_halfz = r->templ.clip_halfz; + + svga->rasterizer_no_cull[aa_point] = + svga->pipe.create_rasterizer_state(&svga->pipe, &rast); + } + return svga->rasterizer_no_cull[aa_point]; +} + +static enum pipe_error +emit_rss_vgpu10(struct svga_context *svga, unsigned dirty) +{ + enum pipe_error ret = PIPE_OK; + + svga_hwtnl_flush_retry(svga); + + if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) { + const struct svga_blend_state *curr; + float blend_factor[4]; + + if (svga_has_any_integer_cbufs(svga)) { + /* Blending is not supported in integer-valued render targets. */ + curr = svga->noop_blend; + blend_factor[0] = + blend_factor[1] = + blend_factor[2] = + blend_factor[3] = 0; + } + else { + curr = svga->curr.blend; + + if (curr->blend_color_alpha) { + blend_factor[0] = + blend_factor[1] = + blend_factor[2] = + blend_factor[3] = svga->curr.blend_color.color[3]; + } + else { + blend_factor[0] = svga->curr.blend_color.color[0]; + blend_factor[1] = svga->curr.blend_color.color[1]; + blend_factor[2] = svga->curr.blend_color.color[2]; + blend_factor[3] = svga->curr.blend_color.color[3]; + } + } + + /* Set/bind the blend state object */ + if (svga->state.hw_draw.blend_id != curr->id || + svga->state.hw_draw.blend_factor[0] != blend_factor[0] || + svga->state.hw_draw.blend_factor[1] != blend_factor[1] || + svga->state.hw_draw.blend_factor[2] != blend_factor[2] || + svga->state.hw_draw.blend_factor[3] != blend_factor[3] || + svga->state.hw_draw.blend_sample_mask != svga->curr.sample_mask) { + ret = SVGA3D_vgpu10_SetBlendState(svga->swc, curr->id, + blend_factor, + svga->curr.sample_mask); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_draw.blend_id = curr->id; + svga->state.hw_draw.blend_factor[0] = blend_factor[0]; + svga->state.hw_draw.blend_factor[1] = blend_factor[1]; + svga->state.hw_draw.blend_factor[2] = blend_factor[2]; + svga->state.hw_draw.blend_factor[3] = blend_factor[3]; + svga->state.hw_draw.blend_sample_mask = svga->curr.sample_mask; + } + } + + if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF)) { + const struct svga_depth_stencil_state *curr = svga->curr.depth; + unsigned curr_ref = svga->curr.stencil_ref.ref_value[0]; + + if (curr->id != svga->state.hw_draw.depth_stencil_id || + curr_ref != svga->state.hw_draw.stencil_ref) { + /* Set/bind the depth/stencil state object */ + ret = SVGA3D_vgpu10_SetDepthStencilState(svga->swc, curr->id, + curr_ref); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_draw.depth_stencil_id = curr->id; + svga->state.hw_draw.stencil_ref = curr_ref; + } + } + + if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) { + const struct svga_rasterizer_state *rast; + + if (svga->curr.reduced_prim == PIPE_PRIM_POINTS && + svga->curr.gs && svga->curr.gs->wide_point) { + + /* If we are drawing a point sprite, we will need to + * bind a non-culling rasterizer state object + */ + rast = get_no_cull_rasterizer_state(svga); + } + else { + rast = svga->curr.rast; + } + + if (svga->state.hw_draw.rasterizer_id != rast->id) { + /* Set/bind the rasterizer state object */ + ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id); + if (ret != PIPE_OK) + return ret; + svga->state.hw_draw.rasterizer_id = rast->id; + } + } + return PIPE_OK; +} + + +static enum pipe_error +emit_rss(struct svga_context *svga, unsigned dirty) +{ + if (svga_have_vgpu10(svga)) { + return emit_rss_vgpu10(svga, dirty); + } + else { + return 
emit_rss_vgpu9(svga, dirty); + } +} + struct svga_tracked_state svga_hw_rss = { @@ -307,11 +456,12 @@ struct svga_tracked_state svga_hw_rss = (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR | - SVGA_NEW_DEPTH_STENCIL | + SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF | SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | - SVGA_NEW_NEED_PIPELINE), + SVGA_NEW_NEED_PIPELINE | + SVGA_NEW_REDUCED_PRIMITIVE), emit_rss }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c new file mode 100644 index 000000000..e7b540cc7 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c @@ -0,0 +1,349 @@ +/* + * Copyright 2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +/** + * VGPU10 sampler and sampler view functions. + */ + + +#include "pipe/p_defines.h" +#include "util/u_bitmask.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_cmd.h" +#include "svga_context.h" +#include "svga_format.h" +#include "svga_resource_buffer.h" +#include "svga_resource_texture.h" +#include "svga_shader.h" +#include "svga_state.h" +#include "svga_sampler_view.h" + + +/** Get resource handle for a texture or buffer */ +static inline struct svga_winsys_surface * +svga_resource_handle(struct pipe_resource *res) +{ + if (res->target == PIPE_BUFFER) { + return svga_buffer(res)->handle; + } + else { + return svga_texture(res)->handle; + } +} + + +/** + * This helper function returns TRUE if the specified resource collides with + * any of the resources bound to any of the currently bound sampler views. + */ +boolean +svga_check_sampler_view_resource_collision(struct svga_context *svga, + struct svga_winsys_surface *res, + unsigned shader) +{ + struct pipe_screen *screen = svga->pipe.screen; + unsigned i; + + if (svga_screen(screen)->debug.no_surface_view) { + return FALSE; + } + + for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) { + struct svga_pipe_sampler_view *sv = + svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); + + if (sv && res == svga_resource_handle(sv->base.texture)) { + return TRUE; + } + } + + return FALSE; +} + + +/** + * Create a DX ShaderResourceSamplerView for the given pipe_sampler_view, + * if needed. 
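
emit_rss_vgpu10() above and make_fs_key() earlier both call svga_has_any_integer_cbufs() to switch off blending and alpha testing on integer render targets; the body of that helper is not part of this hunk. One plausible shape for such a check, using the u_format helpers, is sketched below. It is a reconstruction under that assumption, not the driver's code, and the toy_ name marks it as such.

#include "pipe/p_state.h"
#include "util/u_format.h"

static boolean
toy_has_any_integer_cbufs(const struct pipe_framebuffer_state *fb)
{
   unsigned i;
   for (i = 0; i < fb->nr_cbufs; i++) {
      if (fb->cbufs[i] &&
          util_format_is_pure_integer(fb->cbufs[i]->format))
         return TRUE;
   }
   return FALSE;
}
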
+ */ +enum pipe_error +svga_validate_pipe_sampler_view(struct svga_context *svga, + struct svga_pipe_sampler_view *sv) +{ + enum pipe_error ret = PIPE_OK; + + if (sv->id == SVGA3D_INVALID_ID) { + struct svga_screen *ss = svga_screen(svga->pipe.screen); + struct pipe_resource *texture = sv->base.texture; + struct svga_winsys_surface *surface = svga_resource_handle(texture); + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDim; + SVGA3dShaderResourceViewDesc viewDesc; + + format = svga_translate_format(ss, sv->base.format, + PIPE_BIND_SAMPLER_VIEW); + assert(format != SVGA3D_FORMAT_INVALID); + + /* Convert the format to a sampler-friendly format, if needed */ + format = svga_sampler_format(format); + + if (texture->target == PIPE_BUFFER) { + viewDesc.buffer.firstElement = sv->base.u.buf.first_element; + viewDesc.buffer.numElements = (sv->base.u.buf.last_element - + sv->base.u.buf.first_element + 1); + } + else { + viewDesc.tex.mostDetailedMip = sv->base.u.tex.first_level; + viewDesc.tex.firstArraySlice = sv->base.u.tex.first_layer; + viewDesc.tex.mipLevels = (sv->base.u.tex.last_level - + sv->base.u.tex.first_level + 1); + } + + /* arraySize in viewDesc specifies the number of array slices in a + * texture array. For 3D texture, last_layer in + * pipe_sampler_view specifies the last slice of the texture + * which is different from the last slice in a texture array, + * hence we need to set arraySize to 1 explicitly. + */ + viewDesc.tex.arraySize = + (texture->target == PIPE_TEXTURE_3D || + texture->target == PIPE_BUFFER) ? 1 : + (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1); + + switch (texture->target) { + case PIPE_BUFFER: + resourceDim = SVGA3D_RESOURCE_BUFFER; + break; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + resourceDim = SVGA3D_RESOURCE_TEXTURE1D; + break; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + resourceDim = SVGA3D_RESOURCE_TEXTURE2D; + break; + case PIPE_TEXTURE_3D: + resourceDim = SVGA3D_RESOURCE_TEXTURE3D; + break; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + resourceDim = SVGA3D_RESOURCE_TEXTURECUBE; + break; + + default: + assert(!"Unexpected texture type"); + resourceDim = SVGA3D_RESOURCE_TEXTURE2D; + } + + sv->id = util_bitmask_add(svga->sampler_view_id_bm); + + ret = SVGA3D_vgpu10_DefineShaderResourceView(svga->swc, + sv->id, + surface, + format, + resourceDim, + &viewDesc); + if (ret != PIPE_OK) { + util_bitmask_clear(svga->sampler_view_id_bm, sv->id); + sv->id = SVGA3D_INVALID_ID; + } + } + + return ret; +} + + +static enum pipe_error +update_sampler_resources(struct svga_context *svga, unsigned dirty) +{ + enum pipe_error ret = PIPE_OK; + unsigned shader; + + if (!svga_have_vgpu10(svga)) + return PIPE_OK; + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS]; + struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; + unsigned count; + unsigned nviews; + unsigned i; + + count = svga->curr.num_sampler_views[shader]; + for (i = 0; i < count; i++) { + struct svga_pipe_sampler_view *sv = + svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); + struct svga_winsys_surface *surface; + + if (sv) { + surface = svga_resource_handle(sv->base.texture); + + ret = svga_validate_pipe_sampler_view(svga, sv); + if (ret != PIPE_OK) + return ret; + + assert(sv->id != SVGA3D_INVALID_ID); + ids[i] = sv->id; + } + else { + surface = NULL; + ids[i] = SVGA3D_INVALID_ID; + } + surfaces[i] = surface; + } + + for (; i 
< Elements(ids); i++) { + ids[i] = SVGA3D_INVALID_ID; + surfaces[i] = NULL; + } + + if (shader == PIPE_SHADER_FRAGMENT) { + /* Handle polygon stipple sampler view */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_pipe_sampler_view *sv = + svga->polygon_stipple.sampler_view; + + assert(sv); + if (!sv) { + return PIPE_OK; /* probably out of memory */ + } + + ret = svga_validate_pipe_sampler_view(svga, sv); + if (ret != PIPE_OK) + return ret; + + ids[unit] = sv->id; + surfaces[unit] = svga_resource_handle(sv->base.texture); + count = MAX2(count, unit+1); + } + } + + /* Number of ShaderResources that need to be modified. This includes + * the one that need to be unbound. + */ + nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count); + if (nviews > 0) { + ret = SVGA3D_vgpu10_SetShaderResources(svga->swc, + svga_shader_type(shader), + 0, /* startView */ + nviews, + ids, + surfaces); + if (ret != PIPE_OK) + return ret; + } + + /* Number of sampler views enabled in the device */ + svga->state.hw_draw.num_sampler_views[shader] = count; + } + + return ret; +} + + +struct svga_tracked_state svga_hw_sampler_bindings = { + "shader resources emit", + SVGA_NEW_STIPPLE | + SVGA_NEW_TEXTURE_BINDING, + update_sampler_resources +}; + + + +static enum pipe_error +update_samplers(struct svga_context *svga, unsigned dirty ) +{ + enum pipe_error ret = PIPE_OK; + unsigned shader; + + if (!svga_have_vgpu10(svga)) + return PIPE_OK; + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + const unsigned count = svga->curr.num_samplers[shader]; + SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS]; + unsigned i; + + for (i = 0; i < count; i++) { + if (svga->curr.sampler[shader][i]) { + ids[i] = svga->curr.sampler[shader][i]->id; + assert(ids[i] != SVGA3D_INVALID_ID); + } + else { + ids[i] = SVGA3D_INVALID_ID; + } + } + + if (count > 0) { + if (count != svga->state.hw_draw.num_samplers[shader] || + memcmp(ids, svga->state.hw_draw.samplers[shader], + count * sizeof(ids[0])) != 0) { + /* HW state is really changing */ + ret = SVGA3D_vgpu10_SetSamplers(svga->swc, + count, + 0, /* start */ + svga_shader_type(shader), /* type */ + ids); + if (ret != PIPE_OK) + return ret; + memcpy(svga->state.hw_draw.samplers[shader], ids, + count * sizeof(ids[0])); + svga->state.hw_draw.num_samplers[shader] = count; + } + } + } + + /* Handle polygon stipple sampler texture */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_sampler_state *sampler = svga->polygon_stipple.sampler; + + assert(sampler); + if (!sampler) { + return PIPE_OK; /* probably out of memory */ + } + + ret = SVGA3D_vgpu10_SetSamplers(svga->swc, + 1, /* count */ + unit, /* start */ + SVGA3D_SHADERTYPE_PS, + &sampler->id); + } + + return ret; +} + + +struct svga_tracked_state svga_hw_sampler = { + "texture sampler emit", + (SVGA_NEW_SAMPLER | + SVGA_NEW_STIPPLE | + SVGA_NEW_TEXTURE_FLAGS), + update_samplers +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_tgsi_transform.c b/lib/mesa/src/gallium/drivers/svga/svga_state_tgsi_transform.c new file mode 100644 index 000000000..9e643ff49 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_tgsi_transform.c @@ -0,0 +1,293 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "util/u_simple_shaders.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_point_sprite.h" +#include "tgsi/tgsi_dump.h" + +#include "svga_context.h" +#include "svga_shader.h" +#include "svga_tgsi.h" + + +/** + * Bind a new GS. This updates the derived current gs state, not the + * user-specified GS state. + */ +static void +bind_gs_state(struct svga_context *svga, + struct svga_geometry_shader *gs) +{ + svga->curr.gs = gs; + svga->dirty |= SVGA_NEW_GS; +} + + +/** + * emulate_point_sprite searches the shader variants list to see it there is + * a shader variant with a token string that matches the emulation + * requirement. It there isn't, then it will use a tgsi utility + * tgsi_add_point_sprite to transform the original token string to support + * point sprite. A new geometry shader state will be created with the + * transformed token string and added to the shader variants list of the + * original geometry shader. The new geometry shader state will then be + * bound as the current geometry shader. + */ +static struct svga_shader * +emulate_point_sprite(struct svga_context *svga, + struct svga_shader *shader, + const struct tgsi_token *tokens) +{ + struct svga_token_key key; + struct tgsi_token *new_tokens; + const struct tgsi_token *orig_tokens; + struct svga_geometry_shader *orig_gs = (struct svga_geometry_shader *)shader; + struct svga_geometry_shader *gs = NULL; + struct pipe_shader_state templ; + struct svga_stream_output *streamout = NULL; + int pos_out_index = -1; + int aa_point_coord_index = -1; + + assert(tokens != NULL); + + orig_tokens = tokens; + + /* Create a token key */ + memset(&key, 0, sizeof key); + key.gs.writes_psize = 1; + key.gs.sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable; + + key.gs.sprite_origin_upper_left = + !(svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + + key.gs.aa_point = svga->curr.rast->templ.point_smooth; + + if (orig_gs) { + + /* Check if the original geometry shader has stream output and + * if position is one of the outputs. 
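
emulate_point_sprite(), described above, ends up injecting a geometry shader (via tgsi_add_point_sprite) that turns each point into a screen-aligned quad sized by the point size. The plain-C restatement below only shows the geometry of that expansion; it is a conceptual sketch, not the generated TGSI, and the pixel-to-clip-space scaling is our assumption.

/* Conceptually what the injected GS does per point: emit four corners of a
 * quad whose half-size is the point size converted from pixels to clip space
 * (pre-multiplied by w so it survives the perspective divide). */
static void
toy_point_to_quad(const float pos[4], float psize,
                  float vp_width, float vp_height,
                  float corners[4][4])
{
   const float hx = 0.5f * psize * (2.0f / vp_width)  * pos[3];
   const float hy = 0.5f * psize * (2.0f / vp_height) * pos[3];
   static const float sx[4] = { -1.0f,  1.0f, -1.0f,  1.0f };
   static const float sy[4] = { -1.0f, -1.0f,  1.0f,  1.0f };
   int i;

   for (i = 0; i < 4; i++) {
      corners[i][0] = pos[0] + sx[i] * hx;
      corners[i][1] = pos[1] + sy[i] * hy;
      corners[i][2] = pos[2];
      corners[i][3] = pos[3];
   }
}
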
+ */ + streamout = orig_gs->base.stream_output; + if (streamout) { + pos_out_index = streamout->pos_out_index; + key.gs.point_pos_stream_out = pos_out_index != -1; + } + + /* Search the shader lists to see if there is a variant that matches + * this token key. + */ + gs = (struct svga_geometry_shader *) + svga_search_shader_token_key(&orig_gs->base, &key); + } + + /* If there isn't, then call the tgsi utility tgsi_add_point_sprite + * to transform the original tokens to support point sprite. + * Flip the sprite origin as SVGA3D device only supports an + * upper-left origin. + */ + if (!gs) { + new_tokens = tgsi_add_point_sprite(orig_tokens, + key.gs.sprite_coord_enable, + key.gs.sprite_origin_upper_left, + key.gs.point_pos_stream_out, + key.gs.aa_point ? + &aa_point_coord_index : NULL); + + if (!new_tokens) { + /* if no new tokens are generated for whatever reason, just return */ + return NULL; + } + + if (0) { + debug_printf("Before tgsi_add_point_sprite ---------------\n"); + tgsi_dump(orig_tokens, 0); + debug_printf("After tgsi_add_point_sprite --------------\n"); + tgsi_dump(new_tokens, 0); + } + + templ.tokens = new_tokens; + templ.stream_output.num_outputs = 0; + + if (streamout) { + templ.stream_output = streamout->info; + /* The tgsi_add_point_sprite utility adds an extra output + * for the original point position for stream output purpose. + * We need to replace the position output register index in the + * stream output declaration with the new register index. + */ + if (pos_out_index != -1) { + assert(orig_gs != NULL); + templ.stream_output.output[pos_out_index].register_index = + orig_gs->base.info.num_outputs; + } + } + + /* Create a new geometry shader state with the new tokens */ + gs = svga->pipe.create_gs_state(&svga->pipe, &templ); + + /* Don't need the token string anymore. There is a local copy + * in the shader state. + */ + FREE(new_tokens); + + if (!gs) { + return NULL; + } + + gs->wide_point = TRUE; + gs->aa_point_coord_index = aa_point_coord_index; + gs->base.token_key = key; + gs->base.parent = &orig_gs->base; + gs->base.next = NULL; + + /* Add the new geometry shader to the head of the shader list + * pointed to by the original geometry shader. + */ + if (orig_gs) { + gs->base.next = orig_gs->base.next; + orig_gs->base.next = &gs->base; + } + } + + /* Bind the new geometry shader state */ + bind_gs_state(svga, gs); + + return &gs->base; +} + +/** + * Generate a geometry shader that emits a wide point by drawing a quad. + * This function first creates a passthrough geometry shader and then + * calls emulate_point_sprite() to transform the geometry shader to + * support point sprite. + */ +static struct svga_shader * +add_point_sprite_shader(struct svga_context *svga) +{ + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_geometry_shader *orig_gs = vs->gs; + struct svga_geometry_shader *new_gs; + const struct tgsi_token *tokens; + + if (orig_gs == NULL) { + + /* If this is the first time adding a geometry shader to this + * vertex shader to support point sprite, then create + * a passthrough geometry shader first. 
+ */ + orig_gs = (struct svga_geometry_shader *) + util_make_geometry_passthrough_shader( + &svga->pipe, vs->base.info.num_outputs, + vs->base.info.output_semantic_name, + vs->base.info.output_semantic_index); + + if (!orig_gs) + return NULL; + } + else { + if (orig_gs->base.parent) + orig_gs = (struct svga_geometry_shader *)orig_gs->base.parent; + } + tokens = orig_gs->base.tokens; + + /* Call emulate_point_sprite to find or create a transformed + * geometry shader for supporting point sprite. + */ + new_gs = (struct svga_geometry_shader *) + emulate_point_sprite(svga, &orig_gs->base, tokens); + + /* If this is the first time creating a geometry shader to + * support vertex point size, then add the new geometry shader + * to the vertex shader. + */ + if (vs->gs == NULL) { + vs->gs = new_gs; + } + + return &new_gs->base; +} + +/* update_tgsi_transform provides a hook to transform a shader if needed. + */ +static enum pipe_error +update_tgsi_transform(struct svga_context *svga, unsigned dirty) +{ + struct svga_geometry_shader *gs = svga->curr.user_gs; /* current gs */ + struct svga_vertex_shader *vs = svga->curr.vs; /* currently bound vs */ + struct svga_shader *orig_gs; /* original gs */ + struct svga_shader *new_gs; /* new gs */ + + if (!svga_have_vgpu10(svga)) + return PIPE_OK; + + if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) { + /* If the current prim type is POINTS and the current geometry shader + * emits wide points, transform the shader to emulate wide points using + * quads. + */ + if (gs != NULL && (gs->base.info.writes_psize || gs->wide_point)) { + orig_gs = gs->base.parent ? gs->base.parent : &gs->base; + new_gs = emulate_point_sprite(svga, orig_gs, orig_gs->tokens); + } + + /* If there is not an active geometry shader and the current vertex + * shader emits wide point then create a new geometry shader to emulate + * wide point. + */ + else if (gs == NULL && + (svga->curr.rast->pointsize > 1.0 || + vs->base.info.writes_psize)) { + new_gs = add_point_sprite_shader(svga); + } + else { + /* use the user's GS */ + bind_gs_state(svga, svga->curr.user_gs); + } + } + else if (svga->curr.gs != svga->curr.user_gs) { + /* If current primitive type is not POINTS, then make sure + * we don't bind to any of the generated geometry shader + */ + bind_gs_state(svga, svga->curr.user_gs); + } + (void) new_gs; /* silence the unused var warning */ + + return PIPE_OK; +} + +struct svga_tracked_state svga_need_tgsi_transform = +{ + "transform shader for optimization", + (SVGA_NEW_VS | + SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_REDUCED_PRIMITIVE | + SVGA_NEW_RAST), + update_tgsi_transform +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c b/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c index 41334bd7c..4debbf166 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c @@ -31,24 +31,28 @@ #include "svga_sampler_view.h" #include "svga_winsys.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_state.h" #include "svga_cmd.h" +/** + * Called when tearing down a context to free resources and samplers. 
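
update_tgsi_transform() above decides whether to keep the user's geometry shader, derive a point-sprite GS from it, or synthesize one from the vertex shader outputs. Its branching condenses to roughly the decision helper below; this is a restatement for clarity with simplified boolean inputs, not code from the driver.

enum toy_gs_choice { TOY_USE_USER_GS, TOY_EMULATE_FROM_USER_GS, TOY_ADD_SPRITE_GS };

/* gs_emits_wide_points: the user GS writes point size or is already a
 * wide-point GS; vs_emits_wide_points: point size > 1 or the VS writes it. */
static enum toy_gs_choice
toy_choose_gs(int prim_is_points, int have_user_gs,
              int gs_emits_wide_points, int vs_emits_wide_points)
{
   if (prim_is_points) {
      if (have_user_gs && gs_emits_wide_points)
         return TOY_EMULATE_FROM_USER_GS;
      if (!have_user_gs && vs_emits_wide_points)
         return TOY_ADD_SPRITE_GS;
   }
   return TOY_USE_USER_GS;
}
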
+ */ void svga_cleanup_tss_binding(struct svga_context *svga) { + const unsigned shader = PIPE_SHADER_FRAGMENT; unsigned i; - unsigned count = MAX2( svga->curr.num_sampler_views, - svga->state.hw_draw.num_views ); - for (i = 0; i < count; i++) { + for (i = 0; i < Elements(svga->state.hw_draw.views); i++) { struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; - - svga_sampler_view_reference(&view->v, NULL); - pipe_sampler_view_release(&svga->pipe, &svga->curr.sampler_views[i]); - pipe_resource_reference( &view->texture, NULL ); - - view->dirty = 1; + if (view) { + svga_sampler_view_reference(&view->v, NULL); + pipe_sampler_view_release(&svga->pipe, + &svga->curr.sampler_views[shader][i]); + pipe_resource_reference(&view->texture, NULL); + view->dirty = TRUE; + } } } @@ -63,73 +67,113 @@ struct bind_queue { }; +/** + * Update the texture binding for one texture unit. + */ +static void +emit_tex_binding_unit(struct svga_context *svga, + unsigned unit, + const struct svga_sampler_state *s, + const struct pipe_sampler_view *sv, + struct svga_hw_view_state *view, + boolean reemit, + struct bind_queue *queue) +{ + struct pipe_resource *texture = NULL; + unsigned last_level, min_lod, max_lod; + + /* get min max lod */ + if (sv && s) { + if (s->mipfilter == SVGA3D_TEX_FILTER_NONE) { + /* just use the base level image */ + min_lod = max_lod = sv->u.tex.first_level; + } + else { + last_level = MIN2(sv->u.tex.last_level, sv->texture->last_level); + min_lod = s->view_min_lod + sv->u.tex.first_level; + min_lod = MIN2(min_lod, last_level); + max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level); + } + texture = sv->texture; + } + else { + min_lod = 0; + max_lod = 0; + } + + if (view->texture != texture || + view->min_lod != min_lod || + view->max_lod != max_lod) { + + svga_sampler_view_reference(&view->v, NULL); + pipe_resource_reference( &view->texture, texture ); + + view->dirty = TRUE; + view->min_lod = min_lod; + view->max_lod = max_lod; + + if (texture) { + view->v = svga_get_tex_sampler_view(&svga->pipe, + texture, + min_lod, + max_lod); + } + } + + /* + * We need to reemit non-null texture bindings, even when they are not + * dirty, to ensure that the resources are paged in. 
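
emit_tex_binding_unit() above derives the min/max LOD for a texture binding from the sampler state and the view's mip range. A worked example of exactly that clamping, pulled into a standalone snippet (the helper and the sample numbers are ours, the formulas match the hunk):

#include <stdio.h>

#define TOY_MIN2(a, b) ((a) < (b) ? (a) : (b))

static void
toy_lod_range(unsigned first_level, unsigned view_last_level,
              unsigned tex_last_level,
              unsigned view_min_lod, unsigned view_max_lod,
              int mipfilter_none)
{
   unsigned min_lod, max_lod, last_level;

   if (mipfilter_none) {
      /* no mipmapping: sample only the view's base level */
      min_lod = max_lod = first_level;
   }
   else {
      last_level = TOY_MIN2(view_last_level, tex_last_level);
      min_lod = TOY_MIN2(view_min_lod + first_level, last_level);
      max_lod = TOY_MIN2(view_max_lod + first_level, last_level);
   }
   printf("min_lod=%u max_lod=%u\n", min_lod, max_lod);
}

int main(void)
{
   /* a view starting at level 2 of a texture whose last level is 7,
    * with a wide-open sampler LOD range */
   toy_lod_range(2, 9, 7, 0, 1000, 0);   /* prints min_lod=2 max_lod=7 */
   return 0;
}
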
+ */ + if (view->dirty || (reemit && view->v)) { + queue->bind[queue->bind_count].unit = unit; + queue->bind[queue->bind_count].view = view; + queue->bind_count++; + } + + if (!view->dirty && view->v) { + svga_validate_sampler_view(svga, view->v); + } +} + + static enum pipe_error update_tss_binding(struct svga_context *svga, unsigned dirty ) { - boolean reemit = svga->rebind.texture_samplers; + const unsigned shader = PIPE_SHADER_FRAGMENT; + boolean reemit = svga->rebind.flags.texture_samplers; unsigned i; - unsigned count = MAX2( svga->curr.num_sampler_views, + unsigned count = MAX2( svga->curr.num_sampler_views[shader], svga->state.hw_draw.num_views ); - unsigned min_lod; - unsigned max_lod; struct bind_queue queue; + if (svga_have_vgpu10(svga)) + return PIPE_OK; + queue.bind_count = 0; for (i = 0; i < count; i++) { - const struct svga_sampler_state *s = svga->curr.sampler[i]; - struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; - struct pipe_resource *texture = NULL; - struct pipe_sampler_view *sv = svga->curr.sampler_views[i]; - - /* get min max lod */ - if (sv && s) { - min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level)); - max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, - sv->texture->last_level); - texture = sv->texture; - } else { - min_lod = 0; - max_lod = 0; - } - - if (view->texture != texture || - view->min_lod != min_lod || - view->max_lod != max_lod) { - - svga_sampler_view_reference(&view->v, NULL); - pipe_resource_reference( &view->texture, texture ); - - view->dirty = TRUE; - view->min_lod = min_lod; - view->max_lod = max_lod; - - if (texture) - view->v = svga_get_tex_sampler_view(&svga->pipe, - texture, - min_lod, - max_lod); - } - - /* - * We need to reemit non-null texture bindings, even when they are not - * dirty, to ensure that the resources are paged in. 
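As an illustration (not part of the patch): update_tss_binding() above gathers only the units whose cached state differs into a small queue and then, if anything was queued, emits them as one batched command. A minimal generic sketch of that collect-then-flush pattern, with invented types standing in for the real command reservation:

/* Sketch of the "queue dirty units, then emit one batched command" pattern;
 * the types and emit function are stand-ins, not the real winsys interface.
 */
#include <assert.h>
#include <stdio.h>

#define MAX_UNITS 16

struct binding { unsigned unit; int value; };

struct bind_queue {
   unsigned count;
   struct binding bind[MAX_UNITS];
};

static void queue_binding(struct bind_queue *q, unsigned unit, int value)
{
   assert(q->count < MAX_UNITS);
   q->bind[q->count].unit = unit;
   q->bind[q->count].value = value;
   q->count++;
}

/* Stand-in for reserving one command that carries all queued bindings */
static void emit_batched_command(const struct bind_queue *q)
{
   unsigned i;
   printf("emitting %u binding(s) in one command\n", q->count);
   for (i = 0; i < q->count; i++)
      printf("  unit %u -> %d\n", q->bind[i].unit, q->bind[i].value);
}

int main(void)
{
   struct bind_queue q = { 0 };
   int cached[MAX_UNITS] = { 0 };
   int wanted[MAX_UNITS] = { 0, 5, 0, 7 };
   unsigned i;

   /* only queue units whose cached value differs (the "dirty" test) */
   for (i = 0; i < 4; i++) {
      if (cached[i] != wanted[i]) {
         queue_binding(&q, i, wanted[i]);
         cached[i] = wanted[i];
      }
   }

   if (q.count)       /* skip the command entirely when nothing changed */
      emit_batched_command(&q);
   return 0;
}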
- */ - - if (view->dirty || - (reemit && view->v)) { - queue.bind[queue.bind_count].unit = i; - queue.bind[queue.bind_count].view = view; - queue.bind_count++; - } - if (!view->dirty && view->v) { - svga_validate_sampler_view(svga, view->v); - } + emit_tex_binding_unit(svga, i, + svga->curr.sampler[shader][i], + svga->curr.sampler_views[shader][i], + &svga->state.hw_draw.views[i], + reemit, + &queue); } - svga->state.hw_draw.num_views = svga->curr.num_sampler_views; + svga->state.hw_draw.num_views = svga->curr.num_sampler_views[shader]; + + /* Polygon stipple */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + emit_tex_binding_unit(svga, unit, + svga->polygon_stipple.sampler, + &svga->polygon_stipple.sampler_view->base, + &svga->state.hw_draw.views[unit], + reemit, + &queue); + } if (queue.bind_count) { SVGA3dTextureState *ts; @@ -163,7 +207,7 @@ update_tss_binding(struct svga_context *svga, SVGA_FIFOCommitAll( svga->swc ); } - svga->rebind.texture_samplers = FALSE; + svga->rebind.flags.texture_samplers = FALSE; return PIPE_OK; @@ -187,7 +231,8 @@ svga_reemit_tss_bindings(struct svga_context *svga) enum pipe_error ret; struct bind_queue queue; - assert(svga->rebind.texture_samplers); + assert(!svga_have_vgpu10(svga)); + assert(svga->rebind.flags.texture_samplers); queue.bind_count = 0; @@ -201,6 +246,18 @@ svga_reemit_tss_bindings(struct svga_context *svga) } } + /* Polygon stipple */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit]; + + if (view->v) { + queue.bind[queue.bind_count].unit = unit; + queue.bind[queue.bind_count].view = view; + queue.bind_count++; + } + } + if (queue.bind_count) { SVGA3dTextureState *ts; @@ -229,7 +286,7 @@ svga_reemit_tss_bindings(struct svga_context *svga) SVGA_FIFOCommitAll(svga->swc); } - svga->rebind.texture_samplers = FALSE; + svga->rebind.flags.texture_samplers = FALSE; return PIPE_OK; } @@ -238,6 +295,7 @@ svga_reemit_tss_bindings(struct svga_context *svga) struct svga_tracked_state svga_hw_tss_binding = { "texture binding emit", SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_STIPPLE | SVGA_NEW_SAMPLER, update_tss_binding }; @@ -252,78 +310,98 @@ struct ts_queue { }; -#define EMIT_TS(svga, unit, val, token, fail) \ +static inline void +svga_queue_tss( struct ts_queue *q, + unsigned unit, + unsigned tss, + unsigned value ) +{ + assert(q->ts_count < ARRAY_SIZE(q->ts)); + q->ts[q->ts_count].stage = unit; + q->ts[q->ts_count].name = tss; + q->ts[q->ts_count].value = value; + q->ts_count++; +} + + +#define EMIT_TS(svga, unit, val, token) \ do { \ assert(unit < Elements(svga->state.hw_draw.ts)); \ assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \ if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ - svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga_queue_tss( queue, unit, SVGA3D_TS_##token, val ); \ svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ } \ } while (0) -#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail) \ +#define EMIT_TS_FLOAT(svga, unit, fvalue, token) \ do { \ unsigned val = fui(fvalue); \ assert(unit < Elements(svga->state.hw_draw.ts)); \ assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \ if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ - svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga_queue_tss( queue, unit, SVGA3D_TS_##token, 
val ); \ svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ } \ } while (0) -static inline void -svga_queue_tss( struct ts_queue *q, - unsigned unit, - unsigned tss, - unsigned value ) +/** + * Emit texture sampler state (tss) for one texture unit. + */ +static void +emit_tss_unit(struct svga_context *svga, unsigned unit, + const struct svga_sampler_state *state, + struct ts_queue *queue) { - assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0])); - q->ts[q->ts_count].stage = unit; - q->ts[q->ts_count].name = tss; - q->ts[q->ts_count].value = value; - q->ts_count++; + EMIT_TS(svga, unit, state->mipfilter, MIPFILTER); + EMIT_TS(svga, unit, state->min_lod, TEXTURE_MIPMAP_LEVEL); + EMIT_TS(svga, unit, state->magfilter, MAGFILTER); + EMIT_TS(svga, unit, state->minfilter, MINFILTER); + EMIT_TS(svga, unit, state->aniso_level, TEXTURE_ANISOTROPIC_LEVEL); + EMIT_TS_FLOAT(svga, unit, state->lod_bias, TEXTURE_LOD_BIAS); + EMIT_TS(svga, unit, state->addressu, ADDRESSU); + EMIT_TS(svga, unit, state->addressw, ADDRESSW); + EMIT_TS(svga, unit, state->bordercolor, BORDERCOLOR); + // TEXCOORDINDEX -- hopefully not needed + + if (svga->curr.tex_flags.flag_1d & (1 << unit)) + EMIT_TS(svga, unit, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV); + else + EMIT_TS(svga, unit, state->addressv, ADDRESSV); + + if (svga->curr.tex_flags.flag_srgb & (1 << unit)) + EMIT_TS_FLOAT(svga, unit, 2.2f, GAMMA); + else + EMIT_TS_FLOAT(svga, unit, 1.0f, GAMMA); } - static enum pipe_error update_tss(struct svga_context *svga, unsigned dirty ) { + const unsigned shader = PIPE_SHADER_FRAGMENT; unsigned i; struct ts_queue queue; - queue.ts_count = 0; - for (i = 0; i < svga->curr.num_samplers; i++) { - if (svga->curr.sampler[i]) { - const struct svga_sampler_state *curr = svga->curr.sampler[i]; - - EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail); - EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail); - EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail); - EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail); - EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail); - EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail); - EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail); - EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail); - EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail); - // TEXCOORDINDEX -- hopefully not needed - - if (svga->curr.tex_flags.flag_1d & (1 << i)) { - EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail); - } - else - EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail); - - if (svga->curr.tex_flags.flag_srgb & (1 << i)) - EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail); - else - EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail); + if (svga_have_vgpu10(svga)) + return PIPE_OK; + queue.ts_count = 0; + for (i = 0; i < svga->curr.num_samplers[shader]; i++) { + if (svga->curr.sampler[shader][i]) { + const struct svga_sampler_state *curr = svga->curr.sampler[shader][i]; + emit_tss_unit(svga, i, curr, &queue); } } + + /* polygon stipple sampler */ + if (svga->curr.rast->templ.poly_stipple_enable) { + emit_tss_unit(svga, + svga->state.hw_draw.fs->pstipple_sampler_unit, + svga->polygon_stipple.sampler, + &queue); + } if (queue.ts_count) { SVGA3dTextureState *ts; @@ -357,6 +435,7 @@ fail: struct svga_tracked_state svga_hw_tss = { "texture state emit", (SVGA_NEW_SAMPLER | + SVGA_NEW_STIPPLE | SVGA_NEW_TEXTURE_FLAGS), update_tss }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c b/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c index a33eda383..e1b6a1c2a 100644 --- 
a/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c @@ -33,6 +33,7 @@ #include "svga_draw.h" #include "svga_tgsi.h" #include "svga_screen.h" +#include "svga_shader.h" #include "svga_resource_buffer.h" #include "svga_hw_reg.h" @@ -42,16 +43,14 @@ static enum pipe_error emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) { const struct pipe_vertex_element *ve = svga->curr.velems->velem; + SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX]; + unsigned buffer_indexes[SVGA3D_INPUTREG_MAX]; unsigned i; unsigned neg_bias = 0; assert(svga->curr.velems->count >= svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); - /* specify number of vertex element declarations to come */ - svga_hwtnl_reset_vdecl( svga->hwtnl, - svga->curr.velems->count ); - /** * We can't set the VDECL offset to something negative, so we * must calculate a common negative additional index bias, and modify @@ -70,15 +69,16 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) for (i = 0; i < svga->curr.velems->count; i++) { const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; - const struct svga_buffer *buffer; + struct svga_buffer *buffer; unsigned int offset = vb->buffer_offset + ve[i].src_offset; + unsigned tmp_neg_bias = 0; if (!vb->buffer) continue; buffer = svga_buffer(vb->buffer); if (buffer->uploaded.start > offset) { - unsigned tmp_neg_bias = buffer->uploaded.start - offset; + tmp_neg_bias = buffer->uploaded.start - offset; if (vb->stride) tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride; neg_bias = MAX2(neg_bias, tmp_neg_bias); @@ -89,8 +89,7 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; - const struct svga_buffer *buffer; - SVGA3dVertexDecl decl; + struct svga_buffer *buffer; if (!vb->buffer) continue; @@ -100,29 +99,37 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) /* SVGA_NEW_VELEMENT */ - decl.identity.type = svga->curr.velems->decl_type[i]; - decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT; - decl.identity.usage = usage; - decl.identity.usageIndex = index; - decl.array.stride = vb->stride; + decls[i].identity.type = svga->curr.velems->decl_type[i]; + decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT; + decls[i].identity.usage = usage; + decls[i].identity.usageIndex = index; + decls[i].array.stride = vb->stride; /* Compensate for partially uploaded vbo, and * for the negative index bias. */ - decl.array.offset = (vb->buffer_offset + decls[i].array.offset = (vb->buffer_offset + ve[i].src_offset + neg_bias * vb->stride - buffer->uploaded.start); - assert(decl.array.offset >= 0); + assert(decls[i].array.offset >= 0); + + buffer_indexes[i] = ve[i].vertex_buffer_index; - svga_hwtnl_vdecl( svga->hwtnl, - i, - &decl, - buffer->uploaded.buffer ? 
buffer->uploaded.buffer : - vb->buffer ); + assert(!buffer->uploaded.buffer); } + svga_hwtnl_vertex_decls(svga->hwtnl, + svga->curr.velems->count, + decls, + buffer_indexes, + svga->curr.velems->id); + + svga_hwtnl_vertex_buffers(svga->hwtnl, + svga->curr.num_vertex_buffers, + svga->curr.vb); + svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias ); return PIPE_OK; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c index c2a0f1ee6..a103dab25 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c @@ -25,7 +25,6 @@ #include "util/u_inlines.h" #include "pipe/p_defines.h" -#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_bitmask.h" @@ -41,33 +40,6 @@ #include "svga_hw_reg.h" -static inline int -compare_vs_keys(const struct svga_vs_compile_key *a, - const struct svga_vs_compile_key *b) -{ - unsigned keysize = svga_vs_key_size( a ); - return memcmp( a, b, keysize ); -} - - -/** Search for a vertex shader variant */ -static struct svga_shader_variant * -search_vs_key(const struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *key) -{ - struct svga_shader_variant *variant = vs->base.variants; - - assert(key); - - for ( ; variant; variant = variant->next) { - if (compare_vs_keys( key, &variant->key.vkey ) == 0) - return variant; - } - - return NULL; -} - - /** * If we fail to compile a vertex shader we'll use a dummy/fallback shader * that simply emits a (0,0,0,1) vertex position. @@ -99,13 +71,30 @@ get_dummy_vertex_shader(void) } +static struct svga_shader_variant * +translate_vertex_program(struct svga_context *svga, + const struct svga_vertex_shader *vs, + const struct svga_compile_key *key) +{ + if (svga_have_vgpu10(svga)) { + return svga_tgsi_vgpu10_translate(svga, &vs->base, key, + PIPE_SHADER_VERTEX); + } + else { + return svga_tgsi_vgpu9_translate(svga, &vs->base, key, + PIPE_SHADER_VERTEX); + } +} + + /** * Replace the given shader's instruction with a simple / dummy shader. * We use this when normal shader translation fails. 
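As an illustration (not part of the patch): the negative index bias in the emit_hw_vs_vdecl() hunk above compensates for vertex data that was uploaded starting partway into the buffer, since the hardware declaration offset cannot go negative. A small standalone rework of that arithmetic with made-up numbers:

/* Standalone sketch of the negative index bias arithmetic used for vertex
 * declarations; the values below are invented for illustration.
 */
#include <assert.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   const unsigned stride = 32;          /* bytes per vertex */
   const unsigned uploaded_start = 96;  /* first byte actually uploaded */
   const unsigned buffer_offset = 0;
   const unsigned src_offset = 0;
   const unsigned offset = buffer_offset + src_offset;
   unsigned neg_bias = 0;

   /* If the upload starts after the element's offset, bias indices upward
    * by enough whole vertices to make the final offset non-negative.
    */
   if (uploaded_start > offset) {
      unsigned tmp = uploaded_start - offset;
      if (stride)
         tmp = (tmp + stride - 1) / stride;   /* round up to whole vertices */
      neg_bias = MAX2(neg_bias, tmp);
   }

   /* Declaration offset after compensating for the partial upload and bias */
   {
      int decl_offset = (int)(offset + neg_bias * stride - uploaded_start);
      assert(decl_offset >= 0);
      printf("neg_bias=%u decl_offset=%d index_bias=%d\n",
             neg_bias, decl_offset, -(int)neg_bias);
   }
   return 0;
}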
*/ static struct svga_shader_variant * -get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *key) +get_compiled_dummy_vertex_shader(struct svga_context *svga, + struct svga_vertex_shader *vs, + const struct svga_compile_key *key) { const struct tgsi_token *dummy = get_dummy_vertex_shader(); struct svga_shader_variant *variant; @@ -117,7 +106,7 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs, FREE((void *) vs->base.tokens); vs->base.tokens = dummy; - variant = svga_translate_vertex_program(vs, key); + variant = translate_vertex_program(svga, vs, key); return variant; } @@ -128,69 +117,90 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs, static enum pipe_error compile_vs(struct svga_context *svga, struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *key, + const struct svga_compile_key *key, struct svga_shader_variant **out_variant) { struct svga_shader_variant *variant; enum pipe_error ret = PIPE_ERROR; - variant = svga_translate_vertex_program( vs, key ); + variant = translate_vertex_program(svga, vs, key); if (variant == NULL) { - /* some problem during translation, try the dummy shader */ - variant = get_compiled_dummy_vertex_shader(vs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + debug_printf("Failed to compile vertex shader," + " using dummy shader instead.\n"); + variant = get_compiled_dummy_vertex_shader(svga, vs, key); } - - if (svga_shader_too_large(svga, variant)) { + else if (svga_shader_too_large(svga, variant)) { /* too big, use dummy shader */ - debug_printf("Shader too large (%lu bytes)," + debug_printf("Shader too large (%u bytes)," " using dummy shader instead.\n", - (unsigned long ) variant->nr_tokens - * sizeof(variant->tokens[0])); - variant = get_compiled_dummy_vertex_shader(vs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + (unsigned) (variant->nr_tokens + * sizeof(variant->tokens[0]))); + /* Free the too-large variant */ + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + /* Use simple pass-through shader instead */ + variant = get_compiled_dummy_vertex_shader(svga, vs, key); + } + + if (!variant) { + return PIPE_ERROR; } ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_VS, variant); - if (ret != PIPE_OK) - goto fail; + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + return ret; + } *out_variant = variant; - /* insert variants at head of linked list */ - variant->next = vs->base.variants; - vs->base.variants = variant; - return PIPE_OK; - -fail: - if (variant) { - svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); - } - return ret; } + /* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_FS */ static void -make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key) +make_vs_key(struct svga_context *svga, struct svga_compile_key *key) { + const unsigned shader = PIPE_SHADER_VERTEX; + memset(key, 0, sizeof *key); - key->need_prescale = svga->state.hw_clear.prescale.enabled; - key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex; + + if (svga->state.sw.need_swtnl && svga_have_vgpu10(svga)) { + /* Set both of these flags, to match compile_passthrough_vs() */ + key->vs.passthrough = 1; + key->vs.undo_viewport = 1; + return; + } + + /* SVGA_NEW_PRESCALE */ + key->vs.need_prescale = svga->state.hw_clear.prescale.enabled && + (svga->curr.gs == NULL); + + /* SVGA_NEW_RAST */ + key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex; /* SVGA_NEW_FS 
*/ - key->fs_generic_inputs = svga->curr.fs->generic_inputs; + key->vs.fs_generic_inputs = svga->curr.fs->generic_inputs; + + svga_remap_generics(key->vs.fs_generic_inputs, key->generic_remap_table); /* SVGA_NEW_VELEMENT */ - key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range; - key->adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1; + key->vs.adjust_attrib_range = svga->curr.velems->adjust_attrib_range; + key->vs.adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1; + key->vs.attrib_is_pure_int = svga->curr.velems->attrib_is_pure_int; + key->vs.adjust_attrib_itof = svga->curr.velems->adjust_attrib_itof; + key->vs.adjust_attrib_utof = svga->curr.velems->adjust_attrib_utof; + key->vs.attrib_is_bgra = svga->curr.velems->attrib_is_bgra; + key->vs.attrib_puint_to_snorm = svga->curr.velems->attrib_puint_to_snorm; + key->vs.attrib_puint_to_uscaled = svga->curr.velems->attrib_puint_to_uscaled; + key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled; + + /* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ + svga_init_shader_key_common(svga, shader, key); + + /* SVGA_NEW_RAST */ + key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; } @@ -201,63 +211,196 @@ enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga) { enum pipe_error ret; - struct svga_winsys_gb_shader *gbshader = - svga->state.hw_draw.vs ? svga->state.hw_draw.vs->gb_shader : NULL; + struct svga_winsys_gb_shader *gbshader = NULL; + SVGA3dShaderId shaderId = SVGA3D_INVALID_ID; - assert(svga->rebind.vs); + assert(svga->rebind.flags.vs); assert(svga_have_gb_objects(svga)); - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader); + if (svga->state.hw_draw.vs) { + gbshader = svga->state.hw_draw.vs->gb_shader; + shaderId = svga->state.hw_draw.vs->id; + } + + if (!svga_need_to_rebind_resources(svga)) { + ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader, + SVGA_RELOC_READ); + goto out; + } + + if (svga_have_vgpu10(svga)) + ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, + gbshader, shaderId); + else + ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader); + + out: if (ret != PIPE_OK) return ret; - svga->rebind.vs = FALSE; + svga->rebind.flags.vs = FALSE; return PIPE_OK; } +/** + * The current vertex shader is already executed by the 'draw' + * module, so we just need to generate a simple vertex shader + * to pass through all those VS outputs that will + * be consumed by the fragment shader. + * Used when we employ the 'draw' module. 
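As an illustration (not part of the patch): the removed search_vs_key() earlier in this file and the svga_search_shader_key() call in emit_hw_vs() below rely on the same variant-cache idea: compiled variants hang off the shader in a linked list keyed by a compile key, are found by memcmp, and a freshly compiled variant is inserted at the head on a miss. A generic sketch with invented types:

/* Generic sketch of a shader-variant cache keyed by a compile key; the
 * struct layout and compile step below are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct key { unsigned flags; unsigned num_samplers; };

struct variant {
   struct key key;
   struct variant *next;
};

struct shader { struct variant *variants; };

static struct variant *search_variant(struct shader *sh, const struct key *k)
{
   struct variant *v;
   for (v = sh->variants; v; v = v->next)
      if (memcmp(&v->key, k, sizeof(*k)) == 0)
         return v;
   return NULL;
}

/* Stand-in for the real translate/compile step */
static struct variant *compile_variant(const struct key *k)
{
   struct variant *v = calloc(1, sizeof(*v));
   if (v)
      v->key = *k;
   return v;
}

static struct variant *get_variant(struct shader *sh, const struct key *k)
{
   struct variant *v = search_variant(sh, k);
   if (!v) {
      v = compile_variant(k);
      if (v) {               /* insert new variants at the head of the list */
         v->next = sh->variants;
         sh->variants = v;
      }
   }
   return v;
}

int main(void)
{
   struct shader sh = { NULL };
   struct key k = { 1, 2 };
   struct variant *a = get_variant(&sh, &k);
   struct variant *b = get_variant(&sh, &k);
   printf("same variant reused: %s\n", (a && a == b) ? "yes" : "no");
   return 0;
}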
+ */ static enum pipe_error -emit_hw_vs(struct svga_context *svga, unsigned dirty) +compile_passthrough_vs(struct svga_context *svga, + struct svga_vertex_shader *vs, + struct svga_fragment_shader *fs, + struct svga_shader_variant **out_variant) { struct svga_shader_variant *variant = NULL; + unsigned num_inputs; + unsigned i; + unsigned num_elements; + struct svga_vertex_shader new_vs; + struct ureg_src src[PIPE_MAX_SHADER_INPUTS]; + struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_program *ureg; + unsigned num_tokens; + struct svga_compile_key key; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + assert(fs); + + num_inputs = fs->base.info.num_inputs; + + ureg = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!ureg) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* draw will always add position */ + dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); + src[0] = ureg_DECL_vs_input(ureg, 0); + num_elements = 1; + + /** + * swtnl backend redefines the input layout based on the + * fragment shader's inputs. So we only need to passthrough + * those inputs that will be consumed by the fragment shader. + * Note: DX10 requires the number of vertex elements + * specified in the input layout to be no less than the + * number of inputs to the vertex shader. + */ + for (i = 0; i < num_inputs; i++) { + switch (fs->base.info.input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_FOG: + dst[num_elements] = ureg_DECL_output(ureg, + fs->base.info.input_semantic_name[i], + fs->base.info.input_semantic_index[i]); + src[num_elements] = ureg_DECL_vs_input(ureg, num_elements); + num_elements++; + break; + default: + break; + } + } + + for (i = 0; i < num_elements; i++) { + ureg_MOV(ureg, dst[i], src[i]); + } + + ureg_END(ureg); + + memset(&new_vs, 0, sizeof(new_vs)); + new_vs.base.tokens = ureg_get_tokens(ureg, &num_tokens); + tgsi_scan_shader(new_vs.base.tokens, &new_vs.base.info); + + memset(&key, 0, sizeof(key)); + key.vs.undo_viewport = 1; + + ret = compile_vs(svga, &new_vs, &key, &variant); + if (ret != PIPE_OK) + return ret; + + ureg_free_tokens(new_vs.base.tokens); + ureg_destroy(ureg); + + /* Overwrite the variant key to indicate it's a pass-through VS */ + memset(&variant->key, 0, sizeof(variant->key)); + variant->key.vs.passthrough = 1; + variant->key.vs.undo_viewport = 1; + + *out_variant = variant; + + return PIPE_OK; +} + + +static enum pipe_error +emit_hw_vs(struct svga_context *svga, unsigned dirty) +{ + struct svga_shader_variant *variant; + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_fragment_shader *fs = svga->curr.fs; enum pipe_error ret = PIPE_OK; + struct svga_compile_key key; + + /* If there is an active geometry shader, and it has stream output + * defined, then we will skip the stream output from the vertex shader + */ + if (!svga_have_gs_streamout(svga)) { + /* No GS stream out */ + if (svga_have_vs_streamout(svga)) { + /* Set VS stream out */ + svga_set_stream_output(svga, vs->base.stream_output); + } + else { + /* turn off stream out */ + svga_set_stream_output(svga, NULL); + } + } /* SVGA_NEW_NEED_SWTNL */ - if (!svga->state.sw.need_swtnl) { - struct svga_vertex_shader *vs = svga->curr.vs; - struct svga_vs_compile_key key; + if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) { + /* No vertex shader is needed */ + variant = NULL; + } + else { + make_vs_key(svga, &key); - make_vs_key( svga, &key ); + /* See if we already have a VS variant that matches the key */ + variant = 
svga_search_shader_key(&vs->base, &key); - variant = search_vs_key( vs, &key ); if (!variant) { - ret = compile_vs( svga, vs, &key, &variant ); + /* Create VS variant now */ + if (key.vs.passthrough) { + ret = compile_passthrough_vs(svga, vs, fs, &variant); + } + else { + ret = compile_vs(svga, vs, &key, &variant); + } if (ret != PIPE_OK) return ret; - } - assert(variant); + /* insert the new variant at head of linked list */ + assert(variant); + variant->next = vs->base.variants; + vs->base.variants = variant; + } } if (variant != svga->state.hw_draw.vs) { - if (svga_have_gb_objects(svga)) { - struct svga_winsys_gb_shader *gbshader = - variant ? variant->gb_shader : NULL; - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader); - if (ret != PIPE_OK) - return ret; - - svga->rebind.vs = FALSE; - } - else { - unsigned id = variant ? variant->id : SVGA_ID_INVALID; - ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, id); + /* Bind the new variant */ + if (variant) { + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, variant); if (ret != PIPE_OK) return ret; + svga->rebind.flags.vs = FALSE; } svga->dirty |= SVGA_NEW_VS_VARIANT; - svga->state.hw_draw.vs = variant; + svga->state.hw_draw.vs = variant; } return PIPE_OK; @@ -268,6 +411,9 @@ struct svga_tracked_state svga_hw_vs = "vertex shader (hwtnl)", (SVGA_NEW_VS | SVGA_NEW_FS | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER | + SVGA_NEW_RAST | SVGA_NEW_PRESCALE | SVGA_NEW_VELEMENT | SVGA_NEW_NEED_SWTNL), diff --git a/lib/mesa/src/gallium/drivers/svga/svga_streamout.h b/lib/mesa/src/gallium/drivers/svga/svga_streamout.h new file mode 100644 index 000000000..da0c4457d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_streamout.h @@ -0,0 +1,50 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#ifndef SVGA_STREAMOUT_H +#define SVGA_STREAMOUT_H + +struct svga_shader; + +struct svga_stream_output { + struct pipe_stream_output_info info; + unsigned pos_out_index; // position output index + unsigned id; +}; + +struct svga_stream_output * +svga_create_stream_output(struct svga_context *svga, + struct svga_shader *shader, + const struct pipe_stream_output_info *info); + +enum pipe_error +svga_set_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout); + +void +svga_delete_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout); + +#endif /* SVGA_STREAMOUT_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_surface.c b/lib/mesa/src/gallium/drivers/svga/svga_surface.c index 85d015460..ad06a1d53 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_surface.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_surface.c @@ -29,6 +29,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "os/os_thread.h" +#include "util/u_bitmask.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -36,19 +37,21 @@ #include "svga_format.h" #include "svga_screen.h" #include "svga_context.h" +#include "svga_sampler_view.h" #include "svga_resource_texture.h" #include "svga_surface.h" #include "svga_debug.h" +static void svga_mark_surface_dirty(struct pipe_surface *surf); void svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface *src_handle, unsigned src_x, unsigned src_y, unsigned src_z, - unsigned src_level, unsigned src_face, + unsigned src_level, unsigned src_layer, struct svga_winsys_surface *dst_handle, unsigned dst_x, unsigned dst_y, unsigned dst_z, - unsigned dst_level, unsigned dst_face, + unsigned dst_level, unsigned dst_layer, unsigned width, unsigned height, unsigned depth) { struct svga_surface dst, src; @@ -59,12 +62,12 @@ svga_texture_copy_handle(struct svga_context *svga, src.handle = src_handle; src.real_level = src_level; - src.real_face = src_face; + src.real_layer = src_layer; src.real_zslice = 0; dst.handle = dst_handle; dst.real_level = dst_level; - dst.real_face = dst_face; + dst.real_layer = dst_layer; dst.real_zslice = 0; box.x = dst_x; @@ -103,11 +106,13 @@ svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface * svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + unsigned bind_flags, SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, - int face_pick, + int layer_pick, + unsigned num_layers, int zslice_pick, struct svga_host_surface_cache_key *key) /* OUT */ { @@ -117,8 +122,8 @@ svga_texture_view_surface(struct svga_context *svga, unsigned z_offset = 0; SVGA_DBG(DEBUG_PERF, - "svga: Create surface view: face %d zslice %d mips %d..%d\n", - face_pick, zslice_pick, start_mip, start_mip+num_mip-1); + "svga: Create surface view: layer %d zslice %d mips %d..%d\n", + layer_pick, zslice_pick, start_mip, start_mip+num_mip-1); key->flags = flags; key->format = format; @@ -127,12 +132,20 @@ svga_texture_view_surface(struct svga_context *svga, key->size.height = u_minify(tex->b.b.height0, start_mip); key->size.depth = zslice_pick < 0 ? 
u_minify(tex->b.b.depth0, start_mip) : 1; key->cachable = 1; + key->arraySize = 1; + key->numFaces = 1; + key->sampleCount = tex->b.b.nr_samples; + + if (key->sampleCount > 1) { + key->flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS; + } - if (tex->b.b.target == PIPE_TEXTURE_CUBE && face_pick < 0) { + if (tex->b.b.target == PIPE_TEXTURE_CUBE && layer_pick < 0) { key->flags |= SVGA3D_SURFACE_CUBEMAP; key->numFaces = 6; - } else { - key->numFaces = 1; + } else if (tex->b.b.target == PIPE_TEXTURE_1D_ARRAY || + tex->b.b.target == PIPE_TEXTURE_2D_ARRAY) { + key->arraySize = num_layers; } if (key->format == SVGA3D_FORMAT_INVALID) { @@ -141,7 +154,7 @@ svga_texture_view_surface(struct svga_context *svga, } SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n"); - handle = svga_screen_surface_create(ss, key); + handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT, key); if (!handle) { key->cachable = 0; return NULL; @@ -149,15 +162,15 @@ svga_texture_view_surface(struct svga_context *svga, SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle); - if (face_pick < 0) - face_pick = 0; + if (layer_pick < 0) + layer_pick = 0; if (zslice_pick >= 0) z_offset = zslice_pick; for (i = 0; i < key->numMipLevels; i++) { - for (j = 0; j < key->numFaces; j++) { - if (svga_is_texture_level_defined(tex, j + face_pick, i + start_mip)) { + for (j = 0; j < key->numFaces * key->arraySize; j++) { + if (svga_is_texture_level_defined(tex, j + layer_pick, i + start_mip)) { unsigned depth = (zslice_pick < 0 ? u_minify(tex->b.b.depth0, i + start_mip) : 1); @@ -166,7 +179,7 @@ svga_texture_view_surface(struct svga_context *svga, tex->handle, 0, 0, z_offset, i + start_mip, - j + face_pick, + j + layer_pick, handle, 0, 0, 0, i, j, u_minify(tex->b.b.width0, i + start_mip), u_minify(tex->b.b.height0, i + start_mip), @@ -179,33 +192,43 @@ svga_texture_view_surface(struct svga_context *svga, } +/** + * A helper function to create a surface view. + * The view boolean flag specifies whether svga_texture_view_surface() + * will be called to create a cloned surface and resource for the view. 
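As an illustration (not part of the patch): svga_texture_view_surface() above sizes the view and its per-level copies with u_minify(). The per-level size rule is just "halve per level, clamp to one texel"; the local helper below mirrors that rule rather than including the gallium header, and the dimensions are made up:

/* Sketch of how per-mip-level dimensions shrink; u_minify_local mirrors the
 * usual halve-and-clamp-to-1 rule used above.
 */
#include <stdio.h>

static unsigned u_minify_local(unsigned value, unsigned level)
{
   unsigned v = value >> level;
   return v ? v : 1;          /* never drop below one texel */
}

int main(void)
{
   const unsigned width0 = 256, height0 = 100, levels = 9;
   unsigned level;

   for (level = 0; level < levels; level++) {
      printf("level %u: %u x %u\n", level,
             u_minify_local(width0, level),
             u_minify_local(height0, level));
   }
   return 0;
}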
+ */ static struct pipe_surface * -svga_create_surface(struct pipe_context *pipe, - struct pipe_resource *pt, - const struct pipe_surface *surf_tmpl) +svga_create_surface_view(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl, + boolean view) { struct svga_context *svga = svga_context(pipe); struct svga_texture *tex = svga_texture(pt); struct pipe_screen *screen = pipe->screen; struct svga_screen *ss = svga_screen(screen); struct svga_surface *s; - unsigned face, zslice; - boolean view = FALSE; - SVGA3dSurfaceFlags flags; + unsigned layer, zslice, bind; + unsigned nlayers = 1; + SVGA3dSurfaceFlags flags = 0; SVGA3dSurfaceFormat format; - assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); - s = CALLOC_STRUCT(svga_surface); if (!s) return NULL; if (pt->target == PIPE_TEXTURE_CUBE) { - face = surf_tmpl->u.tex.first_layer; + layer = surf_tmpl->u.tex.first_layer; zslice = 0; } + else if (pt->target == PIPE_TEXTURE_1D_ARRAY || + pt->target == PIPE_TEXTURE_2D_ARRAY) { + layer = surf_tmpl->u.tex.first_layer; + zslice = 0; + nlayers = surf_tmpl->u.tex.last_layer - surf_tmpl->u.tex.first_layer + 1; + } else { - face = 0; + layer = 0; zslice = surf_tmpl->u.tex.first_layer; } @@ -218,25 +241,100 @@ svga_create_surface(struct pipe_context *pipe, s->base.u.tex.level = surf_tmpl->u.tex.level; s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; + s->view_id = SVGA3D_INVALID_ID; + + s->backed = NULL; if (util_format_is_depth_or_stencil(surf_tmpl->format)) { - flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL | + SVGA3D_SURFACE_BIND_DEPTH_STENCIL; + bind = PIPE_BIND_DEPTH_STENCIL; } else { - flags = SVGA3D_SURFACE_HINT_RENDERTARGET; + flags = SVGA3D_SURFACE_HINT_RENDERTARGET | + SVGA3D_SURFACE_BIND_RENDER_TARGET; + bind = PIPE_BIND_RENDER_TARGET; } - format = svga_translate_format(ss, surf_tmpl->format, 0); + if (tex->imported) + format = tex->key.format; + else + format = svga_translate_format(ss, surf_tmpl->format, bind); + assert(format != SVGA3D_FORMAT_INVALID); - if (svga_screen(screen)->debug.force_surface_view) - view = TRUE; + if (view) { + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u layer %u z %u, %p\n", + pt, surf_tmpl->u.tex.level, layer, zslice, s); + + if (svga_have_vgpu10(svga)) { + switch (pt->target) { + case PIPE_TEXTURE_1D: + flags |= SVGA3D_SURFACE_1D; + break; + case PIPE_TEXTURE_1D_ARRAY: + flags |= SVGA3D_SURFACE_1D | SVGA3D_SURFACE_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + flags |= SVGA3D_SURFACE_ARRAY; + break; + case PIPE_TEXTURE_3D: + flags |= SVGA3D_SURFACE_VOLUME; + break; + case PIPE_TEXTURE_CUBE: + if (nlayers == 6) + flags |= SVGA3D_SURFACE_CUBEMAP; + break; + default: + break; + } + } - /* Currently only used for compressed textures */ - if (format != svga_translate_format(ss, surf_tmpl->format, 0)) { - view = TRUE; + /* When we clone the surface view resource, use the format used in + * the creation of the original resource. 
+ */ + s->handle = svga_texture_view_surface(svga, tex, bind, flags, tex->key.format, + surf_tmpl->u.tex.level, 1, + layer, nlayers, zslice, &s->key); + if (!s->handle) { + FREE(s); + return NULL; + } + + s->key.format = format; + s->real_layer = 0; + s->real_level = 0; + s->real_zslice = 0; + } else { + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, layer %u, z %u, %p\n", + pt, surf_tmpl->u.tex.level, layer, zslice, s); + + memset(&s->key, 0, sizeof s->key); + s->key.format = format; + s->handle = tex->handle; + s->real_layer = layer; + s->real_zslice = zslice; + s->real_level = surf_tmpl->u.tex.level; } + svga->hud.num_surface_views++; + + return &s->base; +} + + +static struct pipe_surface * +svga_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) +{ + struct svga_context *svga = svga_context(pipe); + struct pipe_screen *screen = pipe->screen; + boolean view = FALSE; + + if (svga_screen(screen)->debug.force_surface_view) + view = TRUE; + if (surf_tmpl->u.tex.level != 0 && svga_screen(screen)->debug.force_level_surface_view) view = TRUE; @@ -244,49 +342,177 @@ svga_create_surface(struct pipe_context *pipe, if (pt->target == PIPE_TEXTURE_3D) view = TRUE; - if (svga_screen(screen)->debug.no_surface_view) + if (svga_have_vgpu10(svga) || svga_screen(screen)->debug.no_surface_view) view = FALSE; - if (view) { - SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", - pt, surf_tmpl->u.tex.level, face, zslice, s); + return svga_create_surface_view(pipe, pt, surf_tmpl, view); +} - s->handle = svga_texture_view_surface(svga, tex, flags, format, - surf_tmpl->u.tex.level, - 1, face, zslice, &s->key); - s->real_face = 0; - s->real_level = 0; - s->real_zslice = 0; - } else { - SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", - pt, surf_tmpl->u.tex.level, face, zslice, s); - memset(&s->key, 0, sizeof s->key); - s->handle = tex->handle; - s->real_face = face; - s->real_zslice = zslice; - s->real_level = surf_tmpl->u.tex.level; +/** + * Clone the surface view and its associated resource. + */ +static struct svga_surface * +create_backed_surface_view(struct svga_context *svga, struct svga_surface *s) +{ + struct svga_surface *bs = s->backed; + + if (!bs) { + struct svga_texture *tex = svga_texture(s->base.texture); + struct pipe_surface *backed_view; + + backed_view = svga_create_surface_view(&svga->pipe, + &tex->b.b, + &s->base, + TRUE); + if (!backed_view) + return NULL; + + bs = svga_surface(backed_view); + s->backed = bs; } + svga_mark_surface_dirty(&bs->base); + + return bs; +} + +/** + * Create a DX RenderTarget/DepthStencil View for the given surface, + * if needed. + */ +struct pipe_surface * +svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) +{ + enum pipe_error ret = PIPE_OK; + unsigned shader; + + assert(svga_have_vgpu10(svga)); + + /** + * DX spec explicitly specifies that no resource can be bound to a render + * target view and a shader resource view simultanously. + * So first check if the resource bound to this surface view collides with + * a sampler view. If so, then we will clone this surface view and its + * associated resource. We will then use the cloned surface view for + * render target. 
+ */ + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { + SVGA_DBG(DEBUG_VIEWS, + "same resource used in shaderResource and renderTarget 0x%x\n", + s->handle); + s = create_backed_surface_view(svga, s); + if (!s) + return NULL; + + break; + } + } + + if (s->view_id == SVGA3D_INVALID_ID) { + SVGA3dResourceType resType; + SVGA3dRenderTargetViewDesc desc; + + desc.tex.mipSlice = s->real_level; + desc.tex.firstArraySlice = s->real_layer + s->real_zslice; + desc.tex.arraySize = + s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1; + + s->view_id = util_bitmask_add(svga->surface_view_id_bm); + + switch (s->base.texture->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + resType = SVGA3D_RESOURCE_TEXTURE1D; + break; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + /* drawing to cube map is treated as drawing to 2D array */ + resType = SVGA3D_RESOURCE_TEXTURE2D; + break; + case PIPE_TEXTURE_3D: + resType = SVGA3D_RESOURCE_TEXTURE3D; + break; + default: + assert(!"Unexpected texture target"); + resType = SVGA3D_RESOURCE_TEXTURE2D; + } + + if (util_format_is_depth_or_stencil(s->base.format)) { + ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc, + s->view_id, + s->handle, + s->key.format, + resType, + &desc); + } + else { + ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc, + s->view_id, + s->handle, + s->key.format, + resType, + &desc); + } + + if (ret != PIPE_OK) { + util_bitmask_clear(svga->surface_view_id_bm, s->view_id); + s->view_id = SVGA3D_INVALID_ID; + return NULL; + } + } return &s->base; } + static void svga_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf) { + struct svga_context *svga = svga_context(pipe); struct svga_surface *s = svga_surface(surf); struct svga_texture *t = svga_texture(surf->texture); struct svga_screen *ss = svga_screen(surf->texture->screen); + enum pipe_error ret = PIPE_OK; + + /* Destroy the backed view surface if it exists */ + if (s->backed) { + svga_surface_destroy(pipe, &s->backed->base); + s->backed = NULL; + } if (s->handle != t->handle) { SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); svga_screen_surface_destroy(ss, &s->key, &s->handle); } + if (s->view_id != SVGA3D_INVALID_ID) { + unsigned try; + + assert(svga_have_vgpu10(svga)); + for (try = 0; try < 2; try++) { + if (util_format_is_depth_or_stencil(s->base.format)) { + ret = SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, s->view_id); + } + else { + ret = SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, s->view_id); + } + if (ret == PIPE_OK) + break; + svga_context_flush(svga, NULL); + } + assert(ret == PIPE_OK); + util_bitmask_clear(svga->surface_view_id_bm, s->view_id); + } + pipe_resource_reference(&surf->texture, NULL); FREE(surf); + + svga->hud.num_surface_views--; } @@ -294,29 +520,25 @@ static void svga_mark_surface_dirty(struct pipe_surface *surf) { struct svga_surface *s = svga_surface(surf); + struct svga_texture *tex = svga_texture(surf->texture); if (!s->dirty) { - struct svga_texture *tex = svga_texture(surf->texture); - s->dirty = TRUE; if (s->handle == tex->handle) { /* hmm so 3d textures always have all their slices marked ? 
*/ - if (surf->texture->target == PIPE_TEXTURE_CUBE) - svga_define_texture_level(tex, surf->u.tex.first_layer, - surf->u.tex.level); - else - svga_define_texture_level(tex, 0, surf->u.tex.level); + svga_define_texture_level(tex, surf->u.tex.first_layer, + surf->u.tex.level); } else { /* this will happen later in svga_propagate_surface */ } - - /* Increment the view_age and texture age for this surface's mipmap - * level so that any sampler views into the texture are re-validated too. - */ - svga_age_texture_view(tex, surf->u.tex.level); } + + /* Increment the view_age and texture age for this surface's mipmap + * level so that any sampler views into the texture are re-validated too. + */ + svga_age_texture_view(tex, surf->u.tex.level); } @@ -345,18 +567,26 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf) struct svga_surface *s = svga_surface(surf); struct svga_texture *tex = svga_texture(surf->texture); struct svga_screen *ss = svga_screen(surf->texture->screen); - unsigned zslice, face; + unsigned zslice, layer; + unsigned nlayers = 1; + unsigned i; if (!s->dirty) return; if (surf->texture->target == PIPE_TEXTURE_CUBE) { zslice = 0; - face = surf->u.tex.first_layer; + layer = surf->u.tex.first_layer; + } + else if (surf->texture->target == PIPE_TEXTURE_1D_ARRAY || + surf->texture->target == PIPE_TEXTURE_2D_ARRAY) { + zslice = 0; + layer = surf->u.tex.first_layer; + nlayers = surf->u.tex.last_layer - surf->u.tex.first_layer + 1; } else { zslice = surf->u.tex.first_layer; - face = 0; + layer = 0; } s->dirty = FALSE; @@ -367,12 +597,14 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf) SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf); - svga_texture_copy_handle(svga, - s->handle, 0, 0, 0, s->real_level, s->real_face, - tex->handle, 0, 0, zslice, surf->u.tex.level, face, - u_minify(tex->b.b.width0, surf->u.tex.level), - u_minify(tex->b.b.height0, surf->u.tex.level), 1); - svga_define_texture_level(tex, face, surf->u.tex.level); + for (i = 0; i < nlayers; i++) { + svga_texture_copy_handle(svga, + s->handle, 0, 0, 0, s->real_level, s->real_layer + i, + tex->handle, 0, 0, zslice, surf->u.tex.level, layer + i, + u_minify(tex->b.b.width0, surf->u.tex.level), + u_minify(tex->b.b.height0, surf->u.tex.level), 1); + svga_define_texture_level(tex, layer + i, surf->u.tex.level); + } } } @@ -390,10 +622,76 @@ svga_surface_needs_propagation(const struct pipe_surface *surf) } +static void +svga_get_sample_position(struct pipe_context *context, + unsigned sample_count, unsigned sample_index, + float *pos_out) +{ + /* We can't actually query the device to learn the sample positions. + * These were grabbed from nvidia's driver. 
+ */ + static const float pos1[1][2] = { + { 0.5, 0.5 } + }; + static const float pos4[4][2] = { + { 0.375000, 0.125000 }, + { 0.875000, 0.375000 }, + { 0.125000, 0.625000 }, + { 0.625000, 0.875000 } + }; + static const float pos8[8][2] = { + { 0.562500, 0.312500 }, + { 0.437500, 0.687500 }, + { 0.812500, 0.562500 }, + { 0.312500, 0.187500 }, + { 0.187500, 0.812500 }, + { 0.062500, 0.437500 }, + { 0.687500, 0.937500 }, + { 0.937500, 0.062500 } + }; + static const float pos16[16][2] = { + { 0.187500, 0.062500 }, + { 0.437500, 0.187500 }, + { 0.062500, 0.312500 }, + { 0.312500, 0.437500 }, + { 0.687500, 0.062500 }, + { 0.937500, 0.187500 }, + { 0.562500, 0.312500 }, + { 0.812500, 0.437500 }, + { 0.187500, 0.562500 }, + { 0.437500, 0.687500 }, + { 0.062500, 0.812500 }, + { 0.312500, 0.937500 }, + { 0.687500, 0.562500 }, + { 0.937500, 0.687500 }, + { 0.562500, 0.812500 }, + { 0.812500, 0.937500 } + }; + const float (*positions)[2]; + + switch (sample_count) { + case 4: + positions = pos4; + break; + case 8: + positions = pos8; + break; + case 16: + positions = pos16; + break; + default: + positions = pos1; + } + + pos_out[0] = positions[sample_index][0]; + pos_out[1] = positions[sample_index][1]; +} + void svga_init_surface_functions(struct svga_context *svga) { svga->pipe.create_surface = svga_create_surface; svga->pipe.surface_destroy = svga_surface_destroy; + svga->pipe.get_sample_position = svga_get_sample_position; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_surface.h b/lib/mesa/src/gallium/drivers/svga/svga_surface.h index 2fa72a1c8..0e5794b0b 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_surface.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_surface.h @@ -47,11 +47,15 @@ struct svga_surface struct svga_host_surface_cache_key key; struct svga_winsys_surface *handle; - unsigned real_face; + unsigned real_layer; unsigned real_level; unsigned real_zslice; boolean dirty; + + /* VGPU10 */ + SVGA3dRenderTargetViewId view_id; + struct svga_surface *backed; }; @@ -64,11 +68,13 @@ svga_surface_needs_propagation(const struct pipe_surface *surf); struct svga_winsys_surface * svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + unsigned bind_flags, SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, - int face_pick, + int layer_pick, + unsigned num_layers, int zslice_pick, struct svga_host_surface_cache_key *key); /* OUT */ @@ -99,4 +105,8 @@ svga_surface_const(const struct pipe_surface *surface) return (const struct svga_surface *)surface; } +struct pipe_surface * +svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s); + + #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c index ded8bcbd5..4bdb21a98 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -40,6 +40,7 @@ #include "svga_reg.h" #include "svga3d_reg.h" #include "svga_draw.h" +#include "svga_shader.h" #include "svga_swtnl_private.h" @@ -129,9 +130,12 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render ) PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_UNSYNCHRONIZED, &svga_render->vbuf_transfer); - if (ptr) + if (ptr) { + svga_render->vbuf_ptr = ptr; return ptr + svga_render->vbuf_offset; + } else { + svga_render->vbuf_ptr = NULL; svga_render->vbuf_transfer = NULL; return NULL; } @@ -154,6 +158,18 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render, offset = 
svga_render->vbuf_offset + svga_render->vertex_size * min_index; length = svga_render->vertex_size * (max_index + 1 - min_index); + + if (0) { + /* dump vertex data */ + const float *f = (const float *) ((char *) svga_render->vbuf_ptr + + svga_render->vbuf_offset); + unsigned i; + debug_printf("swtnl vertex data:\n"); + for (i = 0; i < length / 4; i += 4) { + debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]); + } + } + pipe_buffer_flush_mapped_range(&svga->pipe, svga_render->vbuf_transfer, offset, length); @@ -178,6 +194,7 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; enum pipe_error ret; unsigned i; + static const unsigned zero[PIPE_MAX_ATTRIBS] = {0}; /* if the vdecl or vbuf hasn't changed do nothing */ if (!svga->swtnl.new_vdecl) @@ -192,18 +209,27 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) ret = svga_hwtnl_flush(svga->hwtnl); /* if we hit this path we might become synced with hw */ svga->swtnl.new_vbuf = TRUE; - assert(ret == 0); + assert(ret == PIPE_OK); } - svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count); - for (i = 0; i < svga_render->vdecl_count; i++) { vdecl[i].array.offset += svga_render->vdecl_offset; + } - svga_hwtnl_vdecl( svga->hwtnl, - i, - &vdecl[i], - svga_render->vbuf ); + svga_hwtnl_vertex_decls(svga->hwtnl, + svga_render->vdecl_count, + vdecl, + zero, + svga_render->layout_id); + + /* Specify the vertex buffer (there's only ever one) */ + { + struct pipe_vertex_buffer vb; + vb.buffer = svga_render->vbuf; + vb.buffer_offset = svga_render->vdecl_offset; + vb.stride = vdecl[0].array.stride; + vb.user_buffer = NULL; + svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb); } /* We have already taken care of flatshading, so let the hwtnl @@ -211,15 +237,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) */ if (svga->state.sw.need_pipeline) { svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE); - svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL); + svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL); } else { svga_hwtnl_set_flatshade( svga->hwtnl, - svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade || + svga->state.hw_draw.fs->uses_flat_interp, svga->curr.rast->templ.flatshade_first ); - svga_hwtnl_set_unfilled( svga->hwtnl, - svga->curr.rast->hw_unfilled ); + svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); } svga->swtnl.new_vdecl = FALSE; @@ -227,13 +253,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) static void svga_vbuf_render_draw_arrays( struct vbuf_render *render, - unsigned start, - uint nr ) + unsigned start, uint nr ) { struct svga_vbuf_render *svga_render = svga_vbuf_render(render); struct svga_context *svga = svga_render->svga; unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; enum pipe_error ret = PIPE_OK; + /* instancing will already have been resolved at this point by 'draw' */ + const unsigned start_instance = 0; + const unsigned instance_count = 1; /* off to hardware */ svga_vbuf_submit_state(svga_render); @@ -244,10 +272,13 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render, */ svga_update_state_retry( svga, SVGA_STATE_HW_DRAW ); - ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr, + start_instance, instance_count); if (ret != PIPE_OK) { svga_context_flush(svga, NULL); - ret = 
svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, + start + bias, nr, + start_instance, instance_count); svga->swtnl.new_vbuf = TRUE; assert(ret == PIPE_OK); } @@ -265,6 +296,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; boolean ret; size_t size = 2 * nr_indices; + /* instancing will already have been resolved at this point by 'draw' */ + const unsigned start_instance = 0; + const unsigned instance_count = 1; assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0); @@ -299,7 +333,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, svga_render->min_index, svga_render->max_index, svga_render->prim, - svga_render->ibuf_offset / 2, nr_indices); + svga_render->ibuf_offset / 2, nr_indices, + start_instance, instance_count); if(ret != PIPE_OK) { svga_context_flush(svga, NULL); ret = svga_hwtnl_draw_range_elements(svga->hwtnl, @@ -309,7 +344,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, svga_render->min_index, svga_render->max_index, svga_render->prim, - svga_render->ibuf_offset / 2, nr_indices); + svga_render->ibuf_offset / 2, + nr_indices, + start_instance, instance_count); svga->swtnl.new_vbuf = TRUE; assert(ret == PIPE_OK); } @@ -349,6 +386,7 @@ svga_vbuf_render_create( struct svga_context *svga ) svga_render->vbuf_size = 0; svga_render->ibuf_alloc_size = 4*1024; svga_render->vbuf_alloc_size = 64*1024; + svga_render->layout_id = SVGA3D_INVALID_ID; svga_render->base.max_vertex_buffer_bytes = 64*1024/10; svga_render->base.max_indices = 65536; svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c index 832249523..6a8e857ce 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -42,9 +42,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga, { struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = { 0 }; struct pipe_transfer *ib_transfer = NULL; - struct pipe_transfer *cb_transfer = NULL; + struct pipe_transfer *cb_transfer[SVGA_MAX_CONST_BUFS] = { 0 }; struct draw_context *draw = svga->swtnl.draw; - unsigned i; + unsigned i, old_num_vertex_buffers; const void *map; enum pipe_error ret; @@ -76,6 +76,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, draw_set_mapped_vertex_buffer(draw, i, map, ~0); } } + old_num_vertex_buffers = svga->curr.num_vertex_buffers; /* Map index buffer, if present */ map = NULL; @@ -88,16 +89,21 @@ svga_swtnl_draw_vbo(struct svga_context *svga, svga->curr.ib.index_size, ~0); } - if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) { + /* Map constant buffers */ + for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) { + if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer == NULL) { + continue; + } + map = pipe_buffer_map(&svga->pipe, - svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer, + svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer, PIPE_TRANSFER_READ, - &cb_transfer); + &cb_transfer[i]); assert(map); draw_set_mapped_constant_buffer( - draw, PIPE_SHADER_VERTEX, 0, + draw, PIPE_SHADER_VERTEX, i, map, - svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer->width0); + svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer->width0); } draw_vbo(draw, info); @@ -105,8 +111,8 @@ svga_swtnl_draw_vbo(struct 
svga_context *svga, draw_flush(svga->swtnl.draw); /* Ensure the draw module didn't touch this */ - assert(i == svga->curr.num_vertex_buffers); - + assert(old_num_vertex_buffers == svga->curr.num_vertex_buffers); + /* * unmap vertex/index buffers */ @@ -122,8 +128,10 @@ svga_swtnl_draw_vbo(struct svga_context *svga, draw_set_indexes(draw, NULL, 0, 0); } - if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) { - pipe_buffer_unmap(&svga->pipe, cb_transfer); + for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) { + if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer) { + pipe_buffer_unmap(&svga->pipe, cb_transfer[i]); + } } /* Now safe to remove the need_swtnl flag in any update_state call */ @@ -167,9 +175,6 @@ boolean svga_init_swtnl( struct svga_context *svga ) if (!screen->haveLineSmooth) draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe); - /* always install polygon stipple stage */ - draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe); - /* enable/disable line stipple stage depending on device caps */ draw_enable_line_stipple(svga->swtnl.draw, !screen->haveLineStipple); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h index e2106e1e8..0a226abca 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h @@ -43,6 +43,8 @@ struct svga_vbuf_render { unsigned vertex_size; + SVGA3dElementLayoutId layout_id; /**< current element layout id */ + unsigned prim; struct pipe_resource *vbuf; @@ -50,6 +52,8 @@ struct svga_vbuf_render { struct pipe_transfer *vbuf_transfer; struct pipe_transfer *ibuf_transfer; + void *vbuf_ptr; + /* current size of buffer */ size_t vbuf_size; size_t ibuf_size; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c index e62698e11..4d21f4f0e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c @@ -25,10 +25,13 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "util/u_bitmask.h" #include "util/u_inlines.h" #include "pipe/p_state.h" +#include "svga_cmd.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_swtnl.h" #include "svga_state.h" #include "svga_tgsi.h" @@ -51,30 +54,37 @@ static void set_draw_viewport( struct svga_context *svga ) float adjx = 0.0f; float adjy = 0.0f; - switch (svga->curr.reduced_prim) { - case PIPE_PRIM_POINTS: - adjx = SVGA_POINT_ADJ_X; - adjy = SVGA_POINT_ADJ_Y; - break; - case PIPE_PRIM_LINES: - /* XXX: This is to compensate for the fact that wide lines are - * going to be drawn with triangles, but we're not catching all - * cases where that will happen. - */ - if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES) - { - adjx = SVGA_LINE_ADJ_X + 0.175f; - adjy = SVGA_LINE_ADJ_Y - 0.175f; + if (svga_have_vgpu10(svga)) { + if (svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES) { + adjy = 0.25; } - else { - adjx = SVGA_LINE_ADJ_X; - adjy = SVGA_LINE_ADJ_Y; + } + else { + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + adjx = SVGA_POINT_ADJ_X; + adjy = SVGA_POINT_ADJ_Y; + break; + case PIPE_PRIM_LINES: + /* XXX: This is to compensate for the fact that wide lines are + * going to be drawn with triangles, but we're not catching all + * cases where that will happen. 
+ */ + if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES) + { + adjx = SVGA_LINE_ADJ_X + 0.175f; + adjy = SVGA_LINE_ADJ_Y - 0.175f; + } + else { + adjx = SVGA_LINE_ADJ_X; + adjy = SVGA_LINE_ADJ_Y; + } + break; + case PIPE_PRIM_TRIANGLES: + adjx += SVGA_TRIANGLE_ADJ_X; + adjy += SVGA_TRIANGLE_ADJ_Y; + break; } - break; - case PIPE_PRIM_TRIANGLES: - adjx += SVGA_TRIANGLE_ADJ_X; - adjy += SVGA_TRIANGLE_ADJ_Y; - break; } vp.translate[0] += adjx; @@ -150,6 +160,59 @@ struct svga_tracked_state svga_update_swtnl_draw = }; +static SVGA3dSurfaceFormat +translate_vertex_format(SVGA3dDeclType format) +{ + switch (format) { + case SVGA3D_DECLTYPE_FLOAT1: + return SVGA3D_R32_FLOAT; + case SVGA3D_DECLTYPE_FLOAT2: + return SVGA3D_R32G32_FLOAT; + case SVGA3D_DECLTYPE_FLOAT3: + return SVGA3D_R32G32B32_FLOAT; + case SVGA3D_DECLTYPE_FLOAT4: + return SVGA3D_R32G32B32A32_FLOAT; + default: + assert(!"Unexpected format in translate_vertex_format()"); + return SVGA3D_R32G32B32A32_FLOAT; + } +} + + +static SVGA3dElementLayoutId +svga_vdecl_to_input_element(struct svga_context *svga, + const SVGA3dVertexDecl *vdecl, unsigned num_decls) +{ + SVGA3dElementLayoutId id; + SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS]; + enum pipe_error ret; + unsigned i; + + assert(num_decls <= PIPE_MAX_ATTRIBS); + assert(svga_have_vgpu10(svga)); + + for (i = 0; i < num_decls; i++) { + elements[i].inputSlot = 0; /* vertex buffer index */ + elements[i].alignedByteOffset = vdecl[i].array.offset; + elements[i].format = translate_vertex_format(vdecl[i].identity.type); + elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA; + elements[i].instanceDataStepRate = 0; + elements[i].inputRegister = i; + } + + id = util_bitmask_add(svga->input_element_object_id_bm); + + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements); + assert(ret == PIPE_OK); + } + + return id; +} + + enum pipe_error svga_swtnl_update_vdecl( struct svga_context *svga ) { @@ -157,23 +220,24 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) struct draw_context *draw = svga->swtnl.draw; struct vertex_info *vinfo = &svga_render->vertex_info; SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; - const enum interp_mode colorInterp = - svga->curr.rast->templ.flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; struct svga_fragment_shader *fs = svga->curr.fs; int offset = 0; int nr_decls = 0; int src; unsigned i; + int any_change; memset(vinfo, 0, sizeof(*vinfo)); memset(vdecl, 0, sizeof(vdecl)); draw_prepare_shader_outputs(draw); + /* always add position */ src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, src); vinfo->attrib[0].emit = EMIT_4F; vdecl[0].array.offset = offset; + vdecl[0].identity.method = SVGA3D_DECLMETHOD_DEFAULT; vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4; vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT; vdecl[0].identity.usageIndex = 0; @@ -191,14 +255,14 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) switch (sem_name) { case TGSI_SEMANTIC_COLOR: - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, src); vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR; vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; offset += 16; nr_decls++; break; case TGSI_SEMANTIC_GENERIC: - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, src); vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; vdecl[nr_decls].identity.usageIndex = @@ -207,7 +271,7 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) nr_decls++; break; case TGSI_SEMANTIC_FOG: - draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_1F, src); vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1; assert(vdecl[nr_decls].identity.usageIndex == 0); @@ -225,16 +289,67 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) draw_compute_vertex_size(vinfo); svga_render->vdecl_count = nr_decls; - for (i = 0; i < svga_render->vdecl_count; i++) + for (i = 0; i < svga_render->vdecl_count; i++) { vdecl[i].array.stride = offset; + } - if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0) - return PIPE_OK; + any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)); + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) { + return PIPE_OK; + } + + if (svga_render->layout_id != SVGA3D_INVALID_ID) { + /* destroy old */ + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, + svga_render->layout_id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, + svga_render->layout_id); + assert(ret == PIPE_OK); + } + + /** + * reset current layout id state after the element layout is + * destroyed, so that if a new layout has the same layout id, we + * will know to re-issue the SetInputLayout command. 
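Aside: the DefineElementLayout, DestroyElementLayout and SetInputLayout calls around this hunk all follow the same flush-and-retry idiom: issue the command, and if it does not return PIPE_OK (command buffer full), flush the context and issue it once more. A minimal sketch of that idiom as a macro; the SVGA_RETRY_SKETCH name is hypothetical and not part of the imported code:

/* Hypothetical helper, illustration only. "cmd" must be an expression
 * returning enum pipe_error that is safe to evaluate twice, e.g. a
 * single SVGA3D_vgpu10_* call.
 */
#define SVGA_RETRY_SKETCH(svga, cmd)              \
   do {                                           \
      enum pipe_error ret_ = (cmd);               \
      if (ret_ != PIPE_OK) {                      \
         svga_context_flush((svga), NULL);        \
         ret_ = (cmd);                            \
         assert(ret_ == PIPE_OK);                 \
      }                                           \
   } while (0)

/* e.g. SVGA_RETRY_SKETCH(svga, SVGA3D_vgpu10_SetInputLayout(svga->swc, id)); */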
+ */ + if (svga->state.hw_draw.layout_id == svga_render->layout_id) + svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->input_element_object_id_bm, + svga_render->layout_id); + } + + svga_render->layout_id = + svga_vdecl_to_input_element(svga, vdecl, nr_decls); + + /* bind new */ + if (svga->state.hw_draw.layout_id != svga_render->layout_id) { + ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, + svga_render->layout_id); + assert(ret == PIPE_OK); + } + + svga->state.hw_draw.layout_id = svga_render->layout_id; + } + } + else { + if (!any_change) + return PIPE_OK; + } memcpy(svga_render->vdecl, vdecl, sizeof(vdecl)); svga->swtnl.new_vdecl = TRUE; - return PIPE_OK; + return 0; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c index 2e2ff5e46..c62d4d671 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c @@ -37,6 +37,7 @@ #include "svgadump/svga_shader_dump.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_tgsi.h" #include "svga_tgsi_emit.h" #include "svga_debug.h" @@ -70,7 +71,7 @@ svga_shader_expand(struct svga_shader_emitter *emit) else new_buf = NULL; - if (new_buf == NULL) { + if (!new_buf) { emit->ptr = err_buf; emit->buf = err_buf; emit->size = sizeof(err_buf); @@ -166,97 +167,6 @@ svga_shader_emit_header(struct svga_shader_emitter *emit) /** - * Use the shader info to generate a bitmask indicating which generic - * inputs are used by the shader. A set bit indicates that GENERIC[i] - * is used. - */ -unsigned -svga_get_generic_inputs_mask(const struct tgsi_shader_info *info) -{ - unsigned i, mask = 0x0; - - for (i = 0; i < info->num_inputs; i++) { - if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - unsigned j = info->input_semantic_index[i]; - assert(j < sizeof(mask) * 8); - mask |= 1 << j; - } - } - - return mask; -} - - -/** - * Given a mask of used generic variables (as returned by the above functions) - * fill in a table which maps those indexes to small integers. - * This table is used by the remap_generic_index() function in - * svga_tgsi_decl_sm30.c - * Example: if generics_mask = binary(1010) it means that GENERIC[1] and - * GENERIC[3] are used. The remap_table will contain: - * table[1] = 0; - * table[3] = 1; - * The remaining table entries will be filled in with the next unused - * generic index (in this example, 2). - */ -void -svga_remap_generics(unsigned generics_mask, - int8_t remap_table[MAX_GENERIC_VARYING]) -{ - /* Note texcoord[0] is reserved so start at 1 */ - unsigned count = 1, i; - - for (i = 0; i < MAX_GENERIC_VARYING; i++) { - remap_table[i] = -1; - } - - /* for each bit set in generic_mask */ - while (generics_mask) { - unsigned index = ffs(generics_mask) - 1; - remap_table[index] = count++; - generics_mask &= ~(1 << index); - } -} - - -/** - * Use the generic remap table to map a TGSI generic varying variable - * index to a small integer. If the remapping table doesn't have a - * valid value for the given index (the table entry is -1) it means - * the fragment shader doesn't use that VS output. Just allocate - * the next free value in that case. Alternately, we could cull - * VS instructions that write to register, or replace the register - * with a dummy temp register. 
- * XXX TODO: we should do one of the later as it would save precious - * texcoord registers. - */ -int -svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], - int generic_index) -{ - assert(generic_index < MAX_GENERIC_VARYING); - - if (generic_index >= MAX_GENERIC_VARYING) { - /* just don't return a random/garbage value */ - generic_index = MAX_GENERIC_VARYING - 1; - } - - if (remap_table[generic_index] == -1) { - /* This is a VS output that has no matching PS input. Find a - * free index. - */ - int i, max = 0; - for (i = 0; i < MAX_GENERIC_VARYING; i++) { - max = MAX2(max, remap_table[i]); - } - remap_table[generic_index] = max + 1; - } - - return remap_table[generic_index]; -} - - -/** * Parse TGSI shader and translate to SVGA/DX9 serialized * representation. * @@ -264,9 +174,10 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], * can be dynamically grown. Once we've finished and know how large * it is, it will be copied to a hardware buffer for upload. */ -static struct svga_shader_variant * -svga_tgsi_translate(const struct svga_shader *shader, - const struct svga_compile_key *key, unsigned unit) +struct svga_shader_variant * +svga_tgsi_vgpu9_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, unsigned unit) { struct svga_shader_variant *variant = NULL; struct svga_shader_emitter emit; @@ -288,10 +199,10 @@ svga_tgsi_translate(const struct svga_shader *shader, emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1; if (unit == PIPE_SHADER_FRAGMENT) - emit.imm_start += key->fkey.num_unnormalized_coords; + emit.imm_start += key->num_unnormalized_coords; if (unit == PIPE_SHADER_VERTEX) { - emit.imm_start += key->vkey.need_prescale ? 2 : 0; + emit.imm_start += key->vs.need_prescale ? 2 : 0; } emit.nr_hw_float_const = @@ -317,8 +228,8 @@ svga_tgsi_translate(const struct svga_shader *shader, goto fail; } - variant = CALLOC_STRUCT(svga_shader_variant); - if (variant == NULL) + variant = svga_new_shader_variant(svga); + if (!variant) goto fail; variant->shader = shader; @@ -327,7 +238,18 @@ svga_tgsi_translate(const struct svga_shader *shader, memcpy(&variant->key, key, sizeof(*key)); variant->id = UTIL_BITMASK_INVALID_INDEX; - if (SVGA_DEBUG & DEBUG_TGSI) { + variant->pstipple_sampler_unit = emit.pstipple_sampler_unit; + + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). 
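Aside: the constant_color_output flag assigned just below combines two pieces of emitter state: emit_mov() (later in this diff) marks a MOV into fragment output 0 from a non-indirect constant, and num_output_writes counts every output write. A condensed sketch of the resulting predicate, assuming only those two fields:

/* Sketch only: true when the fragment shader amounts to a single
 * "MOV OUT[0], CONST[..]" write, so all fragments get the same color
 * before blending.
 */
static boolean
shader_writes_constant_color(const struct svga_shader_emitter *emit)
{
   return emit->constant_color_output && emit->num_output_writes == 1;
}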
+ */ + variant->constant_color_output = + emit.constant_color_output && emit.num_output_writes == 1; + +#if 0 + if (!svga_shader_verify(variant->tokens, variant->nr_tokens) || + SVGA_DEBUG & DEBUG_TGSI) { debug_printf("#####################################\n"); debug_printf("Shader %u below\n", shader->id); tgsi_dump(shader->tokens, 0); @@ -337,6 +259,7 @@ svga_tgsi_translate(const struct svga_shader *shader, } debug_printf("#####################################\n"); } +#endif return variant; @@ -345,39 +268,3 @@ svga_tgsi_translate(const struct svga_shader *shader, FREE(emit.buf); return NULL; } - - -struct svga_shader_variant * -svga_translate_fragment_program(const struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *fkey) -{ - struct svga_compile_key key; - - memset(&key, 0, sizeof(key)); - - memcpy(&key.fkey, fkey, sizeof *fkey); - - memcpy(key.generic_remap_table, fs->generic_remap_table, - sizeof(fs->generic_remap_table)); - - return svga_tgsi_translate(&fs->base, &key, PIPE_SHADER_FRAGMENT); -} - - -struct svga_shader_variant * -svga_translate_vertex_program(const struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *vkey) -{ - struct svga_compile_key key; - - memset(&key, 0, sizeof(key)); - - memcpy(&key.vkey, vkey, sizeof *vkey); - - /* Note: we could alternately store the remap table in the vkey but - * that would make it larger. We just regenerate it here instead. - */ - svga_remap_generics(vkey->fs_generic_inputs, key.generic_remap_table); - - return svga_tgsi_translate(&vs->base, &key, PIPE_SHADER_VERTEX); -} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h index 5c47a4ad3..258113570 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h @@ -26,94 +26,16 @@ #ifndef SVGA_TGSI_H #define SVGA_TGSI_H -#include "pipe/p_state.h" +#include "pipe/p_compiler.h" +#include "svga3d_reg.h" -#include "svga_hw_reg.h" +#define MAX_VGPU10_ADDR_REGS 2 -/** - * We use a 32-bit mask to keep track of the generic indexes. - */ -#define MAX_GENERIC_VARYING 32 - - -struct svga_fragment_shader; -struct svga_vertex_shader; +struct svga_compile_key; +struct svga_context; struct svga_shader; -struct tgsi_shader_info; -struct tgsi_token; - - -struct svga_vs_compile_key -{ - unsigned fs_generic_inputs; - unsigned need_prescale:1; - unsigned allow_psiz:1; - unsigned adjust_attrib_range:16; - unsigned adjust_attrib_w_1:16; -}; - -struct svga_fs_compile_key -{ - unsigned light_twoside:1; - unsigned front_ccw:1; - unsigned white_fragments:1; - unsigned write_color0_to_n_cbufs:3; - unsigned num_textures:8; - unsigned num_unnormalized_coords:8; - unsigned sprite_origin_lower_left:1; - struct { - unsigned compare_mode:1; - unsigned compare_func:3; - unsigned unnormalized:1; - unsigned width_height_idx:7; - unsigned texture_target:8; - unsigned sprite_texgen:1; - unsigned swizzle_r:3; - unsigned swizzle_g:3; - unsigned swizzle_b:3; - unsigned swizzle_a:3; - } tex[PIPE_MAX_SAMPLERS]; -}; - -/** - * Key/index for identifying shader variants. - */ -struct svga_compile_key { - struct svga_vs_compile_key vkey; - struct svga_fs_compile_key fkey; - int8_t generic_remap_table[MAX_GENERIC_VARYING]; -}; - - -/** - * A single TGSI shader may be compiled into different variants of - * SVGA3D shaders depending on the compile key. Each user shader - * will have a linked list of these variants. 
- */ -struct svga_shader_variant -{ - const struct svga_shader *shader; - - /** Parameters used to generate this variant */ - struct svga_compile_key key; - - /* Compiled shader tokens: - */ - const unsigned *tokens; - unsigned nr_tokens; - - /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE, - * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY. - */ - unsigned id; - - /* GB object buffer containing the bytecode */ - struct svga_winsys_gb_shader *gb_shader; - - /** Next variant */ - struct svga_shader_variant *next; -}; +struct svga_shader_variant; /* TGSI doesn't provide use with VS input semantics (they're actually @@ -140,37 +62,17 @@ static inline void svga_generate_vdecl_semantics( unsigned idx, -static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) -{ - return sizeof *key; -} - -static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) -{ - return (const char *)&key->tex[key->num_textures] - (const char *)key; -} - struct svga_shader_variant * -svga_translate_fragment_program( const struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *fkey ); +svga_tgsi_vgpu9_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, unsigned unit); struct svga_shader_variant * -svga_translate_vertex_program( const struct svga_vertex_shader *fs, - const struct svga_vs_compile_key *vkey ); - - -unsigned -svga_get_generic_inputs_mask(const struct tgsi_shader_info *info); - -unsigned -svga_get_generic_outputs_mask(const struct tgsi_shader_info *info); - -void -svga_remap_generics(unsigned generics_mask, - int8_t remap_table[MAX_GENERIC_VARYING]); +svga_tgsi_vgpu10_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, + unsigned unit); -int -svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], - int generic_index); +boolean svga_shader_verify(const uint32_t *tokens, unsigned nr_tokens); #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index 42d6f489b..ca4009b9e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -216,7 +216,7 @@ ps30_input(struct svga_shader_emitter *emit, return emit_decl( emit, reg, 0, 0 ); } - else if (emit->key.fkey.light_twoside && + else if (emit->key.fs.light_twoside && (semantic.Name == TGSI_SEMANTIC_COLOR)) { if (!translate_vs_ps_semantic( emit, semantic, &usage, &index )) @@ -285,9 +285,9 @@ ps30_input(struct svga_shader_emitter *emit, return FALSE; if (semantic.Name == TGSI_SEMANTIC_GENERIC && - emit->key.fkey.sprite_origin_lower_left && + emit->key.sprite_origin_lower_left && index >= 1 && - emit->key.fkey.tex[index - 1].sprite_texgen) { + emit->key.tex[index - 1].sprite_texgen) { /* This is a sprite texture coord with lower-left origin. * We need to invert the texture T coordinate since the SVGA3D * device only supports an upper-left origin. 
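The fkey/vkey accesses rewritten throughout these hunks follow the flattened compile-key layout introduced by this import: per-stage sub-structs plus shared texture state now live directly in struct svga_compile_key (see svga_shader.h). A rough sketch of just the fields touched in this diff, reconstructed from the accesses above and below rather than the full definition:

/* Approximation for orientation only; the real struct lives in
 * svga_shader.h. Bit widths follow the old svga_vs/fs_compile_key
 * declarations removed from svga_tgsi.h in this same commit.
 */
struct svga_compile_key_sketch {
   struct {
      unsigned need_prescale:1;
      unsigned allow_psiz:1;
      unsigned adjust_attrib_range:16;
      unsigned adjust_attrib_w_1:16;
   } vs;
   struct {
      unsigned light_twoside:1;
      unsigned front_ccw:1;
      unsigned white_fragments:1;
      unsigned pstipple:1;
      unsigned write_color0_to_n_cbufs:3;
   } fs;
   unsigned num_textures:8;
   unsigned num_unnormalized_coords:8;
   unsigned sprite_origin_lower_left:1;
   unsigned clip_plane_enable;   /* bitmask of enabled user clip planes */
   struct {
      unsigned compare_mode:1;
      unsigned compare_func:3;
      unsigned unnormalized:1;
      unsigned width_height_idx:7;
      unsigned texture_target:8;
      unsigned sprite_texgen:1;
      unsigned swizzle_r:3, swizzle_g:3, swizzle_b:3, swizzle_a:3;
   } tex[PIPE_MAX_SAMPLERS];
};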
@@ -329,7 +329,7 @@ ps30_output(struct svga_shader_emitter *emit, switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: if (emit->unit == PIPE_SHADER_FRAGMENT) { - if (emit->key.fkey.white_fragments) { + if (emit->key.fs.white_fragments) { /* Used for XOR logicop mode */ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); @@ -337,14 +337,14 @@ ps30_output(struct svga_shader_emitter *emit, emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT, semantic.Index); } - else if (emit->key.fkey.write_color0_to_n_cbufs) { + else if (emit->key.fs.write_color0_to_n_cbufs) { /* We'll write color output [0] to all render targets. * Prepare all the output registers here, but only when the * semantic.Index == 0 so we don't do this more than once. */ if (semantic.Index == 0) { unsigned i; - for (i = 0; i < emit->key.fkey.write_color0_to_n_cbufs; i++) { + for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) { emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP, emit->nr_hw_temp++); emit->temp_color_output[i] = emit->output_map[idx+i]; @@ -487,7 +487,7 @@ vs30_output(struct svga_shader_emitter *emit, /* This has the effect of not declaring psiz (below) and not * emitting the final MOV to true_psiz in the postamble. */ - if (!emit->key.vkey.allow_psiz) + if (!emit->key.vs.allow_psiz) return TRUE; emit->true_psiz = dcl.dst; @@ -517,7 +517,7 @@ vs30_output(struct svga_shader_emitter *emit, static ubyte svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx) { - switch (emit->key.fkey.tex[idx].texture_target) { + switch (emit->key.tex[idx].texture_target) { case PIPE_TEXTURE_1D: return SVGA3DSAMP_2D; case PIPE_TEXTURE_2D: diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h index 1a1dac235..83f0c8bd4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -28,6 +28,7 @@ #include "tgsi/tgsi_scan.h" #include "svga_hw_reg.h" +#include "svga_shader.h" #include "svga_tgsi.h" #include "svga3d_shaderdefs.h" @@ -83,6 +84,9 @@ struct svga_shader_emitter int dynamic_branching_level; + unsigned num_output_writes; + boolean constant_color_output; + boolean in_main_func; boolean created_common_immediate; @@ -130,6 +134,8 @@ struct svga_shader_emitter struct svga_arl_consts arl_consts[12]; int num_arl_consts; int current_arl; + + unsigned pstipple_sampler_unit; }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c index bac956066..489e68f88 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -29,6 +29,7 @@ #include "tgsi/tgsi_parse.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_pstipple.h" #include "svga_tgsi_emit.h" #include "svga_context.h" @@ -98,6 +99,7 @@ translate_dst_register( struct svga_shader_emitter *emit, * Need to lookup a table built at decl time: */ dest = emit->output_map[reg->Register.Index]; + emit->num_output_writes++; break; default: @@ -164,7 +166,7 @@ scalar(struct src_register src, unsigned comp) static boolean svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) { - int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -177,7 +179,7 @@ svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) static int svga_arl_adjustment( const struct svga_shader_emitter *emit ) { 
- int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -862,7 +864,7 @@ create_common_immediate( struct svga_shader_emitter *emit ) idx++; /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ - if (emit->key.vkey.adjust_attrib_range) { + if (emit->key.vs.adjust_attrib_range) { if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 2.0f, 0.0f, 0.0f, 0.0f )) return FALSE; @@ -1015,7 +1017,7 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) struct src_register reg; /* the width/height indexes start right after constants */ - idx = emit->key.fkey.tex[sampler_num].width_height_idx + + idx = emit->key.tex[sampler_num].width_height_idx + emit->info.file_max[TGSI_FILE_CONSTANT] + 1; reg = src_register( SVGA3DREG_CONST, idx ); @@ -1173,7 +1175,7 @@ emit_div(struct svga_shader_emitter *emit, const struct src_register src1 = translate_src_register(emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); - int i; + unsigned i; /* For each enabled element, perform a RCP instruction. Note that * RCP is scalar in SVGA3D: @@ -1723,7 +1725,7 @@ emit_tex2(struct svga_shader_emitter *emit, texcoord = translate_src_register( emit, &insn->Src[0] ); sampler = translate_src_register( emit, &insn->Src[1] ); - if (emit->key.fkey.tex[sampler.base.num].unnormalized || + if (emit->key.tex[sampler.base.num].unnormalized || emit->dynamic_branching_level > 0) tmp = get_temp( emit ); @@ -1755,7 +1757,7 @@ emit_tex2(struct svga_shader_emitter *emit, /* Explicit normalization of texcoords: */ - if (emit->key.fkey.tex[sampler.base.num].unnormalized) { + if (emit->key.tex[sampler.base.num].unnormalized) { struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); /* MUL tmp, SRC0, WH */ @@ -1820,7 +1822,7 @@ emit_tex_swizzle(struct svga_shader_emitter *emit, const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; unsigned srcSwizzle[4]; unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0; - int i; + unsigned i; /* build writemasks and srcSwizzle terms */ for (i = 0; i < 4; i++) { @@ -1891,14 +1893,14 @@ emit_tex(struct svga_shader_emitter *emit, const unsigned unit = src1.base.num; /* check for shadow samplers */ - boolean compare = (emit->key.fkey.tex[unit].compare_mode == + boolean compare = (emit->key.tex[unit].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE); /* texture swizzle */ - boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED || - emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN || - emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE || - emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA); + boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED || + emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN || + emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE || + emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA); boolean saturate = insn->Instruction.Saturate; @@ -1965,7 +1967,7 @@ emit_tex(struct svga_shader_emitter *emit, /* Compare texture sample value against R component of texcoord */ if (!emit_select(emit, - emit->key.fkey.tex[unit].compare_func, + emit->key.tex[unit].compare_func, writemask( dst2, TGSI_WRITEMASK_XYZ ), r_coord, tex_src_x)) @@ -1991,10 +1993,10 @@ emit_tex(struct svga_shader_emitter *emit, /* swizzle from tex_result to dst (handles saturation too, if any) */ emit_tex_swizzle(emit, dst, src(tex_result), - emit->key.fkey.tex[unit].swizzle_r, - 
emit->key.fkey.tex[unit].swizzle_g, - emit->key.fkey.tex[unit].swizzle_b, - emit->key.fkey.tex[unit].swizzle_a); + emit->key.tex[unit].swizzle_r, + emit->key.tex[unit].swizzle_g, + emit->key.tex[unit].swizzle_b, + emit->key.tex[unit].swizzle_a); } return TRUE; @@ -2102,6 +2104,29 @@ emit_simple_instruction(struct svga_shader_emitter *emit, /** + * TGSI_OPCODE_MOVE is only special-cased here to detect the + * svga_fragment_shader::constant_color_output case. + */ +static boolean +emit_mov(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct tgsi_full_src_register *src = &insn->Src[0]; + const struct tgsi_full_dst_register *dst = &insn->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); +} + + +/** * Translate/emit TGSI DDX, DDY instructions. */ static boolean @@ -3044,6 +3069,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SSG: return emit_ssg( emit, insn ); + case TGSI_OPCODE_MOV: + return emit_mov( emit, insn ); + default: { unsigned opcode = translate_opcode(insn->Instruction.Opcode); @@ -3113,7 +3141,7 @@ make_immediate(struct svga_shader_emitter *emit, static boolean emit_vs_preamble(struct svga_shader_emitter *emit) { - if (!emit->key.vkey.need_prescale) { + if (!emit->key.vs.need_prescale) { if (!make_immediate( emit, 0, 0, .5, .5, &emit->imm_0055)) return FALSE; @@ -3190,7 +3218,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit) * logicop workaround. */ if (emit->unit == PIPE_SHADER_FRAGMENT && - emit->key.fkey.white_fragments) { + emit->key.fs.white_fragments) { struct src_register one = get_one_immediate(emit); if (!submit_op1( emit, @@ -3200,7 +3228,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit) return FALSE; } else if (emit->unit == PIPE_SHADER_FRAGMENT && - i < emit->key.fkey.write_color0_to_n_cbufs) { + i < emit->key.fs.write_color0_to_n_cbufs) { /* Write temp color output [0] to true output [i] */ if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), emit->true_color_output[i], @@ -3244,7 +3272,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit) /* Need to perform various manipulations on vertex position to cope * with the different GL and D3D clip spaces. 
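The prescale path referenced in this comment (key.vs.need_prescale here, prescale_scale_index/prescale_trans_index in the new VGPU10 emitter further down) folds the GL-to-D3D clip-space differences, the [-1,1] versus [0,1] depth range and the half-pixel viewport offset, into the post-transform position using per-viewport scale/translate constants. Roughly, and only as an illustration of the idea rather than the exact emitted instruction sequence:

/* Illustration only; "scale" and "trans" stand for the prescale
 * constants the driver uploads per viewport.
 */
static void
apply_prescale_sketch(float pos[4], const float scale[4], const float trans[4])
{
   /* clip-space position is homogeneous, so the translation term is
    * multiplied by w to survive the later perspective divide
    */
   pos[0] = pos[0] * scale[0] + pos[3] * trans[0];
   pos[1] = pos[1] * scale[1] + pos[3] * trans[1];
   pos[2] = pos[2] * scale[2] + pos[3] * trans[2];
   /* w is left unchanged */
}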
*/ - if (emit->key.vkey.need_prescale) { + if (emit->key.vs.need_prescale) { SVGA3dShaderDestToken temp_pos = emit->temp_pos; SVGA3dShaderDestToken depth = emit->depth_pos; SVGA3dShaderDestToken pos = emit->true_pos; @@ -3343,7 +3371,7 @@ emit_light_twoside(struct svga_shader_emitter *emit) struct src_register back[2]; SVGA3dShaderDestToken color[2]; int count = emit->internal_color_count; - int i; + unsigned i; SVGA3dShaderInstToken if_token; if (count == 0) @@ -3372,7 +3400,7 @@ emit_light_twoside(struct svga_shader_emitter *emit) if_token = inst_token( SVGA3DOP_IFC ); - if (emit->key.fkey.front_ccw) + if (emit->key.fs.front_ccw) if_token.control = SVGA3DOPCOMP_LT; else if_token.control = SVGA3DOPCOMP_GT; @@ -3423,7 +3451,7 @@ emit_frontface(struct svga_shader_emitter *emit) temp = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); - if (emit->key.fkey.front_ccw) { + if (emit->key.fs.front_ccw) { pass = get_zero_immediate(emit); fail = get_one_immediate(emit); } else { @@ -3494,8 +3522,8 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) static boolean emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) { - unsigned adjust_mask = (emit->key.vkey.adjust_attrib_range | - emit->key.vkey.adjust_attrib_w_1); + unsigned adjust_mask = (emit->key.vs.adjust_attrib_range | + emit->key.vs.adjust_attrib_w_1); while (adjust_mask) { /* Adjust vertex attrib range and/or set W component = 1 */ @@ -3506,7 +3534,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); emit->nr_hw_temp++; - if (emit->key.vkey.adjust_attrib_range & (1 << index)) { + if (emit->key.vs.adjust_attrib_range & (1 << index)) { /* The vertex input/attribute is supposed to be a signed value in * the range [-1,1] but we actually fetched/converted it to the * range [0,1]. 
This most likely happens when the app specifies a @@ -3558,7 +3586,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) return FALSE; } - if (emit->key.vkey.adjust_attrib_w_1 & (1 << index)) { + if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) { /* move 1 into W position of tmp */ if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), @@ -3588,10 +3616,10 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) unsigned i; if (emit->unit == PIPE_SHADER_FRAGMENT) { - if (emit->key.fkey.light_twoside) + if (emit->key.fs.light_twoside) return TRUE; - if (emit->key.fkey.white_fragments) + if (emit->key.fs.white_fragments) return TRUE; if (emit->emit_frontface) @@ -3606,16 +3634,16 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) return TRUE; /* look for any PIPE_SWIZZLE_ZERO/ONE terms */ - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA) + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA || + emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA || + emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA || + emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA) return TRUE; } - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].compare_mode + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) return TRUE; } @@ -3623,8 +3651,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) else if (emit->unit == PIPE_SHADER_VERTEX) { if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) return TRUE; - if (emit->key.vkey.adjust_attrib_range || - emit->key.vkey.adjust_attrib_w_1) + if (emit->key.vs.adjust_attrib_range || + emit->key.vs.adjust_attrib_w_1) return TRUE; } @@ -3670,7 +3698,7 @@ static boolean pre_parse_add_indirect( struct svga_shader_emitter *emit, int num, int current_arl) { - int i; + unsigned i; assert(num < 0); for (i = 0; i < emit->num_arl_consts; ++i) { @@ -3772,7 +3800,7 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) if (!emit_ps_preamble( emit )) return FALSE; - if (emit->key.fkey.light_twoside) { + if (emit->key.fs.light_twoside) { if (!emit_light_twoside( emit )) return FALSE; } @@ -3787,14 +3815,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) } else { assert(emit->unit == PIPE_SHADER_VERTEX); - if (emit->key.vkey.adjust_attrib_range || - emit->key.vkey.adjust_attrib_w_1) { - if (!emit_adjusted_vertex_attribs(emit)) + if (emit->key.vs.adjust_attrib_range) { + if (!emit_adjusted_vertex_attribs(emit) || + emit->key.vs.adjust_attrib_w_1) { return FALSE; + } } } - return TRUE; } @@ -3808,10 +3836,31 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, const struct tgsi_token *tokens) { struct tgsi_parse_context parse; + const struct tgsi_token *new_tokens = NULL; boolean ret = TRUE; boolean helpers_emitted = FALSE; unsigned line_nr = 0; + if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) { + unsigned unit; + + new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, + TGSI_FILE_INPUT); + + if (new_tokens) { + /* Setup texture state for stipple */ + emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D; + emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; + emit->key.tex[unit].swizzle_g 
= TGSI_SWIZZLE_Y; + emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; + emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; + + emit->pstipple_sampler_unit = unit; + + tokens = new_tokens; + } + } + tgsi_parse_init( &parse, tokens ); emit->internal_imm_count = 0; @@ -3878,5 +3927,9 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, done: tgsi_parse_free( &parse ); + if (new_tokens) { + tgsi_free_tokens(new_tokens); + } + return ret; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c new file mode 100644 index 000000000..0c5afeb4c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -0,0 +1,6846 @@ +/********************************************************** + * Copyright 1998-2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file svga_tgsi_vgpu10.c + * + * TGSI -> VGPU10 shader translation. + * + * \author Mingcheng Chen + * \author Brian Paul + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_two_side.h" +#include "tgsi/tgsi_aa_point.h" +#include "tgsi/tgsi_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "util/u_debug.h" +#include "util/u_pstipple.h" + +#include "svga_context.h" +#include "svga_debug.h" +#include "svga_link.h" +#include "svga_shader.h" +#include "svga_tgsi.h" + +#include "VGPU10ShaderTokens.h" + + +#define INVALID_INDEX 99999 +#define MAX_INTERNAL_TEMPS 3 +#define MAX_SYSTEM_VALUES 4 +#define MAX_IMMEDIATE_COUNT \ + (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) +#define MAX_TEMP_ARRAYS 64 /* Enough? */ + + +/** + * Clipping is complicated. There's four different cases which we + * handle during VS/GS shader translation: + */ +enum clipping_mode +{ + CLIP_NONE, /**< No clipping enabled */ + CLIP_LEGACY, /**< The shader has no clipping declarations or code but + * one or more user-defined clip planes are enabled. We + * generate extra code to emit clip distances. + */ + CLIP_DISTANCE, /**< The shader already declares clip distance output + * registers and has code to write to them. 
+ */ + CLIP_VERTEX /**< The shader declares a clip vertex output register and + * has code that writes to the register. We convert the + * clipvertex position into one or more clip distances. + */ +}; + + +struct svga_shader_emitter_v10 +{ + /* The token output buffer */ + unsigned size; + char *buf; + char *ptr; + + /* Information about the shader and state (does not change) */ + struct svga_compile_key key; + struct tgsi_shader_info info; + unsigned unit; + + unsigned inst_start_token; + boolean discard_instruction; /**< throw away current instruction? */ + + union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; + unsigned num_immediates; /**< Number of immediates emitted */ + unsigned common_immediate_pos[8]; /**< literals for common immediates */ + unsigned num_common_immediates; + boolean immediates_emitted; + + unsigned num_outputs; /**< include any extra outputs */ + /** The first extra output is reserved for + * non-adjusted vertex position for + * stream output purpose + */ + + /* Temporary Registers */ + unsigned num_shader_temps; /**< num of temps used by original shader */ + unsigned internal_temp_count; /**< currently allocated internal temps */ + struct { + unsigned start, size; + } temp_arrays[MAX_TEMP_ARRAYS]; + unsigned num_temp_arrays; + + /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ + struct { + unsigned arrayId, index; + } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ + + /** Number of constants used by original shader for each constant buffer. + * The size should probably always match with that of svga_state.constbufs. + */ + unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; + + /* Samplers */ + unsigned num_samplers; + + /* Address regs (really implemented with temps) */ + unsigned num_address_regs; + unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; + + /* Output register usage masks */ + ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; + + /* To map TGSI system value index to VGPU shader input indexes */ + ubyte system_value_indexes[MAX_SYSTEM_VALUES]; + + struct { + /* vertex position scale/translation */ + unsigned out_index; /**< the real position output reg */ + unsigned tmp_index; /**< the fake/temp position output reg */ + unsigned so_index; /**< the non-adjusted position output reg */ + unsigned prescale_scale_index, prescale_trans_index; + boolean need_prescale; + } vposition; + + /* For vertex shaders only */ + struct { + /* viewport constant */ + unsigned viewport_index; + + /* temp index of adjusted vertex attributes */ + unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; + } vs; + + /* For fragment shaders only */ + struct { + /* apha test */ + unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ + unsigned color_tmp_index; /**< fake/temp color output reg */ + unsigned alpha_ref_index; /**< immediate constant for alpha ref */ + + /* front-face */ + unsigned face_input_index; /**< real fragment shader face reg (bool) */ + unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ + + unsigned pstipple_sampler_unit; + + unsigned fragcoord_input_index; /**< real fragment position input reg */ + unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ + } fs; + + /* For geometry shaders only */ + struct { + VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ + VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ + unsigned input_size; /**< size of input arrays */ + unsigned prim_id_index; /**< primitive id register index */ + unsigned 
max_out_vertices; /**< maximum number of output vertices */ + } gs; + + /* For vertex or geometry shaders */ + enum clipping_mode clip_mode; + unsigned clip_dist_out_index; /**< clip distance output register index */ + unsigned clip_dist_tmp_index; /**< clip distance temporary register */ + unsigned clip_dist_so_index; /**< clip distance shadow copy */ + + /** Index of temporary holding the clipvertex coordinate */ + unsigned clip_vertex_out_index; /**< clip vertex output register index */ + unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ + + /* user clip plane constant slot indexes */ + unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; + + unsigned num_output_writes; + boolean constant_color_output; + + boolean uses_flat_interp; + + /* For all shaders: const reg index for RECT coord scaling */ + unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; + + /* For all shaders: const reg index for texture buffer size */ + unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; + + /* VS/GS/FS Linkage info */ + struct shader_linkage linkage; + + bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ +}; + + +static boolean +emit_post_helpers(struct svga_shader_emitter_v10 *emit); + +static boolean +emit_vertex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst); + +static char err_buf[128]; + +static boolean +expand(struct svga_shader_emitter_v10 *emit) +{ + char *new_buf; + unsigned newsize = emit->size * 2; + + if (emit->buf != err_buf) + new_buf = REALLOC(emit->buf, emit->size, newsize); + else + new_buf = NULL; + + if (!new_buf) { + emit->ptr = err_buf; + emit->buf = err_buf; + emit->size = sizeof(err_buf); + return FALSE; + } + + emit->size = newsize; + emit->ptr = new_buf + (emit->ptr - emit->buf); + emit->buf = new_buf; + return TRUE; +} + +/** + * Create and initialize a new svga_shader_emitter_v10 object. + */ +static struct svga_shader_emitter_v10 * +alloc_emitter(void) +{ + struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); + + if (!emit) + return NULL; + + /* to initialize the output buffer */ + emit->size = 512; + if (!expand(emit)) { + FREE(emit); + return NULL; + } + return emit; +} + +/** + * Free an svga_shader_emitter_v10 object. + */ +static void +free_emitter(struct svga_shader_emitter_v10 *emit) +{ + assert(emit); + FREE(emit->buf); /* will be NULL if translation succeeded */ + FREE(emit); +} + +static inline boolean +reserve(struct svga_shader_emitter_v10 *emit, + unsigned nr_dwords) +{ + while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { + if (!expand(emit)) + return FALSE; + } + + return TRUE; +} + +static boolean +emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) +{ + if (!reserve(emit, 1)) + return FALSE; + + *(uint32 *)emit->ptr = dword; + emit->ptr += sizeof dword; + return TRUE; +} + +static boolean +emit_dwords(struct svga_shader_emitter_v10 *emit, + const uint32 *dwords, + unsigned nr) +{ + if (!reserve(emit, nr)) + return FALSE; + + memcpy(emit->ptr, dwords, nr * sizeof *dwords); + emit->ptr += nr * sizeof *dwords; + return TRUE; +} + +/** Return the number of tokens in the emitter's buffer */ +static unsigned +emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) +{ + return (emit->ptr - emit->buf) / sizeof(unsigned); +} + + +/** + * Check for register overflow. If we overflow we'll set an + * error flag. This function can be called for register declarations + * or use as src/dst instruction operands. + * \param type register type. 
One of VGPU10_OPERAND_TYPE_x + or VGPU10_OPCODE_DCL_x + * \param index the register index + */ +static void +check_register_index(struct svga_shader_emitter_v10 *emit, + unsigned operandType, unsigned index) +{ + bool overflow_before = emit->register_overflow; + + switch (operandType) { + case VGPU10_OPERAND_TYPE_TEMP: + case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: + case VGPU10_OPCODE_DCL_TEMPS: + if (index >= VGPU10_MAX_TEMPS) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: + case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: + if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_INPUT: + case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: + case VGPU10_OPCODE_DCL_INPUT: + case VGPU10_OPCODE_DCL_INPUT_SGV: + case VGPU10_OPCODE_DCL_INPUT_SIV: + case VGPU10_OPCODE_DCL_INPUT_PS: + case VGPU10_OPCODE_DCL_INPUT_PS_SGV: + case VGPU10_OPCODE_DCL_INPUT_PS_SIV: + if ((emit->unit == PIPE_SHADER_VERTEX && + index >= VGPU10_MAX_VS_INPUTS) || + (emit->unit == PIPE_SHADER_GEOMETRY && + index >= VGPU10_MAX_GS_INPUTS) || + (emit->unit == PIPE_SHADER_FRAGMENT && + index >= VGPU10_MAX_FS_INPUTS)) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_OUTPUT: + case VGPU10_OPCODE_DCL_OUTPUT: + case VGPU10_OPCODE_DCL_OUTPUT_SGV: + case VGPU10_OPCODE_DCL_OUTPUT_SIV: + if ((emit->unit == PIPE_SHADER_VERTEX && + index >= VGPU10_MAX_VS_OUTPUTS) || + (emit->unit == PIPE_SHADER_GEOMETRY && + index >= VGPU10_MAX_GS_OUTPUTS) || + (emit->unit == PIPE_SHADER_FRAGMENT && + index >= VGPU10_MAX_FS_OUTPUTS)) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_SAMPLER: + case VGPU10_OPCODE_DCL_SAMPLER: + if (index >= VGPU10_MAX_SAMPLERS) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_RESOURCE: + case VGPU10_OPCODE_DCL_RESOURCE: + if (index >= VGPU10_MAX_RESOURCES) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + if (index >= MAX_IMMEDIATE_COUNT) { + emit->register_overflow = TRUE; + } + break; + default: + assert(0); + ; /* nothing */ + } + + if (emit->register_overflow && !overflow_before) { + debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n", + operandType, index); + } +} + + +/** + * Examine misc state to determine the clipping mode. + */ +static void +determine_clipping_mode(struct svga_shader_emitter_v10 *emit) +{ + if (emit->info.num_written_clipdistance > 0) { + emit->clip_mode = CLIP_DISTANCE; + } + else if (emit->info.writes_clipvertex) { + emit->clip_mode = CLIP_VERTEX; + } + else if (emit->key.clip_plane_enable) { + emit->clip_mode = CLIP_LEGACY; + } + else { + emit->clip_mode = CLIP_NONE; + } +} + + +/** + * For clip distance register declarations and clip distance register + * writes we need to mask the declaration usage or instruction writemask + * (respectively) against the set of the really-enabled clipping planes. + * + * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables + * has a VS that writes to all 8 clip distance registers, but the plane enable + * flags are a subset of that. + * + * This function is used to apply the plane enable flags to the register + * declaration or instruction writemask. + * + * \param writemask the declaration usage mask or instruction writemask + * \param clip_reg_index which clip plane register is being declared/written. 
+ * The legal values are 0 and 1 (two clip planes per + * register, for a total of 8 clip planes) + */ +static unsigned +apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, + unsigned writemask, unsigned clip_reg_index) +{ + unsigned shift; + + assert(clip_reg_index < 2); + + /* four clip planes per clip register: */ + shift = clip_reg_index * 4; + writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); + + return writemask; +} + + +/** + * Translate gallium shader type into VGPU10 type. + */ +static VGPU10_PROGRAM_TYPE +translate_shader_type(unsigned type) +{ + switch (type) { + case PIPE_SHADER_VERTEX: + return VGPU10_VERTEX_SHADER; + case PIPE_SHADER_GEOMETRY: + return VGPU10_GEOMETRY_SHADER; + case PIPE_SHADER_FRAGMENT: + return VGPU10_PIXEL_SHADER; + default: + assert(!"Unexpected shader type"); + return VGPU10_VERTEX_SHADER; + } +} + + +/** + * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x + * Note: we only need to translate the opcodes for "simple" instructions, + * as seen below. All other opcodes are handled/translated specially. + */ +static VGPU10_OPCODE_TYPE +translate_opcode(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + return VGPU10_OPCODE_MOV; + case TGSI_OPCODE_MUL: + return VGPU10_OPCODE_MUL; + case TGSI_OPCODE_ADD: + return VGPU10_OPCODE_ADD; + case TGSI_OPCODE_DP3: + return VGPU10_OPCODE_DP3; + case TGSI_OPCODE_DP4: + return VGPU10_OPCODE_DP4; + case TGSI_OPCODE_MIN: + return VGPU10_OPCODE_MIN; + case TGSI_OPCODE_MAX: + return VGPU10_OPCODE_MAX; + case TGSI_OPCODE_MAD: + return VGPU10_OPCODE_MAD; + case TGSI_OPCODE_SQRT: + return VGPU10_OPCODE_SQRT; + case TGSI_OPCODE_FRC: + return VGPU10_OPCODE_FRC; + case TGSI_OPCODE_FLR: + return VGPU10_OPCODE_ROUND_NI; + case TGSI_OPCODE_FSEQ: + return VGPU10_OPCODE_EQ; + case TGSI_OPCODE_FSGE: + return VGPU10_OPCODE_GE; + case TGSI_OPCODE_FSNE: + return VGPU10_OPCODE_NE; + case TGSI_OPCODE_DDX: + return VGPU10_OPCODE_DERIV_RTX; + case TGSI_OPCODE_DDY: + return VGPU10_OPCODE_DERIV_RTY; + case TGSI_OPCODE_RET: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_DIV: + return VGPU10_OPCODE_DIV; + case TGSI_OPCODE_IDIV: + return VGPU10_OPCODE_IDIV; + case TGSI_OPCODE_DP2: + return VGPU10_OPCODE_DP2; + case TGSI_OPCODE_BRK: + return VGPU10_OPCODE_BREAK; + case TGSI_OPCODE_IF: + return VGPU10_OPCODE_IF; + case TGSI_OPCODE_ELSE: + return VGPU10_OPCODE_ELSE; + case TGSI_OPCODE_ENDIF: + return VGPU10_OPCODE_ENDIF; + case TGSI_OPCODE_CEIL: + return VGPU10_OPCODE_ROUND_PI; + case TGSI_OPCODE_I2F: + return VGPU10_OPCODE_ITOF; + case TGSI_OPCODE_NOT: + return VGPU10_OPCODE_NOT; + case TGSI_OPCODE_TRUNC: + return VGPU10_OPCODE_ROUND_Z; + case TGSI_OPCODE_SHL: + return VGPU10_OPCODE_ISHL; + case TGSI_OPCODE_AND: + return VGPU10_OPCODE_AND; + case TGSI_OPCODE_OR: + return VGPU10_OPCODE_OR; + case TGSI_OPCODE_XOR: + return VGPU10_OPCODE_XOR; + case TGSI_OPCODE_CONT: + return VGPU10_OPCODE_CONTINUE; + case TGSI_OPCODE_EMIT: + return VGPU10_OPCODE_EMIT; + case TGSI_OPCODE_ENDPRIM: + return VGPU10_OPCODE_CUT; + case TGSI_OPCODE_BGNLOOP: + return VGPU10_OPCODE_LOOP; + case TGSI_OPCODE_ENDLOOP: + return VGPU10_OPCODE_ENDLOOP; + case TGSI_OPCODE_ENDSUB: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_NOP: + return VGPU10_OPCODE_NOP; + case TGSI_OPCODE_BREAKC: + return VGPU10_OPCODE_BREAKC; + case TGSI_OPCODE_END: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_F2I: + return VGPU10_OPCODE_FTOI; + case TGSI_OPCODE_IMAX: + return VGPU10_OPCODE_IMAX; + case TGSI_OPCODE_IMIN: + return VGPU10_OPCODE_IMIN; + case TGSI_OPCODE_UDIV: + 
case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_MOD: + return VGPU10_OPCODE_UDIV; + case TGSI_OPCODE_IMUL_HI: + return VGPU10_OPCODE_IMUL; + case TGSI_OPCODE_INEG: + return VGPU10_OPCODE_INEG; + case TGSI_OPCODE_ISHR: + return VGPU10_OPCODE_ISHR; + case TGSI_OPCODE_ISGE: + return VGPU10_OPCODE_IGE; + case TGSI_OPCODE_ISLT: + return VGPU10_OPCODE_ILT; + case TGSI_OPCODE_F2U: + return VGPU10_OPCODE_FTOU; + case TGSI_OPCODE_UADD: + return VGPU10_OPCODE_IADD; + case TGSI_OPCODE_U2F: + return VGPU10_OPCODE_UTOF; + case TGSI_OPCODE_UCMP: + return VGPU10_OPCODE_MOVC; + case TGSI_OPCODE_UMAD: + return VGPU10_OPCODE_UMAD; + case TGSI_OPCODE_UMAX: + return VGPU10_OPCODE_UMAX; + case TGSI_OPCODE_UMIN: + return VGPU10_OPCODE_UMIN; + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMUL_HI: + return VGPU10_OPCODE_UMUL; + case TGSI_OPCODE_USEQ: + return VGPU10_OPCODE_IEQ; + case TGSI_OPCODE_USGE: + return VGPU10_OPCODE_UGE; + case TGSI_OPCODE_USHR: + return VGPU10_OPCODE_USHR; + case TGSI_OPCODE_USLT: + return VGPU10_OPCODE_ULT; + case TGSI_OPCODE_USNE: + return VGPU10_OPCODE_INE; + case TGSI_OPCODE_SWITCH: + return VGPU10_OPCODE_SWITCH; + case TGSI_OPCODE_CASE: + return VGPU10_OPCODE_CASE; + case TGSI_OPCODE_DEFAULT: + return VGPU10_OPCODE_DEFAULT; + case TGSI_OPCODE_ENDSWITCH: + return VGPU10_OPCODE_ENDSWITCH; + case TGSI_OPCODE_FSLT: + return VGPU10_OPCODE_LT; + case TGSI_OPCODE_ROUND: + return VGPU10_OPCODE_ROUND_NE; + default: + assert(!"Unexpected TGSI opcode in translate_opcode()"); + return VGPU10_OPCODE_NOP; + } +} + + +/** + * Translate a TGSI register file type into a VGPU10 operand type. + * \param array is the TGSI_FILE_TEMPORARY register an array? + */ +static VGPU10_OPERAND_TYPE +translate_register_file(enum tgsi_file_type file, boolean array) +{ + switch (file) { + case TGSI_FILE_CONSTANT: + return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; + case TGSI_FILE_INPUT: + return VGPU10_OPERAND_TYPE_INPUT; + case TGSI_FILE_OUTPUT: + return VGPU10_OPERAND_TYPE_OUTPUT; + case TGSI_FILE_TEMPORARY: + return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP + : VGPU10_OPERAND_TYPE_TEMP; + case TGSI_FILE_IMMEDIATE: + /* all immediates are 32-bit values at this time so + * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. + */ + return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; + case TGSI_FILE_SAMPLER: + return VGPU10_OPERAND_TYPE_SAMPLER; + case TGSI_FILE_SYSTEM_VALUE: + return VGPU10_OPERAND_TYPE_INPUT; + + /* XXX TODO more cases to finish */ + + default: + assert(!"Bad tgsi register file!"); + return VGPU10_OPERAND_TYPE_NULL; + } +} + + +/** + * Emit a null dst register + */ +static void +emit_null_dst_register(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OperandToken0 operand; + + operand.value = 0; + operand.operandType = VGPU10_OPERAND_TYPE_NULL; + operand.numComponents = VGPU10_OPERAND_0_COMPONENT; + + emit_dword(emit, operand.value); +} + + +/** + * If the given register is a temporary, return the array ID. + * Else return zero. + */ +static unsigned +get_temp_array_id(const struct svga_shader_emitter_v10 *emit, + unsigned file, unsigned index) +{ + if (file == TGSI_FILE_TEMPORARY) { + return emit->temp_map[index].arrayId; + } + else { + return 0; + } +} + + +/** + * If the given register is a temporary, convert the index from a TGSI + * TEMPORARY index to a VGPU10 temp index. 
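temp_map gives each TGSI TEMP either a plain VGPU10 temp (arrayId == 0, addressed as r#) or a slot in an indexable temp array (arrayId > 0, addressed as x#[element]). A small illustrative consumer of that mapping; the emitter itself does this inline via get_temp_array_id() and remap_temp_index() in emit_dst_register()/emit_src_register():

/* Illustration only: print the VGPU10 spelling of a TGSI temp. */
static void
print_temp_operand_sketch(const struct svga_shader_emitter_v10 *emit,
                          unsigned tgsi_temp_index)
{
   const unsigned array = emit->temp_map[tgsi_temp_index].arrayId;
   const unsigned index = emit->temp_map[tgsi_temp_index].index;

   if (array == 0)
      debug_printf("r%u\n", index);            /* plain temp register */
   else
      debug_printf("x%u[%u]\n", array, index); /* indexable temp array */
}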
+ */ +static unsigned +remap_temp_index(const struct svga_shader_emitter_v10 *emit, + unsigned file, unsigned index) +{ + if (file == TGSI_FILE_TEMPORARY) { + return emit->temp_map[index].index; + } + else { + return index; + } +} + + +/** + * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). + * Note: the operandType field must already be initialized. + */ +static VGPU10OperandToken0 +setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, + VGPU10OperandToken0 operand0, + unsigned file, + boolean indirect, boolean index2D, + unsigned tempArrayID) +{ + unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D; + + /* + * Compute index dimensions + */ + if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || + operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { + /* there's no swizzle for in-line immediates */ + indexDim = VGPU10_OPERAND_INDEX_0D; + assert(operand0.selectionMode == 0); + } + else { + if (index2D || + tempArrayID > 0 || + operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { + indexDim = VGPU10_OPERAND_INDEX_2D; + } + else { + indexDim = VGPU10_OPERAND_INDEX_1D; + } + } + + /* + * Compute index representations (immediate, relative, etc). + */ + if (tempArrayID > 0) { + assert(file == TGSI_FILE_TEMPORARY); + /* First index is the array ID, second index is the array element */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + if (indirect) { + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + else { + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + } + } + else if (indirect) { + if (file == TGSI_FILE_CONSTANT) { + /* index[0] indicates which constant buffer while index[1] indicates + * the position in the constant buffer. + */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + else { + /* All other register files are 1-dimensional */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + } + else { + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + } + + operand0.indexDimension = indexDim; + operand0.index0Representation = index0Rep; + operand0.index1Representation = index1Rep; + + return operand0; +} + + +/** + * Emit the operand for expressing an address register for indirect indexing. + * Note that the address register is really just a temp register. + * \param addr_reg_index which address register to use + */ +static void +emit_indirect_register(struct svga_shader_emitter_v10 *emit, + unsigned addr_reg_index) +{ + unsigned tmp_reg_index; + VGPU10OperandToken0 operand0; + + assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); + + tmp_reg_index = emit->address_reg_index[addr_reg_index]; + + /* operand0 is a simple temporary register, selecting one component */ + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + operand0.swizzleX = 0; + operand0.swizzleY = 1; + operand0.swizzleZ = 2; + operand0.swizzleW = 3; + + emit_dword(emit, operand0.value); + emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); +} + + +/** + * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 
+ * \param emit the emitter context + * \param reg the TGSI dst register to translate + */ +static void +emit_dst_register(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *reg) +{ + unsigned file = reg->Register.File; + unsigned index = reg->Register.Index; + const unsigned sem_name = emit->info.output_semantic_name[index]; + const unsigned sem_index = emit->info.output_semantic_index[index]; + unsigned writemask = reg->Register.WriteMask; + const unsigned indirect = reg->Register.Indirect; + const unsigned tempArrayId = get_temp_array_id(emit, file, index); + const unsigned index2d = reg->Register.Dimension; + VGPU10OperandToken0 operand0; + + if (file == TGSI_FILE_OUTPUT) { + if (emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY) { + if (index == emit->vposition.out_index && + emit->vposition.tmp_index != INVALID_INDEX) { + /* replace OUTPUT[POS] with TEMP[POS]. We need to store the + * vertex position result in a temporary so that we can modify + * it in the post_helper() code. + */ + file = TGSI_FILE_TEMPORARY; + index = emit->vposition.tmp_index; + } + else if (sem_name == TGSI_SEMANTIC_CLIPDIST && + emit->clip_dist_tmp_index != INVALID_INDEX) { + /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. + * We store the clip distance in a temporary first, then + * we'll copy it to the shadow copy and to CLIPDIST with the + * enabled planes mask in emit_clip_distance_instructions(). + */ + file = TGSI_FILE_TEMPORARY; + index = emit->clip_dist_tmp_index + sem_index; + } + else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && + emit->clip_vertex_tmp_index != INVALID_INDEX) { + /* replace the CLIPVERTEX output register with a temporary */ + assert(emit->clip_mode == CLIP_VERTEX); + assert(sem_index == 0); + file = TGSI_FILE_TEMPORARY; + index = emit->clip_vertex_tmp_index; + } + } + else if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (sem_name == TGSI_SEMANTIC_POSITION) { + /* Fragment depth output register */ + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + emit_dword(emit, operand0.value); + return; + } + else if (index == emit->fs.color_out_index[0] && + emit->fs.color_tmp_index != INVALID_INDEX) { + /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the + * fragment color result in a temporary so that we can read it + * it in the post_helper() code. + */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.color_tmp_index; + } + else { + /* Typically, for fragment shaders, the output register index + * matches the color semantic index. But not when we write to + * the fragment depth register. In that case, OUT[0] will be + * fragdepth and OUT[1] will be the 0th color output. We need + * to use the semantic index for color outputs. + */ + assert(sem_name == TGSI_SEMANTIC_COLOR); + index = emit->info.output_semantic_index[index]; + + emit->num_output_writes++; + } + } + } + + /* init operand tokens to all zero */ + operand0.value = 0; + + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + /* the operand has a writemask */ + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + + /* Which of the four dest components to write to. Note that we can use a + * simple assignment here since TGSI writemasks match VGPU10 writemasks. 
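 * (For example, TGSI_WRITEMASK_XZ -- bits 0 and 2 -- selects the same X and
 * Z bits in the VGPU10 component mask; the STATIC_ASSERT below spot-checks
 * this one-to-one bit layout.)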
+ */ + STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); + operand0.mask = writemask; + + /* translate TGSI register file type to VGPU10 operand type */ + operand0.operandType = translate_register_file(file, tempArrayId > 0); + + check_register_index(emit, operand0.operandType, index); + + operand0 = setup_operand0_indexing(emit, operand0, file, indirect, + index2d, tempArrayId); + + /* Emit tokens */ + emit_dword(emit, operand0.value); + if (tempArrayId > 0) { + emit_dword(emit, tempArrayId); + } + + emit_dword(emit, remap_temp_index(emit, file, index)); + + if (indirect) { + emit_indirect_register(emit, reg->Indirect.Index); + } +} + + +/** + * Translate a src register of a TGSI instruction and emit VGPU10 tokens. + */ +static void +emit_src_register(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_src_register *reg) +{ + unsigned file = reg->Register.File; + unsigned index = reg->Register.Index; + const unsigned indirect = reg->Register.Indirect; + const unsigned tempArrayId = get_temp_array_id(emit, file, index); + const unsigned index2d = reg->Register.Dimension; + const unsigned swizzleX = reg->Register.SwizzleX; + const unsigned swizzleY = reg->Register.SwizzleY; + const unsigned swizzleZ = reg->Register.SwizzleZ; + const unsigned swizzleW = reg->Register.SwizzleW; + const unsigned absolute = reg->Register.Absolute; + const unsigned negate = reg->Register.Negate; + bool is_prim_id = FALSE; + + VGPU10OperandToken0 operand0; + VGPU10OperandToken1 operand1; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + file == TGSI_FILE_INPUT) { + if (index == emit->fs.face_input_index) { + /* Replace INPUT[FACE] with TEMP[FACE] */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.face_tmp_index; + } + else if (index == emit->fs.fragcoord_input_index) { + /* Replace INPUT[POSITION] with TEMP[POSITION] */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.fragcoord_tmp_index; + } + else { + /* We remap fragment shader inputs to that FS input indexes + * match up with VS/GS output indexes. + */ + index = emit->linkage.input_map[index]; + } + } + else if (emit->unit == PIPE_SHADER_GEOMETRY && + file == TGSI_FILE_INPUT) { + is_prim_id = (index == emit->gs.prim_id_index); + index = emit->linkage.input_map[index]; + } + else if (emit->unit == PIPE_SHADER_VERTEX) { + if (file == TGSI_FILE_INPUT) { + /* if input is adjusted... 
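 * (editorial note, inferred from the flags tested below: i.e. when any
 * w-to-1, int-to-float, uint-to-float, BGRA or packed-uint fixup applies to
 * this attribute, the pre-adjusted value is read from a temporary instead)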
*/ + if ((emit->key.vs.adjust_attrib_w_1 | + emit->key.vs.adjust_attrib_itof | + emit->key.vs.adjust_attrib_utof | + emit->key.vs.attrib_is_bgra | + emit->key.vs.attrib_puint_to_snorm | + emit->key.vs.attrib_puint_to_uscaled | + emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { + file = TGSI_FILE_TEMPORARY; + index = emit->vs.adjusted_input[index]; + } + } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + assert(index < Elements(emit->system_value_indexes)); + index = emit->system_value_indexes[index]; + } + } + + operand0.value = operand1.value = 0; + + if (is_prim_id) { + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + } + else { + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = translate_register_file(file, tempArrayId > 0); + } + + operand0 = setup_operand0_indexing(emit, operand0, file, indirect, + index2d, tempArrayId); + + if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && + operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { + /* there's no swizzle for in-line immediates */ + if (swizzleX == swizzleY && + swizzleX == swizzleZ && + swizzleX == swizzleW) { + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + } + else { + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + } + + operand0.swizzleX = swizzleX; + operand0.swizzleY = swizzleY; + operand0.swizzleZ = swizzleZ; + operand0.swizzleW = swizzleW; + + if (absolute || negate) { + operand0.extended = 1; + operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; + if (absolute && !negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; + if (!absolute && negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; + if (absolute && negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; + } + } + + /* Emit the operand tokens */ + emit_dword(emit, operand0.value); + if (operand0.extended) + emit_dword(emit, operand1.value); + + if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { + /* Emit the four float/int in-line immediate values */ + unsigned *c; + assert(index < Elements(emit->immediates)); + assert(file == TGSI_FILE_IMMEDIATE); + assert(swizzleX < 4); + assert(swizzleY < 4); + assert(swizzleZ < 4); + assert(swizzleW < 4); + c = (unsigned *) emit->immediates[index]; + emit_dword(emit, c[swizzleX]); + emit_dword(emit, c[swizzleY]); + emit_dword(emit, c[swizzleZ]); + emit_dword(emit, c[swizzleW]); + } + else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { + /* Emit the register index(es) */ + if (index2d || + operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { + emit_dword(emit, reg->Dimension.Index); + } + + if (tempArrayId > 0) { + emit_dword(emit, tempArrayId); + } + + emit_dword(emit, remap_temp_index(emit, file, index)); + + if (indirect) { + emit_indirect_register(emit, reg->Indirect.Index); + } + } +} + + +/** + * Emit a resource operand (for use with a SAMPLE instruction). 
+ */ +static void +emit_resource_register(struct svga_shader_emitter_v10 *emit, + unsigned resource_number) +{ + VGPU10OperandToken0 operand0; + + check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_Y; + operand0.swizzleZ = VGPU10_COMPONENT_Z; + operand0.swizzleW = VGPU10_COMPONENT_W; + + emit_dword(emit, operand0.value); + emit_dword(emit, resource_number); +} + + +/** + * Emit a sampler operand (for use with a SAMPLE instruction). + */ +static void +emit_sampler_register(struct svga_shader_emitter_v10 *emit, + unsigned sampler_number) +{ + VGPU10OperandToken0 operand0; + + check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + + emit_dword(emit, operand0.value); + emit_dword(emit, sampler_number); +} + + +/** + * Emit an operand which reads the IS_FRONT_FACING register. + */ +static void +emit_face_register(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OperandToken0 operand0; + unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_X; + operand0.swizzleZ = VGPU10_COMPONENT_X; + operand0.swizzleW = VGPU10_COMPONENT_X; + + emit_dword(emit, operand0.value); + emit_dword(emit, index); +} + + +/** + * Emit the token for a VGPU10 opcode. + * \param saturate clamp result to [0,1]? + */ +static void +emit_opcode(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate) +{ + VGPU10OpcodeToken0 token0; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = vgpu10_opcode; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.saturate = saturate; + + emit_dword(emit, token0.value); +} + + +/** + * Emit the token for a VGPU10 resinfo instruction. + * \param modifier return type modifier, _uint or _rcpFloat. + * TODO: We may want to remove this parameter if it will + * only ever be used as _uint. + */ +static void +emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, + VGPU10_RESINFO_RETURN_TYPE modifier) +{ + VGPU10OpcodeToken0 token0; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = VGPU10_OPCODE_RESINFO; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.resinfoReturnType = modifier; + + emit_dword(emit, token0.value); +} + + +/** + * Emit opcode tokens for a texture sample instruction. Texture instructions + * can be rather complicated (texel offsets, etc) so we have this specialized + * function. 
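 * For example (illustrative): texel offsets of {1, 0, 0} set token0.extended
 * and emit a second, extended opcode token carrying offsetU/V/W; all-zero
 * offsets emit just the plain opcode token.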
+ */ +static void +emit_sample_opcode(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate, + const int offsets[3]) +{ + VGPU10OpcodeToken0 token0; + VGPU10OpcodeToken1 token1; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = vgpu10_opcode; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.saturate = saturate; + + if (offsets[0] || offsets[1] || offsets[2]) { + assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + + token0.extended = 1; + token1.value = 0; + token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; + token1.offsetU = offsets[0]; + token1.offsetV = offsets[1]; + token1.offsetW = offsets[2]; + } + + emit_dword(emit, token0.value); + if (token0.extended) { + emit_dword(emit, token1.value); + } +} + + +/** + * Emit a DISCARD opcode token. + * If nonzero is set, we'll discard the fragment if the X component is not 0. + * Otherwise, we'll discard the fragment if the X component is 0. + */ +static void +emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) +{ + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DISCARD; + if (nonzero) + opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; + + emit_dword(emit, opcode0.value); +} + + +/** + * We need to call this before we begin emitting a VGPU10 instruction. + */ +static void +begin_emit_instruction(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->inst_start_token == 0); + /* Save location of the instruction's VGPU10OpcodeToken0 token. + * Note, we can't save a pointer because it would become invalid if + * we have to realloc the output buffer. + */ + emit->inst_start_token = emit_get_num_tokens(emit); +} + + +/** + * We need to call this after we emit the last token of a VGPU10 instruction. + * This function patches in the opcode token's instructionLength field. + */ +static void +end_emit_instruction(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; + unsigned inst_length; + + assert(emit->inst_start_token > 0); + + if (emit->discard_instruction) { + /* Back up the emit->ptr to where this instruction started so + * that we discard the current instruction. + */ + emit->ptr = (char *) (tokens + emit->inst_start_token); + } + else { + /* Compute instruction length and patch that into the start of + * the instruction. + */ + inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; + + assert(inst_length > 0); + + tokens[emit->inst_start_token].instructionLength = inst_length; + } + + emit->inst_start_token = 0; /* reset to zero for error checking */ + emit->discard_instruction = FALSE; +} + + +/** + * Return index for a free temporary register. + */ +static unsigned +get_temp_index(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); + return emit->num_shader_temps + emit->internal_temp_count++; +} + + +/** + * Release the temporaries which were generated by get_temp_index(). + */ +static void +free_temp_indexes(struct svga_shader_emitter_v10 *emit) +{ + emit->internal_temp_count = 0; +} + + +/** + * Create a tgsi_full_src_register. 
+ */ +static struct tgsi_full_src_register +make_src_reg(unsigned file, unsigned index) +{ + struct tgsi_full_src_register reg; + + memset(®, 0, sizeof(reg)); + reg.Register.File = file; + reg.Register.Index = index; + reg.Register.SwizzleX = TGSI_SWIZZLE_X; + reg.Register.SwizzleY = TGSI_SWIZZLE_Y; + reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; + reg.Register.SwizzleW = TGSI_SWIZZLE_W; + return reg; +} + + +/** + * Create a tgsi_full_src_register for a temporary. + */ +static struct tgsi_full_src_register +make_src_temp_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_TEMPORARY, index); +} + + +/** + * Create a tgsi_full_src_register for a constant. + */ +static struct tgsi_full_src_register +make_src_const_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_CONSTANT, index); +} + + +/** + * Create a tgsi_full_src_register for an immediate constant. + */ +static struct tgsi_full_src_register +make_src_immediate_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_IMMEDIATE, index); +} + + +/** + * Create a tgsi_full_dst_register. + */ +static struct tgsi_full_dst_register +make_dst_reg(unsigned file, unsigned index) +{ + struct tgsi_full_dst_register reg; + + memset(®, 0, sizeof(reg)); + reg.Register.File = file; + reg.Register.Index = index; + reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; + return reg; +} + + +/** + * Create a tgsi_full_dst_register for a temporary. + */ +static struct tgsi_full_dst_register +make_dst_temp_reg(unsigned index) +{ + return make_dst_reg(TGSI_FILE_TEMPORARY, index); +} + + +/** + * Create a tgsi_full_dst_register for an output. + */ +static struct tgsi_full_dst_register +make_dst_output_reg(unsigned index) +{ + return make_dst_reg(TGSI_FILE_OUTPUT, index); +} + + +/** + * Create negated tgsi_full_src_register. + */ +static struct tgsi_full_src_register +negate_src(const struct tgsi_full_src_register *reg) +{ + struct tgsi_full_src_register neg = *reg; + neg.Register.Negate = !reg->Register.Negate; + return neg; +} + +/** + * Create absolute value of a tgsi_full_src_register. + */ +static struct tgsi_full_src_register +absolute_src(const struct tgsi_full_src_register *reg) +{ + struct tgsi_full_src_register absolute = *reg; + absolute.Register.Absolute = 1; + return absolute; +} + + +/** Return the named swizzle term from the src register */ +static inline unsigned +get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) +{ + switch (term) { + case TGSI_SWIZZLE_X: + return reg->Register.SwizzleX; + case TGSI_SWIZZLE_Y: + return reg->Register.SwizzleY; + case TGSI_SWIZZLE_Z: + return reg->Register.SwizzleZ; + case TGSI_SWIZZLE_W: + return reg->Register.SwizzleW; + default: + assert(!"Bad swizzle"); + return TGSI_SWIZZLE_X; + } +} + + +/** + * Create swizzled tgsi_full_src_register. + */ +static struct tgsi_full_src_register +swizzle_src(const struct tgsi_full_src_register *reg, + unsigned swizzleX, unsigned swizzleY, + unsigned swizzleZ, unsigned swizzleW) +{ + struct tgsi_full_src_register swizzled = *reg; + /* Note: we swizzle the current swizzle */ + swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); + swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); + swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); + swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); + return swizzled; +} + + +/** + * Create swizzled tgsi_full_src_register where all the swizzle + * terms are the same. 
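 * For example (illustrative):
 *    struct tgsi_full_src_register t = make_src_temp_reg(3);
 *    struct tgsi_full_src_register w = scalar_src(&t, TGSI_SWIZZLE_W);
 * gives a TEMP[3].wwww source.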
+ */ +static struct tgsi_full_src_register +scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) +{ + struct tgsi_full_src_register swizzled = *reg; + /* Note: we swizzle the current swizzle */ + swizzled.Register.SwizzleX = + swizzled.Register.SwizzleY = + swizzled.Register.SwizzleZ = + swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); + return swizzled; +} + + +/** + * Create new tgsi_full_dst_register with writemask. + * \param mask bitmask of TGSI_WRITEMASK_[XYZW] + */ +static struct tgsi_full_dst_register +writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) +{ + struct tgsi_full_dst_register masked = *reg; + masked.Register.WriteMask = mask; + return masked; +} + + +/** + * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. + */ +static boolean +same_swizzle_terms(const struct tgsi_full_src_register *reg) +{ + return (reg->Register.SwizzleX == reg->Register.SwizzleY && + reg->Register.SwizzleY == reg->Register.SwizzleZ && + reg->Register.SwizzleZ == reg->Register.SwizzleW); +} + + +/** + * Search the vector for the value 'x' and return its position. + */ +static int +find_imm_in_vec4(const union tgsi_immediate_data vec[4], + union tgsi_immediate_data x) +{ + unsigned i; + for (i = 0; i < 4; i++) { + if (vec[i].Int == x.Int) + return i; + } + return -1; +} + + +/** + * Helper used by make_immediate_reg(), make_immediate_reg_4(). + */ +static int +find_immediate(struct svga_shader_emitter_v10 *emit, + union tgsi_immediate_data x, unsigned startIndex) +{ + const unsigned endIndex = emit->num_immediates; + unsigned i; + + assert(emit->immediates_emitted); + + /* Search immediates for x, y, z, w */ + for (i = startIndex; i < endIndex; i++) { + if (x.Int == emit->immediates[i][0].Int || + x.Int == emit->immediates[i][1].Int || + x.Int == emit->immediates[i][2].Int || + x.Int == emit->immediates[i][3].Int) { + return i; + } + } + /* Should never try to use an immediate value that wasn't pre-declared */ + assert(!"find_immediate() failed!"); + return -1; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal + * union tgsi_immediate_data[4] value. + * Note: the values must have been previously declared/allocated in + * emit_pre_helpers(). And, all of x,y,z,w must be located in the same + * vec4 immediate. 
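 * For example (illustrative values): if IMM[2] = {0.0, 1.0, 0.5, 2.0} was
 * pre-declared, a request for {1.0, 1.0, 0.0, 0.0} returns IMM[2] with a
 * .yyxx swizzle; a value not found in any single pre-declared vec4 hits the
 * assertion below.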
+ */ +static struct tgsi_full_src_register +make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, + const union tgsi_immediate_data imm[4]) +{ + struct tgsi_full_src_register reg; + unsigned i; + + for (i = 0; i < emit->num_common_immediates; i++) { + /* search for first component value */ + int immpos = find_immediate(emit, imm[0], i); + int x, y, z, w; + + assert(immpos >= 0); + + /* find remaining components within the immediate vector */ + x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); + y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); + z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); + w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); + + if (x >=0 && y >= 0 && z >= 0 && w >= 0) { + /* found them all */ + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + reg.Register.Index = immpos; + reg.Register.SwizzleX = x; + reg.Register.SwizzleY = y; + reg.Register.SwizzleZ = z; + reg.Register.SwizzleW = w; + return reg; + } + /* else, keep searching */ + } + + assert(!"Failed to find immediate register!"); + + /* Just return IMM[0].xxxx */ + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + return reg; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal + * union tgsi_immediate_data value of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg(struct svga_shader_emitter_v10 *emit, + union tgsi_immediate_data value) +{ + struct tgsi_full_src_register reg; + int immpos = find_immediate(emit, value, 0); + + assert(immpos >= 0); + + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + reg.Register.Index = immpos; + reg.Register.SwizzleX = + reg.Register.SwizzleY = + reg.Register.SwizzleZ = + reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); + + return reg; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal float[4] value. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, + float x, float y, float z, float w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Float = x; + imm[1].Float = y; + imm[2].Float = z; + imm[3].Float = w; + return make_immediate_reg_4(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal float value + * of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) +{ + union tgsi_immediate_data imm; + imm.Float = value; + return make_immediate_reg(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal int[4] vector. + */ +static struct tgsi_full_src_register +make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, + int x, int y, int z, int w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Int = x; + imm[1].Int = y; + imm[2].Int = z; + imm[3].Int = w; + return make_immediate_reg_4(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal int value + * of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. 
+ */ +static struct tgsi_full_src_register +make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) +{ + union tgsi_immediate_data imm; + imm.Int = value; + return make_immediate_reg(emit, imm); +} + + +/** + * Allocate space for a union tgsi_immediate_data[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_4(struct svga_shader_emitter_v10 *emit, + const union tgsi_immediate_data imm[4]) +{ + unsigned n = emit->num_immediates++; + assert(!emit->immediates_emitted); + assert(n < Elements(emit->immediates)); + emit->immediates[n][0] = imm[0]; + emit->immediates[n][1] = imm[1]; + emit->immediates[n][2] = imm[2]; + emit->immediates[n][3] = imm[3]; + return n; +} + + +/** + * Allocate space for a float[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, + float x, float y, float z, float w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Float = x; + imm[1].Float = y; + imm[2].Float = z; + imm[3].Float = w; + return alloc_immediate_4(emit, imm); +} + + +/** + * Allocate space for a int[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, + int x, int y, int z, int w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Int = x; + imm[1].Int = y; + imm[2].Int = z; + imm[3].Int = w; + return alloc_immediate_4(emit, imm); +} + + +/** + * Allocate a shader input to store a system value. + */ +static unsigned +alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) +{ + const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; + assert(index < Elements(emit->system_value_indexes)); + emit->system_value_indexes[index] = n; + return n; +} + + +/** + * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. + */ +static boolean +emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_immediate *imm) +{ + /* We don't actually emit any code here. We just save the + * immediate values and emit them later. + */ + alloc_immediate_4(emit, imm->u); + return TRUE; +} + + +/** + * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block + * containing all the immediate values previously allocated + * with alloc_immediate_4(). + */ +static boolean +emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 token; + + assert(!emit->immediates_emitted); + + token.value = 0; + token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; + token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; + + /* Note: no begin/end_emit_instruction() calls */ + emit_dword(emit, token.value); + emit_dword(emit, 2 + 4 * emit->num_immediates); + emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); + + emit->immediates_emitted = TRUE; + + return TRUE; +} + + +/** + * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 + * interpolation mode. + * \return a VGPU10_INTERPOLATION_x value + */ +static unsigned +translate_interpolation(const struct svga_shader_emitter_v10 *emit, + unsigned interp, unsigned interpolate_loc) +{ + if (interp == TGSI_INTERPOLATE_COLOR) { + interp = emit->key.fs.flatshade ? 
+ TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; + } + + switch (interp) { + case TGSI_INTERPOLATE_CONSTANT: + return VGPU10_INTERPOLATION_CONSTANT; + case TGSI_INTERPOLATE_LINEAR: + return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; + case TGSI_INTERPOLATE_PERSPECTIVE: + return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? + VGPU10_INTERPOLATION_LINEAR_CENTROID : + VGPU10_INTERPOLATION_LINEAR; + default: + assert(!"Unexpected interpolation mode"); + return VGPU10_INTERPOLATION_CONSTANT; + } +} + + +/** + * Translate a TGSI property to VGPU10. + * Don't emit any instructions yet, only need to gather the primitive property information. + * The output primitive topology might be changed later. The final property instructions + * will be emitted as part of the pre-helper code. + */ +static boolean +emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_property *prop) +{ + static const VGPU10_PRIMITIVE primType[] = { + VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ + VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + }; + + static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { + VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + }; + + static const unsigned inputArraySize[] = { + 0, /* VGPU10_PRIMITIVE_UNDEFINED */ + 1, /* VGPU10_PRIMITIVE_POINT */ + 2, /* VGPU10_PRIMITIVE_LINE */ + 3, /* VGPU10_PRIMITIVE_TRIANGLE */ + 0, + 0, + 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ + 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ + }; + + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_GS_INPUT_PRIM: + assert(prop->u[0].Data < Elements(primType)); + emit->gs.prim_type = primType[prop->u[0].Data]; + assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); + emit->gs.input_size = inputArraySize[emit->gs.prim_type]; + break; + + case 
TGSI_PROPERTY_GS_OUTPUT_PRIM: + assert(prop->u[0].Data < Elements(primTopology)); + emit->gs.prim_topology = primTopology[prop->u[0].Data]; + assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); + break; + + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + emit->gs.max_out_vertices = prop->u[0].Data; + break; + + default: + break; + } + + return TRUE; +} + + +static void +emit_property_instruction(struct svga_shader_emitter_v10 *emit, + VGPU10OpcodeToken0 opcode0, unsigned nData, + unsigned data) +{ + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + if (nData) + emit_dword(emit, data); + end_emit_instruction(emit); +} + + +/** + * Emit property instructions + */ +static void +emit_property_instructions(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + /* emit input primitive type declaration */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; + opcode0.primitive = emit->gs.prim_type; + emit_property_instruction(emit, opcode0, 0, 0); + + /* emit output primitive topology declaration */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; + opcode0.primitiveTopology = emit->gs.prim_topology; + emit_property_instruction(emit, opcode0, 0, 0); + + /* emit max output vertices */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; + emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); +} + + +/** + * Emit a vgpu10 declaration "instruction". + * \param index the register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. + */ +static void +emit_decl_instruction(struct svga_shader_emitter_v10 *emit, + VGPU10OpcodeToken0 opcode0, + VGPU10OperandToken0 operand0, + VGPU10NameToken name_token, + unsigned index, unsigned size) +{ + assert(opcode0.opcodeType); + assert(operand0.mask); + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + + emit_dword(emit, operand0.value); + + if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { + /* Next token is the index of the register to declare */ + emit_dword(emit, index); + } + else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { + /* Next token is the size of the register */ + emit_dword(emit, size); + + /* Followed by the index of the register */ + emit_dword(emit, index); + } + + if (name_token.value) { + emit_dword(emit, name_token.value); + } + + end_emit_instruction(emit); +} + + +/** + * Emit the declaration for a shader input. + * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx + * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x + * \param dim index dimension + * \param index the input register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. 
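 *             (e.g. 3 for a geometry shader with triangle input -- see the
 *             inputArraySize[] table above)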
+ * \param name one of VGPU10_NAME_x
+ * \param numComp number of components
+ * \param selMode component selection mode
+ * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ * \param interpMode interpolation mode
+ */
+static void
+emit_input_declaration(struct svga_shader_emitter_v10 *emit,
+                       unsigned opcodeType, unsigned operandType,
+                       unsigned dim, unsigned index, unsigned size,
+                       unsigned name, unsigned numComp,
+                       unsigned selMode, unsigned usageMask,
+                       unsigned interpMode)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
+   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
+          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
+   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
+   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
+   assert(dim <= VGPU10_OPERAND_INDEX_3D);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_INSTANCE_ID ||
+          name == VGPU10_NAME_VERTEX_ID ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_IS_FRONT_FACE);
+   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
+          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
+
+   check_register_index(emit, opcodeType, index);
+
+   opcode0.value = operand0.value = name_token.value = 0;
+
+   opcode0.opcodeType = opcodeType;
+   opcode0.interpolationMode = interpMode;
+
+   operand0.operandType = operandType;
+   operand0.numComponents = numComp;
+   operand0.selectionMode = selMode;
+   operand0.mask = usageMask;
+   operand0.indexDimension = dim;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   if (dim == VGPU10_OPERAND_INDEX_2D)
+      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   name_token.name = name;
+
+   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
+}
+
+
+/**
+ * Emit the declaration for a shader output.
+ * \param type one of VGPU10_OPCODE_DCL_OUTPUTx + * \param index the output register index + * \param name one of VGPU10_NAME_x + * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values + */ +static void +emit_output_declaration(struct svga_shader_emitter_v10 *emit, + unsigned type, unsigned index, + unsigned name, unsigned usageMask) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + assert(type == VGPU10_OPCODE_DCL_OUTPUT || + type == VGPU10_OPCODE_DCL_OUTPUT_SGV || + type == VGPU10_OPCODE_DCL_OUTPUT_SIV); + assert(name == VGPU10_NAME_UNDEFINED || + name == VGPU10_NAME_POSITION || + name == VGPU10_NAME_PRIMITIVE_ID || + name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || + name == VGPU10_NAME_CLIP_DISTANCE); + + check_register_index(emit, type, index); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = type; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = usageMask; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + name_token.name = name; + + emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); +} + + +/** + * Emit the declaration for the fragment depth output. + */ +static void +emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + + emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); +} + + +/** + * Emit the declaration for a system value input/output. + */ +static void +emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, + unsigned semantic_name, unsigned index) +{ + switch (semantic_name) { + case TGSI_SEMANTIC_INSTANCEID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_INSTANCE_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED); + break; + case TGSI_SEMANTIC_VERTEXID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_VERTEX_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED); + break; + default: + ; /* XXX */ + } +} + +/** + * Translate a TGSI declaration to VGPU10. 
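 * Most register files are only counted here and declared later.  For example
 * (illustrative), "DCL TEMP[2..4], ARRAY(1), LOCAL" just records
 * temp_arrays[1] = {start 2, size 3} for emit_temporaries_declaration().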
+ */ +static boolean +emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_declaration *decl) +{ + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + /* do nothing - see emit_input_declarations() */ + return TRUE; + + case TGSI_FILE_OUTPUT: + assert(decl->Range.First == decl->Range.Last); + emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; + return TRUE; + + case TGSI_FILE_TEMPORARY: + /* Don't declare the temps here. Just keep track of how many + * and emit the declaration later. + */ + if (decl->Declaration.Array) { + /* Indexed temporary array. Save the start index of the array + * and the size of the array. + */ + const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); + unsigned i; + + assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); + + /* Save this array so we can emit the declaration for it later */ + emit->temp_arrays[arrayID].start = decl->Range.First; + emit->temp_arrays[arrayID].size = + decl->Range.Last - decl->Range.First + 1; + + emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); + assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); + emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); + + /* Fill in the temp_map entries for this array */ + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + emit->temp_map[i].arrayId = arrayID; + emit->temp_map[i].index = i - decl->Range.First; + } + } + + /* for all temps, indexed or not, keep track of highest index */ + emit->num_shader_temps = MAX2(emit->num_shader_temps, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_CONSTANT: + /* Don't declare constants here. Just keep track and emit later. */ + { + unsigned constbuf = 0, num_consts; + if (decl->Declaration.Dimension) { + constbuf = decl->Dim.Index2D; + } + /* We throw an assertion here when, in fact, the shader should never + * have linked due to constbuf index out of bounds, so we shouldn't + * have reached here. + */ + assert(constbuf < Elements(emit->num_shader_consts)); + + num_consts = MAX2(emit->num_shader_consts[constbuf], + decl->Range.Last + 1); + + if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + debug_printf("Warning: constant buffer is declared to size [%u]" + " but [%u] is the limit.\n", + num_consts, + VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + /* The linker doesn't enforce the max UBO size so we clamp here */ + emit->num_shader_consts[constbuf] = + MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + return TRUE; + + case TGSI_FILE_IMMEDIATE: + assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); + return FALSE; + + case TGSI_FILE_SYSTEM_VALUE: + emit_system_value_declaration(emit, decl->Semantic.Name, + decl->Range.First); + return TRUE; + + case TGSI_FILE_SAMPLER: + /* Don't declare samplers here. Just keep track and emit later. */ + emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); + return TRUE; + +#if 0 + case TGSI_FILE_RESOURCE: + /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ + /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ + assert(!"TGSI_FILE_RESOURCE not handled yet"); + return FALSE; +#endif + + case TGSI_FILE_ADDRESS: + emit->num_address_regs = MAX2(emit->num_address_regs, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_SAMPLER_VIEW: + /* Not used at this time, but maybe in the future. + * See emit_resource_declarations(). + */ + return TRUE; + + default: + assert(!"Unexpected type of declaration"); + return FALSE; + } +} + + + +/** + * Emit all input declarations. 
+ */ +static boolean +emit_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + + for (i = 0; i < emit->linkage.num_inputs; i++) { + unsigned semantic_name = emit->info.input_semantic_name[i]; + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + unsigned type, interpolationMode, name; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* fragment position input */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_LINEAR; + name = VGPU10_NAME_POSITION; + if (usage_mask & TGSI_WRITEMASK_W) { + /* we need to replace use of 'w' with '1/w' */ + emit->fs.fragcoord_input_index = i; + } + } + else if (semantic_name == TGSI_SEMANTIC_FACE) { + /* fragment front-facing input */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_IS_FRONT_FACE; + emit->fs.face_input_index = i; + } + else if (semantic_name == TGSI_SEMANTIC_PRIMID) { + /* primitive ID */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_PRIMITIVE_ID; + } + else { + /* general fragment input */ + type = VGPU10_OPCODE_DCL_INPUT_PS; + interpolationMode = + translate_interpolation(emit, + emit->info.input_interpolate[i], + emit->info.input_interpolate_loc[i]); + + /* keeps track if flat interpolation mode is being used */ + emit->uses_flat_interp = emit->uses_flat_interp || + (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); + + name = VGPU10_NAME_UNDEFINED; + } + + emit_input_declaration(emit, type, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, index, 1, + name, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + interpolationMode); + } + } + else if (emit->unit == PIPE_SHADER_GEOMETRY) { + + for (i = 0; i < emit->info.num_inputs; i++) { + unsigned semantic_name = emit->info.input_semantic_name[i]; + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + unsigned opcodeType, operandType; + unsigned numComp, selMode; + unsigned name; + unsigned dim; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + opcodeType = VGPU10_OPCODE_DCL_INPUT; + operandType = VGPU10_OPERAND_TYPE_INPUT; + numComp = VGPU10_OPERAND_4_COMPONENT; + selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + name = VGPU10_NAME_UNDEFINED; + + /* all geometry shader inputs are two dimensional except gl_PrimitiveID */ + dim = VGPU10_OPERAND_INDEX_2D; + + if (semantic_name == TGSI_SEMANTIC_PRIMID) { + /* Primitive ID */ + operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + dim = VGPU10_OPERAND_INDEX_0D; + numComp = VGPU10_OPERAND_0_COMPONENT; + selMode = 0; + + /* also save the register index so we can check for + * primitive id when emit src register. We need to modify the + * operand type, index dimension when emit primitive id src reg. 
+ */ + emit->gs.prim_id_index = i; + } + else if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* vertex position input */ + opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; + name = VGPU10_NAME_POSITION; + } + + emit_input_declaration(emit, opcodeType, operandType, + dim, index, + emit->gs.input_size, + name, + numComp, selMode, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED); + } + } + else { + assert(emit->unit == PIPE_SHADER_VERTEX); + + for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = i; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED); + } + } + + return TRUE; +} + + +/** + * Emit all output declarations. + */ +static boolean +emit_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->info.num_outputs; i++) { + /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ + const unsigned semantic_name = emit->info.output_semantic_name[i]; + const unsigned semantic_index = emit->info.output_semantic_index[i]; + unsigned index = i; + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (semantic_name == TGSI_SEMANTIC_COLOR) { + assert(semantic_index < Elements(emit->fs.color_out_index)); + + emit->fs.color_out_index[semantic_index] = index; + + /* The semantic index is the shader's color output/buffer index */ + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT, semantic_index, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + + if (semantic_index == 0) { + if (emit->key.fs.write_color0_to_n_cbufs > 1) { + /* Emit declarations for the additional color outputs + * for broadcasting. 
+ */ + unsigned j; + for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { + /* Allocate a new output index */ + unsigned idx = emit->info.num_outputs + j - 1; + emit->fs.color_out_index[j] = idx; + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT, idx, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + emit->info.output_semantic_index[idx] = j; + } + } + } + else { + assert(!emit->key.fs.write_color0_to_n_cbufs); + } + } + else if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* Fragment depth output */ + emit_fragdepth_output_declaration(emit); + } + else { + assert(!"Bad output semantic name"); + } + } + else { + /* VS or GS */ + unsigned name, type; + unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + + switch (semantic_name) { + case TGSI_SEMANTIC_POSITION: + assert(emit->unit != PIPE_SHADER_FRAGMENT); + type = VGPU10_OPCODE_DCL_OUTPUT_SIV; + name = VGPU10_NAME_POSITION; + /* Save the index of the vertex position output register */ + emit->vposition.out_index = index; + break; + case TGSI_SEMANTIC_CLIPDIST: + type = VGPU10_OPCODE_DCL_OUTPUT_SIV; + name = VGPU10_NAME_CLIP_DISTANCE; + /* save the starting index of the clip distance output register */ + if (semantic_index == 0) + emit->clip_dist_out_index = index; + writemask = emit->output_usage_mask[index]; + writemask = apply_clip_plane_mask(emit, writemask, semantic_index); + if (writemask == 0x0) { + continue; /* discard this do-nothing declaration */ + } + break; + case TGSI_SEMANTIC_PRIMID: + assert(emit->unit == PIPE_SHADER_GEOMETRY); + type = VGPU10_OPCODE_DCL_OUTPUT_SGV; + name = VGPU10_NAME_PRIMITIVE_ID; + break; + case TGSI_SEMANTIC_LAYER: + assert(emit->unit == PIPE_SHADER_GEOMETRY); + type = VGPU10_OPCODE_DCL_OUTPUT_SGV; + name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + type = VGPU10_OPCODE_DCL_OUTPUT; + name = VGPU10_NAME_UNDEFINED; + emit->clip_vertex_out_index = index; + break; + default: + /* generic output */ + type = VGPU10_OPCODE_DCL_OUTPUT; + name = VGPU10_NAME_UNDEFINED; + } + + emit_output_declaration(emit, type, index, name, writemask); + } + } + + if (emit->vposition.so_index != INVALID_INDEX && + emit->vposition.out_index != INVALID_INDEX) { + + assert(emit->unit != PIPE_SHADER_FRAGMENT); + + /* Emit the declaration for the non-adjusted vertex position + * for stream output purpose + */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->vposition.so_index, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + } + + if (emit->clip_dist_so_index != INVALID_INDEX && + emit->clip_dist_out_index != INVALID_INDEX) { + + assert(emit->unit != PIPE_SHADER_FRAGMENT); + + /* Emit the declaration for the clip distance shadow copy which + * will be used for stream output purpose and for clip distance + * varying variable + */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->clip_dist_so_index, + VGPU10_NAME_UNDEFINED, + emit->output_usage_mask[emit->clip_dist_out_index]); + + if (emit->info.num_written_clipdistance > 4) { + /* for the second clip distance register, each handles 4 planes */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->clip_dist_so_index + 1, + VGPU10_NAME_UNDEFINED, + emit->output_usage_mask[emit->clip_dist_out_index+1]); + } + } + + return TRUE; +} + + +/** + * Emit the declaration for the temporary registers. 
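 * This covers the ordinary temps (one VGPU10_OPCODE_DCL_TEMPS count), the
 * indexable temp arrays (one VGPU10_OPCODE_DCL_INDEXABLE_TEMP declaration
 * each), and the extra internal temps allocated for position, clip, color,
 * face/fragcoord and address handling.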
+ */ +static boolean +emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) +{ + unsigned total_temps, reg, i; + + total_temps = emit->num_shader_temps; + + /* Allocate extra temps for specially-implemented instructions, + * such as LIT. + */ + total_temps += MAX_INTERNAL_TEMPS; + + if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { + if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || + emit->key.clip_plane_enable || + emit->vposition.so_index != INVALID_INDEX) { + emit->vposition.tmp_index = total_temps; + total_temps += 1; + } + + if (emit->unit == PIPE_SHADER_VERTEX) { + unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | + emit->key.vs.adjust_attrib_itof | + emit->key.vs.adjust_attrib_utof | + emit->key.vs.attrib_is_bgra | + emit->key.vs.attrib_puint_to_snorm | + emit->key.vs.attrib_puint_to_uscaled | + emit->key.vs.attrib_puint_to_sscaled); + while (attrib_mask) { + unsigned index = u_bit_scan(&attrib_mask); + emit->vs.adjusted_input[index] = total_temps++; + } + } + + if (emit->clip_mode == CLIP_DISTANCE) { + /* We need to write the clip distance to a temporary register + * first. Then it will be copied to the shadow copy for + * the clip distance varying variable and stream output purpose. + * It will also be copied to the actual CLIPDIST register + * according to the enabled clip planes + */ + emit->clip_dist_tmp_index = total_temps++; + if (emit->info.num_written_clipdistance > 4) + total_temps++; /* second clip register */ + } + else if (emit->clip_mode == CLIP_VERTEX) { + /* We need to convert the TGSI CLIPVERTEX output to one or more + * clip distances. Allocate a temp reg for the clipvertex here. + */ + assert(emit->info.writes_clipvertex > 0); + emit->clip_vertex_tmp_index = total_temps; + total_temps++; + } + } + else if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || + emit->key.fs.white_fragments || + emit->key.fs.write_color0_to_n_cbufs > 1) { + /* Allocate a temp to hold the output color */ + emit->fs.color_tmp_index = total_temps; + total_temps += 1; + } + + if (emit->fs.face_input_index != INVALID_INDEX) { + /* Allocate a temp for the +/-1 face register */ + emit->fs.face_tmp_index = total_temps; + total_temps += 1; + } + + if (emit->fs.fragcoord_input_index != INVALID_INDEX) { + /* Allocate a temp for modified fragment position register */ + emit->fs.fragcoord_tmp_index = total_temps; + total_temps += 1; + } + } + + for (i = 0; i < emit->num_address_regs; i++) { + emit->address_reg_index[i] = total_temps++; + } + + /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 + * temp indexes. Basically, we compact all the non-array temp register + * indexes into a consecutive series. 
+ * + * Before, we may have some TGSI declarations like: + * DCL TEMP[0..1], LOCAL + * DCL TEMP[2..4], ARRAY(1), LOCAL + * DCL TEMP[5..7], ARRAY(2), LOCAL + * plus, some extra temps, like TEMP[8], TEMP[9] for misc things + * + * After, we'll have a map like this: + * temp_map[0] = { array 0, index 0 } + * temp_map[1] = { array 0, index 1 } + * temp_map[2] = { array 1, index 0 } + * temp_map[3] = { array 1, index 1 } + * temp_map[4] = { array 1, index 2 } + * temp_map[5] = { array 2, index 0 } + * temp_map[6] = { array 2, index 1 } + * temp_map[7] = { array 2, index 2 } + * temp_map[8] = { array 0, index 2 } + * temp_map[9] = { array 0, index 3 } + * + * We'll declare two arrays of 3 elements, plus a set of four non-indexed + * temps numbered 0..3 + * + * Any time we emit a temporary register index, we'll have to use the + * temp_map[] table to convert the TGSI index to the VGPU10 index. + * + * Finally, we recompute the total_temps value here. + */ + reg = 0; + for (i = 0; i < total_temps; i++) { + if (emit->temp_map[i].arrayId == 0) { + emit->temp_map[i].index = reg++; + } + } + total_temps = reg; + + if (0) { + debug_printf("total_temps %u\n", total_temps); + for (i = 0; i < 30; i++) { + debug_printf("temp %u -> array %u index %u\n", + i, emit->temp_map[i].arrayId, emit->temp_map[i].index); + } + } + + /* Emit declaration of ordinary temp registers */ + if (total_temps > 0) { + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, total_temps); + end_emit_instruction(emit); + } + + /* Emit declarations for indexable temp arrays. Skip 0th entry since + * it's unused. + */ + for (i = 1; i < emit->num_temp_arrays; i++) { + unsigned num_temps = emit->temp_arrays[i].size; + + if (num_temps > 0) { + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, i); /* which array */ + emit_dword(emit, num_temps); + emit_dword(emit, 4); /* num components */ + end_emit_instruction(emit); + + total_temps += num_temps; + } + } + + /* Check that the grand total of all regular and indexed temps is + * under the limit. + */ + check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); + + return TRUE; +} + + +static boolean +emit_constant_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + unsigned total_consts, i; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; + opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; + /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = 0; + operand0.swizzleY = 1; + operand0.swizzleZ = 2; + operand0.swizzleW = 3; + + /** + * Emit declaration for constant buffer [0]. We also allocate + * room for the extra constants here. 
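 * (Illustrative example with assumed counts: a vertex shader using 10 user
 * constants, with prescale enabled and one clip plane, would declare
 * 10 + 2 + 1 = 13 vec4 slots for constant buffer 0.)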
+ */ + total_consts = emit->num_shader_consts[0]; + + /* Now, allocate constant slots for the "extra" constants */ + + /* Vertex position scale/translation */ + if (emit->vposition.need_prescale) { + emit->vposition.prescale_scale_index = total_consts++; + emit->vposition.prescale_trans_index = total_consts++; + } + + if (emit->unit == PIPE_SHADER_VERTEX) { + if (emit->key.vs.undo_viewport) { + emit->vs.viewport_index = total_consts++; + } + } + + /* user-defined clip planes */ + if (emit->key.clip_plane_enable) { + unsigned n = util_bitcount(emit->key.clip_plane_enable); + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + for (i = 0; i < n; i++) { + emit->clip_plane_const[i] = total_consts++; + } + } + + /* Texcoord scale factors for RECT textures */ + { + for (i = 0; i < emit->num_samplers; i++) { + if (emit->key.tex[i].unnormalized) { + emit->texcoord_scale_index[i] = total_consts++; + } + } + } + + /* Texture buffer sizes */ + for (i = 0; i < emit->num_samplers; i++) { + if (emit->key.tex[i].texture_target == PIPE_BUFFER) { + emit->texture_buffer_size_index[i] = total_consts++; + } + } + + if (total_consts > 0) { + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, 0); /* which const buffer slot */ + emit_dword(emit, total_consts); + end_emit_instruction(emit); + } + + /* Declare remaining constant buffers (UBOs) */ + for (i = 1; i < Elements(emit->num_shader_consts); i++) { + if (emit->num_shader_consts[i] > 0) { + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); /* which const buffer slot */ + emit_dword(emit, emit->num_shader_consts[i]); + end_emit_instruction(emit); + } + } + + return TRUE; +} + + +/** + * Emit declarations for samplers. + */ +static boolean +emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; + opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); + end_emit_instruction(emit); + } + + return TRUE; +} + + +/** + * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. + */ +static unsigned +pipe_texture_to_resource_dimension(unsigned target, bool msaa) +{ + switch (target) { + case PIPE_BUFFER: + return VGPU10_RESOURCE_DIMENSION_BUFFER; + case PIPE_TEXTURE_1D: + return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS + : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + case PIPE_TEXTURE_3D: + return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; + case PIPE_TEXTURE_CUBE: + return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + case PIPE_TEXTURE_1D_ARRAY: + return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY; + case PIPE_TEXTURE_2D_ARRAY: + return msaa ? 
VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY + : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; + case PIPE_TEXTURE_CUBE_ARRAY: + return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; + default: + assert(!"Unexpected resource type"); + return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + } +} + + +/** + * Given a tgsi_return_type, return true iff it is an integer type. + */ +static boolean +is_integer_type(enum tgsi_return_type type) +{ + switch (type) { + case TGSI_RETURN_TYPE_SINT: + case TGSI_RETURN_TYPE_UINT: + return TRUE; + case TGSI_RETURN_TYPE_FLOAT: + case TGSI_RETURN_TYPE_UNORM: + case TGSI_RETURN_TYPE_SNORM: + return FALSE; + case TGSI_RETURN_TYPE_COUNT: + default: + assert(!"is_integer_type: Unknown tgsi_return_type"); + return FALSE; + } +} + + +/** + * Emit declarations for resources. + * XXX When we're sure that all TGSI shaders will be generated with + * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may + * rework this code. + */ +static boolean +emit_resource_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + /* Emit resource decl for each sampler */ + for (i = 0; i < emit->num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10ResourceReturnTypeToken return_type; + VGPU10_RESOURCE_RETURN_TYPE rt; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; + opcode0.resourceDimension = + pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target, + emit->key.tex[i].texture_msaa); + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + +#if 1 + /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ + STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); + assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT); + rt = emit->key.tex[i].return_type + 1; +#else + switch (emit->key.tex[i].return_type) { + case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; + case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; + case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; + case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; + case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; + case TGSI_RETURN_TYPE_COUNT: + default: + rt = VGPU10_RETURN_TYPE_FLOAT; + assert(!"emit_resource_declarations: Unknown tgsi_return_type"); + } +#endif + + return_type.value = 0; + return_type.component0 = rt; + return_type.component1 = rt; + return_type.component2 = rt; + return_type.component3 = rt; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); + emit_dword(emit, return_type.value); + end_emit_instruction(emit); + } + + return TRUE; +} + +static void +emit_instruction_op1(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src); + 
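+   /* (A one-operand instruction is just the opcode token(s) followed by
+    * one destination and one source operand; the op2/op3 helpers below
+    * only differ in how many source registers they append.)
+    */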
end_emit_instruction(emit); +} + +static void +emit_instruction_op2(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src1, + const struct tgsi_full_src_register *src2, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src1); + emit_src_register(emit, src2); + end_emit_instruction(emit); +} + +static void +emit_instruction_op3(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src1, + const struct tgsi_full_src_register *src2, + const struct tgsi_full_src_register *src3, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src1); + emit_src_register(emit, src2); + emit_src_register(emit, src3); + end_emit_instruction(emit); +} + +/** + * Emit the actual clip distance instructions to be used for clipping + * by copying the clip distance from the temporary registers to the + * CLIPDIST registers written with the enabled planes mask. + * Also copy the clip distance from the temporary to the clip distance + * shadow copy register which will be referenced by the input shader + */ +static void +emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) +{ + struct tgsi_full_src_register tmp_clip_dist_src; + struct tgsi_full_dst_register clip_dist_dst; + + unsigned i; + unsigned clip_plane_enable = emit->key.clip_plane_enable; + unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; + int num_written_clipdist = emit->info.num_written_clipdistance; + + assert(emit->clip_dist_out_index != INVALID_INDEX); + assert(emit->clip_dist_tmp_index != INVALID_INDEX); + + /** + * Temporary reset the temporary clip dist register index so + * that the copy to the real clip dist register will not + * attempt to copy to the temporary register again + */ + emit->clip_dist_tmp_index = INVALID_INDEX; + + for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { + + tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); + + /** + * copy to the shadow copy for use by varying variable and + * stream output. All clip distances + * will be written regardless of the enabled clipping planes. + */ + clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, + emit->clip_dist_so_index + i); + + /* MOV clip_dist_so, tmp_clip_dist */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, + &tmp_clip_dist_src, FALSE); + + /** + * copy those clip distances to enabled clipping planes + * to CLIPDIST registers for clipping + */ + if (clip_plane_enable & 0xf) { + clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, + emit->clip_dist_out_index + i); + clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); + + /* MOV CLIPDIST, tmp_clip_dist */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, + &tmp_clip_dist_src, FALSE); + } + /* four clip planes per clip register */ + clip_plane_enable >>= 4; + } + /** + * set the temporary clip dist register index back to the + * temporary index for the next vertex + */ + emit->clip_dist_tmp_index = clip_dist_tmp_index; +} + +/* Declare clip distance output registers for user-defined clip planes + * or the TGSI_CLIPVERTEX output. 
+ */ +static void +emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); + unsigned index = emit->num_outputs; + unsigned plane_mask; + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + assert(num_clip_planes <= 8); + + if (emit->clip_mode != CLIP_LEGACY && + emit->clip_mode != CLIP_VERTEX) { + return; + } + + if (num_clip_planes == 0) + return; + + /* Declare one or two clip output registers. The number of components + * in the mask reflects the number of clip planes. For example, if 5 + * clip planes are needed, we'll declare outputs similar to: + * dcl_output_siv o2.xyzw, clip_distance + * dcl_output_siv o3.x, clip_distance + */ + emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ + + plane_mask = (1 << num_clip_planes) - 1; + if (plane_mask & 0xf) { + unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, + VGPU10_NAME_CLIP_DISTANCE, cmask); + emit->num_outputs++; + } + if (plane_mask & 0xf0) { + unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, + VGPU10_NAME_CLIP_DISTANCE, cmask); + emit->num_outputs++; + } +} + + +/** + * Emit the instructions for writing to the clip distance registers + * to handle legacy/automatic clip planes. + * For each clip plane, the distance is the dot product of the vertex + * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. + * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE + * output registers already declared. + */ +static void +emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, + unsigned vpos_tmp_index) +{ + unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); + + assert(emit->clip_mode == CLIP_LEGACY); + assert(num_clip_planes <= 8); + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + + for (i = 0; i < num_clip_planes; i++) { + struct tgsi_full_dst_register dst; + struct tgsi_full_src_register plane_src, vpos_src; + unsigned reg_index = emit->clip_dist_out_index + i / 4; + unsigned comp = i % 4; + unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; + + /* create dst, src regs */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); + dst = writemask_dst(&dst, writemask); + + plane_src = make_src_const_reg(emit->clip_plane_const[i]); + vpos_src = make_src_temp_reg(vpos_tmp_index); + + /* DP4 clip_dist, plane, vpos */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, + &plane_src, &vpos_src, FALSE); + } +} + + +/** + * Emit the instructions for computing the clip distance results from + * the clip vertex temporary. + * For each clip plane, the distance is the dot product of the clip vertex + * position (found in a temp reg) and the clip plane coefficients. 
+ */ +static void +emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) +{ + const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); + unsigned i; + struct tgsi_full_dst_register dst; + struct tgsi_full_src_register clipvert_src; + const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + + assert(emit->clip_mode == CLIP_VERTEX); + + clipvert_src = make_src_temp_reg(clip_vertex_tmp); + + for (i = 0; i < num_clip; i++) { + struct tgsi_full_src_register plane_src; + unsigned reg_index = emit->clip_dist_out_index + i / 4; + unsigned comp = i % 4; + unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; + + /* create dst, src regs */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); + dst = writemask_dst(&dst, writemask); + + plane_src = make_src_const_reg(emit->clip_plane_const[i]); + + /* DP4 clip_dist, plane, vpos */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, + &plane_src, &clipvert_src, FALSE); + } + + /* copy temporary clip vertex register to the clip vertex register */ + + assert(emit->clip_vertex_out_index != INVALID_INDEX); + + /** + * temporary reset the temporary clip vertex register index so + * that copy to the clip vertex register will not attempt + * to copy to the temporary register again + */ + emit->clip_vertex_tmp_index = INVALID_INDEX; + + /* MOV clip_vertex, clip_vertex_tmp */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst, &clipvert_src, FALSE); + + /** + * set the temporary clip vertex register index back to the + * temporary index for the next vertex + */ + emit->clip_vertex_tmp_index = clip_vertex_tmp; +} + +/** + * Emit code to convert RGBA to BGRA + */ +static void +emit_swap_r_b(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register bgra_src = + swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, dst); + emit_src_register(emit, &bgra_src); + end_emit_instruction(emit); +} + + +/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ +static void +emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); + struct tgsi_full_src_register two = + make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); + struct tgsi_full_src_register neg_two = + make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); + + unsigned val_tmp = get_temp_index(emit); + struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); + struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); + + unsigned bias_tmp = get_temp_index(emit); + struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); + struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); + + /* val = src * 2.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, + src, &two, FALSE); + + /* bias = src > 0.5 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, + src, &half, FALSE); + + /* bias = bias & -2.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, + &bias_src, &neg_two, FALSE); + + /* dst = val + bias */ + 
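+   /* (Worked example with illustrative values, .x/.y/.z channels only:
+    * a unorm source of 0.25 gives val = 0.5 and bias = 0.0, so dst = 0.5;
+    * a source of 0.75 gives val = 1.5 and bias = -2.0, so dst = -0.5.
+    * Sources in the upper half of the unorm range correspond to bit
+    * patterns that are negative when reinterpreted as snorm.  The .w
+    * immediates differ because alpha is only 2 bits wide.)
+    */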
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, + &val_src, &bias_src, FALSE); + + free_temp_indexes(emit); +} + + +/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ +static void +emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register scale = + make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); + + /* dst = src * scale */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); +} + + +/** Convert from R32_UINT to 10_10_10_2_sscaled */ +static void +emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register lshift = + make_immediate_reg_int4(emit, 22, 12, 2, 0); + struct tgsi_full_src_register rshift = + make_immediate_reg_int4(emit, 22, 22, 22, 30); + + struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + + /* + * r = (pixel << 22) >> 22; # signed int in [511, -512] + * g = (pixel << 12) >> 22; # signed int in [511, -512] + * b = (pixel << 2) >> 22; # signed int in [511, -512] + * a = (pixel << 0) >> 30; # signed int in [1, -2] + * dst = i_to_f(r,g,b,a); # convert to float + */ + emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, + &src_xxxx, &lshift, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, + &tmp_src, &rshift, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); + + free_temp_indexes(emit); +} + + +/** + * Emit code for TGSI_OPCODE_ABS instruction. + */ +static boolean +emit_abs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = ABS(s0): + * dst = abs(s0) + * Translates into: + * MOV dst, abs(s0) + */ + struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]); + + /* MOV dst, abs(s0) */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &abs_src0, inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. + */ +static boolean +emit_arl_uarl(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned index = inst->Dst[0].Register.Index; + struct tgsi_full_dst_register dst; + unsigned opcode; + + assert(index < MAX_VGPU10_ADDR_REGS); + dst = make_dst_temp_reg(emit->address_reg_index[index]); + + /* ARL dst, s0 + * Translates into: + * FTOI address_tmp, s0 + * + * UARL dst, s0 + * Translates into: + * MOV address_tmp, s0 + */ + if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) + opcode = VGPU10_OPCODE_FTOI; + else + opcode = VGPU10_OPCODE_MOV; + + emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_CAL instruction. + */ +static boolean +emit_cal(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned label = inst->Label.Label; + VGPU10OperandToken0 operand; + operand.value = 0; + operand.operandType = VGPU10_OPERAND_TYPE_LABEL; + + begin_emit_instruction(emit); + emit_dword(emit, operand.value); + emit_dword(emit, label); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_IABS instruction. 
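+ * (For example, src.x = -7 becomes IMAX(-7, 7) = 7.  The one wrap-around
+ * case is INT_MIN, which negates to itself, so IABS(INT_MIN) stays
+ * INT_MIN, as with ordinary two's-complement abs().)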
+ */ +static boolean +emit_iabs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src0.x < 0) ? -src0.x : src0.x + * dst.y = (src0.y < 0) ? -src0.y : src0.y + * dst.z = (src0.z < 0) ? -src0.z : src0.z + * dst.w = (src0.w < 0) ? -src0.w : src0.w + * + * Translates into + * IMAX dst, src, neg(src) + */ + struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); + emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], + &inst->Src[0], &neg_src, FALSE); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_CMP instruction. + */ +static boolean +emit_cmp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src0.x < 0) ? src1.x : src2.x + * dst.y = (src0.y < 0) ? src1.y : src2.y + * dst.z = (src0.z < 0) ? src1.z : src2.z + * dst.w = (src0.w < 0) ? src1.w : src2.w + * + * Translates into + * LT tmp, src0, 0.0 + * MOVC dst, tmp, src1, src2 + */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, + &inst->Src[0], &zero, FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], + &tmp_src, &inst->Src[1], &inst->Src[2], + inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DP2A instruction. + */ +static boolean +emit_dp2a(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x + * Translate into + * MAD tmp.x, s0.y, s1.y, s2.x + * MAD tmp.x, s0.x, s1.x, tmp.x + * MOV dst.xyzw, tmp.xxxx + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + + struct tgsi_full_src_register src0_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src0_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src1_xxxx = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src1_yyyy = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src2_xxxx = + scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); + + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, + &src1_yyyy, &src2_xxxx, FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, + &src1_xxxx, &tmp_src_xxxx, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DPH instruction. 
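+ * DPH is the homogeneous dot product, i.e. DP4 with src0.w treated as
+ * 1.0:
+ *   dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w
+ * which is why it can be built from a DP3 plus an ADD of src1.w.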
+ */ +static boolean +emit_dph(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * DP3 tmp, s0, s1 + * ADD dst, tmp, s1.wwww + */ + + struct tgsi_full_src_register s1_wwww = + swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, + TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + /* DP3 tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* ADD dst, tmp, s1.wwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src, + &s1_wwww, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DST instruction. + */ +static boolean +emit_dst(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = 1 + * dst.y = src0.y * src1.y + * dst.z = src0.z + * dst.w = src1.w + */ + + struct tgsi_full_src_register s0_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s0_zzzz = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); + struct tgsi_full_src_register s1_yyyy = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s1_wwww = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); + + /* + * If dst and either src0 and src1 are the same we need + * to create a temporary for it and insert a extra move. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* MOV dst.x, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + } + + /* MUL dst.y, s0.y, s1.y */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, + &s1_yyyy, inst->Instruction.Saturate); + } + + /* MOV dst.z, s0.z */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz, + inst->Instruction.Saturate); + } + + /* MOV dst.w, s1.w */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww, + inst->Instruction.Saturate); + } + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + free_temp_indexes(emit); + + return TRUE; +} + + + +/** + * Emit code for TGSI_OPCODE_ENDPRIM (GS only) + */ +static boolean +emit_endprim(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + /* We can't use emit_simple() because the TGSI instruction has one + * operand (vertex stream number) which we must ignore for VGPU10. 
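+    * (VGPU10's CUT ends the strip on the single geometry-shader output
+    * stream; per-stream cuts only appear in later shader models, so the
+    * stream number has nothing to map to here.)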
+ */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); + end_emit_instruction(emit); + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. + */ +static boolean +emit_ex2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_EX2 computes only one value from src.x + * while VGPU10 computes four values. + * + * dst = EX2(src): + * dst.xyzw = 2.0 ^ src.x + */ + + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* EXP tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_EXP instruction. + */ +static boolean +emit_exp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = 2 ^ floor(s0.x) + * dst.y = s0.x - floor(s0.x) + * dst.z = 2 ^ s0.x + * dst.w = 1.0 + */ + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + /* + * If dst and src are the same we need to create + * a temporary for it and insert a extra move. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* only use X component of temp reg */ + tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* ROUND_NI tmp.x, s0.x */ + emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, + &src_xxxx, FALSE); /* round to -infinity */ + + /* EXP dst.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, + inst->Instruction.Saturate); + } + + /* ADD dst.y, s0.x, -tmp */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); + + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, + &neg_tmp_src, inst->Instruction.Saturate); + } + + /* EXP dst.z, s0.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, + inst->Instruction.Saturate); + } + + /* MOV dst.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, + FALSE); + } + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_IF instruction. 
+ */ +static boolean +emit_if(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + VGPU10OpcodeToken0 opcode0; + + /* The src register should be a scalar */ + assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY && + inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ && + inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW); + + /* The only special thing here is that we need to set the + * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if + * src.x is non-zero. + */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_IF; + opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of + * the register components are negative). + */ +static boolean +emit_kill_if(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* tmp = src[0] < 0.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], + &zero, FALSE); + + if (!same_swizzle_terms(&inst->Src[0])) { + /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to + * logically OR the swizzle terms. Most uses of KILL_IF only + * test one channel so it's good to avoid these extra steps. + */ + struct tgsi_full_src_register tmp_src_yyyy = + scalar_src(&tmp_src, TGSI_SWIZZLE_Y); + struct tgsi_full_src_register tmp_src_zzzz = + scalar_src(&tmp_src, TGSI_SWIZZLE_Z); + struct tgsi_full_src_register tmp_src_wwww = + scalar_src(&tmp_src, TGSI_SWIZZLE_W); + + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_wwww, FALSE); + } + + begin_emit_instruction(emit); + emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ + emit_src_register(emit, &tmp_src_xxxx); + end_emit_instruction(emit); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). + */ +static boolean +emit_kill(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + /* DISCARD if 0.0 is zero */ + begin_emit_instruction(emit); + emit_discard_opcode(emit, FALSE); + emit_src_register(emit, &zero); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LG2 instruction. + */ +static boolean +emit_lg2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_LG2 computes only one value from src.x + * while VGPU10 computes four values. 
+ * + * dst = LG2(src): + * dst.xyzw = log2(src.x) + */ + + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* LOG tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LIT instruction. + */ +static boolean +emit_lit(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* + * If dst and src are the same we need to create + * a temporary for it and insert a extra move. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* + * dst.x = 1 + * dst.y = max(src.x, 0) + * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 + * dst.w = 1 + */ + + /* MOV dst.x, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + } + + /* MOV dst.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); + } + + /* MAX dst.y, src.x, 0.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, + &zero, inst->Instruction.Saturate); + } + + /* + * tmp1 = clamp(src.w, -128, 128); + * MAX tmp1, src.w, -128 + * MIN tmp1, tmp1, 128 + * + * tmp2 = max(tmp2, 0); + * MAX tmp2, src.y, 0 + * + * tmp1 = pow(tmp2, tmp1); + * LOG tmp2, tmp2 + * MUL tmp1, tmp2, tmp1 + * EXP tmp1, tmp1 + * + * tmp1 = (src.w == 0) ? 1 : tmp1; + * EQ tmp2, 0, src.w + * MOVC tmp1, tmp2, 1.0, tmp1 + * + * dst.z = (0 < src.x) ? 
tmp1 : 0; + * LT tmp2, 0, src.x + * MOVC dst.z, tmp2, tmp1, 0.0 + */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register lowerbound = + make_immediate_reg_float(emit, -128.0f); + struct tgsi_full_src_register upperbound = + make_immediate_reg_float(emit, 128.0f); + + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, + &lowerbound, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, + &upperbound, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, + &zero, FALSE); + + /* POW tmp1, tmp2, tmp1 */ + /* LOG tmp2, tmp2 */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, + FALSE); + + /* MUL tmp1, tmp2, tmp1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, + &tmp1_src, FALSE); + + /* EXP tmp1, tmp1 */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, + FALSE); + + /* EQ tmp2, 0, src.w */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, + &src_wwww, FALSE); + /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, + &tmp2_src, &one, &tmp1_src, FALSE); + + /* LT tmp2, 0, src.x */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, + &src_xxxx, FALSE); + /* MOVC dst.z, tmp2, tmp1, 0.0 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, + &tmp2_src, &tmp1_src, &zero, FALSE); + } + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LOG instruction. 
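+ * (Worked example, illustrative: src.x = 10.0 yields dst.x = 3.0,
+ * dst.y = 10/8 = 1.25, dst.z = log2(10) = ~3.32 and dst.w = 1.0,
+ * matching the per-component breakdown below.)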
+ */ +static boolean +emit_log(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = floor(lg2(abs(s0.x))) + * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) + * dst.z = lg2(abs(s0.x)) + * dst.w = 1.0 + */ + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); + + /* only use X component of temp reg */ + tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* LOG tmp.x, abs(s0.x) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, + &abs_src_xxxx, FALSE); + } + + /* MOV dst.z, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, + &tmp_src, inst->Instruction.Saturate); + } + + /* FLR tmp.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { + emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, + &tmp_src, FALSE); + } + + /* MOV dst.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, + inst->Instruction.Saturate); + } + + /* EXP tmp.x, tmp.x */ + /* DIV dst.y, abs(s0.x), tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, + FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, + &tmp_src, inst->Instruction.Saturate); + } + + /* MOV dst.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LRP instruction. + */ +static boolean +emit_lrp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = LRP(s0, s1, s2): + * dst = s0 * (s1 - s2) + s2 + * Translates into: + * SUB tmp, s1, s2; tmp = s1 - s2 + * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); + struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); + struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); + + /* ADD tmp, s1, -s2 */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, + &inst->Src[1], &neg_src2, FALSE); + + /* MAD dst, s1, tmp, s3 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], + &inst->Src[0], &src_tmp, &inst->Src[2], + inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_POW instruction. 
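+ * (The expansion below computes exp2(src1.x * log2(src0.x)); e.g.
+ * src0.x = 2.0, src1.x = 10.0 gives exp2(10 * 1) = 1024.0.  A negative
+ * base is undefined since log2 of a negative number does not exist.)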
+ */ +static boolean +emit_pow(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_POW computes only one value from src0.x and + * src1.x while VGPU10 computes four values. + * + * dst = POW(src0, src1): + * dst.xyzw = src0.x ^ src1.x + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + struct tgsi_full_src_register src1_xxxx = + swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* LOG tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, + FALSE); + + /* MUL tmp, tmp, s1.xxxx */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, + &src1_xxxx, FALSE); + + /* EXP tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], + &tmp_src, inst->Instruction.Saturate); + + /* free tmp */ + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. + */ +static boolean +emit_rcp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* DIV tmp.x, 1.0, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, + &inst->Src[0], FALSE); + + /* MOV dst, tmp.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RSQ instruction. + */ +static boolean +emit_rsq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = RSQ(src): + * dst.xyzw = 1 / sqrt(src.x) + * Translates into: + * RSQ tmp, src.x + * MOV dst, tmp.xxxx + */ + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* RSQ tmp, src.x */ + emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, + &inst->Src[0], FALSE); + + /* MOV dst, tmp.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + /* free tmp */ + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SCS instruction. 
+ */ +static boolean +emit_scs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = cos(src.x) + * dst.y = sin(src.x) + * dst.z = 0.0 + * dst.w = 1.0 + */ + struct tgsi_full_dst_register dst_x = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); + struct tgsi_full_dst_register dst_y = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); + struct tgsi_full_dst_register dst_zw = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); + + struct tgsi_full_src_register zero_one = + make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); + emit_dst_register(emit, &dst_y); + emit_dst_register(emit, &dst_x); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst_zw, &zero_one, inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. + */ +static boolean +emit_seq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SEQ(s0, s1): + * dst = s0 == s1 ? 1.0 : 0.0 (per component) + * Translates into: + * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* EQ tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. + */ +static boolean +emit_sge(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SGE(s0, s1): + * dst = s0 >= s1 ? 1.0 : 0.0 (per component) + * Translates into: + * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* GE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. + */ +static boolean +emit_sgt(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SGT(s0, s1): + * dst = s0 > s1 ? 1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* LT tmp, s1, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], + &inst->Src[0], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. + */ +static boolean +emit_sincos(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); + + if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) + { + emit_dst_register(emit, &tmp_dst_x); /* first destination register */ + emit_null_dst_register(emit); /* second destination register */ + } + else { + emit_null_dst_register(emit); + emit_dst_register(emit, &tmp_dst_x); + } + + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. + */ +static boolean +emit_sle(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SLE(s0, s1): + * dst = s0 <= s1 ? 1.0 : 0.0 (per component) + * Translates into: + * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* GE tmp, s1, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], + &inst->Src[0], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. + */ +static boolean +emit_slt(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SLT(s0, s1): + * dst = s0 < s1 ? 1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* LT tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. + */ +static boolean +emit_sne(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SNE(s0, s1): + * dst = s0 != s1 ? 1.0 : 0.0 (per component) + * Translates into: + * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* NE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. + */ +static boolean +emit_ssg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 + * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 + * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 + * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 + * Translates into: + * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) + * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) + * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component) + */ + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register neg_one = + make_immediate_reg_float(emit, -1.0f); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], + &zero, FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, + &neg_one, &zero, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, + &inst->Src[0], FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, + &one, &tmp2_src, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 
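+ * (The ILT results are all-ones masks that read back as integer -1, so
+ * e.g. src.x = -5 gives tmp1 = -1, tmp2 = 0 and dst = -1 - 0 = -1;
+ * src.x = 5 gives 0 - (-1) = 1; src.x = 0 gives 0.)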
+ */ +static boolean +emit_issg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 + * Translates into: + * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) + * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component) + * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) + */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); + + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, + &inst->Src[0], &zero, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, + &zero, &inst->Src[0], FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], + &tmp1_src, &neg_tmp2, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SUB instruction. + */ +static boolean +emit_sub(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SUB(s0, s1): + * dst = s0 - s1 + * Translates into: + * ADD dst, s0, neg(s1) + */ + struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); + + /* ADD dst, s0, neg(s1) */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], + &inst->Src[0], &neg_src1, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit a comparison instruction. The dest register will get + * 0 or ~0 values depending on the outcome of comparing src0 to src1. + */ +static void +emit_comparison(struct svga_shader_emitter_v10 *emit, + SVGA3dCmpFunc func, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src0, + const struct tgsi_full_src_register *src1) +{ + struct tgsi_full_src_register immediate; + VGPU10OpcodeToken0 opcode0; + boolean swapSrc = FALSE; + + /* Sanity checks for svga vs. 
gallium enums */ + STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); + STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); + + opcode0.value = 0; + + switch (func) { + case SVGA3D_CMP_NEVER: + immediate = make_immediate_reg_int(emit, 0); + /* MOV dst, {0} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_ALWAYS: + immediate = make_immediate_reg_int(emit, -1); + /* MOV dst, {-1} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_LESS: + opcode0.opcodeType = VGPU10_OPCODE_LT; + break; + case SVGA3D_CMP_EQUAL: + opcode0.opcodeType = VGPU10_OPCODE_EQ; + break; + case SVGA3D_CMP_LESSEQUAL: + opcode0.opcodeType = VGPU10_OPCODE_GE; + swapSrc = TRUE; + break; + case SVGA3D_CMP_GREATER: + opcode0.opcodeType = VGPU10_OPCODE_LT; + swapSrc = TRUE; + break; + case SVGA3D_CMP_NOTEQUAL: + opcode0.opcodeType = VGPU10_OPCODE_NE; + break; + case SVGA3D_CMP_GREATEREQUAL: + opcode0.opcodeType = VGPU10_OPCODE_GE; + break; + default: + assert(!"Unexpected comparison mode"); + opcode0.opcodeType = VGPU10_OPCODE_EQ; + } + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dst_register(emit, dst); + if (swapSrc) { + emit_src_register(emit, src1); + emit_src_register(emit, src0); + } + else { + emit_src_register(emit, src0); + emit_src_register(emit, src1); + } + end_emit_instruction(emit); +} + + +/** + * Get texel/address offsets for a texture instruction. + */ +static void +get_texel_offsets(const struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, int offsets[3]) +{ + if (inst->Texture.NumOffsets == 1) { + /* According to OpenGL Shader Language spec the offsets are only + * fetched from a previously-declared immediate/literal. + */ + const struct tgsi_texture_offset *off = inst->TexOffsets; + const unsigned index = off[0].Index; + const unsigned swizzleX = off[0].SwizzleX; + const unsigned swizzleY = off[0].SwizzleY; + const unsigned swizzleZ = off[0].SwizzleZ; + const union tgsi_immediate_data *imm = emit->immediates[index]; + + assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); + + offsets[0] = imm[swizzleX].Int; + offsets[1] = imm[swizzleY].Int; + offsets[2] = imm[swizzleZ].Int; + } + else { + offsets[0] = offsets[1] = offsets[2] = 0; + } +} + + +/** + * Set up the coordinate register for texture sampling. + * When we're sampling from a RECT texture we have to scale the + * unnormalized coordinate to a normalized coordinate. + * We do that by multiplying the coordinate by an "extra" constant. + * An alternative would be to use the RESINFO instruction to query the + * texture's size. 
+ */ +static struct tgsi_full_src_register +setup_texcoord(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_src_register *coord) +{ + if (emit->key.tex[unit].unnormalized) { + unsigned scale_index = emit->texcoord_scale_index[unit]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); + + /* MUL tmp, coord, const[] */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, + coord, &scale_src, FALSE); + return tmp_src; + } + else { + /* use texcoord as-is */ + return *coord; + } +} + + +/** + * For SAMPLE_C instructions, emit the extra src register which indicates + * the reference/comparision value. + */ +static void +emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, + unsigned target, + const struct tgsi_full_src_register *coord) +{ + struct tgsi_full_src_register coord_src_ref; + unsigned component; + + assert(tgsi_is_shadow_target(target)); + + assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ + if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || + target == TGSI_TEXTURE_SHADOWCUBE) + component = TGSI_SWIZZLE_W; + else + component = TGSI_SWIZZLE_Z; + + coord_src_ref = scalar_src(coord, component); + + emit_src_register(emit, &coord_src_ref); +} + + +/** + * Info for implementing texture swizzles. + * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() + * functions use this to encapsulate the extra steps needed to perform + * a texture swizzle, or shadow/depth comparisons. + * The shadow/depth comparison is only done here if for the cases where + * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). + */ +struct tex_swizzle_info +{ + boolean swizzled; + boolean shadow_compare; + unsigned unit; + unsigned texture_target; /**< TGSI_TEXTURE_x */ + struct tgsi_full_src_register tmp_src; + struct tgsi_full_dst_register tmp_dst; + const struct tgsi_full_dst_register *inst_dst; + const struct tgsi_full_src_register *coord_src; +}; + + +/** + * Do setup for handling texture swizzles or shadow compares. + * \param unit the texture unit + * \param inst the TGSI texture instruction + * \param shadow_compare do shadow/depth comparison? + * \param swz returns the swizzle info + */ +static void +begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_instruction *inst, + boolean shadow_compare, + struct tex_swizzle_info *swz) +{ + swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || + emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || + emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || + emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); + + swz->shadow_compare = shadow_compare; + swz->texture_target = inst->Texture.Texture; + + if (swz->swizzled || shadow_compare) { + /* Allocate temp register for the result of the SAMPLE instruction + * and the source of the MOV/compare/swizzle instructions. + */ + unsigned tmp = get_temp_index(emit); + swz->tmp_src = make_src_temp_reg(tmp); + swz->tmp_dst = make_dst_temp_reg(tmp); + + swz->unit = unit; + } + swz->inst_dst = &inst->Dst[0]; + swz->coord_src = &inst->Src[0]; +} + + +/** + * Returns the register to put the SAMPLE instruction results into. + * This will either be the original instruction dst reg (if no swizzle + * and no shadow comparison) or a temporary reg if there is a swizzle. 
+ */ +static const struct tgsi_full_dst_register * +get_tex_swizzle_dst(const struct tex_swizzle_info *swz) +{ + return (swz->swizzled || swz->shadow_compare) + ? &swz->tmp_dst : swz->inst_dst; +} + + +/** + * This emits the MOV instruction that actually implements a texture swizzle + * and/or shadow comparison. + */ +static void +end_tex_swizzle(struct svga_shader_emitter_v10 *emit, + const struct tex_swizzle_info *swz) +{ + if (swz->shadow_compare) { + /* Emit extra instructions to compare the fetched texel value against + * a texture coordinate component. The result of the comparison + * is 0.0 or 1.0. + */ + struct tgsi_full_src_register coord_src; + struct tgsi_full_src_register texel_src = + scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + /* convert gallium comparison func to SVGA comparison func */ + SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + switch (swz->texture_target) { + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); + break; + case TGSI_TEXTURE_SHADOW1D: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y); + break; + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W); + break; + default: + assert(!"Unexpected texture target in end_tex_swizzle()"); + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); + } + + /* COMPARE tmp, coord, texel */ + /* XXX it would seem that the texel and coord arguments should + * be transposed here, but piglit tests indicate otherwise. + */ + emit_comparison(emit, compare_func, + &swz->tmp_dst, &texel_src, &coord_src); + + /* AND dest, tmp, {1.0} */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); + if (swz->swizzled) { + emit_dst_register(emit, &swz->tmp_dst); + } + else { + emit_dst_register(emit, swz->inst_dst); + } + emit_src_register(emit, &swz->tmp_src); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } + + if (swz->swizzled) { + unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; + unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; + unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; + unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; + unsigned writemask_0 = 0, writemask_1 = 0; + boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type); + + /* Swizzle w/out zero/one terms */ + struct tgsi_full_src_register src_swizzled = + swizzle_src(&swz->tmp_src, + swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED, + swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN, + swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE, + swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA); + + /* MOV dst, color(tmp).<swizzle> */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + swz->inst_dst, &src_swizzled, FALSE); + + /* handle swizzle zero terms */ + writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) | + ((swz_g == PIPE_SWIZZLE_ZERO) << 1) | + ((swz_b == PIPE_SWIZZLE_ZERO) << 2) | + ((swz_a == PIPE_SWIZZLE_ZERO) << 3)); + + if (writemask_0) { + struct tgsi_full_src_register zero = int_tex ? 
+ make_immediate_reg_int(emit, 0) : + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_0); + + /* MOV dst.writemask_0, {0,0,0,0} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst, &zero, FALSE); + } + + /* handle swizzle one terms */ + writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) | + ((swz_g == PIPE_SWIZZLE_ONE) << 1) | + ((swz_b == PIPE_SWIZZLE_ONE) << 2) | + ((swz_a == PIPE_SWIZZLE_ONE) << 3)); + + if (writemask_1) { + struct tgsi_full_src_register one = int_tex ? + make_immediate_reg_int(emit, 1) : + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_1); + + /* MOV dst.writemask_1, {1,1,1,1} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE); + } + } +} + + +/** + * Emit code for TGSI_OPCODE_SAMPLE instruction. + */ +static boolean +emit_sample(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned resource_unit = inst->Src[1].Register.Index; + const unsigned sampler_unit = inst->Src[2].Register.Index; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); + + /* SAMPLE dst, coord(s0), resource, sampler */ + begin_emit_instruction(emit); + + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, resource_unit); + emit_sampler_register(emit, sampler_unit); + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Check if a texture instruction is valid. + * An example of an invalid texture instruction is doing shadow comparison + * with an integer-valued texture. + * If we detect an invalid texture instruction, we replace it with: + * MOV dst, {1,1,1,1}; + * \return TRUE if valid, FALSE if invalid. + */ +static boolean +is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned unit = inst->Src[1].Register.Index; + const unsigned target = inst->Texture.Texture; + boolean valid = TRUE; + + if (tgsi_is_shadow_target(target) && + is_integer_type(emit->key.tex[unit].return_type)) { + debug_printf("Invalid SAMPLE_C with an integer texture!\n"); + valid = FALSE; + } + /* XXX might check for other conditions in the future here */ + + if (!valid) { + /* emit a MOV dst, {1,1,1,1} instruction. 
*/ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } + + return valid; +} + + +/** + * Emit code for TGSI_OPCODE_TEX (simple texture lookup) + */ +static boolean +emit_tex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + unsigned target = inst->Texture.Texture; + unsigned opcode; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; + + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE dst, coord(s0), resource, sampler */ + begin_emit_instruction(emit); + + if (tgsi_is_shadow_target(target)) + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; + + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &coord); + } + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXP (projective texture) + */ +static boolean +emit_txp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + unsigned target = inst->Texture.Texture; + unsigned opcode; + int offsets[3]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + struct tgsi_full_src_register coord; + struct tex_swizzle_info swz_info; + + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* DIV tmp, coord, coord.wwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, + &coord, &src0_wwww, FALSE); + + /* SAMPLE dst, coord(tmp), resource, sampler */ + begin_emit_instruction(emit); + + if (tgsi_is_shadow_target(target)) + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; + + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &tmp_src); /* projected coord */ + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &tmp_src); + } + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/* + * Emit code for TGSI_OPCODE_XPD instruction. 
+ */
+static boolean
+emit_xpd(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = src0.y * src1.z - src1.y * src0.z
+    * dst.y = src0.z * src1.x - src1.z * src0.x
+    * dst.z = src0.x * src1.y - src1.x * src0.y
+    * dst.w = 1
+    */
+   struct tgsi_full_src_register s0_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s0_yyyy =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s0_zzzz =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+
+   struct tgsi_full_src_register s1_xxxx =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s1_yyyy =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s1_zzzz =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
+
+   unsigned tmp1 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+   unsigned tmp2 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
+
+   unsigned tmp3 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst_x =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_dst_register tmp3_dst_y =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
+   struct tgsi_full_dst_register tmp3_dst_z =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
+   struct tgsi_full_dst_register tmp3_dst_w =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
+
+   /* Note: we put all the intermediate computations into tmp3 in case
+    * the XPD dest register is the same as one of the src regs (in which
+    * case we could clobber a src reg before we're done with it).
+    *
+    * Note: we could get by with just one temp register instead of three
+    * since we're doing scalar operations and there's enough room in one
+    * temp for everything. 
+ */ + + /* MUL tmp1, src0.y, src1.z */ + /* MUL tmp2, src1.y, src0.z */ + /* ADD tmp3.x, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, + &s0_yyyy, &s1_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, + &s1_yyyy, &s0_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MUL tmp1, src0.z, src1.x */ + /* MUL tmp2, src1.z, src0.x */ + /* ADD tmp3.y, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz, + &s1_xxxx, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz, + &s0_xxxx, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MUL tmp1, src0.x, src1.y */ + /* MUL tmp2, src1.x, src0.y */ + /* ADD tmp3.z, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx, + &s1_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx, + &s0_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MOV tmp3.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE); + } + + /* MOV dst, tmp3 */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src, + inst->Instruction.Saturate); + + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXD (explicit derivatives) + */ +static boolean +emit_txd(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[3].Register.Index; + unsigned target = inst->Texture.Texture; + int offsets[3]; + struct tgsi_full_src_register coord; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &inst->Src[1]); /* Xderiv */ + emit_src_register(emit, &inst->Src[2]); /* Yderiv */ + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXF (texel fetch) + */ +static boolean +emit_txf(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + const unsigned msaa = emit->key.tex[unit].texture_msaa; + int offsets[3]; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + if (msaa) { + /* Fetch one sample from an MSAA texture */ + struct tgsi_full_src_register sampleIndex = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + /* LD_MS dst, coord(s0), resource, sampleIndex */ + 
begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + emit_src_register(emit, &sampleIndex); + end_emit_instruction(emit); + } + else { + /* Fetch one texel specified by integer coordinate */ + /* LD dst, coord(s0), resource */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_LD, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + end_emit_instruction(emit); + } + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) + * or TGSI_OPCODE_TXB2 (for cube shadow maps). + */ +static boolean +emit_txl_txb(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned target = inst->Texture.Texture; + unsigned opcode, unit; + int offsets[3]; + struct tgsi_full_src_register coord, lod_bias; + struct tex_swizzle_info swz_info; + + assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || + inst->Instruction.Opcode == TGSI_OPCODE_TXB || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2); + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { + lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + unit = inst->Src[2].Register.Index; + } + else { + lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + unit = inst->Src[1].Register.Index; + } + + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ + begin_emit_instruction(emit); + if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { + opcode = VGPU10_OPCODE_SAMPLE_L; + } + else { + opcode = VGPU10_OPCODE_SAMPLE_B; + } + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &lod_bias); + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. + */ +static boolean +emit_txq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + + if (emit->key.tex[unit].texture_target == PIPE_BUFFER) { + /* RESINFO does not support querying texture buffers, so we instead + * store texture buffer sizes in shader constants, then copy them to + * implement TXQ instead of emitting RESINFO. 
+ * MOV dst, const[texture_buffer_size_index[unit]] + */ + struct tgsi_full_src_register size_src = + make_src_const_reg(emit->texture_buffer_size_index[unit]); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, + FALSE); + } else { + /* RESINFO dst, srcMipLevel, resource */ + begin_emit_instruction(emit); + emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + end_emit_instruction(emit); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit a simple instruction (like ADD, MUL, MIN, etc). + */ +static boolean +emit_simple(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned opcode = inst->Instruction.Opcode; + const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); + unsigned i; + + begin_emit_instruction(emit); + emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), + inst->Instruction.Saturate); + for (i = 0; i < op->num_dst; i++) { + emit_dst_register(emit, &inst->Dst[i]); + } + for (i = 0; i < op->num_src; i++) { + emit_src_register(emit, &inst->Src[i]); + } + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * We only special case the MOV instruction to try to detect constant + * color writes in the fragment shader. + */ +static boolean +emit_mov(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_full_src_register *src = &inst->Src[0]; + const struct tgsi_full_dst_register *dst = &inst->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple(emit, inst); +} + + +/** + * Emit a simple VGPU10 instruction which writes to multiple dest registers, + * where TGSI only uses one dest register. + */ +static boolean +emit_simple_1dst(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned dst_count, + unsigned dst_index) +{ + const unsigned opcode = inst->Instruction.Opcode; + const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); + unsigned i; + + begin_emit_instruction(emit); + emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), + inst->Instruction.Saturate); + + for (i = 0; i < dst_count; i++) { + if (i == dst_index) { + emit_dst_register(emit, &inst->Dst[0]); + } else { + emit_null_dst_register(emit); + } + } + + for (i = 0; i < op->num_src; i++) { + emit_src_register(emit, &inst->Src[i]); + } + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Translate a single TGSI instruction to VGPU10. 
+ */ +static boolean +emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + const unsigned opcode = inst->Instruction.Opcode; + + switch (opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_BRK: + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + case TGSI_OPCODE_DIV: + case TGSI_OPCODE_DP2: + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDSUB: + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_FSEQ: + case TGSI_OPCODE_FSGE: + case TGSI_OPCODE_FSLT: + case TGSI_OPCODE_FSNE: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_NOP: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_RET: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_ROUND: + case TGSI_OPCODE_SQRT: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_TRUNC: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_UCMP: + case TGSI_OPCODE_USHR: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_XOR: + /* simple instructions */ + return emit_simple(emit, inst); + + case TGSI_OPCODE_MOV: + return emit_mov(emit, inst); + case TGSI_OPCODE_EMIT: + return emit_vertex(emit, inst); + case TGSI_OPCODE_ENDPRIM: + return emit_endprim(emit, inst); + case TGSI_OPCODE_ABS: + return emit_abs(emit, inst); + case TGSI_OPCODE_IABS: + return emit_iabs(emit, inst); + case TGSI_OPCODE_ARL: + /* fall-through */ + case TGSI_OPCODE_UARL: + return emit_arl_uarl(emit, inst); + case TGSI_OPCODE_BGNSUB: + /* no-op */ + return TRUE; + case TGSI_OPCODE_CAL: + return emit_cal(emit, inst); + case TGSI_OPCODE_CMP: + return emit_cmp(emit, inst); + case TGSI_OPCODE_COS: + return emit_sincos(emit, inst); + case TGSI_OPCODE_DP2A: + return emit_dp2a(emit, inst); + case TGSI_OPCODE_DPH: + return emit_dph(emit, inst); + case TGSI_OPCODE_DST: + return emit_dst(emit, inst); + case TGSI_OPCODE_EX2: + return emit_ex2(emit, inst); + case TGSI_OPCODE_EXP: + return emit_exp(emit, inst); + case TGSI_OPCODE_IF: + return emit_if(emit, inst); + case TGSI_OPCODE_KILL: + return emit_kill(emit, inst); + case TGSI_OPCODE_KILL_IF: + return emit_kill_if(emit, inst); + case TGSI_OPCODE_LG2: + return emit_lg2(emit, inst); + case TGSI_OPCODE_LIT: + return emit_lit(emit, inst); + case TGSI_OPCODE_LOG: + return emit_log(emit, inst); + case TGSI_OPCODE_LRP: + return emit_lrp(emit, inst); + case TGSI_OPCODE_POW: + return emit_pow(emit, inst); + case TGSI_OPCODE_RCP: + return emit_rcp(emit, inst); + case TGSI_OPCODE_RSQ: + return emit_rsq(emit, inst); + case TGSI_OPCODE_SAMPLE: + return emit_sample(emit, inst); + case TGSI_OPCODE_SCS: + return emit_scs(emit, inst); + case TGSI_OPCODE_SEQ: + return emit_seq(emit, inst); + case TGSI_OPCODE_SGE: + return emit_sge(emit, inst); + case TGSI_OPCODE_SGT: + return emit_sgt(emit, inst); + case TGSI_OPCODE_SIN: + return emit_sincos(emit, inst); + case TGSI_OPCODE_SLE: + return emit_sle(emit, inst); + case TGSI_OPCODE_SLT: + return emit_slt(emit, 
inst); + case TGSI_OPCODE_SNE: + return emit_sne(emit, inst); + case TGSI_OPCODE_SSG: + return emit_ssg(emit, inst); + case TGSI_OPCODE_ISSG: + return emit_issg(emit, inst); + case TGSI_OPCODE_SUB: + return emit_sub(emit, inst); + case TGSI_OPCODE_TEX: + return emit_tex(emit, inst); + case TGSI_OPCODE_TXP: + return emit_txp(emit, inst); + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXL: + return emit_txl_txb(emit, inst); + case TGSI_OPCODE_TXD: + return emit_txd(emit, inst); + case TGSI_OPCODE_TXF: + return emit_txf(emit, inst); + case TGSI_OPCODE_TXQ: + return emit_txq(emit, inst); + case TGSI_OPCODE_UIF: + return emit_if(emit, inst); + case TGSI_OPCODE_XPD: + return emit_xpd(emit, inst); + case TGSI_OPCODE_UMUL_HI: + case TGSI_OPCODE_IMUL_HI: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_IDIV: + /* These cases use only the FIRST of two destination registers */ + return emit_simple_1dst(emit, inst, 2, 0); + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_MOD: + /* These cases use only the SECOND of two destination registers */ + return emit_simple_1dst(emit, inst, 2, 1); + case TGSI_OPCODE_END: + if (!emit_post_helpers(emit)) + return FALSE; + return emit_simple(emit, inst); + + default: + debug_printf("Unimplemented tgsi instruction %s\n", + tgsi_get_opcode_name(opcode)); + return FALSE; + } + + return TRUE; +} + + +/** + * Emit the extra instructions to adjust the vertex position. + * There are two possible adjustments: + * 1. Converting from Gallium to VGPU10 coordinate space by applying the + * "prescale" and "pretranslate" values. + * 2. Undoing the viewport transformation when we use the swtnl/draw path. + * \param vs_pos_tmp_index which temporary register contains the vertex pos. + */ +static void +emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, + unsigned vs_pos_tmp_index) +{ + struct tgsi_full_src_register tmp_pos_src; + struct tgsi_full_dst_register pos_dst; + + /* Don't bother to emit any extra vertex instructions if vertex position is + * not written out + */ + if (emit->vposition.out_index == INVALID_INDEX) + return; + + tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); + pos_dst = make_dst_output_reg(emit->vposition.out_index); + + /* If non-adjusted vertex position register index + * is valid, copy the vertex position from the temporary + * vertex position register before it is modified by the + * prescale computation. + */ + if (emit->vposition.so_index != INVALID_INDEX) { + struct tgsi_full_dst_register pos_so_dst = + make_dst_output_reg(emit->vposition.so_index); + + /* MOV pos_so, tmp_pos */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, + &tmp_pos_src, FALSE); + } + + if (emit->vposition.need_prescale) { + /* This code adjusts the vertex position to match the VGPU10 convention. 
+ * If p is the position computed by the shader (usually by applying the + * modelview and projection matrices), the new position q is computed by: + * + * q.x = p.w * trans.x + p.x * scale.x + * q.y = p.w * trans.y + p.y * scale.y + * q.z = p.w * trans.z + p.z * scale.z; + * q.w = p.w * trans.w + p.w; + */ + struct tgsi_full_src_register tmp_pos_src_w = + scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); + struct tgsi_full_dst_register tmp_pos_dst = + make_dst_temp_reg(vs_pos_tmp_index); + struct tgsi_full_dst_register tmp_pos_dst_xyz = + writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); + + struct tgsi_full_src_register prescale_scale = + make_src_const_reg(emit->vposition.prescale_scale_index); + struct tgsi_full_src_register prescale_trans = + make_src_const_reg(emit->vposition.prescale_trans_index); + + /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, + &tmp_pos_src, &prescale_scale, FALSE); + + /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, + &prescale_trans, &tmp_pos_src, FALSE); + } + else if (emit->key.vs.undo_viewport) { + /* This code computes the final vertex position from the temporary + * vertex position by undoing the viewport transformation and the + * divide-by-W operation (we convert window coords back to clip coords). + * This is needed when we use the 'draw' module for fallbacks. + * If p is the temp pos in window coords, then the NDC coord q is: + * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w + * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w + * q.z = p.z * p.w + * q.w = p.w + * CONST[vs_viewport_index] contains: + * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } + */ + struct tgsi_full_dst_register tmp_pos_dst = + make_dst_temp_reg(vs_pos_tmp_index); + struct tgsi_full_dst_register tmp_pos_dst_xy = + writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); + struct tgsi_full_src_register tmp_pos_src_wwww = + scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); + + struct tgsi_full_dst_register pos_dst_xyz = + writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); + struct tgsi_full_dst_register pos_dst_w = + writemask_dst(&pos_dst, TGSI_WRITEMASK_W); + + struct tgsi_full_src_register vp_xyzw = + make_src_const_reg(emit->vs.viewport_index); + struct tgsi_full_src_register vp_zwww = + swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, + TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + + /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, + &tmp_pos_src, &vp_zwww, FALSE); + + /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, + &tmp_pos_src, &vp_xyzw, FALSE); + + /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, + &tmp_pos_src, &tmp_pos_src_wwww, FALSE); + + /* MOV pos.w, tmp_pos.w */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, + &tmp_pos_src, FALSE); + } + else if (vs_pos_tmp_index != INVALID_INDEX) { + /* This code is to handle the case where the temporary vertex + * position register is created when the vertex shader has stream + * output and prescale is disabled because rasterization is to be + * discarded. 
+ */ + struct tgsi_full_dst_register pos_dst = + make_dst_output_reg(emit->vposition.out_index); + + /* MOV pos, tmp_pos */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &pos_dst); + emit_src_register(emit, &tmp_pos_src); + end_emit_instruction(emit); + } +} + +static void +emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) +{ + if (emit->clip_mode == CLIP_DISTANCE) { + /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ + emit_clip_distance_instructions(emit); + + } else if (emit->clip_mode == CLIP_VERTEX) { + /* Convert TGSI CLIPVERTEX to CLIPDIST */ + emit_clip_vertex_instructions(emit); + } + + /** + * Emit vertex position and take care of legacy user planes only if + * there is a valid vertex position register index. + * This is to take care of the case + * where the shader doesn't output vertex position. Then in + * this case, don't bother to emit more vertex instructions. + */ + if (emit->vposition.out_index == INVALID_INDEX) + return; + + /** + * Emit per-vertex clipping instructions for legacy user defined clip planes. + * NOTE: we must emit the clip distance instructions before the + * emit_vpos_instructions() call since the later function will change + * the TEMP[vs_pos_tmp_index] value. + */ + if (emit->clip_mode == CLIP_LEGACY) { + /* Emit CLIPDIST for legacy user defined clip planes */ + emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); + } +} + + +/** + * Emit extra per-vertex instructions. This includes clip-coordinate + * space conversion and computing clip distances. This is called for + * each GS emit-vertex instruction and at the end of VS translation. + */ +static void +emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) +{ + const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; + + /* Emit clipping instructions based on clipping mode */ + emit_clipping_instructions(emit); + + /** + * Reset the temporary vertex position register index + * so that emit_dst_register() will use the real vertex position output + */ + emit->vposition.tmp_index = INVALID_INDEX; + + /* Emit vertex position instructions */ + emit_vpos_instructions(emit, vs_pos_tmp_index); + + /* Restore original vposition.tmp_index value for the next GS vertex. + * It doesn't matter for VS. + */ + emit->vposition.tmp_index = vs_pos_tmp_index; +} + +/** + * Translate the TGSI_OPCODE_EMIT GS instruction. + */ +static boolean +emit_vertex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned ret = TRUE; + + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + emit_vertex_instructions(emit); + + /* We can't use emit_simple() because the TGSI instruction has one + * operand (vertex stream number) which we must ignore for VGPU10. + */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); + end_emit_instruction(emit); + + return ret; +} + + +/** + * Emit the extra code to convert from VGPU10's boolean front-face + * register to TGSI's signed front-face register. + * + * TODO: Make temporary front-face register a scalar. 
+ */ +static void +emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + if (emit->fs.face_input_index != INVALID_INDEX) { + /* convert vgpu10 boolean face register to gallium +/-1 value */ + struct tgsi_full_dst_register tmp_dst = + make_dst_temp_reg(emit->fs.face_tmp_index); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register neg_one = + make_immediate_reg_float(emit, -1.0f); + + /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); + emit_dst_register(emit, &tmp_dst); + emit_face_register(emit); + emit_src_register(emit, &one); + emit_src_register(emit, &neg_one); + end_emit_instruction(emit); + } +} + + +/** + * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. + */ +static void +emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + if (emit->fs.fragcoord_input_index != INVALID_INDEX) { + struct tgsi_full_dst_register tmp_dst = + make_dst_temp_reg(emit->fs.fragcoord_tmp_index); + struct tgsi_full_dst_register tmp_dst_xyz = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); + struct tgsi_full_dst_register tmp_dst_w = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register fragcoord = + make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); + + /* save the input index */ + unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; + /* set to invalid to prevent substitution in emit_src_register() */ + emit->fs.fragcoord_input_index = INVALID_INDEX; + + /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &tmp_dst_xyz); + emit_src_register(emit, &fragcoord); + end_emit_instruction(emit); + + /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); + emit_dst_register(emit, &tmp_dst_w); + emit_src_register(emit, &one); + emit_src_register(emit, &fragcoord); + end_emit_instruction(emit); + + /* restore saved value */ + emit->fs.fragcoord_input_index = fragcoord_input_index; + } +} + + +/** + * Emit extra instructions to adjust VS inputs/attributes. This can + * mean casting a vertex attribute from int to float or setting the + * W component to 1, or both. 
+ */ +static void +emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) +{ + const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; + const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; + const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; + const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; + const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; + const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; + const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; + + unsigned adjust_mask = (save_w_1_mask | + save_itof_mask | + save_utof_mask | + save_is_bgra_mask | + save_puint_to_snorm_mask | + save_puint_to_uscaled_mask | + save_puint_to_sscaled_mask); + + assert(emit->unit == PIPE_SHADER_VERTEX); + + if (adjust_mask) { + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + struct tgsi_full_src_register one_int = + make_immediate_reg_int(emit, 1); + + /* We need to turn off these bitmasks while emitting the + * instructions below, then restore them afterward. + */ + emit->key.vs.adjust_attrib_w_1 = 0; + emit->key.vs.adjust_attrib_itof = 0; + emit->key.vs.adjust_attrib_utof = 0; + emit->key.vs.attrib_is_bgra = 0; + emit->key.vs.attrib_puint_to_snorm = 0; + emit->key.vs.attrib_puint_to_uscaled = 0; + emit->key.vs.attrib_puint_to_sscaled = 0; + + while (adjust_mask) { + unsigned index = u_bit_scan(&adjust_mask); + + /* skip the instruction if this vertex attribute is not being used */ + if (emit->info.input_usage_mask[index] == 0) + continue; + + unsigned tmp = emit->vs.adjusted_input[index]; + struct tgsi_full_src_register input_src = + make_src_reg(TGSI_FILE_INPUT, index); + + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst_w = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); + + /* ITOF/UTOF/MOV tmp, input[index] */ + if (save_itof_mask & (1 << index)) { + emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, + &tmp_dst, &input_src, FALSE); + } + else if (save_utof_mask & (1 << index)) { + emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, + &tmp_dst, &input_src, FALSE); + } + else if (save_puint_to_snorm_mask & (1 << index)) { + emit_puint_to_snorm(emit, &tmp_dst, &input_src); + } + else if (save_puint_to_uscaled_mask & (1 << index)) { + emit_puint_to_uscaled(emit, &tmp_dst, &input_src); + } + else if (save_puint_to_sscaled_mask & (1 << index)) { + emit_puint_to_sscaled(emit, &tmp_dst, &input_src); + } + else { + assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &tmp_dst, &input_src, FALSE); + } + + if (save_is_bgra_mask & (1 << index)) { + emit_swap_r_b(emit, &tmp_dst, &tmp_src); + } + + if (save_w_1_mask & (1 << index)) { + /* MOV tmp.w, 1.0 */ + if (emit->key.vs.attrib_is_pure_int & (1 << index)) { + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &tmp_dst_w, &one_int, FALSE); + } + else { + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &tmp_dst_w, &one, FALSE); + } + } + } + + emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; + emit->key.vs.adjust_attrib_itof = save_itof_mask; + emit->key.vs.adjust_attrib_utof = save_utof_mask; + emit->key.vs.attrib_is_bgra = save_is_bgra_mask; + emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; + emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; + emit->key.vs.attrib_puint_to_sscaled = 
save_puint_to_sscaled_mask; + } +} + + +/** + * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed + * to implement some instructions. We pre-allocate those values here + * in the immediate constant buffer. + */ +static void +alloc_common_immediates(struct svga_shader_emitter_v10 *emit) +{ + unsigned n = 0; + + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); + + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f); + + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 0, 1, 0, -1); + + if (emit->key.vs.attrib_puint_to_snorm) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); + } + + if (emit->key.vs.attrib_puint_to_uscaled) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); + } + + if (emit->key.vs.attrib_puint_to_sscaled) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 22, 12, 2, 0); + + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 22, 30, 0, 0); + } + + assert(n <= Elements(emit->common_immediate_pos)); + emit->num_common_immediates = n; +} + + +/** + * Emit any extra/helper declarations/code that we might need between + * the declaration section and code section. + */ +static boolean +emit_pre_helpers(struct svga_shader_emitter_v10 *emit) +{ + /* Properties */ + if (emit->unit == PIPE_SHADER_GEOMETRY) + emit_property_instructions(emit); + + /* Declare inputs */ + if (!emit_input_declarations(emit)) + return FALSE; + + /* Declare outputs */ + if (!emit_output_declarations(emit)) + return FALSE; + + /* Declare temporary registers */ + emit_temporaries_declaration(emit); + + /* Declare constant registers */ + emit_constant_declaration(emit); + + /* Declare samplers and resources */ + emit_sampler_declarations(emit); + emit_resource_declarations(emit); + + /* Declare clip distance output registers */ + if (emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY) { + emit_clip_distance_declarations(emit); + } + + alloc_common_immediates(emit); + + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { + float alpha = emit->key.fs.alpha_ref; + emit->fs.alpha_ref_index = + alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); + } + + /* Now, emit the constant block containing all the immediates + * declared by shader, as well as the extra ones seen above. + */ + emit_vgpu10_immediates_block(emit); + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + emit_frontface_instructions(emit); + emit_fragcoord_instructions(emit); + } + else if (emit->unit == PIPE_SHADER_VERTEX) { + emit_vertex_attrib_instructions(emit); + } + + return TRUE; +} + + +/** + * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w + * against the alpha reference value and discards the fragment if the + * comparison fails. 
+ */ +static void +emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, + unsigned fs_color_tmp_index) +{ + /* compare output color's alpha to alpha ref and kill */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_src_register tmp_src_x = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register color_src = + make_src_temp_reg(fs_color_tmp_index); + struct tgsi_full_src_register color_src_w = + scalar_src(&color_src, TGSI_SWIZZLE_W); + struct tgsi_full_src_register ref_src = + make_src_immediate_reg(emit->fs.alpha_ref_index); + struct tgsi_full_dst_register color_dst = + make_dst_output_reg(emit->fs.color_out_index[0]); + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + /* dst = src0 'alpha_func' src1 */ + emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, + &color_src_w, &ref_src); + + /* DISCARD if dst.x == 0 */ + begin_emit_instruction(emit); + emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ + emit_src_register(emit, &tmp_src_x); + end_emit_instruction(emit); + + /* If we don't need to broadcast the color below or set fragments to + * white, emit final color here. + */ + if (emit->key.fs.write_color0_to_n_cbufs <= 1 && + !emit->key.fs.white_fragments) { + /* MOV output.color, tempcolor */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, + &color_src, FALSE); /* XXX saturate? */ + } + + free_temp_indexes(emit); +} + + +/** + * When we need to emit white for all fragments (for emulating XOR logicop + * mode), this function copies white into the temporary color output register. + */ +static void +emit_set_color_white(struct svga_shader_emitter_v10 *emit, + unsigned fs_color_tmp_index) +{ + struct tgsi_full_dst_register color_dst = + make_dst_temp_reg(fs_color_tmp_index); + struct tgsi_full_src_register white = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE); +} + + +/** + * Emit instructions for writing a single color output to multiple + * color buffers. + * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or + * when key.fs.white_fragments is true). + * property is set and the number of render targets is greater than one. + * \param fs_color_tmp_index index of the temp register that holds the + * color to broadcast. + */ +static void +emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, + unsigned fs_color_tmp_index) +{ + const unsigned n = emit->key.fs.write_color0_to_n_cbufs; + unsigned i; + struct tgsi_full_src_register color_src = + make_src_temp_reg(fs_color_tmp_index); + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + for (i = 0; i < n; i++) { + unsigned output_reg = emit->fs.color_out_index[i]; + struct tgsi_full_dst_register color_dst = + make_dst_output_reg(output_reg); + + /* Fill in this semantic here since we'll use it later in + * emit_dst_register(). + */ + emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; + + /* MOV output.color[i], tempcolor */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, + &color_src, FALSE); /* XXX saturate? */ + } +} + + +/** + * Emit extra helper code after the original shader code, but before the + * last END/RET instruction. + * For vertex shaders this means emitting the extra code to apply the + * prescale scale/translation. 
+ */ +static boolean +emit_post_helpers(struct svga_shader_emitter_v10 *emit) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + emit_vertex_instructions(emit); + } + else if (emit->unit == PIPE_SHADER_FRAGMENT) { + const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; + + /* We no longer want emit_dst_register() to substitute the + * temporary fragment color register for the real color output. + */ + emit->fs.color_tmp_index = INVALID_INDEX; + + if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { + emit_alpha_test_instructions(emit, fs_color_tmp_index); + } + if (emit->key.fs.white_fragments) { + emit_set_color_white(emit, fs_color_tmp_index); + } + if (emit->key.fs.write_color0_to_n_cbufs > 1 || + emit->key.fs.white_fragments) { + emit_broadcast_color_instructions(emit, fs_color_tmp_index); + } + } + + return TRUE; +} + + +/** + * Translate the TGSI tokens into VGPU10 tokens. + */ +static boolean +emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, + const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + boolean ret = TRUE; + boolean pre_helpers_emitted = FALSE; + unsigned inst_number = 0; + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!pre_helpers_emitted) { + ret = emit_pre_helpers(emit); + if (!ret) + goto done; + pre_helpers_emitted = TRUE; + } + ret = emit_vgpu10_instruction(emit, inst_number++, + &parse.FullToken.FullInstruction); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); + if (!ret) + goto done; + break; + + default: + break; + } + } + +done: + tgsi_parse_free(&parse); + return ret; +} + + +/** + * Emit the first VGPU10 shader tokens. + */ +static boolean +emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) +{ + VGPU10ProgramToken ptoken; + + /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ + ptoken.majorVersion = 4; + ptoken.minorVersion = 0; + ptoken.programType = translate_shader_type(emit->unit); + if (!emit_dword(emit, ptoken.value)) + return FALSE; + + /* Second token: total length of shader, in tokens. We can't fill this + * in until we're all done. Emit zero for now. + */ + return emit_dword(emit, 0); +} + + +static boolean +emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) +{ + VGPU10ProgramToken *tokens; + + /* Replace the second token with total shader length */ + tokens = (VGPU10ProgramToken *) emit->buf; + tokens[1].value = emit_get_num_tokens(emit); + + return TRUE; +} + + +/** + * Modify the FS to read the BCOLORs and use the FACE register + * to choose between the front/back colors. + */ +static const struct tgsi_token * +transform_fs_twoside(const struct tgsi_token *tokens) +{ + if (0) { + debug_printf("Before tgsi_add_two_side ------------------\n"); + tgsi_dump(tokens,0); + } + tokens = tgsi_add_two_side(tokens); + if (0) { + debug_printf("After tgsi_add_two_side ------------------\n"); + tgsi_dump(tokens, 0); + } + return tokens; +} + + +/** + * Modify the FS to do polygon stipple. 
+ */ +static const struct tgsi_token * +transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, + const struct tgsi_token *tokens) +{ + const struct tgsi_token *new_tokens; + unsigned unit; + + if (0) { + debug_printf("Before pstipple ------------------\n"); + tgsi_dump(tokens,0); + } + + new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, + TGSI_FILE_INPUT); + + emit->fs.pstipple_sampler_unit = unit; + + /* Setup texture state for stipple */ + emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D; + emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; + emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; + emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; + emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; + + if (0) { + debug_printf("After pstipple ------------------\n"); + tgsi_dump(new_tokens, 0); + } + + return new_tokens; +} + +/** + * Modify the FS to support anti-aliasing point. + */ +static const struct tgsi_token * +transform_fs_aapoint(const struct tgsi_token *tokens, + int aa_coord_index) +{ + if (0) { + debug_printf("Before tgsi_add_aa_point ------------------\n"); + tgsi_dump(tokens,0); + } + tokens = tgsi_add_aa_point(tokens, aa_coord_index); + if (0) { + debug_printf("After tgsi_add_aa_point ------------------\n"); + tgsi_dump(tokens, 0); + } + return tokens; +} + +/** + * This is the main entrypoint for the TGSI -> VPGU10 translator. + */ +struct svga_shader_variant * +svga_tgsi_vgpu10_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, + unsigned unit) +{ + struct svga_shader_variant *variant = NULL; + struct svga_shader_emitter_v10 *emit; + const struct tgsi_token *tokens = shader->tokens; + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_geometry_shader *gs = svga->curr.gs; + + assert(unit == PIPE_SHADER_VERTEX || + unit == PIPE_SHADER_GEOMETRY || + unit == PIPE_SHADER_FRAGMENT); + + /* These two flags cannot be used together */ + assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); + + /* + * Setup the code emitter + */ + emit = alloc_emitter(); + if (!emit) + return NULL; + + emit->unit = unit; + emit->key = *key; + + emit->vposition.need_prescale = (emit->key.vs.need_prescale || + emit->key.gs.need_prescale); + emit->vposition.tmp_index = INVALID_INDEX; + emit->vposition.so_index = INVALID_INDEX; + emit->vposition.out_index = INVALID_INDEX; + + emit->fs.color_tmp_index = INVALID_INDEX; + emit->fs.face_input_index = INVALID_INDEX; + emit->fs.fragcoord_input_index = INVALID_INDEX; + + emit->gs.prim_id_index = INVALID_INDEX; + + emit->clip_dist_out_index = INVALID_INDEX; + emit->clip_dist_tmp_index = INVALID_INDEX; + emit->clip_dist_so_index = INVALID_INDEX; + emit->clip_vertex_out_index = INVALID_INDEX; + + if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { + emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; + } + + if (unit == PIPE_SHADER_FRAGMENT) { + if (key->fs.light_twoside) { + tokens = transform_fs_twoside(tokens); + } + if (key->fs.pstipple) { + const struct tgsi_token *new_tokens = + transform_fs_pstipple(emit, tokens); + if (tokens != shader->tokens) { + /* free the two-sided shader tokens */ + tgsi_free_tokens(tokens); + } + tokens = new_tokens; + } + if (key->fs.aa_point) { + tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); + } + } + + if (SVGA_DEBUG & DEBUG_TGSI) { + debug_printf("#####################################\n"); + debug_printf("### TGSI Shader %u\n", shader->id); + tgsi_dump(tokens, 0); + } + + /** + * Rescan the header if the token string is 
different from the one + * included in the shader; otherwise, the header info is already up-to-date + */ + if (tokens != shader->tokens) { + tgsi_scan_shader(tokens, &emit->info); + } else { + emit->info = shader->info; + } + + emit->num_outputs = emit->info.num_outputs; + + if (unit == PIPE_SHADER_FRAGMENT) { + /* Compute FS input remapping to match the output from VS/GS */ + if (gs) { + svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); + } else { + assert(vs); + svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); + } + } else if (unit == PIPE_SHADER_GEOMETRY) { + assert(vs); + svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); + } + + determine_clipping_mode(emit); + + if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { + if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { + /* if there is stream output declarations associated + * with this shader or the shader writes to ClipDistance + * then reserve extra registers for the non-adjusted vertex position + * and the ClipDistance shadow copy + */ + emit->vposition.so_index = emit->num_outputs++; + + if (emit->clip_mode == CLIP_DISTANCE) { + emit->clip_dist_so_index = emit->num_outputs++; + if (emit->info.num_written_clipdistance > 4) + emit->num_outputs++; + } + } + } + + /* + * Do actual shader translation. + */ + if (!emit_vgpu10_header(emit)) { + debug_printf("svga: emit VGPU10 header failed\n"); + goto cleanup; + } + + if (!emit_vgpu10_instructions(emit, tokens)) { + debug_printf("svga: emit VGPU10 instructions failed\n"); + goto cleanup; + } + + if (!emit_vgpu10_tail(emit)) { + debug_printf("svga: emit VGPU10 tail failed\n"); + goto cleanup; + } + + if (emit->register_overflow) { + goto cleanup; + } + + /* + * Create, initialize the 'variant' object. + */ + variant = svga_new_shader_variant(svga); + if (!variant) + goto cleanup; + + variant->shader = shader; + variant->nr_tokens = emit_get_num_tokens(emit); + variant->tokens = (const unsigned *)emit->buf; + emit->buf = NULL; /* buffer is no longer owed by emitter context */ + memcpy(&variant->key, key, sizeof(*key)); + variant->id = UTIL_BITMASK_INVALID_INDEX; + + /* The extra constant starting offset starts with the number of + * shader constants declared in the shader. + */ + variant->extra_const_start = emit->num_shader_consts[0]; + if (key->gs.wide_point) { + /** + * The extra constant added in the transformed shader + * for inverse viewport scale is to be supplied by the driver. + * So the extra constant starting offset needs to be reduced by 1. + */ + assert(variant->extra_const_start > 0); + variant->extra_const_start--; + } + + variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; + + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). + */ + variant->constant_color_output = + emit->constant_color_output && emit->num_output_writes == 1; + + /** keep track in the variant if flat interpolation is used + * for any of the varyings. 
+ */ + variant->uses_flat_interp = emit->uses_flat_interp; + + if (tokens != shader->tokens) { + tgsi_free_tokens(tokens); + } + +cleanup: + free_emitter(emit); + + return variant; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h index 19d074fd6..562c6690f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h @@ -79,15 +79,20 @@ struct winsys_handle; #define SVGA_FENCE_FLAG_EXEC (1 << 0) #define SVGA_FENCE_FLAG_QUERY (1 << 1) -#define SVGA_SURFACE_USAGE_SHARED (1 << 0) +#define SVGA_SURFACE_USAGE_SHARED (1 << 0) +#define SVGA_SURFACE_USAGE_SCANOUT (1 << 1) + +#define SVGA_QUERY_FLAG_SET (1 << 0) +#define SVGA_QUERY_FLAG_REF (1 << 1) + +#define SVGA_HINT_FLAG_CAN_PRE_FLUSH (1 << 0) /* Can preemptively flush */ /** Opaque surface handle */ struct svga_winsys_surface; - /** Opaque guest-backed objects */ struct svga_winsys_gb_shader; - +struct svga_winsys_gb_query; /** @@ -143,7 +148,8 @@ struct svga_winsys_context uint32 *shid, uint32 *mobid, uint32 *offset, - struct svga_winsys_gb_shader *shader); + struct svga_winsys_gb_shader *shader, + unsigned flags); /** * Emit a relocation for a guest-backed context. @@ -173,6 +179,26 @@ struct svga_winsys_context uint32 offset, unsigned flags); + /** + * Emit a relocation for a guest-backed query object. + * + * NOTE: Order of this call does matter. It should be the same order + * as relocations appear in the command buffer. + */ + void + (*query_relocation)(struct svga_winsys_context *swc, + SVGAMobId *id, + struct svga_winsys_gb_query *query); + + /** + * Bind queries to context. + * \param flags exactly one of SVGA_QUERY_FLAG_SET/REF + */ + enum pipe_error + (*query_bind)(struct svga_winsys_context *sws, + struct svga_winsys_gb_query *query, + unsigned flags); + void (*commit)(struct svga_winsys_context *swc); @@ -189,6 +215,11 @@ struct svga_winsys_context uint32 cid; /** + * Flags to hint the current context state + */ + uint32 hints; + + /** ** BEGIN new functions for guest-backed surfaces. **/ @@ -219,6 +250,36 @@ struct svga_winsys_context struct svga_winsys_surface *surface, boolean *rebind); + /** + * Create and define a DX GB shader that resides in the device COTable. + * Caller of this function will issue the DXDefineShader command. + */ + struct svga_winsys_gb_shader * + (*shader_create)(struct svga_winsys_context *swc, + uint32 shaderId, + SVGA3dShaderType shaderType, + const uint32 *bytecode, + uint32 bytecodeLen); + + /** + * Destroy a DX GB shader. + * This function will issue the DXDestroyShader command. + */ + void + (*shader_destroy)(struct svga_winsys_context *swc, + struct svga_winsys_gb_shader *shader); + + /** + * Rebind a DX GB resource to a context. + * This is called to reference a DX GB resource in the command stream in + * order to page in the associated resource in case the memory has been + * paged out, and to fence it if necessary after command submission. 
+ */ + enum pipe_error + (*resource_rebind)(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + struct svga_winsys_gb_shader *shader, + unsigned flags); }; @@ -260,7 +321,7 @@ struct svga_winsys_screen * \param format Format Device surface format * \param usage Winsys usage: bitmask of SVGA_SURFACE_USAGE_x flags * \param size Surface size given in device format - * \param numFaces Number of faces of the surface (1 or 6) + * \param numLayers Number of layers of the surface (or cube faces) * \param numMipLevels Number of mipmap levels for each face * * Returns the surface ID (sid). Surfaces are generic @@ -274,7 +335,7 @@ struct svga_winsys_screen * - Each face has a list of mipmap levels * * - Each mipmap image may have multiple volume - * slices, if the image is three dimensional. + * slices for 3D image, or multiple 2D slices for texture array. * * - Each slice is a 2D array of 'blocks' * @@ -296,8 +357,9 @@ struct svga_winsys_screen SVGA3dSurfaceFormat format, unsigned usage, SVGA3dSize size, - uint32 numFaces, - uint32 numMipLevels); + uint32 numLayers, + uint32 numMipLevels, + unsigned sampleCount); /** * Creates a surface from a winsys handle. @@ -343,7 +405,7 @@ struct svga_winsys_screen (*surface_can_create)(struct svga_winsys_screen *sws, SVGA3dSurfaceFormat format, SVGA3dSize size, - uint32 numFaces, + uint32 numLayers, uint32 numMipLevels); /** @@ -420,7 +482,7 @@ struct svga_winsys_screen */ struct svga_winsys_gb_shader * (*shader_create)(struct svga_winsys_screen *sws, - SVGA3dShaderType type, + SVGA3dShaderType shaderType, const uint32 *bytecode, uint32 bytecodeLen); @@ -432,6 +494,46 @@ struct svga_winsys_screen (*shader_destroy)(struct svga_winsys_screen *sws, struct svga_winsys_gb_shader *shader); + /** + * Create and define a GB query. + */ + struct svga_winsys_gb_query * + (*query_create)(struct svga_winsys_screen *sws, uint32 len); + + /** + * Destroy a GB query. + */ + void + (*query_destroy)(struct svga_winsys_screen *sws, + struct svga_winsys_gb_query *query); + + /** + * Initialize the query state of the query that resides in the slot + * specified in offset + * \return zero on success. + */ + int + (*query_init)(struct svga_winsys_screen *sws, + struct svga_winsys_gb_query *query, + unsigned offset, + SVGA3dQueryState queryState); + + /** + * Inquire for the query state and result of the query that resides + * in the slot specified in offset + */ + void + (*query_get_result)(struct svga_winsys_screen *sws, + struct svga_winsys_gb_query *query, + unsigned offset, + SVGA3dQueryState *queryState, + void *result, uint32 resultLen); + + /** Have VGPU v10 hardware? 
*/ + boolean have_vgpu10; + + /** To rebind resources at the beginnning of a new command buffer */ + boolean need_to_rebind_resources; }; diff --git a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c index 0874d2321..252e0d6c8 100644 --- a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1369,12 +1369,6 @@ dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd) case SVGA3D_BUMPL6V5U5: _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); break; - case SVGA3D_BUMPX8L8V8U8: - _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); - break; - case SVGA3D_BUMPL8V8U8: - _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); - break; case SVGA3D_ARGB_S10E5: _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); break; @@ -1528,15 +1522,6 @@ dump_SVGA3dCmdDestroyGBShader(const SVGA3dCmdDestroyGBShader *cmd) } static void -dump_SVGA3dCmdBindGBShaderConsts(const SVGA3dCmdBindGBShaderConsts *cmd) -{ - _debug_printf("\t\t.cid = %u\n", cmd->cid); - _debug_printf("\t\t.shaderType = %u\n", cmd->shaderType); - _debug_printf("\t\t.shaderConstType = %u\n", cmd->shaderConstType); - _debug_printf("\t\t.sid = %u\n", cmd->sid); -} - -static void dump_SVGA3dCmdBindGBSurface(const SVGA3dCmdBindGBSurface *cmd) { _debug_printf("\t\t.sid = %u\n", cmd->sid); @@ -1929,14 +1914,6 @@ svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) body = (const uint8_t *)&cmd[1]; } break; - case SVGA_3D_CMD_BIND_SHADERCONSTS: - _debug_printf("\tSVGA_3D_CMD_BIND_SHADERCONSTS\n"); - { - const SVGA3dCmdBindGBShaderConsts *cmd = (const SVGA3dCmdBindGBShaderConsts *) body; - dump_SVGA3dCmdBindGBShaderConsts(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; case SVGA_3D_CMD_BIND_GB_SURFACE: _debug_printf("\tSVGA_3D_CMD_BIND_GB_SURFACE\n"); { diff --git a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c index ad1549d9f..03a63cf5e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -144,7 +144,7 @@ const struct sh_opcode_info *svga_opcode_info( uint op ) { struct sh_opcode_info *info; - if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { + if (op >= ARRAY_SIZE(opcode_info)) { /* The opcode is either PHASE, COMMENT, END or out of range. */ assert( 0 ); |
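
The emit_vgpu10_header()/emit_vgpu10_tail() pair in the svga_tgsi_vgpu10.c hunk above reserves a length slot in the two-token program header and back-patches it once the whole shader body has been emitted. Below is a minimal standalone C sketch of that pattern; the bitfield shift positions and the fixed MAX_TOKENS buffer are illustrative assumptions only (the authoritative token layout is VGPU10ProgramToken in include/VGPU10ShaderTokens.h, and the real emitter grows its buffer dynamically).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_TOKENS 1024

struct token_buf {
   uint32_t tokens[MAX_TOKENS];
   unsigned num_tokens;
};

static int
emit_dword(struct token_buf *buf, uint32_t dw)
{
   if (buf->num_tokens >= MAX_TOKENS)
      return 0;                     /* overflow; the real emitter reallocates */
   buf->tokens[buf->num_tokens++] = dw;
   return 1;
}

int
main(void)
{
   struct token_buf buf = { {0}, 0 };

   /* Token 0: version 4.0 plus the program type, packed into one dword.
    * The shift positions here are illustrative, not the real bitfield layout. */
   const uint32_t program_type = 0;          /* e.g. pixel shader */
   if (!emit_dword(&buf, (4u << 4) | (0u << 0) | (program_type << 16)))
      return 1;

   /* Token 1: total shader length in tokens; unknown yet, so emit zero. */
   if (!emit_dword(&buf, 0))
      return 1;

   /* ...declarations and instructions would be emitted here... */
   emit_dword(&buf, 0xdeadbeef);             /* stand-in for a real token */

   /* Tail: back-patch token 1 with the final count, as emit_vgpu10_tail()
    * does via emit_get_num_tokens(). */
   buf.tokens[1] = buf.num_tokens;

   printf("shader is %u tokens long\n", (unsigned) buf.tokens[1]);
   assert(buf.tokens[1] == 3);
   return 0;
}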
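
The updated surface_create()/surface_can_create() signatures in the svga_winsys.h hunk replace numFaces with numLayers and add a sampleCount. The sketch below is a hedged illustration of how a caller might choose numLayers per resource kind; the enum and helper are hypothetical, not the driver's actual code, and follow the hunk's own comments (cube surfaces use one layer per face, texture arrays use their array size, 3D slice count is carried in the surface size rather than in numLayers).

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the caller's texture-target enum. */
enum example_target { EX_TEX_2D, EX_TEX_3D, EX_TEX_CUBE, EX_TEX_2D_ARRAY };

/* Illustrative mapping only: one layer per cube face, the array size for
 * texture arrays, and a single layer otherwise (3D depth travels in the
 * SVGA3dSize, not in numLayers). */
static uint32_t
example_num_layers(enum example_target target, uint32_t array_size)
{
   switch (target) {
   case EX_TEX_CUBE:     return 6;
   case EX_TEX_2D_ARRAY: return array_size;
   default:              return 1;
   }
}

int
main(void)
{
   printf("cube: %u layers, 8-element array: %u layers\n",
          (unsigned) example_num_layers(EX_TEX_CUBE, 1),
          (unsigned) example_num_layers(EX_TEX_2D_ARRAY, 8));
   return 0;
}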
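
The final svga_shader_op.c hunk swaps the open-coded sizeof division for ARRAY_SIZE. Mesa supplies that macro from its util headers; the self-contained sketch below uses the usual portable definition to show the bounds check the function performs. The table contents are made up for illustration.

#include <stdio.h>

/* Usual portable definition; Mesa gets it from its util headers. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct sh_opcode_info {
   const char *mnemonic;
};

static const struct sh_opcode_info opcode_info[] = {
   { "nop" }, { "mov" }, { "add" },
};

/* Same shape as the bounds check in svga_opcode_info(), minus the assert. */
static const struct sh_opcode_info *
lookup_opcode(unsigned op)
{
   if (op >= ARRAY_SIZE(opcode_info))
      return NULL;
   return &opcode_info[op];
}

int
main(void)
{
   printf("%u opcodes, op 1 = %s\n",
          (unsigned) ARRAY_SIZE(opcode_info), lookup_opcode(1)->mnemonic);
   return 0;
}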