author | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000
---|---|---
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000
commit | c9223eed3c16cd3e98a8f56dda953d8f299de0e3 (patch) |
tree | 53e2a1c3f13bcf6b4ed201d7bc135e7213c94ebe | /lib/mesa/src/gallium/drivers/svga
parent | 6e8f2d062ab9c198239b9283b2b7ed12f4ea17d8 (diff) |
Import Mesa 11.2.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/svga')
87 files changed, 23892 insertions(+), 3718 deletions(-)
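
The largest single addition in this import is the new VGPU10ShaderTokens.h header, which describes the DX10-level shader byte stream as packed 32-bit token unions. For orientation only, the sketch below (not part of the import) shows how two of those unions are typically filled in to produce a program header and a RET instruction. It declares small local mirrors of the unions so it compiles on its own, assumes `uint32` maps to `uint32_t` as in svga_types.h, and uses a hypothetical `emit_dword()` output helper; keep in mind that bit-field layout is compiler-dependent, so the real header is only meant for the toolchains the driver supports.

```c
/*
 * Illustrative sketch: packing a VGPU10 program-header token and a RET
 * opcode token, mirroring the union layouts declared in the imported
 * VGPU10ShaderTokens.h.  Field values follow the enums in that header
 * (VGPU10_VERTEX_SHADER = 1, VGPU10_OPCODE_RET = 62).
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t uint32;   /* stand-in for the svga_types.h typedef */

/* Local mirror of VGPU10ProgramToken. */
typedef union {
   struct {
      unsigned int minorVersion : 4;
      unsigned int majorVersion : 4;
      unsigned int : 8;
      unsigned int programType : 16;   /* VGPU10_PROGRAM_TYPE */
   };
   uint32 value;
} ProgramToken;

/* Local mirror of VGPU10OpcodeToken0, with the unused middle fields
 * collapsed into one anonymous 13-bit pad (same bit positions). */
typedef union {
   struct {
      unsigned int opcodeType : 11;        /* VGPU10_OPCODE_TYPE */
      unsigned int : 13;
      unsigned int instructionLength : 7;  /* length in dwords */
      unsigned int extended : 1;
   };
   uint32 value;
} OpcodeToken0;

/* Hypothetical sink for the token stream. */
static void
emit_dword(uint32 dw)
{
   printf("0x%08x\n", (unsigned) dw);
}

int
main(void)
{
   ProgramToken header = { .value = 0 };
   OpcodeToken0 ret = { .value = 0 };

   header.majorVersion = 4;    /* shader model 4.0 */
   header.minorVersion = 0;
   header.programType = 1;     /* VGPU10_VERTEX_SHADER */

   ret.opcodeType = 62;        /* VGPU10_OPCODE_RET */
   ret.instructionLength = 1;  /* RET occupies a single token */

   emit_dword(header.value);
   /* A full stream would also carry a length dword and declarations
    * (DCL_* opcodes) between the header and the instructions. */
   emit_dword(ret.value);
   return 0;
}
```
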
diff --git a/lib/mesa/src/gallium/drivers/svga/Makefile.sources b/lib/mesa/src/gallium/drivers/svga/Makefile.sources index 276e6a8e2..5c022f437 100644 --- a/lib/mesa/src/gallium/drivers/svga/Makefile.sources +++ b/lib/mesa/src/gallium/drivers/svga/Makefile.sources @@ -1,6 +1,7 @@ C_SOURCES := \ svga_cmd.c \ svga_cmd.h \ + svga_cmd_vgpu10.c \ svga_context.c \ svga_context.h \ svga_debug.h \ @@ -12,6 +13,8 @@ C_SOURCES := \ svga_format.c \ svga_format.h \ svga_hw_reg.h \ + svga_link.c \ + svga_link.h \ svga_pipe_blend.c \ svga_pipe_blit.c \ svga_pipe_clear.c \ @@ -20,10 +23,12 @@ C_SOURCES := \ svga_pipe_draw.c \ svga_pipe_flush.c \ svga_pipe_fs.c \ + svga_pipe_gs.c \ svga_pipe_misc.c \ svga_pipe_query.c \ svga_pipe_rasterizer.c \ svga_pipe_sampler.c \ + svga_pipe_streamout.c \ svga_pipe_vertex.c \ svga_pipe_vs.c \ svga_public.h \ @@ -44,15 +49,19 @@ C_SOURCES := \ svga_shader.c \ svga_shader.h \ svga_state.c \ + svga_state.h \ svga_state_constants.c \ svga_state_framebuffer.c \ svga_state_fs.c \ - svga_state.h \ + svga_state_gs.c \ svga_state_need_swtnl.c \ svga_state_rss.c \ + svga_state_sampler.c \ + svga_state_tgsi_transform.c \ svga_state_tss.c \ svga_state_vdecl.c \ svga_state_vs.c \ + svga_streamout.h \ svga_surface.c \ svga_surface.h \ svga_swtnl_backend.c \ @@ -65,6 +74,7 @@ C_SOURCES := \ svga_tgsi_emit.h \ svga_tgsi.h \ svga_tgsi_insn.c \ + svga_tgsi_vgpu10.c \ svga_winsys.h \ \ svgadump/svga_dump.c \ @@ -80,6 +90,7 @@ SVGA_H_FILES := \ include/svga3d_caps.h \ include/svga3d_cmd.h \ include/svga3d_devcaps.h \ + include/svga3d_dx.h \ include/svga3d_limits.h \ include/svga3d_reg.h \ include/svga3d_shaderdefs.h \ @@ -89,5 +100,6 @@ SVGA_H_FILES := \ include/svga_overlay.h \ include/svga_reg.h \ include/svga_types.h \ + include/VGPU10ShaderTokens.h \ include/vmware_pack_begin.h \ include/vmware_pack_end.h diff --git a/lib/mesa/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h b/lib/mesa/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h new file mode 100644 index 000000000..444023589 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h @@ -0,0 +1,489 @@ +/********************************************************** + * Copyright 2007-2015 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * VGPU10ShaderTokens.h -- + * + * VGPU10 shader token definitions. 
+ * + */ + +#ifndef VGPU10SHADERTOKENS_H +#define VGPU10SHADERTOKENS_H + +/* Shader limits */ +#define VGPU10_MAX_VS_INPUTS 16 +#define VGPU10_MAX_VS_OUTPUTS 16 +#define VGPU10_MAX_GS_INPUTS 16 +#define VGPU10_MAX_GS_OUTPUTS 32 +#define VGPU10_MAX_FS_INPUTS 32 +#define VGPU10_MAX_FS_OUTPUTS 8 +#define VGPU10_MAX_TEMPS 4096 +#define VGPU10_MAX_CONSTANT_BUFFERS 14 +#define VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT 4096 +#define VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT 4096 +#define VGPU10_MAX_SAMPLERS 16 +#define VGPU10_MAX_RESOURCES 128 +#define VGPU10_MIN_TEXEL_FETCH_OFFSET -8 +#define VGPU10_MAX_TEXEL_FETCH_OFFSET 7 + +typedef enum { + VGPU10_PIXEL_SHADER = 0, + VGPU10_VERTEX_SHADER = 1, + VGPU10_GEOMETRY_SHADER = 2 +} VGPU10_PROGRAM_TYPE; + +typedef union { + struct { + unsigned int minorVersion : 4; + unsigned int majorVersion : 4; + unsigned int : 8; + unsigned int programType : 16; /* VGPU10_PROGRAM_TYPE */ + }; + uint32 value; +} VGPU10ProgramToken; + + +typedef enum { + VGPU10_OPCODE_ADD = 0, + VGPU10_OPCODE_AND = 1, + VGPU10_OPCODE_BREAK = 2, + VGPU10_OPCODE_BREAKC = 3, + VGPU10_OPCODE_CALL = 4, + VGPU10_OPCODE_CALLC = 5, + VGPU10_OPCODE_CASE = 6, + VGPU10_OPCODE_CONTINUE = 7, + VGPU10_OPCODE_CONTINUEC = 8, + VGPU10_OPCODE_CUT = 9, + VGPU10_OPCODE_DEFAULT = 10, + VGPU10_OPCODE_DERIV_RTX = 11, + VGPU10_OPCODE_DERIV_RTY = 12, + VGPU10_OPCODE_DISCARD = 13, + VGPU10_OPCODE_DIV = 14, + VGPU10_OPCODE_DP2 = 15, + VGPU10_OPCODE_DP3 = 16, + VGPU10_OPCODE_DP4 = 17, + VGPU10_OPCODE_ELSE = 18, + VGPU10_OPCODE_EMIT = 19, + VGPU10_OPCODE_EMITTHENCUT = 20, + VGPU10_OPCODE_ENDIF = 21, + VGPU10_OPCODE_ENDLOOP = 22, + VGPU10_OPCODE_ENDSWITCH = 23, + VGPU10_OPCODE_EQ = 24, + VGPU10_OPCODE_EXP = 25, + VGPU10_OPCODE_FRC = 26, + VGPU10_OPCODE_FTOI = 27, + VGPU10_OPCODE_FTOU = 28, + VGPU10_OPCODE_GE = 29, + VGPU10_OPCODE_IADD = 30, + VGPU10_OPCODE_IF = 31, + VGPU10_OPCODE_IEQ = 32, + VGPU10_OPCODE_IGE = 33, + VGPU10_OPCODE_ILT = 34, + VGPU10_OPCODE_IMAD = 35, + VGPU10_OPCODE_IMAX = 36, + VGPU10_OPCODE_IMIN = 37, + VGPU10_OPCODE_IMUL = 38, + VGPU10_OPCODE_INE = 39, + VGPU10_OPCODE_INEG = 40, + VGPU10_OPCODE_ISHL = 41, + VGPU10_OPCODE_ISHR = 42, + VGPU10_OPCODE_ITOF = 43, + VGPU10_OPCODE_LABEL = 44, + VGPU10_OPCODE_LD = 45, + VGPU10_OPCODE_LD_MS = 46, + VGPU10_OPCODE_LOG = 47, + VGPU10_OPCODE_LOOP = 48, + VGPU10_OPCODE_LT = 49, + VGPU10_OPCODE_MAD = 50, + VGPU10_OPCODE_MIN = 51, + VGPU10_OPCODE_MAX = 52, + VGPU10_OPCODE_CUSTOMDATA = 53, + VGPU10_OPCODE_MOV = 54, + VGPU10_OPCODE_MOVC = 55, + VGPU10_OPCODE_MUL = 56, + VGPU10_OPCODE_NE = 57, + VGPU10_OPCODE_NOP = 58, + VGPU10_OPCODE_NOT = 59, + VGPU10_OPCODE_OR = 60, + VGPU10_OPCODE_RESINFO = 61, + VGPU10_OPCODE_RET = 62, + VGPU10_OPCODE_RETC = 63, + VGPU10_OPCODE_ROUND_NE = 64, + VGPU10_OPCODE_ROUND_NI = 65, + VGPU10_OPCODE_ROUND_PI = 66, + VGPU10_OPCODE_ROUND_Z = 67, + VGPU10_OPCODE_RSQ = 68, + VGPU10_OPCODE_SAMPLE = 69, + VGPU10_OPCODE_SAMPLE_C = 70, + VGPU10_OPCODE_SAMPLE_C_LZ = 71, + VGPU10_OPCODE_SAMPLE_L = 72, + VGPU10_OPCODE_SAMPLE_D = 73, + VGPU10_OPCODE_SAMPLE_B = 74, + VGPU10_OPCODE_SQRT = 75, + VGPU10_OPCODE_SWITCH = 76, + VGPU10_OPCODE_SINCOS = 77, + VGPU10_OPCODE_UDIV = 78, + VGPU10_OPCODE_ULT = 79, + VGPU10_OPCODE_UGE = 80, + VGPU10_OPCODE_UMUL = 81, + VGPU10_OPCODE_UMAD = 82, + VGPU10_OPCODE_UMAX = 83, + VGPU10_OPCODE_UMIN = 84, + VGPU10_OPCODE_USHR = 85, + VGPU10_OPCODE_UTOF = 86, + VGPU10_OPCODE_XOR = 87, + VGPU10_OPCODE_DCL_RESOURCE = 88, + VGPU10_OPCODE_DCL_CONSTANT_BUFFER = 89, + VGPU10_OPCODE_DCL_SAMPLER = 90, + 
VGPU10_OPCODE_DCL_INDEX_RANGE = 91, + VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY = 92, + VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE = 93, + VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT = 94, + VGPU10_OPCODE_DCL_INPUT = 95, + VGPU10_OPCODE_DCL_INPUT_SGV = 96, + VGPU10_OPCODE_DCL_INPUT_SIV = 97, + VGPU10_OPCODE_DCL_INPUT_PS = 98, + VGPU10_OPCODE_DCL_INPUT_PS_SGV = 99, + VGPU10_OPCODE_DCL_INPUT_PS_SIV = 100, + VGPU10_OPCODE_DCL_OUTPUT = 101, + VGPU10_OPCODE_DCL_OUTPUT_SGV = 102, + VGPU10_OPCODE_DCL_OUTPUT_SIV = 103, + VGPU10_OPCODE_DCL_TEMPS = 104, + VGPU10_OPCODE_DCL_INDEXABLE_TEMP = 105, + VGPU10_OPCODE_DCL_GLOBAL_FLAGS = 106, + VGPU10_OPCODE_IDIV = 107, + VGPU10_NUM_OPCODES /* Should be the last entry. */ +} VGPU10_OPCODE_TYPE; + +typedef enum { + VGPU10_INTERPOLATION_UNDEFINED = 0, + VGPU10_INTERPOLATION_CONSTANT = 1, + VGPU10_INTERPOLATION_LINEAR = 2, + VGPU10_INTERPOLATION_LINEAR_CENTROID = 3, + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE = 4, + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5, + VGPU10_INTERPOLATION_LINEAR_SAMPLE = 6, /* DX10.1 */ + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7 /* DX10.1 */ +} VGPU10_INTERPOLATION_MODE; + +typedef enum { + VGPU10_RESOURCE_DIMENSION_UNKNOWN = 0, + VGPU10_RESOURCE_DIMENSION_BUFFER = 1, + VGPU10_RESOURCE_DIMENSION_TEXTURE1D = 2, + VGPU10_RESOURCE_DIMENSION_TEXTURE2D = 3, + VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS = 4, + VGPU10_RESOURCE_DIMENSION_TEXTURE3D = 5, + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE = 6, + VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY = 7, + VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY = 8, + VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9, + VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10 +} VGPU10_RESOURCE_DIMENSION; + +typedef enum { + VGPU10_SAMPLER_MODE_DEFAULT = 0, + VGPU10_SAMPLER_MODE_COMPARISON = 1, + VGPU10_SAMPLER_MODE_MONO = 2 +} VGPU10_SAMPLER_MODE; + +typedef enum { + VGPU10_INSTRUCTION_TEST_ZERO = 0, + VGPU10_INSTRUCTION_TEST_NONZERO = 1 +} VGPU10_INSTRUCTION_TEST_BOOLEAN; + +typedef enum { + VGPU10_CB_IMMEDIATE_INDEXED = 0, + VGPU10_CB_DYNAMIC_INDEXED = 1 +} VGPU10_CB_ACCESS_PATTERN; + +typedef enum { + VGPU10_PRIMITIVE_UNDEFINED = 0, + VGPU10_PRIMITIVE_POINT = 1, + VGPU10_PRIMITIVE_LINE = 2, + VGPU10_PRIMITIVE_TRIANGLE = 3, + VGPU10_PRIMITIVE_LINE_ADJ = 6, + VGPU10_PRIMITIVE_TRIANGLE_ADJ = 7 +} VGPU10_PRIMITIVE; + +typedef enum { + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST = 1, + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST = 2, + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP = 3, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13 +} VGPU10_PRIMITIVE_TOPOLOGY; + +typedef enum { + VGPU10_CUSTOMDATA_COMMENT = 0, + VGPU10_CUSTOMDATA_DEBUGINFO = 1, + VGPU10_CUSTOMDATA_OPAQUE = 2, + VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER = 3 +} VGPU10_CUSTOMDATA_CLASS; + +typedef enum { + VGPU10_RESINFO_RETURN_FLOAT = 0, + VGPU10_RESINFO_RETURN_RCPFLOAT = 1, + VGPU10_RESINFO_RETURN_UINT = 2 +} VGPU10_RESINFO_RETURN_TYPE; + +typedef union { + struct { + unsigned int opcodeType : 11; /* VGPU10_OPCODE_TYPE */ + unsigned int interpolationMode : 4; /* VGPU10_INTERPOLATION_MODE */ + unsigned int : 3; + unsigned int testBoolean : 1; /* VGPU10_INSTRUCTION_TEST_BOOLEAN */ + unsigned int : 5; + unsigned int instructionLength : 7; + unsigned int extended : 1; + }; + struct { + 
unsigned int : 11; + unsigned int resourceDimension : 5; /* VGPU10_RESOURCE_DIMENSION */ + }; + struct { + unsigned int : 11; + unsigned int samplerMode : 4; /* VGPU10_SAMPLER_MODE */ + }; + struct { + unsigned int : 11; + unsigned int accessPattern : 1; /* VGPU10_CB_ACCESS_PATTERN */ + }; + struct { + unsigned int : 11; + unsigned int primitive : 6; /* VGPU10_PRIMITIVE */ + }; + struct { + unsigned int : 11; + unsigned int primitiveTopology : 6; /* VGPU10_PRIMITIVE_TOPOLOGY */ + }; + struct { + unsigned int : 11; + unsigned int customDataClass : 21; /* VGPU10_CUSTOMDATA_CLASS */ + }; + struct { + unsigned int : 11; + unsigned int resinfoReturnType : 2; /* VGPU10_RESINFO_RETURN_TYPE */ + unsigned int saturate : 1; + }; + struct { + unsigned int : 11; + unsigned int refactoringAllowed : 1; + }; + uint32 value; +} VGPU10OpcodeToken0; + + +typedef enum { + VGPU10_EXTENDED_OPCODE_EMPTY = 0, + VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS +} VGPU10_EXTENDED_OPCODE_TYPE; + +typedef union { + struct { + unsigned int opcodeType : 6; /* VGPU10_EXTENDED_OPCODE_TYPE */ + unsigned int : 3; + unsigned int offsetU : 4; /* Two's complement. */ + unsigned int offsetV : 4; /* Two's complement. */ + unsigned int offsetW : 4; /* Two's complement. */ + unsigned int : 10; + unsigned int extended : 1; + }; + uint32 value; +} VGPU10OpcodeToken1; + + +typedef enum { + VGPU10_OPERAND_0_COMPONENT = 0, + VGPU10_OPERAND_1_COMPONENT = 1, + VGPU10_OPERAND_4_COMPONENT = 2, + VGPU10_OPERAND_N_COMPONENT = 3 /* Unused for now. */ +} VGPU10_OPERAND_NUM_COMPONENTS; + +typedef enum { + VGPU10_OPERAND_4_COMPONENT_MASK_MODE = 0, + VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE = 1, + VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE = 2 +} VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE; + +#define VGPU10_OPERAND_4_COMPONENT_MASK_X 0x1 +#define VGPU10_OPERAND_4_COMPONENT_MASK_Y 0x2 +#define VGPU10_OPERAND_4_COMPONENT_MASK_Z 0x4 +#define VGPU10_OPERAND_4_COMPONENT_MASK_W 0x8 + +#define VGPU10_OPERAND_4_COMPONENT_MASK_XY (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_Y) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XZ (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_Z) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XW (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_YZ (VGPU10_OPERAND_4_COMPONENT_MASK_Y | VGPU10_OPERAND_4_COMPONENT_MASK_Z) +#define VGPU10_OPERAND_4_COMPONENT_MASK_YW (VGPU10_OPERAND_4_COMPONENT_MASK_Y | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_ZW (VGPU10_OPERAND_4_COMPONENT_MASK_Z | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZ (VGPU10_OPERAND_4_COMPONENT_MASK_XY | VGPU10_OPERAND_4_COMPONENT_MASK_Z) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XYW (VGPU10_OPERAND_4_COMPONENT_MASK_XY | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XZW (VGPU10_OPERAND_4_COMPONENT_MASK_XZ | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_YZW (VGPU10_OPERAND_4_COMPONENT_MASK_YZ | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZW (VGPU10_OPERAND_4_COMPONENT_MASK_XYZ | VGPU10_OPERAND_4_COMPONENT_MASK_W) +#define VGPU10_OPERAND_4_COMPONENT_MASK_ALL VGPU10_OPERAND_4_COMPONENT_MASK_XYZW + +#define VGPU10_REGISTER_INDEX_FROM_SEMANTIC 0xffffffff + +typedef enum { + VGPU10_COMPONENT_X = 0, + VGPU10_COMPONENT_Y = 1, + VGPU10_COMPONENT_Z = 2, + VGPU10_COMPONENT_W = 3 +} VGPU10_COMPONENT_NAME; + +typedef enum { + 
VGPU10_OPERAND_TYPE_TEMP = 0, + VGPU10_OPERAND_TYPE_INPUT = 1, + VGPU10_OPERAND_TYPE_OUTPUT = 2, + VGPU10_OPERAND_TYPE_INDEXABLE_TEMP = 3, + VGPU10_OPERAND_TYPE_IMMEDIATE32 = 4, + VGPU10_OPERAND_TYPE_IMMEDIATE64 = 5, + VGPU10_OPERAND_TYPE_SAMPLER = 6, + VGPU10_OPERAND_TYPE_RESOURCE = 7, + VGPU10_OPERAND_TYPE_CONSTANT_BUFFER = 8, + VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9, + VGPU10_OPERAND_TYPE_LABEL = 10, + VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID = 11, + VGPU10_OPERAND_TYPE_OUTPUT_DEPTH = 12, + VGPU10_OPERAND_TYPE_NULL = 13, + VGPU10_OPERAND_TYPE_RASTERIZER = 14, /* DX10.1 */ + VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15 /* DX10.1 */ +} VGPU10_OPERAND_TYPE; + +typedef enum { + VGPU10_OPERAND_INDEX_0D = 0, + VGPU10_OPERAND_INDEX_1D = 1, + VGPU10_OPERAND_INDEX_2D = 2, + VGPU10_OPERAND_INDEX_3D = 3 +} VGPU10_OPERAND_INDEX_DIMENSION; + +typedef enum { + VGPU10_OPERAND_INDEX_IMMEDIATE32 = 0, + VGPU10_OPERAND_INDEX_IMMEDIATE64 = 1, + VGPU10_OPERAND_INDEX_RELATIVE = 2, + VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, + VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4 +} VGPU10_OPERAND_INDEX_REPRESENTATION; + +typedef union { + struct { + unsigned int numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */ + unsigned int selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */ + unsigned int mask : 4; /* D3D10_SB_OPERAND_4_COMPONENT_MASK_* */ + unsigned int : 4; + unsigned int operandType : 8; /* VGPU10_OPERAND_TYPE */ + unsigned int indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */ + unsigned int index0Representation : 3; /* VGPU10_OPERAND_INDEX_REPRESENTATION */ + unsigned int index1Representation : 3; /* VGPU10_OPERAND_INDEX_REPRESENTATION */ + unsigned int : 3; + unsigned int extended : 1; + }; + struct { + unsigned int : 4; + unsigned int swizzleX : 2; /* VGPU10_COMPONENT_NAME */ + unsigned int swizzleY : 2; /* VGPU10_COMPONENT_NAME */ + unsigned int swizzleZ : 2; /* VGPU10_COMPONENT_NAME */ + unsigned int swizzleW : 2; /* VGPU10_COMPONENT_NAME */ + }; + struct { + unsigned int : 4; + unsigned int selectMask : 2; /* VGPU10_COMPONENT_NAME */ + }; + uint32 value; +} VGPU10OperandToken0; + + +typedef enum { + VGPU10_EXTENDED_OPERAND_EMPTY = 0, + VGPU10_EXTENDED_OPERAND_MODIFIER = 1 +} VGPU10_EXTENDED_OPERAND_TYPE; + +typedef enum { + VGPU10_OPERAND_MODIFIER_NONE = 0, + VGPU10_OPERAND_MODIFIER_NEG = 1, + VGPU10_OPERAND_MODIFIER_ABS = 2, + VGPU10_OPERAND_MODIFIER_ABSNEG = 3 +} VGPU10_OPERAND_MODIFIER; + +typedef union { + struct { + unsigned int extendedOperandType : 6; /* VGPU10_EXTENDED_OPERAND_TYPE */ + unsigned int operandModifier : 8; /* VGPU10_OPERAND_MODIFIER */ + unsigned int : 17; + unsigned int extended : 1; + }; + uint32 value; +} VGPU10OperandToken1; + + +typedef enum { + VGPU10_RETURN_TYPE_UNORM = 1, + VGPU10_RETURN_TYPE_SNORM = 2, + VGPU10_RETURN_TYPE_SINT = 3, + VGPU10_RETURN_TYPE_UINT = 4, + VGPU10_RETURN_TYPE_FLOAT = 5, + VGPU10_RETURN_TYPE_MIXED = 6 +} VGPU10_RESOURCE_RETURN_TYPE; + +typedef union { + struct { + unsigned int component0 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + unsigned int component1 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + unsigned int component2 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + unsigned int component3 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */ + }; + uint32 value; +} VGPU10ResourceReturnTypeToken; + + +typedef enum { + VGPU10_NAME_UNDEFINED = 0, + VGPU10_NAME_POSITION = 1, + VGPU10_NAME_CLIP_DISTANCE = 2, + VGPU10_NAME_CULL_DISTANCE = 3, + VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX = 4, + VGPU10_NAME_VIEWPORT_ARRAY_INDEX = 5, + 
VGPU10_NAME_VERTEX_ID = 6, + VGPU10_NAME_PRIMITIVE_ID = 7, + VGPU10_NAME_INSTANCE_ID = 8, + VGPU10_NAME_IS_FRONT_FACE = 9, + VGPU10_NAME_SAMPLE_INDEX = 10, +} VGPU10_SYSTEM_NAME; + +typedef union { + struct { + unsigned int name : 16; /* VGPU10_SYSTEM_NAME */ + }; + uint32 value; +} VGPU10NameToken; + +#endif diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h index c6c8e3667..01c8ba790 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_caps.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -111,4 +111,4 @@ SVGA3dCapsRecord; typedef uint32 SVGA3dCapPair[2]; -#endif // _SVGA3D_CAPS_H_ +#endif diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h index 8953bf05f..c843417e8 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_cmd.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -53,130 +53,227 @@ * and up. */ -#define SVGA_3D_CMD_LEGACY_BASE 1000 -#define SVGA_3D_CMD_BASE 1040 - -#define SVGA_3D_CMD_SURFACE_DEFINE 1040 -#define SVGA_3D_CMD_SURFACE_DESTROY 1041 -#define SVGA_3D_CMD_SURFACE_COPY 1042 -#define SVGA_3D_CMD_SURFACE_STRETCHBLT 1043 -#define SVGA_3D_CMD_SURFACE_DMA 1044 -#define SVGA_3D_CMD_CONTEXT_DEFINE 1045 -#define SVGA_3D_CMD_CONTEXT_DESTROY 1046 -#define SVGA_3D_CMD_SETTRANSFORM 1047 -#define SVGA_3D_CMD_SETZRANGE 1048 -#define SVGA_3D_CMD_SETRENDERSTATE 1049 -#define SVGA_3D_CMD_SETRENDERTARGET 1050 -#define SVGA_3D_CMD_SETTEXTURESTATE 1051 -#define SVGA_3D_CMD_SETMATERIAL 1052 -#define SVGA_3D_CMD_SETLIGHTDATA 1053 -#define SVGA_3D_CMD_SETLIGHTENABLED 1054 -#define SVGA_3D_CMD_SETVIEWPORT 1055 -#define SVGA_3D_CMD_SETCLIPPLANE 1056 -#define SVGA_3D_CMD_CLEAR 1057 -#define SVGA_3D_CMD_PRESENT 1058 -#define SVGA_3D_CMD_SHADER_DEFINE 1059 -#define SVGA_3D_CMD_SHADER_DESTROY 1060 -#define SVGA_3D_CMD_SET_SHADER 1061 -#define SVGA_3D_CMD_SET_SHADER_CONST 1062 -#define SVGA_3D_CMD_DRAW_PRIMITIVES 1063 -#define SVGA_3D_CMD_SETSCISSORRECT 1064 -#define SVGA_3D_CMD_BEGIN_QUERY 1065 -#define SVGA_3D_CMD_END_QUERY 1066 -#define SVGA_3D_CMD_WAIT_FOR_QUERY 1067 -#define SVGA_3D_CMD_PRESENT_READBACK 1068 -#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN 1069 -#define SVGA_3D_CMD_SURFACE_DEFINE_V2 1070 -#define SVGA_3D_CMD_GENERATE_MIPMAPS 1071 -#define SVGA_3D_CMD_VIDEO_CREATE_DECODER 1072 -#define SVGA_3D_CMD_VIDEO_DESTROY_DECODER 1073 -#define SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR 1074 -#define SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR 1075 -#define SVGA_3D_CMD_VIDEO_DECODE_START_FRAME 1076 -#define SVGA_3D_CMD_VIDEO_DECODE_RENDER 1077 -#define SVGA_3D_CMD_VIDEO_DECODE_END_FRAME 1078 -#define SVGA_3D_CMD_VIDEO_PROCESS_FRAME 1079 -#define SVGA_3D_CMD_ACTIVATE_SURFACE 1080 -#define SVGA_3D_CMD_DEACTIVATE_SURFACE 1081 -#define SVGA_3D_CMD_SCREEN_DMA 1082 -#define 
SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE 1083 -#define SVGA_3D_CMD_OPEN_CONTEXT_SURFACE 1084 - -#define SVGA_3D_CMD_LOGICOPS_BITBLT 1085 -#define SVGA_3D_CMD_LOGICOPS_TRANSBLT 1086 -#define SVGA_3D_CMD_LOGICOPS_STRETCHBLT 1087 -#define SVGA_3D_CMD_LOGICOPS_COLORFILL 1088 -#define SVGA_3D_CMD_LOGICOPS_ALPHABLEND 1089 -#define SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND 1090 - -#define SVGA_3D_CMD_SET_OTABLE_BASE 1091 -#define SVGA_3D_CMD_READBACK_OTABLE 1092 - -#define SVGA_3D_CMD_DEFINE_GB_MOB 1093 -#define SVGA_3D_CMD_DESTROY_GB_MOB 1094 -#define SVGA_3D_CMD_REDEFINE_GB_MOB 1095 -#define SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING 1096 - -#define SVGA_3D_CMD_DEFINE_GB_SURFACE 1097 -#define SVGA_3D_CMD_DESTROY_GB_SURFACE 1098 -#define SVGA_3D_CMD_BIND_GB_SURFACE 1099 -#define SVGA_3D_CMD_COND_BIND_GB_SURFACE 1100 -#define SVGA_3D_CMD_UPDATE_GB_IMAGE 1101 -#define SVGA_3D_CMD_UPDATE_GB_SURFACE 1102 -#define SVGA_3D_CMD_READBACK_GB_IMAGE 1103 -#define SVGA_3D_CMD_READBACK_GB_SURFACE 1104 -#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE 1105 -#define SVGA_3D_CMD_INVALIDATE_GB_SURFACE 1106 - -#define SVGA_3D_CMD_DEFINE_GB_CONTEXT 1107 -#define SVGA_3D_CMD_DESTROY_GB_CONTEXT 1108 -#define SVGA_3D_CMD_BIND_GB_CONTEXT 1109 -#define SVGA_3D_CMD_READBACK_GB_CONTEXT 1110 -#define SVGA_3D_CMD_INVALIDATE_GB_CONTEXT 1111 - -#define SVGA_3D_CMD_DEFINE_GB_SHADER 1112 -#define SVGA_3D_CMD_DESTROY_GB_SHADER 1113 -#define SVGA_3D_CMD_BIND_GB_SHADER 1114 - -#define SVGA_3D_CMD_BIND_SHADERCONSTS 1115 - -#define SVGA_3D_CMD_BEGIN_GB_QUERY 1116 -#define SVGA_3D_CMD_END_GB_QUERY 1117 -#define SVGA_3D_CMD_WAIT_FOR_GB_QUERY 1118 - -#define SVGA_3D_CMD_NOP 1119 - -#define SVGA_3D_CMD_ENABLE_GART 1120 -#define SVGA_3D_CMD_DISABLE_GART 1121 -#define SVGA_3D_CMD_MAP_MOB_INTO_GART 1122 -#define SVGA_3D_CMD_UNMAP_GART_RANGE 1123 - -#define SVGA_3D_CMD_DEFINE_GB_SCREENTARGET 1124 -#define SVGA_3D_CMD_DESTROY_GB_SCREENTARGET 1125 -#define SVGA_3D_CMD_BIND_GB_SCREENTARGET 1126 -#define SVGA_3D_CMD_UPDATE_GB_SCREENTARGET 1127 - -#define SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL 1128 -#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL 1129 - -#define SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE 1130 - -#define SVGA_3D_CMD_GB_SCREEN_DMA 1131 -#define SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH 1132 -#define SVGA_3D_CMD_GB_MOB_FENCE 1133 -#define SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 1134 -#define SVGA_3D_CMD_DEFINE_GB_MOB64 1135 -#define SVGA_3D_CMD_REDEFINE_GB_MOB64 1136 -#define SVGA_3D_CMD_NOP_ERROR 1137 - -#define SVGA_3D_CMD_RESERVED1 1138 -#define SVGA_3D_CMD_RESERVED2 1139 -#define SVGA_3D_CMD_RESERVED3 1140 -#define SVGA_3D_CMD_RESERVED4 1141 -#define SVGA_3D_CMD_RESERVED5 1142 - -#define SVGA_3D_CMD_MAX 1203 -#define SVGA_3D_CMD_FUTURE_MAX 3000 +typedef enum { + SVGA_3D_CMD_LEGACY_BASE = 1000, + SVGA_3D_CMD_BASE = 1040, + + SVGA_3D_CMD_SURFACE_DEFINE = 1040, + SVGA_3D_CMD_SURFACE_DESTROY = 1041, + SVGA_3D_CMD_SURFACE_COPY = 1042, + SVGA_3D_CMD_SURFACE_STRETCHBLT = 1043, + SVGA_3D_CMD_SURFACE_DMA = 1044, + SVGA_3D_CMD_CONTEXT_DEFINE = 1045, + SVGA_3D_CMD_CONTEXT_DESTROY = 1046, + SVGA_3D_CMD_SETTRANSFORM = 1047, + SVGA_3D_CMD_SETZRANGE = 1048, + SVGA_3D_CMD_SETRENDERSTATE = 1049, + SVGA_3D_CMD_SETRENDERTARGET = 1050, + SVGA_3D_CMD_SETTEXTURESTATE = 1051, + SVGA_3D_CMD_SETMATERIAL = 1052, + SVGA_3D_CMD_SETLIGHTDATA = 1053, + SVGA_3D_CMD_SETLIGHTENABLED = 1054, + SVGA_3D_CMD_SETVIEWPORT = 1055, + SVGA_3D_CMD_SETCLIPPLANE = 1056, + SVGA_3D_CMD_CLEAR = 1057, + SVGA_3D_CMD_PRESENT = 1058, + SVGA_3D_CMD_SHADER_DEFINE = 1059, + SVGA_3D_CMD_SHADER_DESTROY = 1060, + 
SVGA_3D_CMD_SET_SHADER = 1061, + SVGA_3D_CMD_SET_SHADER_CONST = 1062, + SVGA_3D_CMD_DRAW_PRIMITIVES = 1063, + SVGA_3D_CMD_SETSCISSORRECT = 1064, + SVGA_3D_CMD_BEGIN_QUERY = 1065, + SVGA_3D_CMD_END_QUERY = 1066, + SVGA_3D_CMD_WAIT_FOR_QUERY = 1067, + SVGA_3D_CMD_PRESENT_READBACK = 1068, + SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN = 1069, + SVGA_3D_CMD_SURFACE_DEFINE_V2 = 1070, + SVGA_3D_CMD_GENERATE_MIPMAPS = 1071, + SVGA_3D_CMD_VIDEO_CREATE_DECODER = 1072, + SVGA_3D_CMD_VIDEO_DESTROY_DECODER = 1073, + SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR = 1074, + SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR = 1075, + SVGA_3D_CMD_VIDEO_DECODE_START_FRAME = 1076, + SVGA_3D_CMD_VIDEO_DECODE_RENDER = 1077, + SVGA_3D_CMD_VIDEO_DECODE_END_FRAME = 1078, + SVGA_3D_CMD_VIDEO_PROCESS_FRAME = 1079, + SVGA_3D_CMD_ACTIVATE_SURFACE = 1080, + SVGA_3D_CMD_DEACTIVATE_SURFACE = 1081, + SVGA_3D_CMD_SCREEN_DMA = 1082, + SVGA_3D_CMD_DEAD1 = 1083, + SVGA_3D_CMD_DEAD2 = 1084, + + SVGA_3D_CMD_LOGICOPS_BITBLT = 1085, + SVGA_3D_CMD_LOGICOPS_TRANSBLT = 1086, + SVGA_3D_CMD_LOGICOPS_STRETCHBLT = 1087, + SVGA_3D_CMD_LOGICOPS_COLORFILL = 1088, + SVGA_3D_CMD_LOGICOPS_ALPHABLEND = 1089, + SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND = 1090, + + SVGA_3D_CMD_SET_OTABLE_BASE = 1091, + SVGA_3D_CMD_READBACK_OTABLE = 1092, + + SVGA_3D_CMD_DEFINE_GB_MOB = 1093, + SVGA_3D_CMD_DESTROY_GB_MOB = 1094, + SVGA_3D_CMD_DEAD3 = 1095, + SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING = 1096, + + SVGA_3D_CMD_DEFINE_GB_SURFACE = 1097, + SVGA_3D_CMD_DESTROY_GB_SURFACE = 1098, + SVGA_3D_CMD_BIND_GB_SURFACE = 1099, + SVGA_3D_CMD_COND_BIND_GB_SURFACE = 1100, + SVGA_3D_CMD_UPDATE_GB_IMAGE = 1101, + SVGA_3D_CMD_UPDATE_GB_SURFACE = 1102, + SVGA_3D_CMD_READBACK_GB_IMAGE = 1103, + SVGA_3D_CMD_READBACK_GB_SURFACE = 1104, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE = 1105, + SVGA_3D_CMD_INVALIDATE_GB_SURFACE = 1106, + + SVGA_3D_CMD_DEFINE_GB_CONTEXT = 1107, + SVGA_3D_CMD_DESTROY_GB_CONTEXT = 1108, + SVGA_3D_CMD_BIND_GB_CONTEXT = 1109, + SVGA_3D_CMD_READBACK_GB_CONTEXT = 1110, + SVGA_3D_CMD_INVALIDATE_GB_CONTEXT = 1111, + + SVGA_3D_CMD_DEFINE_GB_SHADER = 1112, + SVGA_3D_CMD_DESTROY_GB_SHADER = 1113, + SVGA_3D_CMD_BIND_GB_SHADER = 1114, + + SVGA_3D_CMD_SET_OTABLE_BASE64 = 1115, + + SVGA_3D_CMD_BEGIN_GB_QUERY = 1116, + SVGA_3D_CMD_END_GB_QUERY = 1117, + SVGA_3D_CMD_WAIT_FOR_GB_QUERY = 1118, + + SVGA_3D_CMD_NOP = 1119, + + SVGA_3D_CMD_ENABLE_GART = 1120, + SVGA_3D_CMD_DISABLE_GART = 1121, + SVGA_3D_CMD_MAP_MOB_INTO_GART = 1122, + SVGA_3D_CMD_UNMAP_GART_RANGE = 1123, + + SVGA_3D_CMD_DEFINE_GB_SCREENTARGET = 1124, + SVGA_3D_CMD_DESTROY_GB_SCREENTARGET = 1125, + SVGA_3D_CMD_BIND_GB_SCREENTARGET = 1126, + SVGA_3D_CMD_UPDATE_GB_SCREENTARGET = 1127, + + SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL = 1128, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL = 1129, + + SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE = 1130, + + SVGA_3D_CMD_GB_SCREEN_DMA = 1131, + SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH = 1132, + SVGA_3D_CMD_GB_MOB_FENCE = 1133, + SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 = 1134, + SVGA_3D_CMD_DEFINE_GB_MOB64 = 1135, + SVGA_3D_CMD_REDEFINE_GB_MOB64 = 1136, + SVGA_3D_CMD_NOP_ERROR = 1137, + + SVGA_3D_CMD_SET_VERTEX_STREAMS = 1138, + SVGA_3D_CMD_SET_VERTEX_DECLS = 1139, + SVGA_3D_CMD_SET_VERTEX_DIVISORS = 1140, + SVGA_3D_CMD_DRAW = 1141, + SVGA_3D_CMD_DRAW_INDEXED = 1142, + + /* + * DX10 Commands + */ + SVGA_3D_CMD_DX_MIN = 1143, + SVGA_3D_CMD_DX_DEFINE_CONTEXT = 1143, + SVGA_3D_CMD_DX_DESTROY_CONTEXT = 1144, + SVGA_3D_CMD_DX_BIND_CONTEXT = 1145, + SVGA_3D_CMD_DX_READBACK_CONTEXT = 1146, + SVGA_3D_CMD_DX_INVALIDATE_CONTEXT = 1147, + 
SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER = 1148, + SVGA_3D_CMD_DX_SET_SHADER_RESOURCES = 1149, + SVGA_3D_CMD_DX_SET_SHADER = 1150, + SVGA_3D_CMD_DX_SET_SAMPLERS = 1151, + SVGA_3D_CMD_DX_DRAW = 1152, + SVGA_3D_CMD_DX_DRAW_INDEXED = 1153, + SVGA_3D_CMD_DX_DRAW_INSTANCED = 1154, + SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED = 1155, + SVGA_3D_CMD_DX_DRAW_AUTO = 1156, + SVGA_3D_CMD_DX_SET_INPUT_LAYOUT = 1157, + SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS = 1158, + SVGA_3D_CMD_DX_SET_INDEX_BUFFER = 1159, + SVGA_3D_CMD_DX_SET_TOPOLOGY = 1160, + SVGA_3D_CMD_DX_SET_RENDERTARGETS = 1161, + SVGA_3D_CMD_DX_SET_BLEND_STATE = 1162, + SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE = 1163, + SVGA_3D_CMD_DX_SET_RASTERIZER_STATE = 1164, + SVGA_3D_CMD_DX_DEFINE_QUERY = 1165, + SVGA_3D_CMD_DX_DESTROY_QUERY = 1166, + SVGA_3D_CMD_DX_BIND_QUERY = 1167, + SVGA_3D_CMD_DX_SET_QUERY_OFFSET = 1168, + SVGA_3D_CMD_DX_BEGIN_QUERY = 1169, + SVGA_3D_CMD_DX_END_QUERY = 1170, + SVGA_3D_CMD_DX_READBACK_QUERY = 1171, + SVGA_3D_CMD_DX_SET_PREDICATION = 1172, + SVGA_3D_CMD_DX_SET_SOTARGETS = 1173, + SVGA_3D_CMD_DX_SET_VIEWPORTS = 1174, + SVGA_3D_CMD_DX_SET_SCISSORRECTS = 1175, + SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW = 1176, + SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW = 1177, + SVGA_3D_CMD_DX_PRED_COPY_REGION = 1178, + SVGA_3D_CMD_DX_PRED_COPY = 1179, + SVGA_3D_CMD_DX_STRETCHBLT = 1180, + SVGA_3D_CMD_DX_GENMIPS = 1181, + SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE = 1182, + SVGA_3D_CMD_DX_READBACK_SUBRESOURCE = 1183, + SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE = 1184, + SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW = 1185, + SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW = 1186, + SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW = 1187, + SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW = 1188, + SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW = 1189, + SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW = 1190, + SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT = 1191, + SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT = 1192, + SVGA_3D_CMD_DX_DEFINE_BLEND_STATE = 1193, + SVGA_3D_CMD_DX_DESTROY_BLEND_STATE = 1194, + SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE = 1195, + SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE = 1196, + SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE = 1197, + SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE = 1198, + SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE = 1199, + SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE = 1200, + SVGA_3D_CMD_DX_DEFINE_SHADER = 1201, + SVGA_3D_CMD_DX_DESTROY_SHADER = 1202, + SVGA_3D_CMD_DX_BIND_SHADER = 1203, + SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT = 1204, + SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT = 1205, + SVGA_3D_CMD_DX_SET_STREAMOUTPUT = 1206, + SVGA_3D_CMD_DX_SET_COTABLE = 1207, + SVGA_3D_CMD_DX_READBACK_COTABLE = 1208, + SVGA_3D_CMD_DX_BUFFER_COPY = 1209, + SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER = 1210, + SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK = 1211, + SVGA_3D_CMD_DX_MOVE_QUERY = 1212, + SVGA_3D_CMD_DX_BIND_ALL_QUERY = 1213, + SVGA_3D_CMD_DX_READBACK_ALL_QUERY = 1214, + SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER = 1215, + SVGA_3D_CMD_DX_MOB_FENCE_64 = 1216, + SVGA_3D_CMD_DX_BIND_ALL_SHADER = 1217, + SVGA_3D_CMD_DX_HINT = 1218, + SVGA_3D_CMD_DX_BUFFER_UPDATE = 1219, + SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET = 1220, + SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET = 1221, + SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET = 1222, + + /* + * Reserve some IDs to be used for the DX11 shader types. 
+ */ + SVGA_3D_CMD_DX_RESERVED1 = 1223, + SVGA_3D_CMD_DX_RESERVED2 = 1224, + SVGA_3D_CMD_DX_RESERVED3 = 1225, + + SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER = 1226, + + SVGA_3D_CMD_DX_MAX = 1227, + SVGA_3D_CMD_MAX = 1227, + SVGA_3D_CMD_FUTURE_MAX = 3000 +} SVGAFifo3dCmdId; /* * FIFO command format definitions: @@ -194,54 +291,6 @@ struct { #include "vmware_pack_end.h" SVGA3dCmdHeader; -typedef enum { - SVGA3D_SURFACE_CUBEMAP = (1 << 0), - - /* - * HINT flags are not enforced by the device but are useful for - * performance. - */ - SVGA3D_SURFACE_HINT_STATIC = (1 << 1), - SVGA3D_SURFACE_HINT_DYNAMIC = (1 << 2), - SVGA3D_SURFACE_HINT_INDEXBUFFER = (1 << 3), - SVGA3D_SURFACE_HINT_VERTEXBUFFER = (1 << 4), - SVGA3D_SURFACE_HINT_TEXTURE = (1 << 5), - SVGA3D_SURFACE_HINT_RENDERTARGET = (1 << 6), - SVGA3D_SURFACE_HINT_DEPTHSTENCIL = (1 << 7), - SVGA3D_SURFACE_HINT_WRITEONLY = (1 << 8), - SVGA3D_SURFACE_MASKABLE_ANTIALIAS = (1 << 9), - SVGA3D_SURFACE_AUTOGENMIPMAPS = (1 << 10), - SVGA3D_SURFACE_DECODE_RENDERTARGET = (1 << 11), - - /* - * Is this surface using a base-level pitch for it's mob backing? - * - * This flag is not intended to be set by guest-drivers, but is instead - * set by the device when the surface is bound to a mob with a specified - * pitch. - */ - SVGA3D_SURFACE_MOB_PITCH = (1 << 12), - - SVGA3D_SURFACE_INACTIVE = (1 << 13), - SVGA3D_SURFACE_HINT_RT_LOCKABLE = (1 << 14), - SVGA3D_SURFACE_VOLUME = (1 << 15), - - /* - * Required to be set on a surface to bind it to a screen target. - */ - SVGA3D_SURFACE_SCREENTARGET = (1 << 16), - - SVGA3D_SURFACE_RESERVED1 = (1 << 17), - SVGA3D_SURFACE_1D = (1 << 18), - SVGA3D_SURFACE_ARRAY = (1 << 19), - -} SVGA3dSurfaceFlags; - -#define SVGA3D_SURFACE_HB_DISALLOWED_MASK (SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_MOB_PITCH | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT) - typedef #include "vmware_pack_begin.h" struct { @@ -669,6 +718,128 @@ SVGA3dCmdDrawPrimitives; /* SVGA_3D_CMD_DRAWPRIMITIVES */ typedef #include "vmware_pack_begin.h" struct { + uint32 cid; + + uint32 primitiveCount; /* How many primitives to render */ + uint32 startVertexLocation; /* Which vertex do we start rendering at. */ + + uint8 primitiveType; /* SVGA3dPrimitiveType */ + uint8 padding[3]; +} +#include "vmware_pack_end.h" +SVGA3dCmdDraw; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + + uint8 primitiveType; /* SVGA3dPrimitiveType */ + + uint32 indexBufferSid; /* Valid index buffer sid. */ + uint32 indexBufferOffset; /* Byte offset into the vertex buffer, almost */ + /* always 0 for DX9 guests, non-zero for OpenGL */ + /* guests. We can't represent non-multiple of */ + /* stride offsets in D3D9Renderer... */ + uint8 indexBufferStride; /* Allowable values = 1, 2, or 4 */ + + int32 baseVertexLocation; /* Bias applied to the index when selecting a */ + /* vertex from the streams, may be negative */ + + uint32 primitiveCount; /* How many primitives to render */ + uint32 pad0; + uint16 pad1; +} +#include "vmware_pack_end.h" +SVGA3dCmdDrawIndexed; + +typedef +#include "vmware_pack_begin.h" +struct { + /* + * Describe a vertex array's data type, and define how it is to be + * used by the fixed function pipeline or the vertex shader. It + * isn't useful to have two VertexDecls with the same + * VertexArrayIdentity in one draw call. 
+ */ + uint16 streamOffset; + uint8 stream; + uint8 type; /* SVGA3dDeclType */ + uint8 method; /* SVGA3dDeclMethod */ + uint8 usage; /* SVGA3dDeclUsage */ + uint8 usageIndex; + uint8 padding; + +} +#include "vmware_pack_end.h" +SVGA3dVertexElement; + +/* + * Should the vertex element respect the stream value? The high bit of the + * stream should be set to indicate that the stream should be respected. If + * the high bit is not set, the stream will be ignored and replaced by the index + * of the position of the currently considered vertex element. + * + * All guests should set this bit and correctly specify the stream going + * forward. + */ +#define SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM (1 << 7) + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + + uint32 numElements; + + /* + * Followed by numElements SVGA3dVertexElement structures. + * + * If numElements < SVGA3D_MAX_VERTEX_ARRAYS, the remaining elements + * are cleared and will not be used by following draws. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdSetVertexDecls; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 sid; + uint32 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dVertexStream; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + + uint32 numStreams; + /* + * Followed by numStream SVGA3dVertexStream structures. + * + * If numStreams < SVGA3D_MAX_VERTEX_ARRAYS, the remaining streams + * are cleared and will not be used by following draws. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdSetVertexStreams; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + uint32 numDivisors; +} +#include "vmware_pack_end.h" +SVGA3dCmdSetVertexDivisors; + +typedef +#include "vmware_pack_begin.h" +struct { uint32 stage; SVGA3dTextureStateName name; union { @@ -989,38 +1160,6 @@ struct SVGA3dCmdScreenDMA { SVGA3dCmdScreenDMA; /* SVGA_3D_CMD_SCREEN_DMA */ /* - * Set Unity Surface Cookie - * - * Associates the supplied cookie with the surface id for use with - * Unity. This cookie is a hint from guest to host, there is no way - * for the guest to readback the cookie and the host is free to drop - * the cookie association at will. The default value for the cookie - * on all surfaces is 0. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdSetUnitySurfaceCookie { - uint32 sid; - uint64 cookie; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetUnitySurfaceCookie; /* SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE */ - -/* - * Open a context-specific surface in a non-context-specific manner. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdOpenContextSurface { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdOpenContextSurface; /* SVGA_3D_CMD_OPEN_CONTEXT_SURFACE */ - - -/* * Logic ops */ @@ -1139,8 +1278,8 @@ struct SVGA3dCmdLogicOpsClearTypeBlend { uint32 gamma; uint32 color; uint32 color2; - int alphaOffsetX; - int alphaOffsetY; + int32 alphaOffsetX; + int32 alphaOffsetY; /* Followed by variable number of SVGA3dBox structures */ } #include "vmware_pack_end.h" @@ -1151,12 +1290,80 @@ SVGA3dCmdLogicOpsClearTypeBlend; /* SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND */ * Guest-backed objects definitions. 
*/ +typedef +#include "vmware_pack_begin.h" +struct { + SVGAMobFormat ptDepth; + uint32 sizeInBytes; + PPN64 base; +} +#include "vmware_pack_end.h" +SVGAOTableMobEntry; +#define SVGA3D_OTABLE_MOB_ENTRY_SIZE (sizeof(SVGAOTableMobEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceFormat format; + SVGA3dSurfaceFlags surfaceFlags; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + SVGAMobId mobid; + uint32 arraySize; + uint32 mobPitch; + uint32 pad[5]; +} +#include "vmware_pack_end.h" +SVGAOTableSurfaceEntry; +#define SVGA3D_OTABLE_SURFACE_ENTRY_SIZE (sizeof(SVGAOTableSurfaceEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGAOTableContextEntry; +#define SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE (sizeof(SVGAOTableContextEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dShaderType type; + uint32 sizeInBytes; + uint32 offsetInBytes; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGAOTableShaderEntry; +#define SVGA3D_OTABLE_SHADER_ENTRY_SIZE (sizeof(SVGAOTableShaderEntry)) + #define SVGA_STFLAG_PRIMARY (1 << 0) typedef uint32 SVGAScreenTargetFlags; typedef #include "vmware_pack_begin.h" struct { + SVGA3dSurfaceImageId image; + uint32 width; + uint32 height; + int32 xRoot; + int32 yRoot; + SVGAScreenTargetFlags flags; + uint32 dpi; + uint32 pad[7]; +} +#include "vmware_pack_end.h" +SVGAOTableScreenTargetEntry; +#define SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE \ + (sizeof(SVGAOTableScreenTargetEntry)) + +typedef +#include "vmware_pack_begin.h" +struct { float value[4]; } #include "vmware_pack_end.h" @@ -1178,6 +1385,209 @@ struct { #include "vmware_pack_end.h" SVGA3dShaderConstBool; +typedef +#include "vmware_pack_begin.h" +struct { + uint16 streamOffset; + uint8 stream; + uint8 type; + uint8 methodUsage; + uint8 usageIndex; +} +#include "vmware_pack_end.h" +SVGAGBVertexElement; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 sid; + uint16 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGAGBVertexStream; +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dRect viewport; + SVGA3dRect scissorRect; + SVGA3dZRange zRange; + + SVGA3dSurfaceImageId renderTargets[SVGA3D_RT_MAX]; + SVGAGBVertexElement decl1[4]; + + uint32 renderStates[SVGA3D_RS_MAX]; + SVGAGBVertexElement decl2[18]; + uint32 pad0[2]; + + struct { + SVGA3dFace face; + SVGA3dMaterial material; + } material; + + float clipPlanes[SVGA3D_NUM_CLIPPLANES][4]; + float matrices[SVGA3D_TRANSFORM_MAX][16]; + + SVGA3dBool lightEnabled[SVGA3D_NUM_LIGHTS]; + SVGA3dLightData lightData[SVGA3D_NUM_LIGHTS]; + + /* + * Shaders currently bound + */ + uint32 shaders[SVGA3D_NUM_SHADERTYPE_PREDX]; + SVGAGBVertexElement decl3[10]; + uint32 pad1[3]; + + uint32 occQueryActive; + uint32 occQueryValue; + + /* + * Int/Bool Shader constants + */ + SVGA3dShaderConstInt pShaderIValues[SVGA3D_CONSTINTREG_MAX]; + SVGA3dShaderConstInt vShaderIValues[SVGA3D_CONSTINTREG_MAX]; + uint16 pShaderBValues; + uint16 vShaderBValues; + + + SVGAGBVertexStream streams[SVGA3D_MAX_VERTEX_ARRAYS]; + SVGA3dVertexDivisor divisors[SVGA3D_MAX_VERTEX_ARRAYS]; + uint32 numVertexDecls; + uint32 numVertexStreams; + uint32 numVertexDivisors; + uint32 pad2[30]; + + /* + * Texture Stages + * + * SVGA3D_TS_INVALID through SVGA3D_TS_CONSTANT are in the + * textureStages array. + * SVGA3D_TS_COLOR_KEY is in tsColorKey. 
+ */ + uint32 tsColorKey[SVGA3D_NUM_TEXTURE_UNITS]; + uint32 textureStages[SVGA3D_NUM_TEXTURE_UNITS][SVGA3D_TS_CONSTANT + 1]; + uint32 tsColorKeyEnable[SVGA3D_NUM_TEXTURE_UNITS]; + + /* + * Float Shader constants. + */ + SVGA3dShaderConstFloat pShaderFValues[SVGA3D_CONSTREG_MAX]; + SVGA3dShaderConstFloat vShaderFValues[SVGA3D_CONSTREG_MAX]; +} +#include "vmware_pack_end.h" +SVGAGBContextData; +#define SVGA3D_CONTEXT_DATA_SIZE (sizeof(SVGAGBContextData)) + +/* + * SVGA3dCmdSetOTableBase -- + * + * This command allows the guest to specify the base PPN of the + * specified object table. + */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGAOTableType type; + PPN baseAddress; + uint32 sizeInBytes; + uint32 validSizeInBytes; + SVGAMobFormat ptDepth; +} +#include "vmware_pack_end.h" +SVGA3dCmdSetOTableBase; /* SVGA_3D_CMD_SET_OTABLE_BASE */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGAOTableType type; + PPN64 baseAddress; + uint32 sizeInBytes; + uint32 validSizeInBytes; + SVGAMobFormat ptDepth; +} +#include "vmware_pack_end.h" +SVGA3dCmdSetOTableBase64; /* SVGA_3D_CMD_SET_OTABLE_BASE64 */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGAOTableType type; +} +#include "vmware_pack_end.h" +SVGA3dCmdReadbackOTable; /* SVGA_3D_CMD_READBACK_OTABLE */ + +/* + * Define a memory object (Mob) in the OTable. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDefineGBMob { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN base; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDefineGBMob; /* SVGA_3D_CMD_DEFINE_GB_MOB */ + + +/* + * Destroys an object in the OTable. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDestroyGBMob { + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDestroyGBMob; /* SVGA_3D_CMD_DESTROY_GB_MOB */ + + +/* + * Define a memory object (Mob) in the OTable with a PPN64 base. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDefineGBMob64 { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN64 base; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDefineGBMob64; /* SVGA_3D_CMD_DEFINE_GB_MOB64 */ + +/* + * Redefine an object in the OTable with PPN64 base. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdRedefineGBMob64 { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN64 base; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdRedefineGBMob64; /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */ + +/* + * Notification that the page tables have been modified. + */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdUpdateGBMobMapping { + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdUpdateGBMobMapping; /* SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING */ + /* * Define a guest-backed surface. 
*/ @@ -1243,7 +1653,7 @@ SVGA3dCmdBindGBSurfaceWithPitch; /* SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH */ typedef #include "vmware_pack_begin.h" -struct{ +struct SVGA3dCmdCondBindGBSurface { uint32 sid; SVGAMobId testMobid; SVGAMobId mobid; @@ -1477,18 +1887,6 @@ struct SVGA3dCmdDestroyGBShader { #include "vmware_pack_end.h" SVGA3dCmdDestroyGBShader; /* SVGA_3D_CMD_DESTROY_GB_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdBindGBShaderConsts { - uint32 cid; - SVGA3dShaderType shaderType; - SVGA3dShaderConstType shaderConstType; - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdBindGBShaderConsts; /* SVGA_3D_CMD_BIND_SHADERCONSTS */ - typedef #include "vmware_pack_begin.h" struct { @@ -1553,7 +1951,7 @@ typedef #include "vmware_pack_begin.h" struct { SVGAMobId mobid; - uint32 fbOffset; + uint32 mustBeZero; uint32 initialized; } #include "vmware_pack_end.h" @@ -1649,6 +2047,6 @@ struct { uint32 mobOffset; } #include "vmware_pack_end.h" -SVGA3dCmdGBMobFence; /* SVGA_3D_CMD_GB_MOB_FENCE*/ +SVGA3dCmdGBMobFence; /* SVGA_3D_CMD_GB_MOB_FENCE */ -#endif // _SVGA3D_CMD_H_ +#endif /* _SVGA3D_CMD_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h index 915f3c757..ade210b41 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_devcaps.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -223,9 +223,230 @@ typedef enum { */ SVGA3D_DEVCAP_TS_COLOR_KEY = 93, /* boolean */ + /* + * Deprecated. + */ + SVGA3D_DEVCAP_DEAD2 = 94, + + /* + * Does the device support the DX commands? + */ + SVGA3D_DEVCAP_DX = 95, + + /* + * What is the maximum size of a texture array? + * + * (Even if this cap is zero, cubemaps are still allowed.) + */ + SVGA3D_DEVCAP_MAX_TEXTURE_ARRAY_SIZE = 96, + + /* + * What is the maximum number of vertex buffers that can + * be used in the DXContext inputAssembly? + */ + SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS = 97, + + /* + * What is the maximum number of constant buffers + * that can be expected to work correctly with a + * DX context? + */ + SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS = 98, + + /* + * Does the device support provoking vertex control? + * If zero, the first vertex will always be the provoking vertex. 
+ */ + SVGA3D_DEVCAP_DX_PROVOKING_VERTEX = 99, + + SVGA3D_DEVCAP_DXFMT_X8R8G8B8 = 100, + SVGA3D_DEVCAP_DXFMT_A8R8G8B8 = 101, + SVGA3D_DEVCAP_DXFMT_R5G6B5 = 102, + SVGA3D_DEVCAP_DXFMT_X1R5G5B5 = 103, + SVGA3D_DEVCAP_DXFMT_A1R5G5B5 = 104, + SVGA3D_DEVCAP_DXFMT_A4R4G4B4 = 105, + SVGA3D_DEVCAP_DXFMT_Z_D32 = 106, + SVGA3D_DEVCAP_DXFMT_Z_D16 = 107, + SVGA3D_DEVCAP_DXFMT_Z_D24S8 = 108, + SVGA3D_DEVCAP_DXFMT_Z_D15S1 = 109, + SVGA3D_DEVCAP_DXFMT_LUMINANCE8 = 110, + SVGA3D_DEVCAP_DXFMT_LUMINANCE4_ALPHA4 = 111, + SVGA3D_DEVCAP_DXFMT_LUMINANCE16 = 112, + SVGA3D_DEVCAP_DXFMT_LUMINANCE8_ALPHA8 = 113, + SVGA3D_DEVCAP_DXFMT_DXT1 = 114, + SVGA3D_DEVCAP_DXFMT_DXT2 = 115, + SVGA3D_DEVCAP_DXFMT_DXT3 = 116, + SVGA3D_DEVCAP_DXFMT_DXT4 = 117, + SVGA3D_DEVCAP_DXFMT_DXT5 = 118, + SVGA3D_DEVCAP_DXFMT_BUMPU8V8 = 119, + SVGA3D_DEVCAP_DXFMT_BUMPL6V5U5 = 120, + SVGA3D_DEVCAP_DXFMT_BUMPX8L8V8U8 = 121, + SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD1 = 122, + SVGA3D_DEVCAP_DXFMT_ARGB_S10E5 = 123, + SVGA3D_DEVCAP_DXFMT_ARGB_S23E8 = 124, + SVGA3D_DEVCAP_DXFMT_A2R10G10B10 = 125, + SVGA3D_DEVCAP_DXFMT_V8U8 = 126, + SVGA3D_DEVCAP_DXFMT_Q8W8V8U8 = 127, + SVGA3D_DEVCAP_DXFMT_CxV8U8 = 128, + SVGA3D_DEVCAP_DXFMT_X8L8V8U8 = 129, + SVGA3D_DEVCAP_DXFMT_A2W10V10U10 = 130, + SVGA3D_DEVCAP_DXFMT_ALPHA8 = 131, + SVGA3D_DEVCAP_DXFMT_R_S10E5 = 132, + SVGA3D_DEVCAP_DXFMT_R_S23E8 = 133, + SVGA3D_DEVCAP_DXFMT_RG_S10E5 = 134, + SVGA3D_DEVCAP_DXFMT_RG_S23E8 = 135, + SVGA3D_DEVCAP_DXFMT_BUFFER = 136, + SVGA3D_DEVCAP_DXFMT_Z_D24X8 = 137, + SVGA3D_DEVCAP_DXFMT_V16U16 = 138, + SVGA3D_DEVCAP_DXFMT_G16R16 = 139, + SVGA3D_DEVCAP_DXFMT_A16B16G16R16 = 140, + SVGA3D_DEVCAP_DXFMT_UYVY = 141, + SVGA3D_DEVCAP_DXFMT_YUY2 = 142, + SVGA3D_DEVCAP_DXFMT_NV12 = 143, + SVGA3D_DEVCAP_DXFMT_AYUV = 144, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS = 145, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT = 146, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT = 147, + SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS = 148, + SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT = 149, + SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT = 150, + SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT = 151, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS = 152, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT = 153, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM = 154, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT = 155, + SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS = 156, + SVGA3D_DEVCAP_DXFMT_R32G32_UINT = 157, + SVGA3D_DEVCAP_DXFMT_R32G32_SINT = 158, + SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS = 159, + SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT = 160, + SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS = 161, + SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT = 162, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS = 163, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT = 164, + SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT = 165, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS = 166, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM = 167, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB = 168, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT = 169, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT = 170, + SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS = 171, + SVGA3D_DEVCAP_DXFMT_R16G16_UINT = 172, + SVGA3D_DEVCAP_DXFMT_R16G16_SINT = 173, + SVGA3D_DEVCAP_DXFMT_R32_TYPELESS = 174, + SVGA3D_DEVCAP_DXFMT_D32_FLOAT = 175, + SVGA3D_DEVCAP_DXFMT_R32_UINT = 176, + SVGA3D_DEVCAP_DXFMT_R32_SINT = 177, + SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS = 178, + SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT = 179, + SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS = 180, + SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT = 181, + SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS = 182, + SVGA3D_DEVCAP_DXFMT_R8G8_UNORM = 183, + SVGA3D_DEVCAP_DXFMT_R8G8_UINT = 
184, + SVGA3D_DEVCAP_DXFMT_R8G8_SINT = 185, + SVGA3D_DEVCAP_DXFMT_R16_TYPELESS = 186, + SVGA3D_DEVCAP_DXFMT_R16_UNORM = 187, + SVGA3D_DEVCAP_DXFMT_R16_UINT = 188, + SVGA3D_DEVCAP_DXFMT_R16_SNORM = 189, + SVGA3D_DEVCAP_DXFMT_R16_SINT = 190, + SVGA3D_DEVCAP_DXFMT_R8_TYPELESS = 191, + SVGA3D_DEVCAP_DXFMT_R8_UNORM = 192, + SVGA3D_DEVCAP_DXFMT_R8_UINT = 193, + SVGA3D_DEVCAP_DXFMT_R8_SNORM = 194, + SVGA3D_DEVCAP_DXFMT_R8_SINT = 195, + SVGA3D_DEVCAP_DXFMT_P8 = 196, + SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP = 197, + SVGA3D_DEVCAP_DXFMT_R8G8_B8G8_UNORM = 198, + SVGA3D_DEVCAP_DXFMT_G8R8_G8B8_UNORM = 199, + SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS = 200, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB = 201, + SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS = 202, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB = 203, + SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS = 204, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB = 205, + SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS = 206, + SVGA3D_DEVCAP_DXFMT_ATI1 = 207, + SVGA3D_DEVCAP_DXFMT_BC4_SNORM = 208, + SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS = 209, + SVGA3D_DEVCAP_DXFMT_ATI2 = 210, + SVGA3D_DEVCAP_DXFMT_BC5_SNORM = 211, + SVGA3D_DEVCAP_DXFMT_R10G10B10_XR_BIAS_A2_UNORM = 212, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS = 213, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB = 214, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS = 215, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB = 216, + SVGA3D_DEVCAP_DXFMT_Z_DF16 = 217, + SVGA3D_DEVCAP_DXFMT_Z_DF24 = 218, + SVGA3D_DEVCAP_DXFMT_Z_D24S8_INT = 219, + SVGA3D_DEVCAP_DXFMT_YV12 = 220, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT = 221, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT = 222, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM = 223, + SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT = 224, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM = 225, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM = 226, + SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT = 227, + SVGA3D_DEVCAP_DXFMT_R16G16_UNORM = 228, + SVGA3D_DEVCAP_DXFMT_R16G16_SNORM = 229, + SVGA3D_DEVCAP_DXFMT_R32_FLOAT = 230, + SVGA3D_DEVCAP_DXFMT_R8G8_SNORM = 231, + SVGA3D_DEVCAP_DXFMT_R16_FLOAT = 232, + SVGA3D_DEVCAP_DXFMT_D16_UNORM = 233, + SVGA3D_DEVCAP_DXFMT_A8_UNORM = 234, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM = 235, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM = 236, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM = 237, + SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM = 238, + SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM = 239, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM = 240, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM = 241, + SVGA3D_DEVCAP_DXFMT_BC4_UNORM = 242, + SVGA3D_DEVCAP_DXFMT_BC5_UNORM = 243, + SVGA3D_DEVCAP_MAX /* This must be the last index. */ } SVGA3dDevCapIndex; +/* + * Bit definitions for DXFMT devcaps + * + * + * SUPPORTED: Can the format be defined? + * SHADER_SAMPLE: Can the format be sampled from a shader? + * COLOR_RENDERTARGET: Can the format be a color render target? + * DEPTH_RENDERTARGET: Can the format be a depth render target? + * BLENDABLE: Is the format blendable? + * MIPS: Does the format support mip levels? + * ARRAY: Does the format support texture arrays? + * VOLUME: Does the format support having volume? + * MULTISAMPLE_2: Does the format support 2x multisample? + * MULTISAMPLE_4: Does the format support 4x multisample? + * MULTISAMPLE_8: Does the format support 8x multisample? 
+ */ +#define SVGA3D_DXFMT_SUPPORTED (1 << 0) +#define SVGA3D_DXFMT_SHADER_SAMPLE (1 << 1) +#define SVGA3D_DXFMT_COLOR_RENDERTARGET (1 << 2) +#define SVGA3D_DXFMT_DEPTH_RENDERTARGET (1 << 3) +#define SVGA3D_DXFMT_BLENDABLE (1 << 4) +#define SVGA3D_DXFMT_MIPS (1 << 5) +#define SVGA3D_DXFMT_ARRAY (1 << 6) +#define SVGA3D_DXFMT_VOLUME (1 << 7) +#define SVGA3D_DXFMT_DX_VERTEX_BUFFER (1 << 8) +#define SVGADX_DXFMT_MULTISAMPLE_2 (1 << 9) +#define SVGADX_DXFMT_MULTISAMPLE_4 (1 << 10) +#define SVGADX_DXFMT_MULTISAMPLE_8 (1 << 11) +#define SVGADX_DXFMT_MAX (1 << 12) + +/* + * Convenience mask for any multisample capability. + * + * The multisample bits imply both load and render capability. + */ +#define SVGA3D_DXFMT_MULTISAMPLE ( \ + SVGADX_DXFMT_MULTISAMPLE_2 | \ + SVGADX_DXFMT_MULTISAMPLE_4 | \ + SVGADX_DXFMT_MULTISAMPLE_8 ) + typedef union { Bool b; uint32 u; @@ -233,4 +454,4 @@ typedef union { float f; } SVGA3dDevCapResult; -#endif // _SVGA3D_DEVCAPS_H_ +#endif /* _SVGA3D_DEVCAPS_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_dx.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_dx.h new file mode 100644 index 000000000..fce2b0422 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_dx.h @@ -0,0 +1,1521 @@ +/********************************************************** + * Copyright 2007-2015 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_dx.h -- + * + * SVGA 3d hardware definitions for DX10 support. 
+ */ + +#ifndef _SVGA3D_DX_H_ +#define _SVGA3D_DX_H_ + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "svga3d_limits.h" + +#define SVGA3D_INPUT_MIN 0 +#define SVGA3D_INPUT_PER_VERTEX_DATA 0 +#define SVGA3D_INPUT_PER_INSTANCE_DATA 1 +#define SVGA3D_INPUT_MAX 2 +typedef uint32 SVGA3dInputClassification; + +#define SVGA3D_RESOURCE_TYPE_MIN 1 +#define SVGA3D_RESOURCE_BUFFER 1 +#define SVGA3D_RESOURCE_TEXTURE1D 2 +#define SVGA3D_RESOURCE_TEXTURE2D 3 +#define SVGA3D_RESOURCE_TEXTURE3D 4 +#define SVGA3D_RESOURCE_TEXTURECUBE 5 +#define SVGA3D_RESOURCE_TYPE_DX10_MAX 6 +#define SVGA3D_RESOURCE_BUFFEREX 6 +#define SVGA3D_RESOURCE_TYPE_MAX 7 +typedef uint32 SVGA3dResourceType; + +#define SVGA3D_DEPTH_WRITE_MASK_ZERO 0 +#define SVGA3D_DEPTH_WRITE_MASK_ALL 1 +typedef uint8 SVGA3dDepthWriteMask; + +#define SVGA3D_FILTER_MIP_LINEAR (1 << 0) +#define SVGA3D_FILTER_MAG_LINEAR (1 << 2) +#define SVGA3D_FILTER_MIN_LINEAR (1 << 4) +#define SVGA3D_FILTER_ANISOTROPIC (1 << 6) +#define SVGA3D_FILTER_COMPARE (1 << 7) +typedef uint32 SVGA3dFilter; + +#define SVGA3D_CULL_INVALID 0 +#define SVGA3D_CULL_MIN 1 +#define SVGA3D_CULL_NONE 1 +#define SVGA3D_CULL_FRONT 2 +#define SVGA3D_CULL_BACK 3 +#define SVGA3D_CULL_MAX 4 +typedef uint8 SVGA3dCullMode; + +#define SVGA3D_COMPARISON_INVALID 0 +#define SVGA3D_COMPARISON_MIN 1 +#define SVGA3D_COMPARISON_NEVER 1 +#define SVGA3D_COMPARISON_LESS 2 +#define SVGA3D_COMPARISON_EQUAL 3 +#define SVGA3D_COMPARISON_LESS_EQUAL 4 +#define SVGA3D_COMPARISON_GREATER 5 +#define SVGA3D_COMPARISON_NOT_EQUAL 6 +#define SVGA3D_COMPARISON_GREATER_EQUAL 7 +#define SVGA3D_COMPARISON_ALWAYS 8 +#define SVGA3D_COMPARISON_MAX 9 +typedef uint8 SVGA3dComparisonFunc; + +#define SVGA3D_DX_MAX_VERTEXBUFFERS 32 +#define SVGA3D_DX_MAX_VERTEXINPUTREGISTERS 16 +#define SVGA3D_DX_MAX_SOTARGETS 4 +#define SVGA3D_DX_MAX_SRVIEWS 128 +#define SVGA3D_DX_MAX_CONSTBUFFERS 16 +#define SVGA3D_DX_MAX_SAMPLERS 16 + +/* Id limits */ +static const uint32 SVGA3dBlendObjectCountPerContext = 4096; +static const uint32 SVGA3dDepthStencilObjectCountPerContext = 4096; + +typedef uint32 SVGA3dSurfaceId; +typedef uint32 SVGA3dShaderResourceViewId; +typedef uint32 SVGA3dRenderTargetViewId; +typedef uint32 SVGA3dDepthStencilViewId; + +typedef uint32 SVGA3dShaderId; +typedef uint32 SVGA3dElementLayoutId; +typedef uint32 SVGA3dSamplerId; +typedef uint32 SVGA3dBlendStateId; +typedef uint32 SVGA3dDepthStencilStateId; +typedef uint32 SVGA3dRasterizerStateId; +typedef uint32 SVGA3dQueryId; +typedef uint32 SVGA3dStreamOutputId; + +typedef union { + struct { + float r; + float g; + float b; + float a; + }; + + float value[4]; +} SVGA3dRGBAFloat; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGAOTableDXContextEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineContext; /* SVGA_3D_CMD_DX_DEFINE_CONTEXT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyContext; /* SVGA_3D_CMD_DX_DESTROY_CONTEXT */ + +/* + * Bind a DX context. + * + * validContents should be set to 0 for new contexts, + * and 1 if this is an old context which is getting paged + * back on to the device. 
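As a hedged sketch of how the bind-context command body (the struct defined just below) might be filled in, with validContents chosen per the rule above; the helper name is an assumption and the actual submission path through svga_cmd.c is not shown:

    #include <string.h>
    #include "svga3d_reg.h"

    /* Sketch: fill a DX bind-context command in caller-provided memory.
     * 'is_new' selects validContents = 0 (fresh context) vs. 1 (an old
     * context being paged back onto the device). */
    static void
    fill_dx_bind_context(SVGA3dCmdDXBindContext *cmd,
                         uint32 cid, SVGAMobId mobid, int is_new)
    {
       memset(cmd, 0, sizeof *cmd);
       cmd->cid = cid;
       cmd->mobid = mobid;
       cmd->validContents = is_new ? 0 : 1;
    }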
+ * + * For new contexts, it is recommended that the driver + * issue commands to initialize all interesting state + * prior to rendering. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindContext { + uint32 cid; + SVGAMobId mobid; + uint32 validContents; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindContext; /* SVGA_3D_CMD_DX_BIND_CONTEXT */ + +/* + * Readback a DX context. + * (Request that the device flush the contents back into guest memory.) + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackContext; /* SVGA_3D_CMD_DX_READBACK_CONTEXT */ + +/* + * Invalidate a guest-backed context. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXInvalidateContext { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXInvalidateContext; /* SVGA_3D_CMD_DX_INVALIDATE_CONTEXT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dReplyFormatData { + uint32 formatSupport; + uint32 msaa2xQualityLevels:5; + uint32 msaa4xQualityLevels:5; + uint32 msaa8xQualityLevels:5; + uint32 msaa16xQualityLevels:5; + uint32 msaa32xQualityLevels:5; + uint32 pad:7; +} +#include "vmware_pack_end.h" +SVGA3dReplyFormatData; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetSingleConstantBuffer { + uint32 slot; + SVGA3dShaderType type; + SVGA3dSurfaceId sid; + uint32 offsetInBytes; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetSingleConstantBuffer; +/* SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetShaderResources { + uint32 startView; + SVGA3dShaderType type; + + /* + * Followed by a variable number of SVGA3dShaderResourceViewId's. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetShaderResources; /* SVGA_3D_CMD_DX_SET_SHADER_RESOURCES */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetShader { + SVGA3dShaderId shaderId; + SVGA3dShaderType type; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetShader; /* SVGA_3D_CMD_DX_SET_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetSamplers { + uint32 startSampler; + SVGA3dShaderType type; + + /* + * Followed by a variable number of SVGA3dSamplerId's. 
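Many of these commands share the same layout: a fixed header followed by a variable number of IDs. A minimal sketch of encoding the SetSamplers body (whose definition closes just below) into caller-provided storage; the helper is an assumption, the destination is assumed to be large enough for the header plus count IDs, and buffer reservation/submission happen elsewhere:

    #include <string.h>
    #include "svga3d_reg.h"

    /* Sketch: write a DX SetSamplers command body followed by 'count'
     * SVGA3dSamplerId values.  Returns the number of bytes written. */
    static uint32
    encode_dx_set_samplers(void *dst, uint32 startSampler,
                           SVGA3dShaderType type,
                           const SVGA3dSamplerId *ids, uint32 count)
    {
       SVGA3dCmdDXSetSamplers *cmd = (SVGA3dCmdDXSetSamplers *) dst;
       SVGA3dSamplerId *out = (SVGA3dSamplerId *) (cmd + 1);

       cmd->startSampler = startSampler;
       cmd->type = type;
       memcpy(out, ids, count * sizeof *ids);

       return sizeof *cmd + count * sizeof *ids;
    }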
+ */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetSamplers; /* SVGA_3D_CMD_DX_SET_SAMPLERS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDraw { + uint32 vertexCount; + uint32 startVertexLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDraw; /* SVGA_3D_CMD_DX_DRAW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawIndexed { + uint32 indexCount; + uint32 startIndexLocation; + int32 baseVertexLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawIndexed; /* SVGA_3D_CMD_DX_DRAW_INDEXED */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawInstanced { + uint32 vertexCountPerInstance; + uint32 instanceCount; + uint32 startVertexLocation; + uint32 startInstanceLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawInstanced; /* SVGA_3D_CMD_DX_DRAW_INSTANCED */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawIndexedInstanced { + uint32 indexCountPerInstance; + uint32 instanceCount; + uint32 startIndexLocation; + int32 baseVertexLocation; + uint32 startInstanceLocation; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawIndexedInstanced; /* SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDrawAuto { + uint32 pad0; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDrawAuto; /* SVGA_3D_CMD_DX_DRAW_AUTO */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetInputLayout { + SVGA3dElementLayoutId elementLayoutId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetInputLayout; /* SVGA_3D_CMD_DX_SET_INPUT_LAYOUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dVertexBuffer { + SVGA3dSurfaceId sid; + uint32 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dVertexBuffer; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetVertexBuffers { + uint32 startBuffer; + /* Followed by a variable number of SVGA3dVertexBuffer's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetVertexBuffers; /* SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetIndexBuffer { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetIndexBuffer; /* SVGA_3D_CMD_DX_SET_INDEX_BUFFER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetTopology { + SVGA3dPrimitiveType topology; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetTopology; /* SVGA_3D_CMD_DX_SET_TOPOLOGY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetRenderTargets { + SVGA3dDepthStencilViewId depthStencilViewId; + /* Followed by a variable number of SVGA3dRenderTargetViewId's. 
*/ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetRenderTargets; /* SVGA_3D_CMD_DX_SET_RENDERTARGETS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetBlendState { + SVGA3dBlendStateId blendId; + float blendFactor[4]; + uint32 sampleMask; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetBlendState; /* SVGA_3D_CMD_DX_SET_BLEND_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; + uint32 stencilRef; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetDepthStencilState; /* SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetRasterizerState { + SVGA3dRasterizerStateId rasterizerId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetRasterizerState; /* SVGA_3D_CMD_DX_SET_RASTERIZER_STATE */ + +#define SVGA3D_DXQUERY_FLAG_PREDICATEHINT (1 << 0) +typedef uint32 SVGA3dDXQueryFlags; + +/* + * The SVGADXQueryDeviceState and SVGADXQueryDeviceBits are used by the device + * to track query state transitions, but are not intended to be used by the + * driver. + */ +#define SVGADX_QDSTATE_INVALID ((uint8)-1) /* Query has no state */ +#define SVGADX_QDSTATE_MIN 0 +#define SVGADX_QDSTATE_IDLE 0 /* Query hasn't started yet */ +#define SVGADX_QDSTATE_ACTIVE 1 /* Query is actively gathering data */ +#define SVGADX_QDSTATE_PENDING 2 /* Query is waiting for results */ +#define SVGADX_QDSTATE_FINISHED 3 /* Query has completed */ +#define SVGADX_QDSTATE_MAX 4 +typedef uint8 SVGADXQueryDeviceState; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dQueryTypeUint8 type; + uint16 pad0; + SVGADXQueryDeviceState state; + SVGA3dDXQueryFlags flags; + SVGAMobId mobid; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGACOTableDXQueryEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineQuery { + SVGA3dQueryId queryId; + SVGA3dQueryType type; + SVGA3dDXQueryFlags flags; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineQuery; /* SVGA_3D_CMD_DX_DEFINE_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyQuery; /* SVGA_3D_CMD_DX_DESTROY_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindQuery { + SVGA3dQueryId queryId; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindQuery; /* SVGA_3D_CMD_DX_BIND_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetQueryOffset { + SVGA3dQueryId queryId; + uint32 mobOffset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetQueryOffset; /* SVGA_3D_CMD_DX_SET_QUERY_OFFSET */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBeginQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBeginQuery; /* SVGA_3D_CMD_DX_QUERY_BEGIN */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXEndQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXEndQuery; /* SVGA_3D_CMD_DX_QUERY_END */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackQuery { + SVGA3dQueryId queryId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackQuery; /* SVGA_3D_CMD_DX_READBACK_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXMoveQuery { + SVGA3dQueryId queryId; + SVGAMobId mobid; + uint32 mobOffset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXMoveQuery; /* SVGA_3D_CMD_DX_MOVE_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindAllQuery { + uint32 
cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindAllQuery; /* SVGA_3D_CMD_DX_BIND_ALL_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackAllQuery { + uint32 cid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackAllQuery; /* SVGA_3D_CMD_DX_READBACK_ALL_QUERY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetPredication { + SVGA3dQueryId queryId; + uint32 predicateValue; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetPredication; /* SVGA_3D_CMD_DX_SET_PREDICATION */ + +typedef +#include "vmware_pack_begin.h" +struct MKS3dDXSOState { + uint32 offset; /* Starting offset */ + uint32 intOffset; /* Internal offset */ + uint32 vertexCount; /* vertices written */ + uint32 sizeInBytes; /* max bytes to write */ +} +#include "vmware_pack_end.h" +SVGA3dDXSOState; + +/* Set the offset field to this value to append SO values to the buffer */ +#define SVGA3D_DX_SO_OFFSET_APPEND ((uint32) ~0u) + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dSoTarget { + SVGA3dSurfaceId sid; + uint32 offset; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dSoTarget; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetSOTargets { + uint32 pad0; + /* Followed by a variable number of SVGA3dSOTarget's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetSOTargets; /* SVGA_3D_CMD_DX_SET_SOTARGETS */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dViewport +{ + float x; + float y; + float width; + float height; + float minDepth; + float maxDepth; +} +#include "vmware_pack_end.h" +SVGA3dViewport; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetViewports { + uint32 pad0; + /* Followed by a variable number of SVGA3dViewport's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetViewports; /* SVGA_3D_CMD_DX_SET_VIEWPORTS */ + +#define SVGA3D_DX_MAX_VIEWPORTS 16 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetScissorRects { + uint32 pad0; + /* Followed by a variable number of SVGASignedRect's. 
*/ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetScissorRects; /* SVGA_3D_CMD_DX_SET_SCISSORRECTS */ + +#define SVGA3D_DX_MAX_SCISSORRECTS 16 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXClearRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; + SVGA3dRGBAFloat rgba; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXClearRenderTargetView; /* SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXClearDepthStencilView { + uint16 flags; + uint16 stencil; + SVGA3dDepthStencilViewId depthStencilViewId; + float depth; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXClearDepthStencilView; /* SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXPredCopyRegion { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dCopyBox box; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXPredCopyRegion; +/* SVGA_3D_CMD_DX_PRED_COPY_REGION */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXPredCopy { + SVGA3dSurfaceId dstSid; + SVGA3dSurfaceId srcSid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXPredCopy; /* SVGA_3D_CMD_DX_PRED_COPY */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBufferCopy { + SVGA3dSurfaceId dest; + SVGA3dSurfaceId src; + uint32 destX; + uint32 srcX; + uint32 width; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBufferCopy; +/* SVGA_3D_CMD_DX_BUFFER_COPY */ + +typedef uint32 SVGA3dDXStretchBltMode; +#define SVGADX_STRETCHBLT_LINEAR (1 << 0) +#define SVGADX_STRETCHBLT_FORCE_SRC_SRGB (1 << 1) + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXStretchBlt { + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dSurfaceId dstSid; + uint32 destSubResource; + SVGA3dBox boxSrc; + SVGA3dBox boxDest; + SVGA3dDXStretchBltMode mode; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXStretchBlt; /* SVGA_3D_CMD_DX_STRETCHBLT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXGenMips { + SVGA3dShaderResourceViewId shaderResourceViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXGenMips; /* SVGA_3D_CMD_DX_GENMIPS */ + +/* + * Defines a resource/DX surface. Resources share the surfaceId namespace. + * + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDefineGBSurface_v2 { + uint32 sid; + SVGA3dSurfaceFlags surfaceFlags; + SVGA3dSurfaceFormat format; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + uint32 arraySize; + uint32 pad; +} +#include "vmware_pack_end.h" +SVGA3dCmdDefineGBSurface_v2; /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 */ + +/* + * Update a sub-resource in a guest-backed resource. + * (Inform the device that the guest-contents have been updated.) + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXUpdateSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; + SVGA3dBox box; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXUpdateSubResource; /* SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE */ + +/* + * Readback a subresource in a guest-backed resource. + * (Request the device to flush the dirty contents into the guest.) + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackSubResource; /* SVGA_3D_CMD_DX_READBACK_SUBRESOURCE */ + +/* + * Invalidate an image in a guest-backed surface. + * (Notify the device that the contents can be lost.) 
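The update/readback/invalidate commands above each operate on one subresource plus an optional box. A minimal sketch of filling the update-subresource command defined above for a dirty region; the helper is an assumption, and SVGA3dBox (from svga3d_types.h, not shown in this diff) is assumed to carry an x/y/z origin and w/h/d extent:

    #include <string.h>
    #include "svga3d_reg.h"

    /* Sketch: describe a dirty box of a subresource that the device
     * should re-read from guest memory.  Submission is not shown. */
    static void
    fill_dx_update_subresource(SVGA3dCmdDXUpdateSubResource *cmd,
                               SVGA3dSurfaceId sid, uint32 subResource,
                               uint32 x, uint32 y, uint32 z,
                               uint32 w, uint32 h, uint32 d)
    {
       memset(cmd, 0, sizeof *cmd);
       cmd->sid = sid;
       cmd->subResource = subResource;
       cmd->box.x = x;   /* origin, assumed SVGA3dBox layout */
       cmd->box.y = y;
       cmd->box.z = z;
       cmd->box.w = w;   /* extent */
       cmd->box.h = h;
       cmd->box.d = d;
    }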
+ */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXInvalidateSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXInvalidateSubResource; /* SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE */ + + +/* + * Raw byte wise transfer from a buffer surface into another surface + * of the requested box. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXTransferFromBuffer { + SVGA3dSurfaceId srcSid; + uint32 srcOffset; + uint32 srcPitch; + uint32 srcSlicePitch; + SVGA3dSurfaceId destSid; + uint32 destSubResource; + SVGA3dBox destBox; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXTransferFromBuffer; /* SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER */ + + +/* + * Raw byte wise transfer from a buffer surface into another surface + * of the requested box. Supported if SVGA3D_DEVCAP_DXCONTEXT is set. + * The context is implied from the command buffer header. + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXPredTransferFromBuffer { + SVGA3dSurfaceId srcSid; + uint32 srcOffset; + uint32 srcPitch; + uint32 srcSlicePitch; + SVGA3dSurfaceId destSid; + uint32 destSubResource; + SVGA3dBox destBox; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXPredTransferFromBuffer; +/* SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER */ + + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSurfaceCopyAndReadback { + SVGA3dSurfaceId srcSid; + SVGA3dSurfaceId destSid; + SVGA3dCopyBox box; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSurfaceCopyAndReadback; +/* SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXHint { + uint32 hintId; + + /* + * Followed by variable sized data depending on the hintId. + */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXHint; +/* SVGA_3D_CMD_DX_HINT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBufferUpdate { + SVGA3dSurfaceId sid; + uint32 x; + uint32 width; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBufferUpdate; +/* SVGA_3D_CMD_DX_BUFFER_UPDATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetConstantBufferOffset { + uint32 slot; + uint32 offsetInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetConstantBufferOffset; + +typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetVSConstantBufferOffset; +/* SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET */ + +typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetPSConstantBufferOffset; +/* SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET */ + +typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetGSConstantBufferOffset; +/* SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET */ + + +typedef +#include "vmware_pack_begin.h" +struct { + union { + struct { + uint32 firstElement; + uint32 numElements; + uint32 pad0; + uint32 pad1; + } buffer; + struct { + uint32 mostDetailedMip; + uint32 firstArraySlice; + uint32 mipLevels; + uint32 arraySize; + } tex; + struct { + uint32 firstElement; + uint32 numElements; + uint32 flags; + uint32 pad0; + } bufferex; + }; +} +#include "vmware_pack_end.h" +SVGA3dShaderResourceViewDesc; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + SVGA3dShaderResourceViewDesc desc; + uint32 pad; +} +#include "vmware_pack_end.h" +SVGACOTableDXSRViewEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineShaderResourceView { + SVGA3dShaderResourceViewId shaderResourceViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType 
resourceDimension; + + SVGA3dShaderResourceViewDesc desc; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineShaderResourceView; +/* SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyShaderResourceView { + SVGA3dShaderResourceViewId shaderResourceViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyShaderResourceView; +/* SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dRenderTargetViewDesc { + union { + struct { + uint32 firstElement; + uint32 numElements; + } buffer; + struct { + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + } tex; /* 1d, 2d, cube */ + struct { + uint32 mipSlice; + uint32 firstW; + uint32 wSize; + } tex3D; + }; +} +#include "vmware_pack_end.h" +SVGA3dRenderTargetViewDesc; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + SVGA3dRenderTargetViewDesc desc; + uint32 pad[2]; +} +#include "vmware_pack_end.h" +SVGACOTableDXRTViewEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + + SVGA3dRenderTargetViewDesc desc; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineRenderTargetView; +/* SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyRenderTargetView; +/* SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW */ + +/* + */ +#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_DEPTH 0x01 +#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_STENCIL 0x02 +#define SVGA3D_DXDSVIEW_CREATE_FLAG_MASK 0x03 +typedef uint8 SVGA3DCreateDSViewFlags; + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + SVGA3DCreateDSViewFlags flags; + uint8 pad0; + uint16 pad1; + uint32 pad2; +} +#include "vmware_pack_end.h" +SVGACOTableDXDSViewEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineDepthStencilView { + SVGA3dDepthStencilViewId depthStencilViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + SVGA3DCreateDSViewFlags flags; + uint8 pad0; + uint16 pad1; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineDepthStencilView; +/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyDepthStencilView { + SVGA3dDepthStencilViewId depthStencilViewId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyDepthStencilView; +/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dInputElementDesc { + uint32 inputSlot; + uint32 alignedByteOffset; + SVGA3dSurfaceFormat format; + SVGA3dInputClassification inputSlotClass; + uint32 instanceDataStepRate; + uint32 inputRegister; +} +#include "vmware_pack_end.h" +SVGA3dInputElementDesc; + +typedef +#include "vmware_pack_begin.h" +struct { + /* + * XXX: How many of these can there be? 
+ */ + uint32 elid; + uint32 numDescs; + SVGA3dInputElementDesc desc[32]; + uint32 pad[62]; +} +#include "vmware_pack_end.h" +SVGACOTableDXElementLayoutEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineElementLayout { + SVGA3dElementLayoutId elementLayoutId; + /* Followed by a variable number of SVGA3dInputElementDesc's. */ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineElementLayout; +/* SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyElementLayout { + SVGA3dElementLayoutId elementLayoutId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyElementLayout; +/* SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT */ + + +#define SVGA3D_DX_MAX_RENDER_TARGETS 8 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dDXBlendStatePerRT { + uint8 blendEnable; + uint8 srcBlend; + uint8 destBlend; + uint8 blendOp; + uint8 srcBlendAlpha; + uint8 destBlendAlpha; + uint8 blendOpAlpha; + uint8 renderTargetWriteMask; + uint8 logicOpEnable; + uint8 logicOp; + uint16 pad0; +} +#include "vmware_pack_end.h" +SVGA3dDXBlendStatePerRT; + +typedef +#include "vmware_pack_begin.h" +struct { + uint8 alphaToCoverageEnable; + uint8 independentBlendEnable; + uint16 pad0; + SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; + uint32 pad1[7]; +} +#include "vmware_pack_end.h" +SVGACOTableDXBlendStateEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineBlendState { + SVGA3dBlendStateId blendId; + uint8 alphaToCoverageEnable; + uint8 independentBlendEnable; + uint16 pad0; + SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineBlendState; /* SVGA_3D_CMD_DX_DEFINE_BLEND_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyBlendState { + SVGA3dBlendStateId blendId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyBlendState; /* SVGA_3D_CMD_DX_DESTROY_BLEND_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct { + uint8 depthEnable; + SVGA3dDepthWriteMask depthWriteMask; + SVGA3dComparisonFunc depthFunc; + uint8 stencilEnable; + uint8 frontEnable; + uint8 backEnable; + uint8 stencilReadMask; + uint8 stencilWriteMask; + + uint8 frontStencilFailOp; + uint8 frontStencilDepthFailOp; + uint8 frontStencilPassOp; + SVGA3dComparisonFunc frontStencilFunc; + + uint8 backStencilFailOp; + uint8 backStencilDepthFailOp; + uint8 backStencilPassOp; + SVGA3dComparisonFunc backStencilFunc; +} +#include "vmware_pack_end.h" +SVGACOTableDXDepthStencilEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; + + uint8 depthEnable; + SVGA3dDepthWriteMask depthWriteMask; + SVGA3dComparisonFunc depthFunc; + uint8 stencilEnable; + uint8 frontEnable; + uint8 backEnable; + uint8 stencilReadMask; + uint8 stencilWriteMask; + + uint8 frontStencilFailOp; + uint8 frontStencilDepthFailOp; + uint8 frontStencilPassOp; + SVGA3dComparisonFunc frontStencilFunc; + + uint8 backStencilFailOp; + uint8 backStencilDepthFailOp; + uint8 backStencilPassOp; + SVGA3dComparisonFunc backStencilFunc; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineDepthStencilState; +/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyDepthStencilState; +/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE */ + +typedef +#include 
"vmware_pack_begin.h" +struct { + uint8 fillMode; + SVGA3dCullMode cullMode; + uint8 frontCounterClockwise; + uint8 provokingVertexLast; + int32 depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + uint8 depthClipEnable; + uint8 scissorEnable; + uint8 multisampleEnable; + uint8 antialiasedLineEnable; + float lineWidth; + uint8 lineStippleEnable; + uint8 lineStippleFactor; + uint16 lineStipplePattern; + uint32 forcedSampleCount; +} +#include "vmware_pack_end.h" +SVGACOTableDXRasterizerStateEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineRasterizerState { + SVGA3dRasterizerStateId rasterizerId; + + uint8 fillMode; + SVGA3dCullMode cullMode; + uint8 frontCounterClockwise; + uint8 provokingVertexLast; + int32 depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + uint8 depthClipEnable; + uint8 scissorEnable; + uint8 multisampleEnable; + uint8 antialiasedLineEnable; + float lineWidth; + uint8 lineStippleEnable; + uint8 lineStippleFactor; + uint16 lineStipplePattern; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineRasterizerState; +/* SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyRasterizerState { + SVGA3dRasterizerStateId rasterizerId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyRasterizerState; +/* SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct { + SVGA3dFilter filter; + uint8 addressU; + uint8 addressV; + uint8 addressW; + uint8 pad0; + float mipLODBias; + uint8 maxAnisotropy; + SVGA3dComparisonFunc comparisonFunc; + uint16 pad1; + SVGA3dRGBAFloat borderColor; + float minLOD; + float maxLOD; + uint32 pad2[6]; +} +#include "vmware_pack_end.h" +SVGACOTableDXSamplerEntry; + +/* + */ +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineSamplerState { + SVGA3dSamplerId samplerId; + SVGA3dFilter filter; + uint8 addressU; + uint8 addressV; + uint8 addressW; + uint8 pad0; + float mipLODBias; + uint8 maxAnisotropy; + SVGA3dComparisonFunc comparisonFunc; + uint16 pad1; + SVGA3dRGBAFloat borderColor; + float minLOD; + float maxLOD; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineSamplerState; /* SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroySamplerState { + SVGA3dSamplerId samplerId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroySamplerState; /* SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineShader { + SVGA3dShaderId shaderId; + SVGA3dShaderType type; + uint32 sizeInBytes; /* Number of bytes of shader text. 
*/ +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineShader; /* SVGA_3D_CMD_DX_DEFINE_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGACOTableDXShaderEntry { + SVGA3dShaderType type; + uint32 sizeInBytes; + uint32 offsetInBytes; + SVGAMobId mobid; + uint32 pad[4]; +} +#include "vmware_pack_end.h" +SVGACOTableDXShaderEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyShader { + SVGA3dShaderId shaderId; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyShader; /* SVGA_3D_CMD_DX_DESTROY_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindShader { + uint32 cid; + uint32 shid; + SVGAMobId mobid; + uint32 offsetInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindShader; /* SVGA_3D_CMD_DX_BIND_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXBindAllShader { + uint32 cid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXBindAllShader; /* SVGA_3D_CMD_DX_BIND_ALL_SHADER */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXCondBindAllShader { + uint32 cid; + SVGAMobId testMobid; + SVGAMobId mobid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXCondBindAllShader; /* SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER */ + +/* + * The maximum number of streamout decl's in each streamout entry. + */ +#define SVGA3D_MAX_STREAMOUT_DECLS 64 + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dStreamOutputDeclarationEntry { + uint32 outputSlot; + uint32 registerIndex; + uint8 registerMask; + uint8 pad0; + uint16 pad1; + uint32 stream; +} +#include "vmware_pack_end.h" +SVGA3dStreamOutputDeclarationEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGAOTableStreamOutputEntry { + uint32 numOutputStreamEntries; + SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]; + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; + uint32 rasterizedStream; + uint32 pad[250]; +} +#include "vmware_pack_end.h" +SVGACOTableDXStreamOutputEntry; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDefineStreamOutput { + SVGA3dStreamOutputId soid; + uint32 numOutputStreamEntries; + SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]; + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; + uint32 rasterizedStream; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDefineStreamOutput; /* SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXDestroyStreamOutput { + SVGA3dStreamOutputId soid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXDestroyStreamOutput; /* SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetStreamOutput { + SVGA3dStreamOutputId soid; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetStreamOutput; /* SVGA_3D_CMD_DX_SET_STREAMOUTPUT */ + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 value; + uint32 mobId; + uint32 mobOffset; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXMobFence64; /* SVGA_3D_CMD_DX_MOB_FENCE_64 */ + +/* + * SVGA3dCmdSetCOTable -- + * + * This command allows the guest to bind a mob to a context-object table. 
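A hedged sketch of filling the SetCOTable command defined immediately below; the helper name is an assumption, SVGACOTableType comes from elsewhere in these headers, and validSizeInBytes is assumed to describe how much of the table already holds valid entries (0 for a freshly allocated table):

    #include "svga3d_reg.h"

    /* Sketch: bind a mob as the context-object table of the given type. */
    static void
    fill_dx_set_cotable(SVGA3dCmdDXSetCOTable *cmd, uint32 cid,
                        uint32 mobid, SVGACOTableType type,
                        uint32 valid_bytes)
    {
       cmd->cid = cid;
       cmd->mobid = mobid;
       cmd->type = type;
       cmd->validSizeInBytes = valid_bytes;
    }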
+ */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXSetCOTable { + uint32 cid; + uint32 mobid; + SVGACOTableType type; + uint32 validSizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXSetCOTable; /* SVGA_3D_CMD_DX_SET_COTABLE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCmdDXReadbackCOTable { + uint32 cid; + SVGACOTableType type; +} +#include "vmware_pack_end.h" +SVGA3dCmdDXReadbackCOTable; /* SVGA_3D_CMD_DX_READBACK_COTABLE */ + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dCOTableData { + uint32 mobid; +} +#include "vmware_pack_end.h" +SVGA3dCOTableData; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dBufferBinding { + uint32 bufferId; + uint32 stride; + uint32 offset; +} +#include "vmware_pack_end.h" +SVGA3dBufferBinding; + +typedef +#include "vmware_pack_begin.h" +struct SVGA3dConstantBufferBinding { + uint32 sid; + uint32 offsetInBytes; + uint32 sizeInBytes; +} +#include "vmware_pack_end.h" +SVGA3dConstantBufferBinding; + +typedef +#include "vmware_pack_begin.h" +struct SVGADXInputAssemblyMobFormat { + uint32 layoutId; + SVGA3dBufferBinding vertexBuffers[SVGA3D_DX_MAX_VERTEXBUFFERS]; + uint32 indexBufferSid; + uint32 pad; + uint32 indexBufferOffset; + uint32 indexBufferFormat; + uint32 topology; +} +#include "vmware_pack_end.h" +SVGADXInputAssemblyMobFormat; + +typedef +#include "vmware_pack_begin.h" +struct SVGADXContextMobFormat { + SVGADXInputAssemblyMobFormat inputAssembly; + + struct { + uint32 blendStateId; + uint32 blendFactor[4]; + uint32 sampleMask; + uint32 depthStencilStateId; + uint32 stencilRef; + uint32 rasterizerStateId; + uint32 depthStencilViewId; + uint32 renderTargetViewIds[SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS]; + uint32 unorderedAccessViewIds[SVGA3D_MAX_UAVIEWS]; + } renderState; + + struct { + uint32 targets[SVGA3D_DX_MAX_SOTARGETS]; + uint32 soid; + } streamOut; + uint32 pad0[11]; + + uint8 numViewports; + uint8 numScissorRects; + uint16 pad1[1]; + + uint32 pad2[3]; + + SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS]; + uint32 pad3[32]; + + SVGASignedRect scissorRects[SVGA3D_DX_MAX_SCISSORRECTS]; + uint32 pad4[64]; + + struct { + uint32 queryID; + uint32 value; + } predication; + uint32 pad5[2]; + + struct { + uint32 shaderId; + SVGA3dConstantBufferBinding constantBuffers[SVGA3D_DX_MAX_CONSTBUFFERS]; + uint32 shaderResources[SVGA3D_DX_MAX_SRVIEWS]; + uint32 samplers[SVGA3D_DX_MAX_SAMPLERS]; + } shaderState[SVGA3D_NUM_SHADERTYPE]; + uint32 pad6[26]; + + SVGA3dQueryId queryID[SVGA3D_MAX_QUERY]; + + SVGA3dCOTableData cotables[SVGA_COTABLE_MAX]; + uint32 pad7[380]; +} +#include "vmware_pack_end.h" +SVGADXContextMobFormat; + +#endif /* _SVGA3D_DX_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h index 367e8cf7a..a1c36877a 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_limits.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -41,6 +41,7 @@ #define SVGA3D_NUM_CLIPPLANES 6 #define SVGA3D_MAX_RENDER_TARGETS 8 #define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS (SVGA3D_MAX_RENDER_TARGETS) +#define SVGA3D_MAX_UAVIEWS 8 #define SVGA3D_MAX_CONTEXT_IDS 256 #define SVGA3D_MAX_SURFACE_IDS (32 * 1024) @@ -56,9 +57,6 @@ #define SVGA3D_NUM_TEXTURE_UNITS 32 #define SVGA3D_NUM_LIGHTS 8 -#define SVGA3D_MAX_VIDEODECODERS 8 -#define SVGA3D_MAX_VIDEOPROCESSORS 8 -#define SVGA3D_MAX_VIDEODECODER_FRAMES 400 /* * Maximum size in dwords of shader text the SVGA device will allow. @@ -98,4 +96,4 @@ */ #define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32 -#endif // _SVGA3D_LIMITS_H_ +#endif /* _SVGA3D_LIMITS_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h index 01705f314..b44ce648f 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_reg.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -43,6 +43,7 @@ #include "svga3d_types.h" #include "svga3d_limits.h" #include "svga3d_cmd.h" +#include "svga3d_dx.h" #include "svga3d_devcaps.h" diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h index ce5475b6f..efa358b54 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h @@ -1,27 +1,29 @@ -/********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. +/************************************************************************** * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: + * Copyright © 1998-2015 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. * - **********************************************************/ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ /* * svga3d_surfacedefs.h -- @@ -53,645 +55,851 @@ */ enum svga3d_block_desc { - SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */ - SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with red channel - data */ - SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel - data */ - SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video - U and V */ - SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel - data */ - SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel - data */ - SVGA3DBLOCKDESC_STENCIL = 1 << 1, /* Block with a stencil - channel */ - SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with blue channel - data */ - SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel - data */ - SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel - data */ - SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance - data */ - SVGA3DBLOCKDESC_DEPTH = 1 << 2, /* Block with depth channel */ - SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha - channel */ - SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel - data */ - SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of - data */ - SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of - data depending on the - compression method used */ - SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE - floating point - representation in - all channels */ - SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store - data. */ - SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */ - SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */ - SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */ - SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */ - SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV, - e.g., NV12. */ - SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate - Y, U, V, e.g., YV12. 
*/ - - SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN, - SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG | - SVGA3DBLOCKDESC_BLUE, - SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U | - SVGA3DBLOCKDESC_V, - SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_LUMINANCE, - SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_W, - SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U | - SVGA3DBLOCKDESC_V | - SVGA3DBLOCKDESC_W | - SVGA3DBLOCKDESC_Q, - SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_IEEE_FP, - SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP | - SVGA3DBLOCKDESC_GREEN, - SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP | - SVGA3DBLOCKDESC_BLUE, - SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | - SVGA3DBLOCKDESC_STENCIL, - SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO | - SVGA3DBLOCKDESC_Y, - SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_Y | - SVGA3DBLOCKDESC_U_VIDEO | - SVGA3DBLOCKDESC_V_VIDEO, - SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_EXP, - SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_2PLANAR_YUV, - SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_3PLANAR_YUV, -}; -/* - * SVGA3dSurfaceDesc describes the actual pixel data. - * - * This structure provides the following information: - * 1. Block description. - * 2. Dimensions of a block in the surface. - * 3. Size of block in bytes. - * 4. Bit depth of the pixel data. - * 5. Channel bit depths and masks (if applicable). 
- */ -#define SVGA3D_CHANNEL_DEF(type) \ - struct { \ - union { \ - type blue; \ - type u; \ - type uv_video; \ - type u_video; \ - }; \ - union { \ - type green; \ - type v; \ - type stencil; \ - type v_video; \ - }; \ - union { \ - type red; \ - type w; \ - type luminance; \ - type y; \ - type depth; \ - type data; \ - }; \ - union { \ - type alpha; \ - type q; \ - type exp; \ - }; \ - } - -struct svga3d_surface_desc { - enum svga3d_block_desc block_desc; - SVGA3dSize block_size; - uint32 bytes_per_block; - uint32 pitch_bytes_per_block; - - struct { - uint32 total; - SVGA3D_CHANNEL_DEF(uint8); - } bit_depth; - - struct { - SVGA3D_CHANNEL_DEF(uint8); - } bit_offset; + SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */ + SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with red channel data */ + SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel data */ + SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel data */ + SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel data */ + SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with blue channel data */ + SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel data */ + SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel data */ + SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance data */ + SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha channel */ + SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel data */ + SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of data */ + SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of data depending + on the compression method used */ + SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE floating point + representation in all channels */ + SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video U and V */ + SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store data. */ + SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */ + SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */ + SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */ + SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */ + SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV, e.g., NV12. */ + SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate Y, U, V, e.g., YV12. 
*/ + SVGA3DBLOCKDESC_DEPTH = 1 << 15, /* Block with depth channel */ + SVGA3DBLOCKDESC_STENCIL = 1 << 16, /* Block with a stencil channel */ + + SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN, + SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG | + SVGA3DBLOCKDESC_BLUE, + SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U | + SVGA3DBLOCKDESC_V, + SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV | + SVGA3DBLOCKDESC_LUMINANCE, + SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV | + SVGA3DBLOCKDESC_W, + SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U | + SVGA3DBLOCKDESC_V | + SVGA3DBLOCKDESC_W | + SVGA3DBLOCKDESC_Q, + SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_IEEE_FP, + SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP | + SVGA3DBLOCKDESC_GREEN, + SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP | + SVGA3DBLOCKDESC_BLUE, + SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP | + SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | + SVGA3DBLOCKDESC_STENCIL, + SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO | + SVGA3DBLOCKDESC_Y, + SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_Y | + SVGA3DBLOCKDESC_U_VIDEO | + SVGA3DBLOCKDESC_V_VIDEO, + SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB | + SVGA3DBLOCKDESC_EXP, + SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_2PLANAR_YUV, + SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_3PLANAR_YUV, }; -static const struct svga3d_surface_desc svga3d_surface_descs[] = { - {SVGA3DBLOCKDESC_NONE, - {1, 1, 1}, 0, 0, {0, {{0}, {0}, {0}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_FORMAT_INVALID */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_X8R8G8B8 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_A8R8G8B8 */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 2, 2, {16, {{5}, {6}, {5}, {0} } }, - {{{0}, {5}, {11}, {0} } } }, /* SVGA3D_R5G6B5 */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 2, 2, {15, {{5}, {5}, {5}, {0} } }, - {{{0}, {5}, {10}, {0} } } }, /* SVGA3D_X1R5G5B5 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 2, 2, {16, {{5}, {5}, {5}, {1} } }, - {{{0}, {5}, {10}, {15} } } }, /* SVGA3D_A1R5G5B5 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 2, 2, {16, {{4}, {4}, {4}, {4} } }, - {{{0}, {4}, {8}, {12} } } }, /* SVGA3D_A4R4G4B4 */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_D32 */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_D16 */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24S8 */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 2, 2, {16, {{0}, {1}, {15}, {0} } }, - {{{0}, {15}, {0}, {0} } } }, /* SVGA3D_Z_D15S1 */ - - {SVGA3DBLOCKDESC_LUMINANCE, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_LUMINANCE8 */ - - {SVGA3DBLOCKDESC_LA, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {4}, {4} 
} }, - {{{0}, {0}, {0}, {4} } } }, /* SVGA3D_LUMINANCE4_ALPHA4 */ - - {SVGA3DBLOCKDESC_LUMINANCE, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_LUMINANCE16 */ - - {SVGA3DBLOCKDESC_LA, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {8}, {8} } }, - {{{0}, {0}, {0}, {8} } } }, /* SVGA3D_LUMINANCE8_ALPHA8 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT1 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT2 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT3 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT4 */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT5 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {8}, {8} } }, - {{{0}, {0}, {0}, {8} } } }, /* SVGA3D_BUMPU8V8 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 2, 2, {16, {{5}, {5}, {6}, {0} } }, - {{{11}, {6}, {0}, {0} } } }, /* SVGA3D_BUMPL6V5U5 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {0} } }, - {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_BUMPX8L8V8U8 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 3, 3, {24, {{8}, {8}, {8}, {0} } }, - {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_BUMPL8V8U8 */ - - {SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_ARGB_S10E5 */ - - {SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_ARGB_S23E8 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_A2R10G10B10 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{8}, {8}, {0}, {0} } }, - {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_V8U8 */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{24}, {16}, {8}, {0} } } }, /* SVGA3D_Q8W8V8U8 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{8}, {8}, {0}, {0} } }, - {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_CxV8U8 */ - - {SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_X8L8V8U8 */ - - {SVGA3DBLOCKDESC_UVWA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_A2W10V10U10 */ - - {SVGA3DBLOCKDESC_ALPHA, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {0}, {8} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_ALPHA8 */ - - {SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R_S10E5 */ - - {SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R_S23E8 */ - - {SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_RG_S10E5 */ - - {SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_RG_S23E8 */ - - {SVGA3DBLOCKDESC_BUFFER, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BUFFER */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24X8 */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 4, 4, {32, {{16}, {16}, 
{0}, {0} } }, - {{{16}, {0}, {0}, {0} } } }, /* SVGA3D_V16U16 */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {0}, {16}, {0} } } }, /* SVGA3D_G16R16 */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_A16B16G16R16 */ - - {SVGA3DBLOCKDESC_YUV, - {1, 1, 1}, 2, 2, {16, {{8}, {0}, {8}, {0} } }, - {{{0}, {0}, {8}, {0} } } }, /* SVGA3D_UYVY */ - - {SVGA3DBLOCKDESC_YUV, - {1, 1, 1}, 2, 2, {16, {{8}, {0}, {8}, {0} } }, - {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_YUY2 */ - - {SVGA3DBLOCKDESC_NV12, - {2, 2, 1}, 6, 2, {48, {{0}, {0}, {48}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_NV12 */ - - {SVGA3DBLOCKDESC_AYUV, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_AYUV */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_UINT */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } }, - {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_SINT */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_TYPELESS */ - - {SVGA3DBLOCKDESC_RGB_FP, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_FLOAT */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_UINT */ - - {SVGA3DBLOCKDESC_UVW, - {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } }, - {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_SINT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_UINT */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_SNORM */ - - {SVGA3DBLOCKDESC_UVWQ, - {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } }, - {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_SINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_TYPELESS */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_UINT */ - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_SINT */ +typedef struct SVGA3dChannelDef { + union { + uint8 blue; + uint8 u; + uint8 uv_video; + uint8 u_video; + }; + union { + uint8 green; + uint8 v; + uint8 stencil; + uint8 v_video; + }; + union { + uint8 red; + uint8 w; + uint8 luminance; + uint8 y; + uint8 depth; + uint8 data; + }; + union { + uint8 alpha; + uint8 q; + uint8 exp; + }; +} SVGA3dChannelDef; - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 8, 8, {64, {{0}, {8}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G8X24_TYPELESS */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 8, 8, {64, {{0}, {8}, {32}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_D32_FLOAT_S8X24_UINT */ - - {SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 8, 8, {64, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } 
} }, /* SVGA3D_R32_FLOAT_X8_X24_TYPELESS */ - - {SVGA3DBLOCKDESC_GREEN, - {1, 1, 1}, 8, 8, {64, {{0}, {8}, {0}, {0} } }, - {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_X32_TYPELESS_G8X24_UINT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10A2_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10A2_UINT */ - - {SVGA3DBLOCKDESC_RGB_FP, - {1, 1, 1}, 4, 4, {32, {{10}, {11}, {11}, {0} } }, - {{{0}, {10}, {21}, {0} } } }, /* SVGA3D_R11G11B10_FLOAT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_TYPELESS */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UNORM */ - - {SVGA3DBLOCKDESC_RGBA_SRGB, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UINT */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_SINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_TYPELESS */ - - {SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_UINT */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } }, - {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_SINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_TYPELESS */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_D32_FLOAT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_UINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_SINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_R24G8_TYPELESS */ - - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_D24_UNORM_S8_UINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 4, 4, {32, {{0}, {0}, {24}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R24_UNORM_X8_TYPELESS */ - - {SVGA3DBLOCKDESC_GREEN, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {0}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_X24_TYPELESS_G8_UINT */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_TYPELESS */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_UNORM */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_UINT */ - - {SVGA3DBLOCKDESC_UV, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_SINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_TYPELESS */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* 
SVGA3D_R16_UNORM */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_UINT */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_SNORM */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_SINT */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_TYPELESS */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_UNORM */ - - {SVGA3DBLOCKDESC_RED, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_UINT */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_SNORM */ - - {SVGA3DBLOCKDESC_U, - {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_SINT */ - - {SVGA3DBLOCKDESC_RED, - {8, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R1_UNORM */ - - {SVGA3DBLOCKDESC_RGBE, - {1, 1, 1}, 4, 4, {32, {{9}, {9}, {9}, {5} } }, - {{{18}, {9}, {0}, {27} } } }, /* SVGA3D_R9G9B9E5_SHAREDEXP */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_B8G8_UNORM */ - - {SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } }, - {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_G8R8_G8B8_UNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC1_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED_SRGB, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC1_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC2_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED_SRGB, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC2_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC3_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED_SRGB, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC3_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_UNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_SNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_TYPELESS */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_UNORM */ - - {SVGA3DBLOCKDESC_COMPRESSED, - {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_SNORM */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } }, - {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10_XR_BIAS_A2_UNORM */ - - {SVGA3DBLOCKDESC_RGBA, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8A8_TYPELESS 
*/ - - {SVGA3DBLOCKDESC_RGBA_SRGB, - {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8A8_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_RGB, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8X8_TYPELESS */ - - {SVGA3DBLOCKDESC_RGB_SRGB, - {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } }, - {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8X8_UNORM_SRGB */ - - {SVGA3DBLOCKDESC_DEPTH, - {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_DF16 */ +struct svga3d_surface_desc { + SVGA3dSurfaceFormat format; + enum svga3d_block_desc block_desc; - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_DF24 */ + SVGA3dSize block_size; + uint32 bytes_per_block; + uint32 pitch_bytes_per_block; - {SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } }, - {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24S8_INT */ + uint32 totalBitDepth; + SVGA3dChannelDef bitDepth; + SVGA3dChannelDef bitOffset; +}; - {SVGA3DBLOCKDESC_YV12, - {2, 2, 1}, 6, 2, {48, {{0}, {0}, {48}, {0} } }, - {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_YV12 */ +static const struct svga3d_surface_desc svga3d_surface_descs[] = { + {SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE, + {1, 1, 1}, 0, 0, + 0, {{0}, {0}, {0}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 2, 2, + 16, {{5}, {6}, {5}, {0}}, + {{0}, {5}, {11}, {0}}}, + + {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 2, 2, + 15, {{5}, {5}, {5}, {0}}, + {{0}, {5}, {10}, {0}}}, + + {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 2, 2, + 16, {{5}, {5}, {5}, {1}}, + {{0}, {5}, {10}, {15}}}, + + {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 2, 2, + 16, {{4}, {4}, {4}, {4}}, + {{0}, {4}, {8}, {12}}}, + + {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 2, 2, + 16, {{0}, {1}, {15}, {0}}, + {{0}, {15}, {0}, {0}}}, + + {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_LUMINANCE, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA, + {1 , 1, 1}, 1, 1, + 8, {{0}, {0}, {4}, {4}}, + {{0}, {0}, {0}, {4}}}, + + {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_LUMINANCE, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {8}, {8}}, + {{0}, {0}, {0}, {8}}}, + + {SVGA3D_DXT1, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT2, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT3, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT4, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_DXT5, 
SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {8}, {8}}, + {{0}, {0}, {0}, {8}}}, + + {SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL, + {1, 1, 1}, 2, 2, + 16, {{5}, {5}, {6}, {0}}, + {{11}, {6}, {0}, {0}}}, + + {SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {0}}, + {{16}, {8}, {0}, {0}}}, + + {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_UVL, + {0, 0, 0}, 0, 0, + 0, {{0}, {0}, {0}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{8}, {8}, {0}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{24}, {16}, {8}, {0}}}, + + {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{8}, {8}, {0}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{16}, {8}, {0}, {0}}}, + + {SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_ALPHA, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {0}, {8}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 4, 4, + 32, {{16}, {16}, {0}, {0}}, + {{16}, {0}, {0}, {0}}}, + + {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {0}, {16}, {0}}}, + + {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV, + {1, 1, 1}, 2, 2, + 16, {{8}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}}, + + {SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV, + {1, 1, 1}, 2, 2, + 16, {{8}, {0}, {8}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_NV12, SVGA3DBLOCKDESC_NV12, + {2, 2, 1}, 6, 2, + 48, {{0}, {0}, {48}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, 
{32}, {0}, {96}}}, + + {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_UVW, + {1, 1, 1}, 12, 12, + 96, {{32}, {32}, {32}, {0}}, + {{64}, {32}, {0}, {0}}}, + + {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_UVWQ, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 8, 8, + 64, {{0}, {8}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 8, 8, + 64, {{0}, {8}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R32_FLOAT_X8X24_TYPELESS, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 8, 8, + 64, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_X32_TYPELESS_G8X24_UINT, SVGA3DBLOCKDESC_GREEN, + {1, 1, 1}, 8, 8, + 64, {{0}, {8}, {0}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP, + {1, 1, 1}, 4, 4, + 32, {{10}, {11}, {11}, {0}}, + {{0}, {10}, {21}, {0}}}, + + {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, + + {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, 
{0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_R24_UNORM_X8_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {24}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_X24_TYPELESS_G8_UINT, SVGA3DBLOCKDESC_GREEN, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {0}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_UV, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_U, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_P8, SVGA3DBLOCKDESC_RED, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGBE, + {1, 1, 1}, 4, 4, + 32, {{9}, {9}, {9}, {5}}, + {{18}, {9}, {0}, {27}}}, + + {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, + + {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC3_UNORM_SRGB, 
SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_ATI1, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_ATI2, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_SRGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS, + {1, 1, 1}, 4, 4, + 32, {{0}, {8}, {24}, {0}}, + {{0}, {24}, {0}, {0}}}, + + {SVGA3D_YV12, SVGA3DBLOCKDESC_YV12, + {2, 2, 1}, 6, 2, + 48, {{0}, {0}, {48}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 16, 16, + 128, {{32}, {32}, {32}, {32}}, + {{64}, {32}, {0}, {96}}}, + + {SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 8, 8, + 64, {{16}, {16}, {16}, {16}}, + {{32}, {16}, {0}, {48}}}, + + {SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 8, 8, + 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {0}, {0}}}, + + {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{10}, {10}, {10}, {2}}, + {{0}, {10}, {20}, {30}}}, + + {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{24}, {16}, {8}, {0}}}, + + {SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, + + {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{0}, {16}, {16}, {0}}, + {{0}, {0}, {16}, {0}}}, + + {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 4, 4, + 32, {{16}, {16}, {0}, {0}}, + {{16}, {0}, {0}, {0}}}, + + {SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 4, 4, + 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG, + {1, 1, 1}, 2, 2, + 16, {{8}, {8}, {0}, {0}}, + {{8}, {0}, {0}, {0}}}, + + {SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP, + {1, 1, 1}, 2, 2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH, + {1, 1, 1}, 2, 
2, + 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_ALPHA, + {1, 1, 1}, 1, 1, + 8, {{0}, {0}, {0}, {8}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 2, 2, + 16, {{5}, {6}, {5}, {0}}, + {{0}, {5}, {11}, {0}}}, + + {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 2, 2, + 16, {{5}, {5}, {5}, {1}}, + {{0}, {5}, {10}, {15}}}, + + {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA, + {1, 1, 1}, 4, 4, + 32, {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB, + {1, 1, 1}, 4, 4, + 24, {{8}, {8}, {8}, {0}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 8, 8, + 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {4, 4, 1}, 16, 16, + 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, }; @@ -704,6 +912,16 @@ static inline uint32 clamped_umul32(uint32 a, uint32 b) return (tmp > (uint64_t) ((uint32) -1)) ? (uint32) -1 : tmp; } +static inline uint32 clamped_uadd32(uint32 a, uint32 b) +{ + uint32 c = a + b; + if (c < a || c < b) { + return MAX_UINT32; + } + return c; +} + + static inline const struct svga3d_surface_desc * svga3dsurface_get_desc(SVGA3dSurfaceFormat format) { @@ -828,7 +1046,7 @@ static inline uint32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, SVGA3dSize baseLevelSize, uint32 numMipLevels, - uint32 face, + uint32 layer, uint32 mip) { @@ -853,7 +1071,7 @@ svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, } } - offset = mipChainBytes * face + mipChainBytesToLevel; + offset = mipChainBytes * layer + mipChainBytesToLevel; return offset; } @@ -863,7 +1081,7 @@ static inline uint32 svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format, SVGA3dSize base_level_size, uint32 num_mip_levels, - bool cubemap) + uint32 num_layers) { const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format); uint64_t total_size = 0; @@ -876,8 +1094,7 @@ svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format, &size, 0); } - if (cubemap) - total_size *= SVGA3D_MAX_SURFACE_FACES; + total_size *= num_layers; return (total_size > (uint64_t) MAX_UINT32) ? MAX_UINT32 : (uint32) total_size; diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h index fc4a6b95a..de711c388 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. 
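The surfacedefs hunk above replaces the bool cubemap parameter of svga3dsurface_get_serialized_size() with an explicit num_layers count and adds the overflow-clamping clamped_uadd32() helper. A minimal caller-side sketch of the new parameter, assuming a cube map is stored as six layers (one per face); the wrapper name is invented for illustration:

   static uint32
   example_cubemap_serialized_size(SVGA3dSurfaceFormat format,
                                   SVGA3dSize base_level_size,
                                   uint32 num_mip_levels)
   {
      /* Previously: cubemap = true.  Now the layer count is explicit and
       * the helper clamps the result to MAX_UINT32 on overflow. */
      return svga3dsurface_get_serialized_size(format, base_level_size,
                                               num_mip_levels, 6);
   }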
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -43,10 +43,6 @@ */ #define SVGA3D_INVALID_ID ((uint32)-1) -#define SVGA3D_INVALID_CID SVGA3D_INVALID_ID -#define SVGA3D_INVALID_SID SVGA3D_INVALID_ID -#define SVGA3D_INVALID_SHID SVGA3D_INVALID_ID - typedef uint32 SVGA3dBool; /* 32-bit Bool definition */ typedef uint32 SVGA3dColor; /* a, r, g, b */ @@ -116,13 +112,7 @@ SVGA3dPoint; /* * Surface formats. - * - * If you modify this list, be sure to keep GLUtil.c in sync. It - * includes the internal format definition of each surface in - * GLUtil_ConvertSurfaceFormat, and it contains a table of - * human-readable names in GLUtil_GetFormatName. */ - typedef enum SVGA3dSurfaceFormat { SVGA3D_FORMAT_INVALID = 0, @@ -155,7 +145,7 @@ typedef enum SVGA3dSurfaceFormat { SVGA3D_BUMPU8V8 = 20, SVGA3D_BUMPL6V5U5 = 21, SVGA3D_BUMPX8L8V8U8 = 22, - SVGA3D_BUMPL8V8U8 = 23, + SVGA3D_FORMAT_DEAD1 = 23, SVGA3D_ARGB_S10E5 = 24, /* 16-bit floating-point ARGB */ SVGA3D_ARGB_S23E8 = 25, /* 32-bit floating-point ARGB */ @@ -271,7 +261,7 @@ typedef enum SVGA3dSurfaceFormat { SVGA3D_B8G8R8X8_TYPELESS = 116, SVGA3D_B8G8R8X8_UNORM_SRGB = 117, - /* Advanced D3D9 depth formats. */ + /* Advanced depth formats. */ SVGA3D_Z_DF16 = 118, SVGA3D_Z_DF24 = 119, SVGA3D_Z_D24S8_INT = 120, @@ -306,13 +296,157 @@ typedef enum SVGA3dSurfaceFormat { SVGA3D_FORMAT_MAX } SVGA3dSurfaceFormat; +typedef uint32 SVGA3dSurfaceFlags; +#define SVGA3D_SURFACE_CUBEMAP (1 << 0) + +/* + * HINT flags are not enforced by the device but are useful for + * performance. + */ +#define SVGA3D_SURFACE_HINT_STATIC (1 << 1) +#define SVGA3D_SURFACE_HINT_DYNAMIC (1 << 2) +#define SVGA3D_SURFACE_HINT_INDEXBUFFER (1 << 3) +#define SVGA3D_SURFACE_HINT_VERTEXBUFFER (1 << 4) +#define SVGA3D_SURFACE_HINT_TEXTURE (1 << 5) +#define SVGA3D_SURFACE_HINT_RENDERTARGET (1 << 6) +#define SVGA3D_SURFACE_HINT_DEPTHSTENCIL (1 << 7) +#define SVGA3D_SURFACE_HINT_WRITEONLY (1 << 8) +#define SVGA3D_SURFACE_MASKABLE_ANTIALIAS (1 << 9) +#define SVGA3D_SURFACE_AUTOGENMIPMAPS (1 << 10) +#define SVGA3D_SURFACE_DECODE_RENDERTARGET (1 << 11) + +/* + * Is this surface using a base-level pitch for it's mob backing? + * + * This flag is not intended to be set by guest-drivers, but is instead + * set by the device when the surface is bound to a mob with a specified + * pitch. + */ +#define SVGA3D_SURFACE_MOB_PITCH (1 << 12) + +#define SVGA3D_SURFACE_INACTIVE (1 << 13) +#define SVGA3D_SURFACE_HINT_RT_LOCKABLE (1 << 14) +#define SVGA3D_SURFACE_VOLUME (1 << 15) + +/* + * Required to be set on a surface to bind it to a screen target. + */ +#define SVGA3D_SURFACE_SCREENTARGET (1 << 16) + +/* + * Align images in the guest-backing mob to 16-bytes. + */ +#define SVGA3D_SURFACE_ALIGN16 (1 << 17) + +#define SVGA3D_SURFACE_1D (1 << 18) +#define SVGA3D_SURFACE_ARRAY (1 << 19) + +/* + * Bind flags. + * These are enforced for any surface defined with DefineGBSurface_v2. + */ +#define SVGA3D_SURFACE_BIND_VERTEX_BUFFER (1 << 20) +#define SVGA3D_SURFACE_BIND_INDEX_BUFFER (1 << 21) +#define SVGA3D_SURFACE_BIND_CONSTANT_BUFFER (1 << 22) +#define SVGA3D_SURFACE_BIND_SHADER_RESOURCE (1 << 23) +#define SVGA3D_SURFACE_BIND_RENDER_TARGET (1 << 24) +#define SVGA3D_SURFACE_BIND_DEPTH_STENCIL (1 << 25) +#define SVGA3D_SURFACE_BIND_STREAM_OUTPUT (1 << 26) + +/* + * The STAGING flags notes that the surface will not be used directly by the + * drawing pipeline, i.e. that it will not be bound to any bind point. 
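The new bind and staging surface flags above come with a rule stated in the comment: no bind flags may be set on a staging surface. A minimal sketch of that check, assuming the SVGA3D_SURFACE_STAGING_MASK and SVGA3D_SURFACE_BIND_MASK convenience masks defined further down in this hunk; the function name is invented for illustration:

   static inline bool
   example_surface_flags_valid(SVGA3dSurfaceFlags flags)
   {
      /* Staging surfaces must not be bound to any pipeline bind point. */
      if ((flags & SVGA3D_SURFACE_STAGING_MASK) &&
          (flags & SVGA3D_SURFACE_BIND_MASK))
         return false;
      return true;
   }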
+ * Staging surfaces may be used by copy operations to move data in and out + * of other surfaces. No bind flags may be set on surfaces with this flag. + * + * The HINT_INDIRECT_UPDATE flag suggests that the surface will receive + * updates indirectly, i.e. the surface will not be updated directly, but + * will receive copies from staging surfaces. + */ +#define SVGA3D_SURFACE_STAGING_UPLOAD (1 << 27) +#define SVGA3D_SURFACE_STAGING_DOWNLOAD (1 << 28) +#define SVGA3D_SURFACE_HINT_INDIRECT_UPDATE (1 << 29) + +/* + * Setting this flag allow this surface to be used with the + * SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command. It is only valid for + * buffer surfaces, and no bind flags are allowed to be set on surfaces + * with this flag. + */ +#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (1 << 30) /* - * These are really the D3DFORMAT_OP defines from the wdk. We need - * them so that we can query the host for what the supported surface - * operations are (when we're using the D3D backend, in particular), - * and so we can send those operations to the guest. + * Marker for the last defined bit in SVGA3dSurfaceFlags. */ +#define SVGA3D_SURFACE_FLAG_MAX (1 << 31) + +#define SVGA3D_SURFACE_HB_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_MOB_PITCH | \ + SVGA3D_SURFACE_SCREENTARGET | \ + SVGA3D_SURFACE_ALIGN16 | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_2D_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_CUBEMAP | \ + SVGA3D_SURFACE_MASKABLE_ANTIALIAS | \ + SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_DECODE_RENDERTARGET | \ + SVGA3D_SURFACE_VOLUME | \ + SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_SCREENTARGET_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_CUBEMAP | \ + SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_DECODE_RENDERTARGET | \ + SVGA3D_SURFACE_VOLUME | \ + SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_INACTIVE | \ + SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_DX_ONLY_MASK \ + ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ + ) + +#define SVGA3D_SURFACE_STAGING_MASK \ + ( SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD \ + ) + +#define SVGA3D_SURFACE_BIND_MASK \ + ( SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \ + SVGA3D_SURFACE_BIND_RENDER_TARGET | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT \ + ) + typedef enum { SVGA3DFORMAT_OP_TEXTURE = 0x00000001, SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002, @@ -656,25 +790,27 @@ union { SVGA3dLinePattern; typedef enum { - SVGA3D_BLENDOP_INVALID = 0, - SVGA3D_BLENDOP_MIN = 1, - SVGA3D_BLENDOP_ZERO = 1, - 
SVGA3D_BLENDOP_ONE = 2, - SVGA3D_BLENDOP_SRCCOLOR = 3, - SVGA3D_BLENDOP_INVSRCCOLOR = 4, - SVGA3D_BLENDOP_SRCALPHA = 5, - SVGA3D_BLENDOP_INVSRCALPHA = 6, - SVGA3D_BLENDOP_DESTALPHA = 7, - SVGA3D_BLENDOP_INVDESTALPHA = 8, - SVGA3D_BLENDOP_DESTCOLOR = 9, - SVGA3D_BLENDOP_INVDESTCOLOR = 10, - SVGA3D_BLENDOP_SRCALPHASAT = 11, - SVGA3D_BLENDOP_BLENDFACTOR = 12, - SVGA3D_BLENDOP_INVBLENDFACTOR = 13, - SVGA3D_BLENDOP_SRC1COLOR = 14, - SVGA3D_BLENDOP_INVSRC1COLOR = 15, - SVGA3D_BLENDOP_SRC1ALPHA = 16, - SVGA3D_BLENDOP_INVSRC1ALPHA = 17, + SVGA3D_BLENDOP_INVALID = 0, + SVGA3D_BLENDOP_MIN = 1, + SVGA3D_BLENDOP_ZERO = 1, + SVGA3D_BLENDOP_ONE = 2, + SVGA3D_BLENDOP_SRCCOLOR = 3, + SVGA3D_BLENDOP_INVSRCCOLOR = 4, + SVGA3D_BLENDOP_SRCALPHA = 5, + SVGA3D_BLENDOP_INVSRCALPHA = 6, + SVGA3D_BLENDOP_DESTALPHA = 7, + SVGA3D_BLENDOP_INVDESTALPHA = 8, + SVGA3D_BLENDOP_DESTCOLOR = 9, + SVGA3D_BLENDOP_INVDESTCOLOR = 10, + SVGA3D_BLENDOP_SRCALPHASAT = 11, + SVGA3D_BLENDOP_BLENDFACTOR = 12, + SVGA3D_BLENDOP_INVBLENDFACTOR = 13, + SVGA3D_BLENDOP_SRC1COLOR = 14, + SVGA3D_BLENDOP_INVSRC1COLOR = 15, + SVGA3D_BLENDOP_SRC1ALPHA = 16, + SVGA3D_BLENDOP_INVSRC1ALPHA = 17, + SVGA3D_BLENDOP_BLENDFACTORALPHA = 18, + SVGA3D_BLENDOP_INVBLENDFACTORALPHA = 19, SVGA3D_BLENDOP_MAX } SVGA3dBlendOp; @@ -690,6 +826,27 @@ typedef enum { } SVGA3dBlendEquation; typedef enum { + SVGA3D_DX11_LOGICOP_MIN = 0, + SVGA3D_DX11_LOGICOP_CLEAR = 0, + SVGA3D_DX11_LOGICOP_SET = 1, + SVGA3D_DX11_LOGICOP_COPY = 2, + SVGA3D_DX11_LOGICOP_COPY_INVERTED = 3, + SVGA3D_DX11_LOGICOP_NOOP = 4, + SVGA3D_DX11_LOGICOP_INVERT = 5, + SVGA3D_DX11_LOGICOP_AND = 6, + SVGA3D_DX11_LOGICOP_NAND = 7, + SVGA3D_DX11_LOGICOP_OR = 8, + SVGA3D_DX11_LOGICOP_NOR = 9, + SVGA3D_DX11_LOGICOP_XOR = 10, + SVGA3D_DX11_LOGICOP_EQUIV = 11, + SVGA3D_DX11_LOGICOP_AND_REVERSE = 12, + SVGA3D_DX11_LOGICOP_AND_INVERTED = 13, + SVGA3D_DX11_LOGICOP_OR_REVERSE = 14, + SVGA3D_DX11_LOGICOP_OR_INVERTED = 15, + SVGA3D_DX11_LOGICOP_MAX +} SVGA3dDX11LogicOp; + +typedef enum { SVGA3D_FRONTWINDING_INVALID = 0, SVGA3D_FRONTWINDING_CW = 1, SVGA3D_FRONTWINDING_CCW = 2, @@ -952,10 +1109,10 @@ typedef enum { SVGA3D_TEX_FILTER_NEAREST = 1, SVGA3D_TEX_FILTER_LINEAR = 2, SVGA3D_TEX_FILTER_ANISOTROPIC = 3, - SVGA3D_TEX_FILTER_FLATCUBIC = 4, // Deprecated, not implemented - SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, // Deprecated, not implemented - SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, // Not currently implemented - SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, // Not currently implemented + SVGA3D_TEX_FILTER_FLATCUBIC = 4, /* Deprecated, not implemented */ + SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, /* Deprecated, not implemented */ + SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, /* Not currently implemented */ + SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, /* Not currently implemented */ SVGA3D_TEX_FILTER_MAX } SVGA3dTextureFilter; @@ -1013,19 +1170,19 @@ typedef enum { typedef enum { SVGA3D_DECLUSAGE_POSITION = 0, - SVGA3D_DECLUSAGE_BLENDWEIGHT, // 1 - SVGA3D_DECLUSAGE_BLENDINDICES, // 2 - SVGA3D_DECLUSAGE_NORMAL, // 3 - SVGA3D_DECLUSAGE_PSIZE, // 4 - SVGA3D_DECLUSAGE_TEXCOORD, // 5 - SVGA3D_DECLUSAGE_TANGENT, // 6 - SVGA3D_DECLUSAGE_BINORMAL, // 7 - SVGA3D_DECLUSAGE_TESSFACTOR, // 8 - SVGA3D_DECLUSAGE_POSITIONT, // 9 - SVGA3D_DECLUSAGE_COLOR, // 10 - SVGA3D_DECLUSAGE_FOG, // 11 - SVGA3D_DECLUSAGE_DEPTH, // 12 - SVGA3D_DECLUSAGE_SAMPLE, // 13 + SVGA3D_DECLUSAGE_BLENDWEIGHT, + SVGA3D_DECLUSAGE_BLENDINDICES, + SVGA3D_DECLUSAGE_NORMAL, + SVGA3D_DECLUSAGE_PSIZE, + SVGA3D_DECLUSAGE_TEXCOORD, + SVGA3D_DECLUSAGE_TANGENT, + SVGA3D_DECLUSAGE_BINORMAL, + 
SVGA3D_DECLUSAGE_TESSFACTOR, + SVGA3D_DECLUSAGE_POSITIONT, + SVGA3D_DECLUSAGE_COLOR, + SVGA3D_DECLUSAGE_FOG, + SVGA3D_DECLUSAGE_DEPTH, + SVGA3D_DECLUSAGE_SAMPLE, SVGA3D_DECLUSAGE_MAX } SVGA3dDeclUsage; @@ -1033,10 +1190,11 @@ typedef enum { SVGA3D_DECLMETHOD_DEFAULT = 0, SVGA3D_DECLMETHOD_PARTIALU, SVGA3D_DECLMETHOD_PARTIALV, - SVGA3D_DECLMETHOD_CROSSUV, // Normal + SVGA3D_DECLMETHOD_CROSSUV, /* Normal */ SVGA3D_DECLMETHOD_UV, - SVGA3D_DECLMETHOD_LOOKUP, // Lookup a displacement map - SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map + SVGA3D_DECLMETHOD_LOOKUP, /* Lookup a displacement map */ + SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement */ + /* map */ } SVGA3dDeclMethod; typedef enum { @@ -1162,17 +1320,23 @@ typedef enum { SVGA3D_SHADERTYPE_MIN = 1, SVGA3D_SHADERTYPE_VS = 1, SVGA3D_SHADERTYPE_PS = 2, - SVGA3D_SHADERTYPE_MAX = 3, SVGA3D_SHADERTYPE_PREDX_MAX = 3, SVGA3D_SHADERTYPE_GS = 3, - SVGA3D_SHADERTYPE_DX_MAX = 4, + SVGA3D_SHADERTYPE_DX10_MAX = 4, + SVGA3D_SHADERTYPE_HS = 4, + SVGA3D_SHADERTYPE_DS = 5, + SVGA3D_SHADERTYPE_CS = 6, + SVGA3D_SHADERTYPE_MAX = 7 } SVGA3dShaderType; #define SVGA3D_NUM_SHADERTYPE_PREDX \ (SVGA3D_SHADERTYPE_PREDX_MAX - SVGA3D_SHADERTYPE_MIN) -#define SVGA3D_NUM_SHADERTYPE_DX \ - (SVGA3D_SHADERTYPE_DX_MAX - SVGA3D_SHADERTYPE_MIN) +#define SVGA3D_NUM_SHADERTYPE_DX10 \ + (SVGA3D_SHADERTYPE_DX10_MAX - SVGA3D_SHADERTYPE_MIN) + +#define SVGA3D_NUM_SHADERTYPE \ + (SVGA3D_SHADERTYPE_MAX - SVGA3D_SHADERTYPE_MIN) typedef enum { SVGA3D_CONST_TYPE_MIN = 0, @@ -1196,33 +1360,151 @@ typedef enum { } SVGA3dStretchBltMode; typedef enum { - SVGA3D_QUERYTYPE_INVALID = ((uint32)-1), + SVGA3D_QUERYTYPE_INVALID = ((uint8)-1), SVGA3D_QUERYTYPE_MIN = 0, SVGA3D_QUERYTYPE_OCCLUSION = 0, - SVGA3D_QUERYTYPE_EVENT = 1, - SVGA3D_QUERYTYPE_TIMESTAMP = 2, - SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 3, - SVGA3D_QUERYTYPE_PIPELINESTATS = 4, - SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 5, - SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 6, - SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 7, - SVGA3D_QUERYTYPE_OCCLUSION64 = 8, + SVGA3D_QUERYTYPE_TIMESTAMP = 1, + SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 2, + SVGA3D_QUERYTYPE_PIPELINESTATS = 3, + SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 4, + SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 5, + SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 6, + SVGA3D_QUERYTYPE_OCCLUSION64 = 7, + SVGA3D_QUERYTYPE_DX10_MAX = 8, + SVGA3D_QUERYTYPE_SOSTATS_STREAM0 = 8, + SVGA3D_QUERYTYPE_SOSTATS_STREAM1 = 9, + SVGA3D_QUERYTYPE_SOSTATS_STREAM2 = 10, + SVGA3D_QUERYTYPE_SOSTATS_STREAM3 = 11, + SVGA3D_QUERYTYPE_SOP_STREAM0 = 12, + SVGA3D_QUERYTYPE_SOP_STREAM1 = 13, + SVGA3D_QUERYTYPE_SOP_STREAM2 = 14, + SVGA3D_QUERYTYPE_SOP_STREAM3 = 15, SVGA3D_QUERYTYPE_MAX } SVGA3dQueryType; +typedef uint8 SVGA3dQueryTypeUint8; + #define SVGA3D_NUM_QUERYTYPE (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN) /* * This is the maximum number of queries per context that can be active * simultaneously between a beginQuery and endQuery. 
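The shader type enum above gains hull, domain and compute shader entries, and the count macros are reworked so SVGA3D_NUM_SHADERTYPE covers them all. Since SVGA3D_SHADERTYPE_MIN is 1, per-shader-type tables are indexed with an offset; a minimal sketch, with the array and function names invented for illustration:

   static uint32 example_shaders_emitted[SVGA3D_NUM_SHADERTYPE];

   static inline void
   example_note_shader(SVGA3dShaderType type)
   {
      assert(type >= SVGA3D_SHADERTYPE_MIN && type < SVGA3D_SHADERTYPE_MAX);
      example_shaders_emitted[type - SVGA3D_SHADERTYPE_MIN]++;
   }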
*/ -#define SVGA3D_MAX_QUERY_PER_CONTEXT 64 +#define SVGA3D_MAX_QUERY 64 + +/* + * Query result buffer formats + */ +typedef +#include "vmware_pack_begin.h" +struct { + uint32 samplesRendered; +} +#include "vmware_pack_end.h" +SVGADXOcclusionQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 passed; +} +#include "vmware_pack_end.h" +SVGADXEventQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 timestamp; +} +#include "vmware_pack_end.h" +SVGADXTimestampQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 realFrequency; + uint32 disjoint; +} +#include "vmware_pack_end.h" +SVGADXTimestampDisjointQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 inputAssemblyVertices; + uint64 inputAssemblyPrimitives; + uint64 vertexShaderInvocations; + uint64 geometryShaderInvocations; + uint64 geometryShaderPrimitives; + uint64 clipperInvocations; + uint64 clipperPrimitives; + uint64 pixelShaderInvocations; + uint64 hullShaderInvocations; + uint64 domainShaderInvocations; + uint64 computeShaderInvocations; +} +#include "vmware_pack_end.h" +SVGADXPipelineStatisticsQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 anySamplesRendered; +} +#include "vmware_pack_end.h" +SVGADXOcclusionPredicateQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 numPrimitivesWritten; + uint64 numPrimitivesRequired; +} +#include "vmware_pack_end.h" +SVGADXStreamOutStatisticsQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint32 overflowed; +} +#include "vmware_pack_end.h" +SVGADXStreamOutPredicateQueryResult; + +typedef +#include "vmware_pack_begin.h" +struct { + uint64 samplesRendered; +} +#include "vmware_pack_end.h" +SVGADXOcclusion64QueryResult; + +/* + * SVGADXQueryResultUnion is not intended for use in the protocol, but is + * very helpful when working with queries generically. + */ +typedef +#include "vmware_pack_begin.h" +union SVGADXQueryResultUnion { + SVGADXOcclusionQueryResult occ; + SVGADXEventQueryResult event; + SVGADXTimestampQueryResult ts; + SVGADXTimestampDisjointQueryResult tsDisjoint; + SVGADXPipelineStatisticsQueryResult pipelineStats; + SVGADXOcclusionPredicateQueryResult occPred; + SVGADXStreamOutStatisticsQueryResult soStats; + SVGADXStreamOutPredicateQueryResult soPred; + SVGADXOcclusion64QueryResult occ64; +} +#include "vmware_pack_end.h" +SVGADXQueryResultUnion; + typedef enum { - SVGA3D_QUERYSTATE_PENDING = 0, /* Waiting on the host (set by guest) */ - SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully (set by host) */ - SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully (set by host) */ - SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (For guest use only) */ + SVGA3D_QUERYSTATE_PENDING = 0, /* Query is not finished yet */ + SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully */ + SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully */ + SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (guest only) */ } SVGA3dQueryState; typedef enum { @@ -1249,9 +1531,9 @@ typedef struct { union { struct { - uint16 function; // SVGA3dFogFunction - uint8 type; // SVGA3dFogType - uint8 base; // SVGA3dFogBase + uint16 function; /* SVGA3dFogFunction */ + uint8 type; /* SVGA3dFogType */ + uint8 base; /* SVGA3dFogBase */ }; uint32 uintValue; }; @@ -1287,8 +1569,47 @@ SVGA3dSize; /* * Guest-backed objects definitions. 
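As its comment notes, SVGADXQueryResultUnion above is meant for handling query results generically rather than on the wire. A minimal sketch of such a reader, restricted to the occlusion-style results defined in this hunk; the function name is invented for illustration:

   static uint64
   example_query_samples(SVGA3dQueryType type,
                         const SVGADXQueryResultUnion *result)
   {
      switch (type) {
      case SVGA3D_QUERYTYPE_OCCLUSION:
         return result->occ.samplesRendered;
      case SVGA3D_QUERYTYPE_OCCLUSION64:
         return result->occ64.samplesRendered;
      case SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE:
         return result->occPred.anySamplesRendered;
      default:
         return 0;
      }
   }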
*/ +typedef enum { + SVGA_OTABLE_MOB = 0, + SVGA_OTABLE_MIN = 0, + SVGA_OTABLE_SURFACE = 1, + SVGA_OTABLE_CONTEXT = 2, + SVGA_OTABLE_SHADER = 3, + SVGA_OTABLE_SCREENTARGET = 4, + + SVGA_OTABLE_DX9_MAX = 5, -typedef uint32 SVGAMobId; + SVGA_OTABLE_DXCONTEXT = 5, + SVGA_OTABLE_MAX = 6 +} SVGAOTableType; + +/* + * Deprecated. + */ +#define SVGA_OTABLE_COUNT 4 + +typedef enum { + SVGA_COTABLE_MIN = 0, + SVGA_COTABLE_RTVIEW = 0, + SVGA_COTABLE_DSVIEW = 1, + SVGA_COTABLE_SRVIEW = 2, + SVGA_COTABLE_ELEMENTLAYOUT = 3, + SVGA_COTABLE_BLENDSTATE = 4, + SVGA_COTABLE_DEPTHSTENCIL = 5, + SVGA_COTABLE_RASTERIZERSTATE = 6, + SVGA_COTABLE_SAMPLER = 7, + SVGA_COTABLE_STREAMOUTPUT = 8, + SVGA_COTABLE_DXQUERY = 9, + SVGA_COTABLE_DXSHADER = 10, + SVGA_COTABLE_DX10_MAX = 11, + SVGA_COTABLE_UAVIEW = 11, + SVGA_COTABLE_MAX +} SVGACOTableType; + +/* + * The largest size (number of entries) allowed in a COTable. + */ +#define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2) typedef enum SVGAMobFormat { SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID, @@ -1300,7 +1621,11 @@ typedef enum SVGAMobFormat { SVGA3D_MOBFMT_PTDEPTH64_0 = 4, SVGA3D_MOBFMT_PTDEPTH64_1 = 5, SVGA3D_MOBFMT_PTDEPTH64_2 = 6, + SVGA3D_MOBFMT_PREDX_MAX = 7, + SVGA3D_MOBFMT_EMPTY = 7, SVGA3D_MOBFMT_MAX, } SVGAMobFormat; -#endif // _SVGA3D_TYPES_H_ +#define SVGA3D_MOB_EMPTY_BASE 1 + +#endif /* _SVGA3D_TYPES_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h b/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h index 9d44c4704..884b1d1fb 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga_escape.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -75,7 +75,7 @@ */ #define SVGA_ESCAPE_VMWARE_HINT 0x00030000 -#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 // Deprecated +#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 /* Deprecated */ typedef struct { diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h b/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h index ccbf7912e..161c3de7b 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga_overlay.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2007-2014 VMware, Inc. All rights reserved. + * Copyright 2007-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -38,9 +38,9 @@ * Video formats we support */ -#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2' -#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2' -#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y' +#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */ +#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */ +#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */ typedef enum { SVGA_OVERLAY_FORMAT_INVALID = 0, @@ -68,7 +68,7 @@ struct SVGAEscapeVideoSetRegs { uint32 streamId; } header; - // May include zero or more items. + /* May include zero or more items. 
*/ struct { uint32 registerId; uint32 value; @@ -134,12 +134,12 @@ struct { */ static inline Bool -VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN - uint32 *width, // IN / OUT - uint32 *height, // IN / OUT - uint32 *size, // OUT - uint32 *pitches, // OUT (optional) - uint32 *offsets) // OUT (optional) +VMwareVideoGetAttributes(const SVGAOverlayFormat format, /* IN */ + uint32 *width, /* IN / OUT */ + uint32 *height, /* IN / OUT */ + uint32 *size, /* OUT */ + uint32 *pitches, /* OUT (optional) */ + uint32 *offsets) /* OUT (optional) */ { int tmp; @@ -196,4 +196,4 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN return TRUE; } -#endif // _SVGA_OVERLAY_H_ +#endif /* _SVGA_OVERLAY_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h b/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h index e75b442f9..2661eef03 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga_reg.h @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2014 VMware, Inc. All rights reserved. + * Copyright 1998-2015 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -43,6 +43,8 @@ typedef enum { SVGA_REG_ENABLE_HIDE = (1 << 1), } SvgaRegEnable; +typedef uint32 SVGAMobId; + /* * Arbitrary and meaningless limits. Please ignore these when writing * new drivers. @@ -490,7 +492,7 @@ typedef struct SVGAGMRImageFormat { struct { uint32 bitsPerPixel : 8; uint32 colorDepth : 8; - uint32 reserved : 16; // Must be zero + uint32 reserved : 16; /* Must be zero */ }; uint32 value; @@ -533,7 +535,7 @@ typedef struct SVGAColorBGRX { uint32 b : 8; uint32 g : 8; uint32 r : 8; - uint32 x : 8; // Unused + uint32 x : 8; /* Unused */ }; uint32 value; @@ -605,24 +607,35 @@ struct { * SVGA_CAP_COMMAND_BUFFERS -- * Enable register based command buffer submission. * + * SVGA_CAP_DEAD1 -- + * This cap was incorrectly used by old drivers and should not be + * reused. + * + * SVGA_CAP_CMD_BUFFERS_2 -- + * Enable support for the prepend command buffer submision + * registers. SVGA_REG_CMD_PREPEND_LOW and + * SVGA_REG_CMD_PREPEND_HIGH. + * * SVGA_CAP_GBOBJECTS -- * Enable guest-backed objects and surfaces. * + * SVGA_CAP_CMD_BUFFERS_3 -- + * Enable support for command buffers in a mob. 
*/ #define SVGA_CAP_NONE 0x00000000 #define SVGA_CAP_RECT_COPY 0x00000002 #define SVGA_CAP_CURSOR 0x00000020 -#define SVGA_CAP_CURSOR_BYPASS 0x00000040 // Legacy (Use Cursor Bypass 3 instead) -#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 // Legacy (Use Cursor Bypass 3 instead) +#define SVGA_CAP_CURSOR_BYPASS 0x00000040 +#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 #define SVGA_CAP_8BIT_EMULATION 0x00000100 #define SVGA_CAP_ALPHA_CURSOR 0x00000200 #define SVGA_CAP_3D 0x00004000 #define SVGA_CAP_EXTENDED_FIFO 0x00008000 -#define SVGA_CAP_MULTIMON 0x00010000 // Legacy multi-monitor support +#define SVGA_CAP_MULTIMON 0x00010000 #define SVGA_CAP_PITCHLOCK 0x00020000 #define SVGA_CAP_IRQMASK 0x00040000 -#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 // Legacy multi-monitor support +#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 #define SVGA_CAP_GMR 0x00100000 #define SVGA_CAP_TRACES 0x00200000 #define SVGA_CAP_GMR2 0x00400000 @@ -631,6 +644,9 @@ struct { #define SVGA_CAP_DEAD1 0x02000000 #define SVGA_CAP_CMD_BUFFERS_2 0x04000000 #define SVGA_CAP_GBOBJECTS 0x08000000 +#define SVGA_CAP_CMD_BUFFERS_3 0x10000000 + +#define SVGA_CAP_CMD_RESERVED 0x80000000 /* @@ -698,7 +714,7 @@ enum { SVGA_FIFO_CAPABILITIES = 4, SVGA_FIFO_FLAGS, - // Valid with SVGA_FIFO_CAP_FENCE: + /* Valid with SVGA_FIFO_CAP_FENCE: */ SVGA_FIFO_FENCE, /* @@ -710,20 +726,20 @@ enum { * These in block 3a, the VMX currently considers mandatory for the * extended FIFO. */ - - // Valid if exists (i.e. if extended FIFO enabled): + + /* Valid if exists (i.e. if extended FIFO enabled): */ SVGA_FIFO_3D_HWVERSION, /* See SVGA3dHardwareVersion in svga3d_reg.h */ - // Valid with SVGA_FIFO_CAP_PITCHLOCK: + /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */ SVGA_FIFO_PITCHLOCK, - // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: + /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */ SVGA_FIFO_CURSOR_ON, /* Cursor bypass 3 show/hide register */ SVGA_FIFO_CURSOR_X, /* Cursor bypass 3 x register */ SVGA_FIFO_CURSOR_Y, /* Cursor bypass 3 y register */ SVGA_FIFO_CURSOR_COUNT, /* Incremented when any of the other 3 change */ SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */ - // Valid with SVGA_FIFO_CAP_RESERVE: + /* Valid with SVGA_FIFO_CAP_RESERVE: */ SVGA_FIFO_RESERVED, /* Bytes past NEXT_CMD with real contents */ /* @@ -789,7 +805,7 @@ enum { * sets SVGA_FIFO_MIN high enough to leave room for them. 
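The capability bits above are plain masks over the device's 32-bit capability word, so feature checks reduce to a bitwise test. A minimal sketch, assuming the caps value has already been read from the device (not shown in this diff); the helper name is invented for illustration:

   static inline bool
   example_device_has_gb_objects(uint32 caps)
   {
      /* The same pattern applies to SVGA_CAP_CMD_BUFFERS_3 and the rest. */
      return (caps & SVGA_CAP_GBOBJECTS) != 0;
   }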
*/ - // Valid if register exists: + /* Valid if register exists: */ SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */ SVGA_FIFO_FENCE_GOAL, /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */ SVGA_FIFO_BUSY, /* See "FIFO Synchronization Registers" */ @@ -1046,7 +1062,7 @@ enum { #define SVGA_FIFO_FLAG_NONE 0 #define SVGA_FIFO_FLAG_ACCELFRONT (1<<0) -#define SVGA_FIFO_FLAG_RESERVED (1<<31) // Internal use only +#define SVGA_FIFO_FLAG_RESERVED (1<<31) /* Internal use only */ /* * FIFO reservation sentinel value @@ -1079,22 +1095,23 @@ enum { SVGA_VIDEO_DATA_OFFSET, SVGA_VIDEO_FORMAT, SVGA_VIDEO_COLORKEY, - SVGA_VIDEO_SIZE, // Deprecated + SVGA_VIDEO_SIZE, /* Deprecated */ SVGA_VIDEO_WIDTH, SVGA_VIDEO_HEIGHT, SVGA_VIDEO_SRC_X, SVGA_VIDEO_SRC_Y, SVGA_VIDEO_SRC_WIDTH, SVGA_VIDEO_SRC_HEIGHT, - SVGA_VIDEO_DST_X, // Signed int32 - SVGA_VIDEO_DST_Y, // Signed int32 + SVGA_VIDEO_DST_X, /* Signed int32 */ + SVGA_VIDEO_DST_Y, /* Signed int32 */ SVGA_VIDEO_DST_WIDTH, SVGA_VIDEO_DST_HEIGHT, SVGA_VIDEO_PITCH_1, SVGA_VIDEO_PITCH_2, SVGA_VIDEO_PITCH_3, - SVGA_VIDEO_DATA_GMRID, // Optional, defaults to SVGA_GMR_FRAMEBUFFER - SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID) + SVGA_VIDEO_DATA_GMRID, /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */ + SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords */ + /* (SVGA_ID_INVALID) */ SVGA_VIDEO_NUM_REGS }; @@ -1180,10 +1197,10 @@ typedef struct SVGADisplayTopology { * value of zero means no cloning should happen. */ -#define SVGA_SCREEN_MUST_BE_SET (1 << 0) // Must be set or results undefined -#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET // Deprecated -#define SVGA_SCREEN_IS_PRIMARY (1 << 1) // Guest considers this screen to be 'primary' -#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) // Guest is running a fullscreen app here +#define SVGA_SCREEN_MUST_BE_SET (1 << 0) +#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */ +#define SVGA_SCREEN_IS_PRIMARY (1 << 1) +#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) /* * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2. When the screen is @@ -1207,7 +1224,7 @@ typedef struct SVGADisplayTopology { typedef struct { - uint32 structSize; // sizeof(SVGAScreenObject) + uint32 structSize; /* sizeof(SVGAScreenObject) */ uint32 id; uint32 flags; struct { @@ -1224,6 +1241,13 @@ struct { * with SVGA_FIFO_CAP_SCREEN_OBJECT. */ SVGAGuestImage backingStore; + + /* + * The cloneCount field is treated as a hint from the guest that + * the user wants this display to be cloned, cloneCount times. + * + * A value of zero means no cloning should happen. + */ uint32 cloneCount; } SVGAScreenObject; @@ -1238,7 +1262,7 @@ struct { * Note the holes in the command ID numbers: These commands have been * deprecated, and the old IDs must not be reused. * - * Command IDs from 1000 to 1999 are reserved for use by the SVGA3D + * Command IDs from 1000 to 2999 are reserved for use by the SVGA3D * protocol. * * Each command's parameters are described by the comments and @@ -1267,6 +1291,8 @@ typedef enum { SVGA_CMD_REMAP_GMR2 = 42, SVGA_CMD_DEAD = 43, SVGA_CMD_DEAD_2 = 44, + SVGA_CMD_NOP = 45, + SVGA_CMD_NOP_ERROR = 46, SVGA_CMD_MAX } SVGAFifoCmdId; @@ -1372,13 +1398,13 @@ struct { typedef struct { - uint32 id; // Reserved, must be zero. + uint32 id; /* Reserved, must be zero. 
*/ uint32 hotspotX; uint32 hotspotY; uint32 width; uint32 height; - uint32 andMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL - uint32 xorMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL + uint32 andMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */ + uint32 xorMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */ /* * Followed by scanline data for AND mask, then XOR mask. * Each scanline is padded to a 32-bit boundary. @@ -1401,7 +1427,7 @@ struct { typedef struct { - uint32 id; // Reserved, must be zero. + uint32 id; /* Reserved, must be zero. */ uint32 hotspotX; uint32 hotspotY; uint32 width; @@ -1449,12 +1475,12 @@ struct { typedef struct { - uint32 color; // In the same format as the GFB + uint32 color; /* In the same format as the GFB */ uint32 x; uint32 y; uint32 width; uint32 height; - uint32 rop; // Must be SVGA_ROP_COPY + uint32 rop; /* Must be SVGA_ROP_COPY */ } SVGAFifoCmdFrontRopFill; @@ -1526,7 +1552,7 @@ struct { typedef struct { - SVGAScreenObject screen; // Variable-length according to version + SVGAScreenObject screen; /* Variable-length according to version */ } SVGAFifoCmdDefineScreen; @@ -1807,8 +1833,8 @@ typedef struct { uint32 gmrId; SVGARemapGMR2Flags flags; - uint32 offsetPages; // offset in pages to begin remap - uint32 numPages; // number of pages to remap + uint32 offsetPages; /* offset in pages to begin remap */ + uint32 numPages; /* number of pages to remap */ /* * Followed by additional data depending on SVGARemapGMR2Flags. * @@ -1823,7 +1849,7 @@ struct { /* * Size of SVGA device memory such as frame buffer and FIFO. */ -#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) // bytes +#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) /* bytes */ #define SVGA_VRAM_MIN_SIZE_3D (16 * 1024 * 1024) #define SVGA_VRAM_MAX_SIZE (128 * 1024 * 1024) #define SVGA_MEMORY_SIZE_MAX (1024 * 1024 * 1024) @@ -1832,7 +1858,7 @@ struct { #define SVGA_GRAPHICS_MEMORY_KB_MAX (2 * 1024 * 1024) #define SVGA_GRAPHICS_MEMORY_KB_DEFAULT (256 * 1024) -#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) // 64 MB +#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) /* 64 MB */ /* * To simplify autoDetect display configuration, support a minimum of @@ -1848,7 +1874,7 @@ struct { #define SVGA_VRAM_SIZE (4 * 1024 * 1024) #define SVGA_VRAM_SIZE_3D (64 * 1024 * 1024) #define SVGA_FIFO_SIZE (256 * 1024) -#define SVGA_FIFO_SIZE_3D (516 * 1024) // Bump to 516KB to workaround WDDM driver issue (see bug# 744318) +#define SVGA_FIFO_SIZE_3D (516 * 1024) #define SVGA_MEMORY_SIZE_DEFAULT (160 * 1024 * 1024) #define SVGA_AUTODETECT_DEFAULT FALSE #else diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c index b27183217..e45b3e72a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c @@ -66,7 +66,7 @@ surface_to_surfaceid(struct svga_winsys_context *swc, // IN if (surface) { struct svga_surface *s = svga_surface(surface); swc->surface_relocation(swc, &id->sid, NULL, s->handle, flags); - id->face = s->real_face; /* faces have the same order */ + id->face = s->real_layer; /* faces have the same order */ id->mipmap = s->real_level; } else { @@ -337,7 +337,7 @@ SVGA3D_DefineSurface2D(struct svga_winsys_context *swc, // IN mipSizes[0].height = height; mipSizes[0].depth = 1; - swc->commit(swc);; + swc->commit(swc); return PIPE_OK; } @@ -372,7 +372,7 @@ SVGA3D_DestroySurface(struct svga_winsys_context *swc, swc->surface_relocation(swc, &cmd->sid, NULL, sid, SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); - 
swc->commit(swc);; + swc->commit(swc); return PIPE_OK; } @@ -460,7 +460,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, swc->surface_relocation(swc, &cmd->host.sid, NULL, texture->handle, surface_flags); - cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ + cmd->host.face = st->slice; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ cmd->host.mipmap = st->base.level; cmd->transfer = transfer; @@ -473,6 +473,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, pSuffix->flags = flags; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -543,6 +544,7 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc, pSuffix->flags = flags; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -842,6 +844,8 @@ SVGA3D_SetShader(struct svga_winsys_context *swc, { SVGA3dCmdSetShader *cmd; + assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS); + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SET_SHADER, sizeof *cmd, 0); @@ -1014,6 +1018,8 @@ SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, *decls = declArray; *ranges = rangeArray; + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + return PIPE_OK; } @@ -1382,10 +1388,10 @@ SVGA3D_BeginGBQuery(struct svga_winsys_context *swc, SVGA_3D_CMD_BEGIN_GB_QUERY, sizeof *cmd, 1); - if(!cmd) + if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; swc->commit(swc); @@ -1462,10 +1468,10 @@ SVGA3D_EndGBQuery(struct svga_winsys_context *swc, SVGA_3D_CMD_END_GB_QUERY, sizeof *cmd, 2); - if(!cmd) + if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer, @@ -1549,10 +1555,10 @@ SVGA3D_WaitForGBQuery(struct svga_winsys_context *swc, SVGA_3D_CMD_WAIT_FOR_GB_QUERY, sizeof *cmd, 2); - if(!cmd) + if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer, @@ -1615,36 +1621,6 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc, enum pipe_error -SVGA3D_DefineGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader, - SVGA3dShaderType type, - uint32 sizeInBytes) -{ - SVGA3dCmdDefineGBShader *cmd; - - assert(sizeInBytes % 4 == 0); - assert(type == SVGA3D_SHADERTYPE_VS || - type == SVGA3D_SHADERTYPE_PS); - - cmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DEFINE_GB_SHADER, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader); - cmd->type = type; - cmd->sizeInBytes = sizeInBytes; - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error SVGA3D_BindGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader) { @@ -1658,7 +1634,7 @@ SVGA3D_BindGBShader(struct svga_winsys_context *swc, return PIPE_ERROR_OUT_OF_MEMORY; swc->shader_relocation(swc, &cmd->shid, &cmd->mobid, - &cmd->offsetInBytes, gbshader); + &cmd->offsetInBytes, gbshader, 0); swc->commit(swc); @@ -1672,6 +1648,8 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader) { SVGA3dCmdSetShader *cmd; + + assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS); cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SET_SHADER, @@ -1680,10 +1658,10 @@ SVGA3D_SetGBShader(struct svga_winsys_context 
*swc, if (!cmd) return PIPE_ERROR_OUT_OF_MEMORY; - swc->context_relocation(swc, &cmd->cid); + cmd->cid = swc->cid; cmd->type = type; if (gbshader) - swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader); + swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0); else cmd->shid = SVGA_ID_INVALID; swc->commit(swc); @@ -1692,27 +1670,6 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc, } -enum pipe_error -SVGA3D_DestroyGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader) -{ - SVGA3dCmdDestroyGBShader *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DESTROY_GB_SHADER, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader); - - swc->commit(swc); - - return PIPE_OK; -} - - /** * \param flags mask of SVGA_RELOC_READ / _WRITE */ @@ -1738,89 +1695,6 @@ SVGA3D_BindGBSurface(struct svga_winsys_context *swc, } -enum pipe_error -SVGA3D_DefineGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdDefineGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DEFINE_GB_CONTEXT, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error -SVGA3D_DestroyGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdDestroyGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_DESTROY_GB_CONTEXT, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error -SVGA3D_BindGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdBindGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_BIND_GB_CONTEXT, - sizeof *cmd, - 2); /* two relocations */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - swc->context_relocation(swc, &cmd->mobid); - cmd->validContents = 0; /* XXX pass as a parameter? */ - - swc->commit(swc); - - return PIPE_OK; -} - - -enum pipe_error -SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc) -{ - SVGA3dCmdInvalidateGBContext *cmd = - SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_INVALIDATE_GB_CONTEXT, - sizeof *cmd, - 1); /* one relocation */ - - if (!cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - swc->context_relocation(swc, &cmd->cid); - - swc->commit(swc); - - return PIPE_OK; -} - - - /** * Update an image in a guest-backed surface. * (Inform the device that the guest-contents have been updated.) 
@@ -1848,6 +1722,7 @@ SVGA3D_UpdateGBImage(struct svga_winsys_context *swc, cmd->box = *box; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1874,6 +1749,7 @@ SVGA3D_UpdateGBSurface(struct svga_winsys_context *swc, SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1903,6 +1779,7 @@ SVGA3D_ReadbackGBImage(struct svga_winsys_context *swc, cmd->image.mipmap = mipLevel; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1929,6 +1806,7 @@ SVGA3D_ReadbackGBSurface(struct svga_winsys_context *swc, SVGA_RELOC_READ | SVGA_RELOC_INTERNAL); swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } @@ -1957,6 +1835,7 @@ SVGA3D_ReadbackGBImagePartial(struct svga_winsys_context *swc, cmd->invertBox = invertBox; swc->commit(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; return PIPE_OK; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h index 6f658bf3a..26e4690e6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h @@ -47,6 +47,7 @@ struct svga_winsys_context; struct svga_winsys_buffer; struct svga_winsys_surface; struct svga_winsys_gb_shader; +struct svga_winsys_gb_query; /* @@ -229,12 +230,6 @@ SVGA3D_SetShader(struct svga_winsys_context *swc, */ enum pipe_error -SVGA3D_DefineGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader, - SVGA3dShaderType type, - uint32 sizeInBytes); - -enum pipe_error SVGA3D_BindGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader); @@ -244,26 +239,10 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc, struct svga_winsys_gb_shader *gbshader); enum pipe_error -SVGA3D_DestroyGBShader(struct svga_winsys_context *swc, - struct svga_winsys_gb_shader *gbshader); - -enum pipe_error SVGA3D_BindGBSurface(struct svga_winsys_context *swc, struct svga_winsys_surface *surface); enum pipe_error -SVGA3D_DefineGBContext(struct svga_winsys_context *swc); - -enum pipe_error -SVGA3D_DestroyGBContext(struct svga_winsys_context *swc); - -enum pipe_error -SVGA3D_BindGBContext(struct svga_winsys_context *swc); - -enum pipe_error -SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc); - -enum pipe_error SVGA3D_UpdateGBImage(struct svga_winsys_context *swc, struct svga_winsys_surface *surface, const SVGA3dBox *box, @@ -327,4 +306,340 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc, SVGA3dQueryType type, struct svga_winsys_buffer *buffer); + + +/* + * VGPU10 commands + */ + +enum pipe_error +SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + uint32 dstSubResource, + struct svga_winsys_surface *srcSurf, + uint32 srcSubResource, + const SVGA3dCopyBox *box); + +enum pipe_error +SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + struct svga_winsys_surface *srcSurf); + +enum pipe_error +SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc, + unsigned count, const SVGA3dViewport *viewports); + +enum pipe_error +SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId); + +enum pipe_error +SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc, + SVGA3dShaderType type, + uint32 startView, + unsigned count, + const SVGA3dShaderResourceViewId ids[], 
+ struct svga_winsys_surface **views); + +enum pipe_error +SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc, + unsigned count, + uint32 startSampler, + SVGA3dShaderType type, + const SVGA3dSamplerId *samplerIds); + +enum pipe_error +SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc, + unsigned color_count, + struct pipe_surface **color_surfs, + struct pipe_surface *depth_stencil_surf); + +enum pipe_error +SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + const float *blendFactor, uint32 sampleMask); + +enum pipe_error +SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint32 stencilRef); + +enum pipe_error +SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId); + +enum pipe_error +SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 predicateValue); + +enum pipe_error +SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc, + unsigned count, const SVGA3dSoTarget *targets, + struct svga_winsys_surface **surfaces); + +enum pipe_error +SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc, + unsigned count, + const SVGASignedRect *rects); + +enum pipe_error +SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid); + +enum pipe_error +SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc, + uint32 vertexCount, uint32 startVertexLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc, + uint32 indexCount, uint32 startIndexLocation, + int32 baseVertexLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc, + uint32 vertexCountPerInstance, + uint32 instanceCount, + uint32 startVertexLocation, + uint32 startInstanceLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc, + uint32 indexCountPerInstance, + uint32 instanceCount, + uint32 startIndexLocation, + int32 baseVertexLocation, + uint32 startInstanceLocation); + +enum pipe_error +SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc); + +enum pipe_error +SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + SVGA3dQueryType type, + SVGA3dDXQueryFlags flags); + +enum pipe_error +SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc, + struct svga_winsys_gb_query *gbQuery, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 mobOffset); + +enum pipe_error +SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId); + +enum pipe_error +SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc, + struct pipe_surface *color_surf, + const float *rgba); + +enum pipe_error +SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc, + struct pipe_surface *ds_surf, + uint16 flags, uint16 stencil, float depth); + +enum pipe_error +SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *surf, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dShaderResourceViewDesc *desc); + +enum 
pipe_error +SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId); + +enum pipe_error +SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc); + +enum pipe_error +SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId); + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc); + + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId); + +enum pipe_error +SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc, + unsigned count, + SVGA3dElementLayoutId elementLayoutId, + const SVGA3dInputElementDesc *elements); + +enum pipe_error +SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId); + +enum pipe_error +SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + uint8 alphaToCoverageEnable, + uint8 independentBlendEnable, + const SVGA3dDXBlendStatePerRT *perRT); + +enum pipe_error +SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId); + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint8 depthEnable, + SVGA3dDepthWriteMask depthWriteMask, + SVGA3dComparisonFunc depthFunc, + uint8 stencilEnable, + uint8 frontEnable, + uint8 backEnable, + uint8 stencilReadMask, + uint8 stencilWriteMask, + uint8 frontStencilFailOp, + uint8 frontStencilDepthFailOp, + uint8 frontStencilPassOp, + SVGA3dComparisonFunc frontStencilFunc, + uint8 backStencilFailOp, + uint8 backStencilDepthFailOp, + uint8 backStencilPassOp, + SVGA3dComparisonFunc backStencilFunc); + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId); + +enum pipe_error +SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId, + uint8 fillMode, + SVGA3dCullMode cullMode, + uint8 frontCounterClockwise, + int32 depthBias, + float depthBiasClamp, + float slopeScaledDepthBias, + uint8 depthClipEnable, + uint8 scissorEnable, + uint8 multisampleEnable, + uint8 antialiasedLineEnable, + float lineWidth, + uint8 lineStippleEnable, + uint8 lineStippleFactor, + uint16 lineStipplePattern, + uint8 provokingVertexLast); + +enum pipe_error +SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId); + +enum pipe_error +SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId, + SVGA3dFilter filter, + uint8 addressU, + uint8 addressV, + uint8 addressW, + float mipLODBias, + uint8 maxAnisotropy, + uint8 comparisonFunc, + SVGA3dRGBAFloat borderColor, + float minLOD, + float maxLOD); + +enum pipe_error +SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId); + +enum pipe_error +SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc, 
+ SVGA3dShaderId shaderId); + +enum pipe_error +SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId, + SVGA3dShaderType type, + uint32 sizeInBytes); + +enum pipe_error +SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid, + uint32 numOutputStreamEntries, + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS], + const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]); + +enum pipe_error +SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid); + +enum pipe_error +SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + unsigned subResource); + +enum pipe_error +SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId); + +enum pipe_error +SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc, + unsigned count, + uint32 startBuffer, + const SVGA3dVertexBuffer *bufferInfo, + struct svga_winsys_surface **surfaces); + +enum pipe_error +SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc, + SVGA3dPrimitiveType topology); + +enum pipe_error +SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc, + struct svga_winsys_surface *indexes, + SVGA3dSurfaceFormat format, uint32 offset); + +enum pipe_error +SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc, + unsigned slot, + SVGA3dShaderType type, + struct svga_winsys_surface *surface, + uint32 offsetInBytes, + uint32 sizeInBytes); + +enum pipe_error +SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + const SVGA3dBox *box, + unsigned subResource); + +enum pipe_error +SVGA3D_vgpu10_GenMips(struct svga_winsys_context *swc, + const SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *view); #endif /* __SVGA3D_H__ */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/lib/mesa/src/gallium/drivers/svga/svga_cmd_vgpu10.c new file mode 100644 index 000000000..99c9add17 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd_vgpu10.c @@ -0,0 +1,1316 @@ +/********************************************************** + * Copyright 2008-2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +/** + * @file svga_cmd_vgpu10.c + * + * Command construction utility for the vgpu10 SVGA3D protocol. + * + * \author Mingcheng Chen + * \author Brian Paul + */ + + +#include "svga_winsys.h" +#include "svga_resource_buffer.h" +#include "svga_resource_texture.h" +#include "svga_surface.h" +#include "svga_cmd.h" + + +/** + * Emit a surface relocation for RenderTargetViewId + */ +static void +view_relocation(struct svga_winsys_context *swc, // IN + struct pipe_surface *surface, // IN + SVGA3dRenderTargetViewId *id, // OUT + unsigned flags) +{ + if (surface) { + struct svga_surface *s = svga_surface(surface); + assert(s->handle); + swc->surface_relocation(swc, id, NULL, s->handle, flags); + } + else { + swc->surface_relocation(swc, id, NULL, NULL, flags); + } +} + + +/** + * Emit a surface relocation for a ResourceId. + */ +static void +surface_to_resourceid(struct svga_winsys_context *swc, // IN + struct svga_winsys_surface *surface, // IN + SVGA3dSurfaceId *sid, // OUT + unsigned flags) // IN +{ + if (surface) { + swc->surface_relocation(swc, sid, NULL, surface, flags); + } + else { + swc->surface_relocation(swc, sid, NULL, NULL, flags); + } +} + + +#define SVGA3D_CREATE_COMMAND(CommandName, CommandCode) \ +SVGA3dCmdDX##CommandName *cmd; \ +{ \ + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \ + sizeof(SVGA3dCmdDX##CommandName), 0); \ + if (!cmd) \ + return PIPE_ERROR_OUT_OF_MEMORY; \ +} + +#define SVGA3D_CREATE_CMD_COUNT(CommandName, CommandCode, ElementClassName) \ +SVGA3dCmdDX##CommandName *cmd; \ +{ \ + assert(count > 0); \ + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \ + sizeof(SVGA3dCmdDX##CommandName) + \ + count * sizeof(ElementClassName), 0); \ + if (!cmd) \ + return PIPE_ERROR_OUT_OF_MEMORY; \ +} + +#define SVGA3D_COPY_BASIC(VariableName) \ +{ \ + cmd->VariableName = VariableName; \ +} + +#define SVGA3D_COPY_BASIC_2(VariableName1, VariableName2) \ +{ \ + SVGA3D_COPY_BASIC(VariableName1); \ + SVGA3D_COPY_BASIC(VariableName2); \ +} + +#define SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3) \ +{ \ + SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \ + SVGA3D_COPY_BASIC(VariableName3); \ +} + +#define SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \ + VariableName4) \ +{ \ + SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \ + SVGA3D_COPY_BASIC_2(VariableName3, VariableName4); \ +} + +#define SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5) \ +{\ + SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \ + SVGA3D_COPY_BASIC_2(VariableName4, VariableName5); \ +} + +#define SVGA3D_COPY_BASIC_6(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6) \ +{\ + SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \ + SVGA3D_COPY_BASIC_3(VariableName4, VariableName5, VariableName6); \ +} + +#define SVGA3D_COPY_BASIC_7(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6, \ + VariableName7) \ +{\ + SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \ + VariableName4); \ + SVGA3D_COPY_BASIC_3(VariableName5, VariableName6, VariableName7); \ +} + +#define SVGA3D_COPY_BASIC_8(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6, \ + VariableName7, VariableName8) \ +{\ + SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \ + VariableName4); \ + 
SVGA3D_COPY_BASIC_4(VariableName5, VariableName6, VariableName7, \ + VariableName8); \ +} + +#define SVGA3D_COPY_BASIC_9(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5, VariableName6, \ + VariableName7, VariableName8, VariableName9) \ +{\ + SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \ + VariableName4, VariableName5); \ + SVGA3D_COPY_BASIC_4(VariableName6, VariableName7, VariableName8, \ + VariableName9); \ +} + + +enum pipe_error +SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + uint32 dstSubResource, + struct svga_winsys_surface *srcSurf, + uint32 srcSubResource, + const SVGA3dCopyBox *box) +{ + SVGA3dCmdDXPredCopyRegion *cmd = + SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_PRED_COPY_REGION, + sizeof(SVGA3dCmdDXPredCopyRegion), + 2); /* two relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE); + swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ); + cmd->dstSubResource = dstSubResource; + cmd->srcSubResource = srcSubResource; + cmd->box = *box; + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc, + struct svga_winsys_surface *dstSurf, + struct svga_winsys_surface *srcSurf) +{ + SVGA3dCmdDXPredCopy *cmd = + SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_PRED_COPY, + sizeof(SVGA3dCmdDXPredCopy), + 2); /* two relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE); + swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ); + + swc->commit(swc); + + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc, + unsigned count, + const SVGA3dViewport *viewports) +{ + SVGA3D_CREATE_CMD_COUNT(SetViewports, SET_VIEWPORTS, SVGA3dViewport); + + memcpy(cmd + 1, viewports, count * sizeof(SVGA3dViewport)); + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId) +{ + SVGA3dCmdDXSetShader *cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_SHADER, + sizeof *cmd, + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->shader_relocation(swc, &cmd->shaderId, NULL, NULL, gbshader, 0); + + cmd->type = type; + cmd->shaderId = shaderId; + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc, + SVGA3dShaderType type, + uint32 startView, + unsigned count, + const SVGA3dShaderResourceViewId ids[], + struct svga_winsys_surface **views) +{ + SVGA3dCmdDXSetShaderResources *cmd; + SVGA3dShaderResourceViewId *cmd_ids; + unsigned i; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_SHADER_RESOURCES, + sizeof(SVGA3dCmdDXSetShaderResources) + + count * sizeof(SVGA3dShaderResourceViewId), + count); /* 'count' relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + + cmd->type = type; + cmd->startView = startView; + + cmd_ids = (SVGA3dShaderResourceViewId *) (cmd + 1); + for (i = 0; i < count; i++) { + swc->surface_relocation(swc, cmd_ids + i, NULL, views[i], + SVGA_RELOC_READ); + cmd_ids[i] = ids[i]; + } + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc, + unsigned 
count, + uint32 startSampler, + SVGA3dShaderType type, + const SVGA3dSamplerId *samplerIds) +{ + SVGA3D_CREATE_CMD_COUNT(SetSamplers, SET_SAMPLERS, SVGA3dSamplerId); + + SVGA3D_COPY_BASIC_2(startSampler, type); + memcpy(cmd + 1, samplerIds, count * sizeof(SVGA3dSamplerId)); + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc, + struct pipe_surface *color_surf, + const float *rgba) +{ + SVGA3dCmdDXClearRenderTargetView *cmd; + struct svga_surface *ss = svga_surface(color_surf); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW, + sizeof(SVGA3dCmdDXClearRenderTargetView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + + /* NOTE: The following is pretty tricky. We need to emit a view/surface + * relocation and we have to provide a pointer to an ID which lies in + * the bounds of the command space which we just allocated. However, + * we then need to overwrite it with the original RenderTargetViewId. + */ + view_relocation(swc, color_surf, &cmd->renderTargetViewId, + SVGA_RELOC_WRITE); + cmd->renderTargetViewId = ss->view_id; + + COPY_4V(cmd->rgba.value, rgba); + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc, + unsigned color_count, + struct pipe_surface **color_surfs, + struct pipe_surface *depth_stencil_surf) +{ + const unsigned surf_count = color_count + 1; + SVGA3dCmdDXSetRenderTargets *cmd; + SVGA3dRenderTargetViewId *ctarget; + struct svga_surface *ss; + unsigned i; + + assert(surf_count > 0); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_RENDERTARGETS, + sizeof(SVGA3dCmdDXSetRenderTargets) + + color_count * sizeof(SVGA3dRenderTargetViewId), + surf_count); /* 'surf_count' relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* NOTE: See earlier comment about the tricky handling of the ViewIds. 
+ */ + + /* Depth / Stencil buffer */ + if (depth_stencil_surf) { + ss = svga_surface(depth_stencil_surf); + view_relocation(swc, depth_stencil_surf, &cmd->depthStencilViewId, + SVGA_RELOC_WRITE); + cmd->depthStencilViewId = ss->view_id; + } + else { + /* no depth/stencil buffer - still need a relocation */ + view_relocation(swc, NULL, &cmd->depthStencilViewId, + SVGA_RELOC_WRITE); + cmd->depthStencilViewId = SVGA3D_INVALID_ID; + } + + /* Color buffers */ + ctarget = (SVGA3dRenderTargetViewId *) &cmd[1]; + for (i = 0; i < color_count; i++) { + if (color_surfs[i]) { + ss = svga_surface(color_surfs[i]); + view_relocation(swc, color_surfs[i], ctarget + i, SVGA_RELOC_WRITE); + ctarget[i] = ss->view_id; + } + else { + view_relocation(swc, NULL, ctarget + i, SVGA_RELOC_WRITE); + ctarget[i] = SVGA3D_INVALID_ID; + } + } + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + const float *blendFactor, + uint32 sampleMask) +{ + SVGA3D_CREATE_COMMAND(SetBlendState, SET_BLEND_STATE); + + SVGA3D_COPY_BASIC_2(blendId, sampleMask); + memcpy(cmd->blendFactor, blendFactor, sizeof(float) * 4); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint32 stencilRef) +{ + SVGA3D_CREATE_COMMAND(SetDepthStencilState, SET_DEPTHSTENCIL_STATE); + + SVGA3D_COPY_BASIC_2(depthStencilId, stencilRef); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId) +{ + SVGA3D_CREATE_COMMAND(SetRasterizerState, SET_RASTERIZER_STATE); + + cmd->rasterizerId = rasterizerId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 predicateValue) +{ + SVGA3dCmdDXSetPredication *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_PREDICATION, + sizeof *cmd, 0); + + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->queryId = queryId; + cmd->predicateValue = predicateValue; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc, + unsigned count, + const SVGA3dSoTarget *targets, + struct svga_winsys_surface **surfaces) +{ + SVGA3dCmdDXSetSOTargets *cmd; + SVGA3dSoTarget *sot; + unsigned i; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_SET_SOTARGETS, + sizeof(SVGA3dCmdDXSetSOTargets) + + count * sizeof(SVGA3dSoTarget), + count); + + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->pad0 = 0; + sot = (SVGA3dSoTarget *)(cmd + 1); + for (i = 0; i < count; i++, sot++) { + if (surfaces[i]) { + sot->offset = targets[i].offset; + sot->sizeInBytes = targets[i].sizeInBytes; + swc->surface_relocation(swc, &sot->sid, NULL, surfaces[i], + SVGA_RELOC_WRITE); + } + else { + sot->offset = 0; + sot->sizeInBytes = ~0u; + swc->surface_relocation(swc, &sot->sid, NULL, NULL, + SVGA_RELOC_WRITE); + } + } + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc, + unsigned count, + const SVGASignedRect *rects) +{ + SVGA3dCmdDXSetScissorRects *cmd; + + assert(count > 0); + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SCISSORRECTS, + sizeof(SVGA3dCmdDXSetScissorRects) + + count * sizeof(SVGASignedRect), + 0); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(cmd + 1, 
rects, count * sizeof(SVGASignedRect)); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid) +{ + SVGA3D_CREATE_COMMAND(SetStreamOutput, SET_STREAMOUTPUT); + + cmd->soid = soid; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc, + uint32 vertexCount, + uint32 startVertexLocation) +{ + SVGA3D_CREATE_COMMAND(Draw, DRAW); + + SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc, + uint32 indexCount, + uint32 startIndexLocation, + int32 baseVertexLocation) +{ + SVGA3D_CREATE_COMMAND(DrawIndexed, DRAW_INDEXED); + + SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation, + baseVertexLocation); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc, + uint32 vertexCountPerInstance, + uint32 instanceCount, + uint32 startVertexLocation, + uint32 startInstanceLocation) +{ + SVGA3D_CREATE_COMMAND(DrawInstanced, DRAW_INSTANCED); + + SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount, + startVertexLocation, startInstanceLocation); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc, + uint32 indexCountPerInstance, + uint32 instanceCount, + uint32 startIndexLocation, + int32 baseVertexLocation, + uint32 startInstanceLocation) +{ + SVGA3D_CREATE_COMMAND(DrawIndexedInstanced, DRAW_INDEXED_INSTANCED); + + SVGA3D_COPY_BASIC_5(indexCountPerInstance, instanceCount, + startIndexLocation, baseVertexLocation, + startInstanceLocation); + + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc) +{ + SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO); + + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + SVGA3dQueryType type, + SVGA3dDXQueryFlags flags) +{ + SVGA3D_CREATE_COMMAND(DefineQuery, DEFINE_QUERY); + + SVGA3D_COPY_BASIC_3(queryId, type, flags); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId) +{ + SVGA3D_CREATE_COMMAND(DestroyQuery, DESTROY_QUERY); + + cmd->queryId = queryId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc, + struct svga_winsys_gb_query *gbQuery, + SVGA3dQueryId queryId) +{ + SVGA3dCmdDXBindQuery *cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_BIND_QUERY, + sizeof *cmd, + 1); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->queryId = queryId; + swc->query_relocation(swc, &cmd->mobid, gbQuery); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc, + SVGA3dQueryId queryId, + uint32 mobOffset) +{ + SVGA3D_CREATE_COMMAND(SetQueryOffset, SET_QUERY_OFFSET); + SVGA3D_COPY_BASIC_2(queryId, mobOffset); + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId) +{ + 
SVGA3D_CREATE_COMMAND(BeginQuery, BEGIN_QUERY); + cmd->queryId = queryId; + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryId queryId) +{ + SVGA3D_CREATE_COMMAND(EndQuery, END_QUERY); + cmd->queryId = queryId; + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc, + struct pipe_surface *ds_surf, + uint16 flags, + uint16 stencil, + float depth) +{ + SVGA3dCmdDXClearDepthStencilView *cmd; + struct svga_surface *ss = svga_surface(ds_surf); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW, + sizeof(SVGA3dCmdDXClearDepthStencilView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* NOTE: The following is pretty tricky. We need to emit a view/surface + * relocation and we have to provide a pointer to an ID which lies in + * the bounds of the command space which we just allocated. However, + * we then need to overwrite it with the original DepthStencilViewId. + */ + view_relocation(swc, ds_surf, &cmd->depthStencilViewId, + SVGA_RELOC_WRITE); + cmd->depthStencilViewId = ss->view_id; + cmd->flags = flags; + cmd->stencil = stencil; + cmd->depth = depth; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dShaderResourceViewDesc *desc) +{ + SVGA3dCmdDXDefineShaderResourceView *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW, + sizeof(SVGA3dCmdDXDefineShaderResourceView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA3D_COPY_BASIC_3(shaderResourceViewId, format, resourceDimension); + + swc->surface_relocation(swc, &cmd->sid, NULL, surface, + SVGA_RELOC_READ); + + cmd->desc = *desc; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId) +{ + SVGA3D_CREATE_COMMAND(DestroyShaderResourceView, + DESTROY_SHADERRESOURCE_VIEW); + + cmd->shaderResourceViewId = shaderResourceViewId; + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc) +{ + SVGA3dCmdDXDefineRenderTargetView *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW, + sizeof(SVGA3dCmdDXDefineRenderTargetView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA3D_COPY_BASIC_3(renderTargetViewId, format, resourceDimension); + cmd->desc = *desc; + + surface_to_resourceid(swc, surface, + &cmd->sid, + SVGA_RELOC_READ | SVGA_RELOC_WRITE); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc, + SVGA3dRenderTargetViewId renderTargetViewId) +{ + SVGA3D_CREATE_COMMAND(DestroyRenderTargetView, DESTROY_RENDERTARGET_VIEW); + + cmd->renderTargetViewId = renderTargetViewId; + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilView(struct 
svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dRenderTargetViewDesc *desc) +{ + SVGA3dCmdDXDefineDepthStencilView *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW, + sizeof(SVGA3dCmdDXDefineDepthStencilView), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA3D_COPY_BASIC_3(depthStencilViewId, format, resourceDimension); + cmd->mipSlice = desc->tex.mipSlice; + cmd->firstArraySlice = desc->tex.firstArraySlice; + cmd->arraySize = desc->tex.arraySize; + + surface_to_resourceid(swc, surface, + &cmd->sid, + SVGA_RELOC_READ | SVGA_RELOC_WRITE); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc, + SVGA3dDepthStencilViewId depthStencilViewId) +{ + SVGA3D_CREATE_COMMAND(DestroyDepthStencilView, DESTROY_DEPTHSTENCIL_VIEW); + + cmd->depthStencilViewId = depthStencilViewId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc, + unsigned count, + SVGA3dElementLayoutId elementLayoutId, + const SVGA3dInputElementDesc *elements) +{ + SVGA3dCmdDXDefineElementLayout *cmd; + unsigned i; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT, + sizeof(SVGA3dCmdDXDefineElementLayout) + + count * sizeof(SVGA3dInputElementDesc), 0); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* check that all offsets are multiples of four */ + for (i = 0; i < count; i++) { + assert(elements[i].alignedByteOffset % 4 == 0); + } + (void) i; /* silence unused var in release build */ + + cmd->elementLayoutId = elementLayoutId; + memcpy(cmd + 1, elements, count * sizeof(SVGA3dInputElementDesc)); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId) +{ + SVGA3D_CREATE_COMMAND(DestroyElementLayout, DESTROY_ELEMENTLAYOUT); + + cmd->elementLayoutId = elementLayoutId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId, + uint8 alphaToCoverageEnable, + uint8 independentBlendEnable, + const SVGA3dDXBlendStatePerRT *perRT) +{ + SVGA3D_CREATE_COMMAND(DefineBlendState, DEFINE_BLEND_STATE); + + cmd->blendId = blendId; + cmd->alphaToCoverageEnable = alphaToCoverageEnable; + cmd->independentBlendEnable = independentBlendEnable; + memcpy(cmd->perRT, perRT, sizeof(cmd->perRT)); + cmd->pad0 = 0; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc, + SVGA3dBlendStateId blendId) +{ + SVGA3D_CREATE_COMMAND(DestroyBlendState, DESTROY_BLEND_STATE); + + cmd->blendId = blendId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId, + uint8 depthEnable, + SVGA3dDepthWriteMask depthWriteMask, + SVGA3dComparisonFunc depthFunc, + uint8 stencilEnable, + uint8 frontEnable, + uint8 backEnable, + uint8 stencilReadMask, + uint8 stencilWriteMask, + uint8 frontStencilFailOp, + uint8 frontStencilDepthFailOp, + uint8 frontStencilPassOp, + SVGA3dComparisonFunc frontStencilFunc, + uint8 backStencilFailOp, + uint8 backStencilDepthFailOp, + uint8 backStencilPassOp, + 
SVGA3dComparisonFunc backStencilFunc) +{ + SVGA3D_CREATE_COMMAND(DefineDepthStencilState, DEFINE_DEPTHSTENCIL_STATE); + + SVGA3D_COPY_BASIC_9(depthStencilId, depthEnable, + depthWriteMask, depthFunc, + stencilEnable, frontEnable, + backEnable, stencilReadMask, + stencilWriteMask); + SVGA3D_COPY_BASIC_8(frontStencilFailOp, frontStencilDepthFailOp, + frontStencilPassOp, frontStencilFunc, + backStencilFailOp, backStencilDepthFailOp, + backStencilPassOp, backStencilFunc); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc, + SVGA3dDepthStencilStateId depthStencilId) +{ + SVGA3D_CREATE_COMMAND(DestroyDepthStencilState, + DESTROY_DEPTHSTENCIL_STATE); + + cmd->depthStencilId = depthStencilId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId, + uint8 fillMode, + SVGA3dCullMode cullMode, + uint8 frontCounterClockwise, + int32 depthBias, + float depthBiasClamp, + float slopeScaledDepthBias, + uint8 depthClipEnable, + uint8 scissorEnable, + uint8 multisampleEnable, + uint8 antialiasedLineEnable, + float lineWidth, + uint8 lineStippleEnable, + uint8 lineStippleFactor, + uint16 lineStipplePattern, + uint8 provokingVertexLast) +{ + SVGA3D_CREATE_COMMAND(DefineRasterizerState, DEFINE_RASTERIZER_STATE); + + SVGA3D_COPY_BASIC_5(rasterizerId, fillMode, + cullMode, frontCounterClockwise, + depthBias); + SVGA3D_COPY_BASIC_6(depthBiasClamp, slopeScaledDepthBias, + depthClipEnable, scissorEnable, + multisampleEnable, antialiasedLineEnable); + cmd->lineWidth = lineWidth; + cmd->lineStippleEnable = lineStippleEnable; + cmd->lineStippleFactor = lineStippleFactor; + cmd->lineStipplePattern = lineStipplePattern; + cmd->provokingVertexLast = provokingVertexLast; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId) +{ + SVGA3D_CREATE_COMMAND(DestroyRasterizerState, DESTROY_RASTERIZER_STATE); + + cmd->rasterizerId = rasterizerId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId, + SVGA3dFilter filter, + uint8 addressU, + uint8 addressV, + uint8 addressW, + float mipLODBias, + uint8 maxAnisotropy, + uint8 comparisonFunc, + SVGA3dRGBAFloat borderColor, + float minLOD, + float maxLOD) +{ + SVGA3D_CREATE_COMMAND(DefineSamplerState, DEFINE_SAMPLER_STATE); + + SVGA3D_COPY_BASIC_6(samplerId, filter, + addressU, addressV, + addressW, mipLODBias); + SVGA3D_COPY_BASIC_5(maxAnisotropy, comparisonFunc, + borderColor, minLOD, + maxLOD); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc, + SVGA3dSamplerId samplerId) +{ + SVGA3D_CREATE_COMMAND(DestroySamplerState, DESTROY_SAMPLER_STATE); + + cmd->samplerId = samplerId; + + swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc, + struct svga_winsys_gb_shader *gbshader, + SVGA3dShaderId shaderId, + SVGA3dShaderType type, + uint32 sizeInBytes) +{ + SVGA3dCmdHeader *header; + SVGA3dCmdDXDefineShader *dcmd; + SVGA3dCmdDXBindShader *bcmd; + unsigned totalSize = 2 * sizeof(*header) + + sizeof(*dcmd) + sizeof(*bcmd); + + /* Make sure there is room for both commands */ + header = swc->reserve(swc, totalSize, 2); + if 
(!header) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* DXDefineShader command */ + header->id = SVGA_3D_CMD_DX_DEFINE_SHADER; + header->size = sizeof(*dcmd); + dcmd = (SVGA3dCmdDXDefineShader *)(header + 1); + dcmd->shaderId = shaderId; + dcmd->type = type; + dcmd->sizeInBytes = sizeInBytes; + + /* DXBindShader command */ + header = (SVGA3dCmdHeader *)(dcmd + 1); + + header->id = SVGA_3D_CMD_DX_BIND_SHADER; + header->size = sizeof(*bcmd); + bcmd = (SVGA3dCmdDXBindShader *)(header + 1); + + bcmd->cid = swc->cid; + swc->shader_relocation(swc, NULL, &bcmd->mobid, + &bcmd->offsetInBytes, gbshader, 0); + + bcmd->shid = shaderId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc, + SVGA3dShaderId shaderId) +{ + SVGA3D_CREATE_COMMAND(DestroyShader, DESTROY_SHADER); + + cmd->shaderId = shaderId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid, + uint32 numOutputStreamEntries, + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS], + const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]) +{ + unsigned i; + SVGA3D_CREATE_COMMAND(DefineStreamOutput, DEFINE_STREAMOUTPUT); + + cmd->soid = soid; + cmd->numOutputStreamEntries = numOutputStreamEntries; + + for (i = 0; i < Elements(cmd->streamOutputStrideInBytes); i++) + cmd->streamOutputStrideInBytes[i] = streamOutputStrideInBytes[i]; + + memcpy(cmd->decl, decl, + sizeof(SVGA3dStreamOutputDeclarationEntry) + * SVGA3D_MAX_STREAMOUT_DECLS); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid) +{ + SVGA3D_CREATE_COMMAND(DestroyStreamOutput, DESTROY_STREAMOUTPUT); + + cmd->soid = soid; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc, + SVGA3dElementLayoutId elementLayoutId) +{ + SVGA3D_CREATE_COMMAND(SetInputLayout, SET_INPUT_LAYOUT); + + cmd->elementLayoutId = elementLayoutId; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc, + unsigned count, + uint32 startBuffer, + const SVGA3dVertexBuffer *bufferInfo, + struct svga_winsys_surface **surfaces) +{ + SVGA3dCmdDXSetVertexBuffers *cmd; + SVGA3dVertexBuffer *bufs; + unsigned i; + + assert(count > 0); + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS, + sizeof(SVGA3dCmdDXSetVertexBuffers) + + count * sizeof(SVGA3dVertexBuffer), + count); /* 'count' relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->startBuffer = startBuffer; + + bufs = (SVGA3dVertexBuffer *) &cmd[1]; + for (i = 0; i < count; i++) { + bufs[i].stride = bufferInfo[i].stride; + bufs[i].offset = bufferInfo[i].offset; + assert(bufs[i].stride % 4 == 0); + assert(bufs[i].offset % 4 == 0); + swc->surface_relocation(swc, &bufs[i].sid, NULL, surfaces[i], + SVGA_RELOC_READ); + } + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc, + SVGA3dPrimitiveType topology) +{ + SVGA3D_CREATE_COMMAND(SetTopology, SET_TOPOLOGY); + + cmd->topology = topology; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc, + struct svga_winsys_surface *indexes, + SVGA3dSurfaceFormat format, + uint32 offset) +{ + SVGA3dCmdDXSetIndexBuffer *cmd; + + cmd = 
SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_INDEX_BUFFER, + sizeof(SVGA3dCmdDXSetIndexBuffer), + 1); /* one relocations */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, NULL, indexes, SVGA_RELOC_READ); + SVGA3D_COPY_BASIC_2(format, offset); + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc, + unsigned slot, + SVGA3dShaderType type, + struct svga_winsys_surface *surface, + uint32 offsetInBytes, + uint32 sizeInBytes) +{ + SVGA3dCmdDXSetSingleConstantBuffer *cmd; + + assert(offsetInBytes % 256 == 0); + if (!surface) + assert(sizeInBytes == 0); + else + assert(sizeInBytes > 0); + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER, + sizeof(SVGA3dCmdDXSetSingleConstantBuffer), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->slot = slot; + cmd->type = type; + swc->surface_relocation(swc, &cmd->sid, NULL, surface, SVGA_RELOC_READ); + cmd->offsetInBytes = offsetInBytes; + cmd->sizeInBytes = sizeInBytes; + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + unsigned subResource) +{ + SVGA3dCmdDXReadbackSubResource *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_READBACK_SUBRESOURCE, + sizeof(SVGA3dCmdDXReadbackSubResource), + 1); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, NULL, surface, + SVGA_RELOC_READ | SVGA_RELOC_INTERNAL); + cmd->subResource = subResource; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + const SVGA3dBox *box, + unsigned subResource) +{ + SVGA3dCmdDXUpdateSubResource *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE, + sizeof(SVGA3dCmdDXUpdateSubResource), + 1); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, NULL, surface, + SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); + cmd->subResource = subResource; + cmd->box = *box; + + swc->commit(swc); + return PIPE_OK; +} + +enum pipe_error +SVGA3D_vgpu10_GenMips(struct svga_winsys_context *swc, + SVGA3dShaderResourceViewId shaderResourceViewId, + struct svga_winsys_surface *view) +{ + SVGA3dCmdDXGenMips *cmd; + + cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_GENMIPS, + sizeof(SVGA3dCmdDXGenMips), 1); + + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->shaderResourceViewId, NULL, view, + SVGA_RELOC_WRITE); + cmd->shaderResourceViewId = shaderResourceViewId; + + swc->commit(swc); + return PIPE_OK; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.c b/lib/mesa/src/gallium/drivers/svga/svga_context.c index 0ffff9c50..b10eb45e5 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_context.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_context.c @@ -30,6 +30,7 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_bitmask.h" +#include "util/u_upload_mgr.h" #include "svga_context.h" #include "svga_screen.h" @@ -42,6 +43,9 @@ #include "svga_draw.h" #include "svga_debug.h" #include "svga_state.h" +#include "svga_winsys.h" + +#define CONST0_UPLOAD_DEFAULT_SIZE 65536 DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE) DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE); @@ -53,27 +57,67 @@ 
DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE", static void svga_destroy( struct pipe_context *pipe ) { struct svga_context *svga = svga_context( pipe ); - struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws; - unsigned shader; + unsigned shader, i; + + /* free any alternate rasterizer states used for point sprite */ + for (i = 0; i < Elements(svga->rasterizer_no_cull); i++) { + if (svga->rasterizer_no_cull[i]) { + pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]); + } + } + + /* free polygon stipple state */ + if (svga->polygon_stipple.sampler) { + pipe->delete_sampler_state(pipe, svga->polygon_stipple.sampler); + } + if (svga->polygon_stipple.sampler_view) { + pipe->sampler_view_destroy(pipe, + &svga->polygon_stipple.sampler_view->base); + } + pipe_resource_reference(&svga->polygon_stipple.texture, NULL); + + /* free HW constant buffers */ + for (shader = 0; shader < Elements(svga->state.hw_draw.constbuf); shader++) { + pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], NULL); + } + + pipe->delete_blend_state(pipe, svga->noop_blend); + + /* free query gb object */ + if (svga->gb_query) { + pipe->destroy_query(pipe, NULL); + svga->gb_query = NULL; + } util_blitter_destroy(svga->blitter); svga_cleanup_framebuffer( svga ); svga_cleanup_tss_binding( svga ); - svga_hwtnl_destroy( svga->hwtnl ); - svga_cleanup_vertex_state(svga); - svga->swc->destroy(svga->swc); - svga_destroy_swtnl( svga ); + svga_hwtnl_destroy( svga->hwtnl ); - util_bitmask_destroy( svga->shader_id_bm ); + svga->swc->destroy(svga->swc); + util_bitmask_destroy(svga->blend_object_id_bm); + util_bitmask_destroy(svga->ds_object_id_bm); + util_bitmask_destroy(svga->input_element_object_id_bm); + util_bitmask_destroy(svga->rast_object_id_bm); + util_bitmask_destroy(svga->sampler_object_id_bm); + util_bitmask_destroy(svga->sampler_view_id_bm); + util_bitmask_destroy(svga->shader_id_bm); + util_bitmask_destroy(svga->surface_view_id_bm); + util_bitmask_destroy(svga->stream_output_id_bm); + util_bitmask_destroy(svga->query_id_bm); + u_upload_destroy(svga->const0_upload); + + /* free user's constant buffers */ for (shader = 0; shader < PIPE_SHADER_TYPES; ++shader) { - pipe_resource_reference( &svga->curr.cbufs[shader].buffer, NULL ); - sws->surface_reference(sws, &svga->state.hw_draw.hw_cb[shader], NULL); + for (i = 0; i < Elements(svga->curr.constbufs[shader]); ++i) { + pipe_resource_reference(&svga->curr.constbufs[shader][i].buffer, NULL); + } } FREE( svga ); @@ -81,16 +125,16 @@ static void svga_destroy( struct pipe_context *pipe ) -struct pipe_context *svga_context_create( struct pipe_screen *screen, - void *priv ) +struct pipe_context *svga_context_create(struct pipe_screen *screen, + void *priv, unsigned flags) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_context *svga = NULL; enum pipe_error ret; svga = CALLOC_STRUCT(svga_context); - if (svga == NULL) - goto no_svga; + if (!svga) + goto cleanup; LIST_INITHEAD(&svga->dirty_buffers); @@ -100,8 +144,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga->pipe.clear = svga_clear; svga->swc = svgascreen->sws->context_create(svgascreen->sws); - if(!svga->swc) - goto no_swc; + if (!svga->swc) + goto cleanup; svga_init_resource_functions(svga); svga_init_blend_functions(svga); @@ -114,11 +158,15 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga_init_sampler_functions(svga); svga_init_fs_functions(svga); svga_init_vs_functions(svga); + 
svga_init_gs_functions(svga); svga_init_vertex_functions(svga); svga_init_constbuffer_functions(svga); svga_init_query_functions(svga); svga_init_surface_functions(svga); + svga_init_stream_output_functions(svga); + /* init misc state */ + svga->curr.sample_mask = ~0; /* debug */ svga->debug.no_swtnl = debug_get_option_no_swtnl(); @@ -128,21 +176,54 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga->debug.no_line_width = debug_get_option_no_line_width(); svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple(); - svga->shader_id_bm = util_bitmask_create(); - if (svga->shader_id_bm == NULL) - goto no_shader_bm; + if (!(svga->blend_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->ds_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->input_element_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->rast_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->sampler_object_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->sampler_view_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->shader_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->surface_view_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->stream_output_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->query_id_bm = util_bitmask_create())) + goto cleanup; svga->hwtnl = svga_hwtnl_create(svga); if (svga->hwtnl == NULL) - goto no_hwtnl; + goto cleanup; if (!svga_init_swtnl(svga)) - goto no_swtnl; + goto cleanup; ret = svga_emit_initial_state( svga ); if (ret != PIPE_OK) - goto no_state; - + goto cleanup; + + svga->const0_upload = u_upload_create(&svga->pipe, + CONST0_UPLOAD_DEFAULT_SIZE, + PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STREAM); + if (!svga->const0_upload) + goto cleanup; + /* Avoid shortcircuiting state with initial value of zero. */ memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear)); @@ -151,24 +232,64 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw)); memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views)); + memset(&svga->state.hw_draw.num_sampler_views, 0, + sizeof(svga->state.hw_draw.num_sampler_views)); svga->state.hw_draw.num_views = 0; - memset(&svga->state.hw_draw.hw_cb, 0x0, sizeof(svga->state.hw_draw.hw_cb)); + + /* Initialize the shader pointers */ + svga->state.hw_draw.vs = NULL; + svga->state.hw_draw.gs = NULL; + svga->state.hw_draw.fs = NULL; + memset(svga->state.hw_draw.constbuf, 0, + sizeof(svga->state.hw_draw.constbuf)); + memset(svga->state.hw_draw.default_constbuf_size, 0, + sizeof(svga->state.hw_draw.default_constbuf_size)); + memset(svga->state.hw_draw.enabled_constbufs, 0, + sizeof(svga->state.hw_draw.enabled_constbufs)); + + /* Create a no-operation blend state which we will bind whenever the + * requested blend state is impossible (e.g. due to having an integer + * render target attached). + * + * XXX: We will probably actually need 16 of these, one for each possible + * RGBA color mask (4 bits). Then, we would bind the one with a color mask + * matching the blend state it is replacing. + */ + { + struct pipe_blend_state noop_tmpl = {0}; + unsigned i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + // Set the color mask to all-ones. Later this may change. 
+ noop_tmpl.rt[i].colormask = PIPE_MASK_RGBA; + } + svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl); + } svga->dirty = ~0; return &svga->pipe; -no_state: +cleanup: svga_destroy_swtnl(svga); -no_swtnl: - svga_hwtnl_destroy( svga->hwtnl ); -no_hwtnl: - util_bitmask_destroy( svga->shader_id_bm ); -no_shader_bm: - svga->swc->destroy(svga->swc); -no_swc: + + if (svga->const0_upload) + u_upload_destroy(svga->const0_upload); + if (svga->hwtnl) + svga_hwtnl_destroy(svga->hwtnl); + if (svga->swc) + svga->swc->destroy(svga->swc); + util_bitmask_destroy(svga->blend_object_id_bm); + util_bitmask_destroy(svga->ds_object_id_bm); + util_bitmask_destroy(svga->input_element_object_id_bm); + util_bitmask_destroy(svga->rast_object_id_bm); + util_bitmask_destroy(svga->sampler_object_id_bm); + util_bitmask_destroy(svga->sampler_view_id_bm); + util_bitmask_destroy(svga->shader_id_bm); + util_bitmask_destroy(svga->surface_view_id_bm); + util_bitmask_destroy(svga->stream_output_id_bm); + util_bitmask_destroy(svga->query_id_bm); FREE(svga); -no_svga: return NULL; } @@ -190,16 +311,26 @@ void svga_context_flush( struct svga_context *svga, */ svga->swc->flush(svga->swc, &fence); + svga->hud.num_flushes++; + svga_screen_cache_flush(svgascreen, fence); /* To force the re-emission of rendertargets and texture sampler bindings on * the next command buffer. */ - svga->rebind.rendertargets = TRUE; - svga->rebind.texture_samplers = TRUE; + svga->rebind.flags.rendertargets = TRUE; + svga->rebind.flags.texture_samplers = TRUE; + if (svga_have_gb_objects(svga)) { - svga->rebind.vs = TRUE; - svga->rebind.fs = TRUE; + + svga->rebind.flags.constbufs = TRUE; + svga->rebind.flags.vs = TRUE; + svga->rebind.flags.fs = TRUE; + svga->rebind.flags.gs = TRUE; + + if (svga_need_to_rebind_resources(svga)) { + svga->rebind.flags.query = TRUE; + } } if (SVGA_DEBUG & DEBUG_SYNC) { @@ -208,13 +339,33 @@ void svga_context_flush( struct svga_context *svga, PIPE_TIMEOUT_INFINITE); } - if(pfence) + if (pfence) svgascreen->sws->fence_reference(svgascreen->sws, pfence, fence); svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL); } +/** + * Flush pending commands and wait for completion with a fence. + */ +void +svga_context_finish(struct svga_context *svga) +{ + struct pipe_screen *screen = svga->pipe.screen; + struct pipe_fence_handle *fence = NULL; + + svga_context_flush(svga, &fence); + svga->pipe.screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE); + screen->fence_reference(screen, &fence, NULL); +} + + +/** + * Emit pending drawing commands to the command buffer. + * If the command buffer overflows, we flush it and retry. 
+ * \sa svga_hwtnl_flush() + */ void svga_hwtnl_flush_retry( struct svga_context *svga ) { enum pipe_error ret = PIPE_OK; @@ -225,7 +376,7 @@ void svga_hwtnl_flush_retry( struct svga_context *svga ) ret = svga_hwtnl_flush( svga->hwtnl ); } - assert(ret == 0); + assert(ret == PIPE_OK); } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.h b/lib/mesa/src/gallium/drivers/svga/svga_context.h index 71f038df8..f1a2041b6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_context.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_context.h @@ -38,17 +38,44 @@ #include "svga_screen.h" #include "svga_state.h" -#include "svga_tgsi.h" #include "svga_winsys.h" #include "svga_hw_reg.h" #include "svga3d_shaderdefs.h" /** Non-GPU queries for gallium HUD */ -#define SVGA_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) -#define SVGA_QUERY_FALLBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 1) -#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 2) +/* per-frame counters */ +#define SVGA_QUERY_NUM_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) +#define SVGA_QUERY_NUM_FALLBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 1) +#define SVGA_QUERY_NUM_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 2) +#define SVGA_QUERY_NUM_VALIDATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 3) +#define SVGA_QUERY_MAP_BUFFER_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 4) +#define SVGA_QUERY_NUM_RESOURCES_MAPPED (PIPE_QUERY_DRIVER_SPECIFIC + 5) +#define SVGA_QUERY_NUM_BYTES_UPLOADED (PIPE_QUERY_DRIVER_SPECIFIC + 6) + +/* running total counters */ +#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 7) +#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 8) +#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 11) +#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 12) +/*SVGA_QUERY_MAX has to be last because it is size of an array*/ +#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 13) + +/** + * Maximum supported number of constant buffers per shader + */ +#define SVGA_MAX_CONST_BUFS 14 + +/** + * Maximum constant buffer size that can be set in the + * DXSetSingleConstantBuffer command is + * DX10 constant buffer element count * 4 4-bytes components + */ +#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int)) +#define CONST0_UPLOAD_ALIGNMENT 256 struct draw_vertex_shader; struct draw_fragment_shader; @@ -57,49 +84,16 @@ struct SVGACmdMemory; struct util_bitmask; -struct svga_shader -{ - const struct tgsi_token *tokens; - - struct tgsi_shader_info info; - - /** Head of linked list of variants */ - struct svga_shader_variant *variants; - - unsigned id; /**< for debugging only */ -}; - - -struct svga_fragment_shader -{ - struct svga_shader base; - - struct draw_fragment_shader *draw_shader; - - /** Mask of which generic varying variables are read by this shader */ - unsigned generic_inputs; - /** Table mapping original TGSI generic indexes to low integers */ - int8_t generic_remap_table[MAX_GENERIC_VARYING]; -}; - - -struct svga_vertex_shader -{ - struct svga_shader base; - - struct draw_vertex_shader *draw_shader; -}; - - struct svga_cache_context; struct svga_tracked_state; struct svga_blend_state { + unsigned need_white_fragments:1; + unsigned independent_blend_enable:1; + unsigned alpha_to_coverage:1; + unsigned blend_color_alpha:1; /**< set blend color to alpha value */ - boolean need_white_fragments; - - /* Should be per-render-target: - */ + /** Per-render target state */ 
struct { uint8_t writemask; @@ -112,8 +106,9 @@ struct svga_blend_state { uint8_t srcblend_alpha; uint8_t dstblend_alpha; uint8_t blendeq_alpha; + } rt[PIPE_MAX_COLOR_BUFS]; - } rt[1]; + SVGA3dBlendStateId id; /**< vgpu10 */ }; struct svga_depth_stencil_state { @@ -139,6 +134,8 @@ struct svga_depth_stencil_state { unsigned stencil_writemask:8; float alpharef; + + SVGA3dDepthStencilStateId id; /**< vgpu10 */ }; #define SVGA_UNFILLED_DISABLE 0 @@ -167,11 +164,13 @@ struct svga_rasterizer_state { float pointsize; float linewidth; - unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */ + unsigned hw_fillmode:2; /* PIPE_POLYGON_MODE_x */ /** Which prims do we need help for? Bitmask of (1 << PIPE_PRIM_x) flags */ unsigned need_pipeline:16; + SVGA3dRasterizerStateId id; /**< vgpu10 */ + /** For debugging: */ const char* need_pipeline_tris_str; const char* need_pipeline_lines_str; @@ -195,15 +194,45 @@ struct svga_sampler_state { unsigned min_lod; unsigned view_min_lod; unsigned view_max_lod; + + SVGA3dSamplerId id; }; + +struct svga_pipe_sampler_view +{ + struct pipe_sampler_view base; + + SVGA3dShaderResourceViewId id; +}; + + +static inline struct svga_pipe_sampler_view * +svga_pipe_sampler_view(struct pipe_sampler_view *v) +{ + return (struct svga_pipe_sampler_view *) v; +} + + struct svga_velems_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */ - unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */ - unsigned adjust_attrib_w_1; /* bitmask of attrs needing w = 1 */ + + /** Bitmasks indicating which attributes need format conversion */ + unsigned adjust_attrib_range; /**< range adjustment */ + unsigned attrib_is_pure_int; /**< pure int */ + unsigned adjust_attrib_w_1; /**< set w = 1 */ + unsigned adjust_attrib_itof; /**< int->float */ + unsigned adjust_attrib_utof; /**< uint->float */ + unsigned attrib_is_bgra; /**< R / B swizzling */ + unsigned attrib_puint_to_snorm; /**< 10_10_10_2 packed uint -> snorm */ + unsigned attrib_puint_to_uscaled; /**< 10_10_10_2 packed uint -> uscaled */ + unsigned attrib_puint_to_sscaled; /**< 10_10_10_2 packed uint -> sscaled */ + boolean need_swvfetch; + + SVGA3dElementLayoutId id; /**< VGPU10 */ }; /* Use to calculate differences between state emitted to hardware and @@ -214,16 +243,22 @@ struct svga_state const struct svga_blend_state *blend; const struct svga_depth_stencil_state *depth; const struct svga_rasterizer_state *rast; - const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; const struct svga_velems_state *velems; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; /* or texture ID's? */ + struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture ID's? */ struct svga_fragment_shader *fs; struct svga_vertex_shader *vs; + struct svga_geometry_shader *user_gs; /* user-specified GS */ + struct svga_geometry_shader *gs; /* derived GS */ struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer ib; - struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES]; + /** Constant buffers for each shader. + * The size should probably always match with that of + * svga_shader_emitter_v10.num_shader_consts. 
+ */ + struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; struct pipe_framebuffer_state framebuffer; float depthscale; @@ -240,8 +275,8 @@ struct svga_state struct pipe_clip_state clip; struct pipe_viewport_state viewport; - unsigned num_samplers; - unsigned num_sampler_views; + unsigned num_samplers[PIPE_SHADER_TYPES]; + unsigned num_sampler_views[PIPE_SHADER_TYPES]; unsigned num_vertex_buffers; unsigned reduced_prim; @@ -249,6 +284,8 @@ struct svga_state unsigned flag_1d; unsigned flag_srgb; } tex_flags; + + unsigned sample_mask; }; struct svga_prescale { @@ -262,9 +299,7 @@ struct svga_prescale { */ struct svga_hw_clear_state { - struct { - unsigned x,y,w,h; - } viewport; + SVGA3dRect viewport; struct { float zmin, zmax; @@ -280,7 +315,7 @@ struct svga_hw_view_state struct svga_sampler_view *v; unsigned min_lod; unsigned max_lod; - int dirty; + boolean dirty; }; /* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW ) @@ -291,16 +326,41 @@ struct svga_hw_draw_state unsigned ts[SVGA3D_PIXEL_SAMPLERREG_MAX][SVGA3D_TS_MAX]; float cb[PIPE_SHADER_TYPES][SVGA3D_CONSTREG_MAX][4]; - /** - * For guest backed shader constants only. - */ - struct svga_winsys_surface *hw_cb[PIPE_SHADER_TYPES]; - struct svga_shader_variant *fs; struct svga_shader_variant *vs; + struct svga_shader_variant *gs; struct svga_hw_view_state views[PIPE_MAX_SAMPLERS]; - unsigned num_views; + struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; + + /* Bitmask of enabled constant bufffers */ + unsigned enabled_constbufs[PIPE_SHADER_TYPES]; + + /* VGPU10 HW state (used to prevent emitting redundant state) */ + SVGA3dDepthStencilStateId depth_stencil_id; + unsigned stencil_ref; + SVGA3dBlendStateId blend_id; + float blend_factor[4]; + unsigned blend_sample_mask; + SVGA3dRasterizerStateId rasterizer_id; + SVGA3dElementLayoutId layout_id; + SVGA3dPrimitiveType topology; + + /** Vertex buffer state */ + SVGA3dVertexBuffer vbuffers[PIPE_MAX_ATTRIBS]; + struct svga_winsys_surface *vbuffer_handles[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers; + + struct svga_winsys_surface *ib; /**< index buffer for drawing */ + SVGA3dSurfaceFormat ib_format; + unsigned ib_offset; + + unsigned num_samplers[PIPE_SHADER_TYPES]; + SVGA3dSamplerId samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + + /* used for rebinding */ + unsigned num_sampler_views[PIPE_SHADER_TYPES]; + unsigned default_constbuf_size[PIPE_SHADER_TYPES]; }; @@ -326,12 +386,14 @@ struct svga_sw_state struct svga_hw_queue; struct svga_query; +struct svga_qmem_alloc_entry; struct svga_context { struct pipe_context pipe; struct svga_winsys_context *swc; struct blitter_context *blitter; + struct u_upload_mgr *const0_upload; struct { boolean no_swtnl; @@ -345,6 +407,9 @@ struct svga_context boolean no_line_width; boolean force_hw_line_stipple; + + /** To report perf/conformance/etc issues to the state tracker */ + struct pipe_debug_callback callback; } debug; struct { @@ -355,12 +420,42 @@ struct svga_context boolean new_vdecl; } swtnl; + /* Bitmask of blend state objects IDs */ + struct util_bitmask *blend_object_id_bm; + + /* Bitmask of depth/stencil state objects IDs */ + struct util_bitmask *ds_object_id_bm; + + /* Bitmaks of input element object IDs */ + struct util_bitmask *input_element_object_id_bm; + + /* Bitmask of rasterizer object IDs */ + struct util_bitmask *rast_object_id_bm; + + /* Bitmask of sampler state objects IDs */ + struct util_bitmask *sampler_object_id_bm; + + /* Bitmask of sampler view IDs */ + struct util_bitmask *sampler_view_id_bm; + 
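The util_bitmask fields above back the per-context id allocators for the new VGPU10 state objects (blend, depth/stencil, input layouts, rasterizer, samplers, views, shaders, stream output, queries). A small sketch of the intended allocate/release usage, assuming the util_bitmask_add()/util_bitmask_clear() helpers from util/u_bitmask.h; the wrapper names below are illustrative only:

   /* Hand out the lowest unused blend-state id when a state object is
    * created; the matching util_bitmask_clear() releases it on delete.
    */
   static SVGA3dBlendStateId
   svga_alloc_blend_id(struct svga_context *svga)
   {
      return util_bitmask_add(svga->blend_object_id_bm);
   }

   static void
   svga_release_blend_id(struct svga_context *svga, SVGA3dBlendStateId id)
   {
      util_bitmask_clear(svga->blend_object_id_bm, id);
   }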
/* Bitmask of used shader IDs */ struct util_bitmask *shader_id_bm; + /* Bitmask of used surface view IDs */ + struct util_bitmask *surface_view_id_bm; + + /* Bitmask of used stream output IDs */ + struct util_bitmask *stream_output_id_bm; + + /* Bitmask of used query IDs */ + struct util_bitmask *query_id_bm; + struct { unsigned dirty[SVGA_STATE_MAX]; + /** bitmasks of which const buffers are changed */ + unsigned dirty_constbufs[PIPE_SHADER_TYPES]; + unsigned texture_timestamp; /* @@ -373,30 +468,72 @@ struct svga_context struct svga_state curr; /* state from the state tracker */ unsigned dirty; /* statechanges since last update_state() */ - struct { - unsigned rendertargets:1; - unsigned texture_samplers:1; - unsigned vs:1; - unsigned fs:1; + union { + struct { + unsigned rendertargets:1; + unsigned texture_samplers:1; + unsigned constbufs:1; + unsigned vs:1; + unsigned fs:1; + unsigned gs:1; + unsigned query:1; + } flags; + unsigned val; } rebind; struct svga_hwtnl *hwtnl; - /** The occlusion query currently in progress */ - struct svga_query *sq; + /** Queries states */ + struct svga_winsys_gb_query *gb_query; /**< gb query object, one per context */ + unsigned gb_query_len; /**< gb query object size */ + struct util_bitmask *gb_query_alloc_mask; /**< gb query object allocation mask */ + struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX]; + /**< query mem block mapping */ + struct svga_query *sq[SVGA_QUERY_MAX]; /**< queries currently in progress */ /** List of buffers with queued transfers */ struct list_head dirty_buffers; - /** performance / info queries */ - uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */ - uint64_t num_fallbacks; /**< SVGA_QUERY_FALLBACKS */ + /** performance / info queries for HUD */ + struct { + uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */ + uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */ + uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */ + uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */ + uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */ + uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */ + uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */ + uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */ + uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */ + uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */ + uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */ + } hud; + + /** The currently bound stream output targets */ + unsigned num_so_targets; + struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS]; + struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS]; + struct svga_stream_output *current_so; + + /** A blend state with blending disabled, for falling back to when blending + * is illegal (e.g. 
an integer texture is bound) + */ + struct svga_blend_state *noop_blend; + + struct { + struct pipe_resource *texture; + struct svga_pipe_sampler_view *sampler_view; + void *sampler; + } polygon_stipple; + + /** Alternate rasterizer states created for point sprite */ + struct svga_rasterizer_state *rasterizer_no_cull[2]; }; /* A flag for each state_tracker state object: */ #define SVGA_NEW_BLEND 0x1 -#define SVGA_NEW_DEPTH_STENCIL 0x2 +#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2 #define SVGA_NEW_RAST 0x4 #define SVGA_NEW_SAMPLER 0x8 #define SVGA_NEW_TEXTURE 0x10 @@ -422,7 +559,9 @@ struct svga_context #define SVGA_NEW_VS_VARIANT 0x1000000 #define SVGA_NEW_TEXTURE_FLAGS 0x4000000 #define SVGA_NEW_STENCIL_REF 0x8000000 - +#define SVGA_NEW_GS 0x10000000 +#define SVGA_NEW_GS_CONST_BUFFER 0x20000000 +#define SVGA_NEW_GS_VARIANT 0x40000000 @@ -457,11 +596,13 @@ void svga_init_rasterizer_functions( struct svga_context *svga ); void svga_init_sampler_functions( struct svga_context *svga ); void svga_init_fs_functions( struct svga_context *svga ); void svga_init_vs_functions( struct svga_context *svga ); +void svga_init_gs_functions( struct svga_context *svga ); void svga_init_vertex_functions( struct svga_context *svga ); void svga_init_constbuffer_functions( struct svga_context *svga ); void svga_init_draw_functions( struct svga_context *svga ); void svga_init_query_functions( struct svga_context *svga ); void svga_init_surface_functions(struct svga_context *svga); +void svga_init_stream_output_functions( struct svga_context *svga ); void svga_cleanup_vertex_state( struct svga_context *svga ); void svga_cleanup_tss_binding( struct svga_context *svga ); @@ -470,6 +611,8 @@ void svga_cleanup_framebuffer( struct svga_context *svga ); void svga_context_flush( struct svga_context *svga, struct pipe_fence_handle **pfence ); +void svga_context_finish(struct svga_context *svga); + void svga_hwtnl_flush_retry( struct svga_context *svga ); void svga_hwtnl_flush_buffer( struct svga_context *svga, struct pipe_resource *buffer ); @@ -478,7 +621,7 @@ void svga_surfaces_flush(struct svga_context *svga); struct pipe_context * svga_context_create(struct pipe_screen *screen, - void *priv); + void *priv, unsigned flags); /*********************************************************************** @@ -504,5 +647,22 @@ svga_have_gb_dma(const struct svga_context *svga) return svga_screen(svga->pipe.screen)->sws->have_gb_dma; } +static inline boolean +svga_have_vgpu10(const struct svga_context *svga) +{ + return svga_screen(svga->pipe.screen)->sws->have_vgpu10; +} + +static inline boolean +svga_need_to_rebind_resources(const struct svga_context *svga) +{ + return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources; +} + +static inline boolean +svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2) +{ + return memcmp(r1, r2, sizeof(*r1)) == 0; +} #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_debug.h b/lib/mesa/src/gallium/drivers/svga/svga_debug.h index 82c9b602d..039f79d4e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_debug.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_debug.h @@ -29,21 +29,22 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#define DEBUG_DMA 0x1 -#define DEBUG_TGSI 0x4 -#define DEBUG_PIPE 0x8 -#define DEBUG_STATE 0x10 -#define DEBUG_SCREEN 0x20 -#define DEBUG_TEX 0x40 -#define DEBUG_SWTNL 0x80 -#define DEBUG_CONSTS 0x100 -#define DEBUG_VIEWPORT 0x200 -#define DEBUG_VIEWS 0x400 -#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */ 
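These debug categories are plain bits in the SVGA_DEBUG mask (the re-tabulated values, plus the new DEBUG_STREAMOUT bit, continue just below). A category is normally exercised through the SVGA_DBG() macro from this header, or by testing the mask directly; the message text here is only an example:

   SVGA_DBG(DEBUG_STREAMOUT, "binding %u stream output targets\n",
            svga->num_so_targets);

   if (SVGA_DEBUG & DEBUG_STREAMOUT) {
      /* more expensive, streamout-specific diagnostics can be gated here */
   }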
-#define DEBUG_FLUSH 0x1000 /* flush after every draw */ -#define DEBUG_SYNC 0x2000 /* sync after every flush */ -#define DEBUG_QUERY 0x4000 -#define DEBUG_CACHE 0x8000 +#define DEBUG_DMA 0x1 +#define DEBUG_TGSI 0x4 +#define DEBUG_PIPE 0x8 +#define DEBUG_STATE 0x10 +#define DEBUG_SCREEN 0x20 +#define DEBUG_TEX 0x40 +#define DEBUG_SWTNL 0x80 +#define DEBUG_CONSTS 0x100 +#define DEBUG_VIEWPORT 0x200 +#define DEBUG_VIEWS 0x400 +#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */ +#define DEBUG_FLUSH 0x1000 /* flush after every draw */ +#define DEBUG_SYNC 0x2000 /* sync after every flush */ +#define DEBUG_QUERY 0x4000 +#define DEBUG_CACHE 0x8000 +#define DEBUG_STREAMOUT 0x10000 #ifdef DEBUG extern int SVGA_DEBUG; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_draw.c index 366a2dccd..80526ed4d 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw.c @@ -26,17 +26,19 @@ #include "pipe/p_compiler.h" #include "util/u_inlines.h" #include "pipe/p_defines.h" +#include "util/u_helpers.h" #include "util/u_memory.h" #include "util/u_math.h" -#include "util/u_upload_mgr.h" #include "svga_context.h" #include "svga_draw.h" #include "svga_draw_private.h" #include "svga_debug.h" #include "svga_screen.h" +#include "svga_resource.h" #include "svga_resource_buffer.h" #include "svga_resource_texture.h" +#include "svga_shader.h" #include "svga_surface.h" #include "svga_winsys.h" #include "svga_cmd.h" @@ -46,7 +48,7 @@ struct svga_hwtnl * svga_hwtnl_create(struct svga_context *svga) { struct svga_hwtnl *hwtnl = CALLOC_STRUCT(svga_hwtnl); - if (hwtnl == NULL) + if (!hwtnl) goto fail; hwtnl->svga = svga; @@ -71,8 +73,8 @@ svga_hwtnl_destroy(struct svga_hwtnl *hwtnl) } } - for (i = 0; i < hwtnl->cmd.vdecl_count; i++) - pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL); + for (i = 0; i < hwtnl->cmd.vbuf_count; i++) + pipe_resource_reference(&hwtnl->cmd.vbufs[i].buffer, NULL); for (i = 0; i < hwtnl->cmd.prim_count; i++) pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL); @@ -85,45 +87,55 @@ void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl, boolean flatshade, boolean flatshade_first) { - hwtnl->hw_pv = PV_FIRST; + struct svga_screen *svgascreen = svga_screen(hwtnl->svga->pipe.screen); + + /* User-specified PV */ hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST; + + /* Device supported PV */ + if (svgascreen->haveProvokingVertex) { + /* use the mode specified by the user */ + hwtnl->hw_pv = hwtnl->api_pv; + } + else { + /* the device only support first provoking vertex */ + hwtnl->hw_pv = PV_FIRST; + } } void -svga_hwtnl_set_unfilled(struct svga_hwtnl *hwtnl, unsigned mode) +svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode) { hwtnl->api_fillmode = mode; } void -svga_hwtnl_reset_vdecl(struct svga_hwtnl *hwtnl, unsigned count) +svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl, + unsigned count, + const SVGA3dVertexDecl * decls, + const unsigned *buffer_indexes, + SVGA3dElementLayoutId layout_id) { - unsigned i; - assert(hwtnl->cmd.prim_count == 0); - - for (i = count; i < hwtnl->cmd.vdecl_count; i++) { - pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL); - } - hwtnl->cmd.vdecl_count = count; + hwtnl->cmd.vdecl_layout_id = layout_id; + memcpy(hwtnl->cmd.vdecl, decls, count * sizeof(*decls)); + memcpy(hwtnl->cmd.vdecl_buffer_index, buffer_indexes, + count * sizeof(unsigned)); } +/** + * Specify vertex buffers for hardware drawing. 
+ */ void -svga_hwtnl_vdecl(struct svga_hwtnl *hwtnl, - unsigned i, - const SVGA3dVertexDecl * decl, struct pipe_resource *vb) +svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl, + unsigned count, struct pipe_vertex_buffer *buffers) { - assert(hwtnl->cmd.prim_count == 0); - - assert(i < hwtnl->cmd.vdecl_count); - - hwtnl->cmd.vdecl[i] = *decl; - - pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], vb); + util_set_vertex_buffers_count(hwtnl->cmd.vbufs, + &hwtnl->cmd.vbuf_count, buffers, 0, count); } @@ -145,8 +157,8 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl, return FALSE; } - for (i = 0; i < hwtnl->cmd.vdecl_count; ++i) { - if (hwtnl->cmd.vdecl_vb[i] == buffer) { + for (i = 0; i < hwtnl->cmd.vbuf_count; ++i) { + if (hwtnl->cmd.vbufs[i].buffer == buffer) { return TRUE; } } @@ -161,116 +173,463 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl, } -enum pipe_error -svga_hwtnl_flush(struct svga_hwtnl *hwtnl) +static enum pipe_error +draw_vgpu9(struct svga_hwtnl *hwtnl) { struct svga_winsys_context *swc = hwtnl->cmd.swc; struct svga_context *svga = hwtnl->svga; enum pipe_error ret; + struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; + struct svga_winsys_surface *ib_handle[QSZ]; + struct svga_winsys_surface *handle; + SVGA3dVertexDecl *vdecl; + SVGA3dPrimitiveRange *prim; + unsigned i; - if (hwtnl->cmd.prim_count) { - struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; - struct svga_winsys_surface *ib_handle[QSZ]; - struct svga_winsys_surface *handle; - SVGA3dVertexDecl *vdecl; - SVGA3dPrimitiveRange *prim; - unsigned i; + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + unsigned j = hwtnl->cmd.vdecl_buffer_index[i]; + handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer); + if (!handle) + return PIPE_ERROR_OUT_OF_MEMORY; - for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { - assert(!svga_buffer_is_user_buffer(hwtnl->cmd.vdecl_vb[i])); - handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); - if (handle == NULL) + vb_handle[i] = handle; + } + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + if (hwtnl->cmd.prim_ib[i]) { + handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); + if (!handle) return PIPE_ERROR_OUT_OF_MEMORY; + } + else + handle = NULL; + + ib_handle[i] = handle; + } + + if (svga->rebind.flags.rendertargets) { + ret = svga_reemit_framebuffer_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + + if (svga->rebind.flags.texture_samplers) { + ret = svga_reemit_tss_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + + if (svga->rebind.flags.vs) { + ret = svga_reemit_vs_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + + if (svga->rebind.flags.fs) { + ret = svga_reemit_fs_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } - vb_handle[i] = handle; + SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", + svga->curr.framebuffer.cbufs[0] ? 
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, + hwtnl->cmd.prim_count); + + ret = SVGA3D_BeginDrawPrimitives(swc, + &vdecl, + hwtnl->cmd.vdecl_count, + &prim, hwtnl->cmd.prim_count); + if (ret != PIPE_OK) + return ret; + + memcpy(vdecl, + hwtnl->cmd.vdecl, + hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]); + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + /* check for 4-byte alignment */ + assert(vdecl[i].array.offset % 4 == 0); + assert(vdecl[i].array.stride % 4 == 0); + + /* Given rangeHint is considered to be relative to indexBias, and + * indexBias varies per primitive, we cannot accurately supply an + * rangeHint when emitting more than one primitive per draw command. + */ + if (hwtnl->cmd.prim_count == 1) { + vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0]; + vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1; } + else { + vdecl[i].rangeHint.first = 0; + vdecl[i].rangeHint.last = 0; + } + + swc->surface_relocation(swc, + &vdecl[i].array.surfaceId, + NULL, vb_handle[i], SVGA_RELOC_READ); + } + + memcpy(prim, + hwtnl->cmd.prim, hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]); + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + swc->surface_relocation(swc, + &prim[i].indexArray.surfaceId, + NULL, ib_handle[i], SVGA_RELOC_READ); + pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL); + } + + SVGA_FIFOCommitAll(swc); + + hwtnl->cmd.prim_count = 0; + + return PIPE_OK; +} + + +static SVGA3dSurfaceFormat +xlate_index_format(unsigned indexWidth) +{ + if (indexWidth == 2) { + return SVGA3D_R16_UINT; + } + else if (indexWidth == 4) { + return SVGA3D_R32_UINT; + } + else { + assert(!"Bad indexWidth"); + return SVGA3D_R32_UINT; + } +} + - for (i = 0; i < hwtnl->cmd.prim_count; i++) { - if (hwtnl->cmd.prim_ib[i]) { - assert(!svga_buffer_is_user_buffer(hwtnl->cmd.prim_ib[i])); - handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); - if (handle == NULL) - return PIPE_ERROR_OUT_OF_MEMORY; +static enum pipe_error +validate_sampler_resources(struct svga_context *svga) +{ + unsigned shader; + + assert(svga_have_vgpu10(svga)); + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + unsigned count = svga->curr.num_sampler_views[shader]; + unsigned i; + struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; + enum pipe_error ret; + + /* + * Reference bound sampler resources to ensure pending updates are + * noticed by the device. 
+ */ + for (i = 0; i < count; i++) { + struct svga_pipe_sampler_view *sv = + svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); + + if (sv) { + if (sv->base.texture->target == PIPE_BUFFER) { + surfaces[i] = svga_buffer_handle(svga, sv->base.texture); + } + else { + surfaces[i] = svga_texture(sv->base.texture)->handle; + } } else { - handle = NULL; + surfaces[i] = NULL; } + } + + if (shader == PIPE_SHADER_FRAGMENT && + svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_pipe_sampler_view *sv = + svga->polygon_stipple.sampler_view; - ib_handle[i] = handle; + assert(sv); + surfaces[unit] = svga_texture(sv->base.texture)->handle; + count = MAX2(count, unit+1); } - if (svga->rebind.rendertargets) { - ret = svga_reemit_framebuffer_bindings(svga); - if (ret != PIPE_OK) { - return ret; + /* rebind the shader resources if needed */ + if (svga->rebind.flags.texture_samplers) { + for (i = 0; i < count; i++) { + if (surfaces[i]) { + ret = svga->swc->resource_rebind(svga->swc, + surfaces[i], + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } } } + } + svga->rebind.flags.texture_samplers = FALSE; - if (svga->rebind.texture_samplers) { - ret = svga_reemit_tss_bindings(svga); - if (ret != PIPE_OK) { - return ret; + return PIPE_OK; +} + + +static enum pipe_error +validate_constant_buffers(struct svga_context *svga) +{ + unsigned shader; + + assert(svga_have_vgpu10(svga)); + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + enum pipe_error ret; + struct svga_buffer *buffer; + struct svga_winsys_surface *handle; + unsigned enabled_constbufs; + + /* Rebind the default constant buffer if needed */ + if (svga->rebind.flags.constbufs) { + buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]); + if (buffer) { + ret = svga->swc->resource_rebind(svga->swc, + buffer->handle, + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; } } - if (svga->rebind.vs) { - ret = svga_reemit_vs_bindings(svga); - if (ret != PIPE_OK) { - return ret; + /* + * Reference other bound constant buffers to ensure pending updates are + * noticed by the device. 
+ */ + enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u; + while (enabled_constbufs) { + unsigned i = u_bit_scan(&enabled_constbufs); + buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer); + if (buffer) { + handle = svga_buffer_handle(svga, &buffer->b.b); + + if (svga->rebind.flags.constbufs) { + ret = svga->swc->resource_rebind(svga->swc, + handle, + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } } } + } + svga->rebind.flags.constbufs = FALSE; - if (svga->rebind.fs) { - ret = svga_reemit_fs_bindings(svga); - if (ret != PIPE_OK) { - return ret; - } + return PIPE_OK; +} + + +static enum pipe_error +draw_vgpu10(struct svga_hwtnl *hwtnl, + const SVGA3dPrimitiveRange *range, + unsigned vcount, + unsigned min_index, + unsigned max_index, struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count) +{ + struct svga_context *svga = hwtnl->svga; + struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; + struct svga_winsys_surface *ib_handle; + const unsigned vbuf_count = hwtnl->cmd.vbuf_count; + enum pipe_error ret; + unsigned i; + + assert(svga_have_vgpu10(svga)); + assert(hwtnl->cmd.prim_count == 0); + + /* We need to reemit all the current resource bindings along with the Draw + * command to be sure that the referenced resources are available for the + * Draw command, just in case the surfaces associated with the resources + * are paged out. + */ + if (svga->rebind.val) { + ret = svga_rebind_framebuffer_bindings(svga); + if (ret != PIPE_OK) + return ret; + + ret = svga_rebind_shaders(svga); + if (ret != PIPE_OK) + return ret; + } + + ret = validate_sampler_resources(svga); + if (ret != PIPE_OK) + return ret; + + ret = validate_constant_buffers(svga); + if (ret != PIPE_OK) + return ret; + + /* Get handle for each referenced vertex buffer */ + for (i = 0; i < vbuf_count; i++) { + struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer); + + if (sbuf) { + assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER); + vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b); + if (vb_handle[i] == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; } + else { + vb_handle[i] = NULL; + } + } + + /* Get handles for the index buffers */ + if (ib) { + struct svga_buffer *sbuf = svga_buffer(ib); + + assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER); + (void) sbuf; /* silence unused var warning */ - SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", - svga->curr.framebuffer.cbufs[0] ? - svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, - hwtnl->cmd.prim_count); + ib_handle = svga_buffer_handle(svga, ib); + if (!ib_handle) + return PIPE_ERROR_OUT_OF_MEMORY; + } + else { + ib_handle = NULL; + } - ret = SVGA3D_BeginDrawPrimitives(swc, &vdecl, hwtnl->cmd.vdecl_count, - &prim, hwtnl->cmd.prim_count); + /* setup vertex attribute input layout */ + if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) { + ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, + hwtnl->cmd.vdecl_layout_id); if (ret != PIPE_OK) return ret; - memcpy(vdecl, hwtnl->cmd.vdecl, - hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]); + svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id; + } - for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { - /* Given rangeHint is considered to be relative to indexBias, and - * indexBias varies per primitive, we cannot accurately supply an - * rangeHint when emitting more than one primitive per draw command. 
+ /* setup vertex buffers */ + { + SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS]; + + for (i = 0; i < vbuf_count; i++) { + buffers[i].stride = hwtnl->cmd.vbufs[i].stride; + buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset; + } + if (vbuf_count > 0) { + /* If we haven't yet emitted a drawing command or if any + * vertex buffer state is changing, issue that state now. */ - if (hwtnl->cmd.prim_count == 1) { - vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0]; - vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1; + if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) || + vbuf_count != svga->state.hw_draw.num_vbuffers || + memcmp(buffers, svga->state.hw_draw.vbuffers, + vbuf_count * sizeof(buffers[0])) || + memcmp(vb_handle, svga->state.hw_draw.vbuffer_handles, + vbuf_count * sizeof(vb_handle[0]))) { + ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count, + 0, /* startBuffer */ + buffers, vb_handle); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_draw.num_vbuffers = vbuf_count; + memcpy(svga->state.hw_draw.vbuffers, buffers, + vbuf_count * sizeof(buffers[0])); + memcpy(svga->state.hw_draw.vbuffer_handles, vb_handle, + vbuf_count * sizeof(vb_handle[0])); } - else { - vdecl[i].rangeHint.first = 0; - vdecl[i].rangeHint.last = 0; - } - - swc->surface_relocation(swc, &vdecl[i].array.surfaceId, NULL, - vb_handle[i], SVGA_RELOC_READ); } + } - memcpy(prim, hwtnl->cmd.prim, - hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]); + /* Set primitive type (line, tri, etc) */ + if (svga->state.hw_draw.topology != range->primType) { + ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType); + if (ret != PIPE_OK) + return ret; - for (i = 0; i < hwtnl->cmd.prim_count; i++) { - swc->surface_relocation(swc, &prim[i].indexArray.surfaceId, NULL, - ib_handle[i], SVGA_RELOC_READ); - pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL); + svga->state.hw_draw.topology = range->primType; + } + + if (ib_handle) { + /* indexed drawing */ + SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth); + + /* setup index buffer */ + if (ib_handle != svga->state.hw_draw.ib || + indexFormat != svga->state.hw_draw.ib_format || + range->indexArray.offset != svga->state.hw_draw.ib_offset) { + ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle, + indexFormat, + range->indexArray.offset); + if (ret != PIPE_OK) + return ret; + svga->state.hw_draw.ib = ib_handle; + svga->state.hw_draw.ib_format = indexFormat; + svga->state.hw_draw.ib_offset = range->indexArray.offset; } - SVGA_FIFOCommitAll(swc); - hwtnl->cmd.prim_count = 0; + if (instance_count > 1) { + ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc, + vcount, + instance_count, + 0, /* startIndexLocation */ + range->indexBias, + start_instance); + if (ret != PIPE_OK) + return ret; + } + else { + /* non-instanced drawing */ + ret = SVGA3D_vgpu10_DrawIndexed(svga->swc, + vcount, + 0, /* startIndexLocation */ + range->indexBias); + if (ret != PIPE_OK) + return ret; + } + } + else { + /* non-indexed drawing */ + if (instance_count > 1) { + ret = SVGA3D_vgpu10_DrawInstanced(svga->swc, + vcount, + instance_count, + range->indexBias, + start_instance); + if (ret != PIPE_OK) + return ret; + } + else { + /* non-instanced */ + ret = SVGA3D_vgpu10_Draw(svga->swc, + vcount, + range->indexBias); + if (ret != PIPE_OK) + return ret; + } } + hwtnl->cmd.prim_count = 0; + + return PIPE_OK; +} + + + +/** + * Emit any pending drawing commands to the command buffer. 
+ * When we receive VGPU9 drawing commands we accumulate them and don't + * immediately emit them into the command buffer. + * This function needs to be called before we change state that could + * effect those pending draws. + */ +enum pipe_error +svga_hwtnl_flush(struct svga_hwtnl *hwtnl) +{ + if (!svga_have_vgpu10(hwtnl->svga) && hwtnl->cmd.prim_count) { + /* we only queue up primitive for VGPU9 */ + return draw_vgpu9(hwtnl); + } return PIPE_OK; } @@ -298,18 +657,28 @@ check_draw_params(struct svga_hwtnl *hwtnl, { unsigned i; + assert(!svga_have_vgpu10(hwtnl->svga)); + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { - struct pipe_resource *vb = hwtnl->cmd.vdecl_vb[i]; - unsigned size = vb ? vb->width0 : 0; + unsigned j = hwtnl->cmd.vdecl_buffer_index[i]; + const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j]; + unsigned size = vb->buffer ? vb->buffer->width0 : 0; unsigned offset = hwtnl->cmd.vdecl[i].array.offset; unsigned stride = hwtnl->cmd.vdecl[i].array.stride; int index_bias = (int) range->indexBias + hwtnl->index_bias; unsigned width; + if (size == 0) + continue; + assert(vb); assert(size); assert(offset < size); assert(min_index <= max_index); + (void) width; + (void) stride; + (void) offset; + (void) size; switch (hwtnl->cmd.vdecl[i].identity.type) { case SVGA3D_DECLTYPE_FLOAT1: @@ -390,6 +759,9 @@ check_draw_params(struct svga_hwtnl *hwtnl, assert(size); assert(offset < size); assert(stride); + (void) size; + (void) offset; + (void) stride; switch (range->primType) { case SVGA3D_PRIMITIVE_POINTLIST: @@ -421,33 +793,57 @@ check_draw_params(struct svga_hwtnl *hwtnl, } +/** + * All drawing filters down into this function, either directly + * on the hardware path or after doing software vertex processing. + */ enum pipe_error svga_hwtnl_prim(struct svga_hwtnl *hwtnl, const SVGA3dPrimitiveRange * range, + unsigned vcount, unsigned min_index, - unsigned max_index, struct pipe_resource *ib) + unsigned max_index, struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count) { enum pipe_error ret = PIPE_OK; + if (svga_have_vgpu10(hwtnl->svga)) { + /* draw immediately */ + ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib, + start_instance, instance_count); + if (ret != PIPE_OK) { + svga_context_flush(hwtnl->svga, NULL); + ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib, + start_instance, instance_count); + assert(ret == PIPE_OK); + } + } + else { + /* batch up drawing commands */ #ifdef DEBUG - check_draw_params(hwtnl, range, min_index, max_index, ib); + check_draw_params(hwtnl, range, min_index, max_index, ib); + assert(start_instance == 0); + assert(instance_count <= 1); +#else + (void) check_draw_params; #endif - if (hwtnl->cmd.prim_count + 1 >= QSZ) { - ret = svga_hwtnl_flush(hwtnl); - if (ret != PIPE_OK) - return ret; - } + if (hwtnl->cmd.prim_count + 1 >= QSZ) { + ret = svga_hwtnl_flush(hwtnl); + if (ret != PIPE_OK) + return ret; + } - /* min/max indices are relative to bias */ - hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index; - hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; + /* min/max indices are relative to bias */ + hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index; + hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; - hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; - hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; + hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; - 
pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); - hwtnl->cmd.prim_count++; + pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); + hwtnl->cmd.prim_count++; + } return ret; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw.h b/lib/mesa/src/gallium/drivers/svga/svga_draw.h index 1db79cd91..af8ecabcb 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw.h @@ -35,54 +35,50 @@ struct svga_winsys_context; struct svga_screen; struct svga_context; struct pipe_resource; +struct u_upload_mgr; -struct svga_hwtnl * -svga_hwtnl_create(struct svga_context *svga); +struct svga_hwtnl *svga_hwtnl_create(struct svga_context *svga); -void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl ); +void svga_hwtnl_destroy(struct svga_hwtnl *hwtnl); -void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl, - boolean flatshade, - boolean flatshade_first ); +void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl, + boolean flatshade, boolean flatshade_first); -void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl, - unsigned mode ); +void svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode); -void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl, - unsigned i, - const SVGA3dVertexDecl *decl, - struct pipe_resource *vb); +void +svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl, + unsigned count, + const SVGA3dVertexDecl * decls, + const unsigned *buffer_indexes, + SVGA3dElementLayoutId layoutId); -void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl, - unsigned count ); +void +svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl, + unsigned count, struct pipe_vertex_buffer *buffers); - -enum pipe_error -svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, - unsigned prim, - unsigned start, - unsigned count); +enum pipe_error +svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count); enum pipe_error -svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, - struct pipe_resource *indexBuffer, - unsigned index_size, - int index_bias, - unsigned min_index, - unsigned max_index, - unsigned prim, - unsigned start, - unsigned count ); +svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, + struct pipe_resource *indexBuffer, + unsigned index_size, + int index_bias, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count); boolean -svga_hwtnl_is_buffer_referred( struct svga_hwtnl *hwtnl, - struct pipe_resource *buffer ); +svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl, + struct pipe_resource *buffer); -enum pipe_error -svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); +enum pipe_error svga_hwtnl_flush(struct svga_hwtnl *hwtnl); -void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, - int index_bias); +void svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias); #endif /* SVGA_DRAW_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c index d4d77200f..7d8293067 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c @@ -26,12 +26,14 @@ #include "svga_cmd.h" #include "util/u_inlines.h" +#include "util/u_prim.h" #include "indices/u_indices.h" #include "svga_hw_reg.h" #include "svga_draw.h" #include "svga_draw_private.h" #include "svga_context.h" +#include "svga_shader.h" #define DBG 
0 @@ -49,13 +51,13 @@ generate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *dst = NULL; void *dst_map = NULL; - dst = pipe_buffer_create(pipe->screen, - PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size); - if (dst == NULL) + dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_IMMUTABLE, size); + if (!dst) goto fail; dst_map = pipe_buffer_map(pipe, dst, PIPE_TRANSFER_WRITE, &transfer); - if (dst_map == NULL) + if (!dst_map) goto fail; generate(0, nr, dst_map); @@ -168,7 +170,8 @@ retrieve_or_generate_indices(struct svga_hwtnl *hwtnl, static enum pipe_error simple_draw_arrays(struct svga_hwtnl *hwtnl, - unsigned prim, unsigned start, unsigned count) + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count) { SVGA3dPrimitiveRange range; unsigned hw_prim; @@ -191,17 +194,47 @@ simple_draw_arrays(struct svga_hwtnl *hwtnl, * looking at those numbers knows to adjust them by * range.indexBias. */ - return svga_hwtnl_prim(hwtnl, &range, 0, count - 1, NULL); + return svga_hwtnl_prim(hwtnl, &range, count, + 0, count - 1, NULL, + start_instance, instance_count); } enum pipe_error svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, - unsigned prim, unsigned start, unsigned count) + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count) { - unsigned gen_prim, gen_size, gen_nr, gen_type; + unsigned gen_prim, gen_size, gen_nr; + enum indices_mode gen_type; u_generate_func gen_func; enum pipe_error ret = PIPE_OK; + unsigned api_pv = hwtnl->api_pv; + struct svga_context *svga = hwtnl->svga; + + if (svga->curr.rast->templ.flatshade && + svga->state.hw_draw.fs->constant_color_output) { + /* The fragment color is a constant, not per-vertex so the whole + * primitive will be the same color (except for possible blending). + * We can ignore the current provoking vertex state and use whatever + * the hardware wants. + */ + api_pv = hwtnl->hw_pv; + + if (hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL) { + /* Do some simple primitive conversions to avoid index buffer + * generation below. Note that polygons and quads are not directly + * supported by the svga device. Also note, we can only do this + * for flat/constant-colored rendering because of provoking vertex. 
+ */ + if (prim == PIPE_PRIM_POLYGON) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + else if (prim == PIPE_PRIM_QUADS && count == 4) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + } + } if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && prim >= PIPE_PRIM_TRIANGLES) { @@ -222,13 +255,14 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, prim, start, count, - hwtnl->api_pv, + api_pv, hwtnl->hw_pv, &gen_prim, &gen_size, &gen_nr, &gen_func); } if (gen_type == U_GENERATE_LINEAR) { - return simple_draw_arrays(hwtnl, gen_prim, start, count); + return simple_draw_arrays(hwtnl, gen_prim, start, count, + start_instance, instance_count); } else { struct pipe_resource *gen_buf = NULL; @@ -244,14 +278,19 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, if (ret != PIPE_OK) goto done; + pipe_debug_message(&svga->debug.callback, PERF_INFO, + "generating temporary index buffer for drawing %s", + u_prim_name(prim)); + ret = svga_hwtnl_simple_draw_range_elements(hwtnl, gen_buf, gen_size, start, 0, count - 1, - gen_prim, 0, gen_nr); - + gen_prim, 0, gen_nr, + start_instance, + instance_count); if (ret != PIPE_OK) goto done; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c index 038500a35..74bfebda1 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c @@ -25,6 +25,7 @@ #include "util/u_inlines.h" #include "util/u_prim.h" +#include "util/u_upload_mgr.h" #include "indices/u_indices.h" #include "svga_cmd.h" @@ -45,7 +46,7 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src, struct pipe_context *pipe = &hwtnl->svga->pipe; struct pipe_transfer *src_transfer = NULL; struct pipe_transfer *dst_transfer = NULL; - unsigned size; + unsigned size = index_size * nr; const void *src_map = NULL; struct pipe_resource *dst = NULL; void *dst_map = NULL; @@ -59,15 +60,15 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src, dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size); - if (dst == NULL) + if (!dst) goto fail; src_map = pipe_buffer_map(pipe, src, PIPE_TRANSFER_READ, &src_transfer); - if (src_map == NULL) + if (!src_map) goto fail; dst_map = pipe_buffer_map(pipe, dst, PIPE_TRANSFER_WRITE, &dst_transfer); - if (dst_map == NULL) + if (!dst_map) goto fail; translate((const char *) src_map + offset, 0, 0, nr, 0, dst_map); @@ -98,7 +99,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, unsigned index_size, int index_bias, unsigned min_index, unsigned max_index, unsigned prim, unsigned start, - unsigned count) + unsigned count, + unsigned start_instance, + unsigned instance_count) { SVGA3dPrimitiveRange range; unsigned hw_prim; @@ -109,12 +112,6 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, if (hw_count == 0) return PIPE_OK; /* nothing to draw */ - /* We should never see user-space buffers in the driver. The vbuf - * module should have converted them into real buffers. 
- */ - if (index_buffer) - assert(!svga_buffer_is_user_buffer(index_buffer)); - range.primType = hw_prim; range.primitiveCount = hw_count; range.indexArray.offset = index_offset; @@ -122,7 +119,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, range.indexWidth = index_size; range.indexBias = index_bias; - return svga_hwtnl_prim(hwtnl, &range, min_index, max_index, index_buffer); + return svga_hwtnl_prim(hwtnl, &range, count, + min_index, max_index, index_buffer, + start_instance, instance_count); } @@ -131,9 +130,11 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, struct pipe_resource *index_buffer, unsigned index_size, int index_bias, unsigned min_index, unsigned max_index, - unsigned prim, unsigned start, unsigned count) + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count) { - unsigned gen_prim, gen_size, gen_nr, gen_type; + unsigned gen_prim, gen_size, gen_nr; + enum indices_mode gen_type; u_translate_func gen_func; enum pipe_error ret = PIPE_OK; @@ -165,7 +166,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, index_bias, min_index, max_index, - gen_prim, start, count); + gen_prim, start, count, + start_instance, + instance_count); } else { struct pipe_resource *gen_buf = NULL; @@ -190,7 +193,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, index_bias, min_index, max_index, - gen_prim, 0, gen_nr); + gen_prim, 0, gen_nr, + start_instance, + instance_count); if (ret != PIPE_OK) goto done; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h index 9ab87e825..c8217422c 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h @@ -46,7 +46,11 @@ static const unsigned svga_hw_prims = (1 << PIPE_PRIM_LINE_STRIP) | (1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) | - (1 << PIPE_PRIM_TRIANGLE_FAN)); + (1 << PIPE_PRIM_TRIANGLE_FAN) | + (1 << PIPE_PRIM_LINES_ADJACENCY) | + (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) | + (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) | + (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY)); /** @@ -57,8 +61,8 @@ static const unsigned svga_hw_prims = * PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON. We convert * those to other types of primitives with index/translation code. 
*/ -static inline unsigned -svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count) +static inline SVGA3dPrimitiveType +svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count) { switch (mode) { case PIPE_PRIM_POINTS: @@ -85,6 +89,22 @@ svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count) *prim_count = vcount - 2; return SVGA3D_PRIMITIVE_TRIANGLEFAN; + case PIPE_PRIM_LINES_ADJACENCY: + *prim_count = vcount / 4; + return SVGA3D_PRIMITIVE_LINELIST_ADJ; + + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + *prim_count = vcount - 3; + return SVGA3D_PRIMITIVE_LINESTRIP_ADJ; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + *prim_count = vcount / 6; + return SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ; + + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + *prim_count = vcount / 2 - 2 ; + return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ; + default: assert(0); *prim_count = 0; @@ -110,13 +130,19 @@ struct index_cache { struct draw_cmd { struct svga_winsys_context *swc; + /* vertex layout info */ SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX]; - struct pipe_resource *vdecl_vb[SVGA3D_INPUTREG_MAX]; unsigned vdecl_count; + SVGA3dElementLayoutId vdecl_layout_id; + unsigned vdecl_buffer_index[SVGA3D_INPUTREG_MAX]; + + /* vertex buffer info */ + struct pipe_vertex_buffer vbufs[SVGA3D_INPUTREG_MAX]; + unsigned vbuf_count; SVGA3dPrimitiveRange prim[QSZ]; struct pipe_resource *prim_ib[QSZ]; - unsigned prim_count; + unsigned prim_count; /**< number of primitives for this draw */ unsigned min_index[QSZ]; unsigned max_index[QSZ]; }; @@ -158,9 +184,11 @@ struct svga_hwtnl { enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, const SVGA3dPrimitiveRange *range, + unsigned vcount, unsigned min_index, unsigned max_index, - struct pipe_resource *ib ); + struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count); enum pipe_error svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, @@ -171,7 +199,9 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, unsigned max_index, unsigned prim, unsigned start, - unsigned count ); + unsigned count, + unsigned start_instance, + unsigned instance_count); #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.c b/lib/mesa/src/gallium/drivers/svga/svga_format.c index 8c1b161e6..0186736c8 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_format.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_format.c @@ -26,6 +26,7 @@ #include "pipe/p_format.h" #include "util/u_debug.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "svga_winsys.h" @@ -33,6 +34,347 @@ #include "svga_format.h" +/** Describes mapping from gallium formats to SVGA vertex/pixel formats */ +struct vgpu10_format_entry +{ + enum pipe_format pformat; + SVGA3dSurfaceFormat vertex_format; + SVGA3dSurfaceFormat pixel_format; + unsigned flags; +}; + + +static const struct vgpu10_format_entry format_conversion_table[] = +{ + /* Gallium format SVGA3D vertex format SVGA3D pixel format Flags */ + { PIPE_FORMAT_NONE, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B8G8R8A8_UNORM, SVGA3D_B8G8R8A8_UNORM, SVGA3D_B8G8R8A8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_B8G8R8X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_A8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B5G5R5A1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_B4G4R4A4_UNORM, 
SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B5G6R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_I8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_UYVY, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_YUYV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D16_UNORM, 0 }, + { PIPE_FORMAT_Z32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_Z32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, 0 }, + { PIPE_FORMAT_Z24_UNORM_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 }, + { PIPE_FORMAT_S8_UINT_Z24_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_Z24X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 }, + { PIPE_FORMAT_X8Z24_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64A64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32A32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_USCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32G32_USCALED, SVGA3D_R32G32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32_USCALED, SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32A32_USCALED, SVGA3D_R32G32B32A32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32A32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_SSCALED, SVGA3D_R32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R32G32_SSCALED, SVGA3D_R32G32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32_SSCALED, SVGA3D_R32G32B32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R32G32B32A32_SSCALED, SVGA3D_R32G32B32A32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { 
PIPE_FORMAT_R16_UNORM, SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16_UNORM, SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16B16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R16_USCALED, SVGA3D_R16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16G16_USCALED, SVGA3D_R16G16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16_USCALED, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16A16_USCALED, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R16_SNORM, SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, 0 }, + { PIPE_FORMAT_R16G16_SNORM, SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, 0 }, + { PIPE_FORMAT_R16G16B16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, 0 }, + { PIPE_FORMAT_R16_SSCALED, SVGA3D_R16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R16G16_SSCALED, SVGA3D_R16G16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16_SSCALED, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST }, + { PIPE_FORMAT_R16G16B16A16_SSCALED, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8_UNORM, SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R8G8_UNORM, SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_R8G8B8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, TF_GEN_MIPS }, + { PIPE_FORMAT_X8B8G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_USCALED, SVGA3D_R8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R8G8_USCALED, SVGA3D_R8G8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8_USCALED, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8A8_USCALED, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, + { 73, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_SNORM, SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, 0 }, + { PIPE_FORMAT_R8G8_SNORM, SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, 0 }, + { PIPE_FORMAT_R8G8B8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, 0 }, + { 78, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 79, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 80, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 81, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_SSCALED, SVGA3D_R8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8G8_SSCALED, SVGA3D_R8G8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8_SSCALED, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST }, + { PIPE_FORMAT_R8G8B8A8_SSCALED, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, + { 86, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32_FIXED, SVGA3D_FORMAT_INVALID, 
SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32A32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16_FLOAT, SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R16G16B16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_L8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8B8G8R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B8G8R8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8A8_UNORM_SRGB, TF_GEN_MIPS }, + { PIPE_FORMAT_B8G8R8X8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM_SRGB, TF_GEN_MIPS }, + { PIPE_FORMAT_A8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, TF_GEN_MIPS }, + { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM, 0 }, + { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM, 0 }, + { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM_SRGB, 0 }, + { PIPE_FORMAT_RGTC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_UNORM, 0 }, + { PIPE_FORMAT_RGTC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_SNORM, 0 }, + { PIPE_FORMAT_RGTC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, 0 }, + { PIPE_FORMAT_RGTC2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_SNORM, 0 }, + { PIPE_FORMAT_R8G8_B8G8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8SG8SB8UX8U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R5SG5SB6U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B5G5R5X1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10A2_USCALED, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_USCALED }, + { PIPE_FORMAT_R11G11B10_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_R11G11B10_FLOAT, TF_GEN_MIPS }, + { PIPE_FORMAT_R9G9B9E5_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_R9G9B9E5_SHAREDEXP, 0 }, + { PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT_S8X24_UINT, 0 }, + { PIPE_FORMAT_R1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10X2_USCALED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10X2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B10G10R10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA }, 
+ { PIPE_FORMAT_R10SG10SB10SA2U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8Bx_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B4G4R4X4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X24S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X32_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B2G3R3_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_LATC2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_YV12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_YV16, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_IYUV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_NV12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_NV21, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A4R4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10A2_SSCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_SSCALED }, + { PIPE_FORMAT_R10G10B10A2_SNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_SNORM }, + { PIPE_FORMAT_B10G10R10A2_USCALED, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_USCALED }, + { PIPE_FORMAT_B10G10R10A2_SSCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, 
VF_BGRA | VF_PUINT_TO_SSCALED }, + { PIPE_FORMAT_B10G10R10A2_SNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SNORM }, + { PIPE_FORMAT_R8_UINT, SVGA3D_R8_UINT, SVGA3D_R8_UINT, 0 }, + { PIPE_FORMAT_R8G8_UINT, SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, 0 }, + { PIPE_FORMAT_R8G8B8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, 0 }, + { PIPE_FORMAT_R8_SINT, SVGA3D_R8_SINT, SVGA3D_R8_SINT, 0 }, + { PIPE_FORMAT_R8G8_SINT, SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, 0 }, + { PIPE_FORMAT_R8G8B8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, 0 }, + { PIPE_FORMAT_R16_UINT, SVGA3D_R16_UINT, SVGA3D_R16_UINT, 0 }, + { PIPE_FORMAT_R16G16_UINT, SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, 0 }, + { PIPE_FORMAT_R16G16B16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, 0 }, + { PIPE_FORMAT_R16_SINT, SVGA3D_R16_SINT, SVGA3D_R16_SINT, 0 }, + { PIPE_FORMAT_R16G16_SINT, SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, 0 }, + { PIPE_FORMAT_R16G16B16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, + { PIPE_FORMAT_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, 0 }, + { PIPE_FORMAT_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, 0 }, + { PIPE_FORMAT_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, 0 }, + { PIPE_FORMAT_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, 0 }, + { PIPE_FORMAT_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, 0 }, + { PIPE_FORMAT_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, 0 }, + { PIPE_FORMAT_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, 0 }, + { PIPE_FORMAT_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, 0 }, + { PIPE_FORMAT_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, 0 }, + { PIPE_FORMAT_A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32A32_UINT, SVGA3D_FORMAT_INVALID, 
SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_I32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_L32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B10G10R10A2_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC1_RGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8_R8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_B8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8G8B8X8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_B10G10R10X2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16G16B16X16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32X32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32X32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32G32B32X32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32A32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, 0 }, + { PIPE_FORMAT_B5G6R5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_RGBA_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_RGB_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_BPTC_RGB_UFLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A16L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 
PIPE_FORMAT_G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G16R16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G16R16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8B8G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_SRGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RGB8A1, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_SRGB8A1, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RGBA8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_SRGBA8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_R11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_R11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RG11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ETC2_RG11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_4x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_4x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, +}; + + +/** + * Translate a gallium vertex format to a vgpu10 vertex format. 
+ * Also, return any special vertex format flags. + */ +void +svga_translate_vertex_format_vgpu10(enum pipe_format format, + SVGA3dSurfaceFormat *svga_format, + unsigned *vf_flags) +{ + assert(format < Elements(format_conversion_table)); + if (format >= Elements(format_conversion_table)) { + format = PIPE_FORMAT_NONE; + } + *svga_format = format_conversion_table[format].vertex_format; + *vf_flags = format_conversion_table[format].flags; +} + + /* * Translate from gallium format to SVGA3D format. */ @@ -41,8 +383,16 @@ svga_translate_format(struct svga_screen *ss, enum pipe_format format, unsigned bind) { - switch(format) { + if (ss->sws->have_vgpu10) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { + return format_conversion_table[format].vertex_format; + } + else { + return format_conversion_table[format].pixel_format; + } + } + switch(format) { case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_A8R8G8B8; case PIPE_FORMAT_B8G8R8X8_UNORM: @@ -70,10 +420,13 @@ svga_translate_format(struct svga_screen *ss, return SVGA3D_A16B16G16R16; case PIPE_FORMAT_Z16_UNORM: + assert(!ss->sws->have_vgpu10); return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.z16 : SVGA3D_Z_D16; case PIPE_FORMAT_S8_UINT_Z24_UNORM: + assert(!ss->sws->have_vgpu10); return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.s8z24 : SVGA3D_Z_D24S8; case PIPE_FORMAT_X8Z24_UNORM: + assert(!ss->sws->have_vgpu10); return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.x8z24 : SVGA3D_Z_D24X8; case PIPE_FORMAT_A8_UNORM: @@ -116,12 +469,17 @@ svga_translate_format(struct svga_screen *ss, * Format capability description entry. */ struct format_cap { + const char *name; + SVGA3dSurfaceFormat format; /* * Capability index corresponding to the format. */ - SVGA3dDevCapIndex index; + SVGA3dDevCapIndex devcap; + + /* size of each pixel/block */ + unsigned block_width, block_height, block_bytes; /* * Mask of supported SVGA3dFormatOp operations, to be inferred when the @@ -134,413 +492,1367 @@ struct format_cap { /* * Format capability description table. * - * Ordererd by increasing SVGA3dSurfaceFormat value, but with gaps. + * Ordered by increasing SVGA3dSurfaceFormat value, but with gaps. + * + * Note: there are some special cases below where we set devcap=0 and + * avoid querying the host. In particular, depth/stencil formats which + * can be rendered to and sampled from. For example, the gallium format + * PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT + * for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling. + * If we want to query if a format supports both rendering and sampling the + * host will tell us no for SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D16_UNORM and + * SVGA3D_R24_UNORM_X8_TYPELESS. So we override the host query for those + * formats and report that both can do rendering and sampling. 
*/ static const struct format_cap format_cap_table[] = { { + "SVGA3D_FORMAT_INVALID", + SVGA3D_FORMAT_INVALID, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_X8R8G8B8", SVGA3D_X8R8G8B8, SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_DISPLAYMODE | - SVGA3DFORMAT_OP_3DACCELERATION | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A8R8G8B8", SVGA3D_A8R8G8B8, SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_R5G6B5", SVGA3D_R5G6B5, SVGA3D_DEVCAP_SURFACEFMT_R5G6B5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_DISPLAYMODE | - SVGA3DFORMAT_OP_3DACCELERATION | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_X1R5G5B5", SVGA3D_X1R5G5B5, SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A1R5G5B5", SVGA3D_A1R5G5B5, SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A4R4G4B4", SVGA3D_A4R4G4B4, SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, - /* - * SVGA3D_Z_D32 is not yet supported, and has no corresponding - * SVGA3D_DEVCAP_xxx. - */ { + /* + * SVGA3D_Z_D32 is not yet supported, and has no corresponding + * SVGA3D_DEVCAP_xxx. 
+ */ + "SVGA3D_Z_D32", + SVGA3D_Z_D32, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_Z_D16", SVGA3D_Z_D16, SVGA3D_DEVCAP_SURFACEFMT_Z_D16, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 2, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_Z_D24S8", SVGA3D_Z_D24S8, SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 4, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_Z_D15S1", SVGA3D_Z_D15S1, SVGA3D_DEVCAP_MAX, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 2, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_LUMINANCE8", SVGA3D_LUMINANCE8, SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8, + 1, 1, 1, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { - SVGA3D_LUMINANCE8_ALPHA8, - SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8, - SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + /* + * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding + * SVGA3D_DEVCAP_xxx. + */ + "SVGA3D_LUMINANCE4_ALPHA4", + SVGA3D_LUMINANCE4_ALPHA4, 0, 0, 0, 0, 0 }, - /* - * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding - * SVGA3D_DEVCAP_xxx. - */ { + "SVGA3D_LUMINANCE16", SVGA3D_LUMINANCE16, SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { + "SVGA3D_LUMINANCE8_ALPHA8", + SVGA3D_LUMINANCE8_ALPHA8, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE + }, + { + "SVGA3D_DXT1", SVGA3D_DXT1, SVGA3D_DEVCAP_SURFACEFMT_DXT1, + 4, 4, 8, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT2", SVGA3D_DXT2, SVGA3D_DEVCAP_SURFACEFMT_DXT2, + 4, 4, 8, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT3", SVGA3D_DXT3, SVGA3D_DEVCAP_SURFACEFMT_DXT3, + 4, 4, 16, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT4", SVGA3D_DXT4, SVGA3D_DEVCAP_SURFACEFMT_DXT4, + 4, 4, 16, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_DXT5", SVGA3D_DXT5, SVGA3D_DEVCAP_SURFACEFMT_DXT5, + 4, 4, 8, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_BUMPU8V8", SVGA3D_BUMPU8V8, SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE + }, + { + /* + * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding + * SVGA3D_DEVCAP_xxx. + */ + "SVGA3D_BUMPL6V5U5", + SVGA3D_BUMPL6V5U5, 0, 0, 0, 0, 0 }, - /* - * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. 
- */ { + "SVGA3D_BUMPX8L8V8U8", SVGA3D_BUMPX8L8V8U8, SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, - /* - * SVGA3D_BUMPL8V8U8 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead. - */ { + "SVGA3D_FORMAT_DEAD1", + SVGA3D_FORMAT_DEAD1, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_ARGB_S10E5", SVGA3D_ARGB_S10E5, SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_ARGB_S23E8", SVGA3D_ARGB_S23E8, SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A2R10G10B10", SVGA3D_A2R10G10B10, SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, - /* - * SVGA3D_V8U8 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead. - */ { + /* + * SVGA3D_V8U8 is unsupported; it has no corresponding + * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead. + */ + "SVGA3D_V8U8", + SVGA3D_V8U8, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_Q8W8V8U8", SVGA3D_Q8W8V8U8, SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_CUBETEXTURE }, { + "SVGA3D_CxV8U8", SVGA3D_CxV8U8, SVGA3D_DEVCAP_SURFACEFMT_CxV8U8, - SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE + }, + { + /* + * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding + * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead. + */ + "SVGA3D_X8L8V8U8", + SVGA3D_X8L8V8U8, 0, 0, 0, 0, 0 }, - /* - * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding - * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead. 
- */ { + "SVGA3D_A2W10V10U10", SVGA3D_A2W10V10U10, SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10, - SVGA3DFORMAT_OP_TEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE }, { + "SVGA3D_ALPHA8", SVGA3D_ALPHA8, SVGA3D_DEVCAP_SURFACEFMT_ALPHA8, + 1, 1, 1, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { + "SVGA3D_R_S10E5", SVGA3D_R_S10E5, SVGA3D_DEVCAP_SURFACEFMT_R_S10E5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_R_S23E8", SVGA3D_R_S23E8, SVGA3D_DEVCAP_SURFACEFMT_R_S23E8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_RG_S10E5", SVGA3D_RG_S10E5, SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5, + 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_RG_S23E8", SVGA3D_RG_S23E8, SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SRGBREAD | - SVGA3DFORMAT_OP_SRGBWRITE | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, - /* - * SVGA3D_BUFFER is a placeholder format for index/vertex buffers. - */ { + /* + * SVGA3D_BUFFER is a placeholder format for index/vertex buffers. 
+ */ + "SVGA3D_BUFFER", + SVGA3D_BUFFER, 0, 1, 1, 1, 0 + }, + { + "SVGA3D_Z_D24X8", SVGA3D_Z_D24X8, SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8, - SVGA3DFORMAT_OP_ZSTENCIL | - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH + 1, 1, 4, + SVGA3DFORMAT_OP_ZSTENCIL }, { + "SVGA3D_V16U16", SVGA3D_V16U16, SVGA3D_DEVCAP_SURFACEFMT_V16U16, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | - SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_BUMPMAP | - SVGA3DFORMAT_OP_OFFSCREENPLAIN + SVGA3DFORMAT_OP_VOLUMETEXTURE }, { + "SVGA3D_G16R16", SVGA3D_G16R16, SVGA3D_DEVCAP_SURFACEFMT_G16R16, + 1, 1, 4, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_A16B16G16R16", SVGA3D_A16B16G16R16, SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16, + 1, 1, 8, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | - SVGA3DFORMAT_OP_OFFSCREENPLAIN | - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET }, { + "SVGA3D_UYVY", SVGA3D_UYVY, SVGA3D_DEVCAP_SURFACEFMT_UYVY, + 0, 0, 0, 0 }, { + "SVGA3D_YUY2", SVGA3D_YUY2, SVGA3D_DEVCAP_SURFACEFMT_YUY2, + 0, 0, 0, 0 }, { + "SVGA3D_NV12", SVGA3D_NV12, SVGA3D_DEVCAP_SURFACEFMT_NV12, + 0, 0, 0, 0 }, { + "SVGA3D_AYUV", SVGA3D_AYUV, SVGA3D_DEVCAP_SURFACEFMT_AYUV, + 0, 0, 0, 0 }, { + "SVGA3D_R32G32B32A32_TYPELESS", + SVGA3D_R32G32B32A32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32A32_UINT", + SVGA3D_R32G32B32A32_UINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32A32_SINT", + SVGA3D_R32G32B32A32_SINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_TYPELESS", + SVGA3D_R32G32B32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_FLOAT", + SVGA3D_R32G32B32_FLOAT, + SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_UINT", + SVGA3D_R32G32B32_UINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32B32_SINT", + SVGA3D_R32G32B32_SINT, + SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT, + 1, 1, 12, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_TYPELESS", + SVGA3D_R16G16B16A16_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_UINT", + SVGA3D_R16G16B16A16_UINT, + 
SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_SNORM", + SVGA3D_R16G16B16A16_SNORM, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_SINT", + SVGA3D_R16G16B16A16_SINT, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_TYPELESS", + SVGA3D_R32G32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_UINT", + SVGA3D_R32G32_UINT, + SVGA3D_DEVCAP_DXFMT_R32G32_UINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_SINT", + SVGA3D_R32G32_SINT, + SVGA3D_DEVCAP_DXFMT_R32G32_SINT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G8X24_TYPELESS", + SVGA3D_R32G8X24_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_D32_FLOAT_S8X24_UINT", + SVGA3D_D32_FLOAT_S8X24_UINT, + 0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT*/ + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_R32_FLOAT_X8X24_TYPELESS", + SVGA3D_R32_FLOAT_X8X24_TYPELESS, + 0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS*/ + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_X32_TYPELESS_G8X24_UINT", + SVGA3D_X32_TYPELESS_G8X24_UINT, + SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R10G10B10A2_TYPELESS", + SVGA3D_R10G10B10A2_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R10G10B10A2_UINT", + SVGA3D_R10G10B10A2_UINT, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R11G11B10_FLOAT", + SVGA3D_R11G11B10_FLOAT, + SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_TYPELESS", + SVGA3D_R8G8B8A8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + 
SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_UNORM", + SVGA3D_R8G8B8A8_UNORM, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_UNORM_SRGB", + SVGA3D_R8G8B8A8_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_UINT", + SVGA3D_R8G8B8A8_UINT, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_SINT", + SVGA3D_R8G8B8A8_SINT, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_TYPELESS", + SVGA3D_R16G16_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_UINT", + SVGA3D_R16G16_UINT, + SVGA3D_DEVCAP_DXFMT_R16G16_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_SINT", + SVGA3D_R16G16_SINT, + SVGA3D_DEVCAP_DXFMT_R16G16_SINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32_TYPELESS", + SVGA3D_R32_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R32_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_D32_FLOAT", + SVGA3D_D32_FLOAT, + 0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_R32_UINT", + SVGA3D_R32_UINT, + SVGA3D_DEVCAP_DXFMT_R32_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32_SINT", + SVGA3D_R32_SINT, + SVGA3D_DEVCAP_DXFMT_R32_SINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R24G8_TYPELESS", + SVGA3D_R24G8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_D24_UNORM_S8_UINT", + SVGA3D_D24_UNORM_S8_UINT, + 0, /*SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + /* Special case: no devcap / report sampler and depth/stencil ability + */ + "SVGA3D_R24_UNORM_X8_TYPELESS", + SVGA3D_R24_UNORM_X8_TYPELESS, + 0, 
/*SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_X24_TYPELESS_G8_UINT", + SVGA3D_X24_TYPELESS_G8_UINT, + SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_R8G8_TYPELESS", + SVGA3D_R8G8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_UNORM", + SVGA3D_R8G8_UNORM, + SVGA3D_DEVCAP_DXFMT_R8G8_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_UINT", + SVGA3D_R8G8_UINT, + SVGA3D_DEVCAP_DXFMT_R8G8_UINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_SINT", + SVGA3D_R8G8_SINT, + SVGA3D_DEVCAP_DXFMT_R8G8_SINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_TYPELESS", + SVGA3D_R16_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R16_TYPELESS, + 1, 1, 2, + SVGA3DFORMAT_OP_ZSTENCIL | + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_UNORM", + SVGA3D_R16_UNORM, + SVGA3D_DEVCAP_DXFMT_R16_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_UINT", + SVGA3D_R16_UINT, + SVGA3D_DEVCAP_DXFMT_R16_UINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_SNORM", + SVGA3D_R16_SNORM, + SVGA3D_DEVCAP_DXFMT_R16_SNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_SINT", + SVGA3D_R16_SINT, + SVGA3D_DEVCAP_DXFMT_R16_SINT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_TYPELESS", + SVGA3D_R8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R8_TYPELESS, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_UNORM", + SVGA3D_R8_UNORM, + SVGA3D_DEVCAP_DXFMT_R8_UNORM, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_UINT", + SVGA3D_R8_UINT, + SVGA3D_DEVCAP_DXFMT_R8_UINT, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_SNORM", + SVGA3D_R8_SNORM, + SVGA3D_DEVCAP_DXFMT_R8_SNORM, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8_SINT", + SVGA3D_R8_SINT, + SVGA3D_DEVCAP_DXFMT_R8_SINT, + 1, 1, 1, + 
SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_P8", + SVGA3D_P8, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_R9G9B9E5_SHAREDEXP", + SVGA3D_R9G9B9E5_SHAREDEXP, + SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8_B8G8_UNORM", + SVGA3D_R8G8_B8G8_UNORM, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_G8R8_G8B8_UNORM", + SVGA3D_G8R8_G8B8_UNORM, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_BC1_TYPELESS", + SVGA3D_BC1_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC1_UNORM_SRGB", + SVGA3D_BC1_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC2_TYPELESS", + SVGA3D_BC2_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC2_UNORM_SRGB", + SVGA3D_BC2_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC3_TYPELESS", + SVGA3D_BC3_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC3_UNORM_SRGB", + SVGA3D_BC3_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC4_TYPELESS", + SVGA3D_BC4_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_ATI1", + SVGA3D_ATI1, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_BC4_SNORM", + SVGA3D_BC4_SNORM, + SVGA3D_DEVCAP_DXFMT_BC4_SNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC5_TYPELESS", + SVGA3D_BC5_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_ATI2", + SVGA3D_ATI2, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_BC5_SNORM", + SVGA3D_BC5_SNORM, + SVGA3D_DEVCAP_DXFMT_BC5_SNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_R10G10B10_XR_BIAS_A2_UNORM", + SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_B8G8R8A8_TYPELESS", + SVGA3D_B8G8R8A8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8A8_UNORM_SRGB", + SVGA3D_B8G8R8A8_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8X8_TYPELESS", + SVGA3D_B8G8R8X8_TYPELESS, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8X8_UNORM_SRGB", + SVGA3D_B8G8R8X8_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_Z_DF16", SVGA3D_Z_DF16, SVGA3D_DEVCAP_SURFACEFMT_Z_DF16, + 1, 1, 2, 0 }, { + "SVGA3D_Z_DF24", SVGA3D_Z_DF24, 
SVGA3D_DEVCAP_SURFACEFMT_Z_DF24, + 1, 1, 4, 0 }, { + "SVGA3D_Z_D24S8_INT", SVGA3D_Z_D24S8_INT, SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT, - 0 + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL }, + { + "SVGA3D_YV12", + SVGA3D_YV12, 0, 0, 0, 0, 0 + }, + { + "SVGA3D_R32G32B32A32_FLOAT", + SVGA3D_R32G32B32A32_FLOAT, + SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT, + 1, 1, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_FLOAT", + SVGA3D_R16G16B16A16_FLOAT, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16B16A16_UNORM", + SVGA3D_R16G16B16A16_UNORM, + SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R32G32_FLOAT", + SVGA3D_R32G32_FLOAT, + SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT, + 1, 1, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R10G10B10A2_UNORM", + SVGA3D_R10G10B10A2_UNORM, + SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R8G8B8A8_SNORM", + SVGA3D_R8G8B8A8_SNORM, + SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_FLOAT", + SVGA3D_R16G16_FLOAT, + SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_UNORM", + SVGA3D_R16G16_UNORM, + SVGA3D_DEVCAP_DXFMT_R16G16_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16G16_SNORM", + SVGA3D_R16G16_SNORM, + SVGA3D_DEVCAP_DXFMT_R16G16_SNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + /* Special case: no devcap / report sampler, render target and + * depth/stencil ability + */ + "SVGA3D_R32_FLOAT", + SVGA3D_R32_FLOAT, + 0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT*/ + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_R8G8_SNORM", + SVGA3D_R8G8_SNORM, + SVGA3D_DEVCAP_DXFMT_R8G8_SNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_R16_FLOAT", + SVGA3D_R16_FLOAT, + SVGA3D_DEVCAP_DXFMT_R16_FLOAT, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_D16_UNORM", + SVGA3D_D16_UNORM, + 0, /*SVGA3D_DEVCAP_DXFMT_D16_UNORM*/ + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + 
SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_ZSTENCIL + }, + { + "SVGA3D_A8_UNORM", + SVGA3D_A8_UNORM, + SVGA3D_DEVCAP_DXFMT_A8_UNORM, + 1, 1, 1, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_BC1_UNORM", + SVGA3D_BC1_UNORM, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC2_UNORM", + SVGA3D_BC2_UNORM, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC3_UNORM", + SVGA3D_BC3_UNORM, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_B5G6R5_UNORM", + SVGA3D_B5G6R5_UNORM, + SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_DISPLAYMODE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B5G5R5A1_UNORM", + SVGA3D_B5G5R5A1_UNORM, + SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM, + 1, 1, 2, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_DISPLAYMODE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8A8_UNORM", + SVGA3D_B8G8R8A8_UNORM, + SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_B8G8R8X8_UNORM", + SVGA3D_B8G8R8X8_UNORM, + SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM, + 1, 1, 4, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE | + SVGA3DFORMAT_OP_VOLUMETEXTURE | + SVGA3DFORMAT_OP_DISPLAYMODE | + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET + }, + { + "SVGA3D_BC4_UNORM", + SVGA3D_BC4_UNORM, + SVGA3D_DEVCAP_DXFMT_BC4_UNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + }, + { + "SVGA3D_BC5_UNORM", + SVGA3D_BC5_UNORM, + SVGA3D_DEVCAP_DXFMT_BC5_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE + } }; +/** + * Debug only: + * 1. check that format_cap_table[i] matches the i-th SVGA3D format. + * 2. check that format_conversion_table[i].pformat == i. + */ +static void +check_format_tables(void) +{ + static boolean first_call = TRUE; + + if (first_call) { + unsigned i; + + STATIC_ASSERT(Elements(format_cap_table) == SVGA3D_FORMAT_MAX); + for (i = 0; i < Elements(format_cap_table); i++) { + assert(format_cap_table[i].format == i); + } + + STATIC_ASSERT(Elements(format_conversion_table) == PIPE_FORMAT_COUNT); + for (i = 0; i < Elements(format_conversion_table); i++) { + assert(format_conversion_table[i].pformat == i); + } + + first_call = FALSE; + } +} + + /* * Get format capabilities from the host. 
It takes in consideration * deprecated/unsupported formats, and formats which are implicitely assumed to @@ -551,181 +1863,333 @@ svga_get_format_cap(struct svga_screen *ss, SVGA3dSurfaceFormat format, SVGA3dSurfaceFormatCaps *caps) { + struct svga_winsys_screen *sws = ss->sws; + SVGA3dDevCapResult result; const struct format_cap *entry; - for (entry = format_cap_table; entry < format_cap_table + Elements(format_cap_table); ++entry) { - if (entry->format == format) { - struct svga_winsys_screen *sws = ss->sws; - SVGA3dDevCapResult result; +#ifdef DEBUG + check_format_tables(); +#else + (void) check_format_tables; +#endif - if (sws->get_cap(sws, entry->index, &result)) { - /* Explicitly advertised format */ - caps->value = result.u; - } else { - /* Implicitly advertised format -- use default caps */ - caps->value = entry->defaultOperations; - } + assert(format < Elements(format_cap_table)); + entry = &format_cap_table[format]; + assert(entry->format == format); - return; + if (entry->devcap && sws->get_cap(sws, entry->devcap, &result)) { + /* Explicitly advertised format */ + if (entry->devcap > SVGA3D_DEVCAP_DX) { + /* Translate DX/VGPU10 format cap to VGPU9 cap */ + caps->value = 0; + if (result.u & SVGA3D_DXFMT_COLOR_RENDERTARGET) + caps->value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET; + if (!(result.u & SVGA3D_DXFMT_BLENDABLE)) + caps->value |= SVGA3DFORMAT_OP_NOALPHABLEND; + if (result.u & SVGA3D_DXFMT_DEPTH_RENDERTARGET) + caps->value |= SVGA3DFORMAT_OP_ZSTENCIL; + if (result.u & SVGA3D_DXFMT_SHADER_SAMPLE) + caps->value |= (SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE); + if (result.u & SVGA3D_DXFMT_VOLUME) + caps->value |= SVGA3DFORMAT_OP_VOLUMETEXTURE; + } + else { + /* Return VGPU9 format cap as-is */ + caps->value = result.u; } - } - /* Unsupported format */ - caps->value = 0; + } else { + /* Implicitly advertised format -- use default caps */ + caps->value = entry->defaultOperations; + } } -/** - * Return block size and bytes per block for the given SVGA3D format. - * block_width and block_height are one for uncompressed formats and - * greater than one for compressed formats. - * Note: we don't handle formats that are unsupported, according to - * the format_cap_table above. - */ void svga_format_size(SVGA3dSurfaceFormat format, unsigned *block_width, unsigned *block_height, unsigned *bytes_per_block) { - *block_width = *block_height = 1; + assert(format < Elements(format_cap_table)); + *block_width = format_cap_table[format].block_width; + *block_height = format_cap_table[format].block_height; + *bytes_per_block = format_cap_table[format].block_bytes; + /* Make sure the the table entry was valid */ + if (*block_width == 0) + debug_printf("Bad table entry for %s\n", svga_format_name(format)); + assert(*block_width); + assert(*block_height); + assert(*bytes_per_block); +} + + +const char * +svga_format_name(SVGA3dSurfaceFormat format) +{ + assert(format < Elements(format_cap_table)); + return format_cap_table[format].name; +} + +/** + * Is the given SVGA3dSurfaceFormat a signed or unsigned integer color format? 
+ */ +boolean +svga_format_is_integer(SVGA3dSurfaceFormat format) +{ switch (format) { - case SVGA3D_X8R8G8B8: - case SVGA3D_A8R8G8B8: - *bytes_per_block = 4; - return; - - case SVGA3D_R5G6B5: - case SVGA3D_X1R5G5B5: - case SVGA3D_A1R5G5B5: - case SVGA3D_A4R4G4B4: - *bytes_per_block = 2; - return; - - case SVGA3D_Z_D32: - *bytes_per_block = 4; - return; - - case SVGA3D_Z_D16: - *bytes_per_block = 2; - return; - - case SVGA3D_Z_D24S8: - *bytes_per_block = 4; - return; - - case SVGA3D_Z_D15S1: - *bytes_per_block = 2; - return; - - case SVGA3D_LUMINANCE8: - case SVGA3D_LUMINANCE4_ALPHA4: - *bytes_per_block = 1; - return; - - case SVGA3D_LUMINANCE16: - case SVGA3D_LUMINANCE8_ALPHA8: - *bytes_per_block = 2; - return; - - case SVGA3D_DXT1: - case SVGA3D_DXT2: - *block_width = *block_height = 4; - *bytes_per_block = 8; - return; - - case SVGA3D_DXT3: - case SVGA3D_DXT4: - case SVGA3D_DXT5: - *block_width = *block_height = 4; - *bytes_per_block = 16; - return; - - case SVGA3D_BUMPU8V8: - case SVGA3D_BUMPL6V5U5: - *bytes_per_block = 2; - return; - - case SVGA3D_BUMPX8L8V8U8: - *bytes_per_block = 4; - return; - - case SVGA3D_ARGB_S10E5: - *bytes_per_block = 8; - return; - - case SVGA3D_ARGB_S23E8: - *bytes_per_block = 16; - return; - - case SVGA3D_A2R10G10B10: - *bytes_per_block = 4; - return; - - case SVGA3D_Q8W8V8U8: - *bytes_per_block = 4; - return; - - case SVGA3D_CxV8U8: - *bytes_per_block = 2; - return; - - case SVGA3D_X8L8V8U8: - case SVGA3D_A2W10V10U10: - *bytes_per_block = 4; - return; - - case SVGA3D_ALPHA8: - *bytes_per_block = 1; - return; - - case SVGA3D_R_S10E5: - *bytes_per_block = 2; - return; - case SVGA3D_R_S23E8: - *bytes_per_block = 4; - return; - case SVGA3D_RG_S10E5: - *bytes_per_block = 4; - return; - case SVGA3D_RG_S23E8: - *bytes_per_block = 8; - return; - - case SVGA3D_BUFFER: - *bytes_per_block = 1; - return; - - case SVGA3D_Z_D24X8: - *bytes_per_block = 4; - return; - - case SVGA3D_V16U16: - *bytes_per_block = 4; - return; - - case SVGA3D_G16R16: - *bytes_per_block = 4; - return; - - case SVGA3D_A16B16G16R16: - *bytes_per_block = 8; - return; - - case SVGA3D_Z_DF16: - *bytes_per_block = 2; - return; - case SVGA3D_Z_DF24: - *bytes_per_block = 4; - return; - case SVGA3D_Z_D24S8_INT: - *bytes_per_block = 4; - return; + case SVGA3D_R32G32B32A32_SINT: + case SVGA3D_R32G32B32_SINT: + case SVGA3D_R32G32_SINT: + case SVGA3D_R32_SINT: + case SVGA3D_R16G16B16A16_SINT: + case SVGA3D_R16G16_SINT: + case SVGA3D_R16_SINT: + case SVGA3D_R8G8B8A8_SINT: + case SVGA3D_R8G8_SINT: + case SVGA3D_R8_SINT: + case SVGA3D_R32G32B32A32_UINT: + case SVGA3D_R32G32B32_UINT: + case SVGA3D_R32G32_UINT: + case SVGA3D_R32_UINT: + case SVGA3D_R16G16B16A16_UINT: + case SVGA3D_R16G16_UINT: + case SVGA3D_R16_UINT: + case SVGA3D_R8G8B8A8_UINT: + case SVGA3D_R8G8_UINT: + case SVGA3D_R8_UINT: + case SVGA3D_R10G10B10A2_UINT: + return TRUE; + default: + return FALSE; + } +} +boolean +svga_format_support_gen_mips(enum pipe_format format) +{ + assert(format < Elements(format_conversion_table)); + return ((format_conversion_table[format].flags & TF_GEN_MIPS) > 0); +} + + +/** + * Given a texture format, return the expected data type returned from + * the texture sampler. For example, UNORM8 formats return floating point + * values while SINT formats returned signed integer values. + * Note: this function could be moved into the gallum u_format.[ch] code + * if it's useful to anyone else. 
+ */ +enum tgsi_return_type +svga_get_texture_datatype(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + enum tgsi_return_type t; + + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ) { + if (util_format_is_depth_or_stencil(format)) { + t = TGSI_RETURN_TYPE_FLOAT; /* XXX revisit this */ + } + else if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { + t = TGSI_RETURN_TYPE_FLOAT; + } + else if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { + t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_UNORM : TGSI_RETURN_TYPE_UINT; + } + else if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_SNORM : TGSI_RETURN_TYPE_SINT; + } + else { + assert(!"Unexpected channel type in svga_get_texture_datatype()"); + t = TGSI_RETURN_TYPE_FLOAT; + } + } + else { + /* compressed format, shared exponent format, etc. */ + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_ETC1_RGB8: + t = TGSI_RETURN_TYPE_UNORM; + break; + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC2_SNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + t = TGSI_RETURN_TYPE_SNORM; + break; + case PIPE_FORMAT_R11G11B10_FLOAT: + case PIPE_FORMAT_R9G9B9E5_FLOAT: + t = TGSI_RETURN_TYPE_FLOAT; + break; + default: + assert(!"Unexpected channel type in svga_get_texture_datatype()"); + t = TGSI_RETURN_TYPE_FLOAT; + } + } + + return t; +} + + +/** + * Given an svga context, return true iff there are currently any integer color + * buffers attached to the framebuffer. + */ +boolean +svga_has_any_integer_cbufs(const struct svga_context *svga) +{ + unsigned i; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + struct pipe_surface *cbuf = svga->curr.framebuffer.cbufs[i]; + + if (cbuf && util_format_is_pure_integer(cbuf->format)) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Given an SVGA format, return the corresponding typeless format. + * If there is no typeless format, return the format unchanged. 
+ */ +SVGA3dSurfaceFormat +svga_typeless_format(SVGA3dSurfaceFormat format) +{ + switch (format) { + case SVGA3D_R32G32B32A32_UINT: + case SVGA3D_R32G32B32A32_SINT: + case SVGA3D_R32G32B32A32_FLOAT: + return SVGA3D_R32G32B32A32_TYPELESS; + case SVGA3D_R32G32B32_FLOAT: + case SVGA3D_R32G32B32_UINT: + case SVGA3D_R32G32B32_SINT: + return SVGA3D_R32G32B32_TYPELESS; + case SVGA3D_R16G16B16A16_UINT: + case SVGA3D_R16G16B16A16_UNORM: + case SVGA3D_R16G16B16A16_SNORM: + case SVGA3D_R16G16B16A16_SINT: + case SVGA3D_R16G16B16A16_FLOAT: + return SVGA3D_R16G16B16A16_TYPELESS; + case SVGA3D_R32G32_UINT: + case SVGA3D_R32G32_SINT: + case SVGA3D_R32G32_FLOAT: + return SVGA3D_R32G32_TYPELESS; + case SVGA3D_D32_FLOAT_S8X24_UINT: + return SVGA3D_R32G8X24_TYPELESS; + case SVGA3D_X32_TYPELESS_G8X24_UINT: + return SVGA3D_R32_FLOAT_X8X24_TYPELESS; + case SVGA3D_R10G10B10A2_UINT: + case SVGA3D_R10G10B10A2_UNORM: + return SVGA3D_R10G10B10A2_TYPELESS; + case SVGA3D_R8G8B8A8_UNORM: + case SVGA3D_R8G8B8A8_SNORM: + case SVGA3D_R8G8B8A8_UNORM_SRGB: + case SVGA3D_R8G8B8A8_UINT: + case SVGA3D_R8G8B8A8_SINT: + return SVGA3D_R8G8B8A8_TYPELESS; + case SVGA3D_R16G16_UINT: + case SVGA3D_R16G16_SINT: + case SVGA3D_R16G16_UNORM: + case SVGA3D_R16G16_SNORM: + case SVGA3D_R16G16_FLOAT: + return SVGA3D_R16G16_TYPELESS; + case SVGA3D_D32_FLOAT: + case SVGA3D_R32_FLOAT: + case SVGA3D_R32_UINT: + case SVGA3D_R32_SINT: + return SVGA3D_R32_TYPELESS; + case SVGA3D_D24_UNORM_S8_UINT: + return SVGA3D_R24G8_TYPELESS; + case SVGA3D_X24_TYPELESS_G8_UINT: + return SVGA3D_R24_UNORM_X8_TYPELESS; + case SVGA3D_R8G8_UNORM: + case SVGA3D_R8G8_SNORM: + case SVGA3D_R8G8_UINT: + case SVGA3D_R8G8_SINT: + return SVGA3D_R8G8_TYPELESS; + case SVGA3D_D16_UNORM: + case SVGA3D_R16_UNORM: + case SVGA3D_R16_UINT: + case SVGA3D_R16_SNORM: + case SVGA3D_R16_SINT: + case SVGA3D_R16_FLOAT: + return SVGA3D_R16_TYPELESS; + case SVGA3D_R8_UNORM: + case SVGA3D_R8_UINT: + case SVGA3D_R8_SNORM: + case SVGA3D_R8_SINT: + return SVGA3D_R8_TYPELESS; + case SVGA3D_B8G8R8A8_UNORM_SRGB: + case SVGA3D_B8G8R8A8_UNORM: + return SVGA3D_B8G8R8A8_TYPELESS; + case SVGA3D_B8G8R8X8_UNORM_SRGB: + case SVGA3D_B8G8R8X8_UNORM: + return SVGA3D_B8G8R8X8_TYPELESS; + case SVGA3D_BC1_UNORM: + case SVGA3D_BC1_UNORM_SRGB: + return SVGA3D_BC1_TYPELESS; + case SVGA3D_BC2_UNORM: + case SVGA3D_BC2_UNORM_SRGB: + return SVGA3D_BC2_TYPELESS; + case SVGA3D_BC3_UNORM: + case SVGA3D_BC3_UNORM_SRGB: + return SVGA3D_BC3_TYPELESS; + case SVGA3D_BC4_UNORM: + case SVGA3D_BC4_SNORM: + return SVGA3D_BC4_TYPELESS; + case SVGA3D_BC5_UNORM: + case SVGA3D_BC5_SNORM: + return SVGA3D_BC5_TYPELESS; + + /* Special cases (no corresponding _TYPELESS formats) */ + case SVGA3D_A8_UNORM: + case SVGA3D_B5G5R5A1_UNORM: + case SVGA3D_B5G6R5_UNORM: + case SVGA3D_R11G11B10_FLOAT: + case SVGA3D_R9G9B9E5_SHAREDEXP: + return format; + default: + debug_printf("Unexpected format %s in %s\n", + svga_format_name(format), __FUNCTION__); + return format; + } +} + + +/** + * Given a surface format, return the corresponding format to use for + * a texture sampler. In most cases, it's the format unchanged, but there + * are some special cases. 
+ */ +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format) +{ + switch (format) { + case SVGA3D_D16_UNORM: + return SVGA3D_R16_UNORM; + case SVGA3D_D24_UNORM_S8_UINT: + return SVGA3D_R24_UNORM_X8_TYPELESS; + case SVGA3D_D32_FLOAT: + return SVGA3D_R32_FLOAT; + case SVGA3D_D32_FLOAT_S8X24_UINT: + return SVGA3D_R32_FLOAT_X8X24_TYPELESS; default: - debug_printf("format %u\n", (unsigned) format); - assert(!"unexpected format in svga_format_size()"); - *bytes_per_block = 4; + return format; } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.h b/lib/mesa/src/gallium/drivers/svga/svga_format.h index 94c867acf..630a86a49 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_format.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_format.h @@ -28,6 +28,7 @@ #include "pipe/p_format.h" +#include "svga_context.h" #include "svga_types.h" #include "svga_reg.h" #include "svga3d_reg.h" @@ -36,6 +37,31 @@ struct svga_screen; +/** + * Vertex format flags. These are used to specify that some vertex formats + * need extra processing/conversion in the vertex shader. For example, + * setting the W component to 1, or swapping R/B, or converting packed uint + * types to signed int/snorm. + */ +#define VF_ADJUST_RANGE (1 << 0) +#define VF_W_TO_1 (1 << 1) +#define VF_U_TO_F_CAST (1 << 2) /* convert uint to float */ +#define VF_I_TO_F_CAST (1 << 3) /* convert sint to float */ +#define VF_BGRA (1 << 4) /* swap R/B */ +#define VF_PUINT_TO_SNORM (1 << 5) /* 10_10_10_2 to snorm */ +#define VF_PUINT_TO_USCALED (1 << 6) /* 10_10_10_2 to uscaled */ +#define VF_PUINT_TO_SSCALED (1 << 7) /* 10_10_10_2 to sscaled */ + +/** + * Texture format flags. + */ +#define TF_GEN_MIPS (1 << 8) /* supports hw generate mipmap */ + +void +svga_translate_vertex_format_vgpu10(enum pipe_format format, + SVGA3dSurfaceFormat *svga_format, + unsigned *vf_flags); + enum SVGA3dSurfaceFormat svga_translate_format(struct svga_screen *ss, enum pipe_format format, @@ -52,5 +78,30 @@ svga_format_size(SVGA3dSurfaceFormat format, unsigned *block_height, unsigned *bytes_per_block); +const char * +svga_format_name(SVGA3dSurfaceFormat format); + +boolean +svga_format_is_integer(SVGA3dSurfaceFormat format); + +boolean +svga_format_support_gen_mips(enum pipe_format format); + +enum tgsi_return_type +svga_get_texture_datatype(enum pipe_format format); + + +// XXX: Move this to svga_context? +boolean +svga_has_any_integer_cbufs(const struct svga_context *svga); + + +SVGA3dSurfaceFormat +svga_typeless_format(SVGA3dSurfaceFormat format); + + +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format); + #endif /* SVGA_FORMAT_H_ */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_link.c b/lib/mesa/src/gallium/drivers/svga/svga_link.c new file mode 100644 index 000000000..f3e524d38 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_link.c @@ -0,0 +1,120 @@ +/*/ + * Copyright 2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "svga_context.h" +#include "svga_link.h" + +#include "tgsi/tgsi_strings.h" + + +#define INVALID_INDEX 255 + + +/** + * Examine input and output shaders info to link outputs from the + * output shader to inputs from the input shader. + * Basically, we'll remap input shader's input slots to new numbers + * based on semantic name/index of the outputs from the output shader. + */ +void +svga_link_shaders(const struct tgsi_shader_info *outshader_info, + const struct tgsi_shader_info *inshader_info, + struct shader_linkage *linkage) +{ + unsigned i, free_slot; + + for (i = 0; i < Elements(linkage->input_map); i++) { + linkage->input_map[i] = INVALID_INDEX; + } + + /* Assign input slots for input shader inputs. + * Basically, we want to use the same index for the output shader's outputs + * and the input shader's inputs that should be linked together. + * We'll modify the input shader's inputs to match the output shader. + */ + assert(inshader_info->num_inputs <= + Elements(inshader_info->input_semantic_name)); + + /* free register index that can be used for built-in varyings */ + free_slot = outshader_info->num_outputs + 1; + + for (i = 0; i < inshader_info->num_inputs; i++) { + unsigned sem_name = inshader_info->input_semantic_name[i]; + unsigned sem_index = inshader_info->input_semantic_index[i]; + unsigned j; + /** + * Get the clip distance inputs from the output shader's + * clip distance shadow copy. 
+ */ + if (sem_name == TGSI_SEMANTIC_CLIPDIST) { + linkage->input_map[i] = outshader_info->num_outputs + 1 + sem_index; + /* make sure free_slot includes this extra output */ + free_slot = MAX2(free_slot, linkage->input_map[i] + 1); + } + else { + /* search output shader outputs for same item */ + for (j = 0; j < outshader_info->num_outputs; j++) { + assert(j < Elements(outshader_info->output_semantic_name)); + if (outshader_info->output_semantic_name[j] == sem_name && + outshader_info->output_semantic_index[j] == sem_index) { + linkage->input_map[i] = j; + break; + } + } + } + } + + linkage->num_inputs = inshader_info->num_inputs; + + /* Things like the front-face register are handled here */ + for (i = 0; i < inshader_info->num_inputs; i++) { + if (linkage->input_map[i] == INVALID_INDEX) { + unsigned j = free_slot++; + linkage->input_map[i] = j; + } + } + + /* Debug */ + if (0) { + unsigned reg = 0; + for (i = 0; i < linkage->num_inputs; i++) { + + assert(linkage->input_map[i] != INVALID_INDEX); + + debug_printf("input shader input[%d] slot %u %s %u %s\n", + i, + linkage->input_map[i], + tgsi_semantic_names[inshader_info->input_semantic_name[i]], + inshader_info->input_semantic_index[i], + tgsi_interpolate_names[inshader_info->input_interpolate[i]]); + + /* make sure no repeating register index */ + if (reg & 1 << linkage->input_map[i]) { + assert(0); + } + reg |= 1 << linkage->input_map[i]; + } + } +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_link.h b/lib/mesa/src/gallium/drivers/svga/svga_link.h new file mode 100644 index 000000000..724c61194 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_link.h @@ -0,0 +1,20 @@ + +#ifndef SVGA_LINK_H +#define SVGA_LINK_H + +#include "pipe/p_defines.h" + +struct svga_context; + +struct shader_linkage +{ + unsigned num_inputs; + ubyte input_map[PIPE_MAX_SHADER_INPUTS]; +}; + +void +svga_link_shaders(const struct tgsi_shader_info *outshader_info, + const struct tgsi_shader_info *inshader_info, + struct shader_linkage *linkage); + +#endif /* SVGA_LINK_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c index 2890516c0..0af80cd42 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c @@ -27,14 +27,15 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "svga_context.h" - #include "svga_hw_reg.h" +#include "svga_cmd.h" static inline unsigned -svga_translate_blend_factor(unsigned factor) +svga_translate_blend_factor(const struct svga_context *svga, unsigned factor) { switch (factor) { case PIPE_BLENDFACTOR_ZERO: return SVGA3D_BLENDOP_ZERO; @@ -50,8 +51,21 @@ svga_translate_blend_factor(unsigned factor) case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT; case PIPE_BLENDFACTOR_CONST_COLOR: return SVGA3D_BLENDOP_BLENDFACTOR; case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? 
*/ + case PIPE_BLENDFACTOR_CONST_ALPHA: + if (svga_have_vgpu10(svga)) + return SVGA3D_BLENDOP_BLENDFACTORALPHA; + else + return SVGA3D_BLENDOP_BLENDFACTOR; /* as close as we can get */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if (svga_have_vgpu10(svga)) + return SVGA3D_BLENDOP_INVBLENDFACTORALPHA; + else + return SVGA3D_BLENDOP_INVBLENDFACTOR; /* as close as we can get */ + case PIPE_BLENDFACTOR_SRC1_COLOR: return SVGA3D_BLENDOP_SRC1COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return SVGA3D_BLENDOP_INVSRC1COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: return SVGA3D_BLENDOP_SRC1ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return SVGA3D_BLENDOP_INVSRC1ALPHA; + case 0: return SVGA3D_BLENDOP_ONE; default: assert(0); return SVGA3D_BLENDOP_ZERO; @@ -74,18 +88,64 @@ svga_translate_blend_func(unsigned mode) } +/** + * Define a vgpu10 blend state object for the given + * svga blend state. + */ +static void +define_blend_state_object(struct svga_context *svga, + struct svga_blend_state *bs) +{ + SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; + unsigned try; + int i; + + assert(svga_have_vgpu10(svga)); + + bs->id = util_bitmask_add(svga->blend_object_id_bm); + + for (i = 0; i < SVGA3D_DX_MAX_RENDER_TARGETS; i++) { + perRT[i].blendEnable = bs->rt[i].blend_enable; + perRT[i].srcBlend = bs->rt[i].srcblend; + perRT[i].destBlend = bs->rt[i].dstblend; + perRT[i].blendOp = bs->rt[i].blendeq; + perRT[i].srcBlendAlpha = bs->rt[i].srcblend_alpha; + perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha; + perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha; + perRT[i].renderTargetWriteMask = bs->rt[i].writemask; + perRT[i].logicOpEnable = 0; + perRT[i].logicOp = SVGA3D_LOGICOP_COPY; + assert(perRT[i].srcBlend == perRT[0].srcBlend); + } + + /* Loop in case command buffer is full and we need to flush and retry */ + for (try = 0; try < 2; try++) { + enum pipe_error ret; + + ret = SVGA3D_vgpu10_DefineBlendState(svga->swc, + bs->id, + bs->alpha_to_coverage, + bs->independent_blend_enable, + perRT); + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *templ) { + struct svga_context *svga = svga_context(pipe); struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state ); unsigned i; - /* Fill in the per-rendertarget blend state. We currently only - * have one rendertarget. + * support independent blend enable and colormask per render target. */ - for (i = 0; i < 1; i++) { + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* No way to set this in SVGA3D, and no way to correctly implement it on * top of D3D9 API. Instead we try to simulate with various blend modes. 
*/ @@ -107,6 +167,9 @@ svga_create_blend_state(struct pipe_context *pipe, break; case PIPE_LOGICOP_COPY: blend->rt[i].blend_enable = FALSE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; break; case PIPE_LOGICOP_COPY_INVERTED: blend->rt[i].blend_enable = TRUE; @@ -169,38 +232,110 @@ svga_create_blend_state(struct pipe_context *pipe, case PIPE_LOGICOP_EQUIV: /* Fill these in with plausible values */ blend->rt[i].blend_enable = FALSE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; break; default: assert(0); break; } + blend->rt[i].srcblend_alpha = blend->rt[i].srcblend; + blend->rt[i].dstblend_alpha = blend->rt[i].dstblend; + blend->rt[i].blendeq_alpha = blend->rt[i].blendeq; + + if (templ->logicop_func == PIPE_LOGICOP_XOR) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "XOR logicop mode has limited support"); + } + else if (templ->logicop_func != PIPE_LOGICOP_COPY) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "general logicops are not supported"); + } } else { - blend->rt[i].blend_enable = templ->rt[0].blend_enable; - - if (templ->rt[0].blend_enable) { - blend->rt[i].srcblend = svga_translate_blend_factor(templ->rt[0].rgb_src_factor); - blend->rt[i].dstblend = svga_translate_blend_factor(templ->rt[0].rgb_dst_factor); - blend->rt[i].blendeq = svga_translate_blend_func(templ->rt[0].rgb_func); - blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_src_factor); - blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_dst_factor); - blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->rt[0].alpha_func); + /* Note: the vgpu10 device does not yet support independent + * blend terms per render target. Target[0] always specifies the + * blending terms. 
+ */ + if (templ->independent_blend_enable || templ->rt[0].blend_enable) { + /* always use the 0th target's blending terms for now */ + blend->rt[i].srcblend = + svga_translate_blend_factor(svga, templ->rt[0].rgb_src_factor); + blend->rt[i].dstblend = + svga_translate_blend_factor(svga, templ->rt[0].rgb_dst_factor); + blend->rt[i].blendeq = + svga_translate_blend_func(templ->rt[0].rgb_func); + blend->rt[i].srcblend_alpha = + svga_translate_blend_factor(svga, templ->rt[0].alpha_src_factor); + blend->rt[i].dstblend_alpha = + svga_translate_blend_factor(svga, templ->rt[0].alpha_dst_factor); + blend->rt[i].blendeq_alpha = + svga_translate_blend_func(templ->rt[0].alpha_func); if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend || blend->rt[i].dstblend_alpha != blend->rt[i].dstblend || - blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) - { + blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) { blend->rt[i].separate_alpha_blend_enable = TRUE; } } + else { + /* disabled - default blend terms */ + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD; + } + + if (templ->independent_blend_enable) { + blend->rt[i].blend_enable = templ->rt[i].blend_enable; + } + else { + blend->rt[i].blend_enable = templ->rt[0].blend_enable; + } } - blend->rt[i].writemask = templ->rt[0].colormask; + /* Some GL blend modes are not supported by the VGPU9 device (there's + * no equivalent of PIPE_BLENDFACTOR_[INV_]CONST_ALPHA). + * When we set this flag, we copy the constant blend alpha value + * to the R, G, B components. + * This works as long as the src/dst RGB blend factors doesn't use + * PIPE_BLENDFACTOR_CONST_COLOR and PIPE_BLENDFACTOR_CONST_ALPHA + * at the same time. There's no work-around for that. 
+ */ + if (!svga_have_vgpu10(svga)) { + if (templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_CONST_ALPHA || + templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_CONST_ALPHA || + templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA || + templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA) { + blend->blend_color_alpha = TRUE; + } + } + + if (templ->independent_blend_enable) { + blend->rt[i].writemask = templ->rt[i].colormask; + } + else { + blend->rt[i].writemask = templ->rt[0].colormask; + } + } + + blend->independent_blend_enable = templ->independent_blend_enable; + + blend->alpha_to_coverage = templ->alpha_to_coverage; + + if (svga_have_vgpu10(svga)) { + define_blend_state_object(svga, blend); } + svga->hud.num_state_objects++; + return blend; } + static void svga_bind_blend_state(struct pipe_context *pipe, void *blend) { @@ -210,10 +345,32 @@ static void svga_bind_blend_state(struct pipe_context *pipe, svga->dirty |= SVGA_NEW_BLEND; } - -static void svga_delete_blend_state(struct pipe_context *pipe, void *blend) +static void svga_delete_blend_state(struct pipe_context *pipe, + void *blend) { + struct svga_context *svga = svga_context(pipe); + struct svga_blend_state *bs = + (struct svga_blend_state *) blend; + + if (bs->id != SVGA3D_INVALID_ID) { + enum pipe_error ret; + + ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id); + assert(ret == PIPE_OK); + } + + if (bs->id == svga->state.hw_draw.blend_id) + svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->blend_object_id_bm, bs->id); + bs->id = SVGA3D_INVALID_ID; + } + FREE(blend); + svga->hud.num_state_objects--; } static void svga_set_blend_color( struct pipe_context *pipe, @@ -235,6 +392,3 @@ void svga_init_blend_functions( struct svga_context *svga ) svga->pipe.set_blend_color = svga_set_blend_color; } - - - diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c index dbb9f4b51..2b34f9640 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c @@ -29,6 +29,7 @@ #include "svga_cmd.h" #include "svga_surface.h" +//#include "util/u_blit_sw.h" #include "util/u_format.h" #include "util/u_surface.h" @@ -159,7 +160,8 @@ static void svga_blit(struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct pipe_blit_info info = *blit_info; - if (info.src.resource->nr_samples > 1 && + if (!svga_have_vgpu10(svga) && + info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1 && !util_format_is_depth_or_stencil(info.src.resource->format) && !util_format_is_pure_integer(info.src.resource->format)) { @@ -171,12 +173,8 @@ static void svga_blit(struct pipe_context *pipe, return; /* done */ } - if (info.mask & PIPE_MASK_S) { - debug_printf("svga: cannot blit stencil, skipping\n"); - info.mask &= ~PIPE_MASK_S; - } - - if (!util_blitter_is_blit_supported(svga->blitter, &info)) { + if ((info.mask & PIPE_MASK_S) || + !util_blitter_is_blit_supported(svga->blitter, &info)) { debug_printf("svga: blit unsupported %s -> %s\n", util_format_short_name(info.src.resource->format), util_format_short_name(info.dst.resource->format)); @@ -188,9 +186,9 @@ static void svga_blit(struct pipe_context *pipe, util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb); util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems); 
util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs); - /*util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);*/ - /*util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, - (struct pipe_stream_output_target**)svga->so_targets);*/ + util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs); + util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, + (struct pipe_stream_output_target**)svga->so_targets); util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast); util_blitter_save_viewport(svga->blitter, &svga->curr.viewport); util_blitter_save_scissor(svga->blitter, &svga->curr.scissor); @@ -199,14 +197,14 @@ static void svga_blit(struct pipe_context *pipe, util_blitter_save_depth_stencil_alpha(svga->blitter, (void*)svga->curr.depth); util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref); - /*util_blitter_save_sample_mask(svga->blitter, svga->sample_mask);*/ + util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask); util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer); util_blitter_save_fragment_sampler_states(svga->blitter, - svga->curr.num_samplers, - (void**)svga->curr.sampler); + svga->curr.num_samplers[PIPE_SHADER_FRAGMENT], + (void**)svga->curr.sampler[PIPE_SHADER_FRAGMENT]); util_blitter_save_fragment_sampler_views(svga->blitter, - svga->curr.num_sampler_views, - svga->curr.sampler_views); + svga->curr.num_sampler_views[PIPE_SHADER_FRAGMENT], + svga->curr.sampler_views[PIPE_SHADER_FRAGMENT]); /*util_blitter_save_render_condition(svga->blitter, svga->render_cond_query, svga->render_cond_cond, svga->render_cond_mode);*/ util_blitter_blit(svga->blitter, &info); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c index c4edced9b..c874726b6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c @@ -34,6 +34,78 @@ #include "svga_surface.h" +/** + * Clear the whole color buffer(s) by drawing a quad. For VGPU10 we use + * this when clearing integer render targets. We'll also clear the + * depth and/or stencil buffers if the clear_buffers mask specifies them. 
+ */ +static void +clear_buffers_with_quad(struct svga_context *svga, + unsigned clear_buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + const struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + + util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb); + util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems); + util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs); + util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs); + util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, + (struct pipe_stream_output_target**)svga->so_targets); + util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast); + util_blitter_save_viewport(svga->blitter, &svga->curr.viewport); + util_blitter_save_scissor(svga->blitter, &svga->curr.scissor); + util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs); + util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend); + util_blitter_save_depth_stencil_alpha(svga->blitter, + (void*)svga->curr.depth); + util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref); + util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask); + + util_blitter_clear(svga->blitter, + fb->width, fb->height, + 1, /* num_layers */ + clear_buffers, color, + depth, stencil); +} + + +/** + * Check if any of the color buffers are integer buffers. + */ +static boolean +is_integer_target(struct pipe_framebuffer_state *fb, unsigned buffers) +{ + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + if ((buffers & (PIPE_CLEAR_COLOR0 << i)) && + fb->cbufs[i] && + util_format_is_pure_integer(fb->cbufs[i]->format)) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Check if the integer values in the clear color can be represented + * by floats. If so, we can use the VGPU10 ClearRenderTargetView command. + * Otherwise, we need to clear with a quad. 
+ */ +static boolean +ints_fit_in_floats(const union pipe_color_union *color) +{ + const int max = 1 << 24; + return (color->i[0] <= max && + color->i[1] <= max && + color->i[2] <= max && + color->i[3] <= max); +} + + static enum pipe_error try_clear(struct svga_context *svga, unsigned buffers, @@ -52,7 +124,7 @@ try_clear(struct svga_context *svga, if (ret != PIPE_OK) return ret; - if (svga->rebind.rendertargets) { + if (svga->rebind.flags.rendertargets) { ret = svga_reemit_framebuffer_bindings(svga); if (ret != PIPE_OK) { return ret; @@ -71,29 +143,72 @@ try_clear(struct svga_context *svga, if (buffers & PIPE_CLEAR_DEPTH) flags |= SVGA3D_CLEAR_DEPTH; - if ((svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) && - (buffers & PIPE_CLEAR_STENCIL)) + if (buffers & PIPE_CLEAR_STENCIL) flags |= SVGA3D_CLEAR_STENCIL; rect.w = MAX2(rect.w, fb->zsbuf->width); rect.h = MAX2(rect.h, fb->zsbuf->height); } - if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { + if (!svga_have_vgpu10(svga) && + !svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) { restore_viewport = TRUE; ret = SVGA3D_SetViewport(svga->swc, &rect); if (ret != PIPE_OK) return ret; } - ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil, - rect.x, rect.y, rect.w, rect.h); - if (ret != PIPE_OK) - return ret; + if (svga_have_vgpu10(svga)) { + if (flags & SVGA3D_CLEAR_COLOR) { + unsigned i; + + if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) { + clear_buffers_with_quad(svga, buffers, color, depth, stencil); + /* We also cleared depth/stencil, so that's done */ + flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL); + } + else { + struct pipe_surface *rtv; + + /* Issue VGPU10 Clear commands */ + for (i = 0; i < fb->nr_cbufs; i++) { + if ((fb->cbufs[i] == NULL) || + !(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + rtv = svga_validate_surface_view(svga, + svga_surface(fb->cbufs[i])); + if (!rtv) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, + rtv, color->f); + if (ret != PIPE_OK) + return ret; + } + } + } + if (flags & (SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL)) { + struct pipe_surface *dsv = + svga_validate_surface_view(svga, svga_surface(fb->zsbuf)); + if (!dsv) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, flags, + stencil, (float) depth); + if (ret != PIPE_OK) + return ret; + } + } + else { + ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil, + rect.x, rect.y, rect.w, rect.h); + if (ret != PIPE_OK) + return ret; + } if (restore_viewport) { - memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect); - ret = SVGA3D_SetViewport(svga->swc, &rect); + ret = SVGA3D_SetViewport(svga->swc, &svga->state.hw_clear.viewport); } return ret; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c index c32b66d41..8150879ea 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c @@ -48,28 +48,46 @@ static void svga_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_constant_buffer *cb) { + struct svga_screen *svgascreen = svga_screen(pipe->screen); struct svga_context *svga = svga_context(pipe); struct pipe_resource *buf = cb ? 
cb->buffer : NULL; - - if (cb && cb->user_buffer) { - buf = svga_user_buffer_create(pipe->screen, - (void *) cb->user_buffer, - cb->buffer_size, - PIPE_BIND_CONSTANT_BUFFER); + unsigned buffer_size = 0; + + if (cb) { + buffer_size = cb->buffer_size; + if (cb->user_buffer) { + buf = svga_user_buffer_create(pipe->screen, + (void *) cb->user_buffer, + cb->buffer_size, + PIPE_BIND_CONSTANT_BUFFER); + } } assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); + assert(index < Elements(svga->curr.constbufs[shader])); + assert(index < svgascreen->max_const_buffers); + (void) svgascreen; + + pipe_resource_reference(&svga->curr.constbufs[shader][index].buffer, buf); + + /* Make sure the constant buffer size to be updated is within the + * limit supported by the device. + */ + svga->curr.constbufs[shader][index].buffer_size = + MIN2(buffer_size, SVGA_MAX_CONST_BUF_SIZE); - pipe_resource_reference(&svga->curr.cbufs[shader].buffer, buf); - svga->curr.cbufs[shader].buffer_size = cb ? cb->buffer_size : 0; - svga->curr.cbufs[shader].buffer_offset = cb ? cb->buffer_offset : 0; - svga->curr.cbufs[shader].user_buffer = NULL; /* not used */ + svga->curr.constbufs[shader][index].buffer_offset = cb ? cb->buffer_offset : 0; + svga->curr.constbufs[shader][index].user_buffer = NULL; /* not used */ if (shader == PIPE_SHADER_FRAGMENT) svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; - else + else if (shader == PIPE_SHADER_VERTEX) svga->dirty |= SVGA_NEW_VS_CONST_BUFFER; + else + svga->dirty |= SVGA_NEW_GS_CONST_BUFFER; + + /* update bitmask of dirty const buffers */ + svga->state.dirty_constbufs[shader] |= (1 << index); if (cb && cb->user_buffer) { pipe_resource_reference(&buf, NULL); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c index 8db21fd74..d84ed1df4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -23,13 +23,15 @@ * **********************************************************/ -#include "util/u_inlines.h" #include "pipe/p_defines.h" +#include "util/u_bitmask.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "svga_context.h" #include "svga_hw_reg.h" +#include "svga_cmd.h" static inline unsigned @@ -69,10 +71,67 @@ svga_translate_stencil_op(unsigned op) } +/** + * Define a vgpu10 depth/stencil state object for the given + * svga depth/stencil state. + */ +static void +define_depth_stencil_state_object(struct svga_context *svga, + struct svga_depth_stencil_state *ds) +{ + unsigned try; + + assert(svga_have_vgpu10(svga)); + + ds->id = util_bitmask_add(svga->ds_object_id_bm); + + /* spot check that these comparision tokens are the same */ + assert(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER); + assert(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS); + assert(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL); + + /* Loop in case command buffer is full and we need to flush and retry */ + for (try = 0; try < 2; try++) { + enum pipe_error ret; + + /* Note: we use the ds->stencil[0].enabled value for both the front + * and back-face enables. If single-side stencil is used, we'll have + * set the back state the same as the front state. 
+ */ + ret = SVGA3D_vgpu10_DefineDepthStencilState(svga->swc, + ds->id, + /* depth/Z */ + ds->zenable, + ds->zwriteenable, + ds->zfunc, + /* Stencil */ + ds->stencil[0].enabled, /*f|b*/ + ds->stencil[0].enabled, /*f*/ + ds->stencil[0].enabled, /*b*/ + ds->stencil_mask, + ds->stencil_writemask, + /* front stencil */ + ds->stencil[0].fail, + ds->stencil[0].zfail, + ds->stencil[0].pass, + ds->stencil[0].func, + /* back stencil */ + ds->stencil[1].fail, + ds->stencil[1].zfail, + ds->stencil[1].pass, + ds->stencil[1].func); + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *templ) { + struct svga_context *svga = svga_context(pipe); struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state ); /* Don't try to figure out CW/CCW correspondence with @@ -92,10 +151,18 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, ds->stencil_mask = templ->stencil[0].valuemask & 0xff; ds->stencil_writemask = templ->stencil[0].writemask & 0xff; } + else { + ds->stencil[0].func = SVGA3D_CMP_ALWAYS; + ds->stencil[0].fail = SVGA3D_STENCILOP_KEEP; + ds->stencil[0].zfail = SVGA3D_STENCILOP_KEEP; + ds->stencil[0].pass = SVGA3D_STENCILOP_KEEP; + } ds->stencil[1].enabled = templ->stencil[1].enabled; if (templ->stencil[1].enabled) { + assert(templ->stencil[0].enabled); + /* two-sided stencil */ ds->stencil[1].func = svga_translate_compare_func(templ->stencil[1].func); ds->stencil[1].fail = svga_translate_stencil_op(templ->stencil[1].fail_op); ds->stencil[1].zfail = svga_translate_stencil_op(templ->stencil[1].zfail_op); @@ -104,6 +171,13 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, ds->stencil_mask = templ->stencil[1].valuemask & 0xff; ds->stencil_writemask = templ->stencil[1].writemask & 0xff; } + else { + /* back face state is same as front-face state */ + ds->stencil[1].func = ds->stencil[0].func; + ds->stencil[1].fail = ds->stencil[0].fail; + ds->stencil[1].zfail = ds->stencil[0].zfail; + ds->stencil[1].pass = ds->stencil[0].pass; + } ds->zenable = templ->depth.enabled; @@ -111,12 +185,24 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, ds->zfunc = svga_translate_compare_func(templ->depth.func); ds->zwriteenable = templ->depth.writemask; } + else { + ds->zfunc = SVGA3D_CMP_ALWAYS; + } ds->alphatestenable = templ->alpha.enabled; if (ds->alphatestenable) { ds->alphafunc = svga_translate_compare_func(templ->alpha.func); ds->alpharef = templ->alpha.ref_value; } + else { + ds->alphafunc = SVGA3D_CMP_ALWAYS; + } + + if (svga_have_vgpu10(svga)) { + define_depth_stencil_state_object(svga, ds); + } + + svga->hud.num_state_objects++; return ds; } @@ -126,14 +212,45 @@ static void svga_bind_depth_stencil_state(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); + if (svga_have_vgpu10(svga)) { + /* flush any previously queued drawing before changing state */ + svga_hwtnl_flush_retry(svga); + } + svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil; - svga->dirty |= SVGA_NEW_DEPTH_STENCIL; + svga->dirty |= SVGA_NEW_DEPTH_STENCIL_ALPHA; } static void svga_delete_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { + struct svga_context *svga = svga_context(pipe); + struct svga_depth_stencil_state *ds = + (struct svga_depth_stencil_state *) depth_stencil; + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + assert(ds->id != 
SVGA3D_INVALID_ID); + + ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id); + assert(ret == PIPE_OK); + } + + if (ds->id == svga->state.hw_draw.depth_stencil_id) + svga->state.hw_draw.depth_stencil_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->ds_object_id_bm, ds->id); + ds->id = SVGA3D_INVALID_ID; + } + FREE(depth_stencil); + svga->hud.num_state_objects--; } @@ -142,6 +259,11 @@ static void svga_set_stencil_ref( struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); + if (svga_have_vgpu10(svga)) { + /* flush any previously queued drawing before changing state */ + svga_hwtnl_flush_retry(svga); + } + svga->curr.stencil_ref = *stencil_ref; svga->dirty |= SVGA_NEW_STENCIL_REF; @@ -151,6 +273,11 @@ static void svga_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { + struct svga_context *svga = svga_context(pipe); + + svga->curr.sample_mask = sample_mask; + + svga->dirty |= SVGA_NEW_BLEND; /* See emit_rss_vgpu10() */ } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c index 87f6b3d71..50ebb53df 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c @@ -27,7 +27,9 @@ #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_prim.h" +#include "util/u_prim_restart.h" #include "util/u_time.h" +#include "util/u_upload_mgr.h" #include "indices/u_indices.h" #include "svga_hw_reg.h" @@ -35,12 +37,12 @@ #include "svga_context.h" #include "svga_screen.h" #include "svga_draw.h" +#include "svga_shader.h" #include "svga_state.h" #include "svga_swtnl.h" #include "svga_debug.h" #include "svga_resource_buffer.h" - static enum pipe_error retry_draw_range_elements( struct svga_context *svga, struct pipe_resource *index_buffer, @@ -51,26 +53,31 @@ retry_draw_range_elements( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count, boolean do_retry ) { enum pipe_error ret = PIPE_OK; - svga_hwtnl_set_unfilled( svga->hwtnl, - svga->curr.rast->hw_unfilled ); - - svga_hwtnl_set_flatshade( svga->hwtnl, - svga->curr.rast->templ.flatshade, - svga->curr.rast->templ.flatshade_first ); + svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret != PIPE_OK) goto retry; + /** determine if flatshade is to be used after svga_update_state() + * in case the fragment shader is changed. 
+ */ + svga_hwtnl_set_flatshade(svga->hwtnl, + svga->curr.rast->templ.flatshade || + svga->state.hw_draw.fs->uses_flat_interp, + svga->curr.rast->templ.flatshade_first); + ret = svga_hwtnl_draw_range_elements( svga->hwtnl, index_buffer, index_size, index_bias, min_index, max_index, - prim, start, count ); + prim, start, count, + start_instance, instance_count); if (ret != PIPE_OK) goto retry; @@ -85,7 +92,7 @@ retry: index_buffer, index_size, index_bias, min_index, max_index, prim, start, count, - instance_count, FALSE ); + start_instance, instance_count, FALSE ); } return ret; @@ -94,27 +101,28 @@ retry: static enum pipe_error retry_draw_arrays( struct svga_context *svga, - unsigned prim, - unsigned start, - unsigned count, - unsigned instance_count, + unsigned prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count, boolean do_retry ) { enum pipe_error ret; - svga_hwtnl_set_unfilled( svga->hwtnl, - svga->curr.rast->hw_unfilled ); - - svga_hwtnl_set_flatshade( svga->hwtnl, - svga->curr.rast->templ.flatshade, - svga->curr.rast->templ.flatshade_first ); + svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret != PIPE_OK) goto retry; - ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim, - start, count ); + /** determine if flatshade is to be used after svga_update_state() + * in case the fragment shader is changed. + */ + svga_hwtnl_set_flatshade(svga->hwtnl, + svga->curr.rast->templ.flatshade || + svga->state.hw_draw.fs->uses_flat_interp, + svga->curr.rast->templ.flatshade_first); + + ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count, + start_instance, instance_count); if (ret != PIPE_OK) goto retry; @@ -125,18 +133,41 @@ retry: { svga_context_flush( svga, NULL ); - return retry_draw_arrays( svga, - prim, - start, - count, - instance_count, - FALSE ); + return retry_draw_arrays(svga, prim, start, count, + start_instance, instance_count, + FALSE ); } return ret; } +/** + * Determine if we need to implement primitive restart with a fallback + * path which breaks the original primitive into sub-primitive at the + * restart indexes. 
+ */ +static boolean +need_fallback_prim_restart(const struct svga_context *svga, + const struct pipe_draw_info *info) +{ + if (info->primitive_restart && info->indexed) { + if (!svga_have_vgpu10(svga)) + return TRUE; + else if (!svga->state.sw.need_swtnl) { + if (svga->curr.ib.index_size == 1) + return TRUE; /* no device support for 1-byte indexes */ + else if (svga->curr.ib.index_size == 2) + return info->restart_index != 0xffff; + else + return info->restart_index != 0xffffffff; + } + } + + return FALSE; +} + + static void svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { @@ -146,9 +177,10 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) enum pipe_error ret = 0; boolean needed_swtnl; - svga->num_draw_calls++; /* for SVGA_QUERY_DRAW_CALLS */ + svga->hud.num_draw_calls++; /* for SVGA_QUERY_NUM_DRAW_CALLS */ - if (!u_trim_pipe_prim( info->mode, &count )) + if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES && + svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK) return; /* @@ -165,6 +197,17 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE; } + if (need_fallback_prim_restart(svga, info)) { + enum pipe_error r; + r = util_draw_vbo_without_prim_restart(pipe, &svga->curr.ib, info); + assert(r == PIPE_OK); + (void) r; + return; + } + + if (!u_trim_pipe_prim( info->mode, &count )) + return; + needed_swtnl = svga->state.sw.need_swtnl; svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL ); @@ -176,7 +219,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) #endif if (svga->state.sw.need_swtnl) { - svga->num_fallbacks++; /* for SVGA_QUERY_FALLBACKS */ + svga->hud.num_fallbacks++; /* for SVGA_QUERY_NUM_FALLBACKS */ if (!needed_swtnl) { /* * We're switching from HW to SW TNL. 
SW TNL will require mapping all @@ -208,17 +251,15 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->max_index, info->mode, info->start + offset, - info->count, + count, + info->start_instance, info->instance_count, TRUE ); } else { - ret = retry_draw_arrays( svga, - info->mode, - info->start, - info->count, - info->instance_count, - TRUE ); + ret = retry_draw_arrays(svga, info->mode, info->start, count, + info->start_instance, info->instance_count, + TRUE); } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c index d593c7816..8e0af12d2 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c @@ -24,6 +24,7 @@ **********************************************************/ #include "pipe/p_defines.h" +#include "util/u_debug_image.h" #include "util/u_string.h" #include "svga_screen.h" #include "svga_surface.h" diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c index 75299c50d..4a9b3c96a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_fs.c @@ -31,7 +31,6 @@ #include "draw/draw_context.h" #include "svga_context.h" -#include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" #include "svga_debug.h" @@ -63,12 +62,6 @@ svga_create_fs_state(struct pipe_context *pipe, fs->draw_shader = draw_create_fragment_shader(svga->swtnl.draw, templ); - if (SVGA_DEBUG & DEBUG_TGSI || 0) { - debug_printf("%s id: %u, inputs: %u, outputs: %u\n", - __FUNCTION__, fs->base.id, - fs->base.info.num_inputs, fs->base.info.num_outputs); - } - return fs; } @@ -94,20 +87,30 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader) svga_hwtnl_flush_retry(svga); + assert(fs->base.parent == NULL); + draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader); for (variant = fs->base.variants; variant; variant = tmp) { tmp = variant->next; - ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); - (void) ret; /* PIPE_ERROR_ not handled yet */ - - /* - * Remove stale references to this variant to ensure a new variant on the - * same address will be detected as a change. - */ - if (variant == svga->state.hw_draw.fs) + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.fs) { + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); + assert(ret == PIPE_OK); + } svga->state.hw_draw.fs = NULL; + } + + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + assert(ret == PIPE_OK); + } } FREE((void *)fs->base.tokens); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_gs.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_gs.c new file mode 100644 index 000000000..d614e9d6c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_gs.c @@ -0,0 +1,142 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_text.h" + +#include "svga_context.h" +#include "svga_cmd.h" +#include "svga_debug.h" +#include "svga_shader.h" +#include "svga_streamout.h" + +static void * +svga_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_geometry_shader *gs = CALLOC_STRUCT(svga_geometry_shader); + + if (!gs) + return NULL; + + gs->base.tokens = tgsi_dup_tokens(templ->tokens); + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(gs->base.tokens, &gs->base.info); + + gs->draw_shader = draw_create_geometry_shader(svga->swtnl.draw, templ); + + gs->base.id = svga->debug.shader_id++; + + gs->generic_outputs = svga_get_generic_outputs_mask(&gs->base.info); + + /* check for any stream output declarations */ + if (templ->stream_output.num_outputs) { + gs->base.stream_output = svga_create_stream_output(svga, &gs->base, + &templ->stream_output); + } + + return gs; +} + + +static void +svga_bind_gs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader; + struct svga_context *svga = svga_context(pipe); + + svga->curr.user_gs = gs; + svga->dirty |= SVGA_NEW_GS; +} + + +static void +svga_delete_gs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader; + struct svga_geometry_shader *next_gs; + struct svga_shader_variant *variant, *tmp; + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + /* Start deletion from the original geometry shader state */ + if (gs->base.parent != NULL) + gs = (struct svga_geometry_shader *)gs->base.parent; + + /* Free the list of geometry shaders */ + while (gs) { + next_gs = (struct svga_geometry_shader *)gs->base.next; + + if (gs->base.stream_output != NULL) + svga_delete_stream_output(svga, gs->base.stream_output); + + draw_delete_geometry_shader(svga->swtnl.draw, gs->draw_shader); + + for (variant = gs->base.variants; variant; variant = tmp) { + tmp = variant->next; + + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.gs) { + ret = 
svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); + assert(ret == PIPE_OK); + } + svga->state.hw_draw.gs = NULL; + } + + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, + variant); + assert(ret == PIPE_OK); + } + } + + FREE((void *)gs->base.tokens); + FREE(gs); + gs = next_gs; + } +} + + +void +svga_init_gs_functions(struct svga_context *svga) +{ + svga->pipe.create_gs_state = svga_create_gs_state; + svga->pipe.bind_gs_state = svga_bind_gs_state; + svga->pipe.delete_gs_state = svga_delete_gs_state; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c index 1df32a13b..af9356d7c 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c @@ -27,6 +27,7 @@ #include "util/u_framebuffer.h" #include "util/u_inlines.h" +#include "util/u_pstipple.h" #include "svga_context.h" #include "svga_screen.h" @@ -46,10 +47,37 @@ static void svga_set_scissor_states( struct pipe_context *pipe, } -static void svga_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) +static void +svga_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) { - /* overridden by the draw module */ + struct svga_context *svga = svga_context(pipe); + + /* release old texture */ + pipe_resource_reference(&svga->polygon_stipple.texture, NULL); + + /* release old sampler view */ + if (svga->polygon_stipple.sampler_view) { + pipe->sampler_view_destroy(pipe, + &svga->polygon_stipple.sampler_view->base); + } + + /* create new stipple texture */ + svga->polygon_stipple.texture = + util_pstipple_create_stipple_texture(pipe, stipple->stipple); + + /* create new sampler view */ + svga->polygon_stipple.sampler_view = + (struct svga_pipe_sampler_view *) + util_pstipple_create_sampler_view(pipe, + svga->polygon_stipple.texture); + + /* allocate sampler state, if first time */ + if (!svga->polygon_stipple.sampler) { + svga->polygon_stipple.sampler = util_pstipple_create_sampler(pipe); + } + + svga->dirty |= SVGA_NEW_STIPPLE; } @@ -83,6 +111,11 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, boolean propagate = FALSE; unsigned i; + /* make sure any pending drawing calls are flushed before changing + * the framebuffer state + */ + svga_hwtnl_flush_retry(svga); + dst->width = fb->width; dst->height = fb->height; dst->nr_cbufs = fb->nr_cbufs; @@ -99,9 +132,6 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, } if (propagate) { - /* make sure that drawing calls comes before propagation calls */ - svga_hwtnl_flush_retry( svga ); - for (i = 0; i < dst->nr_cbufs; i++) { struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL; if (dst->cbufs[i] && dst->cbufs[i] != s) @@ -109,13 +139,30 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, } } - /* XXX: Actually the virtual hardware may support rendertargets with - * different size, depending on the host API and driver, but since we cannot - * know that make no such assumption here. 
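/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  Nearly every command emitted in these hunks follows the same
 * idiom: queue the command, and if the command buffer is full (anything
 * other than PIPE_OK comes back), flush it and try exactly once more.
 * svga_set_shader(), svga_destroy_shader_variant() and the various
 * DefineQuery and DestroyQuery style calls all open-code it.  A generic
 * helper built on hypothetical callbacks would look roughly like this:
 */
#include <assert.h>

enum demo_error { DEMO_OK = 0, DEMO_ERROR_FULL = -1 };

typedef enum demo_error (*demo_emit_fn)(void *context, void *arg);
typedef void (*demo_flush_fn)(void *context);

static enum demo_error
demo_emit_with_retry(void *context, demo_emit_fn emit, void *arg,
                     demo_flush_fn flush)
{
   enum demo_error ret = emit(context, arg);
   if (ret != DEMO_OK) {
      /* Command buffer was presumably full: flush and retry once. */
      flush(context);
      ret = emit(context, arg);
      assert(ret == DEMO_OK);
   }
   return ret;
}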
*/ - for(i = 0; i < fb->nr_cbufs; ++i) { - if (fb->zsbuf && fb->cbufs[i]) { - assert(fb->zsbuf->width == fb->cbufs[i]->width); - assert(fb->zsbuf->height == fb->cbufs[i]->height); + /* Check that all surfaces are the same size. + * Actually, the virtual hardware may support rendertargets with + * different size, depending on the host API and driver, + */ + { + int width = 0, height = 0; + if (fb->zsbuf) { + width = fb->zsbuf->width; + height = fb->zsbuf->height; + } + for (i = 0; i < fb->nr_cbufs; ++i) { + if (fb->cbufs[i]) { + if (width && height) { + if (fb->cbufs[i]->width != width || + fb->cbufs[i]->height != height) { + debug_warning("Mixed-size color and depth/stencil surfaces " + "may not work properly"); + } + } + else { + width = fb->cbufs[i]->width; + height = fb->cbufs[i]->height; + } + } } } @@ -197,6 +244,22 @@ static void svga_set_viewport_states( struct pipe_context *pipe, } +/** + * Called by state tracker to specify a callback function the driver + * can use to report info back to the state tracker. + */ +static void +svga_set_debug_callback(struct pipe_context *pipe, + const struct pipe_debug_callback *cb) +{ + struct svga_context *svga = svga_context(pipe); + + if (cb) + svga->debug.callback = *cb; + else + memset(&svga->debug.callback, 0, sizeof(svga->debug.callback)); +} + void svga_init_misc_functions( struct svga_context *svga ) { @@ -205,6 +268,7 @@ void svga_init_misc_functions( struct svga_context *svga ) svga->pipe.set_framebuffer_state = svga_set_framebuffer_state; svga->pipe.set_clip_state = svga_set_clip_state; svga->pipe.set_viewport_states = svga_set_viewport_states; + svga->pipe.set_debug_callback = svga_set_debug_callback; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c index 208a2cd14..255494a5d 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_query.c @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. + * Copyright 2008-2015 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -25,6 +25,8 @@ #include "pipe/p_state.h" #include "pipe/p_context.h" + +#include "util/u_bitmask.h" #include "util/u_memory.h" #include "svga_cmd.h" @@ -42,16 +44,26 @@ struct pipe_query { int dummy; }; - struct svga_query { struct pipe_query base; unsigned type; /**< PIPE_QUERY_x or SVGA_QUERY_x */ SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */ + unsigned id; /** Per-context query identifier */ + + struct pipe_fence_handle *fence; + /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */ + + /* For VGPU9 */ struct svga_winsys_buffer *hwbuf; volatile SVGA3dQueryResult *queryResult; - struct pipe_fence_handle *fence; + + /** For VGPU10 */ + struct svga_winsys_gb_query *gb_query; + SVGA3dDXQueryFlags flags; + unsigned offset; /**< offset to the gb_query memory */ + struct pipe_query *predicate; /** The associated query that can be used for predicate */ /** For non-GPU SVGA_QUERY_x queries */ uint64_t begin_count, end_count; @@ -72,54 +84,655 @@ svga_get_query_result(struct pipe_context *pipe, boolean wait, union pipe_query_result *result); +static enum pipe_error +define_query_vgpu9(struct svga_context *svga, + struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + + sq->hwbuf = svga_winsys_buffer_create(svga, 1, + SVGA_BUFFER_USAGE_PINNED, + sizeof *sq->queryResult); + if (!sq->hwbuf) + return PIPE_ERROR_OUT_OF_MEMORY; + + sq->queryResult = (SVGA3dQueryResult *) + sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE); + if (!sq->queryResult) { + sws->buffer_destroy(sws, sq->hwbuf); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + sq->queryResult->totalSize = sizeof *sq->queryResult; + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + + /* We request the buffer to be pinned and assume it is always mapped. + * The reason is that we don't want to wait for fences when checking the + * query status. + */ + sws->buffer_unmap(sws, sq->hwbuf); + + return PIPE_OK; +} + +static enum pipe_error +begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + + if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { + /* The application doesn't care for the pending query result. + * We cannot let go of the existing buffer and just get a new one + * because its storage may be reused for other purposes and clobbered + * by the host when it determines the query result. So the only + * option here is to wait for the existing query's result -- not a + * big deal, given that no sane application would do this. + */ + uint64_t result; + svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result); + assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); + } + + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + sws->fence_reference(sws, &sq->fence, NULL); + + ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); + } + return ret; +} + +static enum pipe_error +end_query_vgpu9(struct svga_context *svga, struct svga_query *sq) +{ + enum pipe_error ret = PIPE_OK; + + /* Set to PENDING before sending EndQuery. 
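/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The VGPU9 query path above keeps a tiny state machine in a
 * pinned, permanently mapped buffer: begin resets the result word to NEW,
 * end flips it to PENDING before the EndQuery command is queued, and the
 * host later rewrites it to SUCCEEDED or FAILED along with the result.
 * Reading the result just polls that word, blocking on a fence when the
 * caller asked for a synchronous read.  A standalone model (names invented
 * here; a busy-wait stands in for the fence wait):
 */
#include <stdbool.h>
#include <stdint.h>

enum demo_qstate { DEMO_NEW, DEMO_PENDING, DEMO_SUCCEEDED, DEMO_FAILED };

struct demo_q {
   volatile enum demo_qstate state;   /* rewritten by the "host" */
   volatile uint32_t result32;
};

static void demo_q_begin(struct demo_q *q)
{
   q->state = DEMO_NEW;
}

static void demo_q_end(struct demo_q *q)
{
   q->state = DEMO_PENDING;   /* set before the end command is queued */
   /* ...queue EndQuery; the host fills result32 and updates state... */
}

/* Returns true and fills *result once the host has finished the query. */
static bool demo_q_result(struct demo_q *q, bool wait, uint64_t *result)
{
   if (q->state == DEMO_PENDING) {
      if (!wait)
         return false;
      /* the real driver waits on a fence here rather than spinning */
      while (q->state == DEMO_PENDING)
         ;
   }
   *result = q->result32;
   return true;
}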
*/ + sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; + + ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); + } + return ret; +} + +static boolean +get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, + boolean wait, uint64_t *result) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret; + SVGA3dQueryState state; + + if (!sq->fence) { + /* The query status won't be updated by the host unless + * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause + * a synchronous wait on the host. + */ + ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); + } + assert (ret == PIPE_OK); + svga_context_flush(svga, &sq->fence); + assert(sq->fence); + } + + state = sq->queryResult->state; + if (state == SVGA3D_QUERYSTATE_PENDING) { + if (!wait) + return FALSE; + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + state = sq->queryResult->state; + } + + assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || + state == SVGA3D_QUERYSTATE_FAILED); + + *result = (uint64_t)sq->queryResult->result32; + return TRUE; +} + + +/** + * VGPU10 + * + * There is one query mob allocated for each context to be shared by all + * query types. The mob is used to hold queries's state and result. Since + * each query result type is of different length, to ease the query allocation + * management, the mob is divided into memory blocks. Each memory block + * will hold queries of the same type. Multiple memory blocks can be allocated + * for a particular query type. + * + * Currently each memory block is of 184 bytes. We support up to 128 + * memory blocks. The query memory size is arbitrary right now. + * Each occlusion query takes about 8 bytes. One memory block can accomodate + * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion + * queries. That seems reasonable for now. If we think this limit is + * not enough, we can increase the limit or try to grow the mob in runtime. + * Note, SVGA device does not impose one mob per context for queries, + * we could allocate multiple mobs for queries; however, wddm KMD does not + * currently support that. + * + * Also note that the GL guest driver does not issue any of the + * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery. 
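/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The comment above describes carving one per-context query
 * MOB into fixed-size memory blocks, each block dedicated to a single
 * query type and subdivided into equally sized slots.  A heavily
 * simplified standalone model of that two-level allocator, using plain
 * bitmasks instead of util_bitmask and an invented block count:
 */
#include <stdint.h>

#define DEMO_BLOCK_SIZE  184       /* bytes per block, as in the comment */
#define DEMO_NUM_BLOCKS  16        /* the real code allows up to 128 */

struct demo_block {
   int      in_use;                /* block handed out to some query type? */
   unsigned query_size;            /* slot size used within this block */
   uint32_t slot_mask;             /* bit i set means slot i is allocated */
};

static struct demo_block demo_blocks[DEMO_NUM_BLOCKS];

/* Grab a free block for queries of 'query_size' bytes; -1 if none left. */
static int demo_alloc_block(unsigned query_size)
{
   int i;
   for (i = 0; i < DEMO_NUM_BLOCKS; i++) {
      if (!demo_blocks[i].in_use) {
         demo_blocks[i].in_use = 1;
         demo_blocks[i].query_size = query_size;
         demo_blocks[i].slot_mask = 0;
         return i;
      }
   }
   return -1;
}

/* Grab a slot in block 'b'; returns a byte offset into the MOB, or -1. */
static int demo_alloc_slot(int b)
{
   struct demo_block *blk = &demo_blocks[b];
   unsigned max_slots = DEMO_BLOCK_SIZE / blk->query_size;
   unsigned s;
   for (s = 0; s < max_slots && s < 32; s++) {
      if (!(blk->slot_mask & (1u << s))) {
         blk->slot_mask |= 1u << s;
         return (int)(b * DEMO_BLOCK_SIZE + s * blk->query_size);
      }
   }
   return -1;                      /* block full: caller grabs another one */
}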
+ */ +#define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2) +#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE) + +struct svga_qmem_alloc_entry +{ + unsigned start_offset; /* start offset of the memory block */ + unsigned block_index; /* block index of the memory block */ + unsigned query_size; /* query size in this memory block */ + unsigned nquery; /* number of queries allocated */ + struct util_bitmask *alloc_mask; /* allocation mask */ + struct svga_qmem_alloc_entry *next; /* next memory block */ +}; + + +/** + * Allocate a memory block from the query object memory + * \return -1 if out of memory, else index of the query memory block + */ +static int +allocate_query_block(struct svga_context *svga) +{ + int index; + unsigned offset; + + /* Find the next available query block */ + index = util_bitmask_add(svga->gb_query_alloc_mask); + + if (index == UTIL_BITMASK_INVALID_INDEX) + return -1; + + offset = index * SVGA_QUERY_MEM_BLOCK_SIZE; + if (offset >= svga->gb_query_len) { + unsigned i; + + /** + * All the memory blocks are allocated, lets see if there is + * any empty memory block around that can be freed up. + */ + index = -1; + for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) { + struct svga_qmem_alloc_entry *alloc_entry; + struct svga_qmem_alloc_entry *prev_alloc_entry = NULL; + + alloc_entry = svga->gb_query_map[i]; + while (alloc_entry && index == -1) { + if (alloc_entry->nquery == 0) { + /* This memory block is empty, it can be recycled. */ + if (prev_alloc_entry) { + prev_alloc_entry->next = alloc_entry->next; + } else { + svga->gb_query_map[i] = alloc_entry->next; + } + index = alloc_entry->block_index; + } else { + prev_alloc_entry = alloc_entry; + alloc_entry = alloc_entry->next; + } + } + } + } + + return index; +} + +/** + * Allocate a slot in the specified memory block. + * All slots in this memory block are of the same size. + * + * \return -1 if out of memory, else index of the query slot + */ +static int +allocate_query_slot(struct svga_context *svga, + struct svga_qmem_alloc_entry *alloc) +{ + int index; + unsigned offset; + + /* Find the next available slot */ + index = util_bitmask_add(alloc->alloc_mask); + + if (index == UTIL_BITMASK_INVALID_INDEX) + return -1; + + offset = index * alloc->query_size; + if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE) + return -1; + + alloc->nquery++; + + return index; +} + +/** + * Deallocate the specified slot in the memory block. + * If all slots are freed up, then deallocate the memory block + * as well, so it can be allocated for other query type + */ +static void +deallocate_query_slot(struct svga_context *svga, + struct svga_qmem_alloc_entry *alloc, + unsigned index) +{ + assert(index != UTIL_BITMASK_INVALID_INDEX); + + util_bitmask_clear(alloc->alloc_mask, index); + alloc->nquery--; + + /** + * Don't worry about deallocating the empty memory block here. + * The empty memory block will be recycled when no more memory block + * can be allocated. 
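/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  As the comment above says, freeing the last slot does not
 * immediately release the memory block; empty blocks are reclaimed lazily,
 * only when the allocator runs out of fresh blocks and sweeps the per-type
 * lists for one with no live queries.  A minimal model of that sweep
 * (invented names, singly linked list as in the driver):
 */
struct demo_qblock {
   unsigned nquery;                  /* number of live queries in the block */
   int block_index;
   struct demo_qblock *next;         /* per-query-type list */
};

/* Unlink and return the index of the first empty block, or -1 if none. */
static int demo_reclaim_empty_block(struct demo_qblock **list_head)
{
   struct demo_qblock **link = list_head;
   while (*link) {
      struct demo_qblock *blk = *link;
      if (blk->nquery == 0) {
         *link = blk->next;          /* caller reuses blk->block_index */
         return blk->block_index;
      }
      link = &blk->next;
   }
   return -1;
}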
+ */ +} + +static struct svga_qmem_alloc_entry * +allocate_query_block_entry(struct svga_context *svga, + unsigned len) +{ + struct svga_qmem_alloc_entry *alloc_entry; + int block_index = -1; + + block_index = allocate_query_block(svga); + if (block_index == -1) + return NULL; + alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry); + if (!alloc_entry) + return NULL; + + alloc_entry->block_index = block_index; + alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE; + alloc_entry->nquery = 0; + alloc_entry->alloc_mask = util_bitmask_create(); + alloc_entry->next = NULL; + alloc_entry->query_size = len; + + return alloc_entry; +} + +/** + * Allocate a memory slot for a query of the specified type. + * It will first search through the memory blocks that are allocated + * for the query type. If no memory slot is available, it will try + * to allocate another memory block within the query object memory for + * this query type. + */ +static int +allocate_query(struct svga_context *svga, + SVGA3dQueryType type, + unsigned len) +{ + struct svga_qmem_alloc_entry *alloc_entry; + int slot_index = -1; + unsigned offset; + + assert(type < SVGA_QUERY_MAX); + + alloc_entry = svga->gb_query_map[type]; + + if (!alloc_entry) { + /** + * No query memory block has been allocated for this query type, + * allocate one now + */ + alloc_entry = allocate_query_block_entry(svga, len); + if (!alloc_entry) + return -1; + svga->gb_query_map[type] = alloc_entry; + } + + /* Allocate a slot within the memory block allocated for this query type */ + slot_index = allocate_query_slot(svga, alloc_entry); + + if (slot_index == -1) { + /* This query memory block is full, allocate another one */ + alloc_entry = allocate_query_block_entry(svga, len); + if (!alloc_entry) + return -1; + alloc_entry->next = svga->gb_query_map[type]; + svga->gb_query_map[type] = alloc_entry; + slot_index = allocate_query_slot(svga, alloc_entry); + } + + assert(slot_index != -1); + offset = slot_index * len + alloc_entry->start_offset; + + return offset; +} + + +/** + * Deallocate memory slot allocated for the specified query + */ +static void +deallocate_query(struct svga_context *svga, + struct svga_query *sq) +{ + struct svga_qmem_alloc_entry *alloc_entry; + unsigned slot_index; + unsigned offset = sq->offset; + + alloc_entry = svga->gb_query_map[sq->svga_type]; + + while (alloc_entry) { + if (offset >= alloc_entry->start_offset && + offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) { + + /* The slot belongs to this memory block, deallocate it */ + slot_index = (offset - alloc_entry->start_offset) / + alloc_entry->query_size; + deallocate_query_slot(svga, alloc_entry, slot_index); + alloc_entry = NULL; + } else { + alloc_entry = alloc_entry->next; + } + } +} + + +/** + * Destroy the gb query object and all the related query structures + */ +static void +destroy_gb_query_obj(struct svga_context *svga) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + unsigned i; + + for (i = 0; i < SVGA_QUERY_MAX; i++) { + struct svga_qmem_alloc_entry *alloc_entry, *next; + alloc_entry = svga->gb_query_map[i]; + while (alloc_entry) { + next = alloc_entry->next; + util_bitmask_destroy(alloc_entry->alloc_mask); + FREE(alloc_entry); + alloc_entry = next; + } + svga->gb_query_map[i] = NULL; + } + + if (svga->gb_query) + sws->query_destroy(sws, svga->gb_query); + svga->gb_query = NULL; + + util_bitmask_destroy(svga->gb_query_alloc_mask); +} + +/** + * Define query and create the gb query object if it is not already 
created. + * There is only one gb query object per context which will be shared by + * queries of all types. + */ +static enum pipe_error +define_query_vgpu10(struct svga_context *svga, + struct svga_query *sq, int resultLen) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + int qlen; + enum pipe_error ret = PIPE_OK; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + if (svga->gb_query == NULL) { + /* Create a gb query object */ + svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE); + if (!svga->gb_query) + return PIPE_ERROR_OUT_OF_MEMORY; + svga->gb_query_len = SVGA_QUERY_MEM_SIZE; + memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map)); + svga->gb_query_alloc_mask = util_bitmask_create(); + + /* Bind the query object to the context */ + if (svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_SET) != PIPE_OK) { + svga_context_flush(svga, NULL); + svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_SET); + } + } + + sq->gb_query = svga->gb_query; + + /* Allocate an integer ID for this query */ + sq->id = util_bitmask_add(svga->query_id_bm); + if (sq->id == UTIL_BITMASK_INVALID_INDEX) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* Find a slot for this query in the gb object */ + qlen = resultLen + sizeof(SVGA3dQueryState); + sq->offset = allocate_query(svga, sq->svga_type, qlen); + if (sq->offset == -1) + return PIPE_ERROR_OUT_OF_MEMORY; + + SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n", + sq->svga_type, sq->id, sq->offset); + + /** + * Send SVGA3D commands to define the query + */ + ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); + } + if (ret != PIPE_OK) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); + } + assert(ret == PIPE_OK); + + ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); + } + assert(ret == PIPE_OK); + + return PIPE_OK; +} + +static enum pipe_error +destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + enum pipe_error ret; + + ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id); + + /* Deallocate the memory slot allocated for this query */ + deallocate_query(svga, sq); + + return ret; +} + + +/** + * Rebind queryies to the context. 
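/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  define_query_vgpu10() above does three things for every new
 * query: lazily create and bind the single per-context query object, hand
 * the query an integer id, then carve out a result offset and tell the
 * device about the (id, type, offset) triple via DefineQuery, BindQuery
 * and SetQueryOffset.  The outline below restates that ordering with
 * stubbed-out, purely hypothetical helpers:
 */
#include <stdbool.h>

struct demo_ctx { void *gb_query; };
struct demo_query10 { unsigned id; unsigned offset; int type; };

/* Stand-ins for the winsys and command-buffer calls. */
static void *demo_create_query_mob(void) { static int mob; return &mob; }
static void demo_bind_query_mob(struct demo_ctx *c) { (void) c; }
static unsigned demo_alloc_id(void) { static unsigned next; return next++; }
static int demo_alloc_offset(int type, unsigned len) { (void) type; return (int) len; }
static void demo_emit_define(unsigned id, int type) { (void) id; (void) type; }
static void demo_emit_bind(void *mob, unsigned id) { (void) mob; (void) id; }
static void demo_emit_set_offset(unsigned id, unsigned off) { (void) id; (void) off; }

static bool
demo_define_query(struct demo_ctx *ctx, struct demo_query10 *q,
                  unsigned result_len)
{
   int off;

   if (!ctx->gb_query) {
      /* One query object per context, shared by all query types. */
      ctx->gb_query = demo_create_query_mob();
      demo_bind_query_mob(ctx);
   }

   q->id = demo_alloc_id();
   off = demo_alloc_offset(q->type, result_len);
   if (off < 0)
      return false;
   q->offset = (unsigned) off;

   demo_emit_define(q->id, q->type);        /* DXDefineQuery    */
   demo_emit_bind(ctx->gb_query, q->id);    /* DXBindQuery      */
   demo_emit_set_offset(q->id, q->offset);  /* DXSetQueryOffset */
   return true;
}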
+ */ +static void +rebind_vgpu10_query(struct svga_context *svga) +{ + if (svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF) != PIPE_OK) { + svga_context_flush(svga, NULL); + svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF); + } + + svga->rebind.flags.query = FALSE; +} + + +static enum pipe_error +begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + int status = 0; + + sws->fence_reference(sws, &sq->fence, NULL); + + /* Initialize the query state to NEW */ + status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW); + if (status) + return PIPE_ERROR; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + /* Send the BeginQuery command to the device */ + ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); + } + return ret; +} + +static enum pipe_error +end_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); + } + + /* Finish fence is copied here from get_query_result_vgpu10. This helps + * with cases where svga_begin_query might be called again before + * svga_get_query_result, such as GL_TIME_ELAPSED. + */ + if (!sq->fence) { + svga_context_flush(svga, &sq->fence); + } + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + + return ret; +} + +static boolean +get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq, + boolean wait, void *result, int resultLen) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + SVGA3dQueryState queryState; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); + + if (queryState == SVGA3D_QUERYSTATE_PENDING) { + if (!wait) + return FALSE; + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); + } + + assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED || + queryState == SVGA3D_QUERYSTATE_FAILED); + + return TRUE; +} static struct pipe_query * svga_create_query(struct pipe_context *pipe, unsigned query_type, unsigned index) { - struct svga_context *svga = svga_context( pipe ); - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_context *svga = svga_context(pipe); struct svga_query *sq; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(query_type < SVGA_QUERY_MAX); sq = CALLOC_STRUCT(svga_query); if (!sq) - goto no_sq; + goto fail; + + /* Allocate an integer ID for the query */ + sq->id = util_bitmask_add(svga->query_id_bm); + if (sq->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; + + SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__, + query_type, sq, sq->id); switch (query_type) { case PIPE_QUERY_OCCLUSION_COUNTER: sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION; + if (svga_have_vgpu10(svga)) { + define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult)); - 
sq->hwbuf = svga_winsys_buffer_create(svga, 1, - SVGA_BUFFER_USAGE_PINNED, - sizeof *sq->queryResult); - if (!sq->hwbuf) { - debug_printf("svga: failed to alloc query object!\n"); - goto no_hwbuf; - } + /** + * In OpenGL, occlusion counter query can be used in conditional + * rendering; however, in DX10, only OCCLUSION_PREDICATE query can + * be used for predication. Hence, we need to create an occlusion + * predicate query along with the occlusion counter query. So when + * the occlusion counter query is used for predication, the associated + * query of occlusion predicate type will be used + * in the SetPredication command. + */ + sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index); - sq->queryResult = (SVGA3dQueryResult *) - sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE); - if (!sq->queryResult) { - debug_printf("svga: failed to map query object!\n"); - goto no_query_result; + } else { + define_query_vgpu9(svga, sq); } - - sq->queryResult->totalSize = sizeof *sq->queryResult; - sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; - - /* We request the buffer to be pinned and assume it is always mapped. - * The reason is that we don't want to wait for fences when checking the - * query status. - */ - sws->buffer_unmap(sws, sq->hwbuf); break; - case SVGA_QUERY_DRAW_CALLS: - case SVGA_QUERY_FALLBACKS: + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE; + define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult)); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS; + define_query_vgpu10(svga, sq, + sizeof(SVGADXStreamOutStatisticsQueryResult)); + break; + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP; + define_query_vgpu10(svga, sq, + sizeof(SVGADXTimestampQueryResult)); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_MAP_BUFFER_TIME: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: break; default: assert(!"unexpected query type in svga_create_query()"); @@ -129,39 +742,75 @@ svga_create_query(struct pipe_context *pipe, return &sq->base; -no_query_result: - sws->buffer_destroy(sws, sq->hwbuf); -no_hwbuf: +fail: FREE(sq); -no_sq: return NULL; } - static void svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + struct svga_query *sq; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + if (!q) { + destroy_gb_query_obj(svga); + return; + } + + sq = svga_query(q); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - sws->buffer_destroy(sws, sq->hwbuf); - sq->hwbuf = NULL; + if (svga_have_vgpu10(svga)) { + /* make sure to also destroy any associated 
predicate query */ + if (sq->predicate) + svga_destroy_query(pipe, sq->predicate); + destroy_query_vgpu10(svga, sq); + } else { + sws->buffer_destroy(sws, sq->hwbuf); + } sws->fence_reference(sws, &sq->fence, NULL); break; - case SVGA_QUERY_DRAW_CALLS: - case SVGA_QUERY_FALLBACKS: + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + destroy_query_vgpu10(svga, sq); + sws->fence_reference(sws, &sq->fence, NULL); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + destroy_query_vgpu10(svga, sq); + sws->fence_reference(sws, &sq->fence, NULL); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_MAP_BUFFER_TIME: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: /* nothing */ break; default: assert(!"svga: unexpected query type in svga_destroy_query()"); } + /* Free the query id */ + util_bitmask_clear(svga->query_id_bm, sq->id); + FREE(sq); } @@ -169,13 +818,15 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) static boolean svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_context *svga = svga_context( pipe ); - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); enum pipe_error ret; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(sq); + assert(sq->type < SVGA_QUERY_MAX); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); /* Need to flush out buffered drawing commands so that they don't * get counted in the query results. @@ -184,44 +835,69 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - assert(!svga->sq); - if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { - /* The application doesn't care for the pending query result. - * We cannot let go of the existing buffer and just get a new one - * because its storage may be reused for other purposes and clobbered - * by the host when it determines the query result. So the only - * option here is to wait for the existing query's result -- not a - * big deal, given that no sane application would do this. 
- */ - uint64_t result; - svga_get_query_result(pipe, q, TRUE, (void*)&result); - assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); - } - - sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; - sws->fence_reference(sws, &sq->fence, NULL); - - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - assert(ret == PIPE_OK); + if (svga_have_vgpu10(svga)) { + ret = begin_query_vgpu10(svga, sq); + /* also need to start the associated occlusion predicate query */ + if (sq->predicate) { + enum pipe_error status; + status = begin_query_vgpu10(svga, svga_query(sq->predicate)); + assert(status == PIPE_OK); + (void) status; + } + } else { + ret = begin_query_vgpu9(svga, sq); } - - svga->sq = sq; + assert(ret == PIPE_OK); + (void) ret; + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + ret = begin_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + ret = begin_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + sq->begin_count = svga->hud.num_draw_calls; + break; + case SVGA_QUERY_NUM_FALLBACKS: + sq->begin_count = svga->hud.num_fallbacks; break; - case SVGA_QUERY_DRAW_CALLS: - sq->begin_count = svga->num_draw_calls; + case SVGA_QUERY_NUM_FLUSHES: + sq->begin_count = svga->hud.num_flushes; break; - case SVGA_QUERY_FALLBACKS: - sq->begin_count = svga->num_fallbacks; + case SVGA_QUERY_NUM_VALIDATIONS: + sq->begin_count = svga->hud.num_validations; + break; + case SVGA_QUERY_MAP_BUFFER_TIME: + sq->begin_count = svga->hud.map_buffer_time; + break; + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + sq->begin_count = svga->hud.num_resources_mapped; + break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->begin_count = svga->hud.num_bytes_uploaded; break; case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: /* nothing */ break; default: assert(!"unexpected query type in svga_begin_query()"); } + + svga->sq[sq->type] = sq; + return true; } @@ -229,48 +905,91 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) static void svga_end_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_context *svga = svga_context( pipe ); - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); enum pipe_error ret; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(sq); + assert(sq->type < SVGA_QUERY_MAX); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); + + if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq) + svga_begin_query(pipe, q); svga_hwtnl_flush_retry(svga); + assert(svga->sq[sq->type] == sq); + switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - assert(svga->sq == sq); - - /* Set to PENDING before sending EndQuery. 
*/ - sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; - - ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf); - assert(ret == PIPE_OK); + if (svga_have_vgpu10(svga)) { + ret = end_query_vgpu10(svga, sq); + /* also need to end the associated occlusion predicate query */ + if (sq->predicate) { + enum pipe_error status; + status = end_query_vgpu10(svga, svga_query(sq->predicate)); + assert(status == PIPE_OK); + (void) status; + } + } else { + ret = end_query_vgpu9(svga, sq); } - + assert(ret == PIPE_OK); + (void) ret; /* TODO: Delay flushing. We don't really need to flush here, just ensure * that there is one flush before svga_get_query_result attempts to get * the result. */ svga_context_flush(svga, NULL); - - svga->sq = NULL; break; - case SVGA_QUERY_DRAW_CALLS: - sq->end_count = svga->num_draw_calls; + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(svga_have_vgpu10(svga)); + ret = end_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); break; - case SVGA_QUERY_FALLBACKS: - sq->end_count = svga->num_fallbacks; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + ret = end_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + sq->end_count = svga->hud.num_draw_calls; + break; + case SVGA_QUERY_NUM_FALLBACKS: + sq->end_count = svga->hud.num_fallbacks; + break; + case SVGA_QUERY_NUM_FLUSHES: + sq->end_count = svga->hud.num_flushes; + break; + case SVGA_QUERY_NUM_VALIDATIONS: + sq->end_count = svga->hud.num_validations; + break; + case SVGA_QUERY_MAP_BUFFER_TIME: + sq->end_count = svga->hud.map_buffer_time; + break; + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + sq->end_count = svga->hud.num_resources_mapped; + break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->end_count = svga->hud.num_bytes_uploaded; break; case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: /* nothing */ break; default: assert(!"unexpected query type in svga_end_query()"); } + svga->sq[sq->type] = NULL; } @@ -280,64 +999,175 @@ svga_get_query_result(struct pipe_context *pipe, boolean wait, union pipe_query_result *vresult) { - struct svga_context *svga = svga_context( pipe ); - struct svga_screen *svgascreen = svga_screen( pipe->screen ); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_query *sq = svga_query( q ); - SVGA3dQueryState state; - uint64_t *result = (uint64_t *) vresult; + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); + uint64_t *result = (uint64_t *)vresult; + boolean ret = TRUE; - SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__); + assert(sq); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n", + __FUNCTION__, sq, sq->id, wait); switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - /* The query status won't be updated by the host unless - * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause - * a synchronous wait on the host. 
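/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The SVGA_QUERY_NUM_* cases above are pure software counters
 * kept in the context's "hud" bookkeeping: begin and end just snapshot a
 * running total and the reported value is the difference, so these queries
 * never touch the device.  Minimal model (invented names):
 */
#include <stdint.h>

struct demo_hud {
   uint64_t num_draw_calls;          /* running total, bumped at each draw */
};

struct demo_counter_query {
   uint64_t begin_count, end_count;
};

static void demo_counter_begin(struct demo_counter_query *q,
                               const struct demo_hud *hud)
{
   q->begin_count = hud->num_draw_calls;
}

static void demo_counter_end(struct demo_counter_query *q,
                             const struct demo_hud *hud)
{
   q->end_count = hud->num_draw_calls;
}

static uint64_t demo_counter_result(const struct demo_counter_query *q)
{
   return q->end_count - q->begin_count;
}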
- */ - if (!sq->fence) { - enum pipe_error ret; - - ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf); - assert(ret == PIPE_OK); - } - - svga_context_flush(svga, &sq->fence); - - assert(sq->fence); + if (svga_have_vgpu10(svga)) { + SVGADXOcclusionQueryResult occResult; + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&occResult, sizeof(occResult)); + *result = (uint64_t)occResult.samplesRendered; + } else { + ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result); } + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: { + SVGADXOcclusionPredicateQueryResult occResult; + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&occResult, sizeof(occResult)); + vresult->b = occResult.anySamplesRendered != 0; + break; + } + case PIPE_QUERY_SO_STATISTICS: { + SVGADXStreamOutStatisticsQueryResult sResult; + struct pipe_query_data_so_statistics *pResult = + (struct pipe_query_data_so_statistics *)vresult; - state = sq->queryResult->state; - if (state == SVGA3D_QUERYSTATE_PENDING) { - if (!wait) - return FALSE; - sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); - state = sq->queryResult->state; - } + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof(sResult)); + pResult->num_primitives_written = sResult.numPrimitivesWritten; + pResult->primitives_storage_needed = sResult.numPrimitivesRequired; + break; + } + case PIPE_QUERY_TIMESTAMP: { + SVGADXTimestampQueryResult sResult; - assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || - state == SVGA3D_QUERYSTATE_FAILED); + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof(sResult)); + *result = (uint64_t)sResult.timestamp; + break; + } + case PIPE_QUERY_PRIMITIVES_GENERATED: { + SVGADXStreamOutStatisticsQueryResult sResult; - *result = (uint64_t) sq->queryResult->result32; + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof sResult); + *result = (uint64_t)sResult.numPrimitivesRequired; break; - case SVGA_QUERY_DRAW_CALLS: - /* fall-through */ - case SVGA_QUERY_FALLBACKS: + } + case PIPE_QUERY_PRIMITIVES_EMITTED: { + SVGADXStreamOutStatisticsQueryResult sResult; + + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof sResult); + *result = (uint64_t)sResult.numPrimitivesWritten; + break; + } + /* These are per-frame counters */ + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_MAP_BUFFER_TIME: vresult->u64 = sq->end_count - sq->begin_count; break; + /* These are running total counters */ case SVGA_QUERY_MEMORY_USED: - vresult->u64 = svgascreen->total_resource_bytes; + vresult->u64 = svgascreen->hud.total_resource_bytes; + break; + case SVGA_QUERY_NUM_SHADERS: + vresult->u64 = svga->hud.num_shaders; + break; + case SVGA_QUERY_NUM_RESOURCES: + vresult->u64 = svgascreen->hud.num_resources; + break; + case SVGA_QUERY_NUM_STATE_OBJECTS: + vresult->u64 = svga->hud.num_state_objects; + break; + case SVGA_QUERY_NUM_SURFACE_VIEWS: + vresult->u64 = svga->hud.num_surface_views; + break; + case SVGA_QUERY_NUM_GENERATE_MIPMAP: + vresult->u64 = svga->hud.num_generate_mipmap; break; 
default: assert(!"unexpected query type in svga_get_query_result"); } - SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result); + SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult)); - return TRUE; + return ret; +} + +static void +svga_render_condition(struct pipe_context *pipe, struct pipe_query *q, + boolean condition, uint mode) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + struct svga_query *sq = svga_query(q); + SVGA3dQueryId queryId; + enum pipe_error ret; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + assert(svga_have_vgpu10(svga)); + if (sq == NULL) { + queryId = SVGA3D_INVALID_ID; + } + else { + assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION || + sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE); + + if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) { + assert(sq->predicate); + /** + * For conditional rendering, make sure to use the associated + * predicate query. + */ + sq = svga_query(sq->predicate); + } + queryId = sq->id; + + if ((mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) { + sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY); + } + } + + ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition); + } +} + + +/* + * This function is a workaround because we lack the ability to query + * renderer's time synchornously. + */ +static uint64_t +svga_get_timestamp(struct pipe_context *pipe) +{ + struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0); + union pipe_query_result result; + + svga_begin_query(pipe, q); + svga_end_query(pipe,q); + svga_get_query_result(pipe, q, TRUE, &result); + svga_destroy_query(pipe, q); + + return result.u64; } @@ -349,4 +1179,6 @@ svga_init_query_functions(struct svga_context *svga) svga->pipe.begin_query = svga_begin_query; svga->pipe.end_query = svga_end_query; svga->pipe.get_query_result = svga_get_query_result; + svga->pipe.render_condition = svga_render_condition; + svga->pipe.get_timestamp = svga_get_timestamp; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 356898a86..8e0db5395 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -23,16 +23,18 @@ * **********************************************************/ +#include "pipe/p_defines.h" #include "draw/draw_context.h" +#include "util/u_bitmask.h" #include "util/u_inlines.h" -#include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "svga_cmd.h" #include "svga_context.h" +#include "svga_hw_reg.h" #include "svga_screen.h" -#include "svga_hw_reg.h" /* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW. 
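/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  The svga_get_timestamp() workaround a few hunks above has to
 * answer a synchronous "what time is it" request with only an asynchronous
 * query mechanism, so it spins up a throwaway TIMESTAMP query, blocks on
 * its result and tears it down again.  Generic shape of that trick, with
 * trivially stubbed, hypothetical query hooks standing in for the real
 * pipe_context entry points:
 */
#include <stdint.h>

struct demo_pipe { uint64_t fake_clock; };

static void *demo_tsq_create(struct demo_pipe *p) { static int q; (void) p; return &q; }
static void demo_tsq_begin(struct demo_pipe *p, void *q) { (void) p; (void) q; }
static void demo_tsq_end(struct demo_pipe *p, void *q) { p->fake_clock++; (void) q; }
static uint64_t demo_tsq_result_blocking(struct demo_pipe *p, void *q)
{ (void) q; return p->fake_clock; }
static void demo_tsq_destroy(struct demo_pipe *p, void *q) { (void) p; (void) q; }

static uint64_t demo_get_timestamp(struct demo_pipe *p)
{
   void *q = demo_tsq_create(p);
   uint64_t result;

   demo_tsq_begin(p, q);                      /* empty begin/end interval */
   demo_tsq_end(p, q);
   result = demo_tsq_result_blocking(p, q);   /* wait == TRUE */
   demo_tsq_destroy(p, q);
   return result;
}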
*/ @@ -61,6 +63,96 @@ static SVGA3dShadeMode svga_translate_flatshade( unsigned mode ) } +static unsigned +translate_fill_mode(unsigned fill) +{ + switch (fill) { + case PIPE_POLYGON_MODE_POINT: + return SVGA3D_FILLMODE_POINT; + case PIPE_POLYGON_MODE_LINE: + return SVGA3D_FILLMODE_LINE; + case PIPE_POLYGON_MODE_FILL: + return SVGA3D_FILLMODE_FILL; + default: + assert(!"Bad fill mode"); + return SVGA3D_FILLMODE_FILL; + } +} + + +static unsigned +translate_cull_mode(unsigned cull) +{ + switch (cull) { + case PIPE_FACE_NONE: + return SVGA3D_CULL_NONE; + case PIPE_FACE_FRONT: + return SVGA3D_CULL_FRONT; + case PIPE_FACE_BACK: + return SVGA3D_CULL_BACK; + case PIPE_FACE_FRONT_AND_BACK: + /* NOTE: we simply no-op polygon drawing in svga_draw_vbo() */ + return SVGA3D_CULL_NONE; + default: + assert(!"Bad cull mode"); + return SVGA3D_CULL_NONE; + } +} + + +static void +define_rasterizer_object(struct svga_context *svga, + struct svga_rasterizer_state *rast) +{ + unsigned fill_mode = translate_fill_mode(rast->templ.fill_front); + unsigned cull_mode = translate_cull_mode(rast->templ.cull_face); + int depth_bias = rast->templ.offset_units; + float slope_scaled_depth_bias = rast->templ.offset_scale; + float depth_bias_clamp = 0.0; /* XXX fix me */ + unsigned try; + const float line_width = rast->templ.line_width > 0.0f ? + rast->templ.line_width : 1.0f; + const uint8 line_factor = rast->templ.line_stipple_enable ? + rast->templ.line_stipple_factor : 0; + const uint16 line_pattern = rast->templ.line_stipple_enable ? + rast->templ.line_stipple_pattern : 0; + + rast->id = util_bitmask_add(svga->rast_object_id_bm); + + if (rast->templ.fill_front != rast->templ.fill_back) { + /* The VGPU10 device can't handle different front/back fill modes. + * We'll handle that with a swtnl/draw fallback. But we need to + * make sure we always fill triangles in that case. + */ + fill_mode = SVGA3D_FILLMODE_FILL; + } + + for (try = 0; try < 2; try++) { + enum pipe_error ret = + SVGA3D_vgpu10_DefineRasterizerState(svga->swc, + rast->id, + fill_mode, + cull_mode, + rast->templ.front_ccw, + depth_bias, + depth_bias_clamp, + slope_scaled_depth_bias, + rast->templ.depth_clip, + rast->templ.scissor, + rast->templ.multisample, + rast->templ.line_smooth, + line_width, + rast->templ.line_stipple_enable, + line_factor, + line_pattern, + !rast->templ.flatshade_first); + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) @@ -92,17 +184,24 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->antialiasedlineenable = templ->line_smooth; rast->lastpixel = templ->line_last_pixel; rast->pointsprite = templ->sprite_coord_enable != 0x0; - rast->pointsize = templ->point_size; - rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + + if (templ->point_smooth) { + /* For smooth points we need to generate fragments for at least + * a 2x2 region. Otherwise the quad we draw may be too small and + * we may generate no fragments at all. 
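/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  translate_cull_mode() above maps PIPE_FACE_FRONT_AND_BACK to
 * "cull nothing" and relies on the svga_draw_vbo() hunk earlier in this
 * diff to simply skip triangle draws when both faces are culled.  The two
 * halves of that arrangement, in isolation (invented enum names):
 */
#include <stdbool.h>

enum demo_face { DEMO_FACE_NONE, DEMO_FACE_FRONT, DEMO_FACE_BACK,
                 DEMO_FACE_FRONT_AND_BACK };
enum demo_cull { DEMO_CULL_NONE, DEMO_CULL_FRONT, DEMO_CULL_BACK };

static enum demo_cull demo_translate_cull(enum demo_face cull_face)
{
   switch (cull_face) {
   case DEMO_FACE_FRONT:
      return DEMO_CULL_FRONT;
   case DEMO_FACE_BACK:
      return DEMO_CULL_BACK;
   case DEMO_FACE_FRONT_AND_BACK:
      /* handled by skipping the draw instead of by the rasterizer */
      return DEMO_CULL_NONE;
   default:
      return DEMO_CULL_NONE;
   }
}

/* The matching draw-time check: filled primitives have nothing to draw. */
static bool demo_skip_triangle_draw(enum demo_face cull_face, bool is_triangles)
{
   return is_triangles && cull_face == DEMO_FACE_FRONT_AND_BACK;
}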
+ */ + rast->pointsize = MAX2(2.0f, templ->point_size); + } + else { + rast->pointsize = templ->point_size; + } + + rast->hw_fillmode = PIPE_POLYGON_MODE_FILL; /* Use swtnl + decomposition implement these: */ - if (templ->poly_stipple_enable) { - rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; - rast->need_pipeline_tris_str = "poly stipple"; - } - if (screen->maxLineWidth > 1.0F) { + if (templ->line_width <= screen->maxLineWidth) { /* pass line width to device */ rast->linewidth = MAX2(1.0F, templ->line_width); } @@ -129,7 +228,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } } - if (templ->point_smooth) { + if (!svga_have_vgpu10(svga) && templ->point_smooth) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; rast->need_pipeline_points_str = "smooth points"; } @@ -231,13 +330,13 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->depthbias = templ->offset_units; } - rast->hw_unfilled = fill; + rast->hw_fillmode = fill; } if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { /* Turn off stuff which will get done in the draw module: */ - rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + rast->hw_fillmode = PIPE_POLYGON_MODE_FILL; rast->slopescaledepthbias = 0; rast->depthbias = 0; } @@ -249,6 +348,17 @@ svga_create_rasterizer_state(struct pipe_context *pipe, debug_printf(" tris: %s \n", rast->need_pipeline_tris_str); } + if (svga_have_vgpu10(svga)) { + define_rasterizer_object(svga, rast); + } + + if (templ->poly_smooth) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "GL_POLYGON_SMOOTH not supported"); + } + + svga->hud.num_state_objects++; + return rast; } @@ -258,18 +368,41 @@ static void svga_bind_rasterizer_state( struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state; + if (!raster || + !svga->curr.rast || + raster->templ.poly_stipple_enable != + svga->curr.rast->templ.poly_stipple_enable) { + svga->dirty |= SVGA_NEW_STIPPLE; + } - draw_set_rasterizer_state(svga->swtnl.draw, raster ? 
&raster->templ : NULL, - state); svga->curr.rast = raster; svga->dirty |= SVGA_NEW_RAST; } -static void svga_delete_rasterizer_state(struct pipe_context *pipe, - void *raster) +static void +svga_delete_rasterizer_state(struct pipe_context *pipe, void *state) { - FREE(raster); + struct svga_context *svga = svga_context(pipe); + struct svga_rasterizer_state *raster = + (struct svga_rasterizer_state *) state; + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret = + SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); + } + + if (raster->id == svga->state.hw_draw.rasterizer_id) + svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->rast_object_id_bm, raster->id); + } + + FREE(state); + svga->hud.num_state_objects--; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c index effd490dd..3e778f0a0 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -23,17 +23,21 @@ * **********************************************************/ -#include "util/u_inlines.h" #include "pipe/p_defines.h" +#include "util/u_bitmask.h" #include "util/u_format.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "svga_context.h" +#include "svga_cmd.h" +#include "svga_debug.h" #include "svga_resource_texture.h" +#include "svga_surface.h" +#include "svga_sampler_view.h" -#include "svga_debug.h" static inline unsigned translate_wrap_mode(unsigned wrap) @@ -91,6 +95,126 @@ static inline unsigned translate_mip_filter( unsigned filter ) } } + +static uint8 +translate_comparison_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return SVGA3D_COMPARISON_NEVER; + case PIPE_FUNC_LESS: + return SVGA3D_COMPARISON_LESS; + case PIPE_FUNC_EQUAL: + return SVGA3D_COMPARISON_EQUAL; + case PIPE_FUNC_LEQUAL: + return SVGA3D_COMPARISON_LESS_EQUAL; + case PIPE_FUNC_GREATER: + return SVGA3D_COMPARISON_GREATER; + case PIPE_FUNC_NOTEQUAL: + return SVGA3D_COMPARISON_NOT_EQUAL; + case PIPE_FUNC_GEQUAL: + return SVGA3D_COMPARISON_GREATER_EQUAL; + case PIPE_FUNC_ALWAYS: + return SVGA3D_COMPARISON_ALWAYS; + default: + assert(!"Invalid comparison function"); + return SVGA3D_COMPARISON_ALWAYS; + } +} + + +/** + * Translate filtering state to vgpu10 format. + */ +static SVGA3dFilter +translate_filter_mode(unsigned img_filter, + unsigned min_filter, + unsigned mag_filter, + boolean anisotropic, + boolean compare) +{ + SVGA3dFilter mode = 0; + + if (img_filter == PIPE_TEX_FILTER_LINEAR) + mode |= SVGA3D_FILTER_MIP_LINEAR; + if (min_filter == PIPE_TEX_FILTER_LINEAR) + mode |= SVGA3D_FILTER_MIN_LINEAR; + if (mag_filter == PIPE_TEX_FILTER_LINEAR) + mode |= SVGA3D_FILTER_MAG_LINEAR; + if (anisotropic) + mode |= SVGA3D_FILTER_ANISOTROPIC; + if (compare) + mode |= SVGA3D_FILTER_COMPARE; + + return mode; +} + + +/** + * Define a vgpu10 sampler state. 
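/*
 * Editor's note: illustrative sketch added for this review, not part of
 * the patch.  translate_filter_mode() above folds gallium's separate
 * min/mag/mip filter settings plus the anisotropic and shadow-compare
 * flags into a single device filter bitfield.  Standalone model with
 * invented bit values:
 */
#include <stdbool.h>
#include <stdint.h>

#define DEMO_FILTER_MIP_LINEAR   (1u << 0)
#define DEMO_FILTER_MIN_LINEAR   (1u << 1)
#define DEMO_FILTER_MAG_LINEAR   (1u << 2)
#define DEMO_FILTER_ANISOTROPIC  (1u << 3)
#define DEMO_FILTER_COMPARE      (1u << 4)

enum demo_tex_filter { DEMO_TEX_NEAREST, DEMO_TEX_LINEAR };

static uint32_t
demo_translate_filter(enum demo_tex_filter mip,
                      enum demo_tex_filter min,
                      enum demo_tex_filter mag,
                      bool anisotropic, bool compare)
{
   uint32_t mode = 0;

   if (mip == DEMO_TEX_LINEAR)
      mode |= DEMO_FILTER_MIP_LINEAR;
   if (min == DEMO_TEX_LINEAR)
      mode |= DEMO_FILTER_MIN_LINEAR;
   if (mag == DEMO_TEX_LINEAR)
      mode |= DEMO_FILTER_MAG_LINEAR;
   if (anisotropic)
      mode |= DEMO_FILTER_ANISOTROPIC;
   if (compare)
      mode |= DEMO_FILTER_COMPARE;
   return mode;
}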
+ */ +static void +define_sampler_state_object(struct svga_context *svga, + struct svga_sampler_state *ss, + const struct pipe_sampler_state *ps) +{ + uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */ + boolean anisotropic; + uint8 compare_func; + SVGA3dFilter filter; + SVGA3dRGBAFloat bcolor; + unsigned try; + float min_lod, max_lod; + + assert(svga_have_vgpu10(svga)); + + anisotropic = ss->aniso_level > 1.0f; + + filter = translate_filter_mode(ps->min_mip_filter, + ps->min_img_filter, + ps->mag_img_filter, + anisotropic, + ss->compare_mode); + + compare_func = translate_comparison_func(ss->compare_func); + + COPY_4V(bcolor.value, ps->border_color.f); + + ss->id = util_bitmask_add(svga->sampler_object_id_bm); + + assert(ps->min_lod <= ps->max_lod); + + if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* just use the base level image */ + min_lod = max_lod = 0.0f; + } + else { + min_lod = ps->min_lod; + max_lod = ps->max_lod; + } + + /* Loop in case command buffer is full and we need to flush and retry */ + for (try = 0; try < 2; try++) { + enum pipe_error ret = + SVGA3D_vgpu10_DefineSamplerState(svga->swc, + ss->id, + filter, + ss->addressu, + ss->addressv, + ss->addressw, + ss->lod_bias, /* float */ + max_aniso, + compare_func, + bcolor, + min_lod, /* float */ + max_lod); /* float */ + if (ret == PIPE_OK) + return; + svga_context_flush(svga, NULL); + } +} + + static void * svga_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) @@ -141,10 +265,16 @@ svga_create_sampler_state(struct pipe_context *pipe, } } + if (svga_have_vgpu10(svga)) { + define_sampler_state_object(svga, cso, sampler); + } + SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n", cso->min_lod, cso->view_min_lod, cso->view_max_lod, cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? 
"SVGA3D_TEX_FILTER_NONE" : "SOMETHING"); + svga->hud.num_state_objects++; + return cso; } @@ -157,23 +287,31 @@ svga_bind_sampler_states(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); unsigned i; + boolean any_change = FALSE; assert(shader < PIPE_SHADER_TYPES); assert(start + num <= PIPE_MAX_SAMPLERS); - /* we only support fragment shader samplers at this time */ - if (shader != PIPE_SHADER_FRAGMENT) + /* Pre-VGPU10 only supports FS textures */ + if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT) return; - for (i = 0; i < num; i++) - svga->curr.sampler[start + i] = samplers[i]; + for (i = 0; i < num; i++) { + if (svga->curr.sampler[shader][start + i] != samplers[i]) + any_change = TRUE; + svga->curr.sampler[shader][start + i] = samplers[i]; + } + + if (!any_change) { + return; + } /* find highest non-null sampler[] entry */ { - unsigned j = MAX2(svga->curr.num_samplers, start + num); - while (j > 0 && svga->curr.sampler[j - 1] == NULL) + unsigned j = MAX2(svga->curr.num_samplers[shader], start + num); + while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL) j--; - svga->curr.num_samplers = j; + svga->curr.num_samplers[shader] = j; } svga->dirty |= SVGA_NEW_SAMPLER; @@ -183,7 +321,24 @@ svga_bind_sampler_states(struct pipe_context *pipe, static void svga_delete_sampler_state(struct pipe_context *pipe, void *sampler) { + struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler; + struct svga_context *svga = svga_context(pipe); + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id); + } + util_bitmask_clear(svga->sampler_object_id_bm, ss->id); + } + FREE(sampler); + svga->hud.num_state_objects--; } @@ -192,17 +347,21 @@ svga_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) { - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); - - if (view) { - *view = *templ; - view->reference.count = 1; - view->texture = NULL; - pipe_resource_reference(&view->texture, texture); - view->context = pipe; + struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view); + + if (!sv) { + return NULL; } - return view; + sv->base = *templ; + sv->base.reference.count = 1; + sv->base.texture = NULL; + pipe_resource_reference(&sv->base.texture, texture); + + sv->base.context = pipe; + sv->id = SVGA3D_INVALID_ID; + + return &sv->base; } @@ -210,8 +369,37 @@ static void svga_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { - pipe_resource_reference(&view->texture, NULL); - FREE(view); + struct svga_context *svga = svga_context(pipe); + struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view); + + if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) { + if (view->context != pipe) { + /* The SVGA3D device will generate an error (and on Linux, cause + * us to abort) if we try to destroy a shader resource view from + * a context other than the one it was created with. Skip the + * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler + * view for now. This should only sometimes happen when a shared + * texture is deleted. + */ + _debug_printf("context mismatch in %s\n", __func__); + } + else { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); /* XXX is this needed? 
*/ + + ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); + } + util_bitmask_clear(svga->sampler_view_id_bm, sv->id); + } + } + + pipe_resource_reference(&sv->base.texture, NULL); + + FREE(sv); } static void @@ -225,23 +413,25 @@ svga_set_sampler_views(struct pipe_context *pipe, unsigned flag_1d = 0; unsigned flag_srgb = 0; uint i; + boolean any_change = FALSE; assert(shader < PIPE_SHADER_TYPES); - assert(start + num <= Elements(svga->curr.sampler_views)); + assert(start + num <= Elements(svga->curr.sampler_views[shader])); - /* we only support fragment shader sampler views at this time */ - if (shader != PIPE_SHADER_FRAGMENT) + /* Pre-VGPU10 only supports FS textures */ + if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT) return; for (i = 0; i < num; i++) { - if (svga->curr.sampler_views[start + i] != views[i]) { + if (svga->curr.sampler_views[shader][start + i] != views[i]) { /* Note: we're using pipe_sampler_view_release() here to work around * a possible crash when the old view belongs to another context that * was already destroyed. */ - pipe_sampler_view_release(pipe, &svga->curr.sampler_views[start + i]); - pipe_sampler_view_reference(&svga->curr.sampler_views[start + i], + pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]); + pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i], views[i]); + any_change = TRUE; } if (!views[i]) @@ -254,12 +444,16 @@ svga_set_sampler_views(struct pipe_context *pipe, flag_1d |= 1 << (start + i); } + if (!any_change) { + return; + } + /* find highest non-null sampler_views[] entry */ { - unsigned j = MAX2(svga->curr.num_sampler_views, start + num); - while (j > 0 && svga->curr.sampler_views[j - 1] == NULL) + unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num); + while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL) j--; - svga->curr.num_sampler_views = j; + svga->curr.num_sampler_views[shader] = j; } svga->dirty |= SVGA_NEW_TEXTURE_BINDING; @@ -270,7 +464,31 @@ svga_set_sampler_views(struct pipe_context *pipe, svga->dirty |= SVGA_NEW_TEXTURE_FLAGS; svga->curr.tex_flags.flag_1d = flag_1d; svga->curr.tex_flags.flag_srgb = flag_srgb; - } + } + + /* Check if any of the sampler view resources collide with the framebuffer + * color buffers or depth stencil resource. If so, enable the NEW_FRAME_BUFFER + * dirty bit so that emit_framebuffer can be invoked to create backed view + * for the conflicted surface view. + */ + for (i = 0; i < svga->curr.framebuffer.nr_cbufs; i++) { + if (svga->curr.framebuffer.cbufs[i]) { + struct svga_surface *s = svga_surface(svga->curr.framebuffer.cbufs[i]); + if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { + svga->dirty |= SVGA_NEW_FRAME_BUFFER; + break; + } + } + } + + if (svga->curr.framebuffer.zsbuf) { + struct svga_surface *s = svga_surface(svga->curr.framebuffer.zsbuf); + if (s) { + if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { + svga->dirty |= SVGA_NEW_FRAME_BUFFER; + } + } + } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c new file mode 100644 index 000000000..3f443c44e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c @@ -0,0 +1,320 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_memory.h" +#include "util/u_bitmask.h" + +#include "svga_cmd.h" +#include "svga_context.h" +#include "svga_resource_buffer.h" +#include "svga_shader.h" +#include "svga_debug.h" +#include "svga_streamout.h" + +struct svga_stream_output_target { + struct pipe_stream_output_target base; +}; + +/** cast wrapper */ +static inline struct svga_stream_output_target * +svga_stream_output_target(struct pipe_stream_output_target *s) +{ + return (struct svga_stream_output_target *)s; +} + +struct svga_stream_output * +svga_create_stream_output(struct svga_context *svga, + struct svga_shader *shader, + const struct pipe_stream_output_info *info) +{ + struct svga_stream_output *streamout; + SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS]; + unsigned strides[SVGA3D_DX_MAX_SOTARGETS]; + unsigned i; + enum pipe_error ret; + unsigned id; + + assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS); + + /* Gallium utility creates shaders with stream output. + * For non-DX10, just return NULL. + */ + if (!svga_have_vgpu10(svga)) + return NULL; + + assert(info->num_outputs <= SVGA3D_MAX_STREAMOUT_DECLS); + + /* Allocate an integer ID for the stream output */ + id = util_bitmask_add(svga->stream_output_id_bm); + if (id == UTIL_BITMASK_INVALID_INDEX) { + return NULL; + } + + /* Allocate the streamout data structure */ + streamout = CALLOC_STRUCT(svga_stream_output); + + if (!streamout) + return NULL; + + streamout->info = *info; + streamout->id = id; + streamout->pos_out_index = -1; + + SVGA_DBG(DEBUG_STREAMOUT, "%s, num_outputs=%d id=%d\n", __FUNCTION__, + info->num_outputs, id); + + /* init whole decls and stride arrays to zero to avoid garbage values */ + memset(decls, 0, sizeof(decls)); + memset(strides, 0, sizeof(strides)); + + for (i = 0; i < info->num_outputs; i++) { + unsigned reg_idx = info->output[i].register_index; + unsigned buf_idx = info->output[i].output_buffer; + const unsigned sem_name = shader->info.output_semantic_name[reg_idx]; + + assert(buf_idx <= PIPE_MAX_SO_BUFFERS); + + if (sem_name == TGSI_SEMANTIC_POSITION) { + /** + * Check if streaming out POSITION. If so, replace the + * register index with the index for NON_ADJUSTED POSITION. 
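/*
 * A minimal sketch (not from the diff itself) of the registerMask computation
 * used in the stream-output declarations below: the mask selects which of the
 * four register components an output writes, derived from the
 * start_component/num_components fields of pipe_stream_output_info.
 */
static unsigned
so_register_mask(unsigned start_component, unsigned num_components)
{
   /* e.g. a vec3 at component 0 gives 0x7; a lone .w component gives 0x8 */
   return ((1u << num_components) - 1u) << start_component;
}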
+ */ + decls[i].registerIndex = shader->info.num_outputs; + + /* Save this output index, so we can tell later if this stream output + * includes an output of a vertex position + */ + streamout->pos_out_index = i; + } + else if (sem_name == TGSI_SEMANTIC_CLIPDIST) { + /** + * Use the shadow copy for clip distance because + * CLIPDIST instruction is only emitted for enabled clip planes. + * It's valid to write to ClipDistance variable for non-enabled + * clip planes. + */ + decls[i].registerIndex = shader->info.num_outputs + 1 + + shader->info.output_semantic_index[reg_idx]; + } + else { + decls[i].registerIndex = reg_idx; + } + + decls[i].outputSlot = buf_idx; + decls[i].registerMask = + ((1 << info->output[i].num_components) - 1) + << info->output[i].start_component; + + SVGA_DBG(DEBUG_STREAMOUT, "%d slot=%d regIdx=%d regMask=0x%x\n", + i, decls[i].outputSlot, decls[i].registerIndex, + decls[i].registerMask); + + strides[buf_idx] = info->stride[buf_idx] * sizeof(float); + } + + ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id, + info->num_outputs, + strides, + decls); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id, + info->num_outputs, + strides, + decls); + if (ret != PIPE_OK) { + util_bitmask_clear(svga->stream_output_id_bm, id); + FREE(streamout); + streamout = NULL; + } + } + return streamout; +} + +enum pipe_error +svga_set_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout) +{ + enum pipe_error ret = PIPE_OK; + unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID; + + if (!svga_have_vgpu10(svga)) { + return PIPE_OK; + } + + SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __FUNCTION__, + streamout, id); + + if (svga->current_so != streamout) { + /* Save current SO state */ + svga->current_so = streamout; + + ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id); + } + } + + return ret; +} + +void +svga_delete_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout) +{ + enum pipe_error ret; + + SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout); + + assert(svga_have_vgpu10(svga)); + assert(streamout != NULL); + + ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id); + } + + /* Release the ID */ + util_bitmask_clear(svga->stream_output_id_bm, streamout->id); + + /* Free streamout structure */ + FREE(streamout); +} + +static struct pipe_stream_output_target * +svga_create_stream_output_target(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_stream_output_target *sot; + + SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__, + buffer_offset, buffer_size); + + assert(svga_have_vgpu10(svga)); + (void) svga; + + sot = CALLOC_STRUCT(svga_stream_output_target); + if (!sot) + return NULL; + + pipe_reference_init(&sot->base.reference, 1); + pipe_resource_reference(&sot->base.buffer, buffer); + sot->base.context = pipe; + sot->base.buffer = buffer; + sot->base.buffer_offset = buffer_offset; + sot->base.buffer_size = buffer_size; + + return &sot->base; +} + +static void +svga_destroy_stream_output_target(struct pipe_context *pipe, + struct 
pipe_stream_output_target *target) +{ + struct svga_stream_output_target *sot = svga_stream_output_target(target); + + SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__); + + pipe_resource_reference(&sot->base.buffer, NULL); + FREE(sot); +} + +static void +svga_set_stream_output_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct svga_context *svga = svga_context(pipe); + struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS]; + enum pipe_error ret; + unsigned i; + unsigned num_so_targets; + + SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__, + num_targets); + + assert(svga_have_vgpu10(svga)); + + /* Mark the streamout buffers as dirty so that we'll issue readbacks + * before mapping. + */ + for (i = 0; i < svga->num_so_targets; i++) { + struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer); + sbuf->dirty = TRUE; + } + + assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS); + + for (i = 0; i < num_targets; i++) { + struct svga_stream_output_target *sot + = svga_stream_output_target(targets[i]); + struct svga_buffer *sbuf = svga_buffer(sot->base.buffer); + unsigned size; + + assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT); + (void) sbuf; + + svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer); + svga->so_targets[i] = &sot->base; + soBindings[i].offset = sot->base.buffer_offset; + + /* The size cannot extend beyond the end of the buffer. Clamp it. */ + size = MIN2(sot->base.buffer_size, + sot->base.buffer->width0 - sot->base.buffer_offset); + + soBindings[i].sizeInBytes = size; + } + + /* unbind any previously bound stream output buffers */ + for (; i < svga->num_so_targets; i++) { + svga->so_surfaces[i] = NULL; + svga->so_targets[i] = NULL; + } + + num_so_targets = MAX2(svga->num_so_targets, num_targets); + ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets, + soBindings, svga->so_surfaces); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets, + soBindings, svga->so_surfaces); + } + + svga->num_so_targets = num_targets; +} + +void +svga_init_stream_output_functions(struct svga_context *svga) +{ + svga->pipe.create_stream_output_target = svga_create_stream_output_target; + svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target; + svga->pipe.set_stream_output_targets = svga_set_stream_output_targets; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c index faf77f3ab..b932c568f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -23,17 +23,21 @@ * **********************************************************/ +#include "pipe/p_defines.h" +#include "util/u_bitmask.h" +#include "util/u_format.h" #include "util/u_helpers.h" #include "util/u_inlines.h" -#include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" -#include "svga_screen.h" -#include "svga_resource_buffer.h" #include "svga_context.h" +#include "svga_cmd.h" +#include "svga_format.h" +#include "svga_resource_buffer.h" +#include "svga_screen.h" static void svga_set_vertex_buffers(struct pipe_context *pipe, @@ -55,25 +59,33 @@ static void svga_set_index_buffer(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); - if (ib) { - 
pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer); - memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib)); - } - else { - pipe_resource_reference(&svga->curr.ib.buffer, NULL); - memset(&svga->curr.ib, 0, sizeof(svga->curr.ib)); - } + util_set_index_buffer(&svga->curr.ib, ib); +} + - /* TODO make this more like a state */ +/** + * Does the given vertex attrib format need range adjustment in the VS? + * Range adjustment scales and biases values from [0,1] to [-1,1]. + * This lets us avoid the swtnl path. + */ +static boolean +attrib_needs_range_adjustment(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8_SNORM: + return TRUE; + default: + return FALSE; + } } /** - * Given a gallium vertex element format, return the corresponding SVGA3D - * format. Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats. + * Given a gallium vertex element format, return the corresponding + * SVGA3dDeclType. */ static SVGA3dDeclType -translate_vertex_format(enum pipe_format format) +translate_vertex_format_to_decltype(enum pipe_format format) { switch (format) { case PIPE_FORMAT_R32_FLOAT: return SVGA3D_DECLTYPE_FLOAT1; @@ -94,10 +106,10 @@ translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; - /* See attrib_needs_adjustment() and attrib_needs_w_to_1() below */ + /* See attrib_needs_adjustment() and attrib_needs_w_to_1() above */ case PIPE_FORMAT_R8G8B8_SNORM: return SVGA3D_DECLTYPE_UBYTE4N; - /* See attrib_needs_w_to_1() below */ + /* See attrib_needs_w_to_1() above */ case PIPE_FORMAT_R16G16B16_SNORM: return SVGA3D_DECLTYPE_SHORT4N; case PIPE_FORMAT_R16G16B16_UNORM: return SVGA3D_DECLTYPE_USHORT4N; case PIPE_FORMAT_R8G8B8_UNORM: return SVGA3D_DECLTYPE_UBYTE4N; @@ -111,38 +123,121 @@ translate_vertex_format(enum pipe_format format) } -/** - * Does the given vertex attrib format need range adjustment in the VS? - * Range adjustment scales and biases values from [0,1] to [-1,1]. - * This lets us avoid the swtnl path. 
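/*
 * A minimal sketch of the "range adjustment" the comment above describes:
 * values fetched as unsigned-normalized [0,1] are scaled and biased to
 * [-1,1] in the vertex shader (v * 2 - 1), which is how a format such as
 * PIPE_FORMAT_R8G8B8_SNORM can be fetched as SVGA3D_DECLTYPE_UBYTE4N
 * without falling back to the software TNL path.
 */
static float
range_adjust(float unorm_value)
{
   return unorm_value * 2.0f - 1.0f;   /* 0.0 -> -1.0, 0.5 -> 0.0, 1.0 -> 1.0 */
}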
- */ -static boolean -attrib_needs_range_adjustment(enum pipe_format format) +static void +define_input_element_object(struct svga_context *svga, + struct svga_velems_state *velems) { - switch (format) { - case PIPE_FORMAT_R8G8B8_SNORM: - return TRUE; - default: - return FALSE; + SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS]; + enum pipe_error ret; + unsigned i; + + assert(velems->count <= PIPE_MAX_ATTRIBS); + assert(svga_have_vgpu10(svga)); + + for (i = 0; i < velems->count; i++) { + const struct pipe_vertex_element *elem = velems->velem + i; + SVGA3dSurfaceFormat svga_format; + unsigned vf_flags; + + svga_translate_vertex_format_vgpu10(elem->src_format, + &svga_format, &vf_flags); + + velems->decl_type[i] = + translate_vertex_format_to_decltype(elem->src_format); + elements[i].inputSlot = elem->vertex_buffer_index; + elements[i].alignedByteOffset = elem->src_offset; + elements[i].format = svga_format; + + if (elem->instance_divisor) { + elements[i].inputSlotClass = SVGA3D_INPUT_PER_INSTANCE_DATA; + elements[i].instanceDataStepRate = elem->instance_divisor; + } + else { + elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA; + elements[i].instanceDataStepRate = 0; + } + elements[i].inputRegister = i; + + if (elements[i].format == SVGA3D_FORMAT_INVALID) { + velems->need_swvfetch = TRUE; + } + + if (util_format_is_pure_integer(elem->src_format)) { + velems->attrib_is_pure_int |= (1 << i); + } + + if (vf_flags & VF_W_TO_1) { + velems->adjust_attrib_w_1 |= (1 << i); + } + + if (vf_flags & VF_U_TO_F_CAST) { + velems->adjust_attrib_utof |= (1 << i); + } + else if (vf_flags & VF_I_TO_F_CAST) { + velems->adjust_attrib_itof |= (1 << i); + } + + if (vf_flags & VF_BGRA) { + velems->attrib_is_bgra |= (1 << i); + } + + if (vf_flags & VF_PUINT_TO_SNORM) { + velems->attrib_puint_to_snorm |= (1 << i); + } + else if (vf_flags & VF_PUINT_TO_USCALED) { + velems->attrib_puint_to_uscaled |= (1 << i); + } + else if (vf_flags & VF_PUINT_TO_SSCALED) { + velems->attrib_puint_to_sscaled |= (1 << i); + } + } + + velems->id = util_bitmask_add(svga->input_element_object_id_bm); + + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count, + velems->id, elements); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count, + velems->id, elements); + assert(ret == PIPE_OK); } } /** - * Does the given vertex attrib format need to have the W component set - * to one in the VS? + * Translate the vertex element types to SVGA3dDeclType and check + * for VS-based vertex attribute adjustments. 
*/ -static boolean -attrib_needs_w_to_1(enum pipe_format format) +static void +translate_vertex_decls(struct svga_context *svga, + struct svga_velems_state *velems) { - switch (format) { - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16_UNORM: - return TRUE; - default: - return FALSE; + unsigned i; + + assert(!svga_have_vgpu10(svga)); + + for (i = 0; i < velems->count; i++) { + const enum pipe_format f = velems->velem[i].src_format; + SVGA3dSurfaceFormat svga_format; + unsigned vf_flags; + + svga_translate_vertex_format_vgpu10(f, &svga_format, &vf_flags); + + velems->decl_type[i] = translate_vertex_format_to_decltype(f); + if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) { + /* Unsupported format - use software fetch */ + velems->need_swvfetch = TRUE; + } + + /* Check for VS-based adjustments */ + if (attrib_needs_range_adjustment(f)) { + velems->adjust_attrib_range |= (1 << i); + } + + if (vf_flags & VF_W_TO_1) { + velems->adjust_attrib_w_1 |= (1 << i); + } } } @@ -152,54 +247,78 @@ svga_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) { + struct svga_context *svga = svga_context(pipe); struct svga_velems_state *velems; + assert(count <= PIPE_MAX_ATTRIBS); velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state)); if (velems) { - unsigned i; - velems->count = count; memcpy(velems->velem, attribs, sizeof(*attribs) * count); velems->need_swvfetch = FALSE; velems->adjust_attrib_range = 0x0; + velems->attrib_is_pure_int = 0x0; velems->adjust_attrib_w_1 = 0x0; - - /* Translate Gallium vertex format to SVGA3dDeclType */ - for (i = 0; i < count; i++) { - enum pipe_format f = attribs[i].src_format; - velems->decl_type[i] = translate_vertex_format(f); - if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) { - /* Unsupported format - use software fetch */ - velems->need_swvfetch = TRUE; - break; - } - - if (attrib_needs_range_adjustment(f)) { - velems->adjust_attrib_range |= (1 << i); - } - if (attrib_needs_w_to_1(f)) { - velems->adjust_attrib_w_1 |= (1 << i); - } + velems->adjust_attrib_itof = 0x0; + velems->adjust_attrib_utof = 0x0; + velems->attrib_is_bgra = 0x0; + velems->attrib_puint_to_snorm = 0x0; + velems->attrib_puint_to_uscaled = 0x0; + velems->attrib_puint_to_sscaled = 0x0; + + if (svga_have_vgpu10(svga)) { + define_input_element_object(svga, velems); + } + else { + translate_vertex_decls(svga, velems); } } + + svga->hud.num_state_objects++; + return velems; } -static void svga_bind_vertex_elements_state(struct pipe_context *pipe, - void *velems) + +static void +svga_bind_vertex_elements_state(struct pipe_context *pipe, void *state) { struct svga_context *svga = svga_context(pipe); - struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems; + struct svga_velems_state *velems = (struct svga_velems_state *) state; - svga->curr.velems = svga_velems; + svga->curr.velems = velems; svga->dirty |= SVGA_NEW_VELEMENT; } -static void svga_delete_vertex_elements_state(struct pipe_context *pipe, - void *velems) + +static void +svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state) { + struct svga_context *svga = svga_context(pipe); + struct svga_velems_state *velems = (struct svga_velems_state *) state; + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + svga_hwtnl_flush_retry(svga); + + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id); + if (ret != PIPE_OK) { + 
svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id); + assert(ret == PIPE_OK); + } + + if (velems->id == svga->state.hw_draw.layout_id) + svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->input_element_object_id_bm, velems->id); + velems->id = SVGA3D_INVALID_ID; + } + FREE(velems); + svga->hud.num_state_objects--; } void svga_cleanup_vertex_state( struct svga_context *svga ) @@ -219,5 +338,3 @@ void svga_init_vertex_functions( struct svga_context *svga ) svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; } - - diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c index c3ac663b4..630f49078 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_vs.c @@ -32,11 +32,11 @@ #include "tgsi/tgsi_text.h" #include "svga_context.h" -#include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" #include "svga_debug.h" #include "svga_shader.h" +#include "svga_streamout.h" /** @@ -100,6 +100,7 @@ svga_create_vs_state(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader); + if (!vs) return NULL; @@ -123,10 +124,12 @@ svga_create_vs_state(struct pipe_context *pipe, vs->base.id = svga->debug.shader_id++; - if (SVGA_DEBUG & DEBUG_TGSI || 0) { - debug_printf("%s id: %u, inputs: %u, outputs: %u\n", - __FUNCTION__, vs->base.id, - vs->base.info.num_inputs, vs->base.info.num_outputs); + vs->generic_outputs = svga_get_generic_outputs_mask(&vs->base.info); + + /* check for any stream output declarations */ + if (templ->stream_output.num_outputs) { + vs->base.stream_output = svga_create_stream_output(svga, &vs->base, + &templ->stream_output); } return vs; @@ -139,6 +142,17 @@ svga_bind_vs_state(struct pipe_context *pipe, void *shader) struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; struct svga_context *svga = svga_context(pipe); + if (vs == svga->curr.vs) + return; + + /* If the currently bound vertex shader has a generated geometry shader, + * then unbind the geometry shader before binding a new vertex shader. + * We need to unbind the geometry shader here because there is no + * pipe_shader associated with the generated geometry shader. + */ + if (svga->curr.vs != NULL && svga->curr.vs->gs != NULL) + svga->pipe.bind_gs_state(&svga->pipe, NULL); + svga->curr.vs = vs; svga->dirty |= SVGA_NEW_VS; } @@ -154,20 +168,40 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader) svga_hwtnl_flush_retry(svga); + assert(vs->base.parent == NULL); + + /* Check if there is a generated geometry shader to go with this + * vertex shader. If there is, then delete the geometry shader as well. + */ + if (vs->gs != NULL) { + svga->pipe.delete_gs_state(&svga->pipe, vs->gs); + } + + if (vs->base.stream_output != NULL) + svga_delete_stream_output(svga, vs->base.stream_output); + draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader); for (variant = vs->base.variants; variant; variant = tmp) { tmp = variant->next; - ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); - (void) ret; /* PIPE_ERROR_ not handled yet */ - - /* - * Remove stale references to this variant to ensure a new variant on the - * same address will be detected as a change. 
- */ - if (variant == svga->state.hw_draw.vs) + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.vs) { + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); + assert(ret == PIPE_OK); + } svga->state.hw_draw.vs = NULL; + } + + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + assert(ret == PIPE_OK); + } } FREE((void *)vs->base.tokens); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource.c b/lib/mesa/src/gallium/drivers/svga/svga_resource.c index b295b44ea..1c3bcd67a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource.c @@ -69,18 +69,21 @@ svga_can_create_resource(struct pipe_screen *screen, struct svga_winsys_screen *sws = svgascreen->sws; SVGA3dSurfaceFormat format; SVGA3dSize base_level_size; - uint32 numFaces; uint32 numMipLevels; + uint32 arraySize; if (res->target == PIPE_BUFFER) { format = SVGA3D_BUFFER; base_level_size.width = res->width0; base_level_size.height = 1; base_level_size.depth = 1; - numFaces = 1; numMipLevels = 1; + arraySize = 1; } else { + if (res->target == PIPE_TEXTURE_CUBE) + assert(res->array_size == 6); + format = svga_translate_format(svgascreen, res->format, res->bind); if (format == SVGA3D_FORMAT_INVALID) return FALSE; @@ -88,12 +91,12 @@ svga_can_create_resource(struct pipe_screen *screen, base_level_size.width = res->width0; base_level_size.height = res->height0; base_level_size.depth = res->depth0; - numFaces = (res->target == PIPE_TEXTURE_CUBE) ? 
6 : 1; numMipLevels = res->last_level + 1; + arraySize = res->array_size; } return sws->surface_can_create(sws, format, base_level_size, - numFaces, numMipLevels); + arraySize, numMipLevels); } @@ -104,6 +107,12 @@ svga_init_resource_functions(struct svga_context *svga) svga->pipe.transfer_flush_region = u_transfer_flush_region_vtbl; svga->pipe.transfer_unmap = u_transfer_unmap_vtbl; svga->pipe.transfer_inline_write = u_transfer_inline_write_vtbl; + + if (svga_have_vgpu10(svga)) { + svga->pipe.generate_mipmap = svga_texture_generate_mipmap; + } else { + svga->pipe.generate_mipmap = NULL; + } } void diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c index 13f85cddb..a8ffcc7f6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c @@ -29,6 +29,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "os/os_thread.h" +#include "os/os_time.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_resource.h" @@ -48,7 +49,8 @@ static inline boolean svga_buffer_needs_hw_storage(unsigned usage) { - return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER); + return (usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT)) != 0; } @@ -76,9 +78,15 @@ svga_buffer_transfer_map(struct pipe_context *pipe, struct svga_buffer *sbuf = svga_buffer(resource); struct pipe_transfer *transfer; uint8_t *map; + int64_t begin = os_time_get(); + + assert(box->y == 0); + assert(box->z == 0); + assert(box->height == 1); + assert(box->depth == 1); transfer = CALLOC_STRUCT(pipe_transfer); - if (transfer == NULL) { + if (!transfer) { return NULL; } @@ -87,6 +95,26 @@ svga_buffer_transfer_map(struct pipe_context *pipe, transfer->usage = usage; transfer->box = *box; + if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) { + /* Only need to test for vgpu10 since only vgpu10 features (streamout, + * buffer copy) can modify buffers on the device. 
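/*
 * A minimal sketch of the issue-then-retry idiom used throughout these hunks
 * (the readback just below, DefineStreamOutput, DestroySamplerState, and
 * others): if a command fails because the command buffer is full, flush once
 * and retry.  The callback type here is hypothetical; the driver simply
 * repeats the call inline instead.
 */
typedef enum pipe_error (*svga_emit_fn)(struct svga_context *svga, void *arg);

static enum pipe_error
emit_with_retry(struct svga_context *svga, svga_emit_fn emit, void *arg)
{
   enum pipe_error ret = emit(svga, arg);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);   /* make room in the command buffer */
      ret = emit(svga, arg);
   }
   return ret;
}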
+ */ + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + assert(sbuf->handle); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); + assert(ret == PIPE_OK); + } + + svga_context_finish(svga); + + sbuf->dirty = FALSE; + } + } + if (usage & PIPE_TRANSFER_WRITE) { if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { /* @@ -222,7 +250,9 @@ svga_buffer_transfer_map(struct pipe_context *pipe, } else { FREE(transfer); } - + + svga->hud.map_buffer_time += (os_time_get() - begin); + return map; } @@ -254,9 +284,9 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe, struct svga_screen *ss = svga_screen(pipe->screen); struct svga_context *svga = svga_context(pipe); struct svga_buffer *sbuf = svga_buffer(transfer->resource); - + pipe_mutex_lock(ss->swc_mutex); - + assert(sbuf->map.count); if (sbuf->map.count) { --sbuf->map.count; @@ -275,7 +305,7 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe, */ SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); - + sbuf->dma.flags.discard = TRUE; svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0); @@ -295,28 +325,31 @@ svga_buffer_destroy( struct pipe_screen *screen, struct svga_buffer *sbuf = svga_buffer( buf ); assert(!p_atomic_read(&buf->reference.count)); - + assert(!sbuf->dma.pending); - if(sbuf->handle) + if (sbuf->handle) svga_buffer_destroy_host_surface(ss, sbuf); - - if(sbuf->uploaded.buffer) + + if (sbuf->uploaded.buffer) pipe_resource_reference(&sbuf->uploaded.buffer, NULL); - if(sbuf->hwbuf) + if (sbuf->hwbuf) svga_buffer_destroy_hw_storage(ss, sbuf); - - if(sbuf->swbuf && !sbuf->user) + + if (sbuf->swbuf && !sbuf->user) align_free(sbuf->swbuf); - - ss->total_resource_bytes -= sbuf->size; + + ss->hud.total_resource_bytes -= sbuf->size; + assert(ss->hud.num_resources > 0); + if (ss->hud.num_resources > 0) + ss->hud.num_resources--; FREE(sbuf); } -struct u_resource_vtbl svga_buffer_vtbl = +struct u_resource_vtbl svga_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ svga_buffer_destroy, /* resource_destroy */ @@ -334,33 +367,65 @@ svga_buffer_create(struct pipe_screen *screen, { struct svga_screen *ss = svga_screen(screen); struct svga_buffer *sbuf; - + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto error1; - + sbuf->b.b = *template; sbuf->b.vtbl = &svga_buffer_vtbl; pipe_reference_init(&sbuf->b.b.reference, 1); sbuf->b.b.screen = screen; + sbuf->bind_flags = template->bind; + + if (template->bind & PIPE_BIND_CONSTANT_BUFFER) { + /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER + * flag set. + */ + if (ss->sws->have_vgpu10) { + sbuf->bind_flags = PIPE_BIND_CONSTANT_BUFFER; + + /* Constant buffer size needs to be in multiples of 16. */ + sbuf->b.b.width0 = align(sbuf->b.b.width0, 16); + } + } + + if (svga_buffer_needs_hw_storage(template->bind)) { + + /* If the buffer will be used for vertex/index/stream data, set all + * the flags so that the buffer will be accepted for all those uses. + * Note that the PIPE_BIND_ flags we get from the state tracker are + * just a hint about how the buffer may be used. And OpenGL buffer + * object may be used for many different things. + */ + if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER)) { + /* Not a constant buffer. The buffer may be used for vertex data, + * indexes or stream-out. 
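/*
 * A minimal sketch of the 16-byte rounding applied to constant-buffer sizes
 * above; Mesa's align() helper performs the same round-up.
 */
static unsigned
align_to_16(unsigned size_in_bytes)
{
   return (size_in_bytes + 15u) & ~15u;   /* 20 -> 32, 16 -> 16 */
}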
+ */ + sbuf->bind_flags |= (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER); + if (ss->sws->have_vgpu10) + sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT; + } - if(svga_buffer_needs_hw_storage(template->bind)) { - if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) + if (svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) goto error2; } else { - sbuf->swbuf = align_malloc(template->width0, 64); - if(!sbuf->swbuf) + sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64); + if (!sbuf->swbuf) goto error2; } - + debug_reference(&sbuf->b.b.reference, (debug_reference_descriptor)debug_describe_resource, 0); - sbuf->size = util_resource_size(template); - ss->total_resource_bytes += sbuf->size; + sbuf->size = util_resource_size(&sbuf->b.b); + ss->hud.total_resource_bytes += sbuf->size; + + ss->hud.num_resources++; - return &sbuf->b.b; + return &sbuf->b.b; error2: FREE(sbuf); @@ -368,6 +433,7 @@ error1: return NULL; } + struct pipe_resource * svga_user_buffer_create(struct pipe_screen *screen, void *ptr, @@ -375,11 +441,12 @@ svga_user_buffer_create(struct pipe_screen *screen, unsigned bind) { struct svga_buffer *sbuf; - + struct svga_screen *ss = svga_screen(screen); + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto no_sbuf; - + pipe_reference_init(&sbuf->b.b.reference, 1); sbuf->b.vtbl = &svga_buffer_vtbl; sbuf->b.b.screen = screen; @@ -391,13 +458,16 @@ svga_user_buffer_create(struct pipe_screen *screen, sbuf->b.b.depth0 = 1; sbuf->b.b.array_size = 1; + sbuf->bind_flags = bind; sbuf->swbuf = ptr; sbuf->user = TRUE; debug_reference(&sbuf->b.b.reference, (debug_reference_descriptor)debug_describe_resource, 0); - - return &sbuf->b.b; + + ss->hud.num_resources++; + + return &sbuf->b.b; no_sbuf: return NULL; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h index e838beb66..0591f8960 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h @@ -65,6 +65,9 @@ struct svga_buffer { struct u_resource b; + /** This is a superset of b.b.bind */ + unsigned bind_flags; + /** * Regular (non DMA'able) memory. * @@ -187,6 +190,8 @@ struct svga_buffer struct list_head head; unsigned size; /**< Approximate size in bytes */ + + boolean dirty; /**< Need to do a readback before mapping? 
*/ }; @@ -248,6 +253,9 @@ svga_buffer_hw_storage_map(struct svga_context *svga, unsigned flags, boolean *retry) { struct svga_winsys_screen *sws = svga_buffer_winsys_screen(sbuf); + + svga->hud.num_resources_mapped++; + if (sws->have_gb_objects) { return svga->swc->surface_map(svga->swc, sbuf->handle, flags, retry); } else { diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 5686531f9..7f7ceab0a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -149,10 +149,22 @@ svga_buffer_create_host_surface(struct svga_screen *ss, sbuf->key.flags = 0; sbuf->key.format = SVGA3D_BUFFER; - if (sbuf->b.b.bind & PIPE_BIND_VERTEX_BUFFER) + if (sbuf->bind_flags & PIPE_BIND_VERTEX_BUFFER) { sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER; - if (sbuf->b.b.bind & PIPE_BIND_INDEX_BUFFER) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_VERTEX_BUFFER; + } + if (sbuf->bind_flags & PIPE_BIND_INDEX_BUFFER) { sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER; + sbuf->key.flags |= SVGA3D_SURFACE_BIND_INDEX_BUFFER; + } + if (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_CONSTANT_BUFFER; + + if (sbuf->bind_flags & PIPE_BIND_STREAM_OUTPUT) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_STREAM_OUTPUT; + + if (sbuf->bind_flags & PIPE_BIND_SAMPLER_VIEW) + sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE; sbuf->key.size.width = sbuf->b.b.width0; sbuf->key.size.height = 1; @@ -161,10 +173,12 @@ svga_buffer_create_host_surface(struct svga_screen *ss, sbuf->key.numFaces = 1; sbuf->key.numMipLevels = 1; sbuf->key.cachable = 1; + sbuf->key.arraySize = 1; SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->b.b.width0); - sbuf->handle = svga_screen_surface_create(ss, &sbuf->key); + sbuf->handle = svga_screen_surface_create(ss, sbuf->b.b.bind, + sbuf->b.b.usage, &sbuf->key); if (!sbuf->handle) return PIPE_ERROR_OUT_OF_MEMORY; @@ -203,79 +217,89 @@ svga_buffer_upload_gb_command(struct svga_context *svga, struct svga_buffer *sbuf) { struct svga_winsys_context *swc = svga->swc; - SVGA3dCmdUpdateGBImage *cmd; - struct svga_3d_update_gb_image *ccmd = NULL; + SVGA3dCmdUpdateGBImage *update_cmd; + struct svga_3d_update_gb_image *whole_update_cmd = NULL; uint32 numBoxes = sbuf->map.num_ranges; struct pipe_resource *dummy; - unsigned int i; + unsigned i; assert(numBoxes); assert(sbuf->dma.updates == NULL); if (sbuf->dma.flags.discard) { struct svga_3d_invalidate_gb_image *cicmd = NULL; - SVGA3dCmdInvalidateGBImage *icmd; + SVGA3dCmdInvalidateGBImage *invalidate_cmd; + const unsigned total_commands_size = + sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd); /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by * 'numBoxes' UPDATE_GB_IMAGE commands. Allocate all at once rather * than with separate commands because we need to properly deal with * filling the command buffer. 
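/*
 * A minimal sketch (reusing the command struct names from this hunk) of the
 * reservation size for the discard path: one INVALIDATE_GB_IMAGE body
 * followed by numBoxes full header-plus-body UPDATE_GB_IMAGE commands,
 * matching the total_commands_size computed in this hunk.
 */
static size_t
discard_upload_reserve_size(unsigned numBoxes)
{
   return sizeof(SVGA3dCmdInvalidateGBImage)
        + numBoxes * sizeof(struct svga_3d_update_gb_image);
}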
*/ - icmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_INVALIDATE_GB_IMAGE, - sizeof *icmd + numBoxes * sizeof *ccmd, - 2); - if (!icmd) + invalidate_cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE, + total_commands_size, 1 + numBoxes); + if (!invalidate_cmd) return PIPE_ERROR_OUT_OF_MEMORY; - cicmd = container_of(icmd, cicmd, body); - cicmd->header.size = sizeof *icmd; - swc->surface_relocation(swc, &icmd->image.sid, NULL, sbuf->handle, + cicmd = container_of(invalidate_cmd, cicmd, body); + cicmd->header.size = sizeof(*invalidate_cmd); + swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL, sbuf->handle, (SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL | SVGA_RELOC_DMA)); - icmd->image.face = 0; - icmd->image.mipmap = 0; + invalidate_cmd->image.face = 0; + invalidate_cmd->image.mipmap = 0; + /* The whole_update_command is a SVGA3dCmdHeader plus the + * SVGA3dCmdUpdateGBImage command. + */ + whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1]; /* initialize the first UPDATE_GB_IMAGE command */ - ccmd = (struct svga_3d_update_gb_image *) &icmd[1]; - ccmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd = &ccmd->body; + whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; + update_cmd = &whole_update_cmd->body; } else { /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */ - cmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_UPDATE_GB_IMAGE, - sizeof *cmd + (numBoxes - 1) * sizeof *ccmd, - 1); - if (!cmd) + const unsigned total_commands_size = + sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd); + + update_cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_UPDATE_GB_IMAGE, + total_commands_size, numBoxes); + if (!update_cmd) return PIPE_ERROR_OUT_OF_MEMORY; - ccmd = container_of(cmd, ccmd, body); + /* The whole_update_command is a SVGA3dCmdHeader plus the + * SVGA3dCmdUpdateGBImage command. + */ + whole_update_cmd = container_of(update_cmd, whole_update_cmd, body); } /* Init the first UPDATE_GB_IMAGE command */ - ccmd->header.size = sizeof *cmd; - swc->surface_relocation(swc, &cmd->image.sid, NULL, sbuf->handle, + whole_update_cmd->header.size = sizeof(*update_cmd); + swc->surface_relocation(swc, &update_cmd->image.sid, NULL, sbuf->handle, SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); - cmd->image.face = 0; - cmd->image.mipmap = 0; + update_cmd->image.face = 0; + update_cmd->image.mipmap = 0; /* Save pointer to the first UPDATE_GB_IMAGE command so that we can * fill in the box info below. */ - sbuf->dma.updates = ccmd; + sbuf->dma.updates = whole_update_cmd; /* - * Copy the relocation info, face and mipmap to all - * subsequent commands. NOTE: For winsyses that actually - * patch the image.sid member at flush time, this will fail - * miserably. For those we need to add as many relocations - * as there are copy boxes. + * Copy the face, mipmap, etc. info to all subsequent commands. + * Also do the surface relocation for each subsequent command. 
*/ - for (i = 1; i < numBoxes; ++i) { - memcpy(++ccmd, sbuf->dma.updates, sizeof *ccmd); + whole_update_cmd++; + memcpy(whole_update_cmd, sbuf->dma.updates, sizeof(*whole_update_cmd)); + + swc->surface_relocation(swc, &whole_update_cmd->body.image.sid, NULL, + sbuf->handle, + SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL); } /* Increment reference count */ @@ -284,6 +308,7 @@ svga_buffer_upload_gb_command(struct svga_context *svga, pipe_resource_reference(&dummy, &sbuf->b.b); SVGA_FIFOCommitAll(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; sbuf->dma.flags.discard = FALSE; return PIPE_OK; @@ -357,6 +382,7 @@ svga_buffer_upload_command(struct svga_context *svga, SVGA_FIFOCommitAll(swc); + swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; sbuf->dma.flags.discard = FALSE; return PIPE_OK; @@ -405,6 +431,8 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } else { @@ -430,6 +458,8 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c index 64fd245c0..3f754c4d5 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c @@ -29,6 +29,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "os/os_thread.h" +#include "os/os_time.h" #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_math.h" @@ -46,12 +47,6 @@ #include "svga_debug.h" -/* XXX: This isn't a real hardware flag, but just a hack for kernel to - * know about primary surfaces. Find a better way to accomplish this. - */ -#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) - - static void svga_transfer_dma_band(struct svga_context *svga, struct svga_transfer *st, @@ -59,10 +54,10 @@ svga_transfer_dma_band(struct svga_context *svga, unsigned y, unsigned h, unsigned srcy, SVGA3dSurfaceDMAFlags flags) { - struct svga_texture *texture = svga_texture(st->base.resource); + struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCopyBox box; enum pipe_error ret; - + assert(!st->use_direct_map); box.x = st->base.box.x; @@ -75,28 +70,23 @@ svga_transfer_dma_band(struct svga_context *svga, box.srcy = srcy; box.srcz = 0; - if (st->base.resource->target == PIPE_TEXTURE_CUBE) { - st->face = st->base.box.z; - box.z = 0; - } - else - st->face = 0; - - SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n", - transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", - texture->handle, - st->face, - st->base.box.x, - y, - box.z, - st->base.box.x + st->base.box.width, - y + h, - box.z + 1, - util_format_get_blocksize(texture->b.b.format) * 8 / - (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format))); + SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - " + "(%u, %u, %u), %ubpp\n", + transfer == SVGA3D_WRITE_HOST_VRAM ? 
"to" : "from", + texture->handle, + st->slice, + st->base.box.x, + y, + box.z, + st->base.box.x + st->base.box.width, + y + h, + box.z + 1, + util_format_get_blocksize(texture->b.b.format) * 8 / + (util_format_get_blockwidth(texture->b.b.format) + * util_format_get_blockheight(texture->b.b.format))); ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); - if(ret != PIPE_OK) { + if (ret != PIPE_OK) { svga_context_flush(svga, NULL); ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); assert(ret == PIPE_OK); @@ -110,7 +100,7 @@ svga_transfer_dma(struct svga_context *svga, SVGA3dTransferType transfer, SVGA3dSurfaceDMAFlags flags) { - struct svga_texture *texture = svga_texture(st->base.resource); + struct svga_texture *texture = svga_texture(st->base.resource); struct svga_screen *screen = svga_screen(texture->b.b.screen); struct svga_winsys_screen *sws = screen->sws; struct pipe_fence_handle *fence = NULL; @@ -126,14 +116,13 @@ svga_transfer_dma(struct svga_context *svga, */ svga_surfaces_flush( svga ); - if(!st->swbuf) { + if (!st->swbuf) { /* Do the DMA transfer in a single go */ - svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0, flags); - if(transfer == SVGA3D_READ_HOST_VRAM) { + if (transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); sws->fence_reference(sws, &fence, NULL); @@ -141,10 +130,13 @@ svga_transfer_dma(struct svga_context *svga, } else { int y, h, srcy; - unsigned blockheight = util_format_get_blockheight(st->base.resource->format); + unsigned blockheight = + util_format_get_blockheight(st->base.resource->format); + h = st->hw_nblocksy * blockheight; srcy = 0; - for(y = 0; y < st->base.box.height; y += h) { + + for (y = 0; y < st->base.box.height; y += h) { unsigned offset, length; void *hw, *sw; @@ -158,7 +150,7 @@ svga_transfer_dma(struct svga_context *svga, offset = y * st->base.stride / blockheight; length = h * st->base.stride / blockheight; - sw = (uint8_t *)st->swbuf + offset; + sw = (uint8_t *) st->swbuf + offset; if (transfer == SVGA3D_WRITE_HOST_VRAM) { unsigned usage = PIPE_TRANSFER_WRITE; @@ -184,16 +176,15 @@ svga_transfer_dma(struct svga_context *svga, * Prevent the texture contents to be discarded on the next band * upload. 
*/ - flags.discard = FALSE; - if(transfer == SVGA3D_READ_HOST_VRAM) { + if (transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ); assert(hw); - if(hw) { + if (hw) { memcpy(sw, hw, length); sws->buffer_unmap(sws, st->hwbuf); } @@ -203,19 +194,22 @@ svga_transfer_dma(struct svga_context *svga, } -static boolean +static boolean svga_texture_get_handle(struct pipe_screen *screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) + struct pipe_resource *texture, + struct winsys_handle *whandle) { struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen); unsigned stride; assert(svga_texture(texture)->key.cachable == 0); svga_texture(texture)->key.cachable = 0; + stride = util_format_get_nblocksx(texture->format, texture->width0) * util_format_get_blocksize(texture->format); - return sws->surface_get_handle(sws, svga_texture(texture)->handle, stride, whandle); + + return sws->surface_get_handle(sws, svga_texture(texture)->handle, + stride, whandle); } @@ -236,10 +230,15 @@ svga_texture_destroy(struct pipe_screen *screen, SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle); svga_screen_surface_destroy(ss, &tex->key, &tex->handle); - ss->total_resource_bytes -= tex->size; + ss->hud.total_resource_bytes -= tex->size; + FREE(tex->defined); FREE(tex->rendered_to); FREE(tex); + + assert(ss->hud.num_resources > 0); + if (ss->hud.num_resources > 0) + ss->hud.num_resources--; } @@ -274,10 +273,43 @@ need_tex_readback(struct pipe_transfer *transfer) } +static enum pipe_error +readback_image_vgpu9(struct svga_context *svga, + struct svga_winsys_surface *surf, + unsigned slice, + unsigned level) +{ + enum pipe_error ret; + + ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level); + } + return ret; +} + + +static enum pipe_error +readback_image_vgpu10(struct svga_context *svga, + struct svga_winsys_surface *surf, + unsigned slice, + unsigned level, + unsigned numMipLevels) +{ + enum pipe_error ret; + unsigned subResource; + + subResource = slice * numMipLevels + level; + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource); + } + return ret; +} + -/* XXX: Still implementing this as if it was a screen function, but - * can now modify it to queue transfers on the context. 
- */ static void * svga_texture_transfer_map(struct pipe_context *pipe, struct pipe_resource *texture, @@ -289,11 +321,14 @@ svga_texture_transfer_map(struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; + struct svga_texture *tex = svga_texture(texture); struct svga_transfer *st; unsigned nblocksx, nblocksy; boolean use_direct_map = svga_have_gb_objects(svga) && !svga_have_gb_dma(svga); unsigned d; + void *returnVal; + int64_t begin = os_time_get(); /* We can't map texture storage directly unless we have GB objects */ if (usage & PIPE_TRANSFER_MAP_DIRECTLY) { @@ -326,25 +361,40 @@ svga_texture_transfer_map(struct pipe_context *pipe, } pipe_resource_reference(&st->base.resource, texture); + st->base.level = level; st->base.usage = usage; st->base.box = *box; st->base.stride = nblocksx*util_format_get_blocksize(texture->format); st->base.layer_stride = st->base.stride * nblocksy; + switch (tex->b.b.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + st->slice = st->base.box.z; + st->base.box.z = 0; /* so we don't apply double offsets below */ + break; + default: + st->slice = 0; + break; + } + + if (usage & PIPE_TRANSFER_WRITE) { + /* record texture upload for HUD */ + svga->hud.num_bytes_uploaded += + nblocksx * nblocksy * d * util_format_get_blocksize(texture->format); + } + if (!use_direct_map) { /* Use a DMA buffer */ st->hw_nblocksy = nblocksy; - st->hwbuf = svga_winsys_buffer_create(svga, - 1, - 0, - st->hw_nblocksy * st->base.stride * d); + st->hwbuf = svga_winsys_buffer_create(svga, 1, 0, + st->hw_nblocksy * st->base.stride * d); while(!st->hwbuf && (st->hw_nblocksy /= 2)) { - st->hwbuf = svga_winsys_buffer_create(svga, - 1, - 0, - st->hw_nblocksy * st->base.stride * d); + st->hwbuf = svga_winsys_buffer_create(svga, 1, 0, + st->hw_nblocksy * st->base.stride * d); } if (!st->hwbuf) { @@ -352,8 +402,8 @@ svga_texture_transfer_map(struct pipe_context *pipe, return NULL; } - if(st->hw_nblocksy < nblocksy) { - /* We couldn't allocate a hardware buffer big enough for the transfer, + if (st->hw_nblocksy < nblocksy) { + /* We couldn't allocate a hardware buffer big enough for the transfer, * so allocate regular malloc memory instead */ if (0) { debug_printf("%s: failed to allocate %u KB of DMA, " @@ -379,45 +429,27 @@ svga_texture_transfer_map(struct pipe_context *pipe, } } else { struct pipe_transfer *transfer = &st->base; - struct svga_texture *tex = svga_texture(transfer->resource); struct svga_winsys_surface *surf = tex->handle; - unsigned face; - - assert(surf); - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - face = transfer->box.z; - } else { - face = 0; + if (!surf) { + FREE(st); + return NULL; } if (need_tex_readback(transfer)) { - SVGA3dBox box; enum pipe_error ret; - box.x = transfer->box.x; - box.y = transfer->box.y; - box.w = transfer->box.width; - box.h = transfer->box.height; - box.d = transfer->box.depth; - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - box.z = 0; - } - else { - box.z = transfer->box.z; - } - - (void) box; /* not used at this time */ - svga_surfaces_flush(svga); - ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level); + if (svga_have_vgpu10(svga)) { + ret = readback_image_vgpu10(svga, surf, st->slice, transfer->level, + tex->b.b.last_level + 1); + } else { + ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level); + } - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); 
- ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level); - assert(ret == PIPE_OK); - } + assert(ret == PIPE_OK); + (void) ret; svga_context_flush(svga, NULL); @@ -425,7 +457,7 @@ svga_texture_transfer_map(struct pipe_context *pipe, * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified * we could potentially clear the flag for all faces/layers/mips. */ - svga_clear_texture_rendered_to(tex, face, transfer->level); + svga_clear_texture_rendered_to(tex, st->slice, transfer->level); } else { assert(transfer->usage & PIPE_TRANSFER_WRITE); @@ -445,23 +477,21 @@ svga_texture_transfer_map(struct pipe_context *pipe, * Begin mapping code */ if (st->swbuf) { - return st->swbuf; + returnVal = st->swbuf; } else if (!st->use_direct_map) { - return sws->buffer_map(sws, st->hwbuf, usage); + returnVal = sws->buffer_map(sws, st->hwbuf, usage); } else { - struct svga_screen *screen = svga_screen(svga->pipe.screen); - SVGA3dSurfaceFormat format; SVGA3dSize baseLevelSize; struct svga_texture *tex = svga_texture(texture); struct svga_winsys_surface *surf = tex->handle; uint8_t *map; boolean retry; - unsigned face, offset, mip_width, mip_height; - unsigned xoffset = box->x; - unsigned yoffset = box->y; - unsigned zoffset = box->z; + unsigned offset, mip_width, mip_height; + unsigned xoffset = st->base.box.x; + unsigned yoffset = st->base.box.y; + unsigned zoffset = st->base.box.z; map = svga->swc->surface_map(svga->swc, surf, usage, &retry); if (map == NULL && retry) { @@ -476,7 +506,7 @@ svga_texture_transfer_map(struct pipe_context *pipe, /* * Make sure we return NULL if the map fails */ - if (map == NULL) { + if (!map) { FREE(st); return map; } @@ -484,21 +514,13 @@ svga_texture_transfer_map(struct pipe_context *pipe, /** * Compute the offset to the specific texture slice in the buffer. */ - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - face = zoffset; - zoffset = 0; - } else { - face = 0; - } - - format = svga_translate_format(screen, tex->b.b.format, 0); baseLevelSize.width = tex->b.b.width0; baseLevelSize.height = tex->b.b.height0; baseLevelSize.depth = tex->b.b.depth0; - offset = svga3dsurface_get_image_offset(format, baseLevelSize, + offset = svga3dsurface_get_image_offset(tex->key.format, baseLevelSize, tex->b.b.last_level + 1, /* numMips */ - face, level); + st->slice, level); if (level > 0) { assert(offset > 0); } @@ -506,11 +528,16 @@ svga_texture_transfer_map(struct pipe_context *pipe, mip_width = u_minify(tex->b.b.width0, level); mip_height = u_minify(tex->b.b.height0, level); - offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height, + offset += svga3dsurface_get_pixel_offset(tex->key.format, + mip_width, mip_height, xoffset, yoffset, zoffset); - - return (void *) (map + offset); + returnVal = (void *) (map + offset); } + + svga->hud.map_buffer_time += (os_time_get() - begin); + svga->hud.num_resources_mapped++; + + return returnVal; } @@ -541,9 +568,45 @@ svga_texture_surface_unmap(struct svga_context *svga, } -/* XXX: Still implementing this as if it was a screen function, but - * can now modify it to queue transfers on the context. 
- */ +static enum pipe_error +update_image_vgpu9(struct svga_context *svga, + struct svga_winsys_surface *surf, + const SVGA3dBox *box, + unsigned slice, + unsigned level) +{ + enum pipe_error ret; + + ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level); + } + return ret; +} + + +static enum pipe_error +update_image_vgpu10(struct svga_context *svga, + struct svga_winsys_surface *surf, + const SVGA3dBox *box, + unsigned slice, + unsigned level, + unsigned numMipLevels) +{ + enum pipe_error ret; + unsigned subResource; + + subResource = slice * numMipLevels + level; + ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource); + } + return ret; +} + + static void svga_texture_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) @@ -579,26 +642,25 @@ svga_texture_transfer_unmap(struct pipe_context *pipe, } else if (transfer->usage & PIPE_TRANSFER_WRITE) { struct svga_winsys_surface *surf = svga_texture(transfer->resource)->handle; - unsigned face; SVGA3dBox box; enum pipe_error ret; assert(svga_have_gb_objects(svga)); /* update the effected region */ - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { - face = transfer->box.z; - } else { - face = 0; - } - box.x = transfer->box.x; box.y = transfer->box.y; - if (tex->b.b.target == PIPE_TEXTURE_CUBE) { + switch (tex->b.b.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_2D_ARRAY: box.z = 0; - } - else { + break; + case PIPE_TEXTURE_1D_ARRAY: + box.y = box.z = 0; + break; + default: box.z = transfer->box.z; + break; } box.w = transfer->box.width; box.h = transfer->box.height; @@ -610,18 +672,21 @@ svga_texture_transfer_unmap(struct pipe_context *pipe, box.x, box.y, box.z, box.w, box.h, box.d); - ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level); - assert(ret == PIPE_OK); + if (svga_have_vgpu10(svga)) { + ret = update_image_vgpu10(svga, surf, &box, st->slice, transfer->level, + tex->b.b.last_level + 1); + } else { + ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level); } + + assert(ret == PIPE_OK); + (void) ret; } ss->texture_timestamp++; svga_age_texture_view(tex, transfer->level); if (transfer->resource->target == PIPE_TEXTURE_CUBE) - svga_define_texture_level(tex, transfer->box.z, transfer->level); + svga_define_texture_level(tex, st->slice, transfer->level); else svga_define_texture_level(tex, 0, transfer->level); @@ -635,7 +700,18 @@ svga_texture_transfer_unmap(struct pipe_context *pipe, } -struct u_resource_vtbl svga_texture_vtbl = +/** + * Does format store depth values? 
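/*
 * Illustrative sketch (not part of the imported Mesa diff): VGPU10 commands
 * such as SVGA3D_vgpu10_UpdateSubResource() address a single mip image by a
 * flat subresource index.  As update_image_vgpu10() above computes, all mip
 * levels of array slice 0 come first, then slice 1, and so on:
 * subResource = slice * numMipLevels + level.  The helpers below simply
 * restate that arithmetic.
 */
static inline unsigned
subresource_index(unsigned slice, unsigned level, unsigned num_mip_levels)
{
   return slice * num_mip_levels + level;
}

static inline void
subresource_decompose(unsigned sub_resource, unsigned num_mip_levels,
                      unsigned *slice, unsigned *level)
{
   *slice = sub_resource / num_mip_levels;
   *level = sub_resource % num_mip_levels;
}

/* Example: for a texture with last_level == 9 (10 mip levels), level 2 of
 * array slice 3 is subresource 3 * 10 + 2 == 32. */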
+ */ +static inline boolean +format_has_depth(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + return util_format_has_depth(desc); +} + + +struct u_resource_vtbl svga_texture_vtbl = { svga_texture_get_handle, /* get_handle */ svga_texture_destroy, /* resource_destroy */ @@ -651,57 +727,119 @@ svga_texture_create(struct pipe_screen *screen, const struct pipe_resource *template) { struct svga_screen *svgascreen = svga_screen(screen); - struct svga_texture *tex = CALLOC_STRUCT(svga_texture); + struct svga_texture *tex; + unsigned bindings = template->bind; - if (!tex) - goto error1; + assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS); + if (template->last_level >= SVGA_MAX_TEXTURE_LEVELS) { + return NULL; + } + + tex = CALLOC_STRUCT(svga_texture); + if (!tex) { + return NULL; + } + + tex->defined = CALLOC(template->depth0 * template->array_size, + sizeof(tex->defined[0])); + if (!tex->defined) { + FREE(tex); + return NULL; + } + + tex->rendered_to = CALLOC(template->depth0 * template->array_size, + sizeof(tex->rendered_to[0])); + if (!tex->rendered_to) { + FREE(tex->defined); + FREE(tex); + return NULL; + } tex->b.b = *template; tex->b.vtbl = &svga_texture_vtbl; pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; - assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS); - if(template->last_level >= SVGA_MAX_TEXTURE_LEVELS) - goto error2; - tex->key.flags = 0; tex->key.size.width = template->width0; tex->key.size.height = template->height0; tex->key.size.depth = template->depth0; + tex->key.arraySize = 1; + tex->key.numFaces = 1; + tex->key.sampleCount = template->nr_samples; - if(template->target == PIPE_TEXTURE_CUBE) { - tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; - tex->key.numFaces = 6; - } - else { - tex->key.numFaces = 1; + if (template->nr_samples > 1) { + tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS; } - if (template->target == PIPE_TEXTURE_3D) { - tex->key.flags |= SVGA3D_SURFACE_VOLUME; + if (svgascreen->sws->have_vgpu10) { + switch (template->target) { + case PIPE_TEXTURE_1D: + tex->key.flags |= SVGA3D_SURFACE_1D; + break; + case PIPE_TEXTURE_1D_ARRAY: + tex->key.flags |= SVGA3D_SURFACE_1D; + /* fall-through */ + case PIPE_TEXTURE_2D_ARRAY: + tex->key.flags |= SVGA3D_SURFACE_ARRAY; + tex->key.arraySize = template->array_size; + break; + case PIPE_TEXTURE_3D: + tex->key.flags |= SVGA3D_SURFACE_VOLUME; + break; + case PIPE_TEXTURE_CUBE: + tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY); + tex->key.numFaces = 6; + break; + default: + break; + } + } + else { + switch (template->target) { + case PIPE_TEXTURE_3D: + tex->key.flags |= SVGA3D_SURFACE_VOLUME; + break; + case PIPE_TEXTURE_CUBE: + tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; + tex->key.numFaces = 6; + break; + default: + break; + } } tex->key.cachable = 1; - if (template->bind & PIPE_BIND_SAMPLER_VIEW) + if (bindings & PIPE_BIND_SAMPLER_VIEW) { tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; + tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE; + + if (!(bindings & PIPE_BIND_RENDER_TARGET)) { + /* Also check if the format is renderable */ + if (screen->is_format_supported(screen, template->format, + template->target, + template->nr_samples, + PIPE_BIND_RENDER_TARGET)) { + bindings |= PIPE_BIND_RENDER_TARGET; + } + } + } - if (template->bind & PIPE_BIND_DISPLAY_TARGET) { + if (bindings & PIPE_BIND_DISPLAY_TARGET) { tex->key.cachable = 0; } - if (template->bind & PIPE_BIND_SHARED) { + if (bindings & PIPE_BIND_SHARED) { 
tex->key.cachable = 0; } - if (template->bind & (PIPE_BIND_SCANOUT | - PIPE_BIND_CURSOR)) { - tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT; + if (bindings & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) { + tex->key.scanout = 1; tex->key.cachable = 0; } - /* + /* * Note: Previously we never passed the * SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot * know beforehand whether a texture will be used as a rendertarget or not @@ -712,23 +850,55 @@ svga_texture_create(struct pipe_screen *screen, * (XA for example) uses it accurately and certain device versions * relies on it in certain situations to render correctly. */ - if((template->bind & PIPE_BIND_RENDER_TARGET) && - !util_format_is_s3tc(template->format)) + if ((bindings & PIPE_BIND_RENDER_TARGET) && + !util_format_is_s3tc(template->format)) { tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; - - if(template->bind & PIPE_BIND_DEPTH_STENCIL) + tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET; + } + + if (bindings & PIPE_BIND_DEPTH_STENCIL) { tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL; - + tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL; + } + tex->key.numMipLevels = template->last_level + 1; - - tex->key.format = svga_translate_format(svgascreen, template->format, template->bind); - if(tex->key.format == SVGA3D_FORMAT_INVALID) - goto error2; + + tex->key.format = svga_translate_format(svgascreen, template->format, + bindings); + if (tex->key.format == SVGA3D_FORMAT_INVALID) { + FREE(tex->defined); + FREE(tex->rendered_to); + FREE(tex); + return NULL; + } + + /* Use typeless formats for sRGB and depth resources. Typeless + * formats can be reinterpreted as other formats. For example, + * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as + * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM. 
+ */ + if (svgascreen->sws->have_vgpu10 && + (util_format_is_srgb(template->format) || + format_has_depth(template->format))) { + SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format); + if (0) { + debug_printf("Convert resource type %s -> %s (bind 0x%x)\n", + svga_format_name(tex->key.format), + svga_format_name(typeless), + bindings); + } + tex->key.format = typeless; + } SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle); - tex->handle = svga_screen_surface_create(svgascreen, &tex->key); - if (!tex->handle) - goto error2; + tex->handle = svga_screen_surface_create(svgascreen, bindings, + tex->b.b.usage, &tex->key); + if (!tex->handle) { + FREE(tex->defined); + FREE(tex->rendered_to); + FREE(tex); + return NULL; + } SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); @@ -736,20 +906,10 @@ svga_texture_create(struct pipe_screen *screen, (debug_reference_descriptor)debug_describe_resource, 0); tex->size = util_resource_size(template); - svgascreen->total_resource_bytes += tex->size; - - tex->rendered_to = CALLOC(template->depth0 * template->array_size, - sizeof(tex->rendered_to[0])); - if (!tex->rendered_to) - goto error2; + svgascreen->hud.total_resource_bytes += tex->size; + svgascreen->hud.num_resources++; return &tex->b.b; - -error2: - FREE(tex->rendered_to); - FREE(tex); -error1: - return NULL; } @@ -759,6 +919,7 @@ svga_texture_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle) { struct svga_winsys_screen *sws = svga_winsys_screen(screen); + struct svga_screen *ss = svga_screen(screen); struct svga_winsys_surface *srf; struct svga_texture *tex; enum SVGA3dSurfaceFormat format = 0; @@ -777,16 +938,28 @@ svga_texture_from_handle(struct pipe_screen *screen, if (!srf) return NULL; - if (svga_translate_format(svga_screen(screen), template->format, template->bind) != format) { - unsigned f1 = svga_translate_format(svga_screen(screen), template->format, template->bind); + if (svga_translate_format(svga_screen(screen), template->format, + template->bind) != format) { + unsigned f1 = svga_translate_format(svga_screen(screen), + template->format, template->bind); unsigned f2 = format; - /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */ - if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up. 
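/*
 * Illustrative sketch (not part of the imported Mesa diff): on VGPU10 the
 * resource above is created with a typeless surface format whenever its
 * gallium format is sRGB or contains depth, so that views created later can
 * reinterpret the same storage (the comment above gives
 * SVGA3D_R8G8B8A8_UNORM_TYPELESS viewed as either _UNORM or _UNORM_SRGB as
 * the example).  The enum and helper below are hypothetical stand-ins for
 * svga_typeless_format(), covering only that one format family.
 */
enum sketch_format {
   SKETCH_R8G8B8A8_UNORM,
   SKETCH_R8G8B8A8_UNORM_SRGB,
   SKETCH_R8G8B8A8_UNORM_TYPELESS
};

static enum sketch_format
sketch_typeless_format(enum sketch_format f)
{
   switch (f) {
   case SKETCH_R8G8B8A8_UNORM:
   case SKETCH_R8G8B8A8_UNORM_SRGB:
      return SKETCH_R8G8B8A8_UNORM_TYPELESS;  /* shared typeless parent */
   default:
      return f;                               /* already typeless here */
   }
}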
+ */ + if (f1 == SVGA3D_B8G8R8A8_UNORM) + f1 = SVGA3D_A8R8G8B8; + if (f1 == SVGA3D_B8G8R8X8_UNORM) + f1 = SVGA3D_X8R8G8B8; + + if ( !( (f1 == f2) || + (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_B8G8R8X8_UNORM) || (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) || + (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_B8G8R8A8_UNORM) || (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) || (f1 == SVGA3D_Z_DF24 && f2 == SVGA3D_Z_D24S8_INT) ) ) { - debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2); + debug_printf("%s wrong format %s != %s\n", __FUNCTION__, + svga_format_name(f1), svga_format_name(f2)); return NULL; } } @@ -795,6 +968,13 @@ svga_texture_from_handle(struct pipe_screen *screen, if (!tex) return NULL; + tex->defined = CALLOC(template->depth0 * template->array_size, + sizeof(tex->defined[0])); + if (!tex->defined) { + FREE(tex); + return NULL; + } + tex->b.b = *template; tex->b.vtbl = &svga_texture_vtbl; pipe_reference_init(&tex->b.b.reference, 1); @@ -803,9 +983,71 @@ svga_texture_from_handle(struct pipe_screen *screen, SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf); tex->key.cachable = 0; + tex->key.format = format; tex->handle = srf; tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0])); + tex->imported = TRUE; + + ss->hud.num_resources++; return &tex->b.b; } + +boolean +svga_texture_generate_mipmap(struct pipe_context *pipe, + struct pipe_resource *pt, + enum pipe_format format, + unsigned base_level, + unsigned last_level, + unsigned first_layer, + unsigned last_layer) +{ + struct pipe_sampler_view templ, *psv; + struct svga_pipe_sampler_view *sv; + struct svga_context *svga = svga_context(pipe); + struct svga_texture *tex = svga_texture(pt); + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + /* Only support 2D texture for now */ + if (pt->target != PIPE_TEXTURE_2D) + return FALSE; + + /* Fallback to the mipmap generation utility for those formats that + * do not support hw generate mipmap + */ + if (!svga_format_support_gen_mips(format)) + return FALSE; + + /* Make sure the texture surface was created with + * SVGA3D_SURFACE_BIND_RENDER_TARGET + */ + if (!tex->handle || !(tex->key.flags & SVGA3D_SURFACE_BIND_RENDER_TARGET)) + return FALSE; + + templ.format = format; + templ.u.tex.first_layer = first_layer; + templ.u.tex.last_layer = last_layer; + templ.u.tex.first_level = base_level; + templ.u.tex.last_level = last_level; + + psv = pipe->create_sampler_view(pipe, pt, &templ); + if (psv == NULL) + return FALSE; + + sv = svga_pipe_sampler_view(psv); + svga_validate_pipe_sampler_view(svga, sv); + + ret = SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle); + } + pipe_sampler_view_reference(&psv, NULL); + + svga->hud.num_generate_mipmap++; + + return TRUE; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h index 19dadfb88..99ba33b26 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h @@ -51,7 +51,7 @@ struct svga_texture { struct u_resource b; - boolean defined[6][SVGA_MAX_TEXTURE_LEVELS]; + ushort *defined; struct svga_sampler_view *cached_view; @@ -77,6 +77,12 @@ struct svga_texture */ struct svga_winsys_surface *handle; + /** + * Whether the host side surface is imported and not created by this + * driver. 
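/*
 * Illustrative sketch (not part of the imported Mesa diff): command emission
 * throughout this patch uses the same "try, flush, retry once" idiom seen in
 * svga_texture_generate_mipmap() above (and in update_image_vgpu9/10): if
 * the first attempt fails, typically because the command buffer is full, the
 * context is flushed and the command is issued again.  emit_cmd() below is a
 * hypothetical callback standing in for a specific SVGA3D_*() entry point.
 */
enum sketch_status { SKETCH_OK = 0, SKETCH_AGAIN = -1 };

typedef enum sketch_status (*emit_fn)(void *ctx);
typedef void (*flush_fn)(void *ctx);

static enum sketch_status
emit_with_retry(void *ctx, emit_fn emit_cmd, flush_fn flush)
{
   enum sketch_status ret = emit_cmd(ctx);
   if (ret != SKETCH_OK) {
      flush(ctx);            /* make room in the command buffer */
      ret = emit_cmd(ctx);   /* second attempt is expected to succeed */
   }
   return ret;
}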
+ */ + boolean imported; + unsigned size; /**< Approximate size in bytes */ /** array indexed by cube face or 3D/array slice, one bit per mipmap level */ @@ -91,7 +97,7 @@ struct svga_transfer { struct pipe_transfer base; - unsigned face; + unsigned slice; /**< array slice or cube face */ struct svga_winsys_buffer *hwbuf; @@ -135,29 +141,6 @@ svga_age_texture_view(struct svga_texture *tex, unsigned level) } -/** - * Mark the given texture face/level as being defined. - */ -static inline void -svga_define_texture_level(struct svga_texture *tex, - unsigned face,unsigned level) -{ - assert(face < Elements(tex->defined)); - assert(level < Elements(tex->defined[0])); - tex->defined[face][level] = TRUE; -} - - -static inline bool -svga_is_texture_level_defined(const struct svga_texture *tex, - unsigned face, unsigned level) -{ - assert(face < Elements(tex->defined)); - assert(level < Elements(tex->defined[0])); - return tex->defined[face][level]; -} - - /** For debugging, check that face and level are legal */ static inline void check_face_level(const struct svga_texture *tex, @@ -177,6 +160,27 @@ check_face_level(const struct svga_texture *tex, } +/** + * Mark the given texture face/level as being defined. + */ +static inline void +svga_define_texture_level(struct svga_texture *tex, + unsigned face,unsigned level) +{ + check_face_level(tex, face, level); + tex->defined[face] |= 1 << level; +} + + +static inline bool +svga_is_texture_level_defined(const struct svga_texture *tex, + unsigned face, unsigned level) +{ + check_face_level(tex, face, level); + return (tex->defined[face] & (1 << level)) != 0; +} + + static inline void svga_set_texture_rendered_to(struct svga_texture *tex, unsigned face, unsigned level) @@ -213,7 +217,14 @@ svga_texture_from_handle(struct pipe_screen * screen, const struct pipe_resource *template, struct winsys_handle *whandle); - +boolean +svga_texture_generate_mipmap(struct pipe_context *pipe, + struct pipe_resource *pt, + enum pipe_format format, + unsigned base_level, + unsigned last_level, + unsigned first_layer, + unsigned last_layer); #endif /* SVGA_TEXTURE_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c index 55dc49f2d..9c33a79db 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c @@ -48,9 +48,11 @@ svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv) { char res[128]; debug_describe_resource(res, sv->texture); - util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod); + util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", + res, sv->min_lod, sv->max_lod); } + struct svga_sampler_view * svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_resource *pt, @@ -58,16 +60,17 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_texture *tex = svga_texture(pt); + struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE; - SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, PIPE_BIND_SAMPLER_VIEW); + SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, + PIPE_BIND_SAMPLER_VIEW); boolean view = TRUE; assert(pt); assert(min_lod <= max_lod); assert(max_lod <= pt->last_level); - + assert(!svga_have_vgpu10(svga)); /* Is a view needed */ { 
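/*
 * Illustrative sketch (not part of the imported Mesa diff): the header
 * change above replaces the boolean defined[6][SVGA_MAX_TEXTURE_LEVELS]
 * matrix with one dynamically allocated ushort per face/slice, one bit per
 * mip level (see svga_define_texture_level() and
 * svga_is_texture_level_defined()).  The standalone helpers below restate
 * that bit bookkeeping.
 */
#include <assert.h>

typedef unsigned short level_mask;   /* one bit per mip level, as above */

static void
mark_level_defined(level_mask *slices, unsigned slice, unsigned level)
{
   assert(level < 8 * sizeof(level_mask));
   slices[slice] |= (level_mask)(1u << level);
}

static int
is_level_defined(const level_mask *slices, unsigned slice, unsigned level)
{
   assert(level < 8 * sizeof(level_mask));
   return (slices[slice] & (1u << level)) != 0;
}

/* Usage: an array or 3D texture keeps depth0 * array_size such masks,
 * matching the CALLOC(template->depth0 * template->array_size, ...) calls
 * in svga_texture_create() above. */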
@@ -143,17 +146,20 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->age = tex->age; - sv->handle = svga_texture_view_surface(svga, tex, flags, format, + sv->handle = svga_texture_view_surface(svga, tex, + PIPE_BIND_SAMPLER_VIEW, + flags, format, min_lod, max_lod - min_lod + 1, - -1, -1, + -1, 1, -1, &sv->key); if (!sv->handle) { sv->key.cachable = 0; sv->handle = tex->handle; debug_reference(&sv->reference, - (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + (debug_reference_descriptor) + svga_debug_describe_sampler_view, 0); return sv; } @@ -162,13 +168,16 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pipe_mutex_unlock(ss->tex_mutex); debug_reference(&sv->reference, - (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + (debug_reference_descriptor) + svga_debug_describe_sampler_view, 0); return sv; } + void -svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v) +svga_validate_sampler_view(struct svga_context *svga, + struct svga_sampler_view *v) { struct svga_texture *tex = svga_texture(v->texture); unsigned numFaces; @@ -177,13 +186,14 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * unsigned k; assert(svga); + assert(!svga_have_vgpu10(svga)); if (v->handle == tex->handle) return; age = tex->age; - if(tex->b.b.target == PIPE_TEXTURE_CUBE) + if (tex->b.b.target == PIPE_TEXTURE_CUBE) numFaces = 6; else numFaces = 1; @@ -204,12 +214,13 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * v->age = age; } + void svga_destroy_sampler_view_priv(struct svga_sampler_view *v) { struct svga_texture *tex = svga_texture(v->texture); - if(v->handle != tex->handle) { + if (v->handle != tex->handle) { struct svga_screen *ss = svga_screen(v->texture->screen); SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); svga_screen_surface_destroy(ss, &v->key, &v->handle); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h index 7f14323f8..15f2313c4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.h @@ -35,7 +35,9 @@ struct pipe_context; struct pipe_screen; struct svga_context; +struct svga_pipe_sampler_view; struct svga_winsys_surface; +struct svga_surface; enum SVGA3dSurfaceFormat; @@ -97,5 +99,13 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_ *ptr = v; } +boolean +svga_check_sampler_view_resource_collision(struct svga_context *svga, + struct svga_winsys_surface *res, + unsigned shader); + +enum pipe_error +svga_validate_pipe_sampler_view(struct svga_context *svga, + struct svga_pipe_sampler_view *sv); #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen.c b/lib/mesa/src/gallium/drivers/svga/svga_screen.c index 6539971e0..c9abd49ec 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.c @@ -34,31 +34,37 @@ #include "svga_context.h" #include "svga_format.h" #include "svga_screen.h" +#include "svga_tgsi.h" #include "svga_resource_texture.h" #include "svga_resource.h" #include "svga_debug.h" #include "svga3d_shaderdefs.h" +#include "VGPU10ShaderTokens.h" +/* NOTE: this constant may get moved into a svga3d*.h header file */ +#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024) #ifdef DEBUG int SVGA_DEBUG = 0; static const struct debug_named_value svga_debug_flags[] = { - { "dma", 
DEBUG_DMA, NULL }, - { "tgsi", DEBUG_TGSI, NULL }, - { "pipe", DEBUG_PIPE, NULL }, - { "state", DEBUG_STATE, NULL }, - { "screen", DEBUG_SCREEN, NULL }, - { "tex", DEBUG_TEX, NULL }, - { "swtnl", DEBUG_SWTNL, NULL }, - { "const", DEBUG_CONSTS, NULL }, - { "viewport", DEBUG_VIEWPORT, NULL }, - { "views", DEBUG_VIEWS, NULL }, - { "perf", DEBUG_PERF, NULL }, - { "flush", DEBUG_FLUSH, NULL }, - { "sync", DEBUG_SYNC, NULL }, - { "cache", DEBUG_CACHE, NULL }, + { "dma", DEBUG_DMA, NULL }, + { "tgsi", DEBUG_TGSI, NULL }, + { "pipe", DEBUG_PIPE, NULL }, + { "state", DEBUG_STATE, NULL }, + { "screen", DEBUG_SCREEN, NULL }, + { "tex", DEBUG_TEX, NULL }, + { "swtnl", DEBUG_SWTNL, NULL }, + { "const", DEBUG_CONSTS, NULL }, + { "viewport", DEBUG_VIEWPORT, NULL }, + { "views", DEBUG_VIEWS, NULL }, + { "perf", DEBUG_PERF, NULL }, + { "flush", DEBUG_FLUSH, NULL }, + { "sync", DEBUG_SYNC, NULL }, + { "cache", DEBUG_CACHE, NULL }, + { "streamout", DEBUG_STREAMOUT, NULL }, + { "query", DEBUG_QUERY, NULL }, DEBUG_NAMED_VALUE_END }; #endif @@ -80,18 +86,52 @@ svga_get_name( struct pipe_screen *pscreen ) */ build = "build: DEBUG;"; mutex = "mutex: " PIPE_ATOMIC ";"; -#ifdef HAVE_LLVM - llvm = "LLVM;"; -#endif #else build = "build: RELEASE;"; #endif +#ifdef HAVE_LLVM + llvm = "LLVM;"; +#endif util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm); return name; } +/** Helper for querying float-valued device cap */ +static float +get_float_cap(struct svga_winsys_screen *sws, unsigned cap, float defaultVal) +{ + SVGA3dDevCapResult result; + if (sws->get_cap(sws, cap, &result)) + return result.f; + else + return defaultVal; +} + + +/** Helper for querying uint-valued device cap */ +static unsigned +get_uint_cap(struct svga_winsys_screen *sws, unsigned cap, unsigned defaultVal) +{ + SVGA3dDevCapResult result; + if (sws->get_cap(sws, cap, &result)) + return result.u; + else + return defaultVal; +} + + +/** Helper for querying boolean-valued device cap */ +static boolean +get_bool_cap(struct svga_winsys_screen *sws, unsigned cap, boolean defaultVal) +{ + SVGA3dDevCapResult result; + if (sws->get_cap(sws, cap, &result)) + return result.b; + else + return defaultVal; +} static float @@ -99,7 +139,6 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_winsys_screen *sws = svgascreen->sws; - SVGA3dDevCapResult result; switch (param) { case PIPE_CAPF_MAX_LINE_WIDTH: @@ -113,12 +152,11 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param) return svgascreen->maxPointSize; case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, &result)) - return 4.0f; - return (float) result.u; + return (float) get_uint_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, 4); case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: return 15.0; + case PIPE_CAPF_GUARD_BAND_LEFT: case PIPE_CAPF_GUARD_BAND_TOP: case PIPE_CAPF_GUARD_BAND_RIGHT: @@ -145,7 +183,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - return 0; + /* + * "In virtually every OpenGL implementation and hardware, + * GL_MAX_DUAL_SOURCE_DRAW_BUFFERS is 1" + * http://www.opengl.org/wiki/Blending + */ + return sws->have_vgpu10 ? 
1 : 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -158,6 +201,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_QUERY_TIME_ELAPSED: return 0; + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return sws->have_vgpu10; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: @@ -170,7 +215,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_USER_CONSTANT_BUFFERS: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: - return 16; + return 256; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: { @@ -199,17 +244,20 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return MIN2(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS), 12 /* 2048x2048 */); + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */ return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: return 1; case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 0; + return sws->have_vgpu10; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: return 0; case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 1; + return !sws->have_vgpu10; case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: return 1; /* The color outputs of vertex shaders are not clamped */ @@ -222,7 +270,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; /* expected for GL_ARB_framebuffer_object */ case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 120; + return sws->have_vgpu10 ? 330 : 120; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return 0; @@ -230,54 +278,75 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SM3: return 1; - /* Unsupported features */ - case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_SEAMLESS_CUBE_MAP: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_FAKE_SW_MSAA: + return sws->have_vgpu10; + + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return sws->have_vgpu10 ? SVGA3D_DX_MAX_SOTARGETS : 0; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + return sws->have_vgpu10 ? 4 : 0; + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return 0; + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return svgascreen->ms_samples ? 1 : 0; + + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return SVGA3D_DX_MAX_RESOURCE_SIZE; + case PIPE_CAP_MIN_TEXEL_OFFSET: + return sws->have_vgpu10 ? VGPU10_MIN_TEXEL_FETCH_OFFSET : 0; case PIPE_CAP_MAX_TEXEL_OFFSET: + return sws->have_vgpu10 ? 
VGPU10_MAX_TEXEL_FETCH_OFFSET : 0; + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: - case PIPE_CAP_CONDITIONAL_RENDER: - case PIPE_CAP_TEXTURE_BARRIER: - case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: - case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return 0; + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + return sws->have_vgpu10 ? 256 : 0; case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return sws->have_vgpu10 ? 1024 : 0; + + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; /* may be a sw fallback, depending on restart index */ + + case PIPE_CAP_GENERATE_MIPMAP: + return sws->have_vgpu10; + + /* Unsupported features */ + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_MAX_VERTEX_STREAMS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_COMPUTE: case PIPE_CAP_START_INSTANCE: - case PIPE_CAP_QUERY_TIMESTAMP: - case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_CUBE_MAP_ARRAY: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: - case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: @@ -285,11 +354,19 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return 64; + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: - return 1; + return 1; /* need 4-byte alignment for all offsets and strides */ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; case PIPE_CAP_MAX_VIEWPORTS: @@ -313,6 +390,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; } @@ -320,11 +407,16 @@ svga_get_param(struct pipe_screen *screen, enum 
pipe_cap param) return 0; } -static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) + +static int +vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, + enum pipe_shader_cap param) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_winsys_screen *sws = svgascreen->sws; - SVGA3dDevCapResult result; + unsigned val; + + assert(!sws->have_vgpu10); switch (shader) { @@ -347,9 +439,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return 1; case PIPE_SHADER_CAP_MAX_TEMPS: - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, &result)) - return 32; - return MIN2(result.u, SVGA3D_TEMPREG_MAX); + val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, 32); + return MIN2(val, SVGA3D_TEMPREG_MAX); case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: /* * Although PS 3.0 has some addressing abilities it can only represent @@ -377,11 +468,15 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -394,9 +489,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, &result)) - return 512; - return result.u; + return get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, + 512); case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: /* XXX: until we have vertex texture support */ @@ -412,9 +506,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return 1; case PIPE_SHADER_CAP_MAX_TEMPS: - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, &result)) - return 32; - return MIN2(result.u, SVGA3D_TEMPREG_MAX); + val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, 32); + return MIN2(val, SVGA3D_TEMPREG_MAX); case PIPE_SHADER_CAP_MAX_PREDS: return 1; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -437,11 +530,15 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -463,8 +560,108 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en } +static int +vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader, + enum pipe_shader_cap param) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_winsys_screen *sws = svgascreen->sws; + + 
assert(sws->have_vgpu10); + (void) sws; /* silence unused var warnings in non-debug builds */ + + /* Only VS, GS, FS supported */ + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_GEOMETRY && + shader != PIPE_SHADER_FRAGMENT) { + return 0; + } + + /* NOTE: we do not query the device for any caps/limits at this time */ + + /* Generally the same limits for vertex, geometry and fragment shaders */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 64 * 1024; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 64; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VGPU10_MAX_FS_INPUTS; + else if (shader == PIPE_SHADER_GEOMETRY) + return VGPU10_MAX_GS_INPUTS; + else + return VGPU10_MAX_VS_INPUTS; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VGPU10_MAX_FS_OUTPUTS; + else if (shader == PIPE_SHADER_GEOMETRY) + return VGPU10_MAX_GS_OUTPUTS; + else + return VGPU10_MAX_VS_OUTPUTS; + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]); + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return svgascreen->max_const_buffers; + case PIPE_SHADER_CAP_MAX_TEMPS: + return VGPU10_MAX_TEMPS; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return TRUE; /* XXX verify */ + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: + return TRUE; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + return SVGA3D_DX_MAX_SAMPLERS; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_DOUBLES: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + default: + debug_printf("Unexpected vgpu10 shader query %u\n", param); + return 0; + } + return 0; +} + + +static int +svga_get_shader_param(struct pipe_screen *screen, unsigned shader, + enum pipe_shader_cap param) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_winsys_screen *sws = svgascreen->sws; + if (sws->have_vgpu10) { + return vgpu10_get_shader_param(screen, shader, param); + } + else { + return vgpu9_get_shader_param(screen, shader, param); + } +} + + /** - * Implemnt pipe_screen::is_format_supported(). + * Implement pipe_screen::is_format_supported(). * \param bindings bitmask of PIPE_BIND_x flags */ static boolean @@ -482,7 +679,12 @@ svga_is_format_supported( struct pipe_screen *screen, assert(bindings); if (sample_count > 1) { - return FALSE; + /* In ms_samples, if bit N is set it means that we support + * multisample with N+1 samples per pixel. 
+ */ + if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) { + return FALSE; + } } svga_format = svga_translate_format(ss, format, bindings); @@ -490,6 +692,22 @@ svga_is_format_supported( struct pipe_screen *screen, return FALSE; } + /* we don't support sRGB rendering into display targets */ + if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) { + return FALSE; + } + + /* + * For VGPU10 vertex formats, skip querying host capabilities + */ + + if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) { + SVGA3dSurfaceFormat svga_format; + unsigned flags; + svga_translate_vertex_format_vgpu10(format, &svga_format, &flags); + return svga_format != SVGA3D_FORMAT_INVALID; + } + /* * Override host capabilities, so that we end up with the same * visuals for all virtual hardware implementations. @@ -502,6 +720,12 @@ svga_is_format_supported( struct pipe_screen *screen, case SVGA3D_R5G6B5: break; + /* VGPU10 formats */ + case SVGA3D_B8G8R8A8_UNORM: + case SVGA3D_B8G8R8X8_UNORM: + case SVGA3D_B5G6R5_UNORM: + break; + /* Often unsupported/problematic. This means we end up with the same * visuals for all virtual hardware implementations. */ @@ -520,22 +744,32 @@ svga_is_format_supported( struct pipe_screen *screen, svga_get_format_cap(ss, svga_format, &caps); + if (bindings & PIPE_BIND_RENDER_TARGET) { + /* Check that the color surface is blendable, unless it's an + * integer format. + */ + if (!svga_format_is_integer(svga_format) && + (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) { + return FALSE; + } + } + mask.value = 0; if (bindings & PIPE_BIND_RENDER_TARGET) { - mask.offscreenRenderTarget = 1; + mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET; } if (bindings & PIPE_BIND_DEPTH_STENCIL) { - mask.zStencil = 1; + mask.value |= SVGA3DFORMAT_OP_ZSTENCIL; } if (bindings & PIPE_BIND_SAMPLER_VIEW) { - mask.texture = 1; + mask.value |= SVGA3DFORMAT_OP_TEXTURE; } if (target == PIPE_TEXTURE_CUBE) { - mask.cubeTexture = 1; + mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE; } - if (target == PIPE_TEXTURE_3D) { - mask.volumeTexture = 1; + else if (target == PIPE_TEXTURE_3D) { + mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE; } return (caps.value & mask.value) == mask.value; @@ -574,11 +808,41 @@ svga_get_driver_query_info(struct pipe_screen *screen, unsigned index, struct pipe_driver_query_info *info) { +#define QUERY(NAME, ENUM, UNITS) \ + {NAME, ENUM, {0}, UNITS, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, 0, 0x0} + static const struct pipe_driver_query_info queries[] = { - {"draw-calls", SVGA_QUERY_DRAW_CALLS, {0}}, - {"fallbacks", SVGA_QUERY_FALLBACKS, {0}}, - {"memory-used", SVGA_QUERY_MEMORY_USED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES} + /* per-frame counters */ + QUERY("num-draw-calls", SVGA_QUERY_NUM_DRAW_CALLS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-fallbacks", SVGA_QUERY_NUM_FALLBACKS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-flushes", SVGA_QUERY_NUM_FLUSHES, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-validations", SVGA_QUERY_NUM_VALIDATIONS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, + PIPE_DRIVER_QUERY_TYPE_MICROSECONDS), + QUERY("num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, + PIPE_DRIVER_QUERY_TYPE_BYTES), + + /* running total counters */ + QUERY("memory-used", SVGA_QUERY_MEMORY_USED, + PIPE_DRIVER_QUERY_TYPE_BYTES), + QUERY("num-shaders", SVGA_QUERY_NUM_SHADERS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + 
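/*
 * Illustrative sketch (not part of the imported Mesa diff): as the comment
 * in svga_is_format_supported() above explains, ms_samples is a bitmask in
 * which a set bit N means "N+1 samples per pixel is supported".  The helper
 * below restates just that sample-count test; the real code still goes on
 * to check the per-format capabilities afterwards.
 */
static int
sample_count_supported(unsigned ms_samples_mask, unsigned sample_count)
{
   if (sample_count <= 1)
      return 1;   /* single-sampled surfaces are always allowed */
   return (ms_samples_mask & (1u << (sample_count - 1))) != 0;
}

/* Example: a mask of 0x0a (bits 1 and 3 set) advertises 2x and 4x MSAA. */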
QUERY("num-resources", SVGA_QUERY_NUM_RESOURCES, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-state-objects", SVGA_QUERY_NUM_STATE_OBJECTS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-surface-views", SVGA_QUERY_NUM_SURFACE_VIEWS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-generate-mipmap", SVGA_QUERY_NUM_GENERATE_MIPMAP, + PIPE_DRIVER_QUERY_TYPE_UINT64), }; +#undef QUERY if (!info) return Elements(queries); @@ -615,8 +879,6 @@ svga_screen_create(struct svga_winsys_screen *sws) { struct svga_screen *svgascreen; struct pipe_screen *screen; - SVGA3dDevCapResult result; - boolean use_vs30, use_ps30; #ifdef DEBUG SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 ); @@ -646,6 +908,7 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->get_param = svga_get_param; screen->get_shader_param = svga_get_shader_param; screen->get_paramf = svga_get_paramf; + screen->get_timestamp = NULL; screen->is_format_supported = svga_is_format_supported; screen->context_create = svga_context_create; screen->fence_reference = svga_fence_reference; @@ -661,18 +924,6 @@ svga_screen_create(struct svga_winsys_screen *sws) svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1; } - use_ps30 = - sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && - result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE; - - use_vs30 = - sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) && - result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE; - - /* we require Shader model 3.0 or later */ - if (!use_ps30 || !use_vs30) - goto error2; - /* * The D16, D24X8, and D24S8 formats always do an implicit shadow compare * when sampled from, where as the DF16, DF24, and D24S8_INT do not. So @@ -720,46 +971,77 @@ svga_screen_create(struct svga_winsys_screen *sws) /* Query device caps */ - if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, &result)) - svgascreen->haveLineStipple = FALSE; - else - svgascreen->haveLineStipple = result.u; + if (sws->have_vgpu10) { + svgascreen->haveProvokingVertex + = get_bool_cap(sws, SVGA3D_DEVCAP_DX_PROVOKING_VERTEX, FALSE); + svgascreen->haveLineSmooth = TRUE; + svgascreen->maxPointSize = 80.0F; + svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS; + + /* Multisample samples per pixel */ + svgascreen->ms_samples = + get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0); + + /* Maximum number of constant buffers */ + svgascreen->max_const_buffers = + get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1); + assert(svgascreen->max_const_buffers <= SVGA_MAX_CONST_BUFS); + } + else { + /* VGPU9 */ + unsigned vs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, + SVGA3DVSVERSION_NONE); + unsigned fs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, + SVGA3DPSVERSION_NONE); + + /* we require Shader model 3.0 or later */ + if (fs_ver < SVGA3DPSVERSION_30 || vs_ver < SVGA3DVSVERSION_30) { + goto error2; + } - if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_AA, &result)) - svgascreen->haveLineSmooth = FALSE; - else - svgascreen->haveLineSmooth = result.u; + svgascreen->haveProvokingVertex = FALSE; - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, &result)) - svgascreen->maxLineWidth = 1.0F; - else - svgascreen->maxLineWidth = result.f; + svgascreen->haveLineSmooth = + get_bool_cap(sws, SVGA3D_DEVCAP_LINE_AA, FALSE); - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, &result)) - svgascreen->maxLineWidthAA = 1.0F; - else - svgascreen->maxLineWidthAA = result.f; + svgascreen->maxPointSize = + get_float_cap(sws, 
SVGA3D_DEVCAP_MAX_POINT_SIZE, 1.0f); + /* Keep this to a reasonable size to avoid failures in conform/pntaa.c */ + svgascreen->maxPointSize = MIN2(svgascreen->maxPointSize, 80.0f); + + /* The SVGA3D device always supports 4 targets at this time, regardless + * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return. + */ + svgascreen->max_color_buffers = 4; + + /* Only support one constant buffer + */ + svgascreen->max_const_buffers = 1; - if (0) + /* No multisampling */ + svgascreen->ms_samples = 0; + } + + /* common VGPU9 / VGPU10 caps */ + svgascreen->haveLineStipple = + get_bool_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, FALSE); + + svgascreen->maxLineWidth = + get_float_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, 1.0f); + + svgascreen->maxLineWidthAA = + get_float_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, 1.0f); + + if (0) { + debug_printf("svga: haveProvokingVertex %u\n", + svgascreen->haveProvokingVertex); debug_printf("svga: haveLineStip %u " "haveLineSmooth %u maxLineWidth %f\n", svgascreen->haveLineStipple, svgascreen->haveLineSmooth, svgascreen->maxLineWidth); - - if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, &result)) { - svgascreen->maxPointSize = 1.0F; - } else { - /* Keep this to a reasonable size to avoid failures in - * conform/pntaa.c: - */ - svgascreen->maxPointSize = MIN2(result.f, 80.0f); + debug_printf("svga: maxPointSize %g\n", svgascreen->maxPointSize); } - /* The SVGA3D device always supports 4 targets at this time, regardless - * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return. - */ - svgascreen->max_color_buffers = 4; - pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen.h b/lib/mesa/src/gallium/drivers/svga/svga_screen.h index ea1e743df..98b56b2a6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.h @@ -1,4 +1,4 @@ -/********************************************************** + /********************************************************** * Copyright 2008-2009 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person @@ -48,10 +48,13 @@ struct svga_screen SVGA3dHardwareVersion hw_version; /** Device caps */ + boolean haveProvokingVertex; boolean haveLineStipple, haveLineSmooth; float maxLineWidth, maxLineWidthAA; float maxPointSize; unsigned max_color_buffers; + unsigned max_const_buffers; + unsigned ms_samples; struct { boolean force_level_surface_view; @@ -69,6 +72,7 @@ struct svga_screen /* which formats to translate depth formats into */ struct { enum SVGA3dSurfaceFormat z16; + /* note gallium order */ enum SVGA3dSurfaceFormat x8z24; enum SVGA3dSurfaceFormat s8z24; @@ -76,8 +80,12 @@ struct svga_screen struct svga_host_surface_cache cache; - /** Memory used by all resources (buffers and surfaces) */ - uint64_t total_resource_bytes; + /** HUD counters */ + struct { + /** Memory used by all resources (buffers and surfaces) */ + uint64_t total_resource_bytes; + uint64_t num_resources; + } hud; }; #ifndef DEBUG diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c index 3c765394a..5b4412957 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c @@ -115,8 +115,14 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, assert(entry->handle); + /* If the key matches and the fence is signalled (the surface is no + * longer needed) the lookup was successful. We found a surface that + * can be reused. + * We unlink the surface from the cache entry and we add the entry to + * the 'empty' list. + */ if (memcmp(&entry->key, key, sizeof *key) == 0 && - sws->fence_signalled(sws, entry->fence, 0) == 0) { + sws->fence_signalled(sws, entry->fence, 0) == 0) { unsigned surf_size; assert(sws->surface_is_flushed(sws, entry->handle)); @@ -124,10 +130,13 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, handle = entry->handle; /* Reference is transfered here. */ entry->handle = NULL; + /* Remove from hash table */ LIST_DEL(&entry->bucket_head); + /* remove from LRU list */ LIST_DEL(&entry->head); + /* Add the cache entry (but not the surface!) to the empty list */ LIST_ADD(&entry->head, &cache->empty); /* update the cache size */ @@ -195,7 +204,8 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen, /** - * Transfers a handle reference. + * Add a surface to the cache. This is done when the driver deletes + * the surface. Note: transfers a handle reference. */ static void svga_screen_cache_add(struct svga_screen *svgascreen, @@ -207,17 +217,17 @@ svga_screen_cache_add(struct svga_screen *svgascreen, struct svga_host_surface_cache_entry *entry = NULL; struct svga_winsys_surface *handle = *p_handle; unsigned surf_size; - + assert(key->cachable); if (!handle) return; - + surf_size = surface_size(key); *p_handle = NULL; pipe_mutex_lock(cache->mutex); - + if (surf_size >= SVGA_HOST_SURFACE_CACHE_BYTES) { /* this surface is too large to cache, just free it */ sws->surface_reference(sws, &handle, NULL); @@ -245,10 +255,13 @@ svga_screen_cache_add(struct svga_screen *svgascreen, } if (!LIST_IS_EMPTY(&cache->empty)) { - /* use the first empty entry */ + /* An empty entry has no surface associated with it. + * Use the first empty entry. 
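/*
 * Illustrative sketch (not part of the imported Mesa diff): as the comments
 * added to svga_screen_cache_lookup() above describe, a cached host surface
 * is only handed back when its key matches exactly and its fence has
 * already signalled, i.e. the GPU no longer references it (the winsys
 * fence_signalled() call returns 0 once the fence has signalled, hence the
 * "== 0" test).  The predicate below restates that condition with
 * hypothetical types.
 */
#include <string.h>

struct sketch_cache_entry {
   int key[8];            /* stands in for svga_host_surface_cache_key */
   int fence_signalled;   /* 0 once the winsys fence has signalled */
};

static int
sketch_can_reuse(const struct sketch_cache_entry *entry, const int key[8])
{
   return memcmp(entry->key, key, sizeof entry->key) == 0 &&
          entry->fence_signalled == 0;
}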
+ */ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head); + /* Remove from LRU list */ LIST_DEL(&entry->head); } else if (!LIST_IS_EMPTY(&cache->unused)) { @@ -262,12 +275,15 @@ svga_screen_cache_add(struct svga_screen *svgascreen, sws->surface_reference(sws, &entry->handle, NULL); + /* Remove from hash table */ LIST_DEL(&entry->bucket_head); + /* Remove from LRU list */ LIST_DEL(&entry->head); } if (entry) { + assert(entry->handle == NULL); entry->handle = handle; memcpy(&entry->key, key, sizeof entry->key); @@ -304,6 +320,7 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, pipe_mutex_lock(cache->mutex); + /* Loop over entries in the validated list */ curr = cache->validated.next; next = curr->next; while (curr != &cache->validated) { @@ -312,12 +329,15 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, assert(entry->handle); if (sws->surface_is_flushed(sws, entry->handle)) { + /* remove entry from LRU list */ LIST_DEL(&entry->head); svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence); + /* Add entry to the unused list */ LIST_ADD(&entry->head, &cache->unused); + /* Add entry to the hash table bucket */ bucket = svga_screen_cache_bucket(&entry->key); LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]); } @@ -388,9 +408,12 @@ svga_screen_cache_init(struct svga_screen *svgascreen) * Allocate a new host-side surface. If the surface is marked as cachable, * first try re-using a surface in the cache of freed surfaces. Otherwise, * allocate a new surface. + * \param bind_flags bitmask of PIPE_BIND_x flags + * \param usage one of PIPE_USAGE_x values */ struct svga_winsys_surface * svga_screen_surface_create(struct svga_screen *svgascreen, + unsigned bind_flags, unsigned usage, struct svga_host_surface_cache_key *key) { struct svga_winsys_screen *sws = svgascreen->sws; @@ -398,17 +421,20 @@ svga_screen_surface_create(struct svga_screen *svgascreen, boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable; SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, - "%s sz %dx%dx%d mips %d faces %d cachable %d\n", + "%s sz %dx%dx%d mips %d faces %d arraySize %d cachable %d\n", __FUNCTION__, key->size.width, key->size.height, key->size.depth, key->numMipLevels, key->numFaces, + key->arraySize, key->cachable); if (cachable) { if (key->format == SVGA3D_BUFFER) { + SVGA3dSurfaceFlags hint_flag; + /* For buffers, round the buffer size up to the nearest power * of two to increase the probability of cache hits. Keep * texture surface dimensions unchanged. @@ -417,15 +443,33 @@ svga_screen_surface_create(struct svga_screen *svgascreen, while (size < key->size.width) size <<= 1; key->size.width = size; - /* Since we're reusing buffers we're effectively transforming all - * of them into dynamic buffers. - * - * It would be nice to not cache long lived static buffers. But there - * is no way to detect the long lived from short lived ones yet. A - * good heuristic would be buffer size. - */ - key->flags &= ~SVGA3D_SURFACE_HINT_STATIC; - key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC; + + /* Determine whether the buffer is static or dynamic. + * This is a bit of a heuristic which can be tuned as needed. + */ + if (usage == PIPE_USAGE_DEFAULT || + usage == PIPE_USAGE_IMMUTABLE) { + hint_flag = SVGA3D_SURFACE_HINT_STATIC; + } + else if (bind_flags & PIPE_BIND_INDEX_BUFFER) { + /* Index buffers don't change too often. Mark them as static. 
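/*
 * Illustrative sketch (not part of the imported Mesa diff):
 * svga_screen_surface_create() above rounds SVGA3D_BUFFER widths up to the
 * nearest power of two so that buffers of similar size share cache entries
 * and host surfaces get reused more often; the initial value of 'size' is
 * outside the visible hunk, so the sketch simply starts at 1.
 */
static unsigned
round_up_pow2(unsigned width)
{
   unsigned size = 1;
   while (size < width)
      size <<= 1;          /* same doubling loop as the code above */
   return size;
}

/* Example: widths 1025..2048 all map to 2048, so they can end up reusing
 * the same cached host buffer. */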
+ */ + hint_flag = SVGA3D_SURFACE_HINT_STATIC; + } + else { + /* Since we're reusing buffers we're effectively transforming all + * of them into dynamic buffers. + * + * It would be nice to not cache long lived static buffers. But there + * is no way to detect the long lived from short lived ones yet. A + * good heuristic would be buffer size. + */ + hint_flag = SVGA3D_SURFACE_HINT_DYNAMIC; + } + + key->flags &= ~(SVGA3D_SURFACE_HINT_STATIC | + SVGA3D_SURFACE_HINT_DYNAMIC); + key->flags |= hint_flag; } handle = svga_screen_cache_lookup(svgascreen, key); @@ -436,24 +480,32 @@ svga_screen_surface_create(struct svga_screen *svgascreen, key->size.width); else SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, - "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, + "reuse sid %p sz %dx%dx%d mips %d faces %d arraySize %d\n", handle, key->size.width, key->size.height, key->size.depth, key->numMipLevels, - key->numFaces); + key->numFaces, + key->arraySize); } } if (!handle) { + unsigned usage = 0; + + if (!key->cachable) + usage |= SVGA_SURFACE_USAGE_SHARED; + if (key->scanout) + usage |= SVGA_SURFACE_USAGE_SCANOUT; + handle = sws->surface_create(sws, key->flags, key->format, - key->cachable ? - 0 : SVGA_SURFACE_USAGE_SHARED, + usage, key->size, - key->numFaces, - key->numMipLevels); + key->numFaces * key->arraySize, + key->numMipLevels, + key->sampleCount); if (handle) SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, " CREATE sid %p sz %dx%dx%d\n", diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h index 56ac62b39..424eb2c5a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h @@ -62,9 +62,12 @@ struct svga_host_surface_cache_key SVGA3dSurfaceFlags flags; SVGA3dSurfaceFormat format; SVGA3dSize size; - uint32_t numFaces:24; - uint32_t numMipLevels:7; + uint32_t numFaces:3; + uint32_t arraySize:16; + uint32_t numMipLevels:6; uint32_t cachable:1; /* False if this is a shared surface */ + uint32_t sampleCount:5; + uint32_t scanout:1; }; @@ -137,6 +140,7 @@ svga_screen_cache_init(struct svga_screen *svgascreen); struct svga_winsys_surface * svga_screen_surface_create(struct svga_screen *svgascreen, + unsigned bind_flags, unsigned usage, struct svga_host_surface_cache_key *key); void diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.c b/lib/mesa/src/gallium/drivers/svga/svga_shader.c index 46efa07df..5c99e16d9 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_shader.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.c @@ -27,14 +27,318 @@ #include "util/u_memory.h" #include "svga_context.h" #include "svga_cmd.h" +#include "svga_format.h" #include "svga_shader.h" +/** + * This bit isn't really used anywhere. It only serves to help + * generate a unique "signature" for the vertex shader output bitmask. + * Shader input/output signatures are used to resolve shader linking + * issues. + */ +#define FOG_GENERIC_BIT (((uint64_t) 1) << 63) + + +/** + * Use the shader info to generate a bitmask indicating which generic + * inputs are used by the shader. A set bit indicates that GENERIC[i] + * is used. 
+ */ +uint64_t +svga_get_generic_inputs_mask(const struct tgsi_shader_info *info) +{ + unsigned i; + uint64_t mask = 0x0; + + for (i = 0; i < info->num_inputs; i++) { + if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + unsigned j = info->input_semantic_index[i]; + assert(j < sizeof(mask) * 8); + mask |= ((uint64_t) 1) << j; + } + } + + return mask; +} + + +/** + * Scan shader info to return a bitmask of written outputs. + */ +uint64_t +svga_get_generic_outputs_mask(const struct tgsi_shader_info *info) +{ + unsigned i; + uint64_t mask = 0x0; + + for (i = 0; i < info->num_outputs; i++) { + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_GENERIC: + { + unsigned j = info->output_semantic_index[i]; + assert(j < sizeof(mask) * 8); + mask |= ((uint64_t) 1) << j; + } + break; + case TGSI_SEMANTIC_FOG: + mask |= FOG_GENERIC_BIT; + break; + } + } + + return mask; +} + + + +/** + * Given a mask of used generic variables (as returned by the above functions) + * fill in a table which maps those indexes to small integers. + * This table is used by the remap_generic_index() function in + * svga_tgsi_decl_sm30.c + * Example: if generics_mask = binary(1010) it means that GENERIC[1] and + * GENERIC[3] are used. The remap_table will contain: + * table[1] = 0; + * table[3] = 1; + * The remaining table entries will be filled in with the next unused + * generic index (in this example, 2). + */ +void +svga_remap_generics(uint64_t generics_mask, + int8_t remap_table[MAX_GENERIC_VARYING]) +{ + /* Note texcoord[0] is reserved so start at 1 */ + unsigned count = 1, i; + + for (i = 0; i < MAX_GENERIC_VARYING; i++) { + remap_table[i] = -1; + } + + /* for each bit set in generic_mask */ + while (generics_mask) { + unsigned index = ffsll(generics_mask) - 1; + remap_table[index] = count++; + generics_mask &= ~((uint64_t) 1 << index); + } +} + + +/** + * Use the generic remap table to map a TGSI generic varying variable + * index to a small integer. If the remapping table doesn't have a + * valid value for the given index (the table entry is -1) it means + * the fragment shader doesn't use that VS output. Just allocate + * the next free value in that case. Alternately, we could cull + * VS instructions that write to register, or replace the register + * with a dummy temp register. + * XXX TODO: we should do one of the later as it would save precious + * texcoord registers. + */ +int +svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], + int generic_index) +{ + assert(generic_index < MAX_GENERIC_VARYING); + + if (generic_index >= MAX_GENERIC_VARYING) { + /* just don't return a random/garbage value */ + generic_index = MAX_GENERIC_VARYING - 1; + } + + if (remap_table[generic_index] == -1) { + /* This is a VS output that has no matching PS input. Find a + * free index. + */ + int i, max = 0; + for (i = 0; i < MAX_GENERIC_VARYING; i++) { + max = MAX2(max, remap_table[i]); + } + remap_table[generic_index] = max + 1; + } + + return remap_table[generic_index]; +} + + +/** + * Initialize the shader-neutral fields of svga_compile_key from context + * state. This is basically the texture-related state. 
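The generic-varying helpers above (input/output masks plus the remap table) are typically used together; a minimal usage sketch, assuming the svga_fragment_shader fields declared later in this patch (illustrative only, not patch code):

   /* Collect the GENERIC[] indices the fragment shader reads, then build
    * the table that maps those sparse indices onto consecutive slots. */
   uint64_t used = svga_get_generic_inputs_mask(&fs->base.info);
   svga_remap_generics(used, fs->generic_remap_table);
   /* Entries for unused indices stay -1; svga_remap_generic_index() later
    * assigns a free slot on demand for VS outputs with no matching FS input. */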
+ */ +void +svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, + struct svga_compile_key *key) +{ + unsigned i, idx = 0; + + assert(shader < Elements(svga->curr.num_sampler_views)); + + for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) { + struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; + if (view) { + assert(svga->curr.sampler[shader][i]); + assert(view->texture); + assert(view->texture->target < (1 << 4)); /* texture_target:4 */ + + key->tex[i].texture_target = view->texture->target; + + /* 1D/2D array textures with one slice are treated as non-arrays + * by the SVGA3D device. Convert the texture type here so that + * we emit the right TEX/SAMPLE instruction in the shader. + */ + if (view->texture->array_size == 1) { + if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) { + key->tex[i].texture_target = PIPE_TEXTURE_1D; + } + else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) { + key->tex[i].texture_target = PIPE_TEXTURE_2D; + } + } + + key->tex[i].texture_msaa = view->texture->nr_samples > 1; + if (!svga->curr.sampler[shader][i]->normalized_coords) { + assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */ + key->tex[i].width_height_idx = idx++; + key->tex[i].unnormalized = TRUE; + ++key->num_unnormalized_coords; + } + + key->tex[i].swizzle_r = view->swizzle_r; + key->tex[i].swizzle_g = view->swizzle_g; + key->tex[i].swizzle_b = view->swizzle_b; + key->tex[i].swizzle_a = view->swizzle_a; + + key->tex[i].return_type = svga_get_texture_datatype(view->format); + } + } + key->num_textures = svga->curr.num_sampler_views[shader]; +} + + +/** Search for a compiled shader variant with the same compile key */ +struct svga_shader_variant * +svga_search_shader_key(const struct svga_shader *shader, + const struct svga_compile_key *key) +{ + struct svga_shader_variant *variant = shader->variants; + + assert(key); + + for ( ; variant; variant = variant->next) { + if (svga_compile_keys_equal(key, &variant->key)) + return variant; + } + return NULL; +} + +/** Search for a shader with the same token key */ +struct svga_shader * +svga_search_shader_token_key(struct svga_shader *pshader, + const struct svga_token_key *key) +{ + struct svga_shader *shader = pshader; + + assert(key); + + for ( ; shader; shader = shader->next) { + if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0) + return shader; + } + return NULL; +} + +/** + * Helper function to define a gb shader for non-vgpu10 device + */ +static enum pipe_error +define_gb_shader_vgpu9(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant, + unsigned codeLen) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret; + + /** + * Create gb memory for the shader and upload the shader code. + * Kernel module will allocate an id for the shader and issue + * the DefineGBShader command. + */ + variant->gb_shader = sws->shader_create(sws, type, + variant->tokens, codeLen); + + if (!variant->gb_shader) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader); + + return ret; +} + +/** + * Helper function to define a gb shader for vgpu10 device + */ +static enum pipe_error +define_gb_shader_vgpu10(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant, + unsigned codeLen) +{ + struct svga_winsys_context *swc = svga->swc; + enum pipe_error ret; + + /** + * Shaders in VGPU10 enabled device reside in the device COTable. 
+ * SVGA driver will allocate an integer ID for the shader and + * issue DXDefineShader and DXBindShader commands. + */ + variant->id = util_bitmask_add(svga->shader_id_bm); + if (variant->id == UTIL_BITMASK_INVALID_INDEX) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + + /* Create gb memory for the shader and upload the shader code */ + variant->gb_shader = swc->shader_create(swc, + variant->id, type, + variant->tokens, codeLen); + + if (!variant->gb_shader) { + /* Free the shader ID */ + assert(variant->id != UTIL_BITMASK_INVALID_INDEX); + goto fail_no_allocation; + } + + /** + * Since we don't want to do any flush within state emission to avoid + * partial state in a command buffer, it's important to make sure that + * there is enough room to send both the DXDefineShader & DXBindShader + * commands in the same command buffer. So let's send both + * commands in one command reservation. If it fails, we'll undo + * the shader creation and return an error. + */ + ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader, + variant->id, type, codeLen); + + if (ret != PIPE_OK) + goto fail; + + return PIPE_OK; + +fail: + swc->shader_destroy(swc, variant->gb_shader); + variant->gb_shader = NULL; + +fail_no_allocation: + util_bitmask_clear(svga->shader_id_bm, variant->id); + variant->id = UTIL_BITMASK_INVALID_INDEX; + + return PIPE_ERROR_OUT_OF_MEMORY; +} /** * Issue the SVGA3D commands to define a new shader. - * \param result contains the shader tokens, etc. The result->id field will - * be set here. + * \param variant contains the shader tokens, etc. The result->id field will + * be set here. */ enum pipe_error svga_define_shader(struct svga_context *svga, @@ -42,27 +346,17 @@ svga_define_shader(struct svga_context *svga, struct svga_shader_variant *variant) { unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]); + enum pipe_error ret; - if (svga_have_gb_objects(svga)) { - struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret; + variant->id = UTIL_BITMASK_INVALID_INDEX; - variant->gb_shader = sws->shader_create(sws, type, - variant->tokens, codeLen); - if (!variant->gb_shader) - return PIPE_ERROR_OUT_OF_MEMORY; - - ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader); - if (ret != PIPE_OK) { - sws->shader_destroy(sws, variant->gb_shader); - variant->gb_shader = NULL; - } - - return ret; + if (svga_have_gb_objects(svga)) { + if (svga_have_vgpu10(svga)) + return define_gb_shader_vgpu10(svga, type, variant, codeLen); + else + return define_gb_shader_vgpu9(svga, type, variant, codeLen); } else { - enum pipe_error ret; - /* Allocate an integer ID for the shader */ variant->id = util_bitmask_add(svga->shader_id_bm); if (variant->id == UTIL_BITMASK_INVALID_INDEX) { @@ -80,14 +374,53 @@ svga_define_shader(struct svga_context *svga, assert(variant->id != UTIL_BITMASK_INVALID_INDEX); util_bitmask_clear(svga->shader_id_bm, variant->id); variant->id = UTIL_BITMASK_INVALID_INDEX; - return ret; } } - return PIPE_OK; + return ret; +} + + +/** + * Issue the SVGA3D commands to set/bind a shader. + * \param result the shader to bind. + */ +enum pipe_error +svga_set_shader(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant) +{ + enum pipe_error ret; + unsigned id = variant ? variant->id : SVGA3D_INVALID_ID; + + assert(type == SVGA3D_SHADERTYPE_VS || + type == SVGA3D_SHADERTYPE_GS || + type == SVGA3D_SHADERTYPE_PS); + + if (svga_have_gb_objects(svga)) { + struct svga_winsys_gb_shader *gbshader = + variant ? 
variant->gb_shader : NULL; + + if (svga_have_vgpu10(svga)) + ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id); + else + ret = SVGA3D_SetGBShader(svga->swc, type, gbshader); + } + else { + ret = SVGA3D_SetShader(svga->swc, type, id); + } + + return ret; } +struct svga_shader_variant * +svga_new_shader_variant(struct svga_context *svga) +{ + svga->hud.num_shaders++; + return CALLOC_STRUCT(svga_shader_variant); +} + enum pipe_error svga_destroy_shader_variant(struct svga_context *svga, @@ -96,32 +429,94 @@ svga_destroy_shader_variant(struct svga_context *svga, { enum pipe_error ret = PIPE_OK; - if (svga_have_gb_objects(svga)) { - struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - - sws->shader_destroy(sws, variant->gb_shader); + if (svga_have_gb_objects(svga) && variant->gb_shader) { + if (svga_have_vgpu10(svga)) { + struct svga_winsys_context *swc = svga->swc; + swc->shader_destroy(swc, variant->gb_shader); + ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); + if (ret != PIPE_OK) { + /* flush and try again */ + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); + } + util_bitmask_clear(svga->shader_id_bm, variant->id); + } + else { + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + sws->shader_destroy(sws, variant->gb_shader); + } variant->gb_shader = NULL; - goto end; } - - /* first try */ - if (variant->id != UTIL_BITMASK_INVALID_INDEX) { - ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); - - if (ret != PIPE_OK) { - /* flush and try again */ - svga_context_flush(svga, NULL); - + else { + if (variant->id != UTIL_BITMASK_INVALID_INDEX) { ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); - assert(ret == PIPE_OK); + if (ret != PIPE_OK) { + /* flush and try again */ + svga_context_flush(svga, NULL); + ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); + assert(ret == PIPE_OK); + } + util_bitmask_clear(svga->shader_id_bm, variant->id); } - - util_bitmask_clear(svga->shader_id_bm, variant->id); } -end: FREE((unsigned *)variant->tokens); FREE(variant); + svga->hud.num_shaders--; + return ret; } + +/* + * Rebind shaders. + * Called at the beginning of every new command buffer to ensure that + * shaders are properly paged-in. Instead of sending the SetShader + * command, this function sends a private allocation command to + * page in a shader. This avoids emitting redundant state to the device + * just to page in a resource. + */ +enum pipe_error +svga_rebind_shaders(struct svga_context *svga) +{ + struct svga_winsys_context *swc = svga->swc; + struct svga_hw_draw_state *hw = &svga->state.hw_draw; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + /** + * If the underlying winsys layer does not need resource rebinding, + * just clear the rebind flags and return. 
+ */ + if (swc->resource_rebind == NULL) { + svga->rebind.flags.vs = 0; + svga->rebind.flags.gs = 0; + svga->rebind.flags.fs = 0; + + return PIPE_OK; + } + + if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.vs = 0; + + if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.gs = 0; + + if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.fs = 0; + + return PIPE_OK; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.h b/lib/mesa/src/gallium/drivers/svga/svga_shader.h index 5102159b9..f49fdb46d 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_shader.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.h @@ -27,8 +27,244 @@ #define SVGA_SHADER_H #include "svga3d_reg.h" +#include "svga_context.h" +#include "svga_streamout.h" -struct svga_shader_variant; + +/** + * We use a 64-bit mask to keep track of the generic indexes. + * This is the maximum semantic index for a TGSI GENERIC[i] register. + */ +#define MAX_GENERIC_VARYING 64 + + +struct svga_context; + + +struct svga_compile_key +{ + /* vertex shader only */ + struct { + uint64_t fs_generic_inputs; + unsigned passthrough:1; + unsigned need_prescale:1; + unsigned undo_viewport:1; + unsigned allow_psiz:1; + /** The following are all 32-bit bitmasks (per VS input) */ + unsigned adjust_attrib_range; + unsigned attrib_is_pure_int; + unsigned adjust_attrib_w_1; + unsigned adjust_attrib_itof; + unsigned adjust_attrib_utof; + unsigned attrib_is_bgra; + unsigned attrib_puint_to_snorm; + unsigned attrib_puint_to_uscaled; + unsigned attrib_puint_to_sscaled; + } vs; + + /* geometry shader only */ + struct { + uint64_t vs_generic_outputs; + unsigned need_prescale:1; + unsigned writes_psize:1; + unsigned wide_point:1; + } gs; + + /* fragment shader only */ + struct { + uint64_t vs_generic_outputs; + uint64_t gs_generic_outputs; + unsigned light_twoside:1; + unsigned front_ccw:1; + unsigned white_fragments:1; + unsigned flatshade:1; + unsigned pstipple:1; + unsigned alpha_func:4; /**< SVGA3D_CMP_x */ + unsigned write_color0_to_n_cbufs:4; + unsigned aa_point:1; + int aa_point_coord_index; + float alpha_ref; + } fs; + + /* any shader type */ + int8_t generic_remap_table[MAX_GENERIC_VARYING]; + unsigned num_textures:8; + unsigned num_unnormalized_coords:8; + unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES; + unsigned sprite_origin_lower_left:1; + unsigned sprite_coord_enable; + struct { + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned unnormalized:1; + unsigned width_height_idx:5; /**< texture unit */ + unsigned texture_target:4; /**< PIPE_TEXTURE_x */ + unsigned texture_msaa:1; /**< A multisample texture? */ + unsigned sprite_texgen:1; + unsigned swizzle_r:3; + unsigned swizzle_g:3; + unsigned swizzle_b:3; + unsigned swizzle_a:3; + unsigned return_type:3; /**< TGSI_RETURN_TYPE_x */ + } tex[PIPE_MAX_SAMPLERS]; + /* Note: svga_compile_keys_equal() depends on the variable-size + * tex[] array being at the end of this structure. 
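Because tex[] is the trailing member of svga_compile_key, comparing two keys only needs the bytes up to the last used sampler entry; a minimal illustration of the size computation performed by svga_compile_keys_equal() further below (offsetof needs <stddef.h>; illustrative only):

   /* Meaningful size of a key that uses num_textures sampler entries:
    * everything before tex[] plus the used portion of tex[]. */
   size_t key_size = offsetof(struct svga_compile_key, tex)
                   + a->num_textures * sizeof a->tex[0];
   /* identical to: (const char *) &a->tex[a->num_textures] - (const char *) a */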
+ */ +}; + +/* A key for a variant of token string of a shader */ +struct svga_token_key { + struct { + unsigned sprite_coord_enable:24; + unsigned sprite_origin_upper_left:1; + unsigned point_pos_stream_out:1; + unsigned writes_psize:1; + unsigned aa_point:1; + } gs; +}; + +/** + * A single TGSI shader may be compiled into different variants of + * SVGA3D shaders depending on the compile key. Each user shader + * will have a linked list of these variants. + */ +struct svga_shader_variant +{ + const struct svga_shader *shader; + + /** Parameters used to generate this variant */ + struct svga_compile_key key; + + /* Compiled shader tokens: + */ + const unsigned *tokens; + unsigned nr_tokens; + + /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE, + * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY. + */ + unsigned id; + + /** Start of extra constants (number of float[4] constants) */ + unsigned extra_const_start; + + /* GB object buffer containing the bytecode */ + struct svga_winsys_gb_shader *gb_shader; + + boolean uses_flat_interp; /** TRUE if flat interpolation qualifier is + * applied to any of the varyings. + */ + + /** Is the color output just a constant value? (fragment shader only) */ + boolean constant_color_output; + + /** For FS-based polygon stipple */ + unsigned pstipple_sampler_unit; + + /** Next variant */ + struct svga_shader_variant *next; +}; + + +struct svga_shader +{ + const struct tgsi_token *tokens; + struct svga_token_key token_key; /* token key for the token string */ + struct tgsi_shader_info info; + + /* List of shaders with tokens derived from the same token string */ + struct svga_shader *next; + struct svga_shader *parent; /* shader with the original token string */ + + struct svga_stream_output *stream_output; + + /** Head of linked list of compiled variants */ + struct svga_shader_variant *variants; + + unsigned id; /**< for debugging only */ +}; + + +struct svga_fragment_shader +{ + struct svga_shader base; + + struct draw_fragment_shader *draw_shader; + + /** Mask of which generic varying variables are read by this shader */ + uint64_t generic_inputs; + + /** Table mapping original TGSI generic indexes to low integers */ + int8_t generic_remap_table[MAX_GENERIC_VARYING]; +}; + + +struct svga_vertex_shader +{ + struct svga_shader base; + + struct draw_vertex_shader *draw_shader; + + /** Mask of which generic varying variables are written by this shader */ + uint64_t generic_outputs; + + /** Generated geometry shader that goes with this vertex shader */ + struct svga_geometry_shader *gs; +}; + + +struct svga_geometry_shader +{ + struct svga_shader base; + + struct draw_geometry_shader *draw_shader; + + /** Table mapping original TGSI generic indexes to low integers */ + int8_t generic_remap_table[MAX_GENERIC_VARYING]; + uint64_t generic_outputs; + + unsigned aa_point_coord_index; /* generic index for aa point coord */ + + unsigned wide_point:1; /* set if the shader emulates wide point */ +}; + + +static inline boolean +svga_compile_keys_equal(const struct svga_compile_key *a, + const struct svga_compile_key *b) +{ + unsigned key_size = + (const char *) &a->tex[a->num_textures] - (const char *) a; + + return memcmp(a, b, key_size) == 0; +} + + +uint64_t +svga_get_generic_inputs_mask(const struct tgsi_shader_info *info); + +uint64_t +svga_get_generic_outputs_mask(const struct tgsi_shader_info *info); + +void +svga_remap_generics(uint64_t generics_mask, + int8_t remap_table[MAX_GENERIC_VARYING]); + +int +svga_remap_generic_index(int8_t 
remap_table[MAX_GENERIC_VARYING], + int generic_index); + +void +svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, + struct svga_compile_key *key); + +struct svga_shader_variant * +svga_search_shader_key(const struct svga_shader *shader, + const struct svga_compile_key *key); + +struct svga_shader * +svga_search_shader_token_key(struct svga_shader *shader, + const struct svga_token_key *key); enum pipe_error svga_define_shader(struct svga_context *svga, @@ -36,10 +272,20 @@ svga_define_shader(struct svga_context *svga, struct svga_shader_variant *variant); enum pipe_error +svga_set_shader(struct svga_context *svga, + SVGA3dShaderType type, + struct svga_shader_variant *variant); + +struct svga_shader_variant * +svga_new_shader_variant(struct svga_context *svga); + +enum pipe_error svga_destroy_shader_variant(struct svga_context *svga, SVGA3dShaderType type, struct svga_shader_variant *variant); +enum pipe_error +svga_rebind_shaders(struct svga_context *svga); /** * Check if a shader's bytecode exceeds the device limits. @@ -62,4 +308,40 @@ svga_shader_too_large(const struct svga_context *svga, } +/** + * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* + */ +static inline SVGA3dShaderType +svga_shader_type(unsigned shader) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + return SVGA3D_SHADERTYPE_VS; + case PIPE_SHADER_GEOMETRY: + return SVGA3D_SHADERTYPE_GS; + case PIPE_SHADER_FRAGMENT: + return SVGA3D_SHADERTYPE_PS; + default: + assert(!"Invalid shader type"); + return SVGA3D_SHADERTYPE_VS; + } +} + + +/** Does the current VS have stream output? */ +static inline boolean +svga_have_vs_streamout(const struct svga_context *svga) +{ + return svga->curr.vs != NULL && svga->curr.vs->base.stream_output != NULL; +} + + +/** Does the current GS have stream output? 
*/ +static inline boolean +svga_have_gs_streamout(const struct svga_context *svga) +{ + return svga->curr.gs != NULL && svga->curr.gs->base.stream_output != NULL; +} + + #endif /* SVGA_SHADER_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state.c b/lib/mesa/src/gallium/drivers/svga/svga_state.c index b0bc867f6..4479a2712 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state.c @@ -23,6 +23,7 @@ * **********************************************************/ +#include "util/u_bitmask.h" #include "util/u_debug.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -63,14 +64,19 @@ static const struct svga_tracked_state *hw_clear_state[] = */ static const struct svga_tracked_state *hw_draw_state[] = { + &svga_need_tgsi_transform, &svga_hw_fs, + &svga_hw_gs, &svga_hw_vs, &svga_hw_rss, - &svga_hw_tss, - &svga_hw_tss_binding, + &svga_hw_sampler, /* VGPU10 */ + &svga_hw_sampler_bindings, /* VGPU10 */ + &svga_hw_tss, /* pre-VGPU10 */ + &svga_hw_tss_binding, /* pre-VGPU10 */ &svga_hw_clip_planes, &svga_hw_vdecl, &svga_hw_fs_constants, + &svga_hw_gs_constants, &svga_hw_vs_constants, NULL }; @@ -123,7 +129,11 @@ update_state(struct svga_context *svga, const struct svga_tracked_state *atoms[], unsigned *state) { +#ifdef DEBUG boolean debug = TRUE; +#else + boolean debug = FALSE; +#endif enum pipe_error ret = PIPE_OK; unsigned i; @@ -219,6 +229,9 @@ svga_update_state(struct svga_context *svga, unsigned max_level) svga->state.dirty[i] |= svga->dirty; svga->dirty = 0; + + svga->hud.num_validations++; + return PIPE_OK; } @@ -255,23 +268,55 @@ do { \ */ enum pipe_error svga_emit_initial_state( struct svga_context *svga ) { - SVGA3dRenderState *rs; - unsigned count = 0; - const unsigned COUNT = 2; - enum pipe_error ret; - - ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT ); - if (ret != PIPE_OK) + if (svga_have_vgpu10(svga)) { + SVGA3dRasterizerStateId id = util_bitmask_add(svga->rast_object_id_bm); + enum pipe_error ret; + + /* XXX preliminary code */ + ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc, + id, + SVGA3D_FILLMODE_FILL, + SVGA3D_CULL_NONE, + 1, /* frontCounterClockwise */ + 0, /* depthBias */ + 0.0f, /* depthBiasClamp */ + 0.0f, /* slopeScaledDepthBiasClamp */ + 0, /* depthClampEnable */ + 0, /* scissorEnable */ + 0, /* multisampleEnable */ + 0, /* aalineEnable */ + 1.0f, /* lineWidth */ + 0, /* lineStippleEnable */ + 0, /* lineStippleFactor */ + 0, /* lineStipplePattern */ + 0); /* provokingVertexLast */ + + + assert(ret == PIPE_OK); + + ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, id); return ret; + } + else { + SVGA3dRenderState *rs; + unsigned count = 0; + const unsigned COUNT = 2; + enum pipe_error ret; - /* Always use D3D style coordinate space as this is the only one - * which is implemented on all backends. - */ - EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED ); - EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW ); - - assert( COUNT == count ); - SVGA_FIFOCommitAll( svga->swc ); + ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT ); + if (ret != PIPE_OK) + return ret; - return PIPE_OK; + /* Always use D3D style coordinate space as this is the only one + * which is implemented on all backends. 
+ */ + EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, + SVGA3D_COORDINATE_LEFTHANDED ); + EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW ); + + assert( COUNT == count ); + SVGA_FIFOCommitAll( svga->swc ); + + return PIPE_OK; + } } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state.h b/lib/mesa/src/gallium/drivers/svga/svga_state.h index 3325626a4..04b20e161 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_state.h @@ -57,14 +57,20 @@ extern struct svga_tracked_state svga_hw_framebuffer; /* HW_DRAW */ +extern struct svga_tracked_state svga_need_tgsi_transform; extern struct svga_tracked_state svga_hw_vs; extern struct svga_tracked_state svga_hw_fs; +extern struct svga_tracked_state svga_hw_gs; extern struct svga_tracked_state svga_hw_rss; +extern struct svga_tracked_state svga_hw_pstipple; +extern struct svga_tracked_state svga_hw_sampler; +extern struct svga_tracked_state svga_hw_sampler_bindings; extern struct svga_tracked_state svga_hw_tss; extern struct svga_tracked_state svga_hw_tss_binding; extern struct svga_tracked_state svga_hw_clip_planes; extern struct svga_tracked_state svga_hw_vdecl; extern struct svga_tracked_state svga_hw_fs_constants; +extern struct svga_tracked_state svga_hw_gs_constants; extern struct svga_tracked_state svga_hw_vs_constants; /* SWTNL_DRAW @@ -93,10 +99,14 @@ enum pipe_error svga_emit_initial_state( struct svga_context *svga ); enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga ); +enum pipe_error svga_rebind_framebuffer_bindings( struct svga_context *svga ); + enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga ); enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga); enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga); +enum pipe_error svga_reemit_gs_bindings(struct svga_context *svga); + #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c index 1e1fbb099..8ab169308 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c @@ -1,3 +1,4 @@ + /********************************************************** * Copyright 2008-2009 VMware, Inc. All rights reserved. * @@ -23,9 +24,11 @@ * **********************************************************/ +#include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "pipe/p_defines.h" +#include "util/u_upload_mgr.h" #include "svga_screen.h" #include "svga_context.h" @@ -34,6 +37,7 @@ #include "svga_tgsi.h" #include "svga_debug.h" #include "svga_resource_buffer.h" +#include "svga_shader.h" #include "svga_hw_reg.h" @@ -52,65 +56,135 @@ /** Guest-backed surface constant buffers must be this size */ #define GB_CONSTBUF_SIZE (SVGA3D_CONSTREG_MAX) + /** - * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* + * Emit any extra shader-type-independent shader constants into the buffer + * pointed to by 'dest'. 
+ * \return number of float[4] constants put into the 'dest' buffer */ static unsigned -svga_shader_type(unsigned shader) +svga_get_extra_constants_common(struct svga_context *svga, + const struct svga_shader_variant *variant, + unsigned shader, float *dest) { - switch (shader) { - case PIPE_SHADER_VERTEX: - return SVGA3D_SHADERTYPE_VS; - case PIPE_SHADER_FRAGMENT: - return SVGA3D_SHADERTYPE_PS; - default: - assert(!"Unexpected shader type"); - return SVGA3D_SHADERTYPE_VS; + uint32_t *dest_u = (uint32_t *) dest; // uint version of dest + unsigned i; + unsigned count = 0; + + for (i = 0; i < variant->key.num_textures; i++) { + struct pipe_sampler_view *sv = svga->curr.sampler_views[shader][i]; + if (sv) { + struct pipe_resource *tex = sv->texture; + /* Scaling factors needed for handling unnormalized texture coordinates + * for texture rectangles. + */ + if (variant->key.tex[i].unnormalized) { + /* debug/sanity check */ + assert(variant->key.tex[i].width_height_idx == count); + + *dest++ = 1.0 / (float)tex->width0; + *dest++ = 1.0 / (float)tex->height0; + *dest++ = 1.0; + *dest++ = 1.0; + + count++; + } + + /* Store the sizes for texture buffers. + */ + if (tex->target == PIPE_BUFFER) { + unsigned bytes_per_element = util_format_get_blocksize(sv->format); + *dest_u++ = tex->width0 / bytes_per_element; + *dest_u++ = 1; + *dest_u++ = 1; + *dest_u++ = 1; + + count++; + } + } } + + return count; } /** * Emit any extra fragment shader constants into the buffer pointed * to by 'dest'. - * In particular, these would be the scaling factors needed for handling - * unnormalized texture coordinates for texture rectangles. * \return number of float[4] constants put into the dest buffer */ static unsigned svga_get_extra_fs_constants(struct svga_context *svga, float *dest) { const struct svga_shader_variant *variant = svga->state.hw_draw.fs; - const struct svga_fs_compile_key *key = &variant->key.fkey; unsigned count = 0; - /* SVGA_NEW_VS_VARIANT - */ - if (key->num_unnormalized_coords) { - unsigned i; + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_FRAGMENT, dest); - for (i = 0; i < key->num_textures; i++) { - if (key->tex[i].unnormalized) { - struct pipe_resource *tex = svga->curr.sampler_views[i]->texture; + assert(count <= MAX_EXTRA_CONSTS); - /* debug/sanity check */ - assert(key->tex[i].width_height_idx == count); + return count; +} - *dest++ = 1.0 / (float)tex->width0; - *dest++ = 1.0 / (float)tex->height0; - *dest++ = 1.0; - *dest++ = 1.0; +/** + * Emit extra constants needed for prescale computation into the + * the buffer pointed to by '*dest'. The updated buffer pointer + * will be returned in 'dest'. + */ +static unsigned +svga_get_prescale_constants(struct svga_context *svga, float **dest) +{ + memcpy(*dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float)); + *dest += 4; - count++; - } - } - } + memcpy(*dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float)); + *dest += 4; - assert(count <= MAX_EXTRA_CONSTS); + return 2; +} - return count; +/** + * Emit extra constants needed for point sprite emulation. 
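For orientation, the prescale helper above emits two consecutive float[4] registers; an illustrative summary, not patch code:

   /* svga_get_prescale_constants() writes, in order:
    *   register N+0: hw_clear.prescale.scale[0..3]
    *   register N+1: hw_clear.prescale.translate[0..3]
    * and returns 2, the number of float[4] registers it consumed, which the
    * caller adds to its running constant count. */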
+ */ +static unsigned +svga_get_pt_sprite_constants(struct svga_context *svga, float **dest) +{ + struct svga_screen *screen = svga_screen(svga->pipe.screen); + float *dst = *dest; + + dst[0] = 1.0 / (svga->curr.viewport.scale[0] * 2); + dst[1] = 1.0 / (svga->curr.viewport.scale[1] * 2); + dst[2] = svga->curr.rast->pointsize; + dst[3] = screen->maxPointSize; + *dest = *dest + 4; + return 1; } +/** + * Emit user-defined clip plane coefficients into the buffer pointed to + * by '*dest'. The updated buffer pointer will be returned in 'dest'. + */ +static unsigned +svga_get_clip_plane_constants(struct svga_context *svga, + const struct svga_shader_variant *variant, + float **dest) +{ + unsigned count = 0; + + /* SVGA_NEW_CLIP */ + if (svga_have_vgpu10(svga)) { + /* append user-defined clip plane coefficients onto constant buffer */ + unsigned clip_planes = variant->key.clip_plane_enable; + while (clip_planes) { + int i = u_bit_scan(&clip_planes); + COPY_4V(*dest, svga->curr.clip.ucp[i]); + *dest += 4; + count += 1; + } + } + return count; +} /** * Emit any extra vertex shader constants into the buffer pointed @@ -124,26 +198,71 @@ static unsigned svga_get_extra_vs_constants(struct svga_context *svga, float *dest) { const struct svga_shader_variant *variant = svga->state.hw_draw.vs; - const struct svga_vs_compile_key *key = &variant->key.vkey; unsigned count = 0; /* SVGA_NEW_VS_VARIANT */ - if (key->need_prescale) { - memcpy(dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float)); - dest += 4; + if (variant->key.vs.need_prescale) { + count += svga_get_prescale_constants(svga, &dest); + } - memcpy(dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float)); + if (variant->key.vs.undo_viewport) { + /* Used to convert window coords back to NDC coords */ + dest[0] = 1.0f / svga->curr.viewport.scale[0]; + dest[1] = 1.0f / svga->curr.viewport.scale[1]; + dest[2] = -svga->curr.viewport.translate[0]; + dest[3] = -svga->curr.viewport.translate[1]; dest += 4; - - count = 2; + count += 1; } + /* SVGA_NEW_CLIP */ + count += svga_get_clip_plane_constants(svga, variant, &dest); + + /* common constants */ + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_VERTEX, dest); + assert(count <= MAX_EXTRA_CONSTS); return count; } +/** + * Emit any extra geometry shader constants into the buffer pointed + * to by 'dest'. + */ +static unsigned +svga_get_extra_gs_constants(struct svga_context *svga, float *dest) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.gs; + unsigned count = 0; + + /* SVGA_NEW_GS_VARIANT + */ + + /* Constants for point sprite + * These are used in the transformed gs that supports point sprite. + * They need to be added before the prescale constants. + */ + if (variant->key.gs.wide_point) { + count += svga_get_pt_sprite_constants(svga, &dest); + } + + if (variant->key.gs.need_prescale) { + count += svga_get_prescale_constants(svga, &dest); + } + + /* SVGA_NEW_CLIP */ + count += svga_get_clip_plane_constants(svga, variant, &dest); + + /* common constants */ + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_GEOMETRY, dest); + + assert(count <= MAX_EXTRA_CONSTS); + return count; +} /** * Check and emit one shader constant register. 
@@ -159,6 +278,7 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i, assert(shader < PIPE_SHADER_TYPES); assert(i < SVGA3D_CONSTREG_MAX); + assert(!svga_have_vgpu10(svga)); if (memcmp(svga->state.hw_draw.cb[shader][i], value, 4 * sizeof(float)) != 0) { @@ -202,6 +322,10 @@ emit_const_range(struct svga_context *svga, unsigned i, j; enum pipe_error ret; + assert(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_FRAGMENT); + assert(!svga_have_vgpu10(svga)); + #ifdef DEBUG if (offset + count > SVGA3D_CONSTREG_MAX) { debug_printf("svga: too many constants (offset %u + count %u = %u (max = %u))\n", @@ -307,10 +431,12 @@ emit_const_range(struct svga_context *svga, /** * Emit all the constants in a constant buffer for a shader stage. + * On VGPU10, emit_consts_vgpu10 is used instead. */ static enum pipe_error -emit_consts(struct svga_context *svga, unsigned shader) +emit_consts_vgpu9(struct svga_context *svga, unsigned shader) { + const struct pipe_constant_buffer *cbuf; struct svga_screen *ss = svga_screen(svga->pipe.screen); struct pipe_transfer *transfer = NULL; unsigned count; @@ -320,85 +446,318 @@ emit_consts(struct svga_context *svga, unsigned shader) const unsigned offset = 0; assert(shader < PIPE_SHADER_TYPES); + assert(!svga_have_vgpu10(svga)); + /* Only one constant buffer per shader is supported before VGPU10. + * This is only an approximate check against that. + */ + assert(svga->curr.constbufs[shader][1].buffer == NULL); - if (svga->curr.cbufs[shader].buffer == NULL) - goto done; + cbuf = &svga->curr.constbufs[shader][0]; - data = (const float (*)[4])pipe_buffer_map(&svga->pipe, - svga->curr.cbufs[shader].buffer, - PIPE_TRANSFER_READ, - &transfer); - if (data == NULL) { - ret = PIPE_ERROR_OUT_OF_MEMORY; - goto done; - } + if (svga->curr.constbufs[shader][0].buffer) { + /* emit user-provided constants */ + data = (const float (*)[4]) + pipe_buffer_map(&svga->pipe, svga->curr.constbufs[shader][0].buffer, + PIPE_TRANSFER_READ, &transfer); + if (!data) { + return PIPE_ERROR_OUT_OF_MEMORY; + } - /* sanity check */ - assert(svga->curr.cbufs[shader].buffer->width0 >= - svga->curr.cbufs[shader].buffer_size); + /* sanity check */ + assert(cbuf->buffer->width0 >= + cbuf->buffer_size); - /* Use/apply the constant buffer size and offsets here */ - count = svga->curr.cbufs[shader].buffer_size / (4 * sizeof(float)); - data += svga->curr.cbufs[shader].buffer_offset / (4 * sizeof(float)); + /* Use/apply the constant buffer size and offsets here */ + count = cbuf->buffer_size / (4 * sizeof(float)); + data += cbuf->buffer_offset / (4 * sizeof(float)); - if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { - ret = emit_const_range( svga, shader, offset, count, data ); - if (ret != PIPE_OK) { - goto done; + if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { + ret = emit_const_range( svga, shader, offset, count, data ); } - } else { - for (i = 0; i < count; i++) { - ret = emit_const( svga, shader, offset + i, data[i] ); - if (ret != PIPE_OK) { - goto done; + else { + for (i = 0; i < count; i++) { + ret = emit_const( svga, shader, offset + i, data[i] ); + if (ret != PIPE_OK) { + break; + } } } - } -done: - if (data) pipe_buffer_unmap(&svga->pipe, transfer); + if (ret != PIPE_OK) { + return ret; + } + } + + /* emit extra shader constants */ + { + const struct svga_shader_variant *variant = NULL; + unsigned offset; + float extras[MAX_EXTRA_CONSTS][4]; + unsigned count, i; + + switch (shader) { + case PIPE_SHADER_VERTEX: + variant = svga->state.hw_draw.vs; + count = 
svga_get_extra_vs_constants(svga, (float *) extras); + break; + case PIPE_SHADER_FRAGMENT: + variant = svga->state.hw_draw.fs; + count = svga_get_extra_fs_constants(svga, (float *) extras); + break; + default: + assert(!"Unexpected shader type"); + count = 0; + } + + assert(variant); + offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + assert(count <= Elements(extras)); + + if (count > 0) { + if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { + ret = emit_const_range(svga, shader, offset, count, + (const float (*) [4])extras); + } + else { + for (i = 0; i < count; i++) { + ret = emit_const(svga, shader, offset + i, extras[i]); + if (ret != PIPE_OK) + return ret; + } + } + } + } + return ret; } + static enum pipe_error -emit_fs_consts(struct svga_context *svga, unsigned dirty) +emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader) { - struct svga_screen *ss = svga_screen(svga->pipe.screen); - const struct svga_shader_variant *variant = svga->state.hw_draw.fs; + const struct pipe_constant_buffer *cbuf; + struct pipe_resource *dst_buffer = NULL; enum pipe_error ret = PIPE_OK; + struct pipe_transfer *src_transfer; + struct svga_winsys_surface *dst_handle; + float extras[MAX_EXTRA_CONSTS][4]; + unsigned extra_count, extra_size, extra_offset; + unsigned new_buf_size; + void *src_map = NULL, *dst_map; + unsigned offset; + const struct svga_shader_variant *variant; - /* SVGA_NEW_FS_VARIANT - */ - if (variant == NULL) + assert(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_FRAGMENT); + + cbuf = &svga->curr.constbufs[shader][0]; + + switch (shader) { + case PIPE_SHADER_VERTEX: + variant = svga->state.hw_draw.vs; + extra_count = svga_get_extra_vs_constants(svga, (float *) extras); + break; + case PIPE_SHADER_FRAGMENT: + variant = svga->state.hw_draw.fs; + extra_count = svga_get_extra_fs_constants(svga, (float *) extras); + break; + case PIPE_SHADER_GEOMETRY: + variant = svga->state.hw_draw.gs; + extra_count = svga_get_extra_gs_constants(svga, (float *) extras); + break; + default: + assert(!"Unexpected shader type"); + /* Don't return an error code since we don't want to keep re-trying + * this function and getting stuck in an infinite loop. + */ return PIPE_OK; + } - /* SVGA_NEW_FS_CONST_BUFFER + assert(variant); + + /* Compute extra constants size and offset in bytes */ + extra_size = extra_count * 4 * sizeof(float); + extra_offset = 4 * sizeof(float) * variant->extra_const_start; + + if (cbuf->buffer_size + extra_size == 0) + return PIPE_OK; /* nothing to do */ + + /* Typically, the cbuf->buffer here is a user-space buffer so mapping + * it is really cheap. If we ever get real HW buffers for constants + * we should void mapping and instead use a ResourceCopy command. + */ + if (cbuf->buffer_size > 0) { + src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer, + cbuf->buffer_offset, cbuf->buffer_size, + PIPE_TRANSFER_READ, &src_transfer); + assert(src_map); + if (!src_map) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + } + + /* The new/dest buffer's size must be large enough to hold the original, + * user-specified constants, plus the extra constants. + * The size of the original constant buffer _should_ agree with what the + * shader is expecting, but it might not (it's not enforced anywhere by + * gallium). + */ + new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size; + + /* According to the DX10 spec, the constant buffer size must be + * in multiples of 16. 
+ */ + new_buf_size = align(new_buf_size, 16); + + u_upload_alloc(svga->const0_upload, 0, new_buf_size, + CONST0_UPLOAD_ALIGNMENT, &offset, + &dst_buffer, &dst_map); + if (!dst_map) { + if (src_map) + pipe_buffer_unmap(&svga->pipe, src_transfer); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + if (src_map) { + memcpy(dst_map, src_map, cbuf->buffer_size); + pipe_buffer_unmap(&svga->pipe, src_transfer); + } + + if (extra_size) { + assert(extra_offset + extra_size <= new_buf_size); + memcpy((char *) dst_map + extra_offset, extras, extra_size); + } + u_upload_unmap(svga->const0_upload); + + /* Issue the SetSingleConstantBuffer command */ + dst_handle = svga_buffer_handle(svga, dst_buffer); + if (!dst_handle) { + pipe_resource_reference(&dst_buffer, NULL); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + assert(new_buf_size % 16 == 0); + ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, + 0, /* index */ + svga_shader_type(shader), + dst_handle, + offset, + new_buf_size); + + if (ret != PIPE_OK) { + pipe_resource_reference(&dst_buffer, NULL); + return ret; + } + + /* Save this const buffer until it's replaced in the future. + * Otherwise, all references to the buffer will go away after the + * command buffer is submitted, it'll get recycled and we will have + * incorrect constant buffer bindings. */ - ret = emit_consts( svga, PIPE_SHADER_FRAGMENT ); - if (ret != PIPE_OK) + pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer); + + svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size; + + pipe_resource_reference(&dst_buffer, NULL); + + return ret; +} + + +static enum pipe_error +emit_consts_vgpu10(struct svga_context *svga, unsigned shader) +{ + enum pipe_error ret; + unsigned dirty_constbufs; + unsigned enabled_constbufs; + + /* Emit 0th constant buffer (with extra constants) */ + ret = emit_constbuf_vgpu10(svga, shader); + if (ret != PIPE_OK) { return ret; + } - /* emit extra shader constants */ - { - unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; - float extras[MAX_EXTRA_CONSTS][4]; - unsigned count, i; + enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u; - count = svga_get_extra_fs_constants(svga, (float *) extras); + /* Emit other constant buffers (UBOs) */ + dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u; - if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { - ret = emit_const_range(svga, PIPE_SHADER_FRAGMENT, offset, count, - (const float (*) [4])extras); - } else { - for (i = 0; i < count; i++) { - ret = emit_const(svga, PIPE_SHADER_FRAGMENT, offset + i, extras[i]); - if (ret != PIPE_OK) - return ret; + while (dirty_constbufs) { + unsigned index = u_bit_scan(&dirty_constbufs); + unsigned offset = svga->curr.constbufs[shader][index].buffer_offset; + unsigned size = svga->curr.constbufs[shader][index].buffer_size; + struct svga_buffer *buffer = + svga_buffer(svga->curr.constbufs[shader][index].buffer); + struct svga_winsys_surface *handle; + + if (buffer) { + handle = svga_buffer_handle(svga, &buffer->b.b); + enabled_constbufs |= 1 << index; + } + else { + handle = NULL; + enabled_constbufs &= ~(1 << index); + assert(offset == 0); + assert(size == 0); + } + + if (size % 16 != 0) { + /* GL's buffer range sizes can be any number of bytes but the + * SVGA3D device requires a multiple of 16 bytes. 
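A quick arithmetic illustration of the 16-byte size rule handled here and in the UBO loop below (assumes Mesa's align() round-up helper; illustrative only):

   unsigned size = 100;
   unsigned rounded_up   = align(size, 16);   /* 112: used when it still fits in the buffer */
   unsigned rounded_down = size & ~15u;       /*  96: fallback that avoids reading past the end */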
+ */ + const unsigned total_size = buffer->b.b.width0; + + if (offset + align(size, 16) <= total_size) { + /* round up size to multiple of 16 */ + size = align(size, 16); + } + else { + /* round down to mulitple of 16 (this may cause rendering problems + * but should avoid a device error). + */ + size &= ~15; } } + + assert(size % 16 == 0); + ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, + index, + svga_shader_type(shader), + handle, + offset, + size); + if (ret != PIPE_OK) + return ret; + } + + svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs; + svga->state.dirty_constbufs[shader] = 0; + + return ret; +} + +static enum pipe_error +emit_fs_consts(struct svga_context *svga, unsigned dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.fs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_FS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_FS_CONST_BUFFER + */ + if (svga_have_vgpu10(svga)) { + ret = emit_consts_vgpu10(svga, PIPE_SHADER_FRAGMENT); + } + else { + ret = emit_consts_vgpu9(svga, PIPE_SHADER_FRAGMENT); } return ret; @@ -419,40 +778,21 @@ struct svga_tracked_state svga_hw_fs_constants = static enum pipe_error emit_vs_consts(struct svga_context *svga, unsigned dirty) { - struct svga_screen *ss = svga_screen(svga->pipe.screen); const struct svga_shader_variant *variant = svga->state.hw_draw.vs; enum pipe_error ret = PIPE_OK; /* SVGA_NEW_VS_VARIANT */ - if (variant == NULL) + if (!variant) return PIPE_OK; /* SVGA_NEW_VS_CONST_BUFFER */ - ret = emit_consts( svga, PIPE_SHADER_VERTEX ); - if (ret != PIPE_OK) - return ret; - - /* emit extra shader constants */ - { - unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; - float extras[MAX_EXTRA_CONSTS][4]; - unsigned count, i; - - count = svga_get_extra_vs_constants(svga, (float *) extras); - assert(count <= Elements(extras)); - - if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) { - ret = emit_const_range(svga, PIPE_SHADER_VERTEX, offset, count, - (const float (*) [4]) extras); - } else { - for (i = 0; i < count; i++) { - ret = emit_const(svga, PIPE_SHADER_VERTEX, offset + i, extras[i]); - if (ret != PIPE_OK) - return ret; - } - } + if (svga_have_vgpu10(svga)) { + ret = emit_consts_vgpu10(svga, PIPE_SHADER_VERTEX); + } + else { + ret = emit_consts_vgpu9(svga, PIPE_SHADER_VERTEX); } return ret; @@ -467,3 +807,42 @@ struct svga_tracked_state svga_hw_vs_constants = SVGA_NEW_VS_VARIANT), emit_vs_consts }; + + +static enum pipe_error +emit_gs_consts(struct svga_context *svga, unsigned dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.gs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_GS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_GS_CONST_BUFFER + */ + if (svga_have_vgpu10(svga)) { + /** + * If only the rasterizer state has changed and the current geometry + * shader does not emit wide points, then there is no reason to + * re-emit the GS constants, so skip it. 
+ */ + if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point) + return PIPE_OK; + + ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY); + } + + return ret; +} + + +struct svga_tracked_state svga_hw_gs_constants = +{ + "hw gs params", + (SVGA_NEW_GS_CONST_BUFFER | + SVGA_NEW_RAST | + SVGA_NEW_GS_VARIANT), + emit_gs_consts +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c index 1c174da31..4b0400bf8 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -26,12 +26,14 @@ #include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_format.h" #include "svga_context.h" #include "svga_state.h" #include "svga_cmd.h" #include "svga_debug.h" #include "svga_screen.h" +#include "svga_surface.h" /* @@ -46,30 +48,26 @@ #define MAX_RT_PER_BATCH 8 -/*********************************************************************** - * Hardware state update - */ - static enum pipe_error -emit_framebuffer( struct svga_context *svga, - unsigned dirty ) +emit_fb_vgpu9(struct svga_context *svga) { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; - boolean reemit = svga->rebind.rendertargets; + boolean reemit = svga->rebind.flags.rendertargets; unsigned i; enum pipe_error ret; + assert(!svga_have_vgpu10(svga)); + /* * We need to reemit non-null surface bindings, even when they are not * dirty, to ensure that the resources are paged in. */ for (i = 0; i < svgascreen->max_color_buffers; i++) { - if (curr->cbufs[i] != hw->cbufs[i] || - (reemit && hw->cbufs[i])) { + if ((curr->cbufs[i] != hw->cbufs[i]) || (reemit && hw->cbufs[i])) { if (svga->curr.nr_fbs++ > MAX_RT_PER_BATCH) return PIPE_ERROR_OUT_OF_MEMORY; @@ -82,14 +80,13 @@ emit_framebuffer( struct svga_context *svga, } } - if (curr->zsbuf != hw->zsbuf || - (reemit && hw->zsbuf)) { + if ((curr->zsbuf != hw->zsbuf) || (reemit && hw->zsbuf)) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf); if (ret != PIPE_OK) return ret; if (curr->zsbuf && - curr->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) { + util_format_is_depth_and_stencil(curr->zsbuf->format)) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf); if (ret != PIPE_OK) @@ -104,8 +101,6 @@ emit_framebuffer( struct svga_context *svga, pipe_surface_reference(&hw->zsbuf, curr->zsbuf); } - svga->rebind.rendertargets = FALSE; - return PIPE_OK; } @@ -118,15 +113,15 @@ emit_framebuffer( struct svga_context *svga, * Called at the beginning of every new command buffer to ensure that * non-dirty rendertargets are properly paged-in. 
*/ -enum pipe_error -svga_reemit_framebuffer_bindings(struct svga_context *svga) +static enum pipe_error +svga_reemit_framebuffer_bindings_vgpu9(struct svga_context *svga) { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; unsigned i; enum pipe_error ret; - assert(svga->rebind.rendertargets); + assert(!svga_have_vgpu10(svga)); for (i = 0; i < svgascreen->max_color_buffers; i++) { if (hw->cbufs[i]) { @@ -145,7 +140,7 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga) } if (hw->zsbuf && - hw->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) { + util_format_is_depth_and_stencil(hw->zsbuf->format)) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf); if (ret != PIPE_OK) { return ret; @@ -159,7 +154,161 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga) } } - svga->rebind.rendertargets = FALSE; + return PIPE_OK; +} + + + +static enum pipe_error +emit_fb_vgpu10(struct svga_context *svga) +{ + const struct svga_screen *ss = svga_screen(svga->pipe.screen); + struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS]; + struct pipe_surface *dsv; + struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + const unsigned num_color = MAX2(curr->nr_cbufs, hw->nr_cbufs); + unsigned i; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + /* Setup render targets array. Note that we loop over the max of the + * number of previously bound buffers and the new buffers to unbind + * any previously bound buffers when the new number of buffers is less + * than the old number of buffers. + */ + for (i = 0; i < num_color; i++) { + if (curr->cbufs[i]) { + rtv[i] = svga_validate_surface_view(svga, + svga_surface(curr->cbufs[i])); + if (rtv[i] == NULL) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + + assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID); + } + else { + rtv[i] = NULL; + } + } + + /* Setup depth stencil view */ + if (curr->zsbuf) { + dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf)); + if (!dsv) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + } + else { + dsv = NULL; + } + + ret = SVGA3D_vgpu10_SetRenderTargets(svga->swc, num_color, rtv, dsv); + if (ret != PIPE_OK) + return ret; + + for (i = 0; i < ss->max_color_buffers; i++) { + if (hw->cbufs[i] != curr->cbufs[i]) { + /* propagate the backed view surface before unbinding it */ + if (hw->cbufs[i] && svga_surface(hw->cbufs[i])->backed) { + svga_propagate_surface(svga, + &svga_surface(hw->cbufs[i])->backed->base); + } + pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]); + } + } + hw->nr_cbufs = curr->nr_cbufs; + + if (hw->zsbuf != curr->zsbuf) { + /* propagate the backed view surface before unbinding it */ + if (hw->zsbuf && svga_surface(hw->zsbuf)->backed) { + svga_propagate_surface(svga, &svga_surface(hw->zsbuf)->backed->base); + } + pipe_surface_reference(&hw->zsbuf, curr->zsbuf); + } + + return ret; +} + + +static enum pipe_error +emit_framebuffer(struct svga_context *svga, unsigned dirty) +{ + if (svga_have_vgpu10(svga)) { + return emit_fb_vgpu10(svga); + } + else { + return emit_fb_vgpu9(svga); + } +} + + +/* + * Rebind rendertargets. + * + * Similar to emit_framebuffer, but without any state checking/update. + * + * Called at the beginning of every new command buffer to ensure that + * non-dirty rendertargets are properly paged-in. 
+ */ +enum pipe_error +svga_reemit_framebuffer_bindings(struct svga_context *svga) +{ + enum pipe_error ret; + + assert(svga->rebind.flags.rendertargets); + + if (svga_have_vgpu10(svga)) { + ret = emit_fb_vgpu10(svga); + } + else { + ret = svga_reemit_framebuffer_bindings_vgpu9(svga); + } + + svga->rebind.flags.rendertargets = FALSE; + + return ret; +} + + +/* + * Send a private allocation command to page in rendertargets resource. + */ +enum pipe_error +svga_rebind_framebuffer_bindings(struct svga_context *svga) +{ + const struct svga_screen *ss = svga_screen(svga->pipe.screen); + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + unsigned i; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + + if (!svga->rebind.flags.rendertargets) + return PIPE_OK; + + for (i = 0; i < ss->max_color_buffers; i++) { + if (hw->cbufs[i]) { + ret = svga->swc->resource_rebind(svga->swc, + svga_surface(hw->cbufs[i])->handle, + NULL, + SVGA_RELOC_WRITE); + if (ret != PIPE_OK) + return ret; + } + } + + if (hw->zsbuf) { + ret = svga->swc->resource_rebind(svga->swc, + svga_surface(hw->zsbuf)->handle, + NULL, + SVGA_RELOC_WRITE); + if (ret != PIPE_OK) + return ret; + } + + svga->rebind.flags.rendertargets = 0; return PIPE_OK; } @@ -202,6 +351,7 @@ emit_viewport( struct svga_context *svga, float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1]; float fw = viewport->scale[0] * 2.0f; float fh = flip * viewport->scale[1] * 2.0f; + boolean emit_vgpu10_viewport = FALSE; memset( &prescale, 0, sizeof(prescale) ); @@ -225,7 +375,16 @@ emit_viewport( struct svga_context *svga, prescale.translate[1] = 0; prescale.translate[2] = 0; prescale.translate[3] = 0; - prescale.enabled = TRUE; + + /* Enable prescale to adjust vertex positions to match + VGPU10 convention only if rasterization is enabled. + */ + if (svga->curr.rast->templ.rasterizer_discard) { + degenerate = TRUE; + goto out; + } else { + prescale.enabled = TRUE; + } if (fw < 0) { prescale.scale[0] *= -1.0f; @@ -235,7 +394,14 @@ emit_viewport( struct svga_context *svga, } if (fh < 0.0) { - prescale.translate[1] = fh - 1.0f + fy * 2.0f; + if (svga_have_vgpu10(svga)) { + /* floating point viewport params below */ + prescale.translate[1] = fh + fy * 2.0f; + } + else { + /* integer viewport params below */ + prescale.translate[1] = fh - 1.0f + fy * 2.0f; + } fh = -fh; fy -= fh; prescale.scale[1] = -1.0f; @@ -321,19 +487,31 @@ emit_viewport( struct svga_context *svga, float adjust_x = 0.0; float adjust_y = 0.0; - switch (svga->curr.reduced_prim) { - case PIPE_PRIM_POINTS: - adjust_x = -0.375; - adjust_y = -0.75; - break; - case PIPE_PRIM_LINES: - adjust_x = -0.5; - adjust_y = 0; - break; - case PIPE_PRIM_TRIANGLES: - adjust_x = -0.5; - adjust_y = -0.5; - break; + if (svga_have_vgpu10(svga)) { + /* Normally, we don't have to do any sub-pixel coordinate + * adjustments for VGPU10. But when we draw wide points with + * a GS we need an X adjustment in order to be conformant. 
+ */ + if (svga->curr.reduced_prim == PIPE_PRIM_POINTS && + svga->curr.rast->pointsize > 1.0f) { + adjust_x = 0.5; + } + } + else { + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + adjust_x = -0.375; + adjust_y = -0.75; + break; + case PIPE_PRIM_LINES: + adjust_x = -0.5; + adjust_y = 0; + break; + case PIPE_PRIM_TRIANGLES: + adjust_x = -0.5; + adjust_y = -0.5; + break; + } } if (invertY) @@ -360,6 +538,17 @@ emit_viewport( struct svga_context *svga, prescale.scale[2] = -prescale.scale[2]; } + /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale. + * zmin can be set to -1 when viewport->scale[2] is set to 1 and + * viewport->translate[2] is set to 0 in the blit code. + */ + if (range_min < 0.0f) { + range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2]; + range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2]; + prescale.scale[2] *= 2.0f; + prescale.translate[2] -= 0.5f; + } + if (prescale.enabled) { float H[2]; float J[2]; @@ -428,21 +617,49 @@ out: prescale.enabled = FALSE; } - if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { - ret = SVGA3D_SetViewport(svga->swc, &rect); - if(ret != PIPE_OK) - return ret; + if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) { + if (svga_have_vgpu10(svga)) { + emit_vgpu10_viewport = TRUE; + } + else { + ret = SVGA3D_SetViewport(svga->swc, &rect); + if (ret != PIPE_OK) + return ret; - memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect)); - assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport)); + svga->state.hw_clear.viewport = rect; + } } if (svga->state.hw_clear.depthrange.zmin != range_min || - svga->state.hw_clear.depthrange.zmax != range_max) { - ret = SVGA3D_SetZRange(svga->swc, range_min, range_max ); - if(ret != PIPE_OK) + svga->state.hw_clear.depthrange.zmax != range_max) + { + if (svga_have_vgpu10(svga)) { + emit_vgpu10_viewport = TRUE; + } + else { + ret = SVGA3D_SetZRange(svga->swc, range_min, range_max ); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_clear.depthrange.zmin = range_min; + svga->state.hw_clear.depthrange.zmax = range_max; + } + } + + if (emit_vgpu10_viewport) { + SVGA3dViewport vp; + vp.x = (float) rect.x; + vp.y = (float) rect.y; + vp.width = (float) rect.w; + vp.height = (float) rect.h; + vp.minDepth = range_min; + vp.maxDepth = range_max; + ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp); + if (ret != PIPE_OK) return ret; + svga->state.hw_clear.viewport = rect; + svga->state.hw_clear.depthrange.zmin = range_min; svga->state.hw_clear.depthrange.zmax = range_max; } @@ -475,14 +692,27 @@ emit_scissor_rect( struct svga_context *svga, unsigned dirty ) { const struct pipe_scissor_state *scissor = &svga->curr.scissor; - SVGA3dRect rect; - rect.x = scissor->minx; - rect.y = scissor->miny; - rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */ - rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */ + if (svga_have_vgpu10(svga)) { + SVGASignedRect rect; + + rect.left = scissor->minx; + rect.top = scissor->miny; + rect.right = scissor->maxx; + rect.bottom = scissor->maxy; + + return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect); + } + else { + SVGA3dRect rect; - return SVGA3D_SetScissorRect(svga->swc, &rect); + rect.x = scissor->minx; + rect.y = scissor->miny; + rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */ + rect.h = scissor->maxy - scissor->miny; /* + 1 ?? 
*/ + + return SVGA3D_SetScissorRect(svga->swc, &rect); + } } @@ -527,9 +757,15 @@ emit_clip_planes( struct svga_context *svga, plane[2] = 2.0f * c; plane[3] = d - c; - ret = SVGA3D_SetClipPlane(svga->swc, i, plane); - if(ret != PIPE_OK) - return ret; + if (svga_have_vgpu10(svga)) { + //debug_printf("XXX emit DX10 clip plane\n"); + ret = PIPE_OK; + } + else { + ret = SVGA3D_SetClipPlane(svga->swc, i, plane); + if (ret != PIPE_OK) + return ret; + } } return PIPE_OK; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c index 8cdce742b..bac91669b 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c @@ -36,43 +36,12 @@ #include "svga_shader.h" #include "svga_resource_texture.h" #include "svga_tgsi.h" +#include "svga_format.h" #include "svga_hw_reg.h" -static inline int -compare_fs_keys(const struct svga_fs_compile_key *a, - const struct svga_fs_compile_key *b) -{ - unsigned keysize_a = svga_fs_key_size( a ); - unsigned keysize_b = svga_fs_key_size( b ); - - if (keysize_a != keysize_b) { - return (int)(keysize_a - keysize_b); - } - return memcmp( a, b, keysize_a ); -} - - -/** Search for a fragment shader variant */ -static struct svga_shader_variant * -search_fs_key(const struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key) -{ - struct svga_shader_variant *variant = fs->base.variants; - - assert(key); - - for ( ; variant; variant = variant->next) { - if (compare_fs_keys( key, &variant->key.fkey ) == 0) - return variant; - } - - return NULL; -} - - /** * If we fail to compile a fragment shader (because it uses too many * registers, for example) we'll use a dummy/fallback shader that @@ -111,13 +80,30 @@ get_dummy_fragment_shader(void) } +static struct svga_shader_variant * +translate_fragment_program(struct svga_context *svga, + const struct svga_fragment_shader *fs, + const struct svga_compile_key *key) +{ + if (svga_have_vgpu10(svga)) { + return svga_tgsi_vgpu10_translate(svga, &fs->base, key, + PIPE_SHADER_FRAGMENT); + } + else { + return svga_tgsi_vgpu9_translate(svga, &fs->base, key, + PIPE_SHADER_FRAGMENT); + } +} + + /** * Replace the given shader's instruction with a simple constant-color * shader. We use this when normal shader translation fails. 
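
The comment above, together with get_dummy_fragment_shader() whose body this hunk elides, refers to a constant-color fallback shader. The sketch below shows roughly how such a fallback is built with the tgsi_ureg helpers; it is written from memory of that API, so treat the calls as an approximation rather than a quote of this file.

#include "tgsi/tgsi_ureg.h"

/* Approximate constant-color fragment shader: MOV a red immediate into
 * the COLOR output.  Illustrative only, not copied from the driver. */
static const struct tgsi_token *
toy_dummy_fragment_shader(void)
{
   static const float red[4] = { 1.0f, 0.0f, 0.0f, 0.0f };
   struct ureg_program *ureg;
   const struct tgsi_token *tokens;
   unsigned num_tokens;

   ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!ureg)
      return NULL;

   ureg_MOV(ureg,
            ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0),
            ureg_DECL_immediate(ureg, red, 4));
   ureg_END(ureg);

   tokens = ureg_get_tokens(ureg, &num_tokens);
   ureg_destroy(ureg);
   return tokens;
}
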
*/ static struct svga_shader_variant * -get_compiled_dummy_shader(struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key) +get_compiled_dummy_shader(struct svga_context *svga, + struct svga_fragment_shader *fs, + const struct svga_compile_key *key) { const struct tgsi_token *dummy = get_dummy_fragment_shader(); struct svga_shader_variant *variant; @@ -129,7 +115,7 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs, FREE((void *) fs->base.tokens); fs->base.tokens = dummy; - variant = svga_translate_fragment_program(fs, key); + variant = translate_fragment_program(svga, fs, key); return variant; } @@ -140,52 +126,47 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs, static enum pipe_error compile_fs(struct svga_context *svga, struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key, + const struct svga_compile_key *key, struct svga_shader_variant **out_variant) { struct svga_shader_variant *variant; enum pipe_error ret = PIPE_ERROR; - variant = svga_translate_fragment_program( fs, key ); + variant = translate_fragment_program(svga, fs, key); if (variant == NULL) { debug_printf("Failed to compile fragment shader," " using dummy shader instead.\n"); - variant = get_compiled_dummy_shader(fs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + variant = get_compiled_dummy_shader(svga, fs, key); } - - if (svga_shader_too_large(svga, variant)) { + else if (svga_shader_too_large(svga, variant)) { /* too big, use dummy shader */ - debug_printf("Shader too large (%lu bytes)," + debug_printf("Shader too large (%u bytes)," " using dummy shader instead.\n", - (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0])); - variant = get_compiled_dummy_shader(fs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + (unsigned) (variant->nr_tokens + * sizeof(variant->tokens[0]))); + /* Free the too-large variant */ + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + /* Use simple pass-through shader instead */ + variant = get_compiled_dummy_shader(svga, fs, key); + } + + if (!variant) { + return PIPE_ERROR; } ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_PS, variant); - if (ret != PIPE_OK) - goto fail; + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); + return ret; + } *out_variant = variant; - /* insert variants at head of linked list */ + /* insert variant at head of linked list */ variant->next = fs->base.variants; fs->base.variants = variant; return PIPE_OK; - -fail: - if (variant) { - svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); - } - return ret; } @@ -197,23 +178,45 @@ fail: static enum pipe_error make_fs_key(const struct svga_context *svga, struct svga_fragment_shader *fs, - struct svga_fs_compile_key *key) + struct svga_compile_key *key) { + const unsigned shader = PIPE_SHADER_FRAGMENT; unsigned i; - int idx = 0; memset(key, 0, sizeof *key); + memcpy(key->generic_remap_table, fs->generic_remap_table, + sizeof(fs->generic_remap_table)); + + /* SVGA_NEW_GS, SVGA_NEW_VS + */ + if (svga->curr.gs) { + key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs; + } else { + key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs; + } + /* Only need fragment shader fixup for twoside lighting if doing * hwtnl. Otherwise the draw module does the whole job for us. 
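
compile_fs() and emit_hw_fs() in this hunk implement the usual shader-variant cache: look the compile key up in the shader's variant list, compile on a miss, then push the new variant onto the head of the list. The same idea boiled down to a self-contained sketch, with simplified stand-in types rather than the driver's structs.

#include <stdlib.h>
#include <string.h>

struct toy_key { unsigned flags; unsigned num_textures; };

struct toy_variant {
   struct toy_key key;
   struct toy_variant *next;
   /* translated machine code would hang off here */
};

struct toy_shader { struct toy_variant *variants; };

static struct toy_variant *
toy_search_variant(const struct toy_shader *sh, const struct toy_key *key)
{
   struct toy_variant *v;
   for (v = sh->variants; v; v = v->next) {
      if (memcmp(&v->key, key, sizeof *key) == 0)
         return v;
   }
   return NULL;
}

static struct toy_variant *
toy_get_variant(struct toy_shader *sh, const struct toy_key *key)
{
   struct toy_variant *v = toy_search_variant(sh, key);
   if (!v) {
      v = calloc(1, sizeof *v);      /* "compile" on a cache miss */
      if (!v)
         return NULL;
      v->key = *key;
      v->next = sh->variants;        /* insert at head, like the driver */
      sh->variants = v;
   }
   return v;
}
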
* * SVGA_NEW_SWTNL */ if (!svga->state.sw.need_swtnl) { - /* SVGA_NEW_RAST + /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ - key->light_twoside = svga->curr.rast->templ.light_twoside; - key->front_ccw = svga->curr.rast->templ.front_ccw; + key->fs.light_twoside = svga->curr.rast->templ.light_twoside; + key->fs.front_ccw = svga->curr.rast->templ.front_ccw; + key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable && + svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES); + key->fs.aa_point = (svga->curr.rast->templ.point_smooth && + svga->curr.reduced_prim == PIPE_PRIM_POINTS && + (svga->curr.rast->pointsize > 1.0 || + svga->curr.vs->base.info.writes_psize)); + if (key->fs.aa_point) { + assert(svga->curr.gs != NULL); + assert(svga->curr.gs->aa_point_coord_index != -1); + key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index; + } } /* The blend workaround for simulating logicop xor behaviour @@ -231,7 +234,7 @@ make_fs_key(const struct svga_context *svga, * SVGA_NEW_BLEND */ if (svga->curr.blend->need_white_fragments) { - key->white_fragments = 1; + key->fs.white_fragments = 1; } #ifdef DEBUG @@ -241,22 +244,23 @@ make_fs_key(const struct svga_context *svga, */ { static boolean warned = FALSE; - unsigned i, n = MAX2(svga->curr.num_sampler_views, - svga->curr.num_samplers); + unsigned i, n = MAX2(svga->curr.num_sampler_views[shader], + svga->curr.num_samplers[shader]); /* Only warn once to prevent too much debug output */ if (!warned) { - if (svga->curr.num_sampler_views != svga->curr.num_samplers) { + if (svga->curr.num_sampler_views[shader] != + svga->curr.num_samplers[shader]) { debug_printf("svga: mismatched number of sampler views (%u) " "vs. samplers (%u)\n", - svga->curr.num_sampler_views, - svga->curr.num_samplers); + svga->curr.num_sampler_views[shader], + svga->curr.num_samplers[shader]); } for (i = 0; i < n; i++) { - if ((svga->curr.sampler_views[i] == NULL) != - (svga->curr.sampler[i] == NULL)) + if ((svga->curr.sampler_views[shader][i] == NULL) != + (svga->curr.sampler[shader][i] == NULL)) debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n", - i, svga->curr.sampler_views[i], - i, svga->curr.sampler[i]); + i, svga->curr.sampler_views[shader][i], + i, svga->curr.sampler[shader][i]); } warned = TRUE; } @@ -268,68 +272,62 @@ make_fs_key(const struct svga_context *svga, * * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ - for (i = 0; i < svga->curr.num_sampler_views; i++) { - if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) { - assert(svga->curr.sampler_views[i]->texture); - key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target; - if (!svga->curr.sampler[i]->normalized_coords) { - key->tex[i].width_height_idx = idx++; - key->tex[i].unnormalized = TRUE; - ++key->num_unnormalized_coords; - } - - key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r; - key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g; - key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b; - key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a; - } - } - key->num_textures = svga->curr.num_sampler_views; - - idx = 0; - for (i = 0; i < svga->curr.num_samplers; ++i) { - if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) { - struct pipe_resource *tex = svga->curr.sampler_views[i]->texture; - struct svga_texture *stex = svga_texture(tex); - SVGA3dSurfaceFormat format = stex->key.format; - - if (format == SVGA3D_Z_D16 || - format == SVGA3D_Z_D24X8 || - format == SVGA3D_Z_D24S8) { - /* If we're sampling from a SVGA3D_Z_D16, 
SVGA3D_Z_D24X8, - * or SVGA3D_Z_D24S8 surface, we'll automatically get - * shadow comparison. But we only get LEQUAL mode. - * Set TEX_COMPARE_NONE here so we don't emit the extra FS - * code for shadow comparison. - */ - key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; - key->tex[i].compare_func = PIPE_FUNC_NEVER; - /* These depth formats _only_ support comparison mode and - * not ordinary sampling so warn if the later is expected. - */ - if (svga->curr.sampler[i]->compare_mode != - PIPE_TEX_COMPARE_R_TO_TEXTURE) { - debug_warn_once("Unsupported shadow compare mode"); - } - /* The only supported comparison mode is LEQUAL */ - if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) { - debug_warn_once("Unsupported shadow compare function"); + svga_init_shader_key_common(svga, shader, key); + + for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { + struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; + const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; + if (view) { + struct pipe_resource *tex = view->texture; + if (tex->target != PIPE_BUFFER) { + struct svga_texture *stex = svga_texture(tex); + SVGA3dSurfaceFormat format = stex->key.format; + + if (!svga_have_vgpu10(svga) && + (format == SVGA3D_Z_D16 || + format == SVGA3D_Z_D24X8 || + format == SVGA3D_Z_D24S8)) { + /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8, + * or SVGA3D_Z_D24S8 surface, we'll automatically get + * shadow comparison. But we only get LEQUAL mode. + * Set TEX_COMPARE_NONE here so we don't emit the extra FS + * code for shadow comparison. + */ + key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; + key->tex[i].compare_func = PIPE_FUNC_NEVER; + /* These depth formats _only_ support comparison mode and + * not ordinary sampling so warn if the later is expected. + */ + if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) { + debug_warn_once("Unsupported shadow compare mode"); + } + /* The shader translation code can emit code to + * handle ALWAYS and NEVER compare functions + */ + else if (sampler->compare_func == PIPE_FUNC_ALWAYS || + sampler->compare_func == PIPE_FUNC_NEVER) { + key->tex[i].compare_mode = sampler->compare_mode; + key->tex[i].compare_func = sampler->compare_func; + } + else if (sampler->compare_func != PIPE_FUNC_LEQUAL) { + debug_warn_once("Unsupported shadow compare function"); + } + } + else { + /* For other texture formats, just use the compare func/mode + * as-is. Should be no-ops for color textures. For depth + * textures, we do not get automatic depth compare. We have + * to do it ourselves in the shader. And we don't get PCF. + */ + key->tex[i].compare_mode = sampler->compare_mode; + key->tex[i].compare_func = sampler->compare_func; } - } - else { - /* For other texture formats, just use the compare func/mode - * as-is. Should be no-ops for color textures. For depth - * textures, we do not get automatic depth compare. We have - * to do it ourselves in the shader. And we don't get PCF. 
- */ - key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode; - key->tex[i].compare_func = svga->curr.sampler[i]->compare_func; } } } /* sprite coord gen state */ - for (i = 0; i < svga->curr.num_samplers; ++i) { + for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { key->tex[i].sprite_texgen = svga->curr.rast->templ.sprite_coord_enable & (1 << i); } @@ -337,10 +335,25 @@ make_fs_key(const struct svga_context *svga, key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + key->fs.flatshade = svga->curr.rast->templ.flatshade; + + /* SVGA_NEW_DEPTH_STENCIL_ALPHA */ + if (svga_have_vgpu10(svga)) { + /* Alpha testing is not supported in integer-valued render targets. */ + if (svga_has_any_integer_cbufs(svga)) { + key->fs.alpha_func = SVGA3D_CMP_ALWAYS; + key->fs.alpha_ref = 0; + } + else { + key->fs.alpha_func = svga->curr.depth->alphafunc; + key->fs.alpha_ref = svga->curr.depth->alpharef; + } + } + /* SVGA_NEW_FRAME_BUFFER */ if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { /* Replicate color0 output to N colorbuffers */ - key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; + key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; } return PIPE_OK; @@ -355,18 +368,32 @@ svga_reemit_fs_bindings(struct svga_context *svga) { enum pipe_error ret; - assert(svga->rebind.fs); + assert(svga->rebind.flags.fs); assert(svga_have_gb_objects(svga)); if (!svga->state.hw_draw.fs) return PIPE_OK; - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, - svga->state.hw_draw.fs->gb_shader); + if (!svga_need_to_rebind_resources(svga)) { + ret = svga->swc->resource_rebind(svga->swc, NULL, + svga->state.hw_draw.fs->gb_shader, + SVGA_RELOC_READ); + goto out; + } + + if (svga_have_vgpu10(svga)) + ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, + svga->state.hw_draw.fs->gb_shader, + svga->state.hw_draw.fs->id); + else + ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, + svga->state.hw_draw.fs->gb_shader); + + out: if (ret != PIPE_OK) return ret; - svga->rebind.fs = FALSE; + svga->rebind.flags.fs = FALSE; return PIPE_OK; } @@ -378,7 +405,7 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) struct svga_shader_variant *variant = NULL; enum pipe_error ret = PIPE_OK; struct svga_fragment_shader *fs = svga->curr.fs; - struct svga_fs_compile_key key; + struct svga_compile_key key; /* SVGA_NEW_BLEND * SVGA_NEW_TEXTURE_BINDING @@ -386,14 +413,16 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) * SVGA_NEW_NEED_SWTNL * SVGA_NEW_SAMPLER * SVGA_NEW_FRAME_BUFFER + * SVGA_NEW_DEPTH_STENCIL_ALPHA + * SVGA_NEW_VS */ - ret = make_fs_key( svga, fs, &key ); + ret = make_fs_key(svga, fs, &key); if (ret != PIPE_OK) return ret; - variant = search_fs_key( fs, &key ); + variant = svga_search_shader_key(&fs->base, &key); if (!variant) { - ret = compile_fs( svga, fs, &key, &variant ); + ret = compile_fs(svga, fs, &key, &variant); if (ret != PIPE_OK) return ret; } @@ -401,22 +430,14 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) assert(variant); if (variant != svga->state.hw_draw.fs) { - if (svga_have_gb_objects(svga)) { - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, - variant->gb_shader); - if (ret != PIPE_OK) - return ret; + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant); + if (ret != PIPE_OK) + return ret; - svga->rebind.fs = FALSE; - } - else { - ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id); - if (ret != PIPE_OK) - return ret; - 
} + svga->rebind.flags.fs = FALSE; svga->dirty |= SVGA_NEW_FS_VARIANT; - svga->state.hw_draw.fs = variant; + svga->state.hw_draw.fs = variant; } return PIPE_OK; @@ -426,11 +447,16 @@ struct svga_tracked_state svga_hw_fs = { "fragment shader (hwtnl)", (SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_VS | SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_NEED_SWTNL | SVGA_NEW_RAST | + SVGA_NEW_STIPPLE | + SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_SAMPLER | SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_BLEND), emit_hw_fs }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c new file mode 100644 index 000000000..618bec248 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c @@ -0,0 +1,255 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "translate/translate.h" +#include "tgsi/tgsi_ureg.h" + +#include "svga_context.h" +#include "svga_cmd.h" +#include "svga_shader.h" +#include "svga_tgsi.h" +#include "svga_streamout.h" +#include "svga_format.h" + +/** + * If we fail to compile a geometry shader we'll use a dummy/fallback shader + * that simply emits the incoming vertices. + */ +static const struct tgsi_token * +get_dummy_geometry_shader(void) +{ + //XXX + return NULL; +} + + +static struct svga_shader_variant * +translate_geometry_program(struct svga_context *svga, + const struct svga_geometry_shader *gs, + const struct svga_compile_key *key) +{ + assert(svga_have_vgpu10(svga)); + return svga_tgsi_vgpu10_translate(svga, &gs->base, key, + PIPE_SHADER_GEOMETRY); +} + + +/** + * Translate TGSI shader into an svga shader variant. 
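
svga_hw_fs above (and svga_hw_gs later in this diff) are entries in the driver's table of tracked states: a name, a dirty-bit mask and an emit callback. The loop below is only a schematic of how such a table is typically walked; the toy_* types are simplified and this is not the driver's actual update function.

typedef int toy_error;               /* 0 == OK, loosely mirrors enum pipe_error */

struct toy_context;                  /* opaque here */

struct toy_tracked_state {
   const char *name;
   unsigned dirty;                   /* e.g. SVGA_NEW_FS | SVGA_NEW_RAST | ... */
   toy_error (*update)(struct toy_context *ctx, unsigned dirty);
};

static toy_error
toy_update_state(struct toy_context *ctx,
                 const struct toy_tracked_state *const *atoms,   /* NULL-terminated */
                 unsigned dirty)
{
   unsigned i;
   for (i = 0; atoms[i]; i++) {
      if (atoms[i]->dirty & dirty) {
         toy_error ret = atoms[i]->update(ctx, dirty);
         if (ret != 0)
            return ret;
      }
   }
   return 0;
}
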
+ */ +static enum pipe_error +compile_gs(struct svga_context *svga, + struct svga_geometry_shader *gs, + const struct svga_compile_key *key, + struct svga_shader_variant **out_variant) +{ + struct svga_shader_variant *variant; + enum pipe_error ret = PIPE_ERROR; + + variant = translate_geometry_program(svga, gs, key); + if (!variant) { + /* some problem during translation, try the dummy shader */ + const struct tgsi_token *dummy = get_dummy_geometry_shader(); + if (!dummy) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + debug_printf("Failed to compile geometry shader, using dummy shader instead.\n"); + FREE((void *) gs->base.tokens); + gs->base.tokens = dummy; + variant = translate_geometry_program(svga, gs, key); + if (!variant) { + return PIPE_ERROR; + } + } + + ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_GS, variant); + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant); + return ret; + } + + *out_variant = variant; + + return PIPE_OK; +} + + +static void +make_gs_key(struct svga_context *svga, struct svga_compile_key *key) +{ + struct svga_geometry_shader *gs = svga->curr.gs; + + memset(key, 0, sizeof *key); + + /* + * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER + */ + svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, key); + + memcpy(key->generic_remap_table, gs->generic_remap_table, + sizeof(gs->generic_remap_table)); + + key->gs.vs_generic_outputs = svga->curr.vs->generic_outputs; + + key->gs.need_prescale = svga->state.hw_clear.prescale.enabled; + + key->gs.writes_psize = gs->base.info.writes_psize; + key->gs.wide_point = gs->wide_point; + key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable; + key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode + == PIPE_SPRITE_COORD_LOWER_LEFT); + + /* SVGA_NEW_RAST */ + key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; +} + + +/** + * svga_reemit_gs_bindings - Reemit the geometry shader bindings + */ +enum pipe_error +svga_reemit_gs_bindings(struct svga_context *svga) +{ + enum pipe_error ret; + struct svga_winsys_gb_shader *gbshader = NULL; + SVGA3dShaderId shaderId = SVGA3D_INVALID_ID; + + assert(svga->rebind.flags.gs); + assert(svga_have_gb_objects(svga)); + + /* Geometry Shader is only supported in vgpu10 */ + assert(svga_have_vgpu10(svga)); + + if (svga->state.hw_draw.gs) { + gbshader = svga->state.hw_draw.gs->gb_shader; + shaderId = svga->state.hw_draw.gs->id; + } + + if (!svga_need_to_rebind_resources(svga)) { + ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader, + SVGA_RELOC_READ); + goto out; + } + + ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_GS, + gbshader, shaderId); + + out: + if (ret != PIPE_OK) + return ret; + + svga->rebind.flags.gs = FALSE; + return PIPE_OK; +} + +static enum pipe_error +emit_hw_gs(struct svga_context *svga, unsigned dirty) +{ + struct svga_shader_variant *variant; + struct svga_geometry_shader *gs = svga->curr.gs; + enum pipe_error ret = PIPE_OK; + struct svga_compile_key key; + + /* If there's a user-defined GS, we should have a pointer to a derived + * GS. This should have been resolved in update_tgsi_transform(). + */ + if (svga->curr.user_gs) + assert(svga->curr.gs); + + if (!gs) { + if (svga->state.hw_draw.gs != NULL) { + + /** The previous geometry shader is made inactive. + * Needs to unbind the geometry shader. 
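
svga_reemit_fs_bindings() earlier and svga_reemit_gs_bindings() above share one pattern: when the new command buffer only needs its resources paged in, a lightweight resource_rebind reference is enough, otherwise the full SetShader bind is re-sent. A distilled sketch of that decision; the toy_* names stand in for the winsys plumbing and are not driver API.

enum toy_err { TOY_OK, TOY_FAIL };

struct toy_shader_binding {
   void *gb_shader;                  /* guest-backed shader resource */
   unsigned shader_id;
};

static enum toy_err
toy_resource_rebind(void *res)       /* page-in only */
{
   (void) res;
   return TOY_OK;
}

static enum toy_err
toy_send_set_shader(void *res, unsigned id)   /* full bind command */
{
   (void) res;
   (void) id;
   return TOY_OK;
}

static enum toy_err
toy_reemit_shader(const struct toy_shader_binding *b, int need_full_rebind)
{
   if (!b->gb_shader)
      return TOY_OK;                 /* nothing bound, nothing to re-emit */

   if (!need_full_rebind)
      return toy_resource_rebind(b->gb_shader);

   return toy_send_set_shader(b->gb_shader, b->shader_id);
}
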
+ */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); + svga->state.hw_draw.gs = NULL; + } + return ret; + } + + /* If there is stream output info for this geometry shader, then use + * it instead of the one from the vertex shader. + */ + if (svga_have_gs_streamout(svga)) { + svga_set_stream_output(svga, gs->base.stream_output); + } + else if (!svga_have_vs_streamout(svga)) { + /* turn off stream out */ + svga_set_stream_output(svga, NULL); + } + + /* SVGA_NEW_NEED_SWTNL */ + if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) { + /* No geometry shader is needed */ + variant = NULL; + } + else { + make_gs_key(svga, &key); + + /* See if we already have a GS variant that matches the key */ + variant = svga_search_shader_key(&gs->base, &key); + + if (!variant) { + ret = compile_gs(svga, gs, &key, &variant); + if (ret != PIPE_OK) + return ret; + + /* insert the new variant at head of linked list */ + assert(variant); + variant->next = gs->base.variants; + gs->base.variants = variant; + } + } + + if (variant != svga->state.hw_draw.gs) { + /* Bind the new variant */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, variant); + if (ret != PIPE_OK) + return ret; + + svga->rebind.flags.gs = FALSE; + svga->dirty |= SVGA_NEW_GS_VARIANT; + svga->state.hw_draw.gs = variant; + } + + return PIPE_OK; +} + +struct svga_tracked_state svga_hw_gs = +{ + "geometry shader (hwtnl)", + (SVGA_NEW_VS | + SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER | + SVGA_NEW_RAST | + SVGA_NEW_NEED_SWTNL), + emit_hw_gs +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c index cac39d62f..b07c62da4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -26,6 +26,7 @@ #include "util/u_inlines.h" #include "pipe/p_state.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_state.h" #include "svga_debug.h" #include "svga_hw_reg.h" @@ -61,6 +62,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) { boolean need_pipeline = FALSE; struct svga_vertex_shader *vs = svga->curr.vs; + const char *reason = ""; /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ @@ -75,6 +77,20 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) svga->curr.rast->need_pipeline_lines_str, svga->curr.rast->need_pipeline_points_str); need_pipeline = TRUE; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + reason = svga->curr.rast->need_pipeline_points_str; + break; + case PIPE_PRIM_LINES: + reason = svga->curr.rast->need_pipeline_lines_str; + break; + case PIPE_PRIM_TRIANGLES: + reason = svga->curr.rast->need_pipeline_tris_str; + break; + default: + assert(!"Unexpected reduced prim type"); + } } /* EDGEFLAGS @@ -82,6 +98,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) if (vs && vs->base.info.writes_edgeflag) { SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__); need_pipeline = TRUE; + reason = "edge flags"; } /* SVGA_NEW_FS, SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE @@ -91,7 +108,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) unsigned generic_inputs = svga->curr.fs ? 
svga->curr.fs->generic_inputs : 0; - if (sprite_coord_gen && + if (!svga_have_vgpu10(svga) && sprite_coord_gen && (generic_inputs & ~sprite_coord_gen)) { /* The fragment shader is using some generic inputs that are * not being replaced by auto-generated point/sprite coords (and @@ -103,6 +120,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) * point stage. */ need_pipeline = TRUE; + reason = "point sprite coordinate generation"; } } @@ -115,6 +133,12 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty) if (0 && svga->state.sw.need_pipeline) debug_printf("sw.need_pipeline = %d\n", svga->state.sw.need_pipeline); + if (svga->state.sw.need_pipeline) { + assert(reason); + pipe_debug_message(&svga->debug.callback, FALLBACK, + "Using semi-fallback for %s", reason); + } + return PIPE_OK; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c index ebb98373e..d43894d71 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c @@ -23,16 +23,20 @@ * **********************************************************/ +#include "pipe/p_defines.h" +#include "util/u_bitmask.h" #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" -#include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "svga_context.h" #include "svga_screen.h" #include "svga_state.h" #include "svga_cmd.h" +#include "svga_format.h" +#include "svga_shader.h" struct rs_queue { @@ -77,7 +81,7 @@ svga_queue_rs( struct rs_queue *q, * the "to" state. */ static enum pipe_error -emit_rss(struct svga_context *svga, unsigned dirty) +emit_rss_vgpu9(struct svga_context *svga, unsigned dirty) { struct svga_screen *screen = svga_screen(svga->pipe.screen); struct rs_queue queue; @@ -85,7 +89,7 @@ emit_rss(struct svga_context *svga, unsigned dirty) queue.rs_count = 0; - if (dirty & SVGA_NEW_BLEND) { + if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) { const struct svga_blend_state *curr = svga->curr.blend; EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail ); @@ -119,7 +123,7 @@ emit_rss(struct svga_context *svga, unsigned dirty) EMIT_RS( svga, color, BLENDCOLOR, fail ); } - if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) { + if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_RAST)) { const struct svga_depth_stencil_state *curr = svga->curr.depth; const struct svga_rasterizer_state *rast = svga->curr.rast; @@ -300,6 +304,151 @@ fail: return PIPE_ERROR_OUT_OF_MEMORY; } +/** Returns a non-culling rasterizer state object to be used with + * point sprite. + */ +static struct svga_rasterizer_state * +get_no_cull_rasterizer_state(struct svga_context *svga) +{ + const struct svga_rasterizer_state *r = svga->curr.rast; + unsigned int aa_point = r->templ.point_smooth; + + if (!svga->rasterizer_no_cull[aa_point]) { + struct pipe_rasterizer_state rast; + + memset(&rast, 0, sizeof(rast)); + rast.flatshade = 1; + rast.front_ccw = 1; + rast.point_smooth = r->templ.point_smooth; + + /* All rasterizer states have the same half_pixel_center, + * bottom_edge_rule and clip_halfz values since they are + * constant for a context. If we ever implement + * GL_ARB_clip_control, the clip_halfz field would have to be observed. 
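
emit_rss_vgpu9() above (through EMIT_RS) and the EMIT_TS macros near the end of this diff rely on the same idea: keep a software shadow of every hardware state value, queue only the values that changed, and flush the queue as one command. A stripped-down sketch of that pattern with toy_* stand-ins for the real structures:

#define TOY_NUM_STATES 16

struct toy_queue {
   struct { unsigned name, value; } item[TOY_NUM_STATES];
   unsigned count;
};

struct toy_hw_shadow {
   unsigned rs[TOY_NUM_STATES];      /* last value sent to the device */
};

static void
toy_emit_rs(struct toy_hw_shadow *hw, struct toy_queue *q,
            unsigned name, unsigned value)
{
   if (hw->rs[name] != value) {
      q->item[q->count].name = name;
      q->item[q->count].value = value;
      q->count++;
      hw->rs[name] = value;
   }
}

/* After all toy_emit_rs() calls, a single SVGA3D_BeginSetRenderState()-style
 * command would carry the q->count queued entries to the device. */
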
+ */ + rast.half_pixel_center = r->templ.half_pixel_center; + rast.bottom_edge_rule = r->templ.bottom_edge_rule; + rast.clip_halfz = r->templ.clip_halfz; + + svga->rasterizer_no_cull[aa_point] = + svga->pipe.create_rasterizer_state(&svga->pipe, &rast); + } + return svga->rasterizer_no_cull[aa_point]; +} + +static enum pipe_error +emit_rss_vgpu10(struct svga_context *svga, unsigned dirty) +{ + enum pipe_error ret = PIPE_OK; + + svga_hwtnl_flush_retry(svga); + + if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) { + const struct svga_blend_state *curr; + float blend_factor[4]; + + if (svga_has_any_integer_cbufs(svga)) { + /* Blending is not supported in integer-valued render targets. */ + curr = svga->noop_blend; + blend_factor[0] = + blend_factor[1] = + blend_factor[2] = + blend_factor[3] = 0; + } + else { + curr = svga->curr.blend; + + if (curr->blend_color_alpha) { + blend_factor[0] = + blend_factor[1] = + blend_factor[2] = + blend_factor[3] = svga->curr.blend_color.color[3]; + } + else { + blend_factor[0] = svga->curr.blend_color.color[0]; + blend_factor[1] = svga->curr.blend_color.color[1]; + blend_factor[2] = svga->curr.blend_color.color[2]; + blend_factor[3] = svga->curr.blend_color.color[3]; + } + } + + /* Set/bind the blend state object */ + if (svga->state.hw_draw.blend_id != curr->id || + svga->state.hw_draw.blend_factor[0] != blend_factor[0] || + svga->state.hw_draw.blend_factor[1] != blend_factor[1] || + svga->state.hw_draw.blend_factor[2] != blend_factor[2] || + svga->state.hw_draw.blend_factor[3] != blend_factor[3] || + svga->state.hw_draw.blend_sample_mask != svga->curr.sample_mask) { + ret = SVGA3D_vgpu10_SetBlendState(svga->swc, curr->id, + blend_factor, + svga->curr.sample_mask); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_draw.blend_id = curr->id; + svga->state.hw_draw.blend_factor[0] = blend_factor[0]; + svga->state.hw_draw.blend_factor[1] = blend_factor[1]; + svga->state.hw_draw.blend_factor[2] = blend_factor[2]; + svga->state.hw_draw.blend_factor[3] = blend_factor[3]; + svga->state.hw_draw.blend_sample_mask = svga->curr.sample_mask; + } + } + + if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF)) { + const struct svga_depth_stencil_state *curr = svga->curr.depth; + unsigned curr_ref = svga->curr.stencil_ref.ref_value[0]; + + if (curr->id != svga->state.hw_draw.depth_stencil_id || + curr_ref != svga->state.hw_draw.stencil_ref) { + /* Set/bind the depth/stencil state object */ + ret = SVGA3D_vgpu10_SetDepthStencilState(svga->swc, curr->id, + curr_ref); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_draw.depth_stencil_id = curr->id; + svga->state.hw_draw.stencil_ref = curr_ref; + } + } + + if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) { + const struct svga_rasterizer_state *rast; + + if (svga->curr.reduced_prim == PIPE_PRIM_POINTS && + svga->curr.gs && svga->curr.gs->wide_point) { + + /* If we are drawing a point sprite, we will need to + * bind a non-culling rasterizer state object + */ + rast = get_no_cull_rasterizer_state(svga); + } + else { + rast = svga->curr.rast; + } + + if (svga->state.hw_draw.rasterizer_id != rast->id) { + /* Set/bind the rasterizer state object */ + ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id); + if (ret != PIPE_OK) + return ret; + svga->state.hw_draw.rasterizer_id = rast->id; + } + } + return PIPE_OK; +} + + +static enum pipe_error +emit_rss(struct svga_context *svga, unsigned dirty) +{ + if (svga_have_vgpu10(svga)) { + return emit_rss_vgpu10(svga, dirty); + } + else { + return 
emit_rss_vgpu9(svga, dirty); + } +} + struct svga_tracked_state svga_hw_rss = { @@ -307,11 +456,12 @@ struct svga_tracked_state svga_hw_rss = (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR | - SVGA_NEW_DEPTH_STENCIL | + SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF | SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | - SVGA_NEW_NEED_PIPELINE), + SVGA_NEW_NEED_PIPELINE | + SVGA_NEW_REDUCED_PRIMITIVE), emit_rss }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c new file mode 100644 index 000000000..e7b540cc7 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c @@ -0,0 +1,349 @@ +/* + * Copyright 2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +/** + * VGPU10 sampler and sampler view functions. + */ + + +#include "pipe/p_defines.h" +#include "util/u_bitmask.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_cmd.h" +#include "svga_context.h" +#include "svga_format.h" +#include "svga_resource_buffer.h" +#include "svga_resource_texture.h" +#include "svga_shader.h" +#include "svga_state.h" +#include "svga_sampler_view.h" + + +/** Get resource handle for a texture or buffer */ +static inline struct svga_winsys_surface * +svga_resource_handle(struct pipe_resource *res) +{ + if (res->target == PIPE_BUFFER) { + return svga_buffer(res)->handle; + } + else { + return svga_texture(res)->handle; + } +} + + +/** + * This helper function returns TRUE if the specified resource collides with + * any of the resources bound to any of the currently bound sampler views. + */ +boolean +svga_check_sampler_view_resource_collision(struct svga_context *svga, + struct svga_winsys_surface *res, + unsigned shader) +{ + struct pipe_screen *screen = svga->pipe.screen; + unsigned i; + + if (svga_screen(screen)->debug.no_surface_view) { + return FALSE; + } + + for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) { + struct svga_pipe_sampler_view *sv = + svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); + + if (sv && res == svga_resource_handle(sv->base.texture)) { + return TRUE; + } + } + + return FALSE; +} + + +/** + * Create a DX ShaderResourceSamplerView for the given pipe_sampler_view, + * if needed. 
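
emit_rss_vgpu10() above and make_fs_key() earlier both call svga_has_any_integer_cbufs() to switch off blending and alpha testing on integer render targets; the body of that helper is not part of this hunk. One plausible shape for such a check, using the u_format helpers, is sketched below. It is a reconstruction under that assumption, not the driver's code, and the toy_ name marks it as such.

#include "pipe/p_state.h"
#include "util/u_format.h"

static boolean
toy_has_any_integer_cbufs(const struct pipe_framebuffer_state *fb)
{
   unsigned i;
   for (i = 0; i < fb->nr_cbufs; i++) {
      if (fb->cbufs[i] &&
          util_format_is_pure_integer(fb->cbufs[i]->format))
         return TRUE;
   }
   return FALSE;
}
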
+ */ +enum pipe_error +svga_validate_pipe_sampler_view(struct svga_context *svga, + struct svga_pipe_sampler_view *sv) +{ + enum pipe_error ret = PIPE_OK; + + if (sv->id == SVGA3D_INVALID_ID) { + struct svga_screen *ss = svga_screen(svga->pipe.screen); + struct pipe_resource *texture = sv->base.texture; + struct svga_winsys_surface *surface = svga_resource_handle(texture); + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDim; + SVGA3dShaderResourceViewDesc viewDesc; + + format = svga_translate_format(ss, sv->base.format, + PIPE_BIND_SAMPLER_VIEW); + assert(format != SVGA3D_FORMAT_INVALID); + + /* Convert the format to a sampler-friendly format, if needed */ + format = svga_sampler_format(format); + + if (texture->target == PIPE_BUFFER) { + viewDesc.buffer.firstElement = sv->base.u.buf.first_element; + viewDesc.buffer.numElements = (sv->base.u.buf.last_element - + sv->base.u.buf.first_element + 1); + } + else { + viewDesc.tex.mostDetailedMip = sv->base.u.tex.first_level; + viewDesc.tex.firstArraySlice = sv->base.u.tex.first_layer; + viewDesc.tex.mipLevels = (sv->base.u.tex.last_level - + sv->base.u.tex.first_level + 1); + } + + /* arraySize in viewDesc specifies the number of array slices in a + * texture array. For 3D texture, last_layer in + * pipe_sampler_view specifies the last slice of the texture + * which is different from the last slice in a texture array, + * hence we need to set arraySize to 1 explicitly. + */ + viewDesc.tex.arraySize = + (texture->target == PIPE_TEXTURE_3D || + texture->target == PIPE_BUFFER) ? 1 : + (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1); + + switch (texture->target) { + case PIPE_BUFFER: + resourceDim = SVGA3D_RESOURCE_BUFFER; + break; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + resourceDim = SVGA3D_RESOURCE_TEXTURE1D; + break; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + resourceDim = SVGA3D_RESOURCE_TEXTURE2D; + break; + case PIPE_TEXTURE_3D: + resourceDim = SVGA3D_RESOURCE_TEXTURE3D; + break; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + resourceDim = SVGA3D_RESOURCE_TEXTURECUBE; + break; + + default: + assert(!"Unexpected texture type"); + resourceDim = SVGA3D_RESOURCE_TEXTURE2D; + } + + sv->id = util_bitmask_add(svga->sampler_view_id_bm); + + ret = SVGA3D_vgpu10_DefineShaderResourceView(svga->swc, + sv->id, + surface, + format, + resourceDim, + &viewDesc); + if (ret != PIPE_OK) { + util_bitmask_clear(svga->sampler_view_id_bm, sv->id); + sv->id = SVGA3D_INVALID_ID; + } + } + + return ret; +} + + +static enum pipe_error +update_sampler_resources(struct svga_context *svga, unsigned dirty) +{ + enum pipe_error ret = PIPE_OK; + unsigned shader; + + if (!svga_have_vgpu10(svga)) + return PIPE_OK; + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS]; + struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; + unsigned count; + unsigned nviews; + unsigned i; + + count = svga->curr.num_sampler_views[shader]; + for (i = 0; i < count; i++) { + struct svga_pipe_sampler_view *sv = + svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); + struct svga_winsys_surface *surface; + + if (sv) { + surface = svga_resource_handle(sv->base.texture); + + ret = svga_validate_pipe_sampler_view(svga, sv); + if (ret != PIPE_OK) + return ret; + + assert(sv->id != SVGA3D_INVALID_ID); + ids[i] = sv->id; + } + else { + surface = NULL; + ids[i] = SVGA3D_INVALID_ID; + } + surfaces[i] = surface; + } + + for (; i 
< Elements(ids); i++) { + ids[i] = SVGA3D_INVALID_ID; + surfaces[i] = NULL; + } + + if (shader == PIPE_SHADER_FRAGMENT) { + /* Handle polygon stipple sampler view */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_pipe_sampler_view *sv = + svga->polygon_stipple.sampler_view; + + assert(sv); + if (!sv) { + return PIPE_OK; /* probably out of memory */ + } + + ret = svga_validate_pipe_sampler_view(svga, sv); + if (ret != PIPE_OK) + return ret; + + ids[unit] = sv->id; + surfaces[unit] = svga_resource_handle(sv->base.texture); + count = MAX2(count, unit+1); + } + } + + /* Number of ShaderResources that need to be modified. This includes + * the one that need to be unbound. + */ + nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count); + if (nviews > 0) { + ret = SVGA3D_vgpu10_SetShaderResources(svga->swc, + svga_shader_type(shader), + 0, /* startView */ + nviews, + ids, + surfaces); + if (ret != PIPE_OK) + return ret; + } + + /* Number of sampler views enabled in the device */ + svga->state.hw_draw.num_sampler_views[shader] = count; + } + + return ret; +} + + +struct svga_tracked_state svga_hw_sampler_bindings = { + "shader resources emit", + SVGA_NEW_STIPPLE | + SVGA_NEW_TEXTURE_BINDING, + update_sampler_resources +}; + + + +static enum pipe_error +update_samplers(struct svga_context *svga, unsigned dirty ) +{ + enum pipe_error ret = PIPE_OK; + unsigned shader; + + if (!svga_have_vgpu10(svga)) + return PIPE_OK; + + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + const unsigned count = svga->curr.num_samplers[shader]; + SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS]; + unsigned i; + + for (i = 0; i < count; i++) { + if (svga->curr.sampler[shader][i]) { + ids[i] = svga->curr.sampler[shader][i]->id; + assert(ids[i] != SVGA3D_INVALID_ID); + } + else { + ids[i] = SVGA3D_INVALID_ID; + } + } + + if (count > 0) { + if (count != svga->state.hw_draw.num_samplers[shader] || + memcmp(ids, svga->state.hw_draw.samplers[shader], + count * sizeof(ids[0])) != 0) { + /* HW state is really changing */ + ret = SVGA3D_vgpu10_SetSamplers(svga->swc, + count, + 0, /* start */ + svga_shader_type(shader), /* type */ + ids); + if (ret != PIPE_OK) + return ret; + memcpy(svga->state.hw_draw.samplers[shader], ids, + count * sizeof(ids[0])); + svga->state.hw_draw.num_samplers[shader] = count; + } + } + } + + /* Handle polygon stipple sampler texture */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_sampler_state *sampler = svga->polygon_stipple.sampler; + + assert(sampler); + if (!sampler) { + return PIPE_OK; /* probably out of memory */ + } + + ret = SVGA3D_vgpu10_SetSamplers(svga->swc, + 1, /* count */ + unit, /* start */ + SVGA3D_SHADERTYPE_PS, + &sampler->id); + } + + return ret; +} + + +struct svga_tracked_state svga_hw_sampler = { + "texture sampler emit", + (SVGA_NEW_SAMPLER | + SVGA_NEW_STIPPLE | + SVGA_NEW_TEXTURE_FLAGS), + update_samplers +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_tgsi_transform.c b/lib/mesa/src/gallium/drivers/svga/svga_state_tgsi_transform.c new file mode 100644 index 000000000..9e643ff49 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_tgsi_transform.c @@ -0,0 +1,293 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "util/u_simple_shaders.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_point_sprite.h" +#include "tgsi/tgsi_dump.h" + +#include "svga_context.h" +#include "svga_shader.h" +#include "svga_tgsi.h" + + +/** + * Bind a new GS. This updates the derived current gs state, not the + * user-specified GS state. + */ +static void +bind_gs_state(struct svga_context *svga, + struct svga_geometry_shader *gs) +{ + svga->curr.gs = gs; + svga->dirty |= SVGA_NEW_GS; +} + + +/** + * emulate_point_sprite searches the shader variants list to see it there is + * a shader variant with a token string that matches the emulation + * requirement. It there isn't, then it will use a tgsi utility + * tgsi_add_point_sprite to transform the original token string to support + * point sprite. A new geometry shader state will be created with the + * transformed token string and added to the shader variants list of the + * original geometry shader. The new geometry shader state will then be + * bound as the current geometry shader. + */ +static struct svga_shader * +emulate_point_sprite(struct svga_context *svga, + struct svga_shader *shader, + const struct tgsi_token *tokens) +{ + struct svga_token_key key; + struct tgsi_token *new_tokens; + const struct tgsi_token *orig_tokens; + struct svga_geometry_shader *orig_gs = (struct svga_geometry_shader *)shader; + struct svga_geometry_shader *gs = NULL; + struct pipe_shader_state templ; + struct svga_stream_output *streamout = NULL; + int pos_out_index = -1; + int aa_point_coord_index = -1; + + assert(tokens != NULL); + + orig_tokens = tokens; + + /* Create a token key */ + memset(&key, 0, sizeof key); + key.gs.writes_psize = 1; + key.gs.sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable; + + key.gs.sprite_origin_upper_left = + !(svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + + key.gs.aa_point = svga->curr.rast->templ.point_smooth; + + if (orig_gs) { + + /* Check if the original geometry shader has stream output and + * if position is one of the outputs. 
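
emulate_point_sprite(), described above, ends up injecting a geometry shader (via tgsi_add_point_sprite) that turns each point into a screen-aligned quad sized by the point size. The plain-C restatement below only shows the geometry of that expansion; it is a conceptual sketch, not the generated TGSI, and the pixel-to-clip-space scaling is our assumption.

/* Conceptually what the injected GS does per point: emit four corners of a
 * quad whose half-size is the point size converted from pixels to clip space
 * (pre-multiplied by w so it survives the perspective divide). */
static void
toy_point_to_quad(const float pos[4], float psize,
                  float vp_width, float vp_height,
                  float corners[4][4])
{
   const float hx = 0.5f * psize * (2.0f / vp_width)  * pos[3];
   const float hy = 0.5f * psize * (2.0f / vp_height) * pos[3];
   static const float sx[4] = { -1.0f,  1.0f, -1.0f,  1.0f };
   static const float sy[4] = { -1.0f, -1.0f,  1.0f,  1.0f };
   int i;

   for (i = 0; i < 4; i++) {
      corners[i][0] = pos[0] + sx[i] * hx;
      corners[i][1] = pos[1] + sy[i] * hy;
      corners[i][2] = pos[2];
      corners[i][3] = pos[3];
   }
}
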
+ */ + streamout = orig_gs->base.stream_output; + if (streamout) { + pos_out_index = streamout->pos_out_index; + key.gs.point_pos_stream_out = pos_out_index != -1; + } + + /* Search the shader lists to see if there is a variant that matches + * this token key. + */ + gs = (struct svga_geometry_shader *) + svga_search_shader_token_key(&orig_gs->base, &key); + } + + /* If there isn't, then call the tgsi utility tgsi_add_point_sprite + * to transform the original tokens to support point sprite. + * Flip the sprite origin as SVGA3D device only supports an + * upper-left origin. + */ + if (!gs) { + new_tokens = tgsi_add_point_sprite(orig_tokens, + key.gs.sprite_coord_enable, + key.gs.sprite_origin_upper_left, + key.gs.point_pos_stream_out, + key.gs.aa_point ? + &aa_point_coord_index : NULL); + + if (!new_tokens) { + /* if no new tokens are generated for whatever reason, just return */ + return NULL; + } + + if (0) { + debug_printf("Before tgsi_add_point_sprite ---------------\n"); + tgsi_dump(orig_tokens, 0); + debug_printf("After tgsi_add_point_sprite --------------\n"); + tgsi_dump(new_tokens, 0); + } + + templ.tokens = new_tokens; + templ.stream_output.num_outputs = 0; + + if (streamout) { + templ.stream_output = streamout->info; + /* The tgsi_add_point_sprite utility adds an extra output + * for the original point position for stream output purpose. + * We need to replace the position output register index in the + * stream output declaration with the new register index. + */ + if (pos_out_index != -1) { + assert(orig_gs != NULL); + templ.stream_output.output[pos_out_index].register_index = + orig_gs->base.info.num_outputs; + } + } + + /* Create a new geometry shader state with the new tokens */ + gs = svga->pipe.create_gs_state(&svga->pipe, &templ); + + /* Don't need the token string anymore. There is a local copy + * in the shader state. + */ + FREE(new_tokens); + + if (!gs) { + return NULL; + } + + gs->wide_point = TRUE; + gs->aa_point_coord_index = aa_point_coord_index; + gs->base.token_key = key; + gs->base.parent = &orig_gs->base; + gs->base.next = NULL; + + /* Add the new geometry shader to the head of the shader list + * pointed to by the original geometry shader. + */ + if (orig_gs) { + gs->base.next = orig_gs->base.next; + orig_gs->base.next = &gs->base; + } + } + + /* Bind the new geometry shader state */ + bind_gs_state(svga, gs); + + return &gs->base; +} + +/** + * Generate a geometry shader that emits a wide point by drawing a quad. + * This function first creates a passthrough geometry shader and then + * calls emulate_point_sprite() to transform the geometry shader to + * support point sprite. + */ +static struct svga_shader * +add_point_sprite_shader(struct svga_context *svga) +{ + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_geometry_shader *orig_gs = vs->gs; + struct svga_geometry_shader *new_gs; + const struct tgsi_token *tokens; + + if (orig_gs == NULL) { + + /* If this is the first time adding a geometry shader to this + * vertex shader to support point sprite, then create + * a passthrough geometry shader first. 
+ */ + orig_gs = (struct svga_geometry_shader *) + util_make_geometry_passthrough_shader( + &svga->pipe, vs->base.info.num_outputs, + vs->base.info.output_semantic_name, + vs->base.info.output_semantic_index); + + if (!orig_gs) + return NULL; + } + else { + if (orig_gs->base.parent) + orig_gs = (struct svga_geometry_shader *)orig_gs->base.parent; + } + tokens = orig_gs->base.tokens; + + /* Call emulate_point_sprite to find or create a transformed + * geometry shader for supporting point sprite. + */ + new_gs = (struct svga_geometry_shader *) + emulate_point_sprite(svga, &orig_gs->base, tokens); + + /* If this is the first time creating a geometry shader to + * support vertex point size, then add the new geometry shader + * to the vertex shader. + */ + if (vs->gs == NULL) { + vs->gs = new_gs; + } + + return &new_gs->base; +} + +/* update_tgsi_transform provides a hook to transform a shader if needed. + */ +static enum pipe_error +update_tgsi_transform(struct svga_context *svga, unsigned dirty) +{ + struct svga_geometry_shader *gs = svga->curr.user_gs; /* current gs */ + struct svga_vertex_shader *vs = svga->curr.vs; /* currently bound vs */ + struct svga_shader *orig_gs; /* original gs */ + struct svga_shader *new_gs; /* new gs */ + + if (!svga_have_vgpu10(svga)) + return PIPE_OK; + + if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) { + /* If the current prim type is POINTS and the current geometry shader + * emits wide points, transform the shader to emulate wide points using + * quads. + */ + if (gs != NULL && (gs->base.info.writes_psize || gs->wide_point)) { + orig_gs = gs->base.parent ? gs->base.parent : &gs->base; + new_gs = emulate_point_sprite(svga, orig_gs, orig_gs->tokens); + } + + /* If there is not an active geometry shader and the current vertex + * shader emits wide point then create a new geometry shader to emulate + * wide point. + */ + else if (gs == NULL && + (svga->curr.rast->pointsize > 1.0 || + vs->base.info.writes_psize)) { + new_gs = add_point_sprite_shader(svga); + } + else { + /* use the user's GS */ + bind_gs_state(svga, svga->curr.user_gs); + } + } + else if (svga->curr.gs != svga->curr.user_gs) { + /* If current primitive type is not POINTS, then make sure + * we don't bind to any of the generated geometry shader + */ + bind_gs_state(svga, svga->curr.user_gs); + } + (void) new_gs; /* silence the unused var warning */ + + return PIPE_OK; +} + +struct svga_tracked_state svga_need_tgsi_transform = +{ + "transform shader for optimization", + (SVGA_NEW_VS | + SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_REDUCED_PRIMITIVE | + SVGA_NEW_RAST), + update_tgsi_transform +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c b/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c index 41334bd7c..4debbf166 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_tss.c @@ -31,24 +31,28 @@ #include "svga_sampler_view.h" #include "svga_winsys.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_state.h" #include "svga_cmd.h" +/** + * Called when tearing down a context to free resources and samplers. 
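
update_tgsi_transform() above decides whether to keep the user's geometry shader, derive a point-sprite GS from it, or synthesize one from the vertex shader outputs. Its branching condenses to roughly the decision helper below; this is a restatement for clarity with simplified boolean inputs, not code from the driver.

enum toy_gs_choice { TOY_USE_USER_GS, TOY_EMULATE_FROM_USER_GS, TOY_ADD_SPRITE_GS };

/* gs_emits_wide_points: the user GS writes point size or is already a
 * wide-point GS; vs_emits_wide_points: point size > 1 or the VS writes it. */
static enum toy_gs_choice
toy_choose_gs(int prim_is_points, int have_user_gs,
              int gs_emits_wide_points, int vs_emits_wide_points)
{
   if (prim_is_points) {
      if (have_user_gs && gs_emits_wide_points)
         return TOY_EMULATE_FROM_USER_GS;
      if (!have_user_gs && vs_emits_wide_points)
         return TOY_ADD_SPRITE_GS;
   }
   return TOY_USE_USER_GS;
}
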
+ */ void svga_cleanup_tss_binding(struct svga_context *svga) { + const unsigned shader = PIPE_SHADER_FRAGMENT; unsigned i; - unsigned count = MAX2( svga->curr.num_sampler_views, - svga->state.hw_draw.num_views ); - for (i = 0; i < count; i++) { + for (i = 0; i < Elements(svga->state.hw_draw.views); i++) { struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; - - svga_sampler_view_reference(&view->v, NULL); - pipe_sampler_view_release(&svga->pipe, &svga->curr.sampler_views[i]); - pipe_resource_reference( &view->texture, NULL ); - - view->dirty = 1; + if (view) { + svga_sampler_view_reference(&view->v, NULL); + pipe_sampler_view_release(&svga->pipe, + &svga->curr.sampler_views[shader][i]); + pipe_resource_reference(&view->texture, NULL); + view->dirty = TRUE; + } } } @@ -63,73 +67,113 @@ struct bind_queue { }; +/** + * Update the texture binding for one texture unit. + */ +static void +emit_tex_binding_unit(struct svga_context *svga, + unsigned unit, + const struct svga_sampler_state *s, + const struct pipe_sampler_view *sv, + struct svga_hw_view_state *view, + boolean reemit, + struct bind_queue *queue) +{ + struct pipe_resource *texture = NULL; + unsigned last_level, min_lod, max_lod; + + /* get min max lod */ + if (sv && s) { + if (s->mipfilter == SVGA3D_TEX_FILTER_NONE) { + /* just use the base level image */ + min_lod = max_lod = sv->u.tex.first_level; + } + else { + last_level = MIN2(sv->u.tex.last_level, sv->texture->last_level); + min_lod = s->view_min_lod + sv->u.tex.first_level; + min_lod = MIN2(min_lod, last_level); + max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level); + } + texture = sv->texture; + } + else { + min_lod = 0; + max_lod = 0; + } + + if (view->texture != texture || + view->min_lod != min_lod || + view->max_lod != max_lod) { + + svga_sampler_view_reference(&view->v, NULL); + pipe_resource_reference( &view->texture, texture ); + + view->dirty = TRUE; + view->min_lod = min_lod; + view->max_lod = max_lod; + + if (texture) { + view->v = svga_get_tex_sampler_view(&svga->pipe, + texture, + min_lod, + max_lod); + } + } + + /* + * We need to reemit non-null texture bindings, even when they are not + * dirty, to ensure that the resources are paged in. 
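
emit_tex_binding_unit() above derives the min/max LOD for a texture binding from the sampler state and the view's mip range. A worked example of exactly that clamping, pulled into a standalone snippet (the helper and the sample numbers are ours, the formulas match the hunk):

#include <stdio.h>

#define TOY_MIN2(a, b) ((a) < (b) ? (a) : (b))

static void
toy_lod_range(unsigned first_level, unsigned view_last_level,
              unsigned tex_last_level,
              unsigned view_min_lod, unsigned view_max_lod,
              int mipfilter_none)
{
   unsigned min_lod, max_lod, last_level;

   if (mipfilter_none) {
      /* no mipmapping: sample only the view's base level */
      min_lod = max_lod = first_level;
   }
   else {
      last_level = TOY_MIN2(view_last_level, tex_last_level);
      min_lod = TOY_MIN2(view_min_lod + first_level, last_level);
      max_lod = TOY_MIN2(view_max_lod + first_level, last_level);
   }
   printf("min_lod=%u max_lod=%u\n", min_lod, max_lod);
}

int main(void)
{
   /* a view starting at level 2 of a texture whose last level is 7,
    * with a wide-open sampler LOD range */
   toy_lod_range(2, 9, 7, 0, 1000, 0);   /* prints min_lod=2 max_lod=7 */
   return 0;
}
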
+ */ + if (view->dirty || (reemit && view->v)) { + queue->bind[queue->bind_count].unit = unit; + queue->bind[queue->bind_count].view = view; + queue->bind_count++; + } + + if (!view->dirty && view->v) { + svga_validate_sampler_view(svga, view->v); + } +} + + static enum pipe_error update_tss_binding(struct svga_context *svga, unsigned dirty ) { - boolean reemit = svga->rebind.texture_samplers; + const unsigned shader = PIPE_SHADER_FRAGMENT; + boolean reemit = svga->rebind.flags.texture_samplers; unsigned i; - unsigned count = MAX2( svga->curr.num_sampler_views, + unsigned count = MAX2( svga->curr.num_sampler_views[shader], svga->state.hw_draw.num_views ); - unsigned min_lod; - unsigned max_lod; struct bind_queue queue; + if (svga_have_vgpu10(svga)) + return PIPE_OK; + queue.bind_count = 0; for (i = 0; i < count; i++) { - const struct svga_sampler_state *s = svga->curr.sampler[i]; - struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; - struct pipe_resource *texture = NULL; - struct pipe_sampler_view *sv = svga->curr.sampler_views[i]; - - /* get min max lod */ - if (sv && s) { - min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level)); - max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, - sv->texture->last_level); - texture = sv->texture; - } else { - min_lod = 0; - max_lod = 0; - } - - if (view->texture != texture || - view->min_lod != min_lod || - view->max_lod != max_lod) { - - svga_sampler_view_reference(&view->v, NULL); - pipe_resource_reference( &view->texture, texture ); - - view->dirty = TRUE; - view->min_lod = min_lod; - view->max_lod = max_lod; - - if (texture) - view->v = svga_get_tex_sampler_view(&svga->pipe, - texture, - min_lod, - max_lod); - } - - /* - * We need to reemit non-null texture bindings, even when they are not - * dirty, to ensure that the resources are paged in. 
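As an illustration (not part of the patch): update_tss_binding() above gathers only the units whose cached state differs into a small queue and then, if anything was queued, emits them as one batched command. A minimal generic sketch of that collect-then-flush pattern, with invented types standing in for the real command reservation:

/* Sketch of the "queue dirty units, then emit one batched command" pattern;
 * the types and emit function are stand-ins, not the real winsys interface.
 */
#include <assert.h>
#include <stdio.h>

#define MAX_UNITS 16

struct binding { unsigned unit; int value; };

struct bind_queue {
   unsigned count;
   struct binding bind[MAX_UNITS];
};

static void queue_binding(struct bind_queue *q, unsigned unit, int value)
{
   assert(q->count < MAX_UNITS);
   q->bind[q->count].unit = unit;
   q->bind[q->count].value = value;
   q->count++;
}

/* Stand-in for reserving one command that carries all queued bindings */
static void emit_batched_command(const struct bind_queue *q)
{
   unsigned i;
   printf("emitting %u binding(s) in one command\n", q->count);
   for (i = 0; i < q->count; i++)
      printf("  unit %u -> %d\n", q->bind[i].unit, q->bind[i].value);
}

int main(void)
{
   struct bind_queue q = { 0 };
   int cached[MAX_UNITS] = { 0 };
   int wanted[MAX_UNITS] = { 0, 5, 0, 7 };
   unsigned i;

   /* only queue units whose cached value differs (the "dirty" test) */
   for (i = 0; i < 4; i++) {
      if (cached[i] != wanted[i]) {
         queue_binding(&q, i, wanted[i]);
         cached[i] = wanted[i];
      }
   }

   if (q.count)       /* skip the command entirely when nothing changed */
      emit_batched_command(&q);
   return 0;
}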
- */ - - if (view->dirty || - (reemit && view->v)) { - queue.bind[queue.bind_count].unit = i; - queue.bind[queue.bind_count].view = view; - queue.bind_count++; - } - if (!view->dirty && view->v) { - svga_validate_sampler_view(svga, view->v); - } + emit_tex_binding_unit(svga, i, + svga->curr.sampler[shader][i], + svga->curr.sampler_views[shader][i], + &svga->state.hw_draw.views[i], + reemit, + &queue); } - svga->state.hw_draw.num_views = svga->curr.num_sampler_views; + svga->state.hw_draw.num_views = svga->curr.num_sampler_views[shader]; + + /* Polygon stipple */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + emit_tex_binding_unit(svga, unit, + svga->polygon_stipple.sampler, + &svga->polygon_stipple.sampler_view->base, + &svga->state.hw_draw.views[unit], + reemit, + &queue); + } if (queue.bind_count) { SVGA3dTextureState *ts; @@ -163,7 +207,7 @@ update_tss_binding(struct svga_context *svga, SVGA_FIFOCommitAll( svga->swc ); } - svga->rebind.texture_samplers = FALSE; + svga->rebind.flags.texture_samplers = FALSE; return PIPE_OK; @@ -187,7 +231,8 @@ svga_reemit_tss_bindings(struct svga_context *svga) enum pipe_error ret; struct bind_queue queue; - assert(svga->rebind.texture_samplers); + assert(!svga_have_vgpu10(svga)); + assert(svga->rebind.flags.texture_samplers); queue.bind_count = 0; @@ -201,6 +246,18 @@ svga_reemit_tss_bindings(struct svga_context *svga) } } + /* Polygon stipple */ + if (svga->curr.rast->templ.poly_stipple_enable) { + const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit]; + + if (view->v) { + queue.bind[queue.bind_count].unit = unit; + queue.bind[queue.bind_count].view = view; + queue.bind_count++; + } + } + if (queue.bind_count) { SVGA3dTextureState *ts; @@ -229,7 +286,7 @@ svga_reemit_tss_bindings(struct svga_context *svga) SVGA_FIFOCommitAll(svga->swc); } - svga->rebind.texture_samplers = FALSE; + svga->rebind.flags.texture_samplers = FALSE; return PIPE_OK; } @@ -238,6 +295,7 @@ svga_reemit_tss_bindings(struct svga_context *svga) struct svga_tracked_state svga_hw_tss_binding = { "texture binding emit", SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_STIPPLE | SVGA_NEW_SAMPLER, update_tss_binding }; @@ -252,78 +310,98 @@ struct ts_queue { }; -#define EMIT_TS(svga, unit, val, token, fail) \ +static inline void +svga_queue_tss( struct ts_queue *q, + unsigned unit, + unsigned tss, + unsigned value ) +{ + assert(q->ts_count < ARRAY_SIZE(q->ts)); + q->ts[q->ts_count].stage = unit; + q->ts[q->ts_count].name = tss; + q->ts[q->ts_count].value = value; + q->ts_count++; +} + + +#define EMIT_TS(svga, unit, val, token) \ do { \ assert(unit < Elements(svga->state.hw_draw.ts)); \ assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \ if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ - svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga_queue_tss( queue, unit, SVGA3D_TS_##token, val ); \ svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ } \ } while (0) -#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail) \ +#define EMIT_TS_FLOAT(svga, unit, fvalue, token) \ do { \ unsigned val = fui(fvalue); \ assert(unit < Elements(svga->state.hw_draw.ts)); \ assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \ if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ - svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga_queue_tss( queue, unit, SVGA3D_TS_##token, 
val ); \ svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ } \ } while (0) -static inline void -svga_queue_tss( struct ts_queue *q, - unsigned unit, - unsigned tss, - unsigned value ) +/** + * Emit texture sampler state (tss) for one texture unit. + */ +static void +emit_tss_unit(struct svga_context *svga, unsigned unit, + const struct svga_sampler_state *state, + struct ts_queue *queue) { - assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0])); - q->ts[q->ts_count].stage = unit; - q->ts[q->ts_count].name = tss; - q->ts[q->ts_count].value = value; - q->ts_count++; + EMIT_TS(svga, unit, state->mipfilter, MIPFILTER); + EMIT_TS(svga, unit, state->min_lod, TEXTURE_MIPMAP_LEVEL); + EMIT_TS(svga, unit, state->magfilter, MAGFILTER); + EMIT_TS(svga, unit, state->minfilter, MINFILTER); + EMIT_TS(svga, unit, state->aniso_level, TEXTURE_ANISOTROPIC_LEVEL); + EMIT_TS_FLOAT(svga, unit, state->lod_bias, TEXTURE_LOD_BIAS); + EMIT_TS(svga, unit, state->addressu, ADDRESSU); + EMIT_TS(svga, unit, state->addressw, ADDRESSW); + EMIT_TS(svga, unit, state->bordercolor, BORDERCOLOR); + // TEXCOORDINDEX -- hopefully not needed + + if (svga->curr.tex_flags.flag_1d & (1 << unit)) + EMIT_TS(svga, unit, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV); + else + EMIT_TS(svga, unit, state->addressv, ADDRESSV); + + if (svga->curr.tex_flags.flag_srgb & (1 << unit)) + EMIT_TS_FLOAT(svga, unit, 2.2f, GAMMA); + else + EMIT_TS_FLOAT(svga, unit, 1.0f, GAMMA); } - static enum pipe_error update_tss(struct svga_context *svga, unsigned dirty ) { + const unsigned shader = PIPE_SHADER_FRAGMENT; unsigned i; struct ts_queue queue; - queue.ts_count = 0; - for (i = 0; i < svga->curr.num_samplers; i++) { - if (svga->curr.sampler[i]) { - const struct svga_sampler_state *curr = svga->curr.sampler[i]; - - EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail); - EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail); - EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail); - EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail); - EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail); - EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail); - EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail); - EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail); - EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail); - // TEXCOORDINDEX -- hopefully not needed - - if (svga->curr.tex_flags.flag_1d & (1 << i)) { - EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail); - } - else - EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail); - - if (svga->curr.tex_flags.flag_srgb & (1 << i)) - EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail); - else - EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail); + if (svga_have_vgpu10(svga)) + return PIPE_OK; + queue.ts_count = 0; + for (i = 0; i < svga->curr.num_samplers[shader]; i++) { + if (svga->curr.sampler[shader][i]) { + const struct svga_sampler_state *curr = svga->curr.sampler[shader][i]; + emit_tss_unit(svga, i, curr, &queue); } } + + /* polygon stipple sampler */ + if (svga->curr.rast->templ.poly_stipple_enable) { + emit_tss_unit(svga, + svga->state.hw_draw.fs->pstipple_sampler_unit, + svga->polygon_stipple.sampler, + &queue); + } if (queue.ts_count) { SVGA3dTextureState *ts; @@ -357,6 +435,7 @@ fail: struct svga_tracked_state svga_hw_tss = { "texture state emit", (SVGA_NEW_SAMPLER | + SVGA_NEW_STIPPLE | SVGA_NEW_TEXTURE_FLAGS), update_tss }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c b/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c index a33eda383..e1b6a1c2a 100644 --- 
a/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_vdecl.c @@ -33,6 +33,7 @@ #include "svga_draw.h" #include "svga_tgsi.h" #include "svga_screen.h" +#include "svga_shader.h" #include "svga_resource_buffer.h" #include "svga_hw_reg.h" @@ -42,16 +43,14 @@ static enum pipe_error emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) { const struct pipe_vertex_element *ve = svga->curr.velems->velem; + SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX]; + unsigned buffer_indexes[SVGA3D_INPUTREG_MAX]; unsigned i; unsigned neg_bias = 0; assert(svga->curr.velems->count >= svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); - /* specify number of vertex element declarations to come */ - svga_hwtnl_reset_vdecl( svga->hwtnl, - svga->curr.velems->count ); - /** * We can't set the VDECL offset to something negative, so we * must calculate a common negative additional index bias, and modify @@ -70,15 +69,16 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) for (i = 0; i < svga->curr.velems->count; i++) { const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; - const struct svga_buffer *buffer; + struct svga_buffer *buffer; unsigned int offset = vb->buffer_offset + ve[i].src_offset; + unsigned tmp_neg_bias = 0; if (!vb->buffer) continue; buffer = svga_buffer(vb->buffer); if (buffer->uploaded.start > offset) { - unsigned tmp_neg_bias = buffer->uploaded.start - offset; + tmp_neg_bias = buffer->uploaded.start - offset; if (vb->stride) tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride; neg_bias = MAX2(neg_bias, tmp_neg_bias); @@ -89,8 +89,7 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; - const struct svga_buffer *buffer; - SVGA3dVertexDecl decl; + struct svga_buffer *buffer; if (!vb->buffer) continue; @@ -100,29 +99,37 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) /* SVGA_NEW_VELEMENT */ - decl.identity.type = svga->curr.velems->decl_type[i]; - decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT; - decl.identity.usage = usage; - decl.identity.usageIndex = index; - decl.array.stride = vb->stride; + decls[i].identity.type = svga->curr.velems->decl_type[i]; + decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT; + decls[i].identity.usage = usage; + decls[i].identity.usageIndex = index; + decls[i].array.stride = vb->stride; /* Compensate for partially uploaded vbo, and * for the negative index bias. */ - decl.array.offset = (vb->buffer_offset + decls[i].array.offset = (vb->buffer_offset + ve[i].src_offset + neg_bias * vb->stride - buffer->uploaded.start); - assert(decl.array.offset >= 0); + assert(decls[i].array.offset >= 0); + + buffer_indexes[i] = ve[i].vertex_buffer_index; - svga_hwtnl_vdecl( svga->hwtnl, - i, - &decl, - buffer->uploaded.buffer ? 
buffer->uploaded.buffer : - vb->buffer ); + assert(!buffer->uploaded.buffer); } + svga_hwtnl_vertex_decls(svga->hwtnl, + svga->curr.velems->count, + decls, + buffer_indexes, + svga->curr.velems->id); + + svga_hwtnl_vertex_buffers(svga->hwtnl, + svga->curr.num_vertex_buffers, + svga->curr.vb); + svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias ); return PIPE_OK; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c index c2a0f1ee6..a103dab25 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c @@ -25,7 +25,6 @@ #include "util/u_inlines.h" #include "pipe/p_defines.h" -#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_bitmask.h" @@ -41,33 +40,6 @@ #include "svga_hw_reg.h" -static inline int -compare_vs_keys(const struct svga_vs_compile_key *a, - const struct svga_vs_compile_key *b) -{ - unsigned keysize = svga_vs_key_size( a ); - return memcmp( a, b, keysize ); -} - - -/** Search for a vertex shader variant */ -static struct svga_shader_variant * -search_vs_key(const struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *key) -{ - struct svga_shader_variant *variant = vs->base.variants; - - assert(key); - - for ( ; variant; variant = variant->next) { - if (compare_vs_keys( key, &variant->key.vkey ) == 0) - return variant; - } - - return NULL; -} - - /** * If we fail to compile a vertex shader we'll use a dummy/fallback shader * that simply emits a (0,0,0,1) vertex position. @@ -99,13 +71,30 @@ get_dummy_vertex_shader(void) } +static struct svga_shader_variant * +translate_vertex_program(struct svga_context *svga, + const struct svga_vertex_shader *vs, + const struct svga_compile_key *key) +{ + if (svga_have_vgpu10(svga)) { + return svga_tgsi_vgpu10_translate(svga, &vs->base, key, + PIPE_SHADER_VERTEX); + } + else { + return svga_tgsi_vgpu9_translate(svga, &vs->base, key, + PIPE_SHADER_VERTEX); + } +} + + /** * Replace the given shader's instruction with a simple / dummy shader. * We use this when normal shader translation fails. 
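As an illustration (not part of the patch): the negative index bias in the emit_hw_vs_vdecl() hunk above compensates for vertex data that was uploaded starting partway into the buffer, since the hardware declaration offset cannot go negative. A small standalone rework of that arithmetic with made-up numbers:

/* Standalone sketch of the negative index bias arithmetic used for vertex
 * declarations; the values below are invented for illustration.
 */
#include <assert.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   const unsigned stride = 32;          /* bytes per vertex */
   const unsigned uploaded_start = 96;  /* first byte actually uploaded */
   const unsigned buffer_offset = 0;
   const unsigned src_offset = 0;
   const unsigned offset = buffer_offset + src_offset;
   unsigned neg_bias = 0;

   /* If the upload starts after the element's offset, bias indices upward
    * by enough whole vertices to make the final offset non-negative.
    */
   if (uploaded_start > offset) {
      unsigned tmp = uploaded_start - offset;
      if (stride)
         tmp = (tmp + stride - 1) / stride;   /* round up to whole vertices */
      neg_bias = MAX2(neg_bias, tmp);
   }

   /* Declaration offset after compensating for the partial upload and bias */
   {
      int decl_offset = (int)(offset + neg_bias * stride - uploaded_start);
      assert(decl_offset >= 0);
      printf("neg_bias=%u decl_offset=%d index_bias=%d\n",
             neg_bias, decl_offset, -(int)neg_bias);
   }
   return 0;
}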
*/ static struct svga_shader_variant * -get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *key) +get_compiled_dummy_vertex_shader(struct svga_context *svga, + struct svga_vertex_shader *vs, + const struct svga_compile_key *key) { const struct tgsi_token *dummy = get_dummy_vertex_shader(); struct svga_shader_variant *variant; @@ -117,7 +106,7 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs, FREE((void *) vs->base.tokens); vs->base.tokens = dummy; - variant = svga_translate_vertex_program(vs, key); + variant = translate_vertex_program(svga, vs, key); return variant; } @@ -128,69 +117,90 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs, static enum pipe_error compile_vs(struct svga_context *svga, struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *key, + const struct svga_compile_key *key, struct svga_shader_variant **out_variant) { struct svga_shader_variant *variant; enum pipe_error ret = PIPE_ERROR; - variant = svga_translate_vertex_program( vs, key ); + variant = translate_vertex_program(svga, vs, key); if (variant == NULL) { - /* some problem during translation, try the dummy shader */ - variant = get_compiled_dummy_vertex_shader(vs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + debug_printf("Failed to compile vertex shader," + " using dummy shader instead.\n"); + variant = get_compiled_dummy_vertex_shader(svga, vs, key); } - - if (svga_shader_too_large(svga, variant)) { + else if (svga_shader_too_large(svga, variant)) { /* too big, use dummy shader */ - debug_printf("Shader too large (%lu bytes)," + debug_printf("Shader too large (%u bytes)," " using dummy shader instead.\n", - (unsigned long ) variant->nr_tokens - * sizeof(variant->tokens[0])); - variant = get_compiled_dummy_vertex_shader(vs, key); - if (!variant) { - ret = PIPE_ERROR; - goto fail; - } + (unsigned) (variant->nr_tokens + * sizeof(variant->tokens[0]))); + /* Free the too-large variant */ + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + /* Use simple pass-through shader instead */ + variant = get_compiled_dummy_vertex_shader(svga, vs, key); + } + + if (!variant) { + return PIPE_ERROR; } ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_VS, variant); - if (ret != PIPE_OK) - goto fail; + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); + return ret; + } *out_variant = variant; - /* insert variants at head of linked list */ - variant->next = vs->base.variants; - vs->base.variants = variant; - return PIPE_OK; - -fail: - if (variant) { - svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant); - } - return ret; } + /* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_FS */ static void -make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key) +make_vs_key(struct svga_context *svga, struct svga_compile_key *key) { + const unsigned shader = PIPE_SHADER_VERTEX; + memset(key, 0, sizeof *key); - key->need_prescale = svga->state.hw_clear.prescale.enabled; - key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex; + + if (svga->state.sw.need_swtnl && svga_have_vgpu10(svga)) { + /* Set both of these flags, to match compile_passthrough_vs() */ + key->vs.passthrough = 1; + key->vs.undo_viewport = 1; + return; + } + + /* SVGA_NEW_PRESCALE */ + key->vs.need_prescale = svga->state.hw_clear.prescale.enabled && + (svga->curr.gs == NULL); + + /* SVGA_NEW_RAST */ + key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex; /* SVGA_NEW_FS 
*/ - key->fs_generic_inputs = svga->curr.fs->generic_inputs; + key->vs.fs_generic_inputs = svga->curr.fs->generic_inputs; + + svga_remap_generics(key->vs.fs_generic_inputs, key->generic_remap_table); /* SVGA_NEW_VELEMENT */ - key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range; - key->adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1; + key->vs.adjust_attrib_range = svga->curr.velems->adjust_attrib_range; + key->vs.adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1; + key->vs.attrib_is_pure_int = svga->curr.velems->attrib_is_pure_int; + key->vs.adjust_attrib_itof = svga->curr.velems->adjust_attrib_itof; + key->vs.adjust_attrib_utof = svga->curr.velems->adjust_attrib_utof; + key->vs.attrib_is_bgra = svga->curr.velems->attrib_is_bgra; + key->vs.attrib_puint_to_snorm = svga->curr.velems->attrib_puint_to_snorm; + key->vs.attrib_puint_to_uscaled = svga->curr.velems->attrib_puint_to_uscaled; + key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled; + + /* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ + svga_init_shader_key_common(svga, shader, key); + + /* SVGA_NEW_RAST */ + key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; } @@ -201,63 +211,196 @@ enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga) { enum pipe_error ret; - struct svga_winsys_gb_shader *gbshader = - svga->state.hw_draw.vs ? svga->state.hw_draw.vs->gb_shader : NULL; + struct svga_winsys_gb_shader *gbshader = NULL; + SVGA3dShaderId shaderId = SVGA3D_INVALID_ID; - assert(svga->rebind.vs); + assert(svga->rebind.flags.vs); assert(svga_have_gb_objects(svga)); - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader); + if (svga->state.hw_draw.vs) { + gbshader = svga->state.hw_draw.vs->gb_shader; + shaderId = svga->state.hw_draw.vs->id; + } + + if (!svga_need_to_rebind_resources(svga)) { + ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader, + SVGA_RELOC_READ); + goto out; + } + + if (svga_have_vgpu10(svga)) + ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, + gbshader, shaderId); + else + ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader); + + out: if (ret != PIPE_OK) return ret; - svga->rebind.vs = FALSE; + svga->rebind.flags.vs = FALSE; return PIPE_OK; } +/** + * The current vertex shader is already executed by the 'draw' + * module, so we just need to generate a simple vertex shader + * to pass through all those VS outputs that will + * be consumed by the fragment shader. + * Used when we employ the 'draw' module. 
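As an illustration (not part of the patch): the removed search_vs_key() earlier in this file and the svga_search_shader_key() call in emit_hw_vs() below rely on the same variant-cache idea: compiled variants hang off the shader in a linked list keyed by a compile key, are found by memcmp, and a freshly compiled variant is inserted at the head on a miss. A generic sketch with invented types:

/* Generic sketch of a shader-variant cache keyed by a compile key; the
 * struct layout and compile step below are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct key { unsigned flags; unsigned num_samplers; };

struct variant {
   struct key key;
   struct variant *next;
};

struct shader { struct variant *variants; };

static struct variant *search_variant(struct shader *sh, const struct key *k)
{
   struct variant *v;
   for (v = sh->variants; v; v = v->next)
      if (memcmp(&v->key, k, sizeof(*k)) == 0)
         return v;
   return NULL;
}

/* Stand-in for the real translate/compile step */
static struct variant *compile_variant(const struct key *k)
{
   struct variant *v = calloc(1, sizeof(*v));
   if (v)
      v->key = *k;
   return v;
}

static struct variant *get_variant(struct shader *sh, const struct key *k)
{
   struct variant *v = search_variant(sh, k);
   if (!v) {
      v = compile_variant(k);
      if (v) {               /* insert new variants at the head of the list */
         v->next = sh->variants;
         sh->variants = v;
      }
   }
   return v;
}

int main(void)
{
   struct shader sh = { NULL };
   struct key k = { 1, 2 };
   struct variant *a = get_variant(&sh, &k);
   struct variant *b = get_variant(&sh, &k);
   printf("same variant reused: %s\n", (a && a == b) ? "yes" : "no");
   return 0;
}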
+ */ static enum pipe_error -emit_hw_vs(struct svga_context *svga, unsigned dirty) +compile_passthrough_vs(struct svga_context *svga, + struct svga_vertex_shader *vs, + struct svga_fragment_shader *fs, + struct svga_shader_variant **out_variant) { struct svga_shader_variant *variant = NULL; + unsigned num_inputs; + unsigned i; + unsigned num_elements; + struct svga_vertex_shader new_vs; + struct ureg_src src[PIPE_MAX_SHADER_INPUTS]; + struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_program *ureg; + unsigned num_tokens; + struct svga_compile_key key; + enum pipe_error ret; + + assert(svga_have_vgpu10(svga)); + assert(fs); + + num_inputs = fs->base.info.num_inputs; + + ureg = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!ureg) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* draw will always add position */ + dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); + src[0] = ureg_DECL_vs_input(ureg, 0); + num_elements = 1; + + /** + * swtnl backend redefines the input layout based on the + * fragment shader's inputs. So we only need to passthrough + * those inputs that will be consumed by the fragment shader. + * Note: DX10 requires the number of vertex elements + * specified in the input layout to be no less than the + * number of inputs to the vertex shader. + */ + for (i = 0; i < num_inputs; i++) { + switch (fs->base.info.input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_FOG: + dst[num_elements] = ureg_DECL_output(ureg, + fs->base.info.input_semantic_name[i], + fs->base.info.input_semantic_index[i]); + src[num_elements] = ureg_DECL_vs_input(ureg, num_elements); + num_elements++; + break; + default: + break; + } + } + + for (i = 0; i < num_elements; i++) { + ureg_MOV(ureg, dst[i], src[i]); + } + + ureg_END(ureg); + + memset(&new_vs, 0, sizeof(new_vs)); + new_vs.base.tokens = ureg_get_tokens(ureg, &num_tokens); + tgsi_scan_shader(new_vs.base.tokens, &new_vs.base.info); + + memset(&key, 0, sizeof(key)); + key.vs.undo_viewport = 1; + + ret = compile_vs(svga, &new_vs, &key, &variant); + if (ret != PIPE_OK) + return ret; + + ureg_free_tokens(new_vs.base.tokens); + ureg_destroy(ureg); + + /* Overwrite the variant key to indicate it's a pass-through VS */ + memset(&variant->key, 0, sizeof(variant->key)); + variant->key.vs.passthrough = 1; + variant->key.vs.undo_viewport = 1; + + *out_variant = variant; + + return PIPE_OK; +} + + +static enum pipe_error +emit_hw_vs(struct svga_context *svga, unsigned dirty) +{ + struct svga_shader_variant *variant; + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_fragment_shader *fs = svga->curr.fs; enum pipe_error ret = PIPE_OK; + struct svga_compile_key key; + + /* If there is an active geometry shader, and it has stream output + * defined, then we will skip the stream output from the vertex shader + */ + if (!svga_have_gs_streamout(svga)) { + /* No GS stream out */ + if (svga_have_vs_streamout(svga)) { + /* Set VS stream out */ + svga_set_stream_output(svga, vs->base.stream_output); + } + else { + /* turn off stream out */ + svga_set_stream_output(svga, NULL); + } + } /* SVGA_NEW_NEED_SWTNL */ - if (!svga->state.sw.need_swtnl) { - struct svga_vertex_shader *vs = svga->curr.vs; - struct svga_vs_compile_key key; + if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) { + /* No vertex shader is needed */ + variant = NULL; + } + else { + make_vs_key(svga, &key); - make_vs_key( svga, &key ); + /* See if we already have a VS variant that matches the key */ + variant = 
svga_search_shader_key(&vs->base, &key); - variant = search_vs_key( vs, &key ); if (!variant) { - ret = compile_vs( svga, vs, &key, &variant ); + /* Create VS variant now */ + if (key.vs.passthrough) { + ret = compile_passthrough_vs(svga, vs, fs, &variant); + } + else { + ret = compile_vs(svga, vs, &key, &variant); + } if (ret != PIPE_OK) return ret; - } - assert(variant); + /* insert the new variant at head of linked list */ + assert(variant); + variant->next = vs->base.variants; + vs->base.variants = variant; + } } if (variant != svga->state.hw_draw.vs) { - if (svga_have_gb_objects(svga)) { - struct svga_winsys_gb_shader *gbshader = - variant ? variant->gb_shader : NULL; - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader); - if (ret != PIPE_OK) - return ret; - - svga->rebind.vs = FALSE; - } - else { - unsigned id = variant ? variant->id : SVGA_ID_INVALID; - ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, id); + /* Bind the new variant */ + if (variant) { + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, variant); if (ret != PIPE_OK) return ret; + svga->rebind.flags.vs = FALSE; } svga->dirty |= SVGA_NEW_VS_VARIANT; - svga->state.hw_draw.vs = variant; + svga->state.hw_draw.vs = variant; } return PIPE_OK; @@ -268,6 +411,9 @@ struct svga_tracked_state svga_hw_vs = "vertex shader (hwtnl)", (SVGA_NEW_VS | SVGA_NEW_FS | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER | + SVGA_NEW_RAST | SVGA_NEW_PRESCALE | SVGA_NEW_VELEMENT | SVGA_NEW_NEED_SWTNL), diff --git a/lib/mesa/src/gallium/drivers/svga/svga_streamout.h b/lib/mesa/src/gallium/drivers/svga/svga_streamout.h new file mode 100644 index 000000000..da0c4457d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_streamout.h @@ -0,0 +1,50 @@ +/********************************************************** + * Copyright 2014 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#ifndef SVGA_STREAMOUT_H +#define SVGA_STREAMOUT_H + +struct svga_shader; + +struct svga_stream_output { + struct pipe_stream_output_info info; + unsigned pos_out_index; // position output index + unsigned id; +}; + +struct svga_stream_output * +svga_create_stream_output(struct svga_context *svga, + struct svga_shader *shader, + const struct pipe_stream_output_info *info); + +enum pipe_error +svga_set_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout); + +void +svga_delete_stream_output(struct svga_context *svga, + struct svga_stream_output *streamout); + +#endif /* SVGA_STREAMOUT_H */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_surface.c b/lib/mesa/src/gallium/drivers/svga/svga_surface.c index 85d015460..ad06a1d53 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_surface.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_surface.c @@ -29,6 +29,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "os/os_thread.h" +#include "util/u_bitmask.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -36,19 +37,21 @@ #include "svga_format.h" #include "svga_screen.h" #include "svga_context.h" +#include "svga_sampler_view.h" #include "svga_resource_texture.h" #include "svga_surface.h" #include "svga_debug.h" +static void svga_mark_surface_dirty(struct pipe_surface *surf); void svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface *src_handle, unsigned src_x, unsigned src_y, unsigned src_z, - unsigned src_level, unsigned src_face, + unsigned src_level, unsigned src_layer, struct svga_winsys_surface *dst_handle, unsigned dst_x, unsigned dst_y, unsigned dst_z, - unsigned dst_level, unsigned dst_face, + unsigned dst_level, unsigned dst_layer, unsigned width, unsigned height, unsigned depth) { struct svga_surface dst, src; @@ -59,12 +62,12 @@ svga_texture_copy_handle(struct svga_context *svga, src.handle = src_handle; src.real_level = src_level; - src.real_face = src_face; + src.real_layer = src_layer; src.real_zslice = 0; dst.handle = dst_handle; dst.real_level = dst_level; - dst.real_face = dst_face; + dst.real_layer = dst_layer; dst.real_zslice = 0; box.x = dst_x; @@ -103,11 +106,13 @@ svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface * svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + unsigned bind_flags, SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, - int face_pick, + int layer_pick, + unsigned num_layers, int zslice_pick, struct svga_host_surface_cache_key *key) /* OUT */ { @@ -117,8 +122,8 @@ svga_texture_view_surface(struct svga_context *svga, unsigned z_offset = 0; SVGA_DBG(DEBUG_PERF, - "svga: Create surface view: face %d zslice %d mips %d..%d\n", - face_pick, zslice_pick, start_mip, start_mip+num_mip-1); + "svga: Create surface view: layer %d zslice %d mips %d..%d\n", + layer_pick, zslice_pick, start_mip, start_mip+num_mip-1); key->flags = flags; key->format = format; @@ -127,12 +132,20 @@ svga_texture_view_surface(struct svga_context *svga, key->size.height = u_minify(tex->b.b.height0, start_mip); key->size.depth = zslice_pick < 0 ? 
u_minify(tex->b.b.depth0, start_mip) : 1; key->cachable = 1; + key->arraySize = 1; + key->numFaces = 1; + key->sampleCount = tex->b.b.nr_samples; + + if (key->sampleCount > 1) { + key->flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS; + } - if (tex->b.b.target == PIPE_TEXTURE_CUBE && face_pick < 0) { + if (tex->b.b.target == PIPE_TEXTURE_CUBE && layer_pick < 0) { key->flags |= SVGA3D_SURFACE_CUBEMAP; key->numFaces = 6; - } else { - key->numFaces = 1; + } else if (tex->b.b.target == PIPE_TEXTURE_1D_ARRAY || + tex->b.b.target == PIPE_TEXTURE_2D_ARRAY) { + key->arraySize = num_layers; } if (key->format == SVGA3D_FORMAT_INVALID) { @@ -141,7 +154,7 @@ svga_texture_view_surface(struct svga_context *svga, } SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n"); - handle = svga_screen_surface_create(ss, key); + handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT, key); if (!handle) { key->cachable = 0; return NULL; @@ -149,15 +162,15 @@ svga_texture_view_surface(struct svga_context *svga, SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle); - if (face_pick < 0) - face_pick = 0; + if (layer_pick < 0) + layer_pick = 0; if (zslice_pick >= 0) z_offset = zslice_pick; for (i = 0; i < key->numMipLevels; i++) { - for (j = 0; j < key->numFaces; j++) { - if (svga_is_texture_level_defined(tex, j + face_pick, i + start_mip)) { + for (j = 0; j < key->numFaces * key->arraySize; j++) { + if (svga_is_texture_level_defined(tex, j + layer_pick, i + start_mip)) { unsigned depth = (zslice_pick < 0 ? u_minify(tex->b.b.depth0, i + start_mip) : 1); @@ -166,7 +179,7 @@ svga_texture_view_surface(struct svga_context *svga, tex->handle, 0, 0, z_offset, i + start_mip, - j + face_pick, + j + layer_pick, handle, 0, 0, 0, i, j, u_minify(tex->b.b.width0, i + start_mip), u_minify(tex->b.b.height0, i + start_mip), @@ -179,33 +192,43 @@ svga_texture_view_surface(struct svga_context *svga, } +/** + * A helper function to create a surface view. + * The view boolean flag specifies whether svga_texture_view_surface() + * will be called to create a cloned surface and resource for the view. 
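As an illustration (not part of the patch): svga_texture_view_surface() above sizes the view and its per-level copies with u_minify(). The per-level size rule is just "halve per level, clamp to one texel"; the local helper below mirrors that rule rather than including the gallium header, and the dimensions are made up:

/* Sketch of how per-mip-level dimensions shrink; u_minify_local mirrors the
 * usual halve-and-clamp-to-1 rule used above.
 */
#include <stdio.h>

static unsigned u_minify_local(unsigned value, unsigned level)
{
   unsigned v = value >> level;
   return v ? v : 1;          /* never drop below one texel */
}

int main(void)
{
   const unsigned width0 = 256, height0 = 100, levels = 9;
   unsigned level;

   for (level = 0; level < levels; level++) {
      printf("level %u: %u x %u\n", level,
             u_minify_local(width0, level),
             u_minify_local(height0, level));
   }
   return 0;
}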
+ */ static struct pipe_surface * -svga_create_surface(struct pipe_context *pipe, - struct pipe_resource *pt, - const struct pipe_surface *surf_tmpl) +svga_create_surface_view(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl, + boolean view) { struct svga_context *svga = svga_context(pipe); struct svga_texture *tex = svga_texture(pt); struct pipe_screen *screen = pipe->screen; struct svga_screen *ss = svga_screen(screen); struct svga_surface *s; - unsigned face, zslice; - boolean view = FALSE; - SVGA3dSurfaceFlags flags; + unsigned layer, zslice, bind; + unsigned nlayers = 1; + SVGA3dSurfaceFlags flags = 0; SVGA3dSurfaceFormat format; - assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); - s = CALLOC_STRUCT(svga_surface); if (!s) return NULL; if (pt->target == PIPE_TEXTURE_CUBE) { - face = surf_tmpl->u.tex.first_layer; + layer = surf_tmpl->u.tex.first_layer; zslice = 0; } + else if (pt->target == PIPE_TEXTURE_1D_ARRAY || + pt->target == PIPE_TEXTURE_2D_ARRAY) { + layer = surf_tmpl->u.tex.first_layer; + zslice = 0; + nlayers = surf_tmpl->u.tex.last_layer - surf_tmpl->u.tex.first_layer + 1; + } else { - face = 0; + layer = 0; zslice = surf_tmpl->u.tex.first_layer; } @@ -218,25 +241,100 @@ svga_create_surface(struct pipe_context *pipe, s->base.u.tex.level = surf_tmpl->u.tex.level; s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; + s->view_id = SVGA3D_INVALID_ID; + + s->backed = NULL; if (util_format_is_depth_or_stencil(surf_tmpl->format)) { - flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL | + SVGA3D_SURFACE_BIND_DEPTH_STENCIL; + bind = PIPE_BIND_DEPTH_STENCIL; } else { - flags = SVGA3D_SURFACE_HINT_RENDERTARGET; + flags = SVGA3D_SURFACE_HINT_RENDERTARGET | + SVGA3D_SURFACE_BIND_RENDER_TARGET; + bind = PIPE_BIND_RENDER_TARGET; } - format = svga_translate_format(ss, surf_tmpl->format, 0); + if (tex->imported) + format = tex->key.format; + else + format = svga_translate_format(ss, surf_tmpl->format, bind); + assert(format != SVGA3D_FORMAT_INVALID); - if (svga_screen(screen)->debug.force_surface_view) - view = TRUE; + if (view) { + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u layer %u z %u, %p\n", + pt, surf_tmpl->u.tex.level, layer, zslice, s); + + if (svga_have_vgpu10(svga)) { + switch (pt->target) { + case PIPE_TEXTURE_1D: + flags |= SVGA3D_SURFACE_1D; + break; + case PIPE_TEXTURE_1D_ARRAY: + flags |= SVGA3D_SURFACE_1D | SVGA3D_SURFACE_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + flags |= SVGA3D_SURFACE_ARRAY; + break; + case PIPE_TEXTURE_3D: + flags |= SVGA3D_SURFACE_VOLUME; + break; + case PIPE_TEXTURE_CUBE: + if (nlayers == 6) + flags |= SVGA3D_SURFACE_CUBEMAP; + break; + default: + break; + } + } - /* Currently only used for compressed textures */ - if (format != svga_translate_format(ss, surf_tmpl->format, 0)) { - view = TRUE; + /* When we clone the surface view resource, use the format used in + * the creation of the original resource. 
+ */ + s->handle = svga_texture_view_surface(svga, tex, bind, flags, tex->key.format, + surf_tmpl->u.tex.level, 1, + layer, nlayers, zslice, &s->key); + if (!s->handle) { + FREE(s); + return NULL; + } + + s->key.format = format; + s->real_layer = 0; + s->real_level = 0; + s->real_zslice = 0; + } else { + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, layer %u, z %u, %p\n", + pt, surf_tmpl->u.tex.level, layer, zslice, s); + + memset(&s->key, 0, sizeof s->key); + s->key.format = format; + s->handle = tex->handle; + s->real_layer = layer; + s->real_zslice = zslice; + s->real_level = surf_tmpl->u.tex.level; } + svga->hud.num_surface_views++; + + return &s->base; +} + + +static struct pipe_surface * +svga_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) +{ + struct svga_context *svga = svga_context(pipe); + struct pipe_screen *screen = pipe->screen; + boolean view = FALSE; + + if (svga_screen(screen)->debug.force_surface_view) + view = TRUE; + if (surf_tmpl->u.tex.level != 0 && svga_screen(screen)->debug.force_level_surface_view) view = TRUE; @@ -244,49 +342,177 @@ svga_create_surface(struct pipe_context *pipe, if (pt->target == PIPE_TEXTURE_3D) view = TRUE; - if (svga_screen(screen)->debug.no_surface_view) + if (svga_have_vgpu10(svga) || svga_screen(screen)->debug.no_surface_view) view = FALSE; - if (view) { - SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", - pt, surf_tmpl->u.tex.level, face, zslice, s); + return svga_create_surface_view(pipe, pt, surf_tmpl, view); +} - s->handle = svga_texture_view_surface(svga, tex, flags, format, - surf_tmpl->u.tex.level, - 1, face, zslice, &s->key); - s->real_face = 0; - s->real_level = 0; - s->real_zslice = 0; - } else { - SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", - pt, surf_tmpl->u.tex.level, face, zslice, s); - memset(&s->key, 0, sizeof s->key); - s->handle = tex->handle; - s->real_face = face; - s->real_zslice = zslice; - s->real_level = surf_tmpl->u.tex.level; +/** + * Clone the surface view and its associated resource. + */ +static struct svga_surface * +create_backed_surface_view(struct svga_context *svga, struct svga_surface *s) +{ + struct svga_surface *bs = s->backed; + + if (!bs) { + struct svga_texture *tex = svga_texture(s->base.texture); + struct pipe_surface *backed_view; + + backed_view = svga_create_surface_view(&svga->pipe, + &tex->b.b, + &s->base, + TRUE); + if (!backed_view) + return NULL; + + bs = svga_surface(backed_view); + s->backed = bs; } + svga_mark_surface_dirty(&bs->base); + + return bs; +} + +/** + * Create a DX RenderTarget/DepthStencil View for the given surface, + * if needed. + */ +struct pipe_surface * +svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) +{ + enum pipe_error ret = PIPE_OK; + unsigned shader; + + assert(svga_have_vgpu10(svga)); + + /** + * DX spec explicitly specifies that no resource can be bound to a render + * target view and a shader resource view simultanously. + * So first check if the resource bound to this surface view collides with + * a sampler view. If so, then we will clone this surface view and its + * associated resource. We will then use the cloned surface view for + * render target. 
+ */ + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { + SVGA_DBG(DEBUG_VIEWS, + "same resource used in shaderResource and renderTarget 0x%x\n", + s->handle); + s = create_backed_surface_view(svga, s); + if (!s) + return NULL; + + break; + } + } + + if (s->view_id == SVGA3D_INVALID_ID) { + SVGA3dResourceType resType; + SVGA3dRenderTargetViewDesc desc; + + desc.tex.mipSlice = s->real_level; + desc.tex.firstArraySlice = s->real_layer + s->real_zslice; + desc.tex.arraySize = + s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1; + + s->view_id = util_bitmask_add(svga->surface_view_id_bm); + + switch (s->base.texture->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + resType = SVGA3D_RESOURCE_TEXTURE1D; + break; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + /* drawing to cube map is treated as drawing to 2D array */ + resType = SVGA3D_RESOURCE_TEXTURE2D; + break; + case PIPE_TEXTURE_3D: + resType = SVGA3D_RESOURCE_TEXTURE3D; + break; + default: + assert(!"Unexpected texture target"); + resType = SVGA3D_RESOURCE_TEXTURE2D; + } + + if (util_format_is_depth_or_stencil(s->base.format)) { + ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc, + s->view_id, + s->handle, + s->key.format, + resType, + &desc); + } + else { + ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc, + s->view_id, + s->handle, + s->key.format, + resType, + &desc); + } + + if (ret != PIPE_OK) { + util_bitmask_clear(svga->surface_view_id_bm, s->view_id); + s->view_id = SVGA3D_INVALID_ID; + return NULL; + } + } return &s->base; } + static void svga_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf) { + struct svga_context *svga = svga_context(pipe); struct svga_surface *s = svga_surface(surf); struct svga_texture *t = svga_texture(surf->texture); struct svga_screen *ss = svga_screen(surf->texture->screen); + enum pipe_error ret = PIPE_OK; + + /* Destroy the backed view surface if it exists */ + if (s->backed) { + svga_surface_destroy(pipe, &s->backed->base); + s->backed = NULL; + } if (s->handle != t->handle) { SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); svga_screen_surface_destroy(ss, &s->key, &s->handle); } + if (s->view_id != SVGA3D_INVALID_ID) { + unsigned try; + + assert(svga_have_vgpu10(svga)); + for (try = 0; try < 2; try++) { + if (util_format_is_depth_or_stencil(s->base.format)) { + ret = SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, s->view_id); + } + else { + ret = SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, s->view_id); + } + if (ret == PIPE_OK) + break; + svga_context_flush(svga, NULL); + } + assert(ret == PIPE_OK); + util_bitmask_clear(svga->surface_view_id_bm, s->view_id); + } + pipe_resource_reference(&surf->texture, NULL); FREE(surf); + + svga->hud.num_surface_views--; } @@ -294,29 +520,25 @@ static void svga_mark_surface_dirty(struct pipe_surface *surf) { struct svga_surface *s = svga_surface(surf); + struct svga_texture *tex = svga_texture(surf->texture); if (!s->dirty) { - struct svga_texture *tex = svga_texture(surf->texture); - s->dirty = TRUE; if (s->handle == tex->handle) { /* hmm so 3d textures always have all their slices marked ? 
*/ - if (surf->texture->target == PIPE_TEXTURE_CUBE) - svga_define_texture_level(tex, surf->u.tex.first_layer, - surf->u.tex.level); - else - svga_define_texture_level(tex, 0, surf->u.tex.level); + svga_define_texture_level(tex, surf->u.tex.first_layer, + surf->u.tex.level); } else { /* this will happen later in svga_propagate_surface */ } - - /* Increment the view_age and texture age for this surface's mipmap - * level so that any sampler views into the texture are re-validated too. - */ - svga_age_texture_view(tex, surf->u.tex.level); } + + /* Increment the view_age and texture age for this surface's mipmap + * level so that any sampler views into the texture are re-validated too. + */ + svga_age_texture_view(tex, surf->u.tex.level); } @@ -345,18 +567,26 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf) struct svga_surface *s = svga_surface(surf); struct svga_texture *tex = svga_texture(surf->texture); struct svga_screen *ss = svga_screen(surf->texture->screen); - unsigned zslice, face; + unsigned zslice, layer; + unsigned nlayers = 1; + unsigned i; if (!s->dirty) return; if (surf->texture->target == PIPE_TEXTURE_CUBE) { zslice = 0; - face = surf->u.tex.first_layer; + layer = surf->u.tex.first_layer; + } + else if (surf->texture->target == PIPE_TEXTURE_1D_ARRAY || + surf->texture->target == PIPE_TEXTURE_2D_ARRAY) { + zslice = 0; + layer = surf->u.tex.first_layer; + nlayers = surf->u.tex.last_layer - surf->u.tex.first_layer + 1; } else { zslice = surf->u.tex.first_layer; - face = 0; + layer = 0; } s->dirty = FALSE; @@ -367,12 +597,14 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf) SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf); - svga_texture_copy_handle(svga, - s->handle, 0, 0, 0, s->real_level, s->real_face, - tex->handle, 0, 0, zslice, surf->u.tex.level, face, - u_minify(tex->b.b.width0, surf->u.tex.level), - u_minify(tex->b.b.height0, surf->u.tex.level), 1); - svga_define_texture_level(tex, face, surf->u.tex.level); + for (i = 0; i < nlayers; i++) { + svga_texture_copy_handle(svga, + s->handle, 0, 0, 0, s->real_level, s->real_layer + i, + tex->handle, 0, 0, zslice, surf->u.tex.level, layer + i, + u_minify(tex->b.b.width0, surf->u.tex.level), + u_minify(tex->b.b.height0, surf->u.tex.level), 1); + svga_define_texture_level(tex, layer + i, surf->u.tex.level); + } } } @@ -390,10 +622,76 @@ svga_surface_needs_propagation(const struct pipe_surface *surf) } +static void +svga_get_sample_position(struct pipe_context *context, + unsigned sample_count, unsigned sample_index, + float *pos_out) +{ + /* We can't actually query the device to learn the sample positions. + * These were grabbed from nvidia's driver. 
+ */ + static const float pos1[1][2] = { + { 0.5, 0.5 } + }; + static const float pos4[4][2] = { + { 0.375000, 0.125000 }, + { 0.875000, 0.375000 }, + { 0.125000, 0.625000 }, + { 0.625000, 0.875000 } + }; + static const float pos8[8][2] = { + { 0.562500, 0.312500 }, + { 0.437500, 0.687500 }, + { 0.812500, 0.562500 }, + { 0.312500, 0.187500 }, + { 0.187500, 0.812500 }, + { 0.062500, 0.437500 }, + { 0.687500, 0.937500 }, + { 0.937500, 0.062500 } + }; + static const float pos16[16][2] = { + { 0.187500, 0.062500 }, + { 0.437500, 0.187500 }, + { 0.062500, 0.312500 }, + { 0.312500, 0.437500 }, + { 0.687500, 0.062500 }, + { 0.937500, 0.187500 }, + { 0.562500, 0.312500 }, + { 0.812500, 0.437500 }, + { 0.187500, 0.562500 }, + { 0.437500, 0.687500 }, + { 0.062500, 0.812500 }, + { 0.312500, 0.937500 }, + { 0.687500, 0.562500 }, + { 0.937500, 0.687500 }, + { 0.562500, 0.812500 }, + { 0.812500, 0.937500 } + }; + const float (*positions)[2]; + + switch (sample_count) { + case 4: + positions = pos4; + break; + case 8: + positions = pos8; + break; + case 16: + positions = pos16; + break; + default: + positions = pos1; + } + + pos_out[0] = positions[sample_index][0]; + pos_out[1] = positions[sample_index][1]; +} + void svga_init_surface_functions(struct svga_context *svga) { svga->pipe.create_surface = svga_create_surface; svga->pipe.surface_destroy = svga_surface_destroy; + svga->pipe.get_sample_position = svga_get_sample_position; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_surface.h b/lib/mesa/src/gallium/drivers/svga/svga_surface.h index 2fa72a1c8..0e5794b0b 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_surface.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_surface.h @@ -47,11 +47,15 @@ struct svga_surface struct svga_host_surface_cache_key key; struct svga_winsys_surface *handle; - unsigned real_face; + unsigned real_layer; unsigned real_level; unsigned real_zslice; boolean dirty; + + /* VGPU10 */ + SVGA3dRenderTargetViewId view_id; + struct svga_surface *backed; }; @@ -64,11 +68,13 @@ svga_surface_needs_propagation(const struct pipe_surface *surf); struct svga_winsys_surface * svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + unsigned bind_flags, SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, - int face_pick, + int layer_pick, + unsigned num_layers, int zslice_pick, struct svga_host_surface_cache_key *key); /* OUT */ @@ -99,4 +105,8 @@ svga_surface_const(const struct pipe_surface *surface) return (const struct svga_surface *)surface; } +struct pipe_surface * +svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s); + + #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c index ded8bcbd5..4bdb21a98 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -40,6 +40,7 @@ #include "svga_reg.h" #include "svga3d_reg.h" #include "svga_draw.h" +#include "svga_shader.h" #include "svga_swtnl_private.h" @@ -129,9 +130,12 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render ) PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_UNSYNCHRONIZED, &svga_render->vbuf_transfer); - if (ptr) + if (ptr) { + svga_render->vbuf_ptr = ptr; return ptr + svga_render->vbuf_offset; + } else { + svga_render->vbuf_ptr = NULL; svga_render->vbuf_transfer = NULL; return NULL; } @@ -154,6 +158,18 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render, offset = 
svga_render->vbuf_offset + svga_render->vertex_size * min_index; length = svga_render->vertex_size * (max_index + 1 - min_index); + + if (0) { + /* dump vertex data */ + const float *f = (const float *) ((char *) svga_render->vbuf_ptr + + svga_render->vbuf_offset); + unsigned i; + debug_printf("swtnl vertex data:\n"); + for (i = 0; i < length / 4; i += 4) { + debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]); + } + } + pipe_buffer_flush_mapped_range(&svga->pipe, svga_render->vbuf_transfer, offset, length); @@ -178,6 +194,7 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; enum pipe_error ret; unsigned i; + static const unsigned zero[PIPE_MAX_ATTRIBS] = {0}; /* if the vdecl or vbuf hasn't changed do nothing */ if (!svga->swtnl.new_vdecl) @@ -192,18 +209,27 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) ret = svga_hwtnl_flush(svga->hwtnl); /* if we hit this path we might become synced with hw */ svga->swtnl.new_vbuf = TRUE; - assert(ret == 0); + assert(ret == PIPE_OK); } - svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count); - for (i = 0; i < svga_render->vdecl_count; i++) { vdecl[i].array.offset += svga_render->vdecl_offset; + } - svga_hwtnl_vdecl( svga->hwtnl, - i, - &vdecl[i], - svga_render->vbuf ); + svga_hwtnl_vertex_decls(svga->hwtnl, + svga_render->vdecl_count, + vdecl, + zero, + svga_render->layout_id); + + /* Specify the vertex buffer (there's only ever one) */ + { + struct pipe_vertex_buffer vb; + vb.buffer = svga_render->vbuf; + vb.buffer_offset = svga_render->vdecl_offset; + vb.stride = vdecl[0].array.stride; + vb.user_buffer = NULL; + svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb); } /* We have already taken care of flatshading, so let the hwtnl @@ -211,15 +237,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) */ if (svga->state.sw.need_pipeline) { svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE); - svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL); + svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL); } else { svga_hwtnl_set_flatshade( svga->hwtnl, - svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade || + svga->state.hw_draw.fs->uses_flat_interp, svga->curr.rast->templ.flatshade_first ); - svga_hwtnl_set_unfilled( svga->hwtnl, - svga->curr.rast->hw_unfilled ); + svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); } svga->swtnl.new_vdecl = FALSE; @@ -227,13 +253,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) static void svga_vbuf_render_draw_arrays( struct vbuf_render *render, - unsigned start, - uint nr ) + unsigned start, uint nr ) { struct svga_vbuf_render *svga_render = svga_vbuf_render(render); struct svga_context *svga = svga_render->svga; unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; enum pipe_error ret = PIPE_OK; + /* instancing will already have been resolved at this point by 'draw' */ + const unsigned start_instance = 0; + const unsigned instance_count = 1; /* off to hardware */ svga_vbuf_submit_state(svga_render); @@ -244,10 +272,13 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render, */ svga_update_state_retry( svga, SVGA_STATE_HW_DRAW ); - ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr, + start_instance, instance_count); if (ret != PIPE_OK) { svga_context_flush(svga, NULL); - ret = 
svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, + start + bias, nr, + start_instance, instance_count); svga->swtnl.new_vbuf = TRUE; assert(ret == PIPE_OK); } @@ -265,6 +296,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; boolean ret; size_t size = 2 * nr_indices; + /* instancing will already have been resolved at this point by 'draw' */ + const unsigned start_instance = 0; + const unsigned instance_count = 1; assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0); @@ -299,7 +333,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, svga_render->min_index, svga_render->max_index, svga_render->prim, - svga_render->ibuf_offset / 2, nr_indices); + svga_render->ibuf_offset / 2, nr_indices, + start_instance, instance_count); if(ret != PIPE_OK) { svga_context_flush(svga, NULL); ret = svga_hwtnl_draw_range_elements(svga->hwtnl, @@ -309,7 +344,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, svga_render->min_index, svga_render->max_index, svga_render->prim, - svga_render->ibuf_offset / 2, nr_indices); + svga_render->ibuf_offset / 2, + nr_indices, + start_instance, instance_count); svga->swtnl.new_vbuf = TRUE; assert(ret == PIPE_OK); } @@ -349,6 +386,7 @@ svga_vbuf_render_create( struct svga_context *svga ) svga_render->vbuf_size = 0; svga_render->ibuf_alloc_size = 4*1024; svga_render->vbuf_alloc_size = 64*1024; + svga_render->layout_id = SVGA3D_INVALID_ID; svga_render->base.max_vertex_buffer_bytes = 64*1024/10; svga_render->base.max_indices = 65536; svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c index 832249523..6a8e857ce 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -42,9 +42,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga, { struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = { 0 }; struct pipe_transfer *ib_transfer = NULL; - struct pipe_transfer *cb_transfer = NULL; + struct pipe_transfer *cb_transfer[SVGA_MAX_CONST_BUFS] = { 0 }; struct draw_context *draw = svga->swtnl.draw; - unsigned i; + unsigned i, old_num_vertex_buffers; const void *map; enum pipe_error ret; @@ -76,6 +76,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, draw_set_mapped_vertex_buffer(draw, i, map, ~0); } } + old_num_vertex_buffers = svga->curr.num_vertex_buffers; /* Map index buffer, if present */ map = NULL; @@ -88,16 +89,21 @@ svga_swtnl_draw_vbo(struct svga_context *svga, svga->curr.ib.index_size, ~0); } - if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) { + /* Map constant buffers */ + for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) { + if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer == NULL) { + continue; + } + map = pipe_buffer_map(&svga->pipe, - svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer, + svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer, PIPE_TRANSFER_READ, - &cb_transfer); + &cb_transfer[i]); assert(map); draw_set_mapped_constant_buffer( - draw, PIPE_SHADER_VERTEX, 0, + draw, PIPE_SHADER_VERTEX, i, map, - svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer->width0); + svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer->width0); } draw_vbo(draw, info); @@ -105,8 +111,8 @@ svga_swtnl_draw_vbo(struct 
svga_context *svga, draw_flush(svga->swtnl.draw); /* Ensure the draw module didn't touch this */ - assert(i == svga->curr.num_vertex_buffers); - + assert(old_num_vertex_buffers == svga->curr.num_vertex_buffers); + /* * unmap vertex/index buffers */ @@ -122,8 +128,10 @@ svga_swtnl_draw_vbo(struct svga_context *svga, draw_set_indexes(draw, NULL, 0, 0); } - if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) { - pipe_buffer_unmap(&svga->pipe, cb_transfer); + for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) { + if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer) { + pipe_buffer_unmap(&svga->pipe, cb_transfer[i]); + } } /* Now safe to remove the need_swtnl flag in any update_state call */ @@ -167,9 +175,6 @@ boolean svga_init_swtnl( struct svga_context *svga ) if (!screen->haveLineSmooth) draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe); - /* always install polygon stipple stage */ - draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe); - /* enable/disable line stipple stage depending on device caps */ draw_enable_line_stipple(svga->swtnl.draw, !screen->haveLineStipple); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h index e2106e1e8..0a226abca 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_private.h @@ -43,6 +43,8 @@ struct svga_vbuf_render { unsigned vertex_size; + SVGA3dElementLayoutId layout_id; /**< current element layout id */ + unsigned prim; struct pipe_resource *vbuf; @@ -50,6 +52,8 @@ struct svga_vbuf_render { struct pipe_transfer *vbuf_transfer; struct pipe_transfer *ibuf_transfer; + void *vbuf_ptr; + /* current size of buffer */ size_t vbuf_size; size_t ibuf_size; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c index e62698e11..4d21f4f0e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_swtnl_state.c @@ -25,10 +25,13 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "util/u_bitmask.h" #include "util/u_inlines.h" #include "pipe/p_state.h" +#include "svga_cmd.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_swtnl.h" #include "svga_state.h" #include "svga_tgsi.h" @@ -51,30 +54,37 @@ static void set_draw_viewport( struct svga_context *svga ) float adjx = 0.0f; float adjy = 0.0f; - switch (svga->curr.reduced_prim) { - case PIPE_PRIM_POINTS: - adjx = SVGA_POINT_ADJ_X; - adjy = SVGA_POINT_ADJ_Y; - break; - case PIPE_PRIM_LINES: - /* XXX: This is to compensate for the fact that wide lines are - * going to be drawn with triangles, but we're not catching all - * cases where that will happen. - */ - if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES) - { - adjx = SVGA_LINE_ADJ_X + 0.175f; - adjy = SVGA_LINE_ADJ_Y - 0.175f; + if (svga_have_vgpu10(svga)) { + if (svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES) { + adjy = 0.25; } - else { - adjx = SVGA_LINE_ADJ_X; - adjy = SVGA_LINE_ADJ_Y; + } + else { + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + adjx = SVGA_POINT_ADJ_X; + adjy = SVGA_POINT_ADJ_Y; + break; + case PIPE_PRIM_LINES: + /* XXX: This is to compensate for the fact that wide lines are + * going to be drawn with triangles, but we're not catching all + * cases where that will happen. 
+ */ + if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES) + { + adjx = SVGA_LINE_ADJ_X + 0.175f; + adjy = SVGA_LINE_ADJ_Y - 0.175f; + } + else { + adjx = SVGA_LINE_ADJ_X; + adjy = SVGA_LINE_ADJ_Y; + } + break; + case PIPE_PRIM_TRIANGLES: + adjx += SVGA_TRIANGLE_ADJ_X; + adjy += SVGA_TRIANGLE_ADJ_Y; + break; } - break; - case PIPE_PRIM_TRIANGLES: - adjx += SVGA_TRIANGLE_ADJ_X; - adjy += SVGA_TRIANGLE_ADJ_Y; - break; } vp.translate[0] += adjx; @@ -150,6 +160,59 @@ struct svga_tracked_state svga_update_swtnl_draw = }; +static SVGA3dSurfaceFormat +translate_vertex_format(SVGA3dDeclType format) +{ + switch (format) { + case SVGA3D_DECLTYPE_FLOAT1: + return SVGA3D_R32_FLOAT; + case SVGA3D_DECLTYPE_FLOAT2: + return SVGA3D_R32G32_FLOAT; + case SVGA3D_DECLTYPE_FLOAT3: + return SVGA3D_R32G32B32_FLOAT; + case SVGA3D_DECLTYPE_FLOAT4: + return SVGA3D_R32G32B32A32_FLOAT; + default: + assert(!"Unexpected format in translate_vertex_format()"); + return SVGA3D_R32G32B32A32_FLOAT; + } +} + + +static SVGA3dElementLayoutId +svga_vdecl_to_input_element(struct svga_context *svga, + const SVGA3dVertexDecl *vdecl, unsigned num_decls) +{ + SVGA3dElementLayoutId id; + SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS]; + enum pipe_error ret; + unsigned i; + + assert(num_decls <= PIPE_MAX_ATTRIBS); + assert(svga_have_vgpu10(svga)); + + for (i = 0; i < num_decls; i++) { + elements[i].inputSlot = 0; /* vertex buffer index */ + elements[i].alignedByteOffset = vdecl[i].array.offset; + elements[i].format = translate_vertex_format(vdecl[i].identity.type); + elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA; + elements[i].instanceDataStepRate = 0; + elements[i].inputRegister = i; + } + + id = util_bitmask_add(svga->input_element_object_id_bm); + + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements); + assert(ret == PIPE_OK); + } + + return id; +} + + enum pipe_error svga_swtnl_update_vdecl( struct svga_context *svga ) { @@ -157,23 +220,24 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) struct draw_context *draw = svga->swtnl.draw; struct vertex_info *vinfo = &svga_render->vertex_info; SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; - const enum interp_mode colorInterp = - svga->curr.rast->templ.flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; struct svga_fragment_shader *fs = svga->curr.fs; int offset = 0; int nr_decls = 0; int src; unsigned i; + int any_change; memset(vinfo, 0, sizeof(*vinfo)); memset(vdecl, 0, sizeof(vdecl)); draw_prepare_shader_outputs(draw); + /* always add position */ src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, src); vinfo->attrib[0].emit = EMIT_4F; vdecl[0].array.offset = offset; + vdecl[0].identity.method = SVGA3D_DECLMETHOD_DEFAULT; vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4; vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT; vdecl[0].identity.usageIndex = 0; @@ -191,14 +255,14 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) switch (sem_name) { case TGSI_SEMANTIC_COLOR: - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, src); vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR; vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; offset += 16; nr_decls++; break; case TGSI_SEMANTIC_GENERIC: - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, src); vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; vdecl[nr_decls].identity.usageIndex = @@ -207,7 +271,7 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) nr_decls++; break; case TGSI_SEMANTIC_FOG: - draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_1F, src); vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1; assert(vdecl[nr_decls].identity.usageIndex == 0); @@ -225,16 +289,67 @@ svga_swtnl_update_vdecl( struct svga_context *svga ) draw_compute_vertex_size(vinfo); svga_render->vdecl_count = nr_decls; - for (i = 0; i < svga_render->vdecl_count; i++) + for (i = 0; i < svga_render->vdecl_count; i++) { vdecl[i].array.stride = offset; + } - if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0) - return PIPE_OK; + any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)); + + if (svga_have_vgpu10(svga)) { + enum pipe_error ret; + + if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) { + return PIPE_OK; + } + + if (svga_render->layout_id != SVGA3D_INVALID_ID) { + /* destroy old */ + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, + svga_render->layout_id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, + svga_render->layout_id); + assert(ret == PIPE_OK); + } + + /** + * reset current layout id state after the element layout is + * destroyed, so that if a new layout has the same layout id, we + * will know to re-issue the SetInputLayout command. 
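Aside: the DefineElementLayout, DestroyElementLayout and SetInputLayout calls around this hunk all follow the same flush-and-retry idiom: issue the command, and if it does not return PIPE_OK (command buffer full), flush the context and issue it once more. A minimal sketch of that idiom as a macro; the SVGA_RETRY_SKETCH name is hypothetical and not part of the imported code:

/* Hypothetical helper, illustration only. "cmd" must be an expression
 * returning enum pipe_error that is safe to evaluate twice, e.g. a
 * single SVGA3D_vgpu10_* call.
 */
#define SVGA_RETRY_SKETCH(svga, cmd)              \
   do {                                           \
      enum pipe_error ret_ = (cmd);               \
      if (ret_ != PIPE_OK) {                      \
         svga_context_flush((svga), NULL);        \
         ret_ = (cmd);                            \
         assert(ret_ == PIPE_OK);                 \
      }                                           \
   } while (0)

/* e.g. SVGA_RETRY_SKETCH(svga, SVGA3D_vgpu10_SetInputLayout(svga->swc, id)); */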
+ */ + if (svga->state.hw_draw.layout_id == svga_render->layout_id) + svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID; + + util_bitmask_clear(svga->input_element_object_id_bm, + svga_render->layout_id); + } + + svga_render->layout_id = + svga_vdecl_to_input_element(svga, vdecl, nr_decls); + + /* bind new */ + if (svga->state.hw_draw.layout_id != svga_render->layout_id) { + ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, + svga_render->layout_id); + assert(ret == PIPE_OK); + } + + svga->state.hw_draw.layout_id = svga_render->layout_id; + } + } + else { + if (!any_change) + return PIPE_OK; + } memcpy(svga_render->vdecl, vdecl, sizeof(vdecl)); svga->swtnl.new_vdecl = TRUE; - return PIPE_OK; + return 0; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c index 2e2ff5e46..c62d4d671 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c @@ -37,6 +37,7 @@ #include "svgadump/svga_shader_dump.h" #include "svga_context.h" +#include "svga_shader.h" #include "svga_tgsi.h" #include "svga_tgsi_emit.h" #include "svga_debug.h" @@ -70,7 +71,7 @@ svga_shader_expand(struct svga_shader_emitter *emit) else new_buf = NULL; - if (new_buf == NULL) { + if (!new_buf) { emit->ptr = err_buf; emit->buf = err_buf; emit->size = sizeof(err_buf); @@ -166,97 +167,6 @@ svga_shader_emit_header(struct svga_shader_emitter *emit) /** - * Use the shader info to generate a bitmask indicating which generic - * inputs are used by the shader. A set bit indicates that GENERIC[i] - * is used. - */ -unsigned -svga_get_generic_inputs_mask(const struct tgsi_shader_info *info) -{ - unsigned i, mask = 0x0; - - for (i = 0; i < info->num_inputs; i++) { - if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - unsigned j = info->input_semantic_index[i]; - assert(j < sizeof(mask) * 8); - mask |= 1 << j; - } - } - - return mask; -} - - -/** - * Given a mask of used generic variables (as returned by the above functions) - * fill in a table which maps those indexes to small integers. - * This table is used by the remap_generic_index() function in - * svga_tgsi_decl_sm30.c - * Example: if generics_mask = binary(1010) it means that GENERIC[1] and - * GENERIC[3] are used. The remap_table will contain: - * table[1] = 0; - * table[3] = 1; - * The remaining table entries will be filled in with the next unused - * generic index (in this example, 2). - */ -void -svga_remap_generics(unsigned generics_mask, - int8_t remap_table[MAX_GENERIC_VARYING]) -{ - /* Note texcoord[0] is reserved so start at 1 */ - unsigned count = 1, i; - - for (i = 0; i < MAX_GENERIC_VARYING; i++) { - remap_table[i] = -1; - } - - /* for each bit set in generic_mask */ - while (generics_mask) { - unsigned index = ffs(generics_mask) - 1; - remap_table[index] = count++; - generics_mask &= ~(1 << index); - } -} - - -/** - * Use the generic remap table to map a TGSI generic varying variable - * index to a small integer. If the remapping table doesn't have a - * valid value for the given index (the table entry is -1) it means - * the fragment shader doesn't use that VS output. Just allocate - * the next free value in that case. Alternately, we could cull - * VS instructions that write to register, or replace the register - * with a dummy temp register. 
- * XXX TODO: we should do one of the later as it would save precious - * texcoord registers. - */ -int -svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], - int generic_index) -{ - assert(generic_index < MAX_GENERIC_VARYING); - - if (generic_index >= MAX_GENERIC_VARYING) { - /* just don't return a random/garbage value */ - generic_index = MAX_GENERIC_VARYING - 1; - } - - if (remap_table[generic_index] == -1) { - /* This is a VS output that has no matching PS input. Find a - * free index. - */ - int i, max = 0; - for (i = 0; i < MAX_GENERIC_VARYING; i++) { - max = MAX2(max, remap_table[i]); - } - remap_table[generic_index] = max + 1; - } - - return remap_table[generic_index]; -} - - -/** * Parse TGSI shader and translate to SVGA/DX9 serialized * representation. * @@ -264,9 +174,10 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], * can be dynamically grown. Once we've finished and know how large * it is, it will be copied to a hardware buffer for upload. */ -static struct svga_shader_variant * -svga_tgsi_translate(const struct svga_shader *shader, - const struct svga_compile_key *key, unsigned unit) +struct svga_shader_variant * +svga_tgsi_vgpu9_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, unsigned unit) { struct svga_shader_variant *variant = NULL; struct svga_shader_emitter emit; @@ -288,10 +199,10 @@ svga_tgsi_translate(const struct svga_shader *shader, emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1; if (unit == PIPE_SHADER_FRAGMENT) - emit.imm_start += key->fkey.num_unnormalized_coords; + emit.imm_start += key->num_unnormalized_coords; if (unit == PIPE_SHADER_VERTEX) { - emit.imm_start += key->vkey.need_prescale ? 2 : 0; + emit.imm_start += key->vs.need_prescale ? 2 : 0; } emit.nr_hw_float_const = @@ -317,8 +228,8 @@ svga_tgsi_translate(const struct svga_shader *shader, goto fail; } - variant = CALLOC_STRUCT(svga_shader_variant); - if (variant == NULL) + variant = svga_new_shader_variant(svga); + if (!variant) goto fail; variant->shader = shader; @@ -327,7 +238,18 @@ svga_tgsi_translate(const struct svga_shader *shader, memcpy(&variant->key, key, sizeof(*key)); variant->id = UTIL_BITMASK_INVALID_INDEX; - if (SVGA_DEBUG & DEBUG_TGSI) { + variant->pstipple_sampler_unit = emit.pstipple_sampler_unit; + + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). 
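Aside: the constant_color_output flag assigned just below combines two pieces of emitter state: emit_mov() (later in this diff) marks a MOV into fragment output 0 from a non-indirect constant, and num_output_writes counts every output write. A condensed sketch of the resulting predicate, assuming only those two fields:

/* Sketch only: true when the fragment shader amounts to a single
 * "MOV OUT[0], CONST[..]" write, so all fragments get the same color
 * before blending.
 */
static boolean
shader_writes_constant_color(const struct svga_shader_emitter *emit)
{
   return emit->constant_color_output && emit->num_output_writes == 1;
}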
+ */ + variant->constant_color_output = + emit.constant_color_output && emit.num_output_writes == 1; + +#if 0 + if (!svga_shader_verify(variant->tokens, variant->nr_tokens) || + SVGA_DEBUG & DEBUG_TGSI) { debug_printf("#####################################\n"); debug_printf("Shader %u below\n", shader->id); tgsi_dump(shader->tokens, 0); @@ -337,6 +259,7 @@ svga_tgsi_translate(const struct svga_shader *shader, } debug_printf("#####################################\n"); } +#endif return variant; @@ -345,39 +268,3 @@ svga_tgsi_translate(const struct svga_shader *shader, FREE(emit.buf); return NULL; } - - -struct svga_shader_variant * -svga_translate_fragment_program(const struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *fkey) -{ - struct svga_compile_key key; - - memset(&key, 0, sizeof(key)); - - memcpy(&key.fkey, fkey, sizeof *fkey); - - memcpy(key.generic_remap_table, fs->generic_remap_table, - sizeof(fs->generic_remap_table)); - - return svga_tgsi_translate(&fs->base, &key, PIPE_SHADER_FRAGMENT); -} - - -struct svga_shader_variant * -svga_translate_vertex_program(const struct svga_vertex_shader *vs, - const struct svga_vs_compile_key *vkey) -{ - struct svga_compile_key key; - - memset(&key, 0, sizeof(key)); - - memcpy(&key.vkey, vkey, sizeof *vkey); - - /* Note: we could alternately store the remap table in the vkey but - * that would make it larger. We just regenerate it here instead. - */ - svga_remap_generics(vkey->fs_generic_inputs, key.generic_remap_table); - - return svga_tgsi_translate(&vs->base, &key, PIPE_SHADER_VERTEX); -} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h index 5c47a4ad3..258113570 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.h @@ -26,94 +26,16 @@ #ifndef SVGA_TGSI_H #define SVGA_TGSI_H -#include "pipe/p_state.h" +#include "pipe/p_compiler.h" +#include "svga3d_reg.h" -#include "svga_hw_reg.h" +#define MAX_VGPU10_ADDR_REGS 2 -/** - * We use a 32-bit mask to keep track of the generic indexes. - */ -#define MAX_GENERIC_VARYING 32 - - -struct svga_fragment_shader; -struct svga_vertex_shader; +struct svga_compile_key; +struct svga_context; struct svga_shader; -struct tgsi_shader_info; -struct tgsi_token; - - -struct svga_vs_compile_key -{ - unsigned fs_generic_inputs; - unsigned need_prescale:1; - unsigned allow_psiz:1; - unsigned adjust_attrib_range:16; - unsigned adjust_attrib_w_1:16; -}; - -struct svga_fs_compile_key -{ - unsigned light_twoside:1; - unsigned front_ccw:1; - unsigned white_fragments:1; - unsigned write_color0_to_n_cbufs:3; - unsigned num_textures:8; - unsigned num_unnormalized_coords:8; - unsigned sprite_origin_lower_left:1; - struct { - unsigned compare_mode:1; - unsigned compare_func:3; - unsigned unnormalized:1; - unsigned width_height_idx:7; - unsigned texture_target:8; - unsigned sprite_texgen:1; - unsigned swizzle_r:3; - unsigned swizzle_g:3; - unsigned swizzle_b:3; - unsigned swizzle_a:3; - } tex[PIPE_MAX_SAMPLERS]; -}; - -/** - * Key/index for identifying shader variants. - */ -struct svga_compile_key { - struct svga_vs_compile_key vkey; - struct svga_fs_compile_key fkey; - int8_t generic_remap_table[MAX_GENERIC_VARYING]; -}; - - -/** - * A single TGSI shader may be compiled into different variants of - * SVGA3D shaders depending on the compile key. Each user shader - * will have a linked list of these variants. 
- */ -struct svga_shader_variant -{ - const struct svga_shader *shader; - - /** Parameters used to generate this variant */ - struct svga_compile_key key; - - /* Compiled shader tokens: - */ - const unsigned *tokens; - unsigned nr_tokens; - - /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE, - * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY. - */ - unsigned id; - - /* GB object buffer containing the bytecode */ - struct svga_winsys_gb_shader *gb_shader; - - /** Next variant */ - struct svga_shader_variant *next; -}; +struct svga_shader_variant; /* TGSI doesn't provide use with VS input semantics (they're actually @@ -140,37 +62,17 @@ static inline void svga_generate_vdecl_semantics( unsigned idx, -static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) -{ - return sizeof *key; -} - -static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) -{ - return (const char *)&key->tex[key->num_textures] - (const char *)key; -} - struct svga_shader_variant * -svga_translate_fragment_program( const struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *fkey ); +svga_tgsi_vgpu9_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, unsigned unit); struct svga_shader_variant * -svga_translate_vertex_program( const struct svga_vertex_shader *fs, - const struct svga_vs_compile_key *vkey ); - - -unsigned -svga_get_generic_inputs_mask(const struct tgsi_shader_info *info); - -unsigned -svga_get_generic_outputs_mask(const struct tgsi_shader_info *info); - -void -svga_remap_generics(unsigned generics_mask, - int8_t remap_table[MAX_GENERIC_VARYING]); +svga_tgsi_vgpu10_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, + unsigned unit); -int -svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], - int generic_index); +boolean svga_shader_verify(const uint32_t *tokens, unsigned nr_tokens); #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index 42d6f489b..ca4009b9e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -216,7 +216,7 @@ ps30_input(struct svga_shader_emitter *emit, return emit_decl( emit, reg, 0, 0 ); } - else if (emit->key.fkey.light_twoside && + else if (emit->key.fs.light_twoside && (semantic.Name == TGSI_SEMANTIC_COLOR)) { if (!translate_vs_ps_semantic( emit, semantic, &usage, &index )) @@ -285,9 +285,9 @@ ps30_input(struct svga_shader_emitter *emit, return FALSE; if (semantic.Name == TGSI_SEMANTIC_GENERIC && - emit->key.fkey.sprite_origin_lower_left && + emit->key.sprite_origin_lower_left && index >= 1 && - emit->key.fkey.tex[index - 1].sprite_texgen) { + emit->key.tex[index - 1].sprite_texgen) { /* This is a sprite texture coord with lower-left origin. * We need to invert the texture T coordinate since the SVGA3D * device only supports an upper-left origin. 
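The fkey/vkey accesses rewritten throughout these hunks follow the flattened compile-key layout introduced by this import: per-stage sub-structs plus shared texture state now live directly in struct svga_compile_key (see svga_shader.h). A rough sketch of just the fields touched in this diff, reconstructed from the accesses above and below rather than the full definition:

/* Approximation for orientation only; the real struct lives in
 * svga_shader.h. Bit widths follow the old svga_vs/fs_compile_key
 * declarations removed from svga_tgsi.h in this same commit.
 */
struct svga_compile_key_sketch {
   struct {
      unsigned need_prescale:1;
      unsigned allow_psiz:1;
      unsigned adjust_attrib_range:16;
      unsigned adjust_attrib_w_1:16;
   } vs;
   struct {
      unsigned light_twoside:1;
      unsigned front_ccw:1;
      unsigned white_fragments:1;
      unsigned pstipple:1;
      unsigned write_color0_to_n_cbufs:3;
   } fs;
   unsigned num_textures:8;
   unsigned num_unnormalized_coords:8;
   unsigned sprite_origin_lower_left:1;
   unsigned clip_plane_enable;   /* bitmask of enabled user clip planes */
   struct {
      unsigned compare_mode:1;
      unsigned compare_func:3;
      unsigned unnormalized:1;
      unsigned width_height_idx:7;
      unsigned texture_target:8;
      unsigned sprite_texgen:1;
      unsigned swizzle_r:3, swizzle_g:3, swizzle_b:3, swizzle_a:3;
   } tex[PIPE_MAX_SAMPLERS];
};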
@@ -329,7 +329,7 @@ ps30_output(struct svga_shader_emitter *emit, switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: if (emit->unit == PIPE_SHADER_FRAGMENT) { - if (emit->key.fkey.white_fragments) { + if (emit->key.fs.white_fragments) { /* Used for XOR logicop mode */ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); @@ -337,14 +337,14 @@ ps30_output(struct svga_shader_emitter *emit, emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT, semantic.Index); } - else if (emit->key.fkey.write_color0_to_n_cbufs) { + else if (emit->key.fs.write_color0_to_n_cbufs) { /* We'll write color output [0] to all render targets. * Prepare all the output registers here, but only when the * semantic.Index == 0 so we don't do this more than once. */ if (semantic.Index == 0) { unsigned i; - for (i = 0; i < emit->key.fkey.write_color0_to_n_cbufs; i++) { + for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) { emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP, emit->nr_hw_temp++); emit->temp_color_output[i] = emit->output_map[idx+i]; @@ -487,7 +487,7 @@ vs30_output(struct svga_shader_emitter *emit, /* This has the effect of not declaring psiz (below) and not * emitting the final MOV to true_psiz in the postamble. */ - if (!emit->key.vkey.allow_psiz) + if (!emit->key.vs.allow_psiz) return TRUE; emit->true_psiz = dcl.dst; @@ -517,7 +517,7 @@ vs30_output(struct svga_shader_emitter *emit, static ubyte svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx) { - switch (emit->key.fkey.tex[idx].texture_target) { + switch (emit->key.tex[idx].texture_target) { case PIPE_TEXTURE_1D: return SVGA3DSAMP_2D; case PIPE_TEXTURE_2D: diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h index 1a1dac235..83f0c8bd4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -28,6 +28,7 @@ #include "tgsi/tgsi_scan.h" #include "svga_hw_reg.h" +#include "svga_shader.h" #include "svga_tgsi.h" #include "svga3d_shaderdefs.h" @@ -83,6 +84,9 @@ struct svga_shader_emitter int dynamic_branching_level; + unsigned num_output_writes; + boolean constant_color_output; + boolean in_main_func; boolean created_common_immediate; @@ -130,6 +134,8 @@ struct svga_shader_emitter struct svga_arl_consts arl_consts[12]; int num_arl_consts; int current_arl; + + unsigned pstipple_sampler_unit; }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c index bac956066..489e68f88 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -29,6 +29,7 @@ #include "tgsi/tgsi_parse.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_pstipple.h" #include "svga_tgsi_emit.h" #include "svga_context.h" @@ -98,6 +99,7 @@ translate_dst_register( struct svga_shader_emitter *emit, * Need to lookup a table built at decl time: */ dest = emit->output_map[reg->Register.Index]; + emit->num_output_writes++; break; default: @@ -164,7 +166,7 @@ scalar(struct src_register src, unsigned comp) static boolean svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) { - int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -177,7 +179,7 @@ svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) static int svga_arl_adjustment( const struct svga_shader_emitter *emit ) { 
- int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -862,7 +864,7 @@ create_common_immediate( struct svga_shader_emitter *emit ) idx++; /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ - if (emit->key.vkey.adjust_attrib_range) { + if (emit->key.vs.adjust_attrib_range) { if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 2.0f, 0.0f, 0.0f, 0.0f )) return FALSE; @@ -1015,7 +1017,7 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) struct src_register reg; /* the width/height indexes start right after constants */ - idx = emit->key.fkey.tex[sampler_num].width_height_idx + + idx = emit->key.tex[sampler_num].width_height_idx + emit->info.file_max[TGSI_FILE_CONSTANT] + 1; reg = src_register( SVGA3DREG_CONST, idx ); @@ -1173,7 +1175,7 @@ emit_div(struct svga_shader_emitter *emit, const struct src_register src1 = translate_src_register(emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); - int i; + unsigned i; /* For each enabled element, perform a RCP instruction. Note that * RCP is scalar in SVGA3D: @@ -1723,7 +1725,7 @@ emit_tex2(struct svga_shader_emitter *emit, texcoord = translate_src_register( emit, &insn->Src[0] ); sampler = translate_src_register( emit, &insn->Src[1] ); - if (emit->key.fkey.tex[sampler.base.num].unnormalized || + if (emit->key.tex[sampler.base.num].unnormalized || emit->dynamic_branching_level > 0) tmp = get_temp( emit ); @@ -1755,7 +1757,7 @@ emit_tex2(struct svga_shader_emitter *emit, /* Explicit normalization of texcoords: */ - if (emit->key.fkey.tex[sampler.base.num].unnormalized) { + if (emit->key.tex[sampler.base.num].unnormalized) { struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); /* MUL tmp, SRC0, WH */ @@ -1820,7 +1822,7 @@ emit_tex_swizzle(struct svga_shader_emitter *emit, const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; unsigned srcSwizzle[4]; unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0; - int i; + unsigned i; /* build writemasks and srcSwizzle terms */ for (i = 0; i < 4; i++) { @@ -1891,14 +1893,14 @@ emit_tex(struct svga_shader_emitter *emit, const unsigned unit = src1.base.num; /* check for shadow samplers */ - boolean compare = (emit->key.fkey.tex[unit].compare_mode == + boolean compare = (emit->key.tex[unit].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE); /* texture swizzle */ - boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED || - emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN || - emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE || - emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA); + boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED || + emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN || + emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE || + emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA); boolean saturate = insn->Instruction.Saturate; @@ -1965,7 +1967,7 @@ emit_tex(struct svga_shader_emitter *emit, /* Compare texture sample value against R component of texcoord */ if (!emit_select(emit, - emit->key.fkey.tex[unit].compare_func, + emit->key.tex[unit].compare_func, writemask( dst2, TGSI_WRITEMASK_XYZ ), r_coord, tex_src_x)) @@ -1991,10 +1993,10 @@ emit_tex(struct svga_shader_emitter *emit, /* swizzle from tex_result to dst (handles saturation too, if any) */ emit_tex_swizzle(emit, dst, src(tex_result), - emit->key.fkey.tex[unit].swizzle_r, - 
emit->key.fkey.tex[unit].swizzle_g, - emit->key.fkey.tex[unit].swizzle_b, - emit->key.fkey.tex[unit].swizzle_a); + emit->key.tex[unit].swizzle_r, + emit->key.tex[unit].swizzle_g, + emit->key.tex[unit].swizzle_b, + emit->key.tex[unit].swizzle_a); } return TRUE; @@ -2102,6 +2104,29 @@ emit_simple_instruction(struct svga_shader_emitter *emit, /** + * TGSI_OPCODE_MOVE is only special-cased here to detect the + * svga_fragment_shader::constant_color_output case. + */ +static boolean +emit_mov(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct tgsi_full_src_register *src = &insn->Src[0]; + const struct tgsi_full_dst_register *dst = &insn->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); +} + + +/** * Translate/emit TGSI DDX, DDY instructions. */ static boolean @@ -3044,6 +3069,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SSG: return emit_ssg( emit, insn ); + case TGSI_OPCODE_MOV: + return emit_mov( emit, insn ); + default: { unsigned opcode = translate_opcode(insn->Instruction.Opcode); @@ -3113,7 +3141,7 @@ make_immediate(struct svga_shader_emitter *emit, static boolean emit_vs_preamble(struct svga_shader_emitter *emit) { - if (!emit->key.vkey.need_prescale) { + if (!emit->key.vs.need_prescale) { if (!make_immediate( emit, 0, 0, .5, .5, &emit->imm_0055)) return FALSE; @@ -3190,7 +3218,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit) * logicop workaround. */ if (emit->unit == PIPE_SHADER_FRAGMENT && - emit->key.fkey.white_fragments) { + emit->key.fs.white_fragments) { struct src_register one = get_one_immediate(emit); if (!submit_op1( emit, @@ -3200,7 +3228,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit) return FALSE; } else if (emit->unit == PIPE_SHADER_FRAGMENT && - i < emit->key.fkey.write_color0_to_n_cbufs) { + i < emit->key.fs.write_color0_to_n_cbufs) { /* Write temp color output [0] to true output [i] */ if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), emit->true_color_output[i], @@ -3244,7 +3272,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit) /* Need to perform various manipulations on vertex position to cope * with the different GL and D3D clip spaces. 
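The prescale path referenced in this comment (key.vs.need_prescale here, prescale_scale_index/prescale_trans_index in the new VGPU10 emitter further down) folds the GL-to-D3D clip-space differences, the [-1,1] versus [0,1] depth range and the half-pixel viewport offset, into the post-transform position using per-viewport scale/translate constants. Roughly, and only as an illustration of the idea rather than the exact emitted instruction sequence:

/* Illustration only; "scale" and "trans" stand for the prescale
 * constants the driver uploads per viewport.
 */
static void
apply_prescale_sketch(float pos[4], const float scale[4], const float trans[4])
{
   /* clip-space position is homogeneous, so the translation term is
    * multiplied by w to survive the later perspective divide
    */
   pos[0] = pos[0] * scale[0] + pos[3] * trans[0];
   pos[1] = pos[1] * scale[1] + pos[3] * trans[1];
   pos[2] = pos[2] * scale[2] + pos[3] * trans[2];
   /* w is left unchanged */
}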
*/ - if (emit->key.vkey.need_prescale) { + if (emit->key.vs.need_prescale) { SVGA3dShaderDestToken temp_pos = emit->temp_pos; SVGA3dShaderDestToken depth = emit->depth_pos; SVGA3dShaderDestToken pos = emit->true_pos; @@ -3343,7 +3371,7 @@ emit_light_twoside(struct svga_shader_emitter *emit) struct src_register back[2]; SVGA3dShaderDestToken color[2]; int count = emit->internal_color_count; - int i; + unsigned i; SVGA3dShaderInstToken if_token; if (count == 0) @@ -3372,7 +3400,7 @@ emit_light_twoside(struct svga_shader_emitter *emit) if_token = inst_token( SVGA3DOP_IFC ); - if (emit->key.fkey.front_ccw) + if (emit->key.fs.front_ccw) if_token.control = SVGA3DOPCOMP_LT; else if_token.control = SVGA3DOPCOMP_GT; @@ -3423,7 +3451,7 @@ emit_frontface(struct svga_shader_emitter *emit) temp = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); - if (emit->key.fkey.front_ccw) { + if (emit->key.fs.front_ccw) { pass = get_zero_immediate(emit); fail = get_one_immediate(emit); } else { @@ -3494,8 +3522,8 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) static boolean emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) { - unsigned adjust_mask = (emit->key.vkey.adjust_attrib_range | - emit->key.vkey.adjust_attrib_w_1); + unsigned adjust_mask = (emit->key.vs.adjust_attrib_range | + emit->key.vs.adjust_attrib_w_1); while (adjust_mask) { /* Adjust vertex attrib range and/or set W component = 1 */ @@ -3506,7 +3534,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); emit->nr_hw_temp++; - if (emit->key.vkey.adjust_attrib_range & (1 << index)) { + if (emit->key.vs.adjust_attrib_range & (1 << index)) { /* The vertex input/attribute is supposed to be a signed value in * the range [-1,1] but we actually fetched/converted it to the * range [0,1]. 
This most likely happens when the app specifies a @@ -3558,7 +3586,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) return FALSE; } - if (emit->key.vkey.adjust_attrib_w_1 & (1 << index)) { + if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) { /* move 1 into W position of tmp */ if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), @@ -3588,10 +3616,10 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) unsigned i; if (emit->unit == PIPE_SHADER_FRAGMENT) { - if (emit->key.fkey.light_twoside) + if (emit->key.fs.light_twoside) return TRUE; - if (emit->key.fkey.white_fragments) + if (emit->key.fs.white_fragments) return TRUE; if (emit->emit_frontface) @@ -3606,16 +3634,16 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) return TRUE; /* look for any PIPE_SWIZZLE_ZERO/ONE terms */ - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA) + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA || + emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA || + emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA || + emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA) return TRUE; } - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].compare_mode + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) return TRUE; } @@ -3623,8 +3651,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) else if (emit->unit == PIPE_SHADER_VERTEX) { if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) return TRUE; - if (emit->key.vkey.adjust_attrib_range || - emit->key.vkey.adjust_attrib_w_1) + if (emit->key.vs.adjust_attrib_range || + emit->key.vs.adjust_attrib_w_1) return TRUE; } @@ -3670,7 +3698,7 @@ static boolean pre_parse_add_indirect( struct svga_shader_emitter *emit, int num, int current_arl) { - int i; + unsigned i; assert(num < 0); for (i = 0; i < emit->num_arl_consts; ++i) { @@ -3772,7 +3800,7 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) if (!emit_ps_preamble( emit )) return FALSE; - if (emit->key.fkey.light_twoside) { + if (emit->key.fs.light_twoside) { if (!emit_light_twoside( emit )) return FALSE; } @@ -3787,14 +3815,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) } else { assert(emit->unit == PIPE_SHADER_VERTEX); - if (emit->key.vkey.adjust_attrib_range || - emit->key.vkey.adjust_attrib_w_1) { - if (!emit_adjusted_vertex_attribs(emit)) + if (emit->key.vs.adjust_attrib_range) { + if (!emit_adjusted_vertex_attribs(emit) || + emit->key.vs.adjust_attrib_w_1) { return FALSE; + } } } - return TRUE; } @@ -3808,10 +3836,31 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, const struct tgsi_token *tokens) { struct tgsi_parse_context parse; + const struct tgsi_token *new_tokens = NULL; boolean ret = TRUE; boolean helpers_emitted = FALSE; unsigned line_nr = 0; + if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) { + unsigned unit; + + new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, + TGSI_FILE_INPUT); + + if (new_tokens) { + /* Setup texture state for stipple */ + emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D; + emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; + emit->key.tex[unit].swizzle_g 
= TGSI_SWIZZLE_Y; + emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; + emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; + + emit->pstipple_sampler_unit = unit; + + tokens = new_tokens; + } + } + tgsi_parse_init( &parse, tokens ); emit->internal_imm_count = 0; @@ -3878,5 +3927,9 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, done: tgsi_parse_free( &parse ); + if (new_tokens) { + tgsi_free_tokens(new_tokens); + } + return ret; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c new file mode 100644 index 000000000..0c5afeb4c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -0,0 +1,6846 @@ +/********************************************************** + * Copyright 1998-2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file svga_tgsi_vgpu10.c + * + * TGSI -> VGPU10 shader translation. + * + * \author Mingcheng Chen + * \author Brian Paul + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_two_side.h" +#include "tgsi/tgsi_aa_point.h" +#include "tgsi/tgsi_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "util/u_debug.h" +#include "util/u_pstipple.h" + +#include "svga_context.h" +#include "svga_debug.h" +#include "svga_link.h" +#include "svga_shader.h" +#include "svga_tgsi.h" + +#include "VGPU10ShaderTokens.h" + + +#define INVALID_INDEX 99999 +#define MAX_INTERNAL_TEMPS 3 +#define MAX_SYSTEM_VALUES 4 +#define MAX_IMMEDIATE_COUNT \ + (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) +#define MAX_TEMP_ARRAYS 64 /* Enough? */ + + +/** + * Clipping is complicated. There's four different cases which we + * handle during VS/GS shader translation: + */ +enum clipping_mode +{ + CLIP_NONE, /**< No clipping enabled */ + CLIP_LEGACY, /**< The shader has no clipping declarations or code but + * one or more user-defined clip planes are enabled. We + * generate extra code to emit clip distances. + */ + CLIP_DISTANCE, /**< The shader already declares clip distance output + * registers and has code to write to them. 
+ */ + CLIP_VERTEX /**< The shader declares a clip vertex output register and + * has code that writes to the register. We convert the + * clipvertex position into one or more clip distances. + */ +}; + + +struct svga_shader_emitter_v10 +{ + /* The token output buffer */ + unsigned size; + char *buf; + char *ptr; + + /* Information about the shader and state (does not change) */ + struct svga_compile_key key; + struct tgsi_shader_info info; + unsigned unit; + + unsigned inst_start_token; + boolean discard_instruction; /**< throw away current instruction? */ + + union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; + unsigned num_immediates; /**< Number of immediates emitted */ + unsigned common_immediate_pos[8]; /**< literals for common immediates */ + unsigned num_common_immediates; + boolean immediates_emitted; + + unsigned num_outputs; /**< include any extra outputs */ + /** The first extra output is reserved for + * non-adjusted vertex position for + * stream output purpose + */ + + /* Temporary Registers */ + unsigned num_shader_temps; /**< num of temps used by original shader */ + unsigned internal_temp_count; /**< currently allocated internal temps */ + struct { + unsigned start, size; + } temp_arrays[MAX_TEMP_ARRAYS]; + unsigned num_temp_arrays; + + /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ + struct { + unsigned arrayId, index; + } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ + + /** Number of constants used by original shader for each constant buffer. + * The size should probably always match with that of svga_state.constbufs. + */ + unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; + + /* Samplers */ + unsigned num_samplers; + + /* Address regs (really implemented with temps) */ + unsigned num_address_regs; + unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; + + /* Output register usage masks */ + ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; + + /* To map TGSI system value index to VGPU shader input indexes */ + ubyte system_value_indexes[MAX_SYSTEM_VALUES]; + + struct { + /* vertex position scale/translation */ + unsigned out_index; /**< the real position output reg */ + unsigned tmp_index; /**< the fake/temp position output reg */ + unsigned so_index; /**< the non-adjusted position output reg */ + unsigned prescale_scale_index, prescale_trans_index; + boolean need_prescale; + } vposition; + + /* For vertex shaders only */ + struct { + /* viewport constant */ + unsigned viewport_index; + + /* temp index of adjusted vertex attributes */ + unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; + } vs; + + /* For fragment shaders only */ + struct { + /* apha test */ + unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ + unsigned color_tmp_index; /**< fake/temp color output reg */ + unsigned alpha_ref_index; /**< immediate constant for alpha ref */ + + /* front-face */ + unsigned face_input_index; /**< real fragment shader face reg (bool) */ + unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ + + unsigned pstipple_sampler_unit; + + unsigned fragcoord_input_index; /**< real fragment position input reg */ + unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ + } fs; + + /* For geometry shaders only */ + struct { + VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ + VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ + unsigned input_size; /**< size of input arrays */ + unsigned prim_id_index; /**< primitive id register index */ + unsigned 
max_out_vertices; /**< maximum number of output vertices */ + } gs; + + /* For vertex or geometry shaders */ + enum clipping_mode clip_mode; + unsigned clip_dist_out_index; /**< clip distance output register index */ + unsigned clip_dist_tmp_index; /**< clip distance temporary register */ + unsigned clip_dist_so_index; /**< clip distance shadow copy */ + + /** Index of temporary holding the clipvertex coordinate */ + unsigned clip_vertex_out_index; /**< clip vertex output register index */ + unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ + + /* user clip plane constant slot indexes */ + unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; + + unsigned num_output_writes; + boolean constant_color_output; + + boolean uses_flat_interp; + + /* For all shaders: const reg index for RECT coord scaling */ + unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; + + /* For all shaders: const reg index for texture buffer size */ + unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; + + /* VS/GS/FS Linkage info */ + struct shader_linkage linkage; + + bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ +}; + + +static boolean +emit_post_helpers(struct svga_shader_emitter_v10 *emit); + +static boolean +emit_vertex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst); + +static char err_buf[128]; + +static boolean +expand(struct svga_shader_emitter_v10 *emit) +{ + char *new_buf; + unsigned newsize = emit->size * 2; + + if (emit->buf != err_buf) + new_buf = REALLOC(emit->buf, emit->size, newsize); + else + new_buf = NULL; + + if (!new_buf) { + emit->ptr = err_buf; + emit->buf = err_buf; + emit->size = sizeof(err_buf); + return FALSE; + } + + emit->size = newsize; + emit->ptr = new_buf + (emit->ptr - emit->buf); + emit->buf = new_buf; + return TRUE; +} + +/** + * Create and initialize a new svga_shader_emitter_v10 object. + */ +static struct svga_shader_emitter_v10 * +alloc_emitter(void) +{ + struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); + + if (!emit) + return NULL; + + /* to initialize the output buffer */ + emit->size = 512; + if (!expand(emit)) { + FREE(emit); + return NULL; + } + return emit; +} + +/** + * Free an svga_shader_emitter_v10 object. + */ +static void +free_emitter(struct svga_shader_emitter_v10 *emit) +{ + assert(emit); + FREE(emit->buf); /* will be NULL if translation succeeded */ + FREE(emit); +} + +static inline boolean +reserve(struct svga_shader_emitter_v10 *emit, + unsigned nr_dwords) +{ + while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { + if (!expand(emit)) + return FALSE; + } + + return TRUE; +} + +static boolean +emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) +{ + if (!reserve(emit, 1)) + return FALSE; + + *(uint32 *)emit->ptr = dword; + emit->ptr += sizeof dword; + return TRUE; +} + +static boolean +emit_dwords(struct svga_shader_emitter_v10 *emit, + const uint32 *dwords, + unsigned nr) +{ + if (!reserve(emit, nr)) + return FALSE; + + memcpy(emit->ptr, dwords, nr * sizeof *dwords); + emit->ptr += nr * sizeof *dwords; + return TRUE; +} + +/** Return the number of tokens in the emitter's buffer */ +static unsigned +emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) +{ + return (emit->ptr - emit->buf) / sizeof(unsigned); +} + + +/** + * Check for register overflow. If we overflow we'll set an + * error flag. This function can be called for register declarations + * or use as src/dst instruction operands. + * \param type register type. 
One of VGPU10_OPERAND_TYPE_x + or VGPU10_OPCODE_DCL_x + * \param index the register index + */ +static void +check_register_index(struct svga_shader_emitter_v10 *emit, + unsigned operandType, unsigned index) +{ + bool overflow_before = emit->register_overflow; + + switch (operandType) { + case VGPU10_OPERAND_TYPE_TEMP: + case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: + case VGPU10_OPCODE_DCL_TEMPS: + if (index >= VGPU10_MAX_TEMPS) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: + case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: + if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_INPUT: + case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: + case VGPU10_OPCODE_DCL_INPUT: + case VGPU10_OPCODE_DCL_INPUT_SGV: + case VGPU10_OPCODE_DCL_INPUT_SIV: + case VGPU10_OPCODE_DCL_INPUT_PS: + case VGPU10_OPCODE_DCL_INPUT_PS_SGV: + case VGPU10_OPCODE_DCL_INPUT_PS_SIV: + if ((emit->unit == PIPE_SHADER_VERTEX && + index >= VGPU10_MAX_VS_INPUTS) || + (emit->unit == PIPE_SHADER_GEOMETRY && + index >= VGPU10_MAX_GS_INPUTS) || + (emit->unit == PIPE_SHADER_FRAGMENT && + index >= VGPU10_MAX_FS_INPUTS)) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_OUTPUT: + case VGPU10_OPCODE_DCL_OUTPUT: + case VGPU10_OPCODE_DCL_OUTPUT_SGV: + case VGPU10_OPCODE_DCL_OUTPUT_SIV: + if ((emit->unit == PIPE_SHADER_VERTEX && + index >= VGPU10_MAX_VS_OUTPUTS) || + (emit->unit == PIPE_SHADER_GEOMETRY && + index >= VGPU10_MAX_GS_OUTPUTS) || + (emit->unit == PIPE_SHADER_FRAGMENT && + index >= VGPU10_MAX_FS_OUTPUTS)) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_SAMPLER: + case VGPU10_OPCODE_DCL_SAMPLER: + if (index >= VGPU10_MAX_SAMPLERS) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_RESOURCE: + case VGPU10_OPCODE_DCL_RESOURCE: + if (index >= VGPU10_MAX_RESOURCES) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + if (index >= MAX_IMMEDIATE_COUNT) { + emit->register_overflow = TRUE; + } + break; + default: + assert(0); + ; /* nothing */ + } + + if (emit->register_overflow && !overflow_before) { + debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n", + operandType, index); + } +} + + +/** + * Examine misc state to determine the clipping mode. + */ +static void +determine_clipping_mode(struct svga_shader_emitter_v10 *emit) +{ + if (emit->info.num_written_clipdistance > 0) { + emit->clip_mode = CLIP_DISTANCE; + } + else if (emit->info.writes_clipvertex) { + emit->clip_mode = CLIP_VERTEX; + } + else if (emit->key.clip_plane_enable) { + emit->clip_mode = CLIP_LEGACY; + } + else { + emit->clip_mode = CLIP_NONE; + } +} + + +/** + * For clip distance register declarations and clip distance register + * writes we need to mask the declaration usage or instruction writemask + * (respectively) against the set of the really-enabled clipping planes. + * + * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables + * has a VS that writes to all 8 clip distance registers, but the plane enable + * flags are a subset of that. + * + * This function is used to apply the plane enable flags to the register + * declaration or instruction writemask. + * + * \param writemask the declaration usage mask or instruction writemask + * \param clip_reg_index which clip plane register is being declared/written. 
+ * The legal values are 0 and 1 (two clip planes per + * register, for a total of 8 clip planes) + */ +static unsigned +apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, + unsigned writemask, unsigned clip_reg_index) +{ + unsigned shift; + + assert(clip_reg_index < 2); + + /* four clip planes per clip register: */ + shift = clip_reg_index * 4; + writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); + + return writemask; +} + + +/** + * Translate gallium shader type into VGPU10 type. + */ +static VGPU10_PROGRAM_TYPE +translate_shader_type(unsigned type) +{ + switch (type) { + case PIPE_SHADER_VERTEX: + return VGPU10_VERTEX_SHADER; + case PIPE_SHADER_GEOMETRY: + return VGPU10_GEOMETRY_SHADER; + case PIPE_SHADER_FRAGMENT: + return VGPU10_PIXEL_SHADER; + default: + assert(!"Unexpected shader type"); + return VGPU10_VERTEX_SHADER; + } +} + + +/** + * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x + * Note: we only need to translate the opcodes for "simple" instructions, + * as seen below. All other opcodes are handled/translated specially. + */ +static VGPU10_OPCODE_TYPE +translate_opcode(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + return VGPU10_OPCODE_MOV; + case TGSI_OPCODE_MUL: + return VGPU10_OPCODE_MUL; + case TGSI_OPCODE_ADD: + return VGPU10_OPCODE_ADD; + case TGSI_OPCODE_DP3: + return VGPU10_OPCODE_DP3; + case TGSI_OPCODE_DP4: + return VGPU10_OPCODE_DP4; + case TGSI_OPCODE_MIN: + return VGPU10_OPCODE_MIN; + case TGSI_OPCODE_MAX: + return VGPU10_OPCODE_MAX; + case TGSI_OPCODE_MAD: + return VGPU10_OPCODE_MAD; + case TGSI_OPCODE_SQRT: + return VGPU10_OPCODE_SQRT; + case TGSI_OPCODE_FRC: + return VGPU10_OPCODE_FRC; + case TGSI_OPCODE_FLR: + return VGPU10_OPCODE_ROUND_NI; + case TGSI_OPCODE_FSEQ: + return VGPU10_OPCODE_EQ; + case TGSI_OPCODE_FSGE: + return VGPU10_OPCODE_GE; + case TGSI_OPCODE_FSNE: + return VGPU10_OPCODE_NE; + case TGSI_OPCODE_DDX: + return VGPU10_OPCODE_DERIV_RTX; + case TGSI_OPCODE_DDY: + return VGPU10_OPCODE_DERIV_RTY; + case TGSI_OPCODE_RET: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_DIV: + return VGPU10_OPCODE_DIV; + case TGSI_OPCODE_IDIV: + return VGPU10_OPCODE_IDIV; + case TGSI_OPCODE_DP2: + return VGPU10_OPCODE_DP2; + case TGSI_OPCODE_BRK: + return VGPU10_OPCODE_BREAK; + case TGSI_OPCODE_IF: + return VGPU10_OPCODE_IF; + case TGSI_OPCODE_ELSE: + return VGPU10_OPCODE_ELSE; + case TGSI_OPCODE_ENDIF: + return VGPU10_OPCODE_ENDIF; + case TGSI_OPCODE_CEIL: + return VGPU10_OPCODE_ROUND_PI; + case TGSI_OPCODE_I2F: + return VGPU10_OPCODE_ITOF; + case TGSI_OPCODE_NOT: + return VGPU10_OPCODE_NOT; + case TGSI_OPCODE_TRUNC: + return VGPU10_OPCODE_ROUND_Z; + case TGSI_OPCODE_SHL: + return VGPU10_OPCODE_ISHL; + case TGSI_OPCODE_AND: + return VGPU10_OPCODE_AND; + case TGSI_OPCODE_OR: + return VGPU10_OPCODE_OR; + case TGSI_OPCODE_XOR: + return VGPU10_OPCODE_XOR; + case TGSI_OPCODE_CONT: + return VGPU10_OPCODE_CONTINUE; + case TGSI_OPCODE_EMIT: + return VGPU10_OPCODE_EMIT; + case TGSI_OPCODE_ENDPRIM: + return VGPU10_OPCODE_CUT; + case TGSI_OPCODE_BGNLOOP: + return VGPU10_OPCODE_LOOP; + case TGSI_OPCODE_ENDLOOP: + return VGPU10_OPCODE_ENDLOOP; + case TGSI_OPCODE_ENDSUB: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_NOP: + return VGPU10_OPCODE_NOP; + case TGSI_OPCODE_BREAKC: + return VGPU10_OPCODE_BREAKC; + case TGSI_OPCODE_END: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_F2I: + return VGPU10_OPCODE_FTOI; + case TGSI_OPCODE_IMAX: + return VGPU10_OPCODE_IMAX; + case TGSI_OPCODE_IMIN: + return VGPU10_OPCODE_IMIN; + case TGSI_OPCODE_UDIV: + 
case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_MOD: + return VGPU10_OPCODE_UDIV; + case TGSI_OPCODE_IMUL_HI: + return VGPU10_OPCODE_IMUL; + case TGSI_OPCODE_INEG: + return VGPU10_OPCODE_INEG; + case TGSI_OPCODE_ISHR: + return VGPU10_OPCODE_ISHR; + case TGSI_OPCODE_ISGE: + return VGPU10_OPCODE_IGE; + case TGSI_OPCODE_ISLT: + return VGPU10_OPCODE_ILT; + case TGSI_OPCODE_F2U: + return VGPU10_OPCODE_FTOU; + case TGSI_OPCODE_UADD: + return VGPU10_OPCODE_IADD; + case TGSI_OPCODE_U2F: + return VGPU10_OPCODE_UTOF; + case TGSI_OPCODE_UCMP: + return VGPU10_OPCODE_MOVC; + case TGSI_OPCODE_UMAD: + return VGPU10_OPCODE_UMAD; + case TGSI_OPCODE_UMAX: + return VGPU10_OPCODE_UMAX; + case TGSI_OPCODE_UMIN: + return VGPU10_OPCODE_UMIN; + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMUL_HI: + return VGPU10_OPCODE_UMUL; + case TGSI_OPCODE_USEQ: + return VGPU10_OPCODE_IEQ; + case TGSI_OPCODE_USGE: + return VGPU10_OPCODE_UGE; + case TGSI_OPCODE_USHR: + return VGPU10_OPCODE_USHR; + case TGSI_OPCODE_USLT: + return VGPU10_OPCODE_ULT; + case TGSI_OPCODE_USNE: + return VGPU10_OPCODE_INE; + case TGSI_OPCODE_SWITCH: + return VGPU10_OPCODE_SWITCH; + case TGSI_OPCODE_CASE: + return VGPU10_OPCODE_CASE; + case TGSI_OPCODE_DEFAULT: + return VGPU10_OPCODE_DEFAULT; + case TGSI_OPCODE_ENDSWITCH: + return VGPU10_OPCODE_ENDSWITCH; + case TGSI_OPCODE_FSLT: + return VGPU10_OPCODE_LT; + case TGSI_OPCODE_ROUND: + return VGPU10_OPCODE_ROUND_NE; + default: + assert(!"Unexpected TGSI opcode in translate_opcode()"); + return VGPU10_OPCODE_NOP; + } +} + + +/** + * Translate a TGSI register file type into a VGPU10 operand type. + * \param array is the TGSI_FILE_TEMPORARY register an array? + */ +static VGPU10_OPERAND_TYPE +translate_register_file(enum tgsi_file_type file, boolean array) +{ + switch (file) { + case TGSI_FILE_CONSTANT: + return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; + case TGSI_FILE_INPUT: + return VGPU10_OPERAND_TYPE_INPUT; + case TGSI_FILE_OUTPUT: + return VGPU10_OPERAND_TYPE_OUTPUT; + case TGSI_FILE_TEMPORARY: + return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP + : VGPU10_OPERAND_TYPE_TEMP; + case TGSI_FILE_IMMEDIATE: + /* all immediates are 32-bit values at this time so + * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. + */ + return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; + case TGSI_FILE_SAMPLER: + return VGPU10_OPERAND_TYPE_SAMPLER; + case TGSI_FILE_SYSTEM_VALUE: + return VGPU10_OPERAND_TYPE_INPUT; + + /* XXX TODO more cases to finish */ + + default: + assert(!"Bad tgsi register file!"); + return VGPU10_OPERAND_TYPE_NULL; + } +} + + +/** + * Emit a null dst register + */ +static void +emit_null_dst_register(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OperandToken0 operand; + + operand.value = 0; + operand.operandType = VGPU10_OPERAND_TYPE_NULL; + operand.numComponents = VGPU10_OPERAND_0_COMPONENT; + + emit_dword(emit, operand.value); +} + + +/** + * If the given register is a temporary, return the array ID. + * Else return zero. + */ +static unsigned +get_temp_array_id(const struct svga_shader_emitter_v10 *emit, + unsigned file, unsigned index) +{ + if (file == TGSI_FILE_TEMPORARY) { + return emit->temp_map[index].arrayId; + } + else { + return 0; + } +} + + +/** + * If the given register is a temporary, convert the index from a TGSI + * TEMPORARY index to a VGPU10 temp index. 
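temp_map gives each TGSI TEMP either a plain VGPU10 temp (arrayId == 0, addressed as r#) or a slot in an indexable temp array (arrayId > 0, addressed as x#[element]). A small illustrative consumer of that mapping; the emitter itself does this inline via get_temp_array_id() and remap_temp_index() in emit_dst_register()/emit_src_register():

/* Illustration only: print the VGPU10 spelling of a TGSI temp. */
static void
print_temp_operand_sketch(const struct svga_shader_emitter_v10 *emit,
                          unsigned tgsi_temp_index)
{
   const unsigned array = emit->temp_map[tgsi_temp_index].arrayId;
   const unsigned index = emit->temp_map[tgsi_temp_index].index;

   if (array == 0)
      debug_printf("r%u\n", index);            /* plain temp register */
   else
      debug_printf("x%u[%u]\n", array, index); /* indexable temp array */
}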
+ */ +static unsigned +remap_temp_index(const struct svga_shader_emitter_v10 *emit, + unsigned file, unsigned index) +{ + if (file == TGSI_FILE_TEMPORARY) { + return emit->temp_map[index].index; + } + else { + return index; + } +} + + +/** + * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). + * Note: the operandType field must already be initialized. + */ +static VGPU10OperandToken0 +setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, + VGPU10OperandToken0 operand0, + unsigned file, + boolean indirect, boolean index2D, + unsigned tempArrayID) +{ + unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D; + + /* + * Compute index dimensions + */ + if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || + operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { + /* there's no swizzle for in-line immediates */ + indexDim = VGPU10_OPERAND_INDEX_0D; + assert(operand0.selectionMode == 0); + } + else { + if (index2D || + tempArrayID > 0 || + operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { + indexDim = VGPU10_OPERAND_INDEX_2D; + } + else { + indexDim = VGPU10_OPERAND_INDEX_1D; + } + } + + /* + * Compute index representations (immediate, relative, etc). + */ + if (tempArrayID > 0) { + assert(file == TGSI_FILE_TEMPORARY); + /* First index is the array ID, second index is the array element */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + if (indirect) { + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + else { + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + } + } + else if (indirect) { + if (file == TGSI_FILE_CONSTANT) { + /* index[0] indicates which constant buffer while index[1] indicates + * the position in the constant buffer. + */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + else { + /* All other register files are 1-dimensional */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + } + else { + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + } + + operand0.indexDimension = indexDim; + operand0.index0Representation = index0Rep; + operand0.index1Representation = index1Rep; + + return operand0; +} + + +/** + * Emit the operand for expressing an address register for indirect indexing. + * Note that the address register is really just a temp register. + * \param addr_reg_index which address register to use + */ +static void +emit_indirect_register(struct svga_shader_emitter_v10 *emit, + unsigned addr_reg_index) +{ + unsigned tmp_reg_index; + VGPU10OperandToken0 operand0; + + assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); + + tmp_reg_index = emit->address_reg_index[addr_reg_index]; + + /* operand0 is a simple temporary register, selecting one component */ + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + operand0.swizzleX = 0; + operand0.swizzleY = 1; + operand0.swizzleZ = 2; + operand0.swizzleW = 3; + + emit_dword(emit, operand0.value); + emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); +} + + +/** + * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 
+ * \param emit the emitter context + * \param reg the TGSI dst register to translate + */ +static void +emit_dst_register(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *reg) +{ + unsigned file = reg->Register.File; + unsigned index = reg->Register.Index; + const unsigned sem_name = emit->info.output_semantic_name[index]; + const unsigned sem_index = emit->info.output_semantic_index[index]; + unsigned writemask = reg->Register.WriteMask; + const unsigned indirect = reg->Register.Indirect; + const unsigned tempArrayId = get_temp_array_id(emit, file, index); + const unsigned index2d = reg->Register.Dimension; + VGPU10OperandToken0 operand0; + + if (file == TGSI_FILE_OUTPUT) { + if (emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY) { + if (index == emit->vposition.out_index && + emit->vposition.tmp_index != INVALID_INDEX) { + /* replace OUTPUT[POS] with TEMP[POS]. We need to store the + * vertex position result in a temporary so that we can modify + * it in the post_helper() code. + */ + file = TGSI_FILE_TEMPORARY; + index = emit->vposition.tmp_index; + } + else if (sem_name == TGSI_SEMANTIC_CLIPDIST && + emit->clip_dist_tmp_index != INVALID_INDEX) { + /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. + * We store the clip distance in a temporary first, then + * we'll copy it to the shadow copy and to CLIPDIST with the + * enabled planes mask in emit_clip_distance_instructions(). + */ + file = TGSI_FILE_TEMPORARY; + index = emit->clip_dist_tmp_index + sem_index; + } + else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && + emit->clip_vertex_tmp_index != INVALID_INDEX) { + /* replace the CLIPVERTEX output register with a temporary */ + assert(emit->clip_mode == CLIP_VERTEX); + assert(sem_index == 0); + file = TGSI_FILE_TEMPORARY; + index = emit->clip_vertex_tmp_index; + } + } + else if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (sem_name == TGSI_SEMANTIC_POSITION) { + /* Fragment depth output register */ + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + emit_dword(emit, operand0.value); + return; + } + else if (index == emit->fs.color_out_index[0] && + emit->fs.color_tmp_index != INVALID_INDEX) { + /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the + * fragment color result in a temporary so that we can read it + * it in the post_helper() code. + */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.color_tmp_index; + } + else { + /* Typically, for fragment shaders, the output register index + * matches the color semantic index. But not when we write to + * the fragment depth register. In that case, OUT[0] will be + * fragdepth and OUT[1] will be the 0th color output. We need + * to use the semantic index for color outputs. + */ + assert(sem_name == TGSI_SEMANTIC_COLOR); + index = emit->info.output_semantic_index[index]; + + emit->num_output_writes++; + } + } + } + + /* init operand tokens to all zero */ + operand0.value = 0; + + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + /* the operand has a writemask */ + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + + /* Which of the four dest components to write to. Note that we can use a + * simple assignment here since TGSI writemasks match VGPU10 writemasks. 
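 * (For example, TGSI_WRITEMASK_XZ -- bits 0 and 2 -- selects the same X and
 * Z bits in the VGPU10 component mask; the STATIC_ASSERT below spot-checks
 * this one-to-one bit layout.)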
+ */ + STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); + operand0.mask = writemask; + + /* translate TGSI register file type to VGPU10 operand type */ + operand0.operandType = translate_register_file(file, tempArrayId > 0); + + check_register_index(emit, operand0.operandType, index); + + operand0 = setup_operand0_indexing(emit, operand0, file, indirect, + index2d, tempArrayId); + + /* Emit tokens */ + emit_dword(emit, operand0.value); + if (tempArrayId > 0) { + emit_dword(emit, tempArrayId); + } + + emit_dword(emit, remap_temp_index(emit, file, index)); + + if (indirect) { + emit_indirect_register(emit, reg->Indirect.Index); + } +} + + +/** + * Translate a src register of a TGSI instruction and emit VGPU10 tokens. + */ +static void +emit_src_register(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_src_register *reg) +{ + unsigned file = reg->Register.File; + unsigned index = reg->Register.Index; + const unsigned indirect = reg->Register.Indirect; + const unsigned tempArrayId = get_temp_array_id(emit, file, index); + const unsigned index2d = reg->Register.Dimension; + const unsigned swizzleX = reg->Register.SwizzleX; + const unsigned swizzleY = reg->Register.SwizzleY; + const unsigned swizzleZ = reg->Register.SwizzleZ; + const unsigned swizzleW = reg->Register.SwizzleW; + const unsigned absolute = reg->Register.Absolute; + const unsigned negate = reg->Register.Negate; + bool is_prim_id = FALSE; + + VGPU10OperandToken0 operand0; + VGPU10OperandToken1 operand1; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + file == TGSI_FILE_INPUT) { + if (index == emit->fs.face_input_index) { + /* Replace INPUT[FACE] with TEMP[FACE] */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.face_tmp_index; + } + else if (index == emit->fs.fragcoord_input_index) { + /* Replace INPUT[POSITION] with TEMP[POSITION] */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.fragcoord_tmp_index; + } + else { + /* We remap fragment shader inputs to that FS input indexes + * match up with VS/GS output indexes. + */ + index = emit->linkage.input_map[index]; + } + } + else if (emit->unit == PIPE_SHADER_GEOMETRY && + file == TGSI_FILE_INPUT) { + is_prim_id = (index == emit->gs.prim_id_index); + index = emit->linkage.input_map[index]; + } + else if (emit->unit == PIPE_SHADER_VERTEX) { + if (file == TGSI_FILE_INPUT) { + /* if input is adjusted... 
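 * (editorial note, inferred from the flags tested below: i.e. when any
 * w-to-1, int-to-float, uint-to-float, BGRA or packed-uint fixup applies to
 * this attribute, the pre-adjusted value is read from a temporary instead)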
*/ + if ((emit->key.vs.adjust_attrib_w_1 | + emit->key.vs.adjust_attrib_itof | + emit->key.vs.adjust_attrib_utof | + emit->key.vs.attrib_is_bgra | + emit->key.vs.attrib_puint_to_snorm | + emit->key.vs.attrib_puint_to_uscaled | + emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { + file = TGSI_FILE_TEMPORARY; + index = emit->vs.adjusted_input[index]; + } + } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + assert(index < Elements(emit->system_value_indexes)); + index = emit->system_value_indexes[index]; + } + } + + operand0.value = operand1.value = 0; + + if (is_prim_id) { + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + } + else { + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = translate_register_file(file, tempArrayId > 0); + } + + operand0 = setup_operand0_indexing(emit, operand0, file, indirect, + index2d, tempArrayId); + + if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && + operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { + /* there's no swizzle for in-line immediates */ + if (swizzleX == swizzleY && + swizzleX == swizzleZ && + swizzleX == swizzleW) { + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + } + else { + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + } + + operand0.swizzleX = swizzleX; + operand0.swizzleY = swizzleY; + operand0.swizzleZ = swizzleZ; + operand0.swizzleW = swizzleW; + + if (absolute || negate) { + operand0.extended = 1; + operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; + if (absolute && !negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; + if (!absolute && negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; + if (absolute && negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; + } + } + + /* Emit the operand tokens */ + emit_dword(emit, operand0.value); + if (operand0.extended) + emit_dword(emit, operand1.value); + + if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { + /* Emit the four float/int in-line immediate values */ + unsigned *c; + assert(index < Elements(emit->immediates)); + assert(file == TGSI_FILE_IMMEDIATE); + assert(swizzleX < 4); + assert(swizzleY < 4); + assert(swizzleZ < 4); + assert(swizzleW < 4); + c = (unsigned *) emit->immediates[index]; + emit_dword(emit, c[swizzleX]); + emit_dword(emit, c[swizzleY]); + emit_dword(emit, c[swizzleZ]); + emit_dword(emit, c[swizzleW]); + } + else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { + /* Emit the register index(es) */ + if (index2d || + operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { + emit_dword(emit, reg->Dimension.Index); + } + + if (tempArrayId > 0) { + emit_dword(emit, tempArrayId); + } + + emit_dword(emit, remap_temp_index(emit, file, index)); + + if (indirect) { + emit_indirect_register(emit, reg->Indirect.Index); + } + } +} + + +/** + * Emit a resource operand (for use with a SAMPLE instruction). 
+ */ +static void +emit_resource_register(struct svga_shader_emitter_v10 *emit, + unsigned resource_number) +{ + VGPU10OperandToken0 operand0; + + check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_Y; + operand0.swizzleZ = VGPU10_COMPONENT_Z; + operand0.swizzleW = VGPU10_COMPONENT_W; + + emit_dword(emit, operand0.value); + emit_dword(emit, resource_number); +} + + +/** + * Emit a sampler operand (for use with a SAMPLE instruction). + */ +static void +emit_sampler_register(struct svga_shader_emitter_v10 *emit, + unsigned sampler_number) +{ + VGPU10OperandToken0 operand0; + + check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + + emit_dword(emit, operand0.value); + emit_dword(emit, sampler_number); +} + + +/** + * Emit an operand which reads the IS_FRONT_FACING register. + */ +static void +emit_face_register(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OperandToken0 operand0; + unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_X; + operand0.swizzleZ = VGPU10_COMPONENT_X; + operand0.swizzleW = VGPU10_COMPONENT_X; + + emit_dword(emit, operand0.value); + emit_dword(emit, index); +} + + +/** + * Emit the token for a VGPU10 opcode. + * \param saturate clamp result to [0,1]? + */ +static void +emit_opcode(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate) +{ + VGPU10OpcodeToken0 token0; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = vgpu10_opcode; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.saturate = saturate; + + emit_dword(emit, token0.value); +} + + +/** + * Emit the token for a VGPU10 resinfo instruction. + * \param modifier return type modifier, _uint or _rcpFloat. + * TODO: We may want to remove this parameter if it will + * only ever be used as _uint. + */ +static void +emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, + VGPU10_RESINFO_RETURN_TYPE modifier) +{ + VGPU10OpcodeToken0 token0; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = VGPU10_OPCODE_RESINFO; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.resinfoReturnType = modifier; + + emit_dword(emit, token0.value); +} + + +/** + * Emit opcode tokens for a texture sample instruction. Texture instructions + * can be rather complicated (texel offsets, etc) so we have this specialized + * function. 
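 * For example (illustrative): texel offsets of {1, 0, 0} set token0.extended
 * and emit a second, extended opcode token carrying offsetU/V/W; all-zero
 * offsets emit just the plain opcode token.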
+ */ +static void +emit_sample_opcode(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate, + const int offsets[3]) +{ + VGPU10OpcodeToken0 token0; + VGPU10OpcodeToken1 token1; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = vgpu10_opcode; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.saturate = saturate; + + if (offsets[0] || offsets[1] || offsets[2]) { + assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + + token0.extended = 1; + token1.value = 0; + token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; + token1.offsetU = offsets[0]; + token1.offsetV = offsets[1]; + token1.offsetW = offsets[2]; + } + + emit_dword(emit, token0.value); + if (token0.extended) { + emit_dword(emit, token1.value); + } +} + + +/** + * Emit a DISCARD opcode token. + * If nonzero is set, we'll discard the fragment if the X component is not 0. + * Otherwise, we'll discard the fragment if the X component is 0. + */ +static void +emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) +{ + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DISCARD; + if (nonzero) + opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; + + emit_dword(emit, opcode0.value); +} + + +/** + * We need to call this before we begin emitting a VGPU10 instruction. + */ +static void +begin_emit_instruction(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->inst_start_token == 0); + /* Save location of the instruction's VGPU10OpcodeToken0 token. + * Note, we can't save a pointer because it would become invalid if + * we have to realloc the output buffer. + */ + emit->inst_start_token = emit_get_num_tokens(emit); +} + + +/** + * We need to call this after we emit the last token of a VGPU10 instruction. + * This function patches in the opcode token's instructionLength field. + */ +static void +end_emit_instruction(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; + unsigned inst_length; + + assert(emit->inst_start_token > 0); + + if (emit->discard_instruction) { + /* Back up the emit->ptr to where this instruction started so + * that we discard the current instruction. + */ + emit->ptr = (char *) (tokens + emit->inst_start_token); + } + else { + /* Compute instruction length and patch that into the start of + * the instruction. + */ + inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; + + assert(inst_length > 0); + + tokens[emit->inst_start_token].instructionLength = inst_length; + } + + emit->inst_start_token = 0; /* reset to zero for error checking */ + emit->discard_instruction = FALSE; +} + + +/** + * Return index for a free temporary register. + */ +static unsigned +get_temp_index(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); + return emit->num_shader_temps + emit->internal_temp_count++; +} + + +/** + * Release the temporaries which were generated by get_temp_index(). + */ +static void +free_temp_indexes(struct svga_shader_emitter_v10 *emit) +{ + emit->internal_temp_count = 0; +} + + +/** + * Create a tgsi_full_src_register. 
+ */ +static struct tgsi_full_src_register +make_src_reg(unsigned file, unsigned index) +{ + struct tgsi_full_src_register reg; + + memset(®, 0, sizeof(reg)); + reg.Register.File = file; + reg.Register.Index = index; + reg.Register.SwizzleX = TGSI_SWIZZLE_X; + reg.Register.SwizzleY = TGSI_SWIZZLE_Y; + reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; + reg.Register.SwizzleW = TGSI_SWIZZLE_W; + return reg; +} + + +/** + * Create a tgsi_full_src_register for a temporary. + */ +static struct tgsi_full_src_register +make_src_temp_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_TEMPORARY, index); +} + + +/** + * Create a tgsi_full_src_register for a constant. + */ +static struct tgsi_full_src_register +make_src_const_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_CONSTANT, index); +} + + +/** + * Create a tgsi_full_src_register for an immediate constant. + */ +static struct tgsi_full_src_register +make_src_immediate_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_IMMEDIATE, index); +} + + +/** + * Create a tgsi_full_dst_register. + */ +static struct tgsi_full_dst_register +make_dst_reg(unsigned file, unsigned index) +{ + struct tgsi_full_dst_register reg; + + memset(®, 0, sizeof(reg)); + reg.Register.File = file; + reg.Register.Index = index; + reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; + return reg; +} + + +/** + * Create a tgsi_full_dst_register for a temporary. + */ +static struct tgsi_full_dst_register +make_dst_temp_reg(unsigned index) +{ + return make_dst_reg(TGSI_FILE_TEMPORARY, index); +} + + +/** + * Create a tgsi_full_dst_register for an output. + */ +static struct tgsi_full_dst_register +make_dst_output_reg(unsigned index) +{ + return make_dst_reg(TGSI_FILE_OUTPUT, index); +} + + +/** + * Create negated tgsi_full_src_register. + */ +static struct tgsi_full_src_register +negate_src(const struct tgsi_full_src_register *reg) +{ + struct tgsi_full_src_register neg = *reg; + neg.Register.Negate = !reg->Register.Negate; + return neg; +} + +/** + * Create absolute value of a tgsi_full_src_register. + */ +static struct tgsi_full_src_register +absolute_src(const struct tgsi_full_src_register *reg) +{ + struct tgsi_full_src_register absolute = *reg; + absolute.Register.Absolute = 1; + return absolute; +} + + +/** Return the named swizzle term from the src register */ +static inline unsigned +get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) +{ + switch (term) { + case TGSI_SWIZZLE_X: + return reg->Register.SwizzleX; + case TGSI_SWIZZLE_Y: + return reg->Register.SwizzleY; + case TGSI_SWIZZLE_Z: + return reg->Register.SwizzleZ; + case TGSI_SWIZZLE_W: + return reg->Register.SwizzleW; + default: + assert(!"Bad swizzle"); + return TGSI_SWIZZLE_X; + } +} + + +/** + * Create swizzled tgsi_full_src_register. + */ +static struct tgsi_full_src_register +swizzle_src(const struct tgsi_full_src_register *reg, + unsigned swizzleX, unsigned swizzleY, + unsigned swizzleZ, unsigned swizzleW) +{ + struct tgsi_full_src_register swizzled = *reg; + /* Note: we swizzle the current swizzle */ + swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); + swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); + swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); + swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); + return swizzled; +} + + +/** + * Create swizzled tgsi_full_src_register where all the swizzle + * terms are the same. 
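 * For example (illustrative):
 *    struct tgsi_full_src_register t = make_src_temp_reg(3);
 *    struct tgsi_full_src_register w = scalar_src(&t, TGSI_SWIZZLE_W);
 * gives a TEMP[3].wwww source.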
+ */ +static struct tgsi_full_src_register +scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) +{ + struct tgsi_full_src_register swizzled = *reg; + /* Note: we swizzle the current swizzle */ + swizzled.Register.SwizzleX = + swizzled.Register.SwizzleY = + swizzled.Register.SwizzleZ = + swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); + return swizzled; +} + + +/** + * Create new tgsi_full_dst_register with writemask. + * \param mask bitmask of TGSI_WRITEMASK_[XYZW] + */ +static struct tgsi_full_dst_register +writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) +{ + struct tgsi_full_dst_register masked = *reg; + masked.Register.WriteMask = mask; + return masked; +} + + +/** + * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. + */ +static boolean +same_swizzle_terms(const struct tgsi_full_src_register *reg) +{ + return (reg->Register.SwizzleX == reg->Register.SwizzleY && + reg->Register.SwizzleY == reg->Register.SwizzleZ && + reg->Register.SwizzleZ == reg->Register.SwizzleW); +} + + +/** + * Search the vector for the value 'x' and return its position. + */ +static int +find_imm_in_vec4(const union tgsi_immediate_data vec[4], + union tgsi_immediate_data x) +{ + unsigned i; + for (i = 0; i < 4; i++) { + if (vec[i].Int == x.Int) + return i; + } + return -1; +} + + +/** + * Helper used by make_immediate_reg(), make_immediate_reg_4(). + */ +static int +find_immediate(struct svga_shader_emitter_v10 *emit, + union tgsi_immediate_data x, unsigned startIndex) +{ + const unsigned endIndex = emit->num_immediates; + unsigned i; + + assert(emit->immediates_emitted); + + /* Search immediates for x, y, z, w */ + for (i = startIndex; i < endIndex; i++) { + if (x.Int == emit->immediates[i][0].Int || + x.Int == emit->immediates[i][1].Int || + x.Int == emit->immediates[i][2].Int || + x.Int == emit->immediates[i][3].Int) { + return i; + } + } + /* Should never try to use an immediate value that wasn't pre-declared */ + assert(!"find_immediate() failed!"); + return -1; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal + * union tgsi_immediate_data[4] value. + * Note: the values must have been previously declared/allocated in + * emit_pre_helpers(). And, all of x,y,z,w must be located in the same + * vec4 immediate. 
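 * For example (illustrative values): if IMM[2] = {0.0, 1.0, 0.5, 2.0} was
 * pre-declared, a request for {1.0, 1.0, 0.0, 0.0} returns IMM[2] with a
 * .yyxx swizzle; a value not found in any single pre-declared vec4 hits the
 * assertion below.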
+ */ +static struct tgsi_full_src_register +make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, + const union tgsi_immediate_data imm[4]) +{ + struct tgsi_full_src_register reg; + unsigned i; + + for (i = 0; i < emit->num_common_immediates; i++) { + /* search for first component value */ + int immpos = find_immediate(emit, imm[0], i); + int x, y, z, w; + + assert(immpos >= 0); + + /* find remaining components within the immediate vector */ + x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); + y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); + z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); + w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); + + if (x >=0 && y >= 0 && z >= 0 && w >= 0) { + /* found them all */ + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + reg.Register.Index = immpos; + reg.Register.SwizzleX = x; + reg.Register.SwizzleY = y; + reg.Register.SwizzleZ = z; + reg.Register.SwizzleW = w; + return reg; + } + /* else, keep searching */ + } + + assert(!"Failed to find immediate register!"); + + /* Just return IMM[0].xxxx */ + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + return reg; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal + * union tgsi_immediate_data value of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg(struct svga_shader_emitter_v10 *emit, + union tgsi_immediate_data value) +{ + struct tgsi_full_src_register reg; + int immpos = find_immediate(emit, value, 0); + + assert(immpos >= 0); + + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + reg.Register.Index = immpos; + reg.Register.SwizzleX = + reg.Register.SwizzleY = + reg.Register.SwizzleZ = + reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); + + return reg; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal float[4] value. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, + float x, float y, float z, float w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Float = x; + imm[1].Float = y; + imm[2].Float = z; + imm[3].Float = w; + return make_immediate_reg_4(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal float value + * of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) +{ + union tgsi_immediate_data imm; + imm.Float = value; + return make_immediate_reg(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal int[4] vector. + */ +static struct tgsi_full_src_register +make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, + int x, int y, int z, int w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Int = x; + imm[1].Int = y; + imm[2].Int = z; + imm[3].Int = w; + return make_immediate_reg_4(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal int value + * of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. 
+ */ +static struct tgsi_full_src_register +make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) +{ + union tgsi_immediate_data imm; + imm.Int = value; + return make_immediate_reg(emit, imm); +} + + +/** + * Allocate space for a union tgsi_immediate_data[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_4(struct svga_shader_emitter_v10 *emit, + const union tgsi_immediate_data imm[4]) +{ + unsigned n = emit->num_immediates++; + assert(!emit->immediates_emitted); + assert(n < Elements(emit->immediates)); + emit->immediates[n][0] = imm[0]; + emit->immediates[n][1] = imm[1]; + emit->immediates[n][2] = imm[2]; + emit->immediates[n][3] = imm[3]; + return n; +} + + +/** + * Allocate space for a float[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, + float x, float y, float z, float w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Float = x; + imm[1].Float = y; + imm[2].Float = z; + imm[3].Float = w; + return alloc_immediate_4(emit, imm); +} + + +/** + * Allocate space for a int[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, + int x, int y, int z, int w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Int = x; + imm[1].Int = y; + imm[2].Int = z; + imm[3].Int = w; + return alloc_immediate_4(emit, imm); +} + + +/** + * Allocate a shader input to store a system value. + */ +static unsigned +alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) +{ + const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; + assert(index < Elements(emit->system_value_indexes)); + emit->system_value_indexes[index] = n; + return n; +} + + +/** + * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. + */ +static boolean +emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_immediate *imm) +{ + /* We don't actually emit any code here. We just save the + * immediate values and emit them later. + */ + alloc_immediate_4(emit, imm->u); + return TRUE; +} + + +/** + * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block + * containing all the immediate values previously allocated + * with alloc_immediate_4(). + */ +static boolean +emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 token; + + assert(!emit->immediates_emitted); + + token.value = 0; + token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; + token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; + + /* Note: no begin/end_emit_instruction() calls */ + emit_dword(emit, token.value); + emit_dword(emit, 2 + 4 * emit->num_immediates); + emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); + + emit->immediates_emitted = TRUE; + + return TRUE; +} + + +/** + * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 + * interpolation mode. + * \return a VGPU10_INTERPOLATION_x value + */ +static unsigned +translate_interpolation(const struct svga_shader_emitter_v10 *emit, + unsigned interp, unsigned interpolate_loc) +{ + if (interp == TGSI_INTERPOLATE_COLOR) { + interp = emit->key.fs.flatshade ? 
+ TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; + } + + switch (interp) { + case TGSI_INTERPOLATE_CONSTANT: + return VGPU10_INTERPOLATION_CONSTANT; + case TGSI_INTERPOLATE_LINEAR: + return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; + case TGSI_INTERPOLATE_PERSPECTIVE: + return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? + VGPU10_INTERPOLATION_LINEAR_CENTROID : + VGPU10_INTERPOLATION_LINEAR; + default: + assert(!"Unexpected interpolation mode"); + return VGPU10_INTERPOLATION_CONSTANT; + } +} + + +/** + * Translate a TGSI property to VGPU10. + * Don't emit any instructions yet, only need to gather the primitive property information. + * The output primitive topology might be changed later. The final property instructions + * will be emitted as part of the pre-helper code. + */ +static boolean +emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_property *prop) +{ + static const VGPU10_PRIMITIVE primType[] = { + VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ + VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + }; + + static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { + VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + }; + + static const unsigned inputArraySize[] = { + 0, /* VGPU10_PRIMITIVE_UNDEFINED */ + 1, /* VGPU10_PRIMITIVE_POINT */ + 2, /* VGPU10_PRIMITIVE_LINE */ + 3, /* VGPU10_PRIMITIVE_TRIANGLE */ + 0, + 0, + 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ + 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ + }; + + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_GS_INPUT_PRIM: + assert(prop->u[0].Data < Elements(primType)); + emit->gs.prim_type = primType[prop->u[0].Data]; + assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); + emit->gs.input_size = inputArraySize[emit->gs.prim_type]; + break; + + case 
TGSI_PROPERTY_GS_OUTPUT_PRIM: + assert(prop->u[0].Data < Elements(primTopology)); + emit->gs.prim_topology = primTopology[prop->u[0].Data]; + assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); + break; + + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + emit->gs.max_out_vertices = prop->u[0].Data; + break; + + default: + break; + } + + return TRUE; +} + + +static void +emit_property_instruction(struct svga_shader_emitter_v10 *emit, + VGPU10OpcodeToken0 opcode0, unsigned nData, + unsigned data) +{ + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + if (nData) + emit_dword(emit, data); + end_emit_instruction(emit); +} + + +/** + * Emit property instructions + */ +static void +emit_property_instructions(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + /* emit input primitive type declaration */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; + opcode0.primitive = emit->gs.prim_type; + emit_property_instruction(emit, opcode0, 0, 0); + + /* emit output primitive topology declaration */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; + opcode0.primitiveTopology = emit->gs.prim_topology; + emit_property_instruction(emit, opcode0, 0, 0); + + /* emit max output vertices */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; + emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); +} + + +/** + * Emit a vgpu10 declaration "instruction". + * \param index the register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. + */ +static void +emit_decl_instruction(struct svga_shader_emitter_v10 *emit, + VGPU10OpcodeToken0 opcode0, + VGPU10OperandToken0 operand0, + VGPU10NameToken name_token, + unsigned index, unsigned size) +{ + assert(opcode0.opcodeType); + assert(operand0.mask); + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + + emit_dword(emit, operand0.value); + + if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { + /* Next token is the index of the register to declare */ + emit_dword(emit, index); + } + else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { + /* Next token is the size of the register */ + emit_dword(emit, size); + + /* Followed by the index of the register */ + emit_dword(emit, index); + } + + if (name_token.value) { + emit_dword(emit, name_token.value); + } + + end_emit_instruction(emit); +} + + +/** + * Emit the declaration for a shader input. + * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx + * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x + * \param dim index dimension + * \param index the input register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. 
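 *             (e.g. 3 for a geometry shader with triangle input -- see the
 *             inputArraySize[] table above)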
+ * \param name one of VGPU10_NAME_x
+ * \param numComp number of components
+ * \param selMode component selection mode
+ * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ * \param interpMode interpolation mode
+ */
+static void
+emit_input_declaration(struct svga_shader_emitter_v10 *emit,
+                       unsigned opcodeType, unsigned operandType,
+                       unsigned dim, unsigned index, unsigned size,
+                       unsigned name, unsigned numComp,
+                       unsigned selMode, unsigned usageMask,
+                       unsigned interpMode)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
+   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
+          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
+   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
+   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
+   assert(dim <= VGPU10_OPERAND_INDEX_3D);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_INSTANCE_ID ||
+          name == VGPU10_NAME_VERTEX_ID ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_IS_FRONT_FACE);
+   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
+          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
+
+   check_register_index(emit, opcodeType, index);
+
+   opcode0.value = operand0.value = name_token.value = 0;
+
+   opcode0.opcodeType = opcodeType;
+   opcode0.interpolationMode = interpMode;
+
+   operand0.operandType = operandType;
+   operand0.numComponents = numComp;
+   operand0.selectionMode = selMode;
+   operand0.mask = usageMask;
+   operand0.indexDimension = dim;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   if (dim == VGPU10_OPERAND_INDEX_2D)
+      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   name_token.name = name;
+
+   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
+}
+
+
+/**
+ * Emit the declaration for a shader output.
+ * \param type one of VGPU10_OPCODE_DCL_OUTPUTx + * \param index the output register index + * \param name one of VGPU10_NAME_x + * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values + */ +static void +emit_output_declaration(struct svga_shader_emitter_v10 *emit, + unsigned type, unsigned index, + unsigned name, unsigned usageMask) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + assert(type == VGPU10_OPCODE_DCL_OUTPUT || + type == VGPU10_OPCODE_DCL_OUTPUT_SGV || + type == VGPU10_OPCODE_DCL_OUTPUT_SIV); + assert(name == VGPU10_NAME_UNDEFINED || + name == VGPU10_NAME_POSITION || + name == VGPU10_NAME_PRIMITIVE_ID || + name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || + name == VGPU10_NAME_CLIP_DISTANCE); + + check_register_index(emit, type, index); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = type; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = usageMask; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + name_token.name = name; + + emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); +} + + +/** + * Emit the declaration for the fragment depth output. + */ +static void +emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + + emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); +} + + +/** + * Emit the declaration for a system value input/output. + */ +static void +emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, + unsigned semantic_name, unsigned index) +{ + switch (semantic_name) { + case TGSI_SEMANTIC_INSTANCEID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_INSTANCE_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED); + break; + case TGSI_SEMANTIC_VERTEXID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_VERTEX_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED); + break; + default: + ; /* XXX */ + } +} + +/** + * Translate a TGSI declaration to VGPU10. 
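 * Most register files are only counted here and declared later.  For example
 * (illustrative), "DCL TEMP[2..4], ARRAY(1), LOCAL" just records
 * temp_arrays[1] = {start 2, size 3} for emit_temporaries_declaration().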
+ */ +static boolean +emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_declaration *decl) +{ + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + /* do nothing - see emit_input_declarations() */ + return TRUE; + + case TGSI_FILE_OUTPUT: + assert(decl->Range.First == decl->Range.Last); + emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; + return TRUE; + + case TGSI_FILE_TEMPORARY: + /* Don't declare the temps here. Just keep track of how many + * and emit the declaration later. + */ + if (decl->Declaration.Array) { + /* Indexed temporary array. Save the start index of the array + * and the size of the array. + */ + const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); + unsigned i; + + assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); + + /* Save this array so we can emit the declaration for it later */ + emit->temp_arrays[arrayID].start = decl->Range.First; + emit->temp_arrays[arrayID].size = + decl->Range.Last - decl->Range.First + 1; + + emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); + assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); + emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); + + /* Fill in the temp_map entries for this array */ + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + emit->temp_map[i].arrayId = arrayID; + emit->temp_map[i].index = i - decl->Range.First; + } + } + + /* for all temps, indexed or not, keep track of highest index */ + emit->num_shader_temps = MAX2(emit->num_shader_temps, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_CONSTANT: + /* Don't declare constants here. Just keep track and emit later. */ + { + unsigned constbuf = 0, num_consts; + if (decl->Declaration.Dimension) { + constbuf = decl->Dim.Index2D; + } + /* We throw an assertion here when, in fact, the shader should never + * have linked due to constbuf index out of bounds, so we shouldn't + * have reached here. + */ + assert(constbuf < Elements(emit->num_shader_consts)); + + num_consts = MAX2(emit->num_shader_consts[constbuf], + decl->Range.Last + 1); + + if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + debug_printf("Warning: constant buffer is declared to size [%u]" + " but [%u] is the limit.\n", + num_consts, + VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + /* The linker doesn't enforce the max UBO size so we clamp here */ + emit->num_shader_consts[constbuf] = + MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + return TRUE; + + case TGSI_FILE_IMMEDIATE: + assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); + return FALSE; + + case TGSI_FILE_SYSTEM_VALUE: + emit_system_value_declaration(emit, decl->Semantic.Name, + decl->Range.First); + return TRUE; + + case TGSI_FILE_SAMPLER: + /* Don't declare samplers here. Just keep track and emit later. */ + emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); + return TRUE; + +#if 0 + case TGSI_FILE_RESOURCE: + /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ + /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ + assert(!"TGSI_FILE_RESOURCE not handled yet"); + return FALSE; +#endif + + case TGSI_FILE_ADDRESS: + emit->num_address_regs = MAX2(emit->num_address_regs, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_SAMPLER_VIEW: + /* Not used at this time, but maybe in the future. + * See emit_resource_declarations(). + */ + return TRUE; + + default: + assert(!"Unexpected type of declaration"); + return FALSE; + } +} + + + +/** + * Emit all input declarations. 
+ */ +static boolean +emit_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + + for (i = 0; i < emit->linkage.num_inputs; i++) { + unsigned semantic_name = emit->info.input_semantic_name[i]; + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + unsigned type, interpolationMode, name; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* fragment position input */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_LINEAR; + name = VGPU10_NAME_POSITION; + if (usage_mask & TGSI_WRITEMASK_W) { + /* we need to replace use of 'w' with '1/w' */ + emit->fs.fragcoord_input_index = i; + } + } + else if (semantic_name == TGSI_SEMANTIC_FACE) { + /* fragment front-facing input */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_IS_FRONT_FACE; + emit->fs.face_input_index = i; + } + else if (semantic_name == TGSI_SEMANTIC_PRIMID) { + /* primitive ID */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_PRIMITIVE_ID; + } + else { + /* general fragment input */ + type = VGPU10_OPCODE_DCL_INPUT_PS; + interpolationMode = + translate_interpolation(emit, + emit->info.input_interpolate[i], + emit->info.input_interpolate_loc[i]); + + /* keeps track if flat interpolation mode is being used */ + emit->uses_flat_interp = emit->uses_flat_interp || + (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); + + name = VGPU10_NAME_UNDEFINED; + } + + emit_input_declaration(emit, type, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, index, 1, + name, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + interpolationMode); + } + } + else if (emit->unit == PIPE_SHADER_GEOMETRY) { + + for (i = 0; i < emit->info.num_inputs; i++) { + unsigned semantic_name = emit->info.input_semantic_name[i]; + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + unsigned opcodeType, operandType; + unsigned numComp, selMode; + unsigned name; + unsigned dim; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + opcodeType = VGPU10_OPCODE_DCL_INPUT; + operandType = VGPU10_OPERAND_TYPE_INPUT; + numComp = VGPU10_OPERAND_4_COMPONENT; + selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + name = VGPU10_NAME_UNDEFINED; + + /* all geometry shader inputs are two dimensional except gl_PrimitiveID */ + dim = VGPU10_OPERAND_INDEX_2D; + + if (semantic_name == TGSI_SEMANTIC_PRIMID) { + /* Primitive ID */ + operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + dim = VGPU10_OPERAND_INDEX_0D; + numComp = VGPU10_OPERAND_0_COMPONENT; + selMode = 0; + + /* also save the register index so we can check for + * primitive id when emit src register. We need to modify the + * operand type, index dimension when emit primitive id src reg. 
+ */ + emit->gs.prim_id_index = i; + } + else if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* vertex position input */ + opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; + name = VGPU10_NAME_POSITION; + } + + emit_input_declaration(emit, opcodeType, operandType, + dim, index, + emit->gs.input_size, + name, + numComp, selMode, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED); + } + } + else { + assert(emit->unit == PIPE_SHADER_VERTEX); + + for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = i; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED); + } + } + + return TRUE; +} + + +/** + * Emit all output declarations. + */ +static boolean +emit_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->info.num_outputs; i++) { + /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ + const unsigned semantic_name = emit->info.output_semantic_name[i]; + const unsigned semantic_index = emit->info.output_semantic_index[i]; + unsigned index = i; + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (semantic_name == TGSI_SEMANTIC_COLOR) { + assert(semantic_index < Elements(emit->fs.color_out_index)); + + emit->fs.color_out_index[semantic_index] = index; + + /* The semantic index is the shader's color output/buffer index */ + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT, semantic_index, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + + if (semantic_index == 0) { + if (emit->key.fs.write_color0_to_n_cbufs > 1) { + /* Emit declarations for the additional color outputs + * for broadcasting. 
+ */ + unsigned j; + for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { + /* Allocate a new output index */ + unsigned idx = emit->info.num_outputs + j - 1; + emit->fs.color_out_index[j] = idx; + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT, idx, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + emit->info.output_semantic_index[idx] = j; + } + } + } + else { + assert(!emit->key.fs.write_color0_to_n_cbufs); + } + } + else if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* Fragment depth output */ + emit_fragdepth_output_declaration(emit); + } + else { + assert(!"Bad output semantic name"); + } + } + else { + /* VS or GS */ + unsigned name, type; + unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + + switch (semantic_name) { + case TGSI_SEMANTIC_POSITION: + assert(emit->unit != PIPE_SHADER_FRAGMENT); + type = VGPU10_OPCODE_DCL_OUTPUT_SIV; + name = VGPU10_NAME_POSITION; + /* Save the index of the vertex position output register */ + emit->vposition.out_index = index; + break; + case TGSI_SEMANTIC_CLIPDIST: + type = VGPU10_OPCODE_DCL_OUTPUT_SIV; + name = VGPU10_NAME_CLIP_DISTANCE; + /* save the starting index of the clip distance output register */ + if (semantic_index == 0) + emit->clip_dist_out_index = index; + writemask = emit->output_usage_mask[index]; + writemask = apply_clip_plane_mask(emit, writemask, semantic_index); + if (writemask == 0x0) { + continue; /* discard this do-nothing declaration */ + } + break; + case TGSI_SEMANTIC_PRIMID: + assert(emit->unit == PIPE_SHADER_GEOMETRY); + type = VGPU10_OPCODE_DCL_OUTPUT_SGV; + name = VGPU10_NAME_PRIMITIVE_ID; + break; + case TGSI_SEMANTIC_LAYER: + assert(emit->unit == PIPE_SHADER_GEOMETRY); + type = VGPU10_OPCODE_DCL_OUTPUT_SGV; + name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + type = VGPU10_OPCODE_DCL_OUTPUT; + name = VGPU10_NAME_UNDEFINED; + emit->clip_vertex_out_index = index; + break; + default: + /* generic output */ + type = VGPU10_OPCODE_DCL_OUTPUT; + name = VGPU10_NAME_UNDEFINED; + } + + emit_output_declaration(emit, type, index, name, writemask); + } + } + + if (emit->vposition.so_index != INVALID_INDEX && + emit->vposition.out_index != INVALID_INDEX) { + + assert(emit->unit != PIPE_SHADER_FRAGMENT); + + /* Emit the declaration for the non-adjusted vertex position + * for stream output purpose + */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->vposition.so_index, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + } + + if (emit->clip_dist_so_index != INVALID_INDEX && + emit->clip_dist_out_index != INVALID_INDEX) { + + assert(emit->unit != PIPE_SHADER_FRAGMENT); + + /* Emit the declaration for the clip distance shadow copy which + * will be used for stream output purpose and for clip distance + * varying variable + */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->clip_dist_so_index, + VGPU10_NAME_UNDEFINED, + emit->output_usage_mask[emit->clip_dist_out_index]); + + if (emit->info.num_written_clipdistance > 4) { + /* for the second clip distance register, each handles 4 planes */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->clip_dist_so_index + 1, + VGPU10_NAME_UNDEFINED, + emit->output_usage_mask[emit->clip_dist_out_index+1]); + } + } + + return TRUE; +} + + +/** + * Emit the declaration for the temporary registers. 
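 * This covers the ordinary temps (one VGPU10_OPCODE_DCL_TEMPS count), the
 * indexable temp arrays (one VGPU10_OPCODE_DCL_INDEXABLE_TEMP declaration
 * each), and the extra internal temps allocated for position, clip, color,
 * face/fragcoord and address handling.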
+ */ +static boolean +emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) +{ + unsigned total_temps, reg, i; + + total_temps = emit->num_shader_temps; + + /* Allocate extra temps for specially-implemented instructions, + * such as LIT. + */ + total_temps += MAX_INTERNAL_TEMPS; + + if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { + if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || + emit->key.clip_plane_enable || + emit->vposition.so_index != INVALID_INDEX) { + emit->vposition.tmp_index = total_temps; + total_temps += 1; + } + + if (emit->unit == PIPE_SHADER_VERTEX) { + unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | + emit->key.vs.adjust_attrib_itof | + emit->key.vs.adjust_attrib_utof | + emit->key.vs.attrib_is_bgra | + emit->key.vs.attrib_puint_to_snorm | + emit->key.vs.attrib_puint_to_uscaled | + emit->key.vs.attrib_puint_to_sscaled); + while (attrib_mask) { + unsigned index = u_bit_scan(&attrib_mask); + emit->vs.adjusted_input[index] = total_temps++; + } + } + + if (emit->clip_mode == CLIP_DISTANCE) { + /* We need to write the clip distance to a temporary register + * first. Then it will be copied to the shadow copy for + * the clip distance varying variable and stream output purpose. + * It will also be copied to the actual CLIPDIST register + * according to the enabled clip planes + */ + emit->clip_dist_tmp_index = total_temps++; + if (emit->info.num_written_clipdistance > 4) + total_temps++; /* second clip register */ + } + else if (emit->clip_mode == CLIP_VERTEX) { + /* We need to convert the TGSI CLIPVERTEX output to one or more + * clip distances. Allocate a temp reg for the clipvertex here. + */ + assert(emit->info.writes_clipvertex > 0); + emit->clip_vertex_tmp_index = total_temps; + total_temps++; + } + } + else if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || + emit->key.fs.white_fragments || + emit->key.fs.write_color0_to_n_cbufs > 1) { + /* Allocate a temp to hold the output color */ + emit->fs.color_tmp_index = total_temps; + total_temps += 1; + } + + if (emit->fs.face_input_index != INVALID_INDEX) { + /* Allocate a temp for the +/-1 face register */ + emit->fs.face_tmp_index = total_temps; + total_temps += 1; + } + + if (emit->fs.fragcoord_input_index != INVALID_INDEX) { + /* Allocate a temp for modified fragment position register */ + emit->fs.fragcoord_tmp_index = total_temps; + total_temps += 1; + } + } + + for (i = 0; i < emit->num_address_regs; i++) { + emit->address_reg_index[i] = total_temps++; + } + + /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 + * temp indexes. Basically, we compact all the non-array temp register + * indexes into a consecutive series. 
+ * + * Before, we may have some TGSI declarations like: + * DCL TEMP[0..1], LOCAL + * DCL TEMP[2..4], ARRAY(1), LOCAL + * DCL TEMP[5..7], ARRAY(2), LOCAL + * plus, some extra temps, like TEMP[8], TEMP[9] for misc things + * + * After, we'll have a map like this: + * temp_map[0] = { array 0, index 0 } + * temp_map[1] = { array 0, index 1 } + * temp_map[2] = { array 1, index 0 } + * temp_map[3] = { array 1, index 1 } + * temp_map[4] = { array 1, index 2 } + * temp_map[5] = { array 2, index 0 } + * temp_map[6] = { array 2, index 1 } + * temp_map[7] = { array 2, index 2 } + * temp_map[8] = { array 0, index 2 } + * temp_map[9] = { array 0, index 3 } + * + * We'll declare two arrays of 3 elements, plus a set of four non-indexed + * temps numbered 0..3 + * + * Any time we emit a temporary register index, we'll have to use the + * temp_map[] table to convert the TGSI index to the VGPU10 index. + * + * Finally, we recompute the total_temps value here. + */ + reg = 0; + for (i = 0; i < total_temps; i++) { + if (emit->temp_map[i].arrayId == 0) { + emit->temp_map[i].index = reg++; + } + } + total_temps = reg; + + if (0) { + debug_printf("total_temps %u\n", total_temps); + for (i = 0; i < 30; i++) { + debug_printf("temp %u -> array %u index %u\n", + i, emit->temp_map[i].arrayId, emit->temp_map[i].index); + } + } + + /* Emit declaration of ordinary temp registers */ + if (total_temps > 0) { + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, total_temps); + end_emit_instruction(emit); + } + + /* Emit declarations for indexable temp arrays. Skip 0th entry since + * it's unused. + */ + for (i = 1; i < emit->num_temp_arrays; i++) { + unsigned num_temps = emit->temp_arrays[i].size; + + if (num_temps > 0) { + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, i); /* which array */ + emit_dword(emit, num_temps); + emit_dword(emit, 4); /* num components */ + end_emit_instruction(emit); + + total_temps += num_temps; + } + } + + /* Check that the grand total of all regular and indexed temps is + * under the limit. + */ + check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); + + return TRUE; +} + + +static boolean +emit_constant_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + unsigned total_consts, i; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; + opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; + /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = 0; + operand0.swizzleY = 1; + operand0.swizzleZ = 2; + operand0.swizzleW = 3; + + /** + * Emit declaration for constant buffer [0]. We also allocate + * room for the extra constants here. 
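 * (Illustrative example with assumed counts: a vertex shader using 10 user
 * constants, with prescale enabled and one clip plane, would declare
 * 10 + 2 + 1 = 13 vec4 slots for constant buffer 0.)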
+ */ + total_consts = emit->num_shader_consts[0]; + + /* Now, allocate constant slots for the "extra" constants */ + + /* Vertex position scale/translation */ + if (emit->vposition.need_prescale) { + emit->vposition.prescale_scale_index = total_consts++; + emit->vposition.prescale_trans_index = total_consts++; + } + + if (emit->unit == PIPE_SHADER_VERTEX) { + if (emit->key.vs.undo_viewport) { + emit->vs.viewport_index = total_consts++; + } + } + + /* user-defined clip planes */ + if (emit->key.clip_plane_enable) { + unsigned n = util_bitcount(emit->key.clip_plane_enable); + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + for (i = 0; i < n; i++) { + emit->clip_plane_const[i] = total_consts++; + } + } + + /* Texcoord scale factors for RECT textures */ + { + for (i = 0; i < emit->num_samplers; i++) { + if (emit->key.tex[i].unnormalized) { + emit->texcoord_scale_index[i] = total_consts++; + } + } + } + + /* Texture buffer sizes */ + for (i = 0; i < emit->num_samplers; i++) { + if (emit->key.tex[i].texture_target == PIPE_BUFFER) { + emit->texture_buffer_size_index[i] = total_consts++; + } + } + + if (total_consts > 0) { + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, 0); /* which const buffer slot */ + emit_dword(emit, total_consts); + end_emit_instruction(emit); + } + + /* Declare remaining constant buffers (UBOs) */ + for (i = 1; i < Elements(emit->num_shader_consts); i++) { + if (emit->num_shader_consts[i] > 0) { + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); /* which const buffer slot */ + emit_dword(emit, emit->num_shader_consts[i]); + end_emit_instruction(emit); + } + } + + return TRUE; +} + + +/** + * Emit declarations for samplers. + */ +static boolean +emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; + opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); + end_emit_instruction(emit); + } + + return TRUE; +} + + +/** + * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. + */ +static unsigned +pipe_texture_to_resource_dimension(unsigned target, bool msaa) +{ + switch (target) { + case PIPE_BUFFER: + return VGPU10_RESOURCE_DIMENSION_BUFFER; + case PIPE_TEXTURE_1D: + return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS + : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + case PIPE_TEXTURE_3D: + return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; + case PIPE_TEXTURE_CUBE: + return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + case PIPE_TEXTURE_1D_ARRAY: + return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY; + case PIPE_TEXTURE_2D_ARRAY: + return msaa ? 
VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY + : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; + case PIPE_TEXTURE_CUBE_ARRAY: + return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; + default: + assert(!"Unexpected resource type"); + return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + } +} + + +/** + * Given a tgsi_return_type, return true iff it is an integer type. + */ +static boolean +is_integer_type(enum tgsi_return_type type) +{ + switch (type) { + case TGSI_RETURN_TYPE_SINT: + case TGSI_RETURN_TYPE_UINT: + return TRUE; + case TGSI_RETURN_TYPE_FLOAT: + case TGSI_RETURN_TYPE_UNORM: + case TGSI_RETURN_TYPE_SNORM: + return FALSE; + case TGSI_RETURN_TYPE_COUNT: + default: + assert(!"is_integer_type: Unknown tgsi_return_type"); + return FALSE; + } +} + + +/** + * Emit declarations for resources. + * XXX When we're sure that all TGSI shaders will be generated with + * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may + * rework this code. + */ +static boolean +emit_resource_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + /* Emit resource decl for each sampler */ + for (i = 0; i < emit->num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10ResourceReturnTypeToken return_type; + VGPU10_RESOURCE_RETURN_TYPE rt; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; + opcode0.resourceDimension = + pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target, + emit->key.tex[i].texture_msaa); + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + +#if 1 + /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ + STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); + assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT); + rt = emit->key.tex[i].return_type + 1; +#else + switch (emit->key.tex[i].return_type) { + case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; + case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; + case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; + case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; + case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; + case TGSI_RETURN_TYPE_COUNT: + default: + rt = VGPU10_RETURN_TYPE_FLOAT; + assert(!"emit_resource_declarations: Unknown tgsi_return_type"); + } +#endif + + return_type.value = 0; + return_type.component0 = rt; + return_type.component1 = rt; + return_type.component2 = rt; + return_type.component3 = rt; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); + emit_dword(emit, return_type.value); + end_emit_instruction(emit); + } + + return TRUE; +} + +static void +emit_instruction_op1(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src); + 
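+   /* (A one-operand instruction is just the opcode token(s) followed by
+    * one destination and one source operand; the op2/op3 helpers below
+    * only differ in how many source registers they append.)
+    */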
end_emit_instruction(emit); +} + +static void +emit_instruction_op2(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src1, + const struct tgsi_full_src_register *src2, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src1); + emit_src_register(emit, src2); + end_emit_instruction(emit); +} + +static void +emit_instruction_op3(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src1, + const struct tgsi_full_src_register *src2, + const struct tgsi_full_src_register *src3, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src1); + emit_src_register(emit, src2); + emit_src_register(emit, src3); + end_emit_instruction(emit); +} + +/** + * Emit the actual clip distance instructions to be used for clipping + * by copying the clip distance from the temporary registers to the + * CLIPDIST registers written with the enabled planes mask. + * Also copy the clip distance from the temporary to the clip distance + * shadow copy register which will be referenced by the input shader + */ +static void +emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) +{ + struct tgsi_full_src_register tmp_clip_dist_src; + struct tgsi_full_dst_register clip_dist_dst; + + unsigned i; + unsigned clip_plane_enable = emit->key.clip_plane_enable; + unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; + int num_written_clipdist = emit->info.num_written_clipdistance; + + assert(emit->clip_dist_out_index != INVALID_INDEX); + assert(emit->clip_dist_tmp_index != INVALID_INDEX); + + /** + * Temporary reset the temporary clip dist register index so + * that the copy to the real clip dist register will not + * attempt to copy to the temporary register again + */ + emit->clip_dist_tmp_index = INVALID_INDEX; + + for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { + + tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); + + /** + * copy to the shadow copy for use by varying variable and + * stream output. All clip distances + * will be written regardless of the enabled clipping planes. + */ + clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, + emit->clip_dist_so_index + i); + + /* MOV clip_dist_so, tmp_clip_dist */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, + &tmp_clip_dist_src, FALSE); + + /** + * copy those clip distances to enabled clipping planes + * to CLIPDIST registers for clipping + */ + if (clip_plane_enable & 0xf) { + clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, + emit->clip_dist_out_index + i); + clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); + + /* MOV CLIPDIST, tmp_clip_dist */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, + &tmp_clip_dist_src, FALSE); + } + /* four clip planes per clip register */ + clip_plane_enable >>= 4; + } + /** + * set the temporary clip dist register index back to the + * temporary index for the next vertex + */ + emit->clip_dist_tmp_index = clip_dist_tmp_index; +} + +/* Declare clip distance output registers for user-defined clip planes + * or the TGSI_CLIPVERTEX output. 
+ */ +static void +emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); + unsigned index = emit->num_outputs; + unsigned plane_mask; + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + assert(num_clip_planes <= 8); + + if (emit->clip_mode != CLIP_LEGACY && + emit->clip_mode != CLIP_VERTEX) { + return; + } + + if (num_clip_planes == 0) + return; + + /* Declare one or two clip output registers. The number of components + * in the mask reflects the number of clip planes. For example, if 5 + * clip planes are needed, we'll declare outputs similar to: + * dcl_output_siv o2.xyzw, clip_distance + * dcl_output_siv o3.x, clip_distance + */ + emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ + + plane_mask = (1 << num_clip_planes) - 1; + if (plane_mask & 0xf) { + unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, + VGPU10_NAME_CLIP_DISTANCE, cmask); + emit->num_outputs++; + } + if (plane_mask & 0xf0) { + unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, + VGPU10_NAME_CLIP_DISTANCE, cmask); + emit->num_outputs++; + } +} + + +/** + * Emit the instructions for writing to the clip distance registers + * to handle legacy/automatic clip planes. + * For each clip plane, the distance is the dot product of the vertex + * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. + * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE + * output registers already declared. + */ +static void +emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, + unsigned vpos_tmp_index) +{ + unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); + + assert(emit->clip_mode == CLIP_LEGACY); + assert(num_clip_planes <= 8); + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + + for (i = 0; i < num_clip_planes; i++) { + struct tgsi_full_dst_register dst; + struct tgsi_full_src_register plane_src, vpos_src; + unsigned reg_index = emit->clip_dist_out_index + i / 4; + unsigned comp = i % 4; + unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; + + /* create dst, src regs */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); + dst = writemask_dst(&dst, writemask); + + plane_src = make_src_const_reg(emit->clip_plane_const[i]); + vpos_src = make_src_temp_reg(vpos_tmp_index); + + /* DP4 clip_dist, plane, vpos */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, + &plane_src, &vpos_src, FALSE); + } +} + + +/** + * Emit the instructions for computing the clip distance results from + * the clip vertex temporary. + * For each clip plane, the distance is the dot product of the clip vertex + * position (found in a temp reg) and the clip plane coefficients. 
+ */ +static void +emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) +{ + const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); + unsigned i; + struct tgsi_full_dst_register dst; + struct tgsi_full_src_register clipvert_src; + const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + + assert(emit->clip_mode == CLIP_VERTEX); + + clipvert_src = make_src_temp_reg(clip_vertex_tmp); + + for (i = 0; i < num_clip; i++) { + struct tgsi_full_src_register plane_src; + unsigned reg_index = emit->clip_dist_out_index + i / 4; + unsigned comp = i % 4; + unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; + + /* create dst, src regs */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); + dst = writemask_dst(&dst, writemask); + + plane_src = make_src_const_reg(emit->clip_plane_const[i]); + + /* DP4 clip_dist, plane, vpos */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, + &plane_src, &clipvert_src, FALSE); + } + + /* copy temporary clip vertex register to the clip vertex register */ + + assert(emit->clip_vertex_out_index != INVALID_INDEX); + + /** + * temporary reset the temporary clip vertex register index so + * that copy to the clip vertex register will not attempt + * to copy to the temporary register again + */ + emit->clip_vertex_tmp_index = INVALID_INDEX; + + /* MOV clip_vertex, clip_vertex_tmp */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst, &clipvert_src, FALSE); + + /** + * set the temporary clip vertex register index back to the + * temporary index for the next vertex + */ + emit->clip_vertex_tmp_index = clip_vertex_tmp; +} + +/** + * Emit code to convert RGBA to BGRA + */ +static void +emit_swap_r_b(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register bgra_src = + swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, dst); + emit_src_register(emit, &bgra_src); + end_emit_instruction(emit); +} + + +/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ +static void +emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); + struct tgsi_full_src_register two = + make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); + struct tgsi_full_src_register neg_two = + make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); + + unsigned val_tmp = get_temp_index(emit); + struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); + struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); + + unsigned bias_tmp = get_temp_index(emit); + struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); + struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); + + /* val = src * 2.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, + src, &two, FALSE); + + /* bias = src > 0.5 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, + src, &half, FALSE); + + /* bias = bias & -2.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, + &bias_src, &neg_two, FALSE); + + /* dst = val + bias */ + 
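+   /* (Worked example with illustrative values, .x/.y/.z channels only:
+    * a unorm source of 0.25 gives val = 0.5 and bias = 0.0, so dst = 0.5;
+    * a source of 0.75 gives val = 1.5 and bias = -2.0, so dst = -0.5.
+    * Sources in the upper half of the unorm range correspond to bit
+    * patterns that are negative when reinterpreted as snorm.  The .w
+    * immediates differ because alpha is only 2 bits wide.)
+    */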
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, + &val_src, &bias_src, FALSE); + + free_temp_indexes(emit); +} + + +/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ +static void +emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register scale = + make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); + + /* dst = src * scale */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); +} + + +/** Convert from R32_UINT to 10_10_10_2_sscaled */ +static void +emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register lshift = + make_immediate_reg_int4(emit, 22, 12, 2, 0); + struct tgsi_full_src_register rshift = + make_immediate_reg_int4(emit, 22, 22, 22, 30); + + struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + + /* + * r = (pixel << 22) >> 22; # signed int in [511, -512] + * g = (pixel << 12) >> 22; # signed int in [511, -512] + * b = (pixel << 2) >> 22; # signed int in [511, -512] + * a = (pixel << 0) >> 30; # signed int in [1, -2] + * dst = i_to_f(r,g,b,a); # convert to float + */ + emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, + &src_xxxx, &lshift, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, + &tmp_src, &rshift, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); + + free_temp_indexes(emit); +} + + +/** + * Emit code for TGSI_OPCODE_ABS instruction. + */ +static boolean +emit_abs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = ABS(s0): + * dst = abs(s0) + * Translates into: + * MOV dst, abs(s0) + */ + struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]); + + /* MOV dst, abs(s0) */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &abs_src0, inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. + */ +static boolean +emit_arl_uarl(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned index = inst->Dst[0].Register.Index; + struct tgsi_full_dst_register dst; + unsigned opcode; + + assert(index < MAX_VGPU10_ADDR_REGS); + dst = make_dst_temp_reg(emit->address_reg_index[index]); + + /* ARL dst, s0 + * Translates into: + * FTOI address_tmp, s0 + * + * UARL dst, s0 + * Translates into: + * MOV address_tmp, s0 + */ + if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) + opcode = VGPU10_OPCODE_FTOI; + else + opcode = VGPU10_OPCODE_MOV; + + emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_CAL instruction. + */ +static boolean +emit_cal(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned label = inst->Label.Label; + VGPU10OperandToken0 operand; + operand.value = 0; + operand.operandType = VGPU10_OPERAND_TYPE_LABEL; + + begin_emit_instruction(emit); + emit_dword(emit, operand.value); + emit_dword(emit, label); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_IABS instruction. 
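+ * (For example, src.x = -7 becomes IMAX(-7, 7) = 7.  The one wrap-around
+ * case is INT_MIN, which negates to itself, so IABS(INT_MIN) stays
+ * INT_MIN, as with ordinary two's-complement abs().)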
+ */ +static boolean +emit_iabs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src0.x < 0) ? -src0.x : src0.x + * dst.y = (src0.y < 0) ? -src0.y : src0.y + * dst.z = (src0.z < 0) ? -src0.z : src0.z + * dst.w = (src0.w < 0) ? -src0.w : src0.w + * + * Translates into + * IMAX dst, src, neg(src) + */ + struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); + emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], + &inst->Src[0], &neg_src, FALSE); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_CMP instruction. + */ +static boolean +emit_cmp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src0.x < 0) ? src1.x : src2.x + * dst.y = (src0.y < 0) ? src1.y : src2.y + * dst.z = (src0.z < 0) ? src1.z : src2.z + * dst.w = (src0.w < 0) ? src1.w : src2.w + * + * Translates into + * LT tmp, src0, 0.0 + * MOVC dst, tmp, src1, src2 + */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, + &inst->Src[0], &zero, FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], + &tmp_src, &inst->Src[1], &inst->Src[2], + inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DP2A instruction. + */ +static boolean +emit_dp2a(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x + * Translate into + * MAD tmp.x, s0.y, s1.y, s2.x + * MAD tmp.x, s0.x, s1.x, tmp.x + * MOV dst.xyzw, tmp.xxxx + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + + struct tgsi_full_src_register src0_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src0_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src1_xxxx = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src1_yyyy = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src2_xxxx = + scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); + + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, + &src1_yyyy, &src2_xxxx, FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, + &src1_xxxx, &tmp_src_xxxx, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DPH instruction. 
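+ * DPH is the homogeneous dot product, i.e. DP4 with src0.w treated as
+ * 1.0:
+ *   dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w
+ * which is why it can be built from a DP3 plus an ADD of src1.w.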
+ */ +static boolean +emit_dph(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * DP3 tmp, s0, s1 + * ADD dst, tmp, s1.wwww + */ + + struct tgsi_full_src_register s1_wwww = + swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, + TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + /* DP3 tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* ADD dst, tmp, s1.wwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src, + &s1_wwww, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DST instruction. + */ +static boolean +emit_dst(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = 1 + * dst.y = src0.y * src1.y + * dst.z = src0.z + * dst.w = src1.w + */ + + struct tgsi_full_src_register s0_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s0_zzzz = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); + struct tgsi_full_src_register s1_yyyy = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s1_wwww = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); + + /* + * If dst and either src0 and src1 are the same we need + * to create a temporary for it and insert a extra move. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* MOV dst.x, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + } + + /* MUL dst.y, s0.y, s1.y */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, + &s1_yyyy, inst->Instruction.Saturate); + } + + /* MOV dst.z, s0.z */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz, + inst->Instruction.Saturate); + } + + /* MOV dst.w, s1.w */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww, + inst->Instruction.Saturate); + } + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + free_temp_indexes(emit); + + return TRUE; +} + + + +/** + * Emit code for TGSI_OPCODE_ENDPRIM (GS only) + */ +static boolean +emit_endprim(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + /* We can't use emit_simple() because the TGSI instruction has one + * operand (vertex stream number) which we must ignore for VGPU10. 
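+    * (VGPU10's CUT ends the strip on the single geometry-shader output
+    * stream; per-stream cuts only appear in later shader models, so the
+    * stream number has nothing to map to here.)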
+ */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); + end_emit_instruction(emit); + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. + */ +static boolean +emit_ex2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_EX2 computes only one value from src.x + * while VGPU10 computes four values. + * + * dst = EX2(src): + * dst.xyzw = 2.0 ^ src.x + */ + + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* EXP tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_EXP instruction. + */ +static boolean +emit_exp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = 2 ^ floor(s0.x) + * dst.y = s0.x - floor(s0.x) + * dst.z = 2 ^ s0.x + * dst.w = 1.0 + */ + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + /* + * If dst and src are the same we need to create + * a temporary for it and insert a extra move. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* only use X component of temp reg */ + tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* ROUND_NI tmp.x, s0.x */ + emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, + &src_xxxx, FALSE); /* round to -infinity */ + + /* EXP dst.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, + inst->Instruction.Saturate); + } + + /* ADD dst.y, s0.x, -tmp */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); + + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, + &neg_tmp_src, inst->Instruction.Saturate); + } + + /* EXP dst.z, s0.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, + inst->Instruction.Saturate); + } + + /* MOV dst.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, + FALSE); + } + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_IF instruction. 
+ */ +static boolean +emit_if(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + VGPU10OpcodeToken0 opcode0; + + /* The src register should be a scalar */ + assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY && + inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ && + inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW); + + /* The only special thing here is that we need to set the + * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if + * src.x is non-zero. + */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_IF; + opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of + * the register components are negative). + */ +static boolean +emit_kill_if(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* tmp = src[0] < 0.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], + &zero, FALSE); + + if (!same_swizzle_terms(&inst->Src[0])) { + /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to + * logically OR the swizzle terms. Most uses of KILL_IF only + * test one channel so it's good to avoid these extra steps. + */ + struct tgsi_full_src_register tmp_src_yyyy = + scalar_src(&tmp_src, TGSI_SWIZZLE_Y); + struct tgsi_full_src_register tmp_src_zzzz = + scalar_src(&tmp_src, TGSI_SWIZZLE_Z); + struct tgsi_full_src_register tmp_src_wwww = + scalar_src(&tmp_src, TGSI_SWIZZLE_W); + + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_wwww, FALSE); + } + + begin_emit_instruction(emit); + emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ + emit_src_register(emit, &tmp_src_xxxx); + end_emit_instruction(emit); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). + */ +static boolean +emit_kill(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + /* DISCARD if 0.0 is zero */ + begin_emit_instruction(emit); + emit_discard_opcode(emit, FALSE); + emit_src_register(emit, &zero); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LG2 instruction. + */ +static boolean +emit_lg2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_LG2 computes only one value from src.x + * while VGPU10 computes four values. 
+ * + * dst = LG2(src): + * dst.xyzw = log2(src.x) + */ + + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* LOG tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LIT instruction. + */ +static boolean +emit_lit(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* + * If dst and src are the same we need to create + * a temporary for it and insert a extra move. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* + * dst.x = 1 + * dst.y = max(src.x, 0) + * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 + * dst.w = 1 + */ + + /* MOV dst.x, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + } + + /* MOV dst.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); + } + + /* MAX dst.y, src.x, 0.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, + &zero, inst->Instruction.Saturate); + } + + /* + * tmp1 = clamp(src.w, -128, 128); + * MAX tmp1, src.w, -128 + * MIN tmp1, tmp1, 128 + * + * tmp2 = max(tmp2, 0); + * MAX tmp2, src.y, 0 + * + * tmp1 = pow(tmp2, tmp1); + * LOG tmp2, tmp2 + * MUL tmp1, tmp2, tmp1 + * EXP tmp1, tmp1 + * + * tmp1 = (src.w == 0) ? 1 : tmp1; + * EQ tmp2, 0, src.w + * MOVC tmp1, tmp2, 1.0, tmp1 + * + * dst.z = (0 < src.x) ? 
tmp1 : 0; + * LT tmp2, 0, src.x + * MOVC dst.z, tmp2, tmp1, 0.0 + */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register lowerbound = + make_immediate_reg_float(emit, -128.0f); + struct tgsi_full_src_register upperbound = + make_immediate_reg_float(emit, 128.0f); + + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, + &lowerbound, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, + &upperbound, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, + &zero, FALSE); + + /* POW tmp1, tmp2, tmp1 */ + /* LOG tmp2, tmp2 */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, + FALSE); + + /* MUL tmp1, tmp2, tmp1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, + &tmp1_src, FALSE); + + /* EXP tmp1, tmp1 */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, + FALSE); + + /* EQ tmp2, 0, src.w */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, + &src_wwww, FALSE); + /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, + &tmp2_src, &one, &tmp1_src, FALSE); + + /* LT tmp2, 0, src.x */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, + &src_xxxx, FALSE); + /* MOVC dst.z, tmp2, tmp1, 0.0 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, + &tmp2_src, &tmp1_src, &zero, FALSE); + } + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LOG instruction. 
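+ * (Worked example, illustrative: src.x = 10.0 yields dst.x = 3.0,
+ * dst.y = 10/8 = 1.25, dst.z = log2(10) = ~3.32 and dst.w = 1.0,
+ * matching the per-component breakdown below.)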
+ */ +static boolean +emit_log(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = floor(lg2(abs(s0.x))) + * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) + * dst.z = lg2(abs(s0.x)) + * dst.w = 1.0 + */ + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); + + /* only use X component of temp reg */ + tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* LOG tmp.x, abs(s0.x) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, + &abs_src_xxxx, FALSE); + } + + /* MOV dst.z, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, + &tmp_src, inst->Instruction.Saturate); + } + + /* FLR tmp.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { + emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, + &tmp_src, FALSE); + } + + /* MOV dst.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, + inst->Instruction.Saturate); + } + + /* EXP tmp.x, tmp.x */ + /* DIV dst.y, abs(s0.x), tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, + FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, + &tmp_src, inst->Instruction.Saturate); + } + + /* MOV dst.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LRP instruction. + */ +static boolean +emit_lrp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = LRP(s0, s1, s2): + * dst = s0 * (s1 - s2) + s2 + * Translates into: + * SUB tmp, s1, s2; tmp = s1 - s2 + * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); + struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); + struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); + + /* ADD tmp, s1, -s2 */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, + &inst->Src[1], &neg_src2, FALSE); + + /* MAD dst, s1, tmp, s3 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], + &inst->Src[0], &src_tmp, &inst->Src[2], + inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_POW instruction. 
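+ * (The expansion below computes exp2(src1.x * log2(src0.x)); e.g.
+ * src0.x = 2.0, src1.x = 10.0 gives exp2(10 * 1) = 1024.0.  A negative
+ * base is undefined since log2 of a negative number does not exist.)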
+ */ +static boolean +emit_pow(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_POW computes only one value from src0.x and + * src1.x while VGPU10 computes four values. + * + * dst = POW(src0, src1): + * dst.xyzw = src0.x ^ src1.x + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + struct tgsi_full_src_register src1_xxxx = + swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* LOG tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, + FALSE); + + /* MUL tmp, tmp, s1.xxxx */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, + &src1_xxxx, FALSE); + + /* EXP tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], + &tmp_src, inst->Instruction.Saturate); + + /* free tmp */ + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. + */ +static boolean +emit_rcp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* DIV tmp.x, 1.0, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, + &inst->Src[0], FALSE); + + /* MOV dst, tmp.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RSQ instruction. + */ +static boolean +emit_rsq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = RSQ(src): + * dst.xyzw = 1 / sqrt(src.x) + * Translates into: + * RSQ tmp, src.x + * MOV dst, tmp.xxxx + */ + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* RSQ tmp, src.x */ + emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, + &inst->Src[0], FALSE); + + /* MOV dst, tmp.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + /* free tmp */ + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SCS instruction. 
+ */ +static boolean +emit_scs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = cos(src.x) + * dst.y = sin(src.x) + * dst.z = 0.0 + * dst.w = 1.0 + */ + struct tgsi_full_dst_register dst_x = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); + struct tgsi_full_dst_register dst_y = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); + struct tgsi_full_dst_register dst_zw = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); + + struct tgsi_full_src_register zero_one = + make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); + emit_dst_register(emit, &dst_y); + emit_dst_register(emit, &dst_x); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst_zw, &zero_one, inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. + */ +static boolean +emit_seq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SEQ(s0, s1): + * dst = s0 == s1 ? 1.0 : 0.0 (per component) + * Translates into: + * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* EQ tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. + */ +static boolean +emit_sge(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SGE(s0, s1): + * dst = s0 >= s1 ? 1.0 : 0.0 (per component) + * Translates into: + * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* GE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. + */ +static boolean +emit_sgt(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SGT(s0, s1): + * dst = s0 > s1 ? 1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* LT tmp, s1, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], + &inst->Src[0], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. + */ +static boolean +emit_sincos(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); + + if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) + { + emit_dst_register(emit, &tmp_dst_x); /* first destination register */ + emit_null_dst_register(emit); /* second destination register */ + } + else { + emit_null_dst_register(emit); + emit_dst_register(emit, &tmp_dst_x); + } + + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. + */ +static boolean +emit_sle(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SLE(s0, s1): + * dst = s0 <= s1 ? 1.0 : 0.0 (per component) + * Translates into: + * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* GE tmp, s1, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], + &inst->Src[0], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. + */ +static boolean +emit_slt(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SLT(s0, s1): + * dst = s0 < s1 ? 1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* LT tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. + */ +static boolean +emit_sne(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SNE(s0, s1): + * dst = s0 != s1 ? 1.0 : 0.0 (per component) + * Translates into: + * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* NE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. + */ +static boolean +emit_ssg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 + * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 + * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 + * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 + * Translates into: + * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) + * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) + * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component) + */ + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register neg_one = + make_immediate_reg_float(emit, -1.0f); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], + &zero, FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, + &neg_one, &zero, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, + &inst->Src[0], FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, + &one, &tmp2_src, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 
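+ * (The ILT results are all-ones masks that read back as integer -1, so
+ * e.g. src.x = -5 gives tmp1 = -1, tmp2 = 0 and dst = -1 - 0 = -1;
+ * src.x = 5 gives 0 - (-1) = 1; src.x = 0 gives 0.)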
+ */ +static boolean +emit_issg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 + * Translates into: + * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) + * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component) + * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) + */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); + + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, + &inst->Src[0], &zero, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, + &zero, &inst->Src[0], FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], + &tmp1_src, &neg_tmp2, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SUB instruction. + */ +static boolean +emit_sub(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SUB(s0, s1): + * dst = s0 - s1 + * Translates into: + * ADD dst, s0, neg(s1) + */ + struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); + + /* ADD dst, s0, neg(s1) */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], + &inst->Src[0], &neg_src1, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit a comparison instruction. The dest register will get + * 0 or ~0 values depending on the outcome of comparing src0 to src1. + */ +static void +emit_comparison(struct svga_shader_emitter_v10 *emit, + SVGA3dCmpFunc func, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src0, + const struct tgsi_full_src_register *src1) +{ + struct tgsi_full_src_register immediate; + VGPU10OpcodeToken0 opcode0; + boolean swapSrc = FALSE; + + /* Sanity checks for svga vs. 
gallium enums */ + STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); + STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); + + opcode0.value = 0; + + switch (func) { + case SVGA3D_CMP_NEVER: + immediate = make_immediate_reg_int(emit, 0); + /* MOV dst, {0} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_ALWAYS: + immediate = make_immediate_reg_int(emit, -1); + /* MOV dst, {-1} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_LESS: + opcode0.opcodeType = VGPU10_OPCODE_LT; + break; + case SVGA3D_CMP_EQUAL: + opcode0.opcodeType = VGPU10_OPCODE_EQ; + break; + case SVGA3D_CMP_LESSEQUAL: + opcode0.opcodeType = VGPU10_OPCODE_GE; + swapSrc = TRUE; + break; + case SVGA3D_CMP_GREATER: + opcode0.opcodeType = VGPU10_OPCODE_LT; + swapSrc = TRUE; + break; + case SVGA3D_CMP_NOTEQUAL: + opcode0.opcodeType = VGPU10_OPCODE_NE; + break; + case SVGA3D_CMP_GREATEREQUAL: + opcode0.opcodeType = VGPU10_OPCODE_GE; + break; + default: + assert(!"Unexpected comparison mode"); + opcode0.opcodeType = VGPU10_OPCODE_EQ; + } + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dst_register(emit, dst); + if (swapSrc) { + emit_src_register(emit, src1); + emit_src_register(emit, src0); + } + else { + emit_src_register(emit, src0); + emit_src_register(emit, src1); + } + end_emit_instruction(emit); +} + + +/** + * Get texel/address offsets for a texture instruction. + */ +static void +get_texel_offsets(const struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, int offsets[3]) +{ + if (inst->Texture.NumOffsets == 1) { + /* According to OpenGL Shader Language spec the offsets are only + * fetched from a previously-declared immediate/literal. + */ + const struct tgsi_texture_offset *off = inst->TexOffsets; + const unsigned index = off[0].Index; + const unsigned swizzleX = off[0].SwizzleX; + const unsigned swizzleY = off[0].SwizzleY; + const unsigned swizzleZ = off[0].SwizzleZ; + const union tgsi_immediate_data *imm = emit->immediates[index]; + + assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); + + offsets[0] = imm[swizzleX].Int; + offsets[1] = imm[swizzleY].Int; + offsets[2] = imm[swizzleZ].Int; + } + else { + offsets[0] = offsets[1] = offsets[2] = 0; + } +} + + +/** + * Set up the coordinate register for texture sampling. + * When we're sampling from a RECT texture we have to scale the + * unnormalized coordinate to a normalized coordinate. + * We do that by multiplying the coordinate by an "extra" constant. + * An alternative would be to use the RESINFO instruction to query the + * texture's size. 
+ */ +static struct tgsi_full_src_register +setup_texcoord(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_src_register *coord) +{ + if (emit->key.tex[unit].unnormalized) { + unsigned scale_index = emit->texcoord_scale_index[unit]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); + + /* MUL tmp, coord, const[] */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, + coord, &scale_src, FALSE); + return tmp_src; + } + else { + /* use texcoord as-is */ + return *coord; + } +} + + +/** + * For SAMPLE_C instructions, emit the extra src register which indicates + * the reference/comparision value. + */ +static void +emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, + unsigned target, + const struct tgsi_full_src_register *coord) +{ + struct tgsi_full_src_register coord_src_ref; + unsigned component; + + assert(tgsi_is_shadow_target(target)); + + assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ + if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || + target == TGSI_TEXTURE_SHADOWCUBE) + component = TGSI_SWIZZLE_W; + else + component = TGSI_SWIZZLE_Z; + + coord_src_ref = scalar_src(coord, component); + + emit_src_register(emit, &coord_src_ref); +} + + +/** + * Info for implementing texture swizzles. + * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() + * functions use this to encapsulate the extra steps needed to perform + * a texture swizzle, or shadow/depth comparisons. + * The shadow/depth comparison is only done here if for the cases where + * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). + */ +struct tex_swizzle_info +{ + boolean swizzled; + boolean shadow_compare; + unsigned unit; + unsigned texture_target; /**< TGSI_TEXTURE_x */ + struct tgsi_full_src_register tmp_src; + struct tgsi_full_dst_register tmp_dst; + const struct tgsi_full_dst_register *inst_dst; + const struct tgsi_full_src_register *coord_src; +}; + + +/** + * Do setup for handling texture swizzles or shadow compares. + * \param unit the texture unit + * \param inst the TGSI texture instruction + * \param shadow_compare do shadow/depth comparison? + * \param swz returns the swizzle info + */ +static void +begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_instruction *inst, + boolean shadow_compare, + struct tex_swizzle_info *swz) +{ + swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || + emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || + emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || + emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); + + swz->shadow_compare = shadow_compare; + swz->texture_target = inst->Texture.Texture; + + if (swz->swizzled || shadow_compare) { + /* Allocate temp register for the result of the SAMPLE instruction + * and the source of the MOV/compare/swizzle instructions. + */ + unsigned tmp = get_temp_index(emit); + swz->tmp_src = make_src_temp_reg(tmp); + swz->tmp_dst = make_dst_temp_reg(tmp); + + swz->unit = unit; + } + swz->inst_dst = &inst->Dst[0]; + swz->coord_src = &inst->Src[0]; +} + + +/** + * Returns the register to put the SAMPLE instruction results into. + * This will either be the original instruction dst reg (if no swizzle + * and no shadow comparison) or a temporary reg if there is a swizzle. 
+ */ +static const struct tgsi_full_dst_register * +get_tex_swizzle_dst(const struct tex_swizzle_info *swz) +{ + return (swz->swizzled || swz->shadow_compare) + ? &swz->tmp_dst : swz->inst_dst; +} + + +/** + * This emits the MOV instruction that actually implements a texture swizzle + * and/or shadow comparison. + */ +static void +end_tex_swizzle(struct svga_shader_emitter_v10 *emit, + const struct tex_swizzle_info *swz) +{ + if (swz->shadow_compare) { + /* Emit extra instructions to compare the fetched texel value against + * a texture coordinate component. The result of the comparison + * is 0.0 or 1.0. + */ + struct tgsi_full_src_register coord_src; + struct tgsi_full_src_register texel_src = + scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + /* convert gallium comparison func to SVGA comparison func */ + SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + switch (swz->texture_target) { + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); + break; + case TGSI_TEXTURE_SHADOW1D: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y); + break; + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W); + break; + default: + assert(!"Unexpected texture target in end_tex_swizzle()"); + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); + } + + /* COMPARE tmp, coord, texel */ + /* XXX it would seem that the texel and coord arguments should + * be transposed here, but piglit tests indicate otherwise. + */ + emit_comparison(emit, compare_func, + &swz->tmp_dst, &texel_src, &coord_src); + + /* AND dest, tmp, {1.0} */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); + if (swz->swizzled) { + emit_dst_register(emit, &swz->tmp_dst); + } + else { + emit_dst_register(emit, swz->inst_dst); + } + emit_src_register(emit, &swz->tmp_src); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } + + if (swz->swizzled) { + unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; + unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; + unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; + unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; + unsigned writemask_0 = 0, writemask_1 = 0; + boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type); + + /* Swizzle w/out zero/one terms */ + struct tgsi_full_src_register src_swizzled = + swizzle_src(&swz->tmp_src, + swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED, + swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN, + swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE, + swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA); + + /* MOV dst, color(tmp).<swizzle> */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + swz->inst_dst, &src_swizzled, FALSE); + + /* handle swizzle zero terms */ + writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) | + ((swz_g == PIPE_SWIZZLE_ZERO) << 1) | + ((swz_b == PIPE_SWIZZLE_ZERO) << 2) | + ((swz_a == PIPE_SWIZZLE_ZERO) << 3)); + + if (writemask_0) { + struct tgsi_full_src_register zero = int_tex ? 
+ make_immediate_reg_int(emit, 0) : + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_0); + + /* MOV dst.writemask_0, {0,0,0,0} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst, &zero, FALSE); + } + + /* handle swizzle one terms */ + writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) | + ((swz_g == PIPE_SWIZZLE_ONE) << 1) | + ((swz_b == PIPE_SWIZZLE_ONE) << 2) | + ((swz_a == PIPE_SWIZZLE_ONE) << 3)); + + if (writemask_1) { + struct tgsi_full_src_register one = int_tex ? + make_immediate_reg_int(emit, 1) : + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_1); + + /* MOV dst.writemask_1, {1,1,1,1} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE); + } + } +} + + +/** + * Emit code for TGSI_OPCODE_SAMPLE instruction. + */ +static boolean +emit_sample(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned resource_unit = inst->Src[1].Register.Index; + const unsigned sampler_unit = inst->Src[2].Register.Index; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); + + /* SAMPLE dst, coord(s0), resource, sampler */ + begin_emit_instruction(emit); + + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, resource_unit); + emit_sampler_register(emit, sampler_unit); + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Check if a texture instruction is valid. + * An example of an invalid texture instruction is doing shadow comparison + * with an integer-valued texture. + * If we detect an invalid texture instruction, we replace it with: + * MOV dst, {1,1,1,1}; + * \return TRUE if valid, FALSE if invalid. + */ +static boolean +is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned unit = inst->Src[1].Register.Index; + const unsigned target = inst->Texture.Texture; + boolean valid = TRUE; + + if (tgsi_is_shadow_target(target) && + is_integer_type(emit->key.tex[unit].return_type)) { + debug_printf("Invalid SAMPLE_C with an integer texture!\n"); + valid = FALSE; + } + /* XXX might check for other conditions in the future here */ + + if (!valid) { + /* emit a MOV dst, {1,1,1,1} instruction. 
*/ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } + + return valid; +} + + +/** + * Emit code for TGSI_OPCODE_TEX (simple texture lookup) + */ +static boolean +emit_tex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + unsigned target = inst->Texture.Texture; + unsigned opcode; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; + + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE dst, coord(s0), resource, sampler */ + begin_emit_instruction(emit); + + if (tgsi_is_shadow_target(target)) + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; + + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &coord); + } + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXP (projective texture) + */ +static boolean +emit_txp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + unsigned target = inst->Texture.Texture; + unsigned opcode; + int offsets[3]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + struct tgsi_full_src_register coord; + struct tex_swizzle_info swz_info; + + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* DIV tmp, coord, coord.wwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, + &coord, &src0_wwww, FALSE); + + /* SAMPLE dst, coord(tmp), resource, sampler */ + begin_emit_instruction(emit); + + if (tgsi_is_shadow_target(target)) + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; + + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &tmp_src); /* projected coord */ + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &tmp_src); + } + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/* + * Emit code for TGSI_OPCODE_XPD instruction. 
+ */
+static boolean
+emit_xpd(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = src0.y * src1.z - src1.y * src0.z
+    * dst.y = src0.z * src1.x - src1.z * src0.x
+    * dst.z = src0.x * src1.y - src1.x * src0.y
+    * dst.w = 1
+    */
+   struct tgsi_full_src_register s0_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s0_yyyy =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s0_zzzz =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+
+   struct tgsi_full_src_register s1_xxxx =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s1_yyyy =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s1_zzzz =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
+
+   unsigned tmp1 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+   unsigned tmp2 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
+
+   unsigned tmp3 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst_x =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_dst_register tmp3_dst_y =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
+   struct tgsi_full_dst_register tmp3_dst_z =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
+   struct tgsi_full_dst_register tmp3_dst_w =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
+
+   /* Note: we put all the intermediate computations into tmp3 in case
+    * the XPD dest register is the same as one of the src regs (in which
+    * case we could clobber a src reg before we're done with it).
+    *
+    * Note: we could get by with just one temp register instead of three
+    * since we're doing scalar operations and there's enough room in one
+    * temp for everything. 
+ */ + + /* MUL tmp1, src0.y, src1.z */ + /* MUL tmp2, src1.y, src0.z */ + /* ADD tmp3.x, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, + &s0_yyyy, &s1_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, + &s1_yyyy, &s0_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MUL tmp1, src0.z, src1.x */ + /* MUL tmp2, src1.z, src0.x */ + /* ADD tmp3.y, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz, + &s1_xxxx, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz, + &s0_xxxx, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MUL tmp1, src0.x, src1.y */ + /* MUL tmp2, src1.x, src0.y */ + /* ADD tmp3.z, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx, + &s1_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx, + &s0_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MOV tmp3.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE); + } + + /* MOV dst, tmp3 */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src, + inst->Instruction.Saturate); + + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXD (explicit derivatives) + */ +static boolean +emit_txd(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[3].Register.Index; + unsigned target = inst->Texture.Texture; + int offsets[3]; + struct tgsi_full_src_register coord; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &inst->Src[1]); /* Xderiv */ + emit_src_register(emit, &inst->Src[2]); /* Yderiv */ + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXF (texel fetch) + */ +static boolean +emit_txf(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + const unsigned msaa = emit->key.tex[unit].texture_msaa; + int offsets[3]; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + if (msaa) { + /* Fetch one sample from an MSAA texture */ + struct tgsi_full_src_register sampleIndex = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + /* LD_MS dst, coord(s0), resource, sampleIndex */ + 
begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + emit_src_register(emit, &sampleIndex); + end_emit_instruction(emit); + } + else { + /* Fetch one texel specified by integer coordinate */ + /* LD dst, coord(s0), resource */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_LD, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + end_emit_instruction(emit); + } + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) + * or TGSI_OPCODE_TXB2 (for cube shadow maps). + */ +static boolean +emit_txl_txb(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned target = inst->Texture.Texture; + unsigned opcode, unit; + int offsets[3]; + struct tgsi_full_src_register coord, lod_bias; + struct tex_swizzle_info swz_info; + + assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || + inst->Instruction.Opcode == TGSI_OPCODE_TXB || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2); + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { + lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + unit = inst->Src[2].Register.Index; + } + else { + lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + unit = inst->Src[1].Register.Index; + } + + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ + begin_emit_instruction(emit); + if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { + opcode = VGPU10_OPCODE_SAMPLE_L; + } + else { + opcode = VGPU10_OPCODE_SAMPLE_B; + } + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &lod_bias); + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. + */ +static boolean +emit_txq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + + if (emit->key.tex[unit].texture_target == PIPE_BUFFER) { + /* RESINFO does not support querying texture buffers, so we instead + * store texture buffer sizes in shader constants, then copy them to + * implement TXQ instead of emitting RESINFO. 
+ * MOV dst, const[texture_buffer_size_index[unit]] + */ + struct tgsi_full_src_register size_src = + make_src_const_reg(emit->texture_buffer_size_index[unit]); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, + FALSE); + } else { + /* RESINFO dst, srcMipLevel, resource */ + begin_emit_instruction(emit); + emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + end_emit_instruction(emit); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit a simple instruction (like ADD, MUL, MIN, etc). + */ +static boolean +emit_simple(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned opcode = inst->Instruction.Opcode; + const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); + unsigned i; + + begin_emit_instruction(emit); + emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), + inst->Instruction.Saturate); + for (i = 0; i < op->num_dst; i++) { + emit_dst_register(emit, &inst->Dst[i]); + } + for (i = 0; i < op->num_src; i++) { + emit_src_register(emit, &inst->Src[i]); + } + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * We only special case the MOV instruction to try to detect constant + * color writes in the fragment shader. + */ +static boolean +emit_mov(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_full_src_register *src = &inst->Src[0]; + const struct tgsi_full_dst_register *dst = &inst->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple(emit, inst); +} + + +/** + * Emit a simple VGPU10 instruction which writes to multiple dest registers, + * where TGSI only uses one dest register. + */ +static boolean +emit_simple_1dst(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned dst_count, + unsigned dst_index) +{ + const unsigned opcode = inst->Instruction.Opcode; + const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); + unsigned i; + + begin_emit_instruction(emit); + emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), + inst->Instruction.Saturate); + + for (i = 0; i < dst_count; i++) { + if (i == dst_index) { + emit_dst_register(emit, &inst->Dst[0]); + } else { + emit_null_dst_register(emit); + } + } + + for (i = 0; i < op->num_src; i++) { + emit_src_register(emit, &inst->Src[i]); + } + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Translate a single TGSI instruction to VGPU10. 
+ */ +static boolean +emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + const unsigned opcode = inst->Instruction.Opcode; + + switch (opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_BRK: + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + case TGSI_OPCODE_DIV: + case TGSI_OPCODE_DP2: + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDSUB: + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_FSEQ: + case TGSI_OPCODE_FSGE: + case TGSI_OPCODE_FSLT: + case TGSI_OPCODE_FSNE: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_NOP: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_RET: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_ROUND: + case TGSI_OPCODE_SQRT: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_TRUNC: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_UCMP: + case TGSI_OPCODE_USHR: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_XOR: + /* simple instructions */ + return emit_simple(emit, inst); + + case TGSI_OPCODE_MOV: + return emit_mov(emit, inst); + case TGSI_OPCODE_EMIT: + return emit_vertex(emit, inst); + case TGSI_OPCODE_ENDPRIM: + return emit_endprim(emit, inst); + case TGSI_OPCODE_ABS: + return emit_abs(emit, inst); + case TGSI_OPCODE_IABS: + return emit_iabs(emit, inst); + case TGSI_OPCODE_ARL: + /* fall-through */ + case TGSI_OPCODE_UARL: + return emit_arl_uarl(emit, inst); + case TGSI_OPCODE_BGNSUB: + /* no-op */ + return TRUE; + case TGSI_OPCODE_CAL: + return emit_cal(emit, inst); + case TGSI_OPCODE_CMP: + return emit_cmp(emit, inst); + case TGSI_OPCODE_COS: + return emit_sincos(emit, inst); + case TGSI_OPCODE_DP2A: + return emit_dp2a(emit, inst); + case TGSI_OPCODE_DPH: + return emit_dph(emit, inst); + case TGSI_OPCODE_DST: + return emit_dst(emit, inst); + case TGSI_OPCODE_EX2: + return emit_ex2(emit, inst); + case TGSI_OPCODE_EXP: + return emit_exp(emit, inst); + case TGSI_OPCODE_IF: + return emit_if(emit, inst); + case TGSI_OPCODE_KILL: + return emit_kill(emit, inst); + case TGSI_OPCODE_KILL_IF: + return emit_kill_if(emit, inst); + case TGSI_OPCODE_LG2: + return emit_lg2(emit, inst); + case TGSI_OPCODE_LIT: + return emit_lit(emit, inst); + case TGSI_OPCODE_LOG: + return emit_log(emit, inst); + case TGSI_OPCODE_LRP: + return emit_lrp(emit, inst); + case TGSI_OPCODE_POW: + return emit_pow(emit, inst); + case TGSI_OPCODE_RCP: + return emit_rcp(emit, inst); + case TGSI_OPCODE_RSQ: + return emit_rsq(emit, inst); + case TGSI_OPCODE_SAMPLE: + return emit_sample(emit, inst); + case TGSI_OPCODE_SCS: + return emit_scs(emit, inst); + case TGSI_OPCODE_SEQ: + return emit_seq(emit, inst); + case TGSI_OPCODE_SGE: + return emit_sge(emit, inst); + case TGSI_OPCODE_SGT: + return emit_sgt(emit, inst); + case TGSI_OPCODE_SIN: + return emit_sincos(emit, inst); + case TGSI_OPCODE_SLE: + return emit_sle(emit, inst); + case TGSI_OPCODE_SLT: + return emit_slt(emit, 
inst); + case TGSI_OPCODE_SNE: + return emit_sne(emit, inst); + case TGSI_OPCODE_SSG: + return emit_ssg(emit, inst); + case TGSI_OPCODE_ISSG: + return emit_issg(emit, inst); + case TGSI_OPCODE_SUB: + return emit_sub(emit, inst); + case TGSI_OPCODE_TEX: + return emit_tex(emit, inst); + case TGSI_OPCODE_TXP: + return emit_txp(emit, inst); + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXL: + return emit_txl_txb(emit, inst); + case TGSI_OPCODE_TXD: + return emit_txd(emit, inst); + case TGSI_OPCODE_TXF: + return emit_txf(emit, inst); + case TGSI_OPCODE_TXQ: + return emit_txq(emit, inst); + case TGSI_OPCODE_UIF: + return emit_if(emit, inst); + case TGSI_OPCODE_XPD: + return emit_xpd(emit, inst); + case TGSI_OPCODE_UMUL_HI: + case TGSI_OPCODE_IMUL_HI: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_IDIV: + /* These cases use only the FIRST of two destination registers */ + return emit_simple_1dst(emit, inst, 2, 0); + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_MOD: + /* These cases use only the SECOND of two destination registers */ + return emit_simple_1dst(emit, inst, 2, 1); + case TGSI_OPCODE_END: + if (!emit_post_helpers(emit)) + return FALSE; + return emit_simple(emit, inst); + + default: + debug_printf("Unimplemented tgsi instruction %s\n", + tgsi_get_opcode_name(opcode)); + return FALSE; + } + + return TRUE; +} + + +/** + * Emit the extra instructions to adjust the vertex position. + * There are two possible adjustments: + * 1. Converting from Gallium to VGPU10 coordinate space by applying the + * "prescale" and "pretranslate" values. + * 2. Undoing the viewport transformation when we use the swtnl/draw path. + * \param vs_pos_tmp_index which temporary register contains the vertex pos. + */ +static void +emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, + unsigned vs_pos_tmp_index) +{ + struct tgsi_full_src_register tmp_pos_src; + struct tgsi_full_dst_register pos_dst; + + /* Don't bother to emit any extra vertex instructions if vertex position is + * not written out + */ + if (emit->vposition.out_index == INVALID_INDEX) + return; + + tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); + pos_dst = make_dst_output_reg(emit->vposition.out_index); + + /* If non-adjusted vertex position register index + * is valid, copy the vertex position from the temporary + * vertex position register before it is modified by the + * prescale computation. + */ + if (emit->vposition.so_index != INVALID_INDEX) { + struct tgsi_full_dst_register pos_so_dst = + make_dst_output_reg(emit->vposition.so_index); + + /* MOV pos_so, tmp_pos */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, + &tmp_pos_src, FALSE); + } + + if (emit->vposition.need_prescale) { + /* This code adjusts the vertex position to match the VGPU10 convention. 
+ * If p is the position computed by the shader (usually by applying the + * modelview and projection matrices), the new position q is computed by: + * + * q.x = p.w * trans.x + p.x * scale.x + * q.y = p.w * trans.y + p.y * scale.y + * q.z = p.w * trans.z + p.z * scale.z; + * q.w = p.w * trans.w + p.w; + */ + struct tgsi_full_src_register tmp_pos_src_w = + scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); + struct tgsi_full_dst_register tmp_pos_dst = + make_dst_temp_reg(vs_pos_tmp_index); + struct tgsi_full_dst_register tmp_pos_dst_xyz = + writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); + + struct tgsi_full_src_register prescale_scale = + make_src_const_reg(emit->vposition.prescale_scale_index); + struct tgsi_full_src_register prescale_trans = + make_src_const_reg(emit->vposition.prescale_trans_index); + + /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, + &tmp_pos_src, &prescale_scale, FALSE); + + /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, + &prescale_trans, &tmp_pos_src, FALSE); + } + else if (emit->key.vs.undo_viewport) { + /* This code computes the final vertex position from the temporary + * vertex position by undoing the viewport transformation and the + * divide-by-W operation (we convert window coords back to clip coords). + * This is needed when we use the 'draw' module for fallbacks. + * If p is the temp pos in window coords, then the NDC coord q is: + * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w + * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w + * q.z = p.z * p.w + * q.w = p.w + * CONST[vs_viewport_index] contains: + * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } + */ + struct tgsi_full_dst_register tmp_pos_dst = + make_dst_temp_reg(vs_pos_tmp_index); + struct tgsi_full_dst_register tmp_pos_dst_xy = + writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); + struct tgsi_full_src_register tmp_pos_src_wwww = + scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); + + struct tgsi_full_dst_register pos_dst_xyz = + writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); + struct tgsi_full_dst_register pos_dst_w = + writemask_dst(&pos_dst, TGSI_WRITEMASK_W); + + struct tgsi_full_src_register vp_xyzw = + make_src_const_reg(emit->vs.viewport_index); + struct tgsi_full_src_register vp_zwww = + swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, + TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + + /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, + &tmp_pos_src, &vp_zwww, FALSE); + + /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, + &tmp_pos_src, &vp_xyzw, FALSE); + + /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, + &tmp_pos_src, &tmp_pos_src_wwww, FALSE); + + /* MOV pos.w, tmp_pos.w */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, + &tmp_pos_src, FALSE); + } + else if (vs_pos_tmp_index != INVALID_INDEX) { + /* This code is to handle the case where the temporary vertex + * position register is created when the vertex shader has stream + * output and prescale is disabled because rasterization is to be + * discarded. 
+ */ + struct tgsi_full_dst_register pos_dst = + make_dst_output_reg(emit->vposition.out_index); + + /* MOV pos, tmp_pos */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &pos_dst); + emit_src_register(emit, &tmp_pos_src); + end_emit_instruction(emit); + } +} + +static void +emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) +{ + if (emit->clip_mode == CLIP_DISTANCE) { + /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ + emit_clip_distance_instructions(emit); + + } else if (emit->clip_mode == CLIP_VERTEX) { + /* Convert TGSI CLIPVERTEX to CLIPDIST */ + emit_clip_vertex_instructions(emit); + } + + /** + * Emit vertex position and take care of legacy user planes only if + * there is a valid vertex position register index. + * This is to take care of the case + * where the shader doesn't output vertex position. Then in + * this case, don't bother to emit more vertex instructions. + */ + if (emit->vposition.out_index == INVALID_INDEX) + return; + + /** + * Emit per-vertex clipping instructions for legacy user defined clip planes. + * NOTE: we must emit the clip distance instructions before the + * emit_vpos_instructions() call since the later function will change + * the TEMP[vs_pos_tmp_index] value. + */ + if (emit->clip_mode == CLIP_LEGACY) { + /* Emit CLIPDIST for legacy user defined clip planes */ + emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); + } +} + + +/** + * Emit extra per-vertex instructions. This includes clip-coordinate + * space conversion and computing clip distances. This is called for + * each GS emit-vertex instruction and at the end of VS translation. + */ +static void +emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) +{ + const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; + + /* Emit clipping instructions based on clipping mode */ + emit_clipping_instructions(emit); + + /** + * Reset the temporary vertex position register index + * so that emit_dst_register() will use the real vertex position output + */ + emit->vposition.tmp_index = INVALID_INDEX; + + /* Emit vertex position instructions */ + emit_vpos_instructions(emit, vs_pos_tmp_index); + + /* Restore original vposition.tmp_index value for the next GS vertex. + * It doesn't matter for VS. + */ + emit->vposition.tmp_index = vs_pos_tmp_index; +} + +/** + * Translate the TGSI_OPCODE_EMIT GS instruction. + */ +static boolean +emit_vertex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned ret = TRUE; + + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + emit_vertex_instructions(emit); + + /* We can't use emit_simple() because the TGSI instruction has one + * operand (vertex stream number) which we must ignore for VGPU10. + */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); + end_emit_instruction(emit); + + return ret; +} + + +/** + * Emit the extra code to convert from VGPU10's boolean front-face + * register to TGSI's signed front-face register. + * + * TODO: Make temporary front-face register a scalar. 
+ */ +static void +emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + if (emit->fs.face_input_index != INVALID_INDEX) { + /* convert vgpu10 boolean face register to gallium +/-1 value */ + struct tgsi_full_dst_register tmp_dst = + make_dst_temp_reg(emit->fs.face_tmp_index); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register neg_one = + make_immediate_reg_float(emit, -1.0f); + + /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); + emit_dst_register(emit, &tmp_dst); + emit_face_register(emit); + emit_src_register(emit, &one); + emit_src_register(emit, &neg_one); + end_emit_instruction(emit); + } +} + + +/** + * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. + */ +static void +emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + if (emit->fs.fragcoord_input_index != INVALID_INDEX) { + struct tgsi_full_dst_register tmp_dst = + make_dst_temp_reg(emit->fs.fragcoord_tmp_index); + struct tgsi_full_dst_register tmp_dst_xyz = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); + struct tgsi_full_dst_register tmp_dst_w = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register fragcoord = + make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); + + /* save the input index */ + unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; + /* set to invalid to prevent substitution in emit_src_register() */ + emit->fs.fragcoord_input_index = INVALID_INDEX; + + /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &tmp_dst_xyz); + emit_src_register(emit, &fragcoord); + end_emit_instruction(emit); + + /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); + emit_dst_register(emit, &tmp_dst_w); + emit_src_register(emit, &one); + emit_src_register(emit, &fragcoord); + end_emit_instruction(emit); + + /* restore saved value */ + emit->fs.fragcoord_input_index = fragcoord_input_index; + } +} + + +/** + * Emit extra instructions to adjust VS inputs/attributes. This can + * mean casting a vertex attribute from int to float or setting the + * W component to 1, or both. 
+ */ +static void +emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) +{ + const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; + const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; + const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; + const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; + const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; + const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; + const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; + + unsigned adjust_mask = (save_w_1_mask | + save_itof_mask | + save_utof_mask | + save_is_bgra_mask | + save_puint_to_snorm_mask | + save_puint_to_uscaled_mask | + save_puint_to_sscaled_mask); + + assert(emit->unit == PIPE_SHADER_VERTEX); + + if (adjust_mask) { + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + struct tgsi_full_src_register one_int = + make_immediate_reg_int(emit, 1); + + /* We need to turn off these bitmasks while emitting the + * instructions below, then restore them afterward. + */ + emit->key.vs.adjust_attrib_w_1 = 0; + emit->key.vs.adjust_attrib_itof = 0; + emit->key.vs.adjust_attrib_utof = 0; + emit->key.vs.attrib_is_bgra = 0; + emit->key.vs.attrib_puint_to_snorm = 0; + emit->key.vs.attrib_puint_to_uscaled = 0; + emit->key.vs.attrib_puint_to_sscaled = 0; + + while (adjust_mask) { + unsigned index = u_bit_scan(&adjust_mask); + + /* skip the instruction if this vertex attribute is not being used */ + if (emit->info.input_usage_mask[index] == 0) + continue; + + unsigned tmp = emit->vs.adjusted_input[index]; + struct tgsi_full_src_register input_src = + make_src_reg(TGSI_FILE_INPUT, index); + + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst_w = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); + + /* ITOF/UTOF/MOV tmp, input[index] */ + if (save_itof_mask & (1 << index)) { + emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, + &tmp_dst, &input_src, FALSE); + } + else if (save_utof_mask & (1 << index)) { + emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, + &tmp_dst, &input_src, FALSE); + } + else if (save_puint_to_snorm_mask & (1 << index)) { + emit_puint_to_snorm(emit, &tmp_dst, &input_src); + } + else if (save_puint_to_uscaled_mask & (1 << index)) { + emit_puint_to_uscaled(emit, &tmp_dst, &input_src); + } + else if (save_puint_to_sscaled_mask & (1 << index)) { + emit_puint_to_sscaled(emit, &tmp_dst, &input_src); + } + else { + assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &tmp_dst, &input_src, FALSE); + } + + if (save_is_bgra_mask & (1 << index)) { + emit_swap_r_b(emit, &tmp_dst, &tmp_src); + } + + if (save_w_1_mask & (1 << index)) { + /* MOV tmp.w, 1.0 */ + if (emit->key.vs.attrib_is_pure_int & (1 << index)) { + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &tmp_dst_w, &one_int, FALSE); + } + else { + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &tmp_dst_w, &one, FALSE); + } + } + } + + emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; + emit->key.vs.adjust_attrib_itof = save_itof_mask; + emit->key.vs.adjust_attrib_utof = save_utof_mask; + emit->key.vs.attrib_is_bgra = save_is_bgra_mask; + emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; + emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; + emit->key.vs.attrib_puint_to_sscaled = 
save_puint_to_sscaled_mask; + } +} + + +/** + * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed + * to implement some instructions. We pre-allocate those values here + * in the immediate constant buffer. + */ +static void +alloc_common_immediates(struct svga_shader_emitter_v10 *emit) +{ + unsigned n = 0; + + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); + + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f); + + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 0, 1, 0, -1); + + if (emit->key.vs.attrib_puint_to_snorm) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); + } + + if (emit->key.vs.attrib_puint_to_uscaled) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); + } + + if (emit->key.vs.attrib_puint_to_sscaled) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 22, 12, 2, 0); + + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 22, 30, 0, 0); + } + + assert(n <= Elements(emit->common_immediate_pos)); + emit->num_common_immediates = n; +} + + +/** + * Emit any extra/helper declarations/code that we might need between + * the declaration section and code section. + */ +static boolean +emit_pre_helpers(struct svga_shader_emitter_v10 *emit) +{ + /* Properties */ + if (emit->unit == PIPE_SHADER_GEOMETRY) + emit_property_instructions(emit); + + /* Declare inputs */ + if (!emit_input_declarations(emit)) + return FALSE; + + /* Declare outputs */ + if (!emit_output_declarations(emit)) + return FALSE; + + /* Declare temporary registers */ + emit_temporaries_declaration(emit); + + /* Declare constant registers */ + emit_constant_declaration(emit); + + /* Declare samplers and resources */ + emit_sampler_declarations(emit); + emit_resource_declarations(emit); + + /* Declare clip distance output registers */ + if (emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY) { + emit_clip_distance_declarations(emit); + } + + alloc_common_immediates(emit); + + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { + float alpha = emit->key.fs.alpha_ref; + emit->fs.alpha_ref_index = + alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); + } + + /* Now, emit the constant block containing all the immediates + * declared by shader, as well as the extra ones seen above. + */ + emit_vgpu10_immediates_block(emit); + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + emit_frontface_instructions(emit); + emit_fragcoord_instructions(emit); + } + else if (emit->unit == PIPE_SHADER_VERTEX) { + emit_vertex_attrib_instructions(emit); + } + + return TRUE; +} + + +/** + * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w + * against the alpha reference value and discards the fragment if the + * comparison fails. 
+ */ +static void +emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, + unsigned fs_color_tmp_index) +{ + /* compare output color's alpha to alpha ref and kill */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_src_register tmp_src_x = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register color_src = + make_src_temp_reg(fs_color_tmp_index); + struct tgsi_full_src_register color_src_w = + scalar_src(&color_src, TGSI_SWIZZLE_W); + struct tgsi_full_src_register ref_src = + make_src_immediate_reg(emit->fs.alpha_ref_index); + struct tgsi_full_dst_register color_dst = + make_dst_output_reg(emit->fs.color_out_index[0]); + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + /* dst = src0 'alpha_func' src1 */ + emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, + &color_src_w, &ref_src); + + /* DISCARD if dst.x == 0 */ + begin_emit_instruction(emit); + emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ + emit_src_register(emit, &tmp_src_x); + end_emit_instruction(emit); + + /* If we don't need to broadcast the color below or set fragments to + * white, emit final color here. + */ + if (emit->key.fs.write_color0_to_n_cbufs <= 1 && + !emit->key.fs.white_fragments) { + /* MOV output.color, tempcolor */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, + &color_src, FALSE); /* XXX saturate? */ + } + + free_temp_indexes(emit); +} + + +/** + * When we need to emit white for all fragments (for emulating XOR logicop + * mode), this function copies white into the temporary color output register. + */ +static void +emit_set_color_white(struct svga_shader_emitter_v10 *emit, + unsigned fs_color_tmp_index) +{ + struct tgsi_full_dst_register color_dst = + make_dst_temp_reg(fs_color_tmp_index); + struct tgsi_full_src_register white = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE); +} + + +/** + * Emit instructions for writing a single color output to multiple + * color buffers. + * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or + * when key.fs.white_fragments is true). + * property is set and the number of render targets is greater than one. + * \param fs_color_tmp_index index of the temp register that holds the + * color to broadcast. + */ +static void +emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, + unsigned fs_color_tmp_index) +{ + const unsigned n = emit->key.fs.write_color0_to_n_cbufs; + unsigned i; + struct tgsi_full_src_register color_src = + make_src_temp_reg(fs_color_tmp_index); + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + for (i = 0; i < n; i++) { + unsigned output_reg = emit->fs.color_out_index[i]; + struct tgsi_full_dst_register color_dst = + make_dst_output_reg(output_reg); + + /* Fill in this semantic here since we'll use it later in + * emit_dst_register(). + */ + emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; + + /* MOV output.color[i], tempcolor */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, + &color_src, FALSE); /* XXX saturate? */ + } +} + + +/** + * Emit extra helper code after the original shader code, but before the + * last END/RET instruction. + * For vertex shaders this means emitting the extra code to apply the + * prescale scale/translation. 
+ */ +static boolean +emit_post_helpers(struct svga_shader_emitter_v10 *emit) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + emit_vertex_instructions(emit); + } + else if (emit->unit == PIPE_SHADER_FRAGMENT) { + const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; + + /* We no longer want emit_dst_register() to substitute the + * temporary fragment color register for the real color output. + */ + emit->fs.color_tmp_index = INVALID_INDEX; + + if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { + emit_alpha_test_instructions(emit, fs_color_tmp_index); + } + if (emit->key.fs.white_fragments) { + emit_set_color_white(emit, fs_color_tmp_index); + } + if (emit->key.fs.write_color0_to_n_cbufs > 1 || + emit->key.fs.white_fragments) { + emit_broadcast_color_instructions(emit, fs_color_tmp_index); + } + } + + return TRUE; +} + + +/** + * Translate the TGSI tokens into VGPU10 tokens. + */ +static boolean +emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, + const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + boolean ret = TRUE; + boolean pre_helpers_emitted = FALSE; + unsigned inst_number = 0; + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!pre_helpers_emitted) { + ret = emit_pre_helpers(emit); + if (!ret) + goto done; + pre_helpers_emitted = TRUE; + } + ret = emit_vgpu10_instruction(emit, inst_number++, + &parse.FullToken.FullInstruction); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); + if (!ret) + goto done; + break; + + default: + break; + } + } + +done: + tgsi_parse_free(&parse); + return ret; +} + + +/** + * Emit the first VGPU10 shader tokens. + */ +static boolean +emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) +{ + VGPU10ProgramToken ptoken; + + /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ + ptoken.majorVersion = 4; + ptoken.minorVersion = 0; + ptoken.programType = translate_shader_type(emit->unit); + if (!emit_dword(emit, ptoken.value)) + return FALSE; + + /* Second token: total length of shader, in tokens. We can't fill this + * in until we're all done. Emit zero for now. + */ + return emit_dword(emit, 0); +} + + +static boolean +emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) +{ + VGPU10ProgramToken *tokens; + + /* Replace the second token with total shader length */ + tokens = (VGPU10ProgramToken *) emit->buf; + tokens[1].value = emit_get_num_tokens(emit); + + return TRUE; +} + + +/** + * Modify the FS to read the BCOLORs and use the FACE register + * to choose between the front/back colors. + */ +static const struct tgsi_token * +transform_fs_twoside(const struct tgsi_token *tokens) +{ + if (0) { + debug_printf("Before tgsi_add_two_side ------------------\n"); + tgsi_dump(tokens,0); + } + tokens = tgsi_add_two_side(tokens); + if (0) { + debug_printf("After tgsi_add_two_side ------------------\n"); + tgsi_dump(tokens, 0); + } + return tokens; +} + + +/** + * Modify the FS to do polygon stipple. 
+ */ +static const struct tgsi_token * +transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, + const struct tgsi_token *tokens) +{ + const struct tgsi_token *new_tokens; + unsigned unit; + + if (0) { + debug_printf("Before pstipple ------------------\n"); + tgsi_dump(tokens,0); + } + + new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, + TGSI_FILE_INPUT); + + emit->fs.pstipple_sampler_unit = unit; + + /* Setup texture state for stipple */ + emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D; + emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; + emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; + emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; + emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; + + if (0) { + debug_printf("After pstipple ------------------\n"); + tgsi_dump(new_tokens, 0); + } + + return new_tokens; +} + +/** + * Modify the FS to support anti-aliasing point. + */ +static const struct tgsi_token * +transform_fs_aapoint(const struct tgsi_token *tokens, + int aa_coord_index) +{ + if (0) { + debug_printf("Before tgsi_add_aa_point ------------------\n"); + tgsi_dump(tokens,0); + } + tokens = tgsi_add_aa_point(tokens, aa_coord_index); + if (0) { + debug_printf("After tgsi_add_aa_point ------------------\n"); + tgsi_dump(tokens, 0); + } + return tokens; +} + +/** + * This is the main entrypoint for the TGSI -> VPGU10 translator. + */ +struct svga_shader_variant * +svga_tgsi_vgpu10_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, + unsigned unit) +{ + struct svga_shader_variant *variant = NULL; + struct svga_shader_emitter_v10 *emit; + const struct tgsi_token *tokens = shader->tokens; + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_geometry_shader *gs = svga->curr.gs; + + assert(unit == PIPE_SHADER_VERTEX || + unit == PIPE_SHADER_GEOMETRY || + unit == PIPE_SHADER_FRAGMENT); + + /* These two flags cannot be used together */ + assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); + + /* + * Setup the code emitter + */ + emit = alloc_emitter(); + if (!emit) + return NULL; + + emit->unit = unit; + emit->key = *key; + + emit->vposition.need_prescale = (emit->key.vs.need_prescale || + emit->key.gs.need_prescale); + emit->vposition.tmp_index = INVALID_INDEX; + emit->vposition.so_index = INVALID_INDEX; + emit->vposition.out_index = INVALID_INDEX; + + emit->fs.color_tmp_index = INVALID_INDEX; + emit->fs.face_input_index = INVALID_INDEX; + emit->fs.fragcoord_input_index = INVALID_INDEX; + + emit->gs.prim_id_index = INVALID_INDEX; + + emit->clip_dist_out_index = INVALID_INDEX; + emit->clip_dist_tmp_index = INVALID_INDEX; + emit->clip_dist_so_index = INVALID_INDEX; + emit->clip_vertex_out_index = INVALID_INDEX; + + if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { + emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; + } + + if (unit == PIPE_SHADER_FRAGMENT) { + if (key->fs.light_twoside) { + tokens = transform_fs_twoside(tokens); + } + if (key->fs.pstipple) { + const struct tgsi_token *new_tokens = + transform_fs_pstipple(emit, tokens); + if (tokens != shader->tokens) { + /* free the two-sided shader tokens */ + tgsi_free_tokens(tokens); + } + tokens = new_tokens; + } + if (key->fs.aa_point) { + tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); + } + } + + if (SVGA_DEBUG & DEBUG_TGSI) { + debug_printf("#####################################\n"); + debug_printf("### TGSI Shader %u\n", shader->id); + tgsi_dump(tokens, 0); + } + + /** + * Rescan the header if the token string is 
different from the one + * included in the shader; otherwise, the header info is already up-to-date + */ + if (tokens != shader->tokens) { + tgsi_scan_shader(tokens, &emit->info); + } else { + emit->info = shader->info; + } + + emit->num_outputs = emit->info.num_outputs; + + if (unit == PIPE_SHADER_FRAGMENT) { + /* Compute FS input remapping to match the output from VS/GS */ + if (gs) { + svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); + } else { + assert(vs); + svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); + } + } else if (unit == PIPE_SHADER_GEOMETRY) { + assert(vs); + svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); + } + + determine_clipping_mode(emit); + + if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { + if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { + /* if there is stream output declarations associated + * with this shader or the shader writes to ClipDistance + * then reserve extra registers for the non-adjusted vertex position + * and the ClipDistance shadow copy + */ + emit->vposition.so_index = emit->num_outputs++; + + if (emit->clip_mode == CLIP_DISTANCE) { + emit->clip_dist_so_index = emit->num_outputs++; + if (emit->info.num_written_clipdistance > 4) + emit->num_outputs++; + } + } + } + + /* + * Do actual shader translation. + */ + if (!emit_vgpu10_header(emit)) { + debug_printf("svga: emit VGPU10 header failed\n"); + goto cleanup; + } + + if (!emit_vgpu10_instructions(emit, tokens)) { + debug_printf("svga: emit VGPU10 instructions failed\n"); + goto cleanup; + } + + if (!emit_vgpu10_tail(emit)) { + debug_printf("svga: emit VGPU10 tail failed\n"); + goto cleanup; + } + + if (emit->register_overflow) { + goto cleanup; + } + + /* + * Create, initialize the 'variant' object. + */ + variant = svga_new_shader_variant(svga); + if (!variant) + goto cleanup; + + variant->shader = shader; + variant->nr_tokens = emit_get_num_tokens(emit); + variant->tokens = (const unsigned *)emit->buf; + emit->buf = NULL; /* buffer is no longer owed by emitter context */ + memcpy(&variant->key, key, sizeof(*key)); + variant->id = UTIL_BITMASK_INVALID_INDEX; + + /* The extra constant starting offset starts with the number of + * shader constants declared in the shader. + */ + variant->extra_const_start = emit->num_shader_consts[0]; + if (key->gs.wide_point) { + /** + * The extra constant added in the transformed shader + * for inverse viewport scale is to be supplied by the driver. + * So the extra constant starting offset needs to be reduced by 1. + */ + assert(variant->extra_const_start > 0); + variant->extra_const_start--; + } + + variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; + + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). + */ + variant->constant_color_output = + emit->constant_color_output && emit->num_output_writes == 1; + + /** keep track in the variant if flat interpolation is used + * for any of the varyings. 
+ */ + variant->uses_flat_interp = emit->uses_flat_interp; + + if (tokens != shader->tokens) { + tgsi_free_tokens(tokens); + } + +cleanup: + free_emitter(emit); + + return variant; +} diff --git a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h index 19d074fd6..562c6690f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h @@ -79,15 +79,20 @@ struct winsys_handle; #define SVGA_FENCE_FLAG_EXEC (1 << 0) #define SVGA_FENCE_FLAG_QUERY (1 << 1) -#define SVGA_SURFACE_USAGE_SHARED (1 << 0) +#define SVGA_SURFACE_USAGE_SHARED (1 << 0) +#define SVGA_SURFACE_USAGE_SCANOUT (1 << 1) + +#define SVGA_QUERY_FLAG_SET (1 << 0) +#define SVGA_QUERY_FLAG_REF (1 << 1) + +#define SVGA_HINT_FLAG_CAN_PRE_FLUSH (1 << 0) /* Can preemptively flush */ /** Opaque surface handle */ struct svga_winsys_surface; - /** Opaque guest-backed objects */ struct svga_winsys_gb_shader; - +struct svga_winsys_gb_query; /** @@ -143,7 +148,8 @@ struct svga_winsys_context uint32 *shid, uint32 *mobid, uint32 *offset, - struct svga_winsys_gb_shader *shader); + struct svga_winsys_gb_shader *shader, + unsigned flags); /** * Emit a relocation for a guest-backed context. @@ -173,6 +179,26 @@ struct svga_winsys_context uint32 offset, unsigned flags); + /** + * Emit a relocation for a guest-backed query object. + * + * NOTE: Order of this call does matter. It should be the same order + * as relocations appear in the command buffer. + */ + void + (*query_relocation)(struct svga_winsys_context *swc, + SVGAMobId *id, + struct svga_winsys_gb_query *query); + + /** + * Bind queries to context. + * \param flags exactly one of SVGA_QUERY_FLAG_SET/REF + */ + enum pipe_error + (*query_bind)(struct svga_winsys_context *sws, + struct svga_winsys_gb_query *query, + unsigned flags); + void (*commit)(struct svga_winsys_context *swc); @@ -189,6 +215,11 @@ struct svga_winsys_context uint32 cid; /** + * Flags to hint the current context state + */ + uint32 hints; + + /** ** BEGIN new functions for guest-backed surfaces. **/ @@ -219,6 +250,36 @@ struct svga_winsys_context struct svga_winsys_surface *surface, boolean *rebind); + /** + * Create and define a DX GB shader that resides in the device COTable. + * Caller of this function will issue the DXDefineShader command. + */ + struct svga_winsys_gb_shader * + (*shader_create)(struct svga_winsys_context *swc, + uint32 shaderId, + SVGA3dShaderType shaderType, + const uint32 *bytecode, + uint32 bytecodeLen); + + /** + * Destroy a DX GB shader. + * This function will issue the DXDestroyShader command. + */ + void + (*shader_destroy)(struct svga_winsys_context *swc, + struct svga_winsys_gb_shader *shader); + + /** + * Rebind a DX GB resource to a context. + * This is called to reference a DX GB resource in the command stream in + * order to page in the associated resource in case the memory has been + * paged out, and to fence it if necessary after command submission. 
+ */ + enum pipe_error + (*resource_rebind)(struct svga_winsys_context *swc, + struct svga_winsys_surface *surface, + struct svga_winsys_gb_shader *shader, + unsigned flags); }; @@ -260,7 +321,7 @@ struct svga_winsys_screen * \param format Format Device surface format * \param usage Winsys usage: bitmask of SVGA_SURFACE_USAGE_x flags * \param size Surface size given in device format - * \param numFaces Number of faces of the surface (1 or 6) + * \param numLayers Number of layers of the surface (or cube faces) * \param numMipLevels Number of mipmap levels for each face * * Returns the surface ID (sid). Surfaces are generic @@ -274,7 +335,7 @@ struct svga_winsys_screen * - Each face has a list of mipmap levels * * - Each mipmap image may have multiple volume - * slices, if the image is three dimensional. + * slices for 3D image, or multiple 2D slices for texture array. * * - Each slice is a 2D array of 'blocks' * @@ -296,8 +357,9 @@ struct svga_winsys_screen SVGA3dSurfaceFormat format, unsigned usage, SVGA3dSize size, - uint32 numFaces, - uint32 numMipLevels); + uint32 numLayers, + uint32 numMipLevels, + unsigned sampleCount); /** * Creates a surface from a winsys handle. @@ -343,7 +405,7 @@ struct svga_winsys_screen (*surface_can_create)(struct svga_winsys_screen *sws, SVGA3dSurfaceFormat format, SVGA3dSize size, - uint32 numFaces, + uint32 numLayers, uint32 numMipLevels); /** @@ -420,7 +482,7 @@ struct svga_winsys_screen */ struct svga_winsys_gb_shader * (*shader_create)(struct svga_winsys_screen *sws, - SVGA3dShaderType type, + SVGA3dShaderType shaderType, const uint32 *bytecode, uint32 bytecodeLen); @@ -432,6 +494,46 @@ struct svga_winsys_screen (*shader_destroy)(struct svga_winsys_screen *sws, struct svga_winsys_gb_shader *shader); + /** + * Create and define a GB query. + */ + struct svga_winsys_gb_query * + (*query_create)(struct svga_winsys_screen *sws, uint32 len); + + /** + * Destroy a GB query. + */ + void + (*query_destroy)(struct svga_winsys_screen *sws, + struct svga_winsys_gb_query *query); + + /** + * Initialize the query state of the query that resides in the slot + * specified in offset + * \return zero on success. + */ + int + (*query_init)(struct svga_winsys_screen *sws, + struct svga_winsys_gb_query *query, + unsigned offset, + SVGA3dQueryState queryState); + + /** + * Inquire for the query state and result of the query that resides + * in the slot specified in offset + */ + void + (*query_get_result)(struct svga_winsys_screen *sws, + struct svga_winsys_gb_query *query, + unsigned offset, + SVGA3dQueryState *queryState, + void *result, uint32 resultLen); + + /** Have VGPU v10 hardware? 
*/ + boolean have_vgpu10; + + /** To rebind resources at the beginnning of a new command buffer */ + boolean need_to_rebind_resources; }; diff --git a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c index 0874d2321..252e0d6c8 100644 --- a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1369,12 +1369,6 @@ dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd) case SVGA3D_BUMPL6V5U5: _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); break; - case SVGA3D_BUMPX8L8V8U8: - _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); - break; - case SVGA3D_BUMPL8V8U8: - _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); - break; case SVGA3D_ARGB_S10E5: _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); break; @@ -1528,15 +1522,6 @@ dump_SVGA3dCmdDestroyGBShader(const SVGA3dCmdDestroyGBShader *cmd) } static void -dump_SVGA3dCmdBindGBShaderConsts(const SVGA3dCmdBindGBShaderConsts *cmd) -{ - _debug_printf("\t\t.cid = %u\n", cmd->cid); - _debug_printf("\t\t.shaderType = %u\n", cmd->shaderType); - _debug_printf("\t\t.shaderConstType = %u\n", cmd->shaderConstType); - _debug_printf("\t\t.sid = %u\n", cmd->sid); -} - -static void dump_SVGA3dCmdBindGBSurface(const SVGA3dCmdBindGBSurface *cmd) { _debug_printf("\t\t.sid = %u\n", cmd->sid); @@ -1929,14 +1914,6 @@ svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) body = (const uint8_t *)&cmd[1]; } break; - case SVGA_3D_CMD_BIND_SHADERCONSTS: - _debug_printf("\tSVGA_3D_CMD_BIND_SHADERCONSTS\n"); - { - const SVGA3dCmdBindGBShaderConsts *cmd = (const SVGA3dCmdBindGBShaderConsts *) body; - dump_SVGA3dCmdBindGBShaderConsts(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; case SVGA_3D_CMD_BIND_GB_SURFACE: _debug_printf("\tSVGA_3D_CMD_BIND_GB_SURFACE\n"); { diff --git a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c index ad1549d9f..03a63cf5e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/lib/mesa/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -144,7 +144,7 @@ const struct sh_opcode_info *svga_opcode_info( uint op ) { struct sh_opcode_info *info; - if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { + if (op >= ARRAY_SIZE(opcode_info)) { /* The opcode is either PHASE, COMMENT, END or out of range. */ assert( 0 ); |
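
The emit_vgpu10_header()/emit_vgpu10_tail() pair in the svga_tgsi_vgpu10.c hunk above reserves a length slot in the two-token program header and back-patches it once the whole shader body has been emitted. Below is a minimal standalone C sketch of that pattern; the bitfield shift positions and the fixed MAX_TOKENS buffer are illustrative assumptions only (the authoritative token layout is VGPU10ProgramToken in include/VGPU10ShaderTokens.h, and the real emitter grows its buffer dynamically).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_TOKENS 1024

struct token_buf {
   uint32_t tokens[MAX_TOKENS];
   unsigned num_tokens;
};

static int
emit_dword(struct token_buf *buf, uint32_t dw)
{
   if (buf->num_tokens >= MAX_TOKENS)
      return 0;                     /* overflow; the real emitter reallocates */
   buf->tokens[buf->num_tokens++] = dw;
   return 1;
}

int
main(void)
{
   struct token_buf buf = { {0}, 0 };

   /* Token 0: version 4.0 plus the program type, packed into one dword.
    * The shift positions here are illustrative, not the real bitfield layout. */
   const uint32_t program_type = 0;          /* e.g. pixel shader */
   if (!emit_dword(&buf, (4u << 4) | (0u << 0) | (program_type << 16)))
      return 1;

   /* Token 1: total shader length in tokens; unknown yet, so emit zero. */
   if (!emit_dword(&buf, 0))
      return 1;

   /* ...declarations and instructions would be emitted here... */
   emit_dword(&buf, 0xdeadbeef);             /* stand-in for a real token */

   /* Tail: back-patch token 1 with the final count, as emit_vgpu10_tail()
    * does via emit_get_num_tokens(). */
   buf.tokens[1] = buf.num_tokens;

   printf("shader is %u tokens long\n", (unsigned) buf.tokens[1]);
   assert(buf.tokens[1] == 3);
   return 0;
}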
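
The updated surface_create()/surface_can_create() signatures in the svga_winsys.h hunk replace numFaces with numLayers and add a sampleCount. The sketch below is a hedged illustration of how a caller might choose numLayers per resource kind; the enum and helper are hypothetical, not the driver's actual code, and follow the hunk's own comments (cube surfaces use one layer per face, texture arrays use their array size, 3D slice count is carried in the surface size rather than in numLayers).

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the caller's texture-target enum. */
enum example_target { EX_TEX_2D, EX_TEX_3D, EX_TEX_CUBE, EX_TEX_2D_ARRAY };

/* Illustrative mapping only: one layer per cube face, the array size for
 * texture arrays, and a single layer otherwise (3D depth travels in the
 * SVGA3dSize, not in numLayers). */
static uint32_t
example_num_layers(enum example_target target, uint32_t array_size)
{
   switch (target) {
   case EX_TEX_CUBE:     return 6;
   case EX_TEX_2D_ARRAY: return array_size;
   default:              return 1;
   }
}

int
main(void)
{
   printf("cube: %u layers, 8-element array: %u layers\n",
          (unsigned) example_num_layers(EX_TEX_CUBE, 1),
          (unsigned) example_num_layers(EX_TEX_2D_ARRAY, 8));
   return 0;
}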
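
The final svga_shader_op.c hunk swaps the open-coded sizeof division for ARRAY_SIZE. Mesa supplies that macro from its util headers; the self-contained sketch below uses the usual portable definition to show the bounds check the function performs. The table contents are made up for illustration.

#include <stdio.h>

/* Usual portable definition; Mesa gets it from its util headers. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct sh_opcode_info {
   const char *mnemonic;
};

static const struct sh_opcode_info opcode_info[] = {
   { "nop" }, { "mov" }, { "add" },
};

/* Same shape as the bounds check in svga_opcode_info(), minus the assert. */
static const struct sh_opcode_info *
lookup_opcode(unsigned op)
{
   if (op >= ARRAY_SIZE(opcode_info))
      return NULL;
   return &opcode_info[op];
}

int
main(void)
{
   printf("%u opcodes, op 1 = %s\n",
          (unsigned) ARRAY_SIZE(opcode_info), lookup_opcode(1)->mnemonic);
   return 0;
}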