summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/drivers/svga
diff options
context:
space:
mode:
author Jonathan Gray <jsg@cvs.openbsd.org> 2022-09-02 05:47:02 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org> 2022-09-02 05:47:02 +0000
commit 0dbbf1e0708df85a357d70e2708c0a11aeb5480e (patch)
tree 6656ff8eb8b15a2fc1c02888973caf618388cfd0 /lib/mesa/src/gallium/drivers/svga
parent 5f66494d31f735486b8222ecfa0a0c9046e92543 (diff)
Merge Mesa 22.1.7
Diffstat (limited to 'lib/mesa/src/gallium/drivers/svga')
-rw-r--r--lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h989
-rw-r--r--lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h870
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_cmd.c2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_cmd.h70
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_context.c63
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_context.h218
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_debug.h2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_draw.c320
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_format.c201
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_format.h2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c103
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c47
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c8
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c104
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c4
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c7
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c44
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h23
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c230
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c178
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h31
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c4
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_screen.c207
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_screen.h19
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c12
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h3
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_shader.c194
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_shader.h32
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state.c78
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_constants.c379
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c16
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_fs.c6
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_gs.c6
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c2
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_rss.c59
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c272
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_state_vs.c6
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_surface.c74
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c92
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c1961
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_winsys.h28
45 files changed, 5439 insertions, 1535 deletions
diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
index 1eff07d27..8178c467e 100644
--- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
+++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
@@ -1,7 +1,7 @@
/**************************************************************************
*
- * Copyright © 1998-2015 VMware, Inc., Palo Alto, CA., USA
- * All Rights Reserved.
+ * Copyright 1998-2022 VMware, Inc.
+ * SPDX-License-Identifier: GPL-2.0 OR MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -61,108 +61,292 @@ enum svga3d_block_desc {
SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */
SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with blue channel data */
- SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel data */
- SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel data */
- SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel data */
- SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with blue channel data */
- SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel data */
- SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel data */
- SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance data */
+ SVGA3DBLOCKDESC_W = 1 << 0,
+ SVGA3DBLOCKDESC_BUMP_L = 1 << 0,
+
+ /* Format contains Green/V data */
+ SVGA3DBLOCKDESC_GREEN = 1 << 1,
+ SVGA3DBLOCKDESC_V = 1 << 1,
+
+ /* Format contains Red/U/Luminance data */
+ SVGA3DBLOCKDESC_RED = 1 << 2,
+ SVGA3DBLOCKDESC_U = 1 << 2,
+ SVGA3DBLOCKDESC_LUMINANCE = 1 << 2,
+
SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha channel */
SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel data */
SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of data */
SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of data depending
on the compression method used */
- SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE floating point
- representation in all channels */
- SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video U and V */
- SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store data. */
- SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */
- SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */
- SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */
- SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */
- SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV, e.g., NV12. */
- SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate Y, U, V, e.g., YV12. */
- SVGA3DBLOCKDESC_DEPTH = 1 << 15, /* Block with depth channel */
- SVGA3DBLOCKDESC_STENCIL = 1 << 16, /* Block with a stencil channel */
-
- SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED |
- SVGA3DBLOCKDESC_GREEN,
- SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG |
- SVGA3DBLOCKDESC_BLUE,
- SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB |
- SVGA3DBLOCKDESC_SRGB,
- SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB |
- SVGA3DBLOCKDESC_ALPHA,
- SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA |
- SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_FP = 1 << 6,
+
+ SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 7,
+ SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 8,
+ SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 9,
+ SVGA3DBLOCKDESC_STENCIL = 1 << 11,
+ SVGA3DBLOCKDESC_TYPELESS = 1 << 12,
+ SVGA3DBLOCKDESC_SINT = 1 << 13,
+ SVGA3DBLOCKDESC_UINT = 1 << 14,
+ SVGA3DBLOCKDESC_NORM = 1 << 15,
+ SVGA3DBLOCKDESC_SRGB = 1 << 16,
+ SVGA3DBLOCKDESC_EXP = 1 << 17,
+ SVGA3DBLOCKDESC_COLOR = 1 << 18,
+ SVGA3DBLOCKDESC_DEPTH = 1 << 19,
+ SVGA3DBLOCKDESC_BUMP = 1 << 20,
+ SVGA3DBLOCKDESC_YUV_VIDEO = 1 << 21,
+ SVGA3DBLOCKDESC_MIXED = 1 << 22,
+ SVGA3DBLOCKDESC_CX = 1 << 23,
+
+ /* Different compressed format groups. */
+ SVGA3DBLOCKDESC_BC1 = 1 << 24,
+ SVGA3DBLOCKDESC_BC2 = 1 << 25,
+ SVGA3DBLOCKDESC_BC3 = 1 << 26,
+ SVGA3DBLOCKDESC_BC4 = 1 << 27,
+ SVGA3DBLOCKDESC_BC5 = 1 << 28,
+ SVGA3DBLOCKDESC_BC6H = 1 << 29,
+ SVGA3DBLOCKDESC_BC7 = 1 << 30,
+ SVGA3DBLOCKDESC_COMPRESSED_MASK = SVGA3DBLOCKDESC_BC1 |
+ SVGA3DBLOCKDESC_BC2 |
+ SVGA3DBLOCKDESC_BC3 |
+ SVGA3DBLOCKDESC_BC4 |
+ SVGA3DBLOCKDESC_BC5 |
+ SVGA3DBLOCKDESC_BC6H |
+ SVGA3DBLOCKDESC_BC7,
+
+ SVGA3DBLOCKDESC_A_UINT = SVGA3DBLOCKDESC_ALPHA |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_A_UNORM = SVGA3DBLOCKDESC_A_UINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_R_UINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_R_UNORM = SVGA3DBLOCKDESC_R_UINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_R_SINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_SINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_R_SNORM = SVGA3DBLOCKDESC_R_SINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_G_UINT = SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RG_UINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RG_UNORM = SVGA3DBLOCKDESC_RG_UINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_RG_SINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_SINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RG_SNORM = SVGA3DBLOCKDESC_RG_SINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_RGB_UINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_BLUE |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RGB_SINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_BLUE |
+ SVGA3DBLOCKDESC_SINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RGB_UNORM = SVGA3DBLOCKDESC_RGB_UINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_RGB_UNORM_SRGB = SVGA3DBLOCKDESC_RGB_UNORM |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_RGBA_UINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_BLUE |
+ SVGA3DBLOCKDESC_ALPHA |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RGBA_UNORM = SVGA3DBLOCKDESC_RGBA_UINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_RGBA_UNORM_SRGB = SVGA3DBLOCKDESC_RGBA_UNORM |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_RGBA_SINT = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_BLUE |
+ SVGA3DBLOCKDESC_ALPHA |
+ SVGA3DBLOCKDESC_SINT |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RGBA_SNORM = SVGA3DBLOCKDESC_RGBA_SINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_BLUE |
+ SVGA3DBLOCKDESC_ALPHA |
+ SVGA3DBLOCKDESC_FP |
+ SVGA3DBLOCKDESC_COLOR,
SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U |
- SVGA3DBLOCKDESC_V,
+ SVGA3DBLOCKDESC_V |
+ SVGA3DBLOCKDESC_BUMP,
SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV |
- SVGA3DBLOCKDESC_LUMINANCE,
+ SVGA3DBLOCKDESC_BUMP_L |
+ SVGA3DBLOCKDESC_MIXED |
+ SVGA3DBLOCKDESC_BUMP,
SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV |
- SVGA3DBLOCKDESC_W,
+ SVGA3DBLOCKDESC_W |
+ SVGA3DBLOCKDESC_BUMP,
SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW |
- SVGA3DBLOCKDESC_ALPHA,
+ SVGA3DBLOCKDESC_ALPHA |
+ SVGA3DBLOCKDESC_MIXED |
+ SVGA3DBLOCKDESC_BUMP,
SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U |
SVGA3DBLOCKDESC_V |
SVGA3DBLOCKDESC_W |
- SVGA3DBLOCKDESC_Q,
- SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE |
- SVGA3DBLOCKDESC_ALPHA,
+ SVGA3DBLOCKDESC_Q |
+ SVGA3DBLOCKDESC_BUMP,
+ SVGA3DBLOCKDESC_L_UNORM = SVGA3DBLOCKDESC_LUMINANCE |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_NORM |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_LA_UNORM = SVGA3DBLOCKDESC_LUMINANCE |
+ SVGA3DBLOCKDESC_ALPHA |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_NORM |
+ SVGA3DBLOCKDESC_COLOR,
SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED |
- SVGA3DBLOCKDESC_IEEE_FP,
+ SVGA3DBLOCKDESC_FP |
+ SVGA3DBLOCKDESC_COLOR,
SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP |
- SVGA3DBLOCKDESC_GREEN,
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_COLOR,
SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP |
- SVGA3DBLOCKDESC_BLUE,
- SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP |
- SVGA3DBLOCKDESC_ALPHA,
- SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH |
- SVGA3DBLOCKDESC_STENCIL,
- SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO |
- SVGA3DBLOCKDESC_Y,
+ SVGA3DBLOCKDESC_BLUE |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_YUV_VIDEO |
+ SVGA3DBLOCKDESC_COLOR,
SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA |
- SVGA3DBLOCKDESC_Y |
- SVGA3DBLOCKDESC_U_VIDEO |
- SVGA3DBLOCKDESC_V_VIDEO,
- SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB |
- SVGA3DBLOCKDESC_EXP,
- SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
+ SVGA3DBLOCKDESC_YUV_VIDEO |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_RGB_EXP = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN |
+ SVGA3DBLOCKDESC_BLUE |
+ SVGA3DBLOCKDESC_EXP |
+ SVGA3DBLOCKDESC_COLOR,
+
+ SVGA3DBLOCKDESC_COMP_TYPELESS = SVGA3DBLOCKDESC_COMPRESSED |
+ SVGA3DBLOCKDESC_TYPELESS,
+ SVGA3DBLOCKDESC_COMP_UNORM = SVGA3DBLOCKDESC_COMPRESSED |
+ SVGA3DBLOCKDESC_UINT |
+ SVGA3DBLOCKDESC_NORM |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_COMP_SNORM = SVGA3DBLOCKDESC_COMPRESSED |
+ SVGA3DBLOCKDESC_SINT |
+ SVGA3DBLOCKDESC_NORM |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_COMP_UNORM |
SVGA3DBLOCKDESC_SRGB,
- SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV |
- SVGA3DBLOCKDESC_2PLANAR_YUV,
- SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV |
- SVGA3DBLOCKDESC_3PLANAR_YUV,
+ SVGA3DBLOCKDESC_BC1_COMP_TYPELESS = SVGA3DBLOCKDESC_BC1 |
+ SVGA3DBLOCKDESC_COMP_TYPELESS,
+ SVGA3DBLOCKDESC_BC1_COMP_UNORM = SVGA3DBLOCKDESC_BC1 |
+ SVGA3DBLOCKDESC_COMP_UNORM,
+ SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC1_COMP_UNORM |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_BC2_COMP_TYPELESS = SVGA3DBLOCKDESC_BC2 |
+ SVGA3DBLOCKDESC_COMP_TYPELESS,
+ SVGA3DBLOCKDESC_BC2_COMP_UNORM = SVGA3DBLOCKDESC_BC2 |
+ SVGA3DBLOCKDESC_COMP_UNORM,
+ SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC2_COMP_UNORM |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_BC3_COMP_TYPELESS = SVGA3DBLOCKDESC_BC3 |
+ SVGA3DBLOCKDESC_COMP_TYPELESS,
+ SVGA3DBLOCKDESC_BC3_COMP_UNORM = SVGA3DBLOCKDESC_BC3 |
+ SVGA3DBLOCKDESC_COMP_UNORM,
+ SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC3_COMP_UNORM |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_BC4_COMP_TYPELESS = SVGA3DBLOCKDESC_BC4 |
+ SVGA3DBLOCKDESC_COMP_TYPELESS,
+ SVGA3DBLOCKDESC_BC4_COMP_UNORM = SVGA3DBLOCKDESC_BC4 |
+ SVGA3DBLOCKDESC_COMP_UNORM,
+ SVGA3DBLOCKDESC_BC4_COMP_SNORM = SVGA3DBLOCKDESC_BC4 |
+ SVGA3DBLOCKDESC_COMP_SNORM,
+ SVGA3DBLOCKDESC_BC5_COMP_TYPELESS = SVGA3DBLOCKDESC_BC5 |
+ SVGA3DBLOCKDESC_COMP_TYPELESS,
+ SVGA3DBLOCKDESC_BC5_COMP_UNORM = SVGA3DBLOCKDESC_BC5 |
+ SVGA3DBLOCKDESC_COMP_UNORM,
+ SVGA3DBLOCKDESC_BC5_COMP_SNORM = SVGA3DBLOCKDESC_BC5 |
+ SVGA3DBLOCKDESC_COMP_SNORM,
+ SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS = SVGA3DBLOCKDESC_BC6H |
+ SVGA3DBLOCKDESC_COMP_TYPELESS,
+ SVGA3DBLOCKDESC_BC6H_COMP_UF16 = SVGA3DBLOCKDESC_BC6H |
+ SVGA3DBLOCKDESC_COMPRESSED,
+ SVGA3DBLOCKDESC_BC6H_COMP_SF16 = SVGA3DBLOCKDESC_BC6H |
+ SVGA3DBLOCKDESC_COMPRESSED,
+ SVGA3DBLOCKDESC_BC7_COMP_TYPELESS = SVGA3DBLOCKDESC_BC7 |
+ SVGA3DBLOCKDESC_COMP_TYPELESS,
+ SVGA3DBLOCKDESC_BC7_COMP_UNORM = SVGA3DBLOCKDESC_BC7 |
+ SVGA3DBLOCKDESC_COMP_UNORM,
+ SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC7_COMP_UNORM |
+ SVGA3DBLOCKDESC_SRGB,
+
+ SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_YUV_VIDEO |
+ SVGA3DBLOCKDESC_PLANAR_YUV |
+ SVGA3DBLOCKDESC_2PLANAR_YUV |
+ SVGA3DBLOCKDESC_COLOR,
+ SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_YUV_VIDEO |
+ SVGA3DBLOCKDESC_PLANAR_YUV |
+ SVGA3DBLOCKDESC_3PLANAR_YUV |
+ SVGA3DBLOCKDESC_COLOR,
+
+ SVGA3DBLOCKDESC_DEPTH_UINT = SVGA3DBLOCKDESC_DEPTH |
+ SVGA3DBLOCKDESC_UINT,
+ SVGA3DBLOCKDESC_DEPTH_UNORM = SVGA3DBLOCKDESC_DEPTH_UINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH |
+ SVGA3DBLOCKDESC_STENCIL,
+ SVGA3DBLOCKDESC_DS_UINT = SVGA3DBLOCKDESC_DEPTH |
+ SVGA3DBLOCKDESC_STENCIL |
+ SVGA3DBLOCKDESC_UINT,
+ SVGA3DBLOCKDESC_DS_UNORM = SVGA3DBLOCKDESC_DS_UINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_DEPTH_FP = SVGA3DBLOCKDESC_DEPTH |
+ SVGA3DBLOCKDESC_FP,
+
+ SVGA3DBLOCKDESC_UV_UINT = SVGA3DBLOCKDESC_UV |
+ SVGA3DBLOCKDESC_UINT,
+ SVGA3DBLOCKDESC_UV_SNORM = SVGA3DBLOCKDESC_UV |
+ SVGA3DBLOCKDESC_SINT |
+ SVGA3DBLOCKDESC_NORM,
+ SVGA3DBLOCKDESC_UVCX_SNORM = SVGA3DBLOCKDESC_UV_SNORM |
+ SVGA3DBLOCKDESC_CX,
+ SVGA3DBLOCKDESC_UVWQ_SNORM = SVGA3DBLOCKDESC_UVWQ |
+ SVGA3DBLOCKDESC_SINT |
+ SVGA3DBLOCKDESC_NORM,
};
typedef struct SVGA3dChannelDef {
- union {
- uint8 blue;
- uint8 u;
- uint8 uv_video;
- uint8 u_video;
- };
- union {
- uint8 green;
- uint8 v;
- uint8 stencil;
- uint8 v_video;
- };
- union {
- uint8 red;
- uint8 w;
- uint8 luminance;
- uint8 y;
- uint8 depth;
- uint8 data;
- };
- union {
- uint8 alpha;
- uint8 q;
- uint8 exp;
- };
+ union {
+ uint8 blue;
+ uint8 w_bump;
+ uint8 l_bump;
+ uint8 uv_video;
+ uint8 u_video;
+ };
+ union {
+ uint8 green;
+ uint8 stencil;
+ uint8 v_bump;
+ uint8 v_video;
+ };
+ union {
+ uint8 red;
+ uint8 u_bump;
+ uint8 luminance;
+ uint8 y_video;
+ uint8 depth;
+ uint8 data;
+ };
+ union {
+ uint8 alpha;
+ uint8 q_bump;
+ uint8 exp;
+ };
} SVGA3dChannelDef;
struct svga3d_surface_desc {
@@ -173,739 +357,784 @@ struct svga3d_surface_desc {
uint32 bytes_per_block;
uint32 pitch_bytes_per_block;
- uint32 totalBitDepth;
- SVGA3dChannelDef bitDepth;
- SVGA3dChannelDef bitOffset;
+ SVGA3dChannelDef bitDepth;
+ SVGA3dChannelDef bitOffset;
};
static const struct svga3d_surface_desc svga3d_surface_descs[] = {
{SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE,
{1, 1, 1}, 0, 0,
- 0, {{0}, {0}, {0}, {0}},
+ {{0}, {0}, {0}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB_UNORM,
{1, 1, 1}, 4, 4,
- 24, {{8}, {8}, {8}, {0}},
+ {{8}, {8}, {8}, {0}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{5}, {6}, {5}, {0}},
+ {{5}, {6}, {5}, {0}},
{{0}, {5}, {11}, {0}}},
- {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB_UNORM,
{1, 1, 1}, 2, 2,
- 15, {{5}, {5}, {5}, {0}},
+ {{5}, {5}, {5}, {0}},
{{0}, {5}, {10}, {0}}},
- {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{5}, {5}, {5}, {1}},
+ {{5}, {5}, {5}, {1}},
{{0}, {5}, {10}, {15}}},
- {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{4}, {4}, {4}, {4}},
+ {{4}, {4}, {4}, {4}},
{{0}, {4}, {8}, {12}}},
- {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH,
+ {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH,
+ {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS,
+ {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {8}, {24}, {0}},
- {{0}, {24}, {0}, {0}}},
+ {{0}, {8}, {24}, {0}},
+ {{0}, {0}, {8}, {0}}},
- {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS,
+ {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {1}, {15}, {0}},
- {{0}, {15}, {0}, {0}}},
+ {{0}, {1}, {15}, {0}},
+ {{0}, {0}, {1}, {0}}},
- {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_LUMINANCE,
+ {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_L_UNORM,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA,
- {1 , 1, 1}, 1, 1,
- 8, {{0}, {0}, {4}, {4}},
+ {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA_UNORM,
+ {1, 1, 1}, 1, 1,
+ {{0}, {0}, {4}, {4}},
{{0}, {0}, {0}, {4}}},
- {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_LUMINANCE,
+ {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_L_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA,
+ {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {8}, {8}},
+ {{0}, {0}, {8}, {8}},
{{0}, {0}, {0}, {8}}},
- {SVGA3D_DXT1, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_DXT1, SVGA3DBLOCKDESC_BC1_COMP_UNORM,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_DXT2, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_DXT2, SVGA3DBLOCKDESC_BC2_COMP_UNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_DXT3, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_DXT3, SVGA3DBLOCKDESC_BC2_COMP_UNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_DXT4, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_DXT4, SVGA3DBLOCKDESC_BC3_COMP_UNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_DXT5, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_DXT5, SVGA3DBLOCKDESC_BC3_COMP_UNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV,
+ {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV_SNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {8}, {8}},
- {{0}, {0}, {0}, {8}}},
+ {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
{SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL,
{1, 1, 1}, 2, 2,
- 16, {{5}, {5}, {6}, {0}},
- {{11}, {6}, {0}, {0}}},
+ {{6}, {5}, {5}, {0}},
+ {{10}, {5}, {0}, {0}}},
{SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {0}},
+ {{8}, {8}, {8}, {0}},
{{16}, {8}, {0}, {0}}},
- {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_UVL,
- {0, 0, 0}, 0, 0,
- 0, {{0}, {0}, {0}, {0}},
- {{0}, {0}, {0}, {0}}},
+ {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_NONE,
+ {1, 1, 1}, 3, 3,
+ {{8}, {8}, {8}, {0}},
+ {{16}, {8}, {0}, {0}}},
{SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
{SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP,
{1, 1, 1}, 16, 16,
- 128, {{32}, {32}, {32}, {32}},
+ {{32}, {32}, {32}, {32}},
{{64}, {32}, {0}, {96}}},
- {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{10}, {10}, {10}, {2}},
+ {{10}, {10}, {10}, {2}},
{{0}, {10}, {20}, {30}}},
- {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV,
+ {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV_SNORM,
{1, 1, 1}, 2, 2,
- 16, {{8}, {8}, {0}, {0}},
- {{8}, {0}, {0}, {0}}},
+ {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
- {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ,
+ {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ_SNORM,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
- {{24}, {16}, {8}, {0}}},
+ {{8}, {8}, {8}, {8}},
+ {{16}, {8}, {0}, {24}}},
- {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UV,
+ {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UVCX_SNORM,
{1, 1, 1}, 2, 2,
- 16, {{8}, {8}, {0}, {0}},
- {{8}, {0}, {0}, {0}}},
+ {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
{SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL,
{1, 1, 1}, 4, 4,
- 24, {{8}, {8}, {8}, {0}},
+ {{8}, {8}, {8}, {0}},
{{16}, {8}, {0}, {0}}},
{SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA,
{1, 1, 1}, 4, 4,
- 32, {{10}, {10}, {10}, {2}},
- {{0}, {10}, {20}, {30}}},
+ {{10}, {10}, {10}, {2}},
+ {{20}, {10}, {0}, {30}}},
- {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_ALPHA,
+ {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_A_UNORM,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {0}, {8}},
+ {{0}, {0}, {0}, {8}},
{{0}, {0}, {0}, {0}}},
{SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
{SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
{SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP,
{1, 1, 1}, 4, 4,
- 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {16}, {0}},
{{0}, {16}, {0}, {0}}},
{SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP,
{1, 1, 1}, 8, 8,
- 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {32}, {0}},
{{0}, {32}, {0}, {0}}},
{SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH,
+ {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {24}, {0}},
- {{0}, {24}, {0}, {0}}},
+ {{0}, {0}, {24}, {0}},
+ {{0}, {0}, {8}, {0}}},
- {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV,
+ {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV_SNORM,
{1, 1, 1}, 4, 4,
- 32, {{16}, {16}, {0}, {0}},
- {{16}, {0}, {0}, {0}}},
+ {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
- {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {16}, {16}, {0}},
- {{0}, {0}, {16}, {0}}},
+ {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
- {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
{SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV,
- {1, 1, 1}, 2, 2,
- 16, {{8}, {0}, {8}, {0}},
+ {2, 1, 1}, 4, 4,
+ {{8}, {0}, {8}, {0}},
{{0}, {0}, {8}, {0}}},
{SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV,
- {1, 1, 1}, 2, 2,
- 16, {{8}, {0}, {8}, {0}},
+ {2, 1, 1}, 4, 4,
+ {{8}, {0}, {8}, {0}},
{{8}, {0}, {0}, {0}}},
{SVGA3D_NV12, SVGA3DBLOCKDESC_NV12,
{2, 2, 1}, 6, 2,
- 48, {{0}, {0}, {48}, {0}},
+ {{0}, {0}, {48}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV,
+ {SVGA3D_FORMAT_DEAD2, SVGA3DBLOCKDESC_NONE,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 16, 16,
- 128, {{32}, {32}, {32}, {32}},
+ {{32}, {32}, {32}, {32}},
{{64}, {32}, {0}, {96}}},
- {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA_UINT,
{1, 1, 1}, 16, 16,
- 128, {{32}, {32}, {32}, {32}},
+ {{32}, {32}, {32}, {32}},
{{64}, {32}, {0}, {96}}},
- {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_UVWQ,
+ {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_RGBA_SINT,
{1, 1, 1}, 16, 16,
- 128, {{32}, {32}, {32}, {32}},
+ {{32}, {32}, {32}, {32}},
{{64}, {32}, {0}, {96}}},
- {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 12, 12,
- 96, {{32}, {32}, {32}, {0}},
+ {{32}, {32}, {32}, {0}},
{{64}, {32}, {0}, {0}}},
{SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
{1, 1, 1}, 12, 12,
- 96, {{32}, {32}, {32}, {0}},
+ {{32}, {32}, {32}, {0}},
{{64}, {32}, {0}, {0}}},
- {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB_UINT,
{1, 1, 1}, 12, 12,
- 96, {{32}, {32}, {32}, {0}},
+ {{32}, {32}, {32}, {0}},
{{64}, {32}, {0}, {0}}},
- {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_UVW,
+ {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_RGB_SINT,
{1, 1, 1}, 12, 12,
- 96, {{32}, {32}, {32}, {0}},
+ {{32}, {32}, {32}, {0}},
{{64}, {32}, {0}, {0}}},
- {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
- {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA_UINT,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
- {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_UVWQ,
+ {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_RGBA_SNORM,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
- {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_UVWQ,
+ {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_RGBA_SINT,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
- {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 8, 8,
- 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {32}, {0}},
{{0}, {32}, {0}, {0}}},
- {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG_UINT,
{1, 1, 1}, 8, 8,
- 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {32}, {0}},
{{0}, {32}, {0}, {0}}},
- {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_UV,
+ {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_RG_SINT,
{1, 1, 1}, 8, 8,
- 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {32}, {0}},
{{0}, {32}, {0}, {0}}},
- {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 8, 8,
- 64, {{0}, {8}, {32}, {0}},
+ {{0}, {8}, {32}, {0}},
{{0}, {32}, {0}, {0}}},
{SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS,
{1, 1, 1}, 8, 8,
- 64, {{0}, {8}, {32}, {0}},
+ {{0}, {8}, {32}, {0}},
{{0}, {32}, {0}, {0}}},
{SVGA3D_R32_FLOAT_X8X24, SVGA3DBLOCKDESC_R_FP,
{1, 1, 1}, 8, 8,
- 64, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_X32_G8X24_UINT, SVGA3DBLOCKDESC_GREEN,
+ {SVGA3D_X32_G8X24_UINT, SVGA3DBLOCKDESC_G_UINT,
{1, 1, 1}, 8, 8,
- 64, {{0}, {8}, {0}, {0}},
+ {{0}, {8}, {0}, {0}},
{{0}, {32}, {0}, {0}}},
- {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 4, 4,
- 32, {{10}, {10}, {10}, {2}},
- {{0}, {10}, {20}, {30}}},
+ {{10}, {10}, {10}, {2}},
+ {{20}, {10}, {0}, {30}}},
- {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA_UINT,
{1, 1, 1}, 4, 4,
- 32, {{10}, {10}, {10}, {2}},
- {{0}, {10}, {20}, {30}}},
+ {{10}, {10}, {10}, {2}},
+ {{20}, {10}, {0}, {30}}},
{SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
{1, 1, 1}, 4, 4,
- 32, {{10}, {11}, {11}, {0}},
- {{0}, {10}, {21}, {0}}},
+ {{10}, {11}, {11}, {0}},
+ {{22}, {11}, {0}, {0}}},
- {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{16}, {8}, {0}, {24}}},
- {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{16}, {8}, {0}, {24}}},
- {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+ {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_UNORM_SRGB,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{16}, {8}, {0}, {24}}},
- {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA_UINT,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{16}, {8}, {0}, {24}}},
- {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA_SINT,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{16}, {8}, {0}, {24}}},
- {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 4, 4,
- 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {16}, {0}},
{{0}, {16}, {0}, {0}}},
- {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_FP,
+ {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_UINT,
{1, 1, 1}, 4, 4,
- 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {16}, {0}},
{{0}, {16}, {0}, {0}}},
- {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_UV,
+ {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_RG_SINT,
{1, 1, 1}, 4, 4,
- 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {16}, {0}},
{{0}, {16}, {0}, {0}}},
- {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH,
+ {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH_FP,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_R_UINT,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_R_SINT,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 4, 4,
- 32, {{0}, {8}, {24}, {0}},
+ {{0}, {8}, {24}, {0}},
{{0}, {24}, {0}, {0}}},
- {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS,
+ {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {8}, {24}, {0}},
+ {{0}, {8}, {24}, {0}},
{{0}, {24}, {0}, {0}}},
- {SVGA3D_R24_UNORM_X8, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R24_UNORM_X8, SVGA3DBLOCKDESC_R_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {24}, {0}},
+ {{0}, {0}, {24}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_X24_G8_UINT, SVGA3DBLOCKDESC_GREEN,
+ {SVGA3D_X24_G8_UINT, SVGA3DBLOCKDESC_G_UINT,
{1, 1, 1}, 4, 4,
- 32, {{0}, {8}, {0}, {0}},
+ {{0}, {8}, {0}, {0}},
{{0}, {24}, {0}, {0}}},
- {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 2, 2,
- 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {8}, {0}},
{{0}, {8}, {0}, {0}}},
- {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {8}, {0}},
{{0}, {8}, {0}, {0}}},
- {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG_UINT,
{1, 1, 1}, 2, 2,
- 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {8}, {0}},
{{0}, {8}, {0}, {0}}},
- {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_UV,
+ {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_RG_SINT,
{1, 1, 1}, 2, 2,
- 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {8}, {0}},
{{0}, {8}, {0}, {0}}},
- {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_R_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_R_UINT,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_U,
+ {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_R_SNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_U,
+ {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_R_SINT,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_R_UNORM,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_R_UINT,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_U,
+ {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_R_SNORM,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_U,
+ {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_R_SINT,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_P8, SVGA3DBLOCKDESC_RED,
+ {SVGA3D_P8, SVGA3DBLOCKDESC_NONE,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGBE,
+ {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGB_EXP,
{1, 1, 1}, 4, 4,
- 32, {{9}, {9}, {9}, {5}},
+ {{9}, {9}, {9}, {5}},
{{18}, {9}, {0}, {27}}},
- {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 2, 2,
- 16, {{0}, {8}, {8}, {0}},
- {{0}, {8}, {0}, {0}}},
+ {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_NONE,
+ {2, 1, 1}, 4, 4,
+ {{0}, {8}, {8}, {0}},
+ {{0}, {0}, {8}, {0}}},
- {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 2, 2,
- 16, {{0}, {8}, {8}, {0}},
+ {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_NONE,
+ {2, 1, 1}, 4, 4,
+ {{0}, {8}, {8}, {0}},
{{0}, {8}, {0}, {0}}},
- {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_BC1_COMP_TYPELESS,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+ {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_BC2_COMP_TYPELESS,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+ {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_BC3_COMP_TYPELESS,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+ {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_BC4_COMP_TYPELESS,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_ATI1, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_ATI1, SVGA3DBLOCKDESC_BC4_COMP_UNORM,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_BC4_COMP_SNORM,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_BC5_COMP_TYPELESS,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_ATI2, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_ATI2, SVGA3DBLOCKDESC_BC5_COMP_UNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_BC5_COMP_SNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{10}, {10}, {10}, {2}},
- {{0}, {10}, {20}, {30}}},
+ {{10}, {10}, {10}, {2}},
+ {{20}, {10}, {0}, {30}}},
- {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+ {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_UNORM_SRGB,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
{1, 1, 1}, 4, 4,
- 24, {{8}, {8}, {8}, {0}},
+ {{8}, {8}, {8}, {0}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_SRGB,
+ {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_UNORM_SRGB,
{1, 1, 1}, 4, 4,
- 24, {{8}, {8}, {8}, {0}},
+ {{8}, {8}, {8}, {0}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH,
+ {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH,
+ {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {8}, {24}, {0}},
- {{0}, {24}, {0}, {0}}},
+ {{0}, {0}, {24}, {0}},
+ {{0}, {0}, {8}, {0}}},
- {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS,
+ {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {8}, {24}, {0}},
- {{0}, {24}, {0}, {0}}},
+ {{0}, {8}, {24}, {0}},
+ {{0}, {0}, {8}, {0}}},
{SVGA3D_YV12, SVGA3DBLOCKDESC_YV12,
{2, 2, 1}, 6, 2,
- 48, {{0}, {0}, {48}, {0}},
+ {{0}, {0}, {48}, {0}},
{{0}, {0}, {0}, {0}}},
{SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
{1, 1, 1}, 16, 16,
- 128, {{32}, {32}, {32}, {32}},
+ {{32}, {32}, {32}, {32}},
{{64}, {32}, {0}, {96}}},
{SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
- {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 8, 8,
- 64, {{16}, {16}, {16}, {16}},
+ {{16}, {16}, {16}, {16}},
{{32}, {16}, {0}, {48}}},
{SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP,
{1, 1, 1}, 8, 8,
- 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {32}, {0}},
{{0}, {32}, {0}, {0}}},
- {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{10}, {10}, {10}, {2}},
- {{0}, {10}, {20}, {30}}},
+ {{10}, {10}, {10}, {2}},
+ {{20}, {10}, {0}, {30}}},
- {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA_SNORM,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
- {{24}, {16}, {8}, {0}}},
+ {{8}, {8}, {8}, {8}},
+ {{16}, {8}, {0}, {24}}},
{SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP,
{1, 1, 1}, 4, 4,
- 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {16}, {0}},
{{0}, {16}, {0}, {0}}},
- {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{0}, {16}, {16}, {0}},
- {{0}, {0}, {16}, {0}}},
+ {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
- {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG_SNORM,
{1, 1, 1}, 4, 4,
- 32, {{16}, {16}, {0}, {0}},
- {{16}, {0}, {0}, {0}}},
+ {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
{SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP,
{1, 1, 1}, 4, 4,
- 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {32}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG,
+ {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG_SNORM,
{1, 1, 1}, 2, 2,
- 16, {{8}, {8}, {0}, {0}},
- {{8}, {0}, {0}, {0}}},
+ {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
{SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH,
+ {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {16}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_ALPHA,
+ {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_A_UNORM,
{1, 1, 1}, 1, 1,
- 8, {{0}, {0}, {0}, {8}},
+ {{0}, {0}, {0}, {8}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_BC1_COMP_UNORM,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_BC2_COMP_UNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_BC3_COMP_UNORM,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{5}, {6}, {5}, {0}},
+ {{5}, {6}, {5}, {0}},
{{0}, {5}, {11}, {0}}},
- {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 2, 2,
- 16, {{5}, {5}, {5}, {1}},
+ {{5}, {5}, {5}, {1}},
{{0}, {5}, {10}, {15}}},
- {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM,
{1, 1, 1}, 4, 4,
- 32, {{8}, {8}, {8}, {8}},
+ {{8}, {8}, {8}, {8}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB,
+ {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB_UNORM,
{1, 1, 1}, 4, 4,
- 24, {{8}, {8}, {8}, {0}},
+ {{8}, {8}, {8}, {0}},
{{0}, {8}, {16}, {24}}},
- {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_BC4_COMP_UNORM,
{4, 4, 1}, 8, 8,
- 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_BC5_COMP_UNORM,
+ {4, 4, 1}, 16, 16,
+ {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_B4G4R4A4_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM,
+ {1, 1, 1}, 2, 2,
+ {{4}, {4}, {4}, {4}},
+ {{0}, {4}, {8}, {12}}},
+
+ {SVGA3D_BC6H_TYPELESS, SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS,
+ {4, 4, 1}, 16, 16,
+ {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC6H_UF16, SVGA3DBLOCKDESC_BC6H_COMP_UF16,
+ {4, 4, 1}, 16, 16,
+ {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC6H_SF16, SVGA3DBLOCKDESC_BC6H_COMP_SF16,
+ {4, 4, 1}, 16, 16,
+ {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC7_TYPELESS, SVGA3DBLOCKDESC_BC7_COMP_TYPELESS,
+ {4, 4, 1}, 16, 16,
+ {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC7_UNORM, SVGA3DBLOCKDESC_BC7_COMP_UNORM,
+ {4, 4, 1}, 16, 16,
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
- {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {SVGA3D_BC7_UNORM_SRGB, SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB,
{4, 4, 1}, 16, 16,
- 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {128}, {0}},
{{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV,
+ {1, 1, 1}, 4, 4,
+ {{8}, {8}, {8}, {8}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_R11G11B10_TYPELESS, SVGA3DBLOCKDESC_TYPELESS,
+ {1, 1, 1}, 4, 4,
+ {{10}, {11}, {11}, {0}},
+ {{22}, {11}, {0}, {0}}},
};
+
extern const struct svga3d_surface_desc g_SVGA3dSurfaceDescs[];
extern int g_SVGA3dSurfaceDescs_size;
diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h
index 94262314e..1807b1dd5 100644
--- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h
+++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h
@@ -1,5 +1,6 @@
-/**********************************************************
- * Copyright 2007-2015 VMware, Inc. All rights reserved.
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2012-2022 VMware, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -21,14 +22,18 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- **********************************************************/
+ */
/*
* svga3d_types.h --
*
- * SVGA 3d hardware definitions for basic types
+ * SVGA 3d hardware definitions for basic types
*/
+
+
+
+
#ifndef _SVGA3D_TYPES_H_
#define _SVGA3D_TYPES_H_
@@ -232,8 +237,7 @@ typedef enum SVGA3dSurfaceFormat {
/* Planar video formats */
SVGA3D_NV12 = 44,
- /* Video format with alpha */
- SVGA3D_AYUV = 45,
+ SVGA3D_FORMAT_DEAD2 = 45,
SVGA3D_R32G32B32A32_TYPELESS = 46,
SVGA3D_R32G32B32A32_UINT = 47,
@@ -339,14 +343,22 @@ typedef enum SVGA3dSurfaceFormat {
SVGA3D_B8G8R8X8_UNORM = 142,
SVGA3D_BC4_UNORM = 143,
SVGA3D_BC5_UNORM = 144,
+ SVGA3D_B4G4R4A4_UNORM = 145,
+
+
+ SVGA3D_BC6H_TYPELESS = 146,
+ SVGA3D_BC6H_UF16 = 147,
+ SVGA3D_BC6H_SF16 = 148,
+ SVGA3D_BC7_TYPELESS = 149,
+ SVGA3D_BC7_UNORM = 150,
+ SVGA3D_BC7_UNORM_SRGB = 151,
+ SVGA3D_AYUV = 152,
+ SVGA3D_R11G11B10_TYPELESS = 153,
SVGA3D_FORMAT_MAX
} SVGA3dSurfaceFormat;
-typedef uint32 SVGA3dSurfaceFlags;
-/*
- * SVGA3d Surface Flags --
- */
+
#define SVGA3D_SURFACE_CUBEMAP (1 << 0)
/*
@@ -426,27 +438,42 @@ typedef uint32 SVGA3dSurfaceFlags;
*/
#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (CONST64U(1) << 30)
-/*
- * Marker for the last defined bit in SVGA3dSurfaceFlags.
- */
-#define SVGA3D_SURFACE_VADECODE (CONST64U(1) << 31)
-/*
- * Specifies that a surface is multisample, and therefore requires the full
- * mob-backing to store all the samples.
- */
+#define SVGA3D_SURFACE_RESERVED1 (CONST64U(1) << 31)
+
+
#define SVGA3D_SURFACE_MULTISAMPLE (CONST64U(1) << 32)
+
+
+#define SVGA3D_SURFACE_BIND_UAVIEW (CONST64U(1) << 33)
+
+
+#define SVGA3D_SURFACE_TRANSFER_TO_BUFFER (CONST64U(1) << 34)
+
+#define SVGA3D_SURFACE_BIND_LOGICOPS (CONST64U(1) << 35)
+
+
+#define SVGA3D_SURFACE_BIND_RAW_VIEWS (CONST64U(1) << 36)
+#define SVGA3D_SURFACE_BUFFER_STRUCTURED (CONST64U(1) << 37)
+
#define SVGA3D_SURFACE_DRAWINDIRECT_ARGS (CONST64U(1) << 38)
+#define SVGA3D_SURFACE_RESOURCE_CLAMP (CONST64U(1) << 39)
+
+
+#define SVGA3D_SURFACE_STAGING_COPY (CONST64U(1) << 40)
+
+
+
+
+
+
+
+
+
+
+#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 44)
-#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 42)
-/*
- * Surface flags types:
- *
- * SVGA3dSurface1Flags: Lower 32-bits of flags.
- * SVGA3dSurface2Flags: Upper 32-bits of flags.
- * SVGA3dSurfaceAllFlags: Full 64-bits of flags.
- */
typedef uint32 SVGA3dSurface1Flags;
typedef uint32 SVGA3dSurface2Flags;
typedef uint64 SVGA3dSurfaceAllFlags;
@@ -464,14 +491,28 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_STAGING_DOWNLOAD | \
SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
- SVGA3D_SURFACE_VADECODE | \
SVGA3D_SURFACE_MULTISAMPLE | \
- SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
+ SVGA3D_SURFACE_BIND_UAVIEW | \
+ SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \
+ SVGA3D_SURFACE_BIND_LOGICOPS | \
+ SVGA3D_SURFACE_BIND_RAW_VIEWS | \
+ SVGA3D_SURFACE_BUFFER_STRUCTURED | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \
+ SVGA3D_SURFACE_RESOURCE_CLAMP | \
+ SVGA3D_SURFACE_STAGING_COPY | \
+ SVGA3D_SURFACE_RESTRICT_UPDATE | \
+ SVGA3D_SURFACE_BIND_TENSOR | \
+ SVGA3D_SURFACE_LO_STAGING \
+ )
+
+#define SVGA3D_SURFACE_HB_PRESENT_DISALLOWED_MASK \
+ ( SVGA3D_SURFACE_1D | \
+ SVGA3D_SURFACE_MULTISAMPLE | \
+ SVGA3D_SURFACE_STAGING_COPY \
)
#define SVGA3D_SURFACE_2D_DISALLOWED_MASK \
( SVGA3D_SURFACE_CUBEMAP | \
- SVGA3D_SURFACE_DEAD2 | \
SVGA3D_SURFACE_AUTOGENMIPMAPS | \
SVGA3D_SURFACE_VOLUME | \
SVGA3D_SURFACE_1D | \
@@ -481,9 +522,14 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
- SVGA3D_SURFACE_VADECODE | \
SVGA3D_SURFACE_MULTISAMPLE | \
- SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
+ SVGA3D_SURFACE_BIND_UAVIEW | \
+ SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \
+ SVGA3D_SURFACE_BIND_RAW_VIEWS | \
+ SVGA3D_SURFACE_BUFFER_STRUCTURED | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \
+ SVGA3D_SURFACE_RESOURCE_CLAMP | \
+ SVGA3D_SURFACE_BIND_TENSOR \
)
#define SVGA3D_SURFACE_BASICOPS_DISALLOWED_MASK \
@@ -491,7 +537,6 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_AUTOGENMIPMAPS | \
SVGA3D_SURFACE_VOLUME | \
SVGA3D_SURFACE_1D | \
- SVGA3D_SURFACE_VADECODE | \
SVGA3D_SURFACE_MULTISAMPLE \
)
@@ -510,9 +555,15 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_STAGING_DOWNLOAD | \
SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
- SVGA3D_SURFACE_VADECODE | \
SVGA3D_SURFACE_MULTISAMPLE | \
- SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
+ SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \
+ SVGA3D_SURFACE_BIND_RAW_VIEWS | \
+ SVGA3D_SURFACE_BUFFER_STRUCTURED | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \
+ SVGA3D_SURFACE_RESOURCE_CLAMP | \
+ SVGA3D_SURFACE_STAGING_COPY | \
+ SVGA3D_SURFACE_BIND_TENSOR | \
+ SVGA3D_SURFACE_LO_STAGING \
)
#define SVGA3D_SURFACE_BUFFER_DISALLOWED_MASK \
@@ -523,30 +574,48 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_DEAD2 | \
SVGA3D_SURFACE_ARRAY | \
SVGA3D_SURFACE_MULTISAMPLE | \
- SVGA3D_SURFACE_MOB_PITCH \
+ SVGA3D_SURFACE_MOB_PITCH | \
+ SVGA3D_SURFACE_RESOURCE_CLAMP \
)
#define SVGA3D_SURFACE_MULTISAMPLE_DISALLOWED_MASK \
- ( SVGA3D_SURFACE_AUTOGENMIPMAPS | \
+ ( SVGA3D_SURFACE_CUBEMAP | \
+ SVGA3D_SURFACE_AUTOGENMIPMAPS | \
SVGA3D_SURFACE_VOLUME | \
SVGA3D_SURFACE_1D | \
SVGA3D_SURFACE_SCREENTARGET | \
SVGA3D_SURFACE_MOB_PITCH | \
- SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
+ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
+ SVGA3D_SURFACE_BIND_UAVIEW | \
+ SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \
+ SVGA3D_SURFACE_BIND_LOGICOPS | \
+ SVGA3D_SURFACE_BIND_RAW_VIEWS | \
+ SVGA3D_SURFACE_BUFFER_STRUCTURED | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \
+ SVGA3D_SURFACE_STAGING_COPY \
)
-#define SVGA3D_SURFACE_DX_ONLY_MASK \
- ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
- SVGA3D_SURFACE_STAGING_UPLOAD | \
- SVGA3D_SURFACE_STAGING_DOWNLOAD | \
- SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \
+#define SVGA3D_SURFACE_DX_ONLY_MASK \
+ ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
+ SVGA3D_SURFACE_STAGING_UPLOAD | \
+ SVGA3D_SURFACE_STAGING_DOWNLOAD | \
+ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
+ SVGA3D_SURFACE_TRANSFER_TO_BUFFER \
)
-#define SVGA3D_SURFACE_STAGING_MASK \
+#define SVGA3D_SURFACE_ANY_STAGING_MASK \
( SVGA3D_SURFACE_STAGING_UPLOAD | \
- SVGA3D_SURFACE_STAGING_DOWNLOAD \
+ SVGA3D_SURFACE_STAGING_DOWNLOAD | \
+ SVGA3D_SURFACE_STAGING_COPY | \
+ SVGA3D_SURFACE_LO_STAGING \
)
+#define SVGA3D_SURFACE_ANY_NONHINT_STAGING_MASK \
+ (SVGA3D_SURFACE_ANY_STAGING_MASK & \
+ ~( \
+ SVGA3D_SURFACE_LO_STAGING \
+ ))
+
#define SVGA3D_SURFACE_BIND_MASK \
( SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \
SVGA3D_SURFACE_BIND_INDEX_BUFFER | \
@@ -554,78 +623,29 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \
SVGA3D_SURFACE_BIND_RENDER_TARGET | \
SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
- SVGA3D_SURFACE_BIND_STREAM_OUTPUT \
+ SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
+ SVGA3D_SURFACE_BIND_UAVIEW | \
+ SVGA3D_SURFACE_BIND_LOGICOPS | \
+ SVGA3D_SURFACE_BIND_RAW_VIEWS | \
+ SVGA3D_SURFACE_BIND_TENSOR \
)
-#define SVGA3D_SURFACE_VADECODE_DISALLOWED_MASK \
- ( SVGA3D_SURFACE_CUBEMAP | \
- SVGA3D_SURFACE_HINT_STATIC | \
- SVGA3D_SURFACE_HINT_DYNAMIC | \
- SVGA3D_SURFACE_HINT_INDEXBUFFER | \
- SVGA3D_SURFACE_HINT_VERTEXBUFFER | \
- SVGA3D_SURFACE_HINT_TEXTURE | \
- SVGA3D_SURFACE_HINT_RENDERTARGET | \
- SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \
- SVGA3D_SURFACE_HINT_WRITEONLY | \
- SVGA3D_SURFACE_DEAD2 | \
- SVGA3D_SURFACE_AUTOGENMIPMAPS | \
- SVGA3D_SURFACE_HINT_RT_LOCKABLE | \
- SVGA3D_SURFACE_VOLUME | \
- SVGA3D_SURFACE_SCREENTARGET | \
- SVGA3D_SURFACE_1D | \
- SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \
- SVGA3D_SURFACE_BIND_INDEX_BUFFER | \
- SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
- SVGA3D_SURFACE_BIND_RENDER_TARGET | \
- SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \
- SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
- SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
- SVGA3D_SURFACE_INACTIVE | \
- SVGA3D_SURFACE_STAGING_UPLOAD | \
- SVGA3D_SURFACE_STAGING_DOWNLOAD | \
- SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
- SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
- SVGA3D_SURFACE_MULTISAMPLE \
- )
-
-#define SVGA3D_SURFACE_VAPROCESSFRAME_OUTPUT_DISALLOWED_MASK \
- ( SVGA3D_SURFACE_HINT_INDEXBUFFER | \
- SVGA3D_SURFACE_HINT_VERTEXBUFFER | \
- SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \
- SVGA3D_SURFACE_DEAD2 | \
- SVGA3D_SURFACE_VOLUME | \
- SVGA3D_SURFACE_1D | \
- SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \
- SVGA3D_SURFACE_BIND_INDEX_BUFFER | \
- SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
- SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
- SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
- SVGA3D_SURFACE_INACTIVE | \
- SVGA3D_SURFACE_STAGING_UPLOAD | \
- SVGA3D_SURFACE_STAGING_DOWNLOAD | \
- SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
- SVGA3D_SURFACE_VADECODE | \
- SVGA3D_SURFACE_MULTISAMPLE \
+#define SVGA3D_SURFACE_STAGING_DISALLOWED_MASK \
+ ( SVGA3D_SURFACE_BIND_MASK | \
+ SVGA3D_SURFACE_AUTOGENMIPMAPS | \
+ SVGA3D_SURFACE_SCREENTARGET | \
+ SVGA3D_SURFACE_HINT_RENDERTARGET | \
+ SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
+ SVGA3D_SURFACE_MULTISAMPLE | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \
+ SVGA3D_SURFACE_RESOURCE_CLAMP | \
+ SVGA3D_SURFACE_BIND_TENSOR \
)
-#define SVGA3D_SURFACE_VAPROCESSFRAME_INPUT_DISALLOWED_MASK \
- ( SVGA3D_SURFACE_CUBEMAP | \
- SVGA3D_SURFACE_HINT_INDEXBUFFER | \
- SVGA3D_SURFACE_HINT_VERTEXBUFFER | \
- SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \
- SVGA3D_SURFACE_DEAD2 | \
- SVGA3D_SURFACE_VOLUME | \
- SVGA3D_SURFACE_SCREENTARGET | \
- SVGA3D_SURFACE_1D | \
- SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \
- SVGA3D_SURFACE_BIND_INDEX_BUFFER | \
- SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
- SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
- SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
- SVGA3D_SURFACE_STAGING_UPLOAD | \
- SVGA3D_SURFACE_STAGING_DOWNLOAD | \
- SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
- SVGA3D_SURFACE_MULTISAMPLE \
+#define SVGA3D_SURFACE_STAGING_COPY_DISALLOWED_MASK \
+ ( SVGA3D_SURFACE_STAGING_DISALLOWED_MASK | \
+ SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \
+ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \
)
#define SVGA3D_SURFACE_LOGICOPS_DISALLOWED_MASK \
@@ -640,18 +660,28 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
- SVGA3D_SURFACE_VADECODE | \
SVGA3D_SURFACE_MULTISAMPLE | \
- SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
+ SVGA3D_SURFACE_BIND_UAVIEW | \
+ SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \
+ SVGA3D_SURFACE_BIND_RAW_VIEWS | \
+ SVGA3D_SURFACE_BUFFER_STRUCTURED | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \
+ SVGA3D_SURFACE_RESOURCE_CLAMP | \
+ SVGA3D_SURFACE_STAGING_COPY \
)
+#define SVGA3D_SURFACE_SM5_MASK \
+ ( SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \
+ SVGA3D_SURFACE_BUFFER_STRUCTURED | \
+ SVGA3D_SURFACE_BIND_RAW_VIEWS | \
+ SVGA3D_SURFACE_BIND_UAVIEW | \
+ SVGA3D_SURFACE_RESOURCE_CLAMP \
+ )
+
+#define SVGA3D_BUFFER_STRUCTURED_STRIDE_MAX 2048
+
+
-/*
- * These are really the D3DFORMAT_OP defines from the wdk. We need
- * them so that we can query the host for what the supported surface
- * operations are (when we're using the D3D backend, in particular),
- * and so we can send those operations to the guest.
- */
typedef enum {
SVGA3DFORMAT_OP_TEXTURE = 0x00000001,
SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002,
@@ -661,98 +691,52 @@ typedef enum {
SVGA3DFORMAT_OP_ZSTENCIL = 0x00000040,
SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH = 0x00000080,
-/*
- * This format can be used as a render target if the current display mode
- * is the same depth if the alpha channel is ignored. e.g. if the device
- * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the
- * format op list entry for A8R8G8B8 should have this cap.
- */
+
SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET = 0x00000100,
-/*
- * This format contains DirectDraw support (including Flip). This flag
- * should not to be set on alpha formats.
- */
+
SVGA3DFORMAT_OP_DISPLAYMODE = 0x00000400,
-/*
- * The rasterizer can support some level of Direct3D support in this format
- * and implies that the driver can create a Context in this mode (for some
- * render target format). When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE
- * flag must also be set.
- */
+
SVGA3DFORMAT_OP_3DACCELERATION = 0x00000800,
-/*
- * This is set for a private format when the driver has put the bpp in
- * the structure.
- */
+
SVGA3DFORMAT_OP_PIXELSIZE = 0x00001000,
-/*
- * Indicates that this format can be converted to any RGB format for which
- * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified.
- */
+
SVGA3DFORMAT_OP_CONVERT_TO_ARGB = 0x00002000,
-/*
- * Indicates that this format can be used to create offscreen plain surfaces.
- */
+
SVGA3DFORMAT_OP_OFFSCREENPLAIN = 0x00004000,
-/*
- * Indicated that this format can be read as an SRGB texture (meaning that the
- * sampler will linearize the looked up data).
- */
+
SVGA3DFORMAT_OP_SRGBREAD = 0x00008000,
-/*
- * Indicates that this format can be used in the bumpmap instructions.
- */
+
SVGA3DFORMAT_OP_BUMPMAP = 0x00010000,
-/*
- * Indicates that this format can be sampled by the displacement map sampler.
- */
+
SVGA3DFORMAT_OP_DMAP = 0x00020000,
-/*
- * Indicates that this format cannot be used with texture filtering.
- */
+
SVGA3DFORMAT_OP_NOFILTER = 0x00040000,
-/*
- * Indicates that format conversions are supported to this RGB format if
- * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format.
- */
+
SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB = 0x00080000,
-/*
- * Indicated that this format can be written as an SRGB target
- * (meaning that the pixel pipe will DE-linearize data on output to format)
- */
+
SVGA3DFORMAT_OP_SRGBWRITE = 0x00100000,
-/*
- * Indicates that this format cannot be used with alpha blending.
- */
+
SVGA3DFORMAT_OP_NOALPHABLEND = 0x00200000,
-/*
- * Indicates that the device can auto-generated sublevels for resources
- * of this format.
- */
+
SVGA3DFORMAT_OP_AUTOGENMIPMAP = 0x00400000,
-/*
- * Indicates that this format can be used by vertex texture sampler.
- */
+
SVGA3DFORMAT_OP_VERTEXTEXTURE = 0x00800000,
-/*
- * Indicates that this format supports neither texture coordinate
- * wrap modes, nor mipmapping.
- */
+
SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP = 0x01000000
} SVGA3dFormatOp;
@@ -783,10 +767,7 @@ typedef enum {
SVGA3DFORMAT_OP_NOALPHABLEND | \
SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP)
-/*
- * This structure is a conversion of SVGA3DFORMAT_OP_*
- * Entries must be located at the same position.
- */
+
typedef union {
uint32 value;
struct {
@@ -818,123 +799,113 @@ typedef union {
};
} SVGA3dSurfaceFormatCaps;
-/*
- * SVGA_3D_CMD_SETRENDERSTATE Types. All value types
- * must fit in a uint32.
- */
+
typedef enum {
SVGA3D_RS_INVALID = 0,
SVGA3D_RS_MIN = 1,
- SVGA3D_RS_ZENABLE = 1, /* SVGA3dBool */
- SVGA3D_RS_ZWRITEENABLE = 2, /* SVGA3dBool */
- SVGA3D_RS_ALPHATESTENABLE = 3, /* SVGA3dBool */
- SVGA3D_RS_DITHERENABLE = 4, /* SVGA3dBool */
- SVGA3D_RS_BLENDENABLE = 5, /* SVGA3dBool */
- SVGA3D_RS_FOGENABLE = 6, /* SVGA3dBool */
- SVGA3D_RS_SPECULARENABLE = 7, /* SVGA3dBool */
- SVGA3D_RS_STENCILENABLE = 8, /* SVGA3dBool */
- SVGA3D_RS_LIGHTINGENABLE = 9, /* SVGA3dBool */
- SVGA3D_RS_NORMALIZENORMALS = 10, /* SVGA3dBool */
- SVGA3D_RS_POINTSPRITEENABLE = 11, /* SVGA3dBool */
- SVGA3D_RS_POINTSCALEENABLE = 12, /* SVGA3dBool */
- SVGA3D_RS_STENCILREF = 13, /* uint32 */
- SVGA3D_RS_STENCILMASK = 14, /* uint32 */
- SVGA3D_RS_STENCILWRITEMASK = 15, /* uint32 */
- SVGA3D_RS_FOGSTART = 16, /* float */
- SVGA3D_RS_FOGEND = 17, /* float */
- SVGA3D_RS_FOGDENSITY = 18, /* float */
- SVGA3D_RS_POINTSIZE = 19, /* float */
- SVGA3D_RS_POINTSIZEMIN = 20, /* float */
- SVGA3D_RS_POINTSIZEMAX = 21, /* float */
- SVGA3D_RS_POINTSCALE_A = 22, /* float */
- SVGA3D_RS_POINTSCALE_B = 23, /* float */
- SVGA3D_RS_POINTSCALE_C = 24, /* float */
- SVGA3D_RS_FOGCOLOR = 25, /* SVGA3dColor */
- SVGA3D_RS_AMBIENT = 26, /* SVGA3dColor */
- SVGA3D_RS_CLIPPLANEENABLE = 27, /* SVGA3dClipPlanes */
- SVGA3D_RS_FOGMODE = 28, /* SVGA3dFogMode */
- SVGA3D_RS_FILLMODE = 29, /* SVGA3dFillMode */
- SVGA3D_RS_SHADEMODE = 30, /* SVGA3dShadeMode */
- SVGA3D_RS_LINEPATTERN = 31, /* SVGA3dLinePattern */
- SVGA3D_RS_SRCBLEND = 32, /* SVGA3dBlendOp */
- SVGA3D_RS_DSTBLEND = 33, /* SVGA3dBlendOp */
- SVGA3D_RS_BLENDEQUATION = 34, /* SVGA3dBlendEquation */
- SVGA3D_RS_CULLMODE = 35, /* SVGA3dFace */
- SVGA3D_RS_ZFUNC = 36, /* SVGA3dCmpFunc */
- SVGA3D_RS_ALPHAFUNC = 37, /* SVGA3dCmpFunc */
- SVGA3D_RS_STENCILFUNC = 38, /* SVGA3dCmpFunc */
- SVGA3D_RS_STENCILFAIL = 39, /* SVGA3dStencilOp */
- SVGA3D_RS_STENCILZFAIL = 40, /* SVGA3dStencilOp */
- SVGA3D_RS_STENCILPASS = 41, /* SVGA3dStencilOp */
- SVGA3D_RS_ALPHAREF = 42, /* float (0.0 .. 1.0) */
- SVGA3D_RS_FRONTWINDING = 43, /* SVGA3dFrontWinding */
- SVGA3D_RS_COORDINATETYPE = 44, /* SVGA3dCoordinateType */
- SVGA3D_RS_ZBIAS = 45, /* float */
- SVGA3D_RS_RANGEFOGENABLE = 46, /* SVGA3dBool */
- SVGA3D_RS_COLORWRITEENABLE = 47, /* SVGA3dColorMask */
- SVGA3D_RS_VERTEXMATERIALENABLE = 48, /* SVGA3dBool */
- SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, /* SVGA3dVertexMaterial */
- SVGA3D_RS_SPECULARMATERIALSOURCE = 50, /* SVGA3dVertexMaterial */
- SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, /* SVGA3dVertexMaterial */
- SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, /* SVGA3dVertexMaterial */
- SVGA3D_RS_TEXTUREFACTOR = 53, /* SVGA3dColor */
- SVGA3D_RS_LOCALVIEWER = 54, /* SVGA3dBool */
- SVGA3D_RS_SCISSORTESTENABLE = 55, /* SVGA3dBool */
- SVGA3D_RS_BLENDCOLOR = 56, /* SVGA3dColor */
- SVGA3D_RS_STENCILENABLE2SIDED = 57, /* SVGA3dBool */
- SVGA3D_RS_CCWSTENCILFUNC = 58, /* SVGA3dCmpFunc */
- SVGA3D_RS_CCWSTENCILFAIL = 59, /* SVGA3dStencilOp */
- SVGA3D_RS_CCWSTENCILZFAIL = 60, /* SVGA3dStencilOp */
- SVGA3D_RS_CCWSTENCILPASS = 61, /* SVGA3dStencilOp */
- SVGA3D_RS_VERTEXBLEND = 62, /* SVGA3dVertexBlendFlags */
- SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, /* float */
- SVGA3D_RS_DEPTHBIAS = 64, /* float */
-
-
- /*
- * Output Gamma Level
- *
- * Output gamma effects the gamma curve of colors that are output from the
- * rendering pipeline. A value of 1.0 specifies a linear color space. If the
- * value is <= 0.0, gamma correction is ignored and linear color space is
- * used.
- */
-
- SVGA3D_RS_OUTPUTGAMMA = 65, /* float */
- SVGA3D_RS_ZVISIBLE = 66, /* SVGA3dBool */
- SVGA3D_RS_LASTPIXEL = 67, /* SVGA3dBool */
- SVGA3D_RS_CLIPPING = 68, /* SVGA3dBool */
- SVGA3D_RS_WRAP0 = 69, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP1 = 70, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP2 = 71, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP3 = 72, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP4 = 73, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP5 = 74, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP6 = 75, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP7 = 76, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP8 = 77, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP9 = 78, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP10 = 79, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP11 = 80, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP12 = 81, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP13 = 82, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP14 = 83, /* SVGA3dWrapFlags */
- SVGA3D_RS_WRAP15 = 84, /* SVGA3dWrapFlags */
- SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, /* SVGA3dBool */
- SVGA3D_RS_MULTISAMPLEMASK = 86, /* uint32 */
- SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, /* SVGA3dBool */
- SVGA3D_RS_TWEENFACTOR = 88, /* float */
- SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, /* SVGA3dBool */
- SVGA3D_RS_COLORWRITEENABLE1 = 90, /* SVGA3dColorMask */
- SVGA3D_RS_COLORWRITEENABLE2 = 91, /* SVGA3dColorMask */
- SVGA3D_RS_COLORWRITEENABLE3 = 92, /* SVGA3dColorMask */
- SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, /* SVGA3dBool */
- SVGA3D_RS_SRCBLENDALPHA = 94, /* SVGA3dBlendOp */
- SVGA3D_RS_DSTBLENDALPHA = 95, /* SVGA3dBlendOp */
- SVGA3D_RS_BLENDEQUATIONALPHA = 96, /* SVGA3dBlendEquation */
- SVGA3D_RS_TRANSPARENCYANTIALIAS = 97, /* SVGA3dTransparencyAntialiasType */
- SVGA3D_RS_LINEWIDTH = 98, /* float */
+ SVGA3D_RS_ZENABLE = 1,
+ SVGA3D_RS_ZWRITEENABLE = 2,
+ SVGA3D_RS_ALPHATESTENABLE = 3,
+ SVGA3D_RS_DITHERENABLE = 4,
+ SVGA3D_RS_BLENDENABLE = 5,
+ SVGA3D_RS_FOGENABLE = 6,
+ SVGA3D_RS_SPECULARENABLE = 7,
+ SVGA3D_RS_STENCILENABLE = 8,
+ SVGA3D_RS_LIGHTINGENABLE = 9,
+ SVGA3D_RS_NORMALIZENORMALS = 10,
+ SVGA3D_RS_POINTSPRITEENABLE = 11,
+ SVGA3D_RS_POINTSCALEENABLE = 12,
+ SVGA3D_RS_STENCILREF = 13,
+ SVGA3D_RS_STENCILMASK = 14,
+ SVGA3D_RS_STENCILWRITEMASK = 15,
+ SVGA3D_RS_FOGSTART = 16,
+ SVGA3D_RS_FOGEND = 17,
+ SVGA3D_RS_FOGDENSITY = 18,
+ SVGA3D_RS_POINTSIZE = 19,
+ SVGA3D_RS_POINTSIZEMIN = 20,
+ SVGA3D_RS_POINTSIZEMAX = 21,
+ SVGA3D_RS_POINTSCALE_A = 22,
+ SVGA3D_RS_POINTSCALE_B = 23,
+ SVGA3D_RS_POINTSCALE_C = 24,
+ SVGA3D_RS_FOGCOLOR = 25,
+ SVGA3D_RS_AMBIENT = 26,
+ SVGA3D_RS_CLIPPLANEENABLE = 27,
+ SVGA3D_RS_FOGMODE = 28,
+ SVGA3D_RS_FILLMODE = 29,
+ SVGA3D_RS_SHADEMODE = 30,
+ SVGA3D_RS_LINEPATTERN = 31,
+ SVGA3D_RS_SRCBLEND = 32,
+ SVGA3D_RS_DSTBLEND = 33,
+ SVGA3D_RS_BLENDEQUATION = 34,
+ SVGA3D_RS_CULLMODE = 35,
+ SVGA3D_RS_ZFUNC = 36,
+ SVGA3D_RS_ALPHAFUNC = 37,
+ SVGA3D_RS_STENCILFUNC = 38,
+ SVGA3D_RS_STENCILFAIL = 39,
+ SVGA3D_RS_STENCILZFAIL = 40,
+ SVGA3D_RS_STENCILPASS = 41,
+ SVGA3D_RS_ALPHAREF = 42,
+ SVGA3D_RS_FRONTWINDING = 43,
+ SVGA3D_RS_COORDINATETYPE = 44,
+ SVGA3D_RS_ZBIAS = 45,
+ SVGA3D_RS_RANGEFOGENABLE = 46,
+ SVGA3D_RS_COLORWRITEENABLE = 47,
+ SVGA3D_RS_VERTEXMATERIALENABLE = 48,
+ SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49,
+ SVGA3D_RS_SPECULARMATERIALSOURCE = 50,
+ SVGA3D_RS_AMBIENTMATERIALSOURCE = 51,
+ SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52,
+ SVGA3D_RS_TEXTUREFACTOR = 53,
+ SVGA3D_RS_LOCALVIEWER = 54,
+ SVGA3D_RS_SCISSORTESTENABLE = 55,
+ SVGA3D_RS_BLENDCOLOR = 56,
+ SVGA3D_RS_STENCILENABLE2SIDED = 57,
+ SVGA3D_RS_CCWSTENCILFUNC = 58,
+ SVGA3D_RS_CCWSTENCILFAIL = 59,
+ SVGA3D_RS_CCWSTENCILZFAIL = 60,
+ SVGA3D_RS_CCWSTENCILPASS = 61,
+ SVGA3D_RS_VERTEXBLEND = 62,
+ SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63,
+ SVGA3D_RS_DEPTHBIAS = 64,
+
+
+
+
+ SVGA3D_RS_OUTPUTGAMMA = 65,
+ SVGA3D_RS_ZVISIBLE = 66,
+ SVGA3D_RS_LASTPIXEL = 67,
+ SVGA3D_RS_CLIPPING = 68,
+ SVGA3D_RS_WRAP0 = 69,
+ SVGA3D_RS_WRAP1 = 70,
+ SVGA3D_RS_WRAP2 = 71,
+ SVGA3D_RS_WRAP3 = 72,
+ SVGA3D_RS_WRAP4 = 73,
+ SVGA3D_RS_WRAP5 = 74,
+ SVGA3D_RS_WRAP6 = 75,
+ SVGA3D_RS_WRAP7 = 76,
+ SVGA3D_RS_WRAP8 = 77,
+ SVGA3D_RS_WRAP9 = 78,
+ SVGA3D_RS_WRAP10 = 79,
+ SVGA3D_RS_WRAP11 = 80,
+ SVGA3D_RS_WRAP12 = 81,
+ SVGA3D_RS_WRAP13 = 82,
+ SVGA3D_RS_WRAP14 = 83,
+ SVGA3D_RS_WRAP15 = 84,
+ SVGA3D_RS_MULTISAMPLEANTIALIAS = 85,
+ SVGA3D_RS_MULTISAMPLEMASK = 86,
+ SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87,
+ SVGA3D_RS_TWEENFACTOR = 88,
+ SVGA3D_RS_ANTIALIASEDLINEENABLE = 89,
+ SVGA3D_RS_COLORWRITEENABLE1 = 90,
+ SVGA3D_RS_COLORWRITEENABLE2 = 91,
+ SVGA3D_RS_COLORWRITEENABLE3 = 92,
+ SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93,
+ SVGA3D_RS_SRCBLENDALPHA = 94,
+ SVGA3D_RS_DSTBLENDALPHA = 95,
+ SVGA3D_RS_BLENDEQUATIONALPHA = 96,
+ SVGA3D_RS_TRANSPARENCYANTIALIAS = 97,
+ SVGA3D_RS_LINEWIDTH = 98,
SVGA3D_RS_MAX
} SVGA3dRenderStateName;
@@ -946,9 +917,9 @@ typedef enum {
} SVGA3dTransparencyAntialiasType;
typedef enum {
- SVGA3D_VERTEXMATERIAL_NONE = 0, /* Use the value in the current material */
- SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, /* Use the value in the diffuse component */
- SVGA3D_VERTEXMATERIAL_SPECULAR = 2, /* Use the value in the specular component */
+ SVGA3D_VERTEXMATERIAL_NONE = 0,
+ SVGA3D_VERTEXMATERIAL_DIFFUSE = 1,
+ SVGA3D_VERTEXMATERIAL_SPECULAR = 2,
SVGA3D_VERTEXMATERIAL_MAX = 3,
} SVGA3dVertexMaterial;
@@ -1068,9 +1039,7 @@ typedef enum {
SVGA3D_FACE_MAX
} SVGA3dFace;
-/*
- * The order and the values should not be changed
- */
+
typedef enum {
SVGA3D_CMP_INVALID = 0,
@@ -1085,11 +1054,7 @@ typedef enum {
SVGA3D_CMP_MAX
} SVGA3dCmpFunc;
-/*
- * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows
- * the fog factor to be specified in the alpha component of the specular
- * (a.k.a. secondary) vertex color.
- */
+
typedef enum {
SVGA3D_FOGFUNC_INVALID = 0,
SVGA3D_FOGFUNC_EXP = 1,
@@ -1098,10 +1063,7 @@ typedef enum {
SVGA3D_FOGFUNC_PER_VERTEX = 4
} SVGA3dFogFunction;
-/*
- * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex
- * or per-pixel basis.
- */
+
typedef enum {
SVGA3D_FOGTYPE_INVALID = 0,
SVGA3D_FOGTYPE_VERTEX = 1,
@@ -1109,11 +1071,7 @@ typedef enum {
SVGA3D_FOGTYPE_MAX = 3
} SVGA3dFogType;
-/*
- * SVGA3D_FOGBASE_* selects depth or range-based fog. Depth-based fog is
- * computed using the eye Z value of each pixel (or vertex), whereas range-
- * based fog is computed using the actual distance (range) to the eye.
- */
+
typedef enum {
SVGA3D_FOGBASE_INVALID = 0,
SVGA3D_FOGBASE_DEPTHBASED = 1,
@@ -1149,11 +1107,7 @@ typedef enum {
SVGA3D_CLEAR_DEPTH = 0x2,
SVGA3D_CLEAR_STENCIL = 0x4,
- /*
- * Hint only, must be used together with SVGA3D_CLEAR_COLOR. If
- * SVGA3D_CLEAR_DEPTH or SVGA3D_CLEAR_STENCIL bit is set, this
- * bit will be ignored.
- */
+
SVGA3D_CLEAR_COLORFILL = 0x8
} SVGA3dClearFlag;
@@ -1205,57 +1159,48 @@ typedef enum {
SVGA3D_WRAPCOORD_ALL = 0xF,
} SVGA3dWrapFlags;
-/*
- * SVGA_3D_CMD_TEXTURESTATE Types. All value types
- * must fit in a uint32.
- */
+
typedef enum {
SVGA3D_TS_INVALID = 0,
SVGA3D_TS_MIN = 1,
- SVGA3D_TS_BIND_TEXTURE = 1, /* SVGA3dSurfaceId */
- SVGA3D_TS_COLOROP = 2, /* SVGA3dTextureCombiner */
- SVGA3D_TS_COLORARG1 = 3, /* SVGA3dTextureArgData */
- SVGA3D_TS_COLORARG2 = 4, /* SVGA3dTextureArgData */
- SVGA3D_TS_ALPHAOP = 5, /* SVGA3dTextureCombiner */
- SVGA3D_TS_ALPHAARG1 = 6, /* SVGA3dTextureArgData */
- SVGA3D_TS_ALPHAARG2 = 7, /* SVGA3dTextureArgData */
- SVGA3D_TS_ADDRESSU = 8, /* SVGA3dTextureAddress */
- SVGA3D_TS_ADDRESSV = 9, /* SVGA3dTextureAddress */
- SVGA3D_TS_MIPFILTER = 10, /* SVGA3dTextureFilter */
- SVGA3D_TS_MAGFILTER = 11, /* SVGA3dTextureFilter */
- SVGA3D_TS_MINFILTER = 12, /* SVGA3dTextureFilter */
- SVGA3D_TS_BORDERCOLOR = 13, /* SVGA3dColor */
- SVGA3D_TS_TEXCOORDINDEX = 14, /* uint32 */
- SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, /* SVGA3dTexTransformFlags */
- SVGA3D_TS_TEXCOORDGEN = 16, /* SVGA3dTextureCoordGen */
- SVGA3D_TS_BUMPENVMAT00 = 17, /* float */
- SVGA3D_TS_BUMPENVMAT01 = 18, /* float */
- SVGA3D_TS_BUMPENVMAT10 = 19, /* float */
- SVGA3D_TS_BUMPENVMAT11 = 20, /* float */
- SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, /* uint32 */
- SVGA3D_TS_TEXTURE_LOD_BIAS = 22, /* float */
- SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, /* uint32 */
- SVGA3D_TS_ADDRESSW = 24, /* SVGA3dTextureAddress */
-
-
- /*
- * Sampler Gamma Level
- *
- * Sampler gamma effects the color of samples taken from the sampler. A
- * value of 1.0 will produce linear samples. If the value is <= 0.0 the
- * gamma value is ignored and a linear space is used.
- */
-
- SVGA3D_TS_GAMMA = 25, /* float */
- SVGA3D_TS_BUMPENVLSCALE = 26, /* float */
- SVGA3D_TS_BUMPENVLOFFSET = 27, /* float */
- SVGA3D_TS_COLORARG0 = 28, /* SVGA3dTextureArgData */
- SVGA3D_TS_ALPHAARG0 = 29, /* SVGA3dTextureArgData */
- SVGA3D_TS_PREGB_MAX = 30, /* Max value before GBObjects */
- SVGA3D_TS_CONSTANT = 30, /* SVGA3dColor */
- SVGA3D_TS_COLOR_KEY_ENABLE = 31, /* SVGA3dBool */
- SVGA3D_TS_COLOR_KEY = 32, /* SVGA3dColor */
+ SVGA3D_TS_BIND_TEXTURE = 1,
+ SVGA3D_TS_COLOROP = 2,
+ SVGA3D_TS_COLORARG1 = 3,
+ SVGA3D_TS_COLORARG2 = 4,
+ SVGA3D_TS_ALPHAOP = 5,
+ SVGA3D_TS_ALPHAARG1 = 6,
+ SVGA3D_TS_ALPHAARG2 = 7,
+ SVGA3D_TS_ADDRESSU = 8,
+ SVGA3D_TS_ADDRESSV = 9,
+ SVGA3D_TS_MIPFILTER = 10,
+ SVGA3D_TS_MAGFILTER = 11,
+ SVGA3D_TS_MINFILTER = 12,
+ SVGA3D_TS_BORDERCOLOR = 13,
+ SVGA3D_TS_TEXCOORDINDEX = 14,
+ SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15,
+ SVGA3D_TS_TEXCOORDGEN = 16,
+ SVGA3D_TS_BUMPENVMAT00 = 17,
+ SVGA3D_TS_BUMPENVMAT01 = 18,
+ SVGA3D_TS_BUMPENVMAT10 = 19,
+ SVGA3D_TS_BUMPENVMAT11 = 20,
+ SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21,
+ SVGA3D_TS_TEXTURE_LOD_BIAS = 22,
+ SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23,
+ SVGA3D_TS_ADDRESSW = 24,
+
+
+
+
+ SVGA3D_TS_GAMMA = 25,
+ SVGA3D_TS_BUMPENVLSCALE = 26,
+ SVGA3D_TS_BUMPENVLOFFSET = 27,
+ SVGA3D_TS_COLORARG0 = 28,
+ SVGA3D_TS_ALPHAARG0 = 29,
+ SVGA3D_TS_PREGB_MAX = 30,
+ SVGA3D_TS_CONSTANT = 30,
+ SVGA3D_TS_COLOR_KEY_ENABLE = 31,
+ SVGA3D_TS_COLOR_KEY = 32,
SVGA3D_TS_MAX
} SVGA3dTextureStateName;
@@ -1304,20 +1249,17 @@ typedef enum {
SVGA3D_TEX_ADDRESS_MAX
} SVGA3dTextureAddress;
-/*
- * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is
- * disabled, and the rasterizer should use the magnification filter instead.
- */
+
typedef enum {
SVGA3D_TEX_FILTER_NONE = 0,
SVGA3D_TEX_FILTER_MIN = 0,
SVGA3D_TEX_FILTER_NEAREST = 1,
SVGA3D_TEX_FILTER_LINEAR = 2,
SVGA3D_TEX_FILTER_ANISOTROPIC = 3,
- SVGA3D_TEX_FILTER_FLATCUBIC = 4, /* Deprecated, not implemented */
- SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, /* Deprecated, not implemented */
- SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, /* Not currently implemented */
- SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, /* Not currently implemented */
+ SVGA3D_TEX_FILTER_FLATCUBIC = 4,
+ SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5,
+ SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6,
+ SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7,
SVGA3D_TEX_FILTER_MAX
} SVGA3dTextureFilter;
@@ -1339,9 +1281,7 @@ typedef enum {
SVGA3D_TEXCOORD_GEN_MAX
} SVGA3dTextureCoordGen;
-/*
- * Texture argument constants for texture combiner
- */
+
typedef enum {
SVGA3D_TA_INVALID = 0,
SVGA3D_TA_TFACTOR = 1,
@@ -1355,23 +1295,14 @@ typedef enum {
#define SVGA3D_TM_MASK_LEN 4
-/* Modifiers for texture argument constants defined above. */
+
typedef enum {
SVGA3D_TM_NONE = 0,
SVGA3D_TM_ALPHA = (1 << SVGA3D_TM_MASK_LEN),
SVGA3D_TM_ONE_MINUS = (2 << SVGA3D_TM_MASK_LEN),
} SVGA3dTextureArgModifier;
-/*
- * Vertex declarations
- *
- * Notes:
- *
- * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you
- * draw with any POSITIONT vertex arrays, the programmable vertex
- * pipeline will be implicitly disabled. Drawing will take place as if
- * no vertex shader was bound.
- */
+
typedef enum {
SVGA3D_DECLUSAGE_POSITION = 0,
@@ -1395,11 +1326,10 @@ typedef enum {
SVGA3D_DECLMETHOD_DEFAULT = 0,
SVGA3D_DECLMETHOD_PARTIALU,
SVGA3D_DECLMETHOD_PARTIALV,
- SVGA3D_DECLMETHOD_CROSSUV, /* Normal */
+ SVGA3D_DECLMETHOD_CROSSUV,
SVGA3D_DECLMETHOD_UV,
- SVGA3D_DECLMETHOD_LOOKUP, /* Lookup a displacement map */
- SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement */
- /* map */
+ SVGA3D_DECLMETHOD_LOOKUP,
+ SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED,
} SVGA3dDeclMethod;
typedef enum {
@@ -1423,28 +1353,16 @@ typedef enum {
SVGA3D_DECLTYPE_MAX,
} SVGA3dDeclType;
-/*
- * This structure is used for the divisor for geometry instancing;
- * it's a direct translation of the Direct3D equivalent.
- */
+
typedef union {
struct {
- /*
- * For index data, this number represents the number of instances to draw.
- * For instance data, this number represents the number of
- * instances/vertex in this stream
- */
+
uint32 count : 30;
- /*
- * This is 1 if this is supposed to be the data that is repeated for
- * every instance.
- */
+
uint32 indexedData : 1;
- /*
- * This is 1 if this is supposed to be the per-instance data.
- */
+
uint32 instanceData : 1;
};
@@ -1452,12 +1370,7 @@ typedef union {
} SVGA3dVertexDivisor;
typedef enum {
- /*
- * SVGA3D_PRIMITIVE_INVALID is a valid primitive type.
- *
- * List MIN second so debuggers will think INVALID is
- * the correct name.
- */
+
SVGA3D_PRIMITIVE_INVALID = 0,
SVGA3D_PRIMITIVE_MIN = 0,
SVGA3D_PRIMITIVE_TRIANGLELIST = 1,
@@ -1538,8 +1451,8 @@ typedef enum {
SVGA3D_LIGHTTYPE_INVALID = 0,
SVGA3D_LIGHTTYPE_MIN = 1,
SVGA3D_LIGHTTYPE_POINT = 1,
- SVGA3D_LIGHTTYPE_SPOT1 = 2, /* 1-cone, in degrees */
- SVGA3D_LIGHTTYPE_SPOT2 = 3, /* 2-cone, in radians */
+ SVGA3D_LIGHTTYPE_SPOT1 = 2,
+ SVGA3D_LIGHTTYPE_SPOT2 = 3,
SVGA3D_LIGHTTYPE_DIRECTIONAL = 4,
SVGA3D_LIGHTTYPE_MAX
} SVGA3dLightType;
@@ -1584,9 +1497,7 @@ typedef enum {
SVGA3D_CONST_TYPE_MAX = 3,
} SVGA3dShaderConstType;
-/*
- * Register limits for shader consts.
- */
+
#define SVGA3D_CONSTREG_MAX 256
#define SVGA3D_CONSTINTREG_MAX 16
#define SVGA3D_CONSTBOOLREG_MAX 16
@@ -1624,10 +1535,7 @@ typedef uint8 SVGA3dQueryTypeUint8;
#define SVGA3D_NUM_QUERYTYPE (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN)
-/*
- * This is the maximum number of queries per context that can be active
- * simultaneously between a beginQuery and endQuery.
- */
+
#define SVGA3D_MAX_QUERY 64
/*
@@ -1750,19 +1658,26 @@ typedef enum {
SVGA3D_READ_HOST_VRAM = 2,
} SVGA3dTransferType;
-typedef enum {
- SVGA3D_LOGICOP_INVALID = 0,
- SVGA3D_LOGICOP_MIN = 1,
- SVGA3D_LOGICOP_COPY = 1,
- SVGA3D_LOGICOP_NOT = 2,
- SVGA3D_LOGICOP_AND = 3,
- SVGA3D_LOGICOP_OR = 4,
- SVGA3D_LOGICOP_XOR = 5,
- SVGA3D_LOGICOP_NXOR = 6,
- SVGA3D_LOGICOP_ROP3MIN = 30, /* 7-29 are reserved for future logic ops. */
- SVGA3D_LOGICOP_ROP3MAX = (SVGA3D_LOGICOP_ROP3MIN + 255),
- SVGA3D_LOGICOP_MAX = (SVGA3D_LOGICOP_ROP3MAX + 1),
-} SVGA3dLogicOp;
+#define SVGA3D_LOGICOP_INVALID 0
+#define SVGA3D_LOGICOP_COPY 1
+
+#define SVGA3D_LOGICOP_MIN 1
+#define SVGA3D_LOGICOP_NOT 2
+#define SVGA3D_LOGICOP_AND 3
+#define SVGA3D_LOGICOP_OR 4
+#define SVGA3D_LOGICOP_XOR 5
+#define SVGA3D_LOGICOP_NXOR 6
+#define SVGA3D_LOGICOP_ROP3 7
+
+#define SVGA3D_LOGICOP_MAX 8
+
+typedef uint16 SVGA3dLogicOp;
+
+#define SVGA3D_LOGICOP_ROP3_INVALID ((uint16) -1)
+#define SVGA3D_LOGICOP_ROP3_MIN 0
+#define SVGA3D_LOGICOP_ROP3_MAX 256
+
+typedef uint16 SVGA3dLogicOpRop3;
typedef
#include "vmware_pack_begin.h"
@@ -1812,9 +1727,7 @@ struct {
#include "vmware_pack_end.h"
SVGA3dSize;
-/*
- * Guest-backed objects definitions.
- */
+
typedef enum {
SVGA_OTABLE_MOB = 0,
SVGA_OTABLE_MIN = 0,
@@ -1828,22 +1741,16 @@ typedef enum {
SVGA_OTABLE_DXCONTEXT = 5,
SVGA_OTABLE_DX_MAX = 6,
- SVGA_OTABLE_VADECODER = 6,
- SVGA_OTABLE_VAPROCESSOR = 7,
- SVGA_OTABLE_BUG_1952836_MAX = 8,
- /*
- * Additions to this table need to be tied to HW-version features and
- * checkpointed accordingly. (See SVGACheckpointGuestBackedObjects.)
- */
- SVGA_OTABLE_DEVEL_MAX = 8,
- SVGA_OTABLE_MAX = 8
-} SVGAOTableType;
-/*
- * Deprecated.
- */
-#define SVGA_OTABLE_COUNT 4
+ SVGA_OTABLE_DEVEL_MAX = 6,
+ SVGA_OTABLE_MAX = 6,
+
+
+ SVGA_OTABLE_RESERVED1 = 6,
+ SVGA_OTABLE_RESERVED2 = 7,
+ SVGA_OTABLE_BUG_1952836_MAX = 8,
+} SVGAOTableType;
typedef enum {
SVGA_COTABLE_MIN = 0,
@@ -1860,27 +1767,29 @@ typedef enum {
SVGA_COTABLE_DXSHADER = 10,
SVGA_COTABLE_DX10_MAX = 11,
SVGA_COTABLE_UAVIEW = 11,
- SVGA_COTABLE_MAX
+ SVGA_COTABLE_MAX = 12,
} SVGACOTableType;
-/*
- * The largest size (number of entries) allowed in a COTable.
- */
+
#define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2)
typedef enum SVGAMobFormat {
- SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID,
- SVGA3D_MOBFMT_PTDEPTH_0 = 0,
- SVGA3D_MOBFMT_MIN = 0,
- SVGA3D_MOBFMT_PTDEPTH_1 = 1,
- SVGA3D_MOBFMT_PTDEPTH_2 = 2,
- SVGA3D_MOBFMT_RANGE = 3,
- SVGA3D_MOBFMT_PTDEPTH64_0 = 4,
- SVGA3D_MOBFMT_PTDEPTH64_1 = 5,
- SVGA3D_MOBFMT_PTDEPTH64_2 = 6,
- SVGA3D_MOBFMT_PREDX_MAX = 7,
- SVGA3D_MOBFMT_EMPTY = 7,
+ SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID,
+ SVGA3D_MOBFMT_PT_0 = 0,
+ SVGA3D_MOBFMT_MIN = 0,
+ SVGA3D_MOBFMT_PT_1 = 1,
+ SVGA3D_MOBFMT_PT_2 = 2,
+ SVGA3D_MOBFMT_RANGE = 3,
+ SVGA3D_MOBFMT_PT64_0 = 4,
+ SVGA3D_MOBFMT_PT64_1 = 5,
+ SVGA3D_MOBFMT_PT64_2 = 6,
+ SVGA3D_MOBFMT_PREDX_MAX = 7,
+ SVGA3D_MOBFMT_EMPTY = 7,
+
SVGA3D_MOBFMT_MAX,
+
+
+ SVGA3D_MOBFMT_HB,
} SVGAMobFormat;
#define SVGA3D_MOB_EMPTY_BASE 1
@@ -1905,7 +1814,18 @@ typedef enum SVGA3dMSQualityLevel {
SVGA3D_MS_QUALITY_NONE = 0,
SVGA3D_MS_QUALITY_MIN = 0,
SVGA3D_MS_QUALITY_FULL = 1,
- SVGA3D_MS_QUALITY_MAX = 2,
+ SVGA3D_MS_QUALITY_RESOLVED = 2,
+ SVGA3D_MS_QUALITY_MAX = 3,
} SVGA3dMSQualityLevel;
-#endif // _SVGA3D_TYPES_H_
+
+
+typedef enum SVGA3dFrameUpdateType {
+ SVGA3D_FRAME_END = 0,
+ SVGA3D_FRAME_MIN = 0,
+ SVGA3D_FRAME_PARTIAL = 1,
+ SVGA3D_FRAME_UNKNOWN = 2,
+ SVGA3D_FRAME_MAX = 3,
+} SVGA3dFrameUpdateType;
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c
index 6577c839c..8347dc175 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c
@@ -439,6 +439,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
unsigned region_flags;
unsigned surface_flags;
+ assert(!swc->have_gb_objects);
+
if (transfer == SVGA3D_WRITE_HOST_VRAM) {
region_flags = SVGA_RELOC_READ;
surface_flags = SVGA_RELOC_WRITE;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h
index 924729873..86d1120b3 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h
@@ -636,10 +636,23 @@ enum pipe_error
SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc,
unsigned count,
uint32 startBuffer,
- const SVGA3dVertexBuffer *bufferInfo,
+ const SVGA3dVertexBuffer_v2 *bufferInfo,
struct svga_winsys_surface **surfaces);
enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffers_v2(struct svga_winsys_context *swc,
+ unsigned count,
+ uint32 startBuffer,
+ const SVGA3dVertexBuffer_v2 *bufferInfo,
+ struct svga_winsys_surface **surfaces);
+
+enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffersOffsetAndSize(struct svga_winsys_context *swc,
+ unsigned count,
+ uint32 startBuffer,
+ const SVGA3dVertexBuffer_v2 *bufferInfo);
+
+enum pipe_error
SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc,
SVGA3dPrimitiveType topology);
@@ -649,6 +662,17 @@ SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc,
SVGA3dSurfaceFormat format, uint32 offset);
enum pipe_error
+SVGA3D_vgpu10_SetIndexBuffer_v2(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *indexes,
+ SVGA3dSurfaceFormat format, uint32 offset,
+ uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_vgpu10_SetIndexBufferOffsetAndSize(struct svga_winsys_context *swc,
+ SVGA3dSurfaceFormat format, uint32 offset,
+ uint32 sizeInBytes);
+
+enum pipe_error
SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc,
unsigned slot,
SVGA3dShaderType type,
@@ -714,6 +738,31 @@ SVGA3D_sm5_DrawInstancedIndirect(struct svga_winsys_context *swc,
unsigned argOffset);
enum pipe_error
+SVGA3D_sm5_DefineUAView(struct svga_winsys_context *swc,
+ SVGA3dUAViewId uaViewId,
+ struct svga_winsys_surface *surface,
+ SVGA3dSurfaceFormat format,
+ SVGA3dResourceType resourceDimension,
+ const SVGA3dUAViewDesc *desc);
+
+enum pipe_error
+SVGA3D_sm5_DestroyUAView(struct svga_winsys_context *swc,
+ SVGA3dUAViewId uaViewId);
+
+enum pipe_error
+SVGA3D_sm5_SetUAViews(struct svga_winsys_context *swc,
+ uint32 uavSpliceIndex,
+ unsigned count,
+ const SVGA3dUAViewId ids[],
+ struct svga_winsys_surface **uaViews);
+
+enum pipe_error
+SVGA3D_sm5_SetCSUAViews(struct svga_winsys_context *swc,
+ unsigned count,
+ const SVGA3dUAViewId ids[],
+ struct svga_winsys_surface **uaViews);
+
+enum pipe_error
SVGA3D_sm5_Dispatch(struct svga_winsys_context *swc,
const uint32 threadGroupCount[3]);
@@ -732,4 +781,23 @@ SVGA3D_sm5_DefineAndBindStreamOutput(struct svga_winsys_context *swc,
uint32 rasterizedStream,
uint32 sizeInBytes);
+enum pipe_error
+SVGA3D_sm5_DefineRasterizerState_v2(struct svga_winsys_context *swc,
+ SVGA3dRasterizerStateId rasterizerId,
+ uint8 fillMode,
+ SVGA3dCullMode cullMode,
+ uint8 frontCounterClockwise,
+ int32 depthBias,
+ float depthBiasClamp,
+ float slopeScaledDepthBias,
+ uint8 depthClipEnable,
+ uint8 scissorEnable,
+ uint8 multisampleEnable,
+ uint8 antialiasedLineEnable,
+ float lineWidth,
+ uint8 lineStippleEnable,
+ uint8 lineStippleFactor,
+ uint16 lineStipplePattern,
+ uint8 provokingVertexLast,
+ uint32 forcedSampleCount);
#endif /* __SVGA3D_H__ */
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.c b/lib/mesa/src/gallium/drivers/svga/svga_context.c
index d80336cf7..a1e054b7c 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_context.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_context.c
@@ -61,13 +61,6 @@ svga_destroy(struct pipe_context *pipe)
struct svga_context *svga = svga_context(pipe);
unsigned shader, i;
- /* free any alternate rasterizer states used for point sprite */
- for (i = 0; i < ARRAY_SIZE(svga->rasterizer_no_cull); i++) {
- if (svga->rasterizer_no_cull[i]) {
- pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]);
- }
- }
-
/* free depthstencil_disable state */
if (svga->depthstencil_disable) {
pipe->delete_depth_stencil_alpha_state(pipe, svga->depthstencil_disable);
@@ -98,6 +91,7 @@ svga_destroy(struct pipe_context *pipe)
svga_cleanup_tss_binding(svga);
svga_cleanup_vertex_state(svga);
svga_cleanup_tcs_state(svga);
+ svga_cleanup_shader_image_state(svga);
svga_destroy_swtnl(svga);
svga_hwtnl_destroy(svga->hwtnl);
@@ -114,6 +108,9 @@ svga_destroy(struct pipe_context *pipe)
util_bitmask_destroy(svga->surface_view_id_bm);
util_bitmask_destroy(svga->stream_output_id_bm);
util_bitmask_destroy(svga->query_id_bm);
+ util_bitmask_destroy(svga->uav_id_bm);
+ util_bitmask_destroy(svga->uav_to_free_id_bm);
+
u_upload_destroy(svga->const0_upload);
u_upload_destroy(svga->pipe.stream_uploader);
u_upload_destroy(svga->pipe.const_uploader);
@@ -126,6 +123,15 @@ svga_destroy(struct pipe_context *pipe)
}
}
+ /* free any pending srvs that were created for rawbuf sr view for
+ * constant buf.
+ */
+ if (svga_have_gl43(svga)) {
+ svga_destroy_rawbuf_srv(svga);
+ util_bitmask_destroy(svga->sampler_view_to_free_id_bm);
+ pipe_resource_reference(&svga->dummy_resource, NULL);
+ }
+
FREE(svga);
}
@@ -189,6 +195,9 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
svga_init_stream_output_functions(svga);
svga_init_clear_functions(svga);
svga_init_tracked_state(svga);
+ svga_init_shader_image_functions(svga);
+ svga_init_shader_buffer_functions(svga);
+ svga_init_cs_functions(svga);
/* init misc state */
svga->curr.sample_mask = ~0;
@@ -230,6 +239,15 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
if (!(svga->query_id_bm = util_bitmask_create()))
goto cleanup;
+ if (!(svga->uav_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->uav_to_free_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->sampler_view_to_free_id_bm = util_bitmask_create()))
+ goto cleanup;
+
svga->hwtnl = svga_hwtnl_create(svga);
if (svga->hwtnl == NULL)
goto cleanup;
@@ -275,6 +293,11 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
svga->state.hw_draw.num_backed_views = 0;
svga->state.hw_draw.rasterizer_discard = FALSE;
+ /* Initialize uavs */
+ svga->state.hw_draw.uavSpliceIndex = -1;
+ svga->state.hw_draw.num_uavs = 0;
+ svga->state.hw_draw.num_cs_uavs = 0;
+
/* Initialize the shader pointers */
svga->state.hw_draw.vs = NULL;
svga->state.hw_draw.gs = NULL;
@@ -289,6 +312,10 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
sizeof(svga->state.hw_draw.default_constbuf_size));
memset(svga->state.hw_draw.enabled_constbufs, 0,
sizeof(svga->state.hw_draw.enabled_constbufs));
+ memset(svga->state.hw_draw.enabled_rawbufs, 0,
+ sizeof(svga->state.hw_draw.enabled_rawbufs));
+ memset(svga->state.hw_draw.rawbufs, 0,
+ sizeof(svga->state.hw_draw.rawbufs));
svga->state.hw_draw.ib = NULL;
svga->state.hw_draw.num_vbuffers = 0;
memset(svga->state.hw_draw.vbuffers, 0,
@@ -296,6 +323,17 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
svga->state.hw_draw.const0_buffer = NULL;
svga->state.hw_draw.const0_handle = NULL;
+ if (svga_have_gl43(svga)) {
+ for (unsigned shader = 0; shader < PIPE_SHADER_TYPES; ++shader) {
+ for (unsigned i = 0;
+ i < ARRAY_SIZE(svga->state.hw_draw.rawbufs[shader]); i++) {
+ svga->state.hw_draw.rawbufs[shader][i].srvid = SVGA3D_INVALID_ID;
+ }
+ }
+ svga_uav_cache_init(svga);
+ svga->dummy_resource = NULL;
+ }
+
/* Create a no-operation blend state which we will bind whenever the
* requested blend state is impossible (e.g. due to having an integer
* render target attached).
@@ -346,11 +384,15 @@ cleanup:
util_bitmask_destroy(svga->input_element_object_id_bm);
util_bitmask_destroy(svga->rast_object_id_bm);
util_bitmask_destroy(svga->sampler_object_id_bm);
- util_bitmask_destroy(svga->sampler_view_id_bm);
util_bitmask_destroy(svga->shader_id_bm);
util_bitmask_destroy(svga->surface_view_id_bm);
util_bitmask_destroy(svga->stream_output_id_bm);
util_bitmask_destroy(svga->query_id_bm);
+
+ util_bitmask_destroy(svga->uav_id_bm);
+ util_bitmask_destroy(svga->uav_to_free_id_bm);
+ util_bitmask_destroy(svga->sampler_view_id_bm);
+
FREE(svga);
svga = NULL;
@@ -424,6 +466,11 @@ svga_context_flush(struct svga_context *svga,
if (svga_need_to_rebind_resources(svga)) {
svga->rebind.flags.query = TRUE;
}
+
+ if (svga_sws(svga)->have_index_vertex_buffer_offset_cmd) {
+ svga->rebind.flags.vertexbufs = TRUE;
+ svga->rebind.flags.indexbuf = TRUE;
+ }
}
if (SVGA_DEBUG & DEBUG_SYNC) {
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.h b/lib/mesa/src/gallium/drivers/svga/svga_context.h
index d5ef4c345..0227d64cf 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_context.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_context.h
@@ -43,6 +43,8 @@
#include "svga_winsys.h"
#include "svga_hw_reg.h"
#include "svga3d_shaderdefs.h"
+#include "svga_image_view.h"
+#include "svga_shader_buffer.h"
#include "svga_debug.h"
/** Non-GPU queries for gallium HUD */
@@ -83,10 +85,12 @@ enum svga_hud {
SVGA_QUERY_MAX
};
+
/**
* Maximum supported number of constant buffers per shader
+ * including the zero slot for the default constant buffer.
*/
-#define SVGA_MAX_CONST_BUFS 14
+#define SVGA_MAX_CONST_BUFS 15
/**
* Maximum constant buffer size that can be set in the
@@ -96,6 +100,18 @@ enum svga_hud {
#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int))
#define CONST0_UPLOAD_ALIGNMENT 256
+#define SVGA_MAX_IMAGES SVGA3D_MAX_UAVIEWS
+#define SVGA_MAX_SHADER_BUFFERS SVGA3D_MAX_UAVIEWS
+#define SVGA_MAX_ATOMIC_BUFFERS SVGA3D_MAX_UAVIEWS
+#define SVGA_MAX_UAVIEWS SVGA3D_DX11_1_MAX_UAVIEWS
+
+enum svga_surface_state
+{
+ SVGA_SURFACE_STATE_CREATED,
+ SVGA_SURFACE_STATE_INVALIDATED,
+ SVGA_SURFACE_STATE_UPDATED,
+ SVGA_SURFACE_STATE_RENDERED,
+};
struct draw_vertex_shader;
struct draw_fragment_shader;
@@ -169,6 +185,8 @@ struct svga_depth_stencil_state {
#define SVGA_PIPELINE_FLAG_LINES (1<<PIPE_PRIM_LINES)
#define SVGA_PIPELINE_FLAG_TRIS (1<<PIPE_PRIM_TRIANGLES)
+#define SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES 4
+
struct svga_rasterizer_state {
struct pipe_rasterizer_state templ; /* needed for draw module */
@@ -194,6 +212,11 @@ struct svga_rasterizer_state {
SVGA3dRasterizerStateId id; /**< vgpu10 */
+ /* Alternate SVGA rasterizer state object with forcedSampleCount */
+ int altRastIds[SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES+1];
+
+ struct svga_rasterizer_state *no_cull_rasterizer;
+
/** For debugging: */
const char* need_pipeline_tris_str;
const char* need_pipeline_lines_str;
@@ -263,6 +286,14 @@ struct svga_constant_buffer {
unsigned size;
};
+/*
+ * State kept for a constant buffer that is bound as a raw buffer.
+ * Both svga_state and svga_hw_draw_state hold a rawbufs[][] array of
+ * these, indexed by shader stage and constant buffer slot.
+ */
+struct svga_raw_buffer {
+ struct svga_winsys_surface *handle;
+ unsigned buffer_offset;
+ unsigned buffer_size;
+ struct pipe_resource *buffer;
+ int32 srvid; /* presumably the id of the SRV created for this raw buffer -- TODO confirm */
+};
+
/* Use to calculate differences between state emitted to hardware and
* current driver-calculated state.
*/
@@ -270,10 +301,10 @@ struct svga_state
{
const struct svga_blend_state *blend;
const struct svga_depth_stencil_state *depth;
- const struct svga_rasterizer_state *rast;
const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
const struct svga_velems_state *velems;
+ struct svga_rasterizer_state *rast;
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture ID's? */
struct svga_fragment_shader *fs;
struct svga_vertex_shader *vs;
@@ -291,6 +322,7 @@ struct svga_state
* svga_shader_emitter_v10.num_shader_consts.
*/
struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
+ struct svga_raw_buffer rawbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
struct pipe_framebuffer_state framebuffer;
float depthscale;
@@ -322,12 +354,26 @@ struct svga_state
unsigned sample_mask;
unsigned vertices_per_patch;
float default_tesslevels[6]; /* tessellation (outer[4] + inner[2]) levels */
+
+ /* Image views */
+ unsigned num_image_views[PIPE_SHADER_TYPES];
+ struct svga_image_view image_views[PIPE_SHADER_TYPES][SVGA_MAX_IMAGES];
+
+ /* Shader buffers */
+ unsigned num_shader_buffers[PIPE_SHADER_TYPES];
+ struct svga_shader_buffer shader_buffers[PIPE_SHADER_TYPES][SVGA_MAX_SHADER_BUFFERS];
+
+ /* HW atomic buffers */
+ unsigned num_atomic_buffers;
+ struct svga_shader_buffer atomic_buffers[SVGA_MAX_SHADER_BUFFERS];
+
struct {
/* Determine the layout of the grid (in block units) to be used. */
unsigned size[3];
/* If DispatchIndirect is used, this will have the grid size info */
struct pipe_resource *indirect;
} grid_info;
+
};
struct svga_prescale {
@@ -399,6 +445,8 @@ struct svga_hw_draw_state
/** Currently bound constant buffer, per shader stage */
struct pipe_resource *constbuf[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
struct svga_constant_buffer constbufoffsets[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
+ struct svga_raw_buffer rawbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
+ unsigned enabled_rawbufs[PIPE_SHADER_TYPES];
/** Bitmask of enabled constant buffers */
unsigned enabled_constbufs[PIPE_SHADER_TYPES];
@@ -421,7 +469,7 @@ struct svga_hw_draw_state
SVGA3dPrimitiveType topology;
/** Vertex buffer state */
- SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS];
+ SVGA3dVertexBuffer_v2 vbuffer_attrs[PIPE_MAX_ATTRIBS];
struct pipe_resource *vbuffers[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers;
@@ -441,6 +489,35 @@ struct svga_hw_draw_state
boolean rasterizer_discard; /* set if rasterization is disabled */
boolean has_backed_views; /* set if any of the rtv/dsv is a backed surface view */
+
+ /* Image Views */
+ int uavSpliceIndex;
+ unsigned num_image_views[PIPE_SHADER_TYPES];
+ struct svga_image_view image_views[PIPE_SHADER_TYPES][SVGA_MAX_IMAGES];
+
+ /* Shader Buffers */
+ unsigned num_shader_buffers[PIPE_SHADER_TYPES];
+ struct svga_shader_buffer shader_buffers[PIPE_SHADER_TYPES][SVGA_MAX_SHADER_BUFFERS];
+
+ /* HW Atomic Buffers */
+ unsigned num_atomic_buffers;
+ struct svga_shader_buffer atomic_buffers[SVGA_MAX_SHADER_BUFFERS];
+
+ /* UAV state */
+ unsigned num_uavs;
+ SVGA3dUAViewId uaViewIds[SVGA_MAX_UAVIEWS];
+ struct svga_winsys_surface *uaViews[SVGA_MAX_UAVIEWS];
+
+ /* Compute UAV state */
+ unsigned num_cs_uavs;
+ SVGA3dUAViewId csUAViewIds[SVGA_MAX_UAVIEWS];
+ struct svga_winsys_surface *csUAViews[SVGA_MAX_UAVIEWS];
+
+ /* starting uav index for each shader */
+ unsigned uav_start_index[PIPE_SHADER_TYPES];
+
+ /* starting uav index for HW atomic buffers */
+ unsigned uav_atomic_buf_index;
};
@@ -468,6 +545,32 @@ struct svga_hw_queue;
struct svga_query;
struct svga_qmem_alloc_entry;
+/*
+ * Kind of object described by a struct svga_uav entry; presumably tells
+ * which member of the svga_uav desc union is in use -- TODO confirm.
+ */
+enum svga_uav_type
+{
+ SVGA_IMAGE_VIEW = 0,
+ SVGA_SHADER_BUFFER
+};
+
+/*
+ * One UAV entry, stored in the uaViews[] array of struct svga_cache_uav.
+ * It carries either an image view or a shader buffer description (see
+ * 'type'), the resource it refers to and the SVGA3dUAViewId assigned to it.
+ * NOTE(review): the exact meaning of next_uaView and timestamp[2] is not
+ * visible in this header; svga_context keeps a matching uav_timestamp[2].
+ */
+struct svga_uav
+{
+ enum svga_uav_type type;
+ union {
+ struct svga_image_view image_view;
+ struct svga_shader_buffer shader_buffer;
+ } desc;
+ struct pipe_resource *resource;
+ unsigned next_uaView;
+ SVGA3dUAViewId uaViewId;
+ unsigned timestamp[2];
+};
+
+/*
+ * Per-context table of UAV entries (svga_context::cache_uav).
+ *
+ * The table is sized with SVGA_MAX_UAVIEWS, the same driver-level limit
+ * used for the uaViewIds[]/uaViews[] arrays in svga_hw_draw_state, so all
+ * UAV tables share one bound.
+ * NOTE(review): num_uaViews presumably counts the entries in use and
+ * next_uaView is presumably an index into uaViews[] -- confirm against
+ * the cache implementation.
+ */
+struct svga_cache_uav
+{
+   unsigned num_uaViews;
+   unsigned next_uaView;
+   struct svga_uav uaViews[SVGA_MAX_UAVIEWS];
+};
+
struct svga_context
{
struct pipe_context pipe;
@@ -488,7 +591,7 @@ struct svga_context
boolean force_hw_line_stipple;
/** To report perf/conformance/etc issues to the gallium frontend */
- struct pipe_debug_callback callback;
+ struct util_debug_callback callback;
} debug;
struct {
@@ -517,6 +620,9 @@ struct svga_context
/* Bitmask of sampler view IDs */
struct util_bitmask *sampler_view_id_bm;
+ /* Bitmask of to-free sampler view IDs created for raw buffer srv */
+ struct util_bitmask *sampler_view_to_free_id_bm;
+
/* Bitmask of used shader IDs */
struct util_bitmask *shader_id_bm;
@@ -529,13 +635,23 @@ struct svga_context
/* Bitmask of used query IDs */
struct util_bitmask *query_id_bm;
+ /* Bitmask of used uav IDs */
+ struct util_bitmask *uav_id_bm;
+
+ /* Bitmask of to-free uav IDs */
+ struct util_bitmask *uav_to_free_id_bm;
+
struct {
uint64_t dirty[SVGA_STATE_MAX];
/** bitmasks of which const buffers are changed */
unsigned dirty_constbufs[PIPE_SHADER_TYPES];
+ /** bitmasks of which const buffers are to be bound as raw buffers */
+ unsigned raw_constbufs[PIPE_SHADER_TYPES];
+
unsigned texture_timestamp;
+ unsigned uav_timestamp[2];
struct svga_sw_state sw;
struct svga_hw_draw_state hw_draw;
@@ -557,6 +673,12 @@ struct svga_context
unsigned tes:1;
unsigned cs:1;
unsigned query:1;
+ unsigned images:1;
+ unsigned shaderbufs:1;
+ unsigned atomicbufs:1;
+ unsigned uav:1;
+ unsigned indexbuf:1;
+ unsigned vertexbufs:1;
} flags;
unsigned val;
} rebind;
@@ -641,9 +763,6 @@ struct svga_context
void *sampler;
} polygon_stipple;
- /** Alternate rasterizer states created for point sprite */
- struct svga_rasterizer_state *rasterizer_no_cull[2];
-
/** Depth stencil state created to disable depth stencil test */
struct svga_depth_stencil_state *depthstencil_disable;
@@ -665,6 +784,8 @@ struct svga_context
boolean passthrough;
} tcs;
+ struct svga_cache_uav cache_uav;
+ struct pipe_resource *dummy_resource;
};
/* A flag for each frontend state object:
@@ -707,19 +828,38 @@ struct svga_context
#define SVGA_NEW_TCS_CONST_BUFFER ((uint64_t) 0x1000000000)
#define SVGA_NEW_TES_CONST_BUFFER ((uint64_t) 0x2000000000)
#define SVGA_NEW_TCS_PARAM ((uint64_t) 0x4000000000)
-#define SVGA_NEW_FS_CONSTS ((uint64_t) 0x8000000000)
-#define SVGA_NEW_VS_CONSTS ((uint64_t) 0x10000000000)
-#define SVGA_NEW_GS_CONSTS ((uint64_t) 0x20000000000)
-#define SVGA_NEW_TCS_CONSTS ((uint64_t) 0x40000000000)
-#define SVGA_NEW_TES_CONSTS ((uint64_t) 0x800000000000)
+#define SVGA_NEW_IMAGE_VIEW ((uint64_t) 0x8000000000)
+#define SVGA_NEW_SHADER_BUFFER ((uint64_t) 0x10000000000)
+#define SVGA_NEW_CS ((uint64_t) 0x20000000000)
+#define SVGA_NEW_CS_VARIANT ((uint64_t) 0x40000000000)
+#define SVGA_NEW_CS_CONST_BUFFER ((uint64_t) 0x80000000000)
+#define SVGA_NEW_FS_CONSTS ((uint64_t) 0x100000000000)
+#define SVGA_NEW_VS_CONSTS ((uint64_t) 0x200000000000)
+#define SVGA_NEW_GS_CONSTS ((uint64_t) 0x400000000000)
+#define SVGA_NEW_TCS_CONSTS ((uint64_t) 0x800000000000)
+#define SVGA_NEW_TES_CONSTS ((uint64_t) 0x1000000000000)
+#define SVGA_NEW_CS_CONSTS ((uint64_t) 0x2000000000000)
+#define SVGA_NEW_FS_RAW_BUFFER ((uint64_t) 0x4000000000000)
+#define SVGA_NEW_VS_RAW_BUFFER ((uint64_t) 0x8000000000000)
+#define SVGA_NEW_GS_RAW_BUFFER ((uint64_t) 0x10000000000000)
+#define SVGA_NEW_TCS_RAW_BUFFER ((uint64_t) 0x20000000000000)
+#define SVGA_NEW_TES_RAW_BUFFER ((uint64_t) 0x40000000000000)
+#define SVGA_NEW_CS_RAW_BUFFER ((uint64_t) 0x80000000000000)
#define SVGA_NEW_ALL ((uint64_t) 0xFFFFFFFFFFFFFFFF)
#define SVGA_NEW_CONST_BUFFER \
(SVGA_NEW_FS_CONST_BUFFER | SVGA_NEW_VS_CONST_BUFFER | \
- SVGA_NEW_GS_CONST_BUFFER | \
+ SVGA_NEW_GS_CONST_BUFFER | SVGA_NEW_CS_CONST_BUFFER | \
SVGA_NEW_TCS_CONST_BUFFER | SVGA_NEW_TES_CONST_BUFFER)
+/**
+ * Program pipelines.
+ *
+ * Passed to the svga_validate_*() helpers to pick the shader stages they
+ * walk: SVGA_PIPE_GRAPHICS covers PIPE_SHADER_VERTEX through
+ * PIPE_SHADER_TESS_EVAL, SVGA_PIPE_COMPUTE covers PIPE_SHADER_COMPUTE only.
+ */
+enum svga_pipe_type
+{
+ SVGA_PIPE_GRAPHICS = 0,
+ SVGA_PIPE_COMPUTE = 1
+};
+
void svga_init_state_functions( struct svga_context *svga );
void svga_init_flush_functions( struct svga_context *svga );
void svga_init_string_functions( struct svga_context *svga );
@@ -742,6 +882,7 @@ void svga_init_query_functions( struct svga_context *svga );
void svga_init_surface_functions(struct svga_context *svga);
void svga_init_stream_output_functions( struct svga_context *svga );
void svga_init_clear_functions( struct svga_context *svga );
+void svga_init_shader_image_functions( struct svga_context *svga );
void svga_cleanup_vertex_state( struct svga_context *svga );
void svga_cleanup_sampler_state( struct svga_context *svga );
@@ -769,6 +910,36 @@ void svga_toggle_render_condition(struct svga_context *svga,
boolean render_condition_enabled,
boolean on);
+int svga_define_rasterizer_object(struct svga_context *svga,
+                                  struct svga_rasterizer_state *,
+                                  unsigned samples);
+
+/*
+ * Resource validation helpers used at draw or dispatch time.
+ * pipe_type selects whether the graphics shader stages or the compute
+ * stage are validated.
+ */
+enum pipe_error
+svga_validate_sampler_resources(struct svga_context *svga,
+                                enum svga_pipe_type pipe_type);
+
+enum pipe_error
+svga_validate_constant_buffers(struct svga_context *svga,
+                               enum svga_pipe_type pipe_type);
+
+enum pipe_error
+svga_validate_image_views(struct svga_context *svga,
+                          enum svga_pipe_type pipe_type);
+
+enum pipe_error
+svga_validate_shader_buffers(struct svga_context *svga,
+                             enum svga_pipe_type pipe_type);
+
+void
+svga_destroy_rawbuf_srv(struct svga_context *svga);
+
+void
+svga_uav_cache_init(struct svga_context *svga);
+
+
/***********************************************************************
* Inline conversion functions. These are better-typed than the
* macros used previously:
@@ -816,6 +987,12 @@ svga_have_sm5(const struct svga_context *svga)
}
+/**
+ * Return the have_gl43 flag of the sws object that belongs to this
+ * context's screen.
+ */
static inline boolean
+svga_have_gl43(const struct svga_context *svga)
+{
+ return svga_screen(svga->pipe.screen)->sws->have_gl43;
+}
+
+static inline boolean
svga_need_to_rebind_resources(const struct svga_context *svga)
{
return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources;
@@ -827,6 +1004,21 @@ svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2)
return memcmp(r1, r2, sizeof(*r1)) == 0;
}
+
+/* Tell whether sampler state mapping is to be used.
+ *
+ * Mapping is only ever used in a GL43 context, and then only when
+ * the sampler state mapping environment variable is set or the number
+ * of sampler states is above the SVGA device limit.
+ */
+static inline boolean
+svga_use_sampler_state_mapping(const struct svga_context *svga,
+                               unsigned num_sampler_states)
+{
+   /* No mapping without GL43 support. */
+   if (!svga_have_gl43(svga))
+      return FALSE;
+
+   /* Explicitly requested through the screen debug option. */
+   if (svga_screen(svga->pipe.screen)->debug.sampler_state_mapping)
+      return TRUE;
+
+   /* Otherwise only needed once the device limit is exceeded. */
+   return (num_sampler_states > SVGA3D_DX_MAX_SAMPLERS) ? TRUE : FALSE;
+}
+
/**
* If the Gallium HUD is enabled, this will return the current time.
* Otherwise, just return zero.
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_debug.h b/lib/mesa/src/gallium/drivers/svga/svga_debug.h
index cdad858b0..8b43279a9 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_debug.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_debug.h
@@ -46,6 +46,8 @@
#define DEBUG_CACHE 0x8000
#define DEBUG_STREAMOUT 0x10000
#define DEBUG_SAMPLERS 0x20000
+#define DEBUG_IMAGE 0x40000
+#define DEBUG_UAV 0x80000
#define DEBUG_RETRY 0x100000
#ifdef DEBUG
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_draw.c
index 0d6fb987e..284622762 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_draw.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_draw.c
@@ -342,14 +342,30 @@ xlate_index_format(unsigned indexWidth)
}
-static enum pipe_error
-validate_sampler_resources(struct svga_context *svga)
+/**
+ * A helper function to validate sampler view resources to ensure any
+ * pending updates to buffers will be emitted before they are referenced
+ * at draw or dispatch time. It also rebinds the resources if needed.
+ */
+enum pipe_error
+svga_validate_sampler_resources(struct svga_context *svga,
+ enum svga_pipe_type pipe_type)
{
- enum pipe_shader_type shader;
+ enum pipe_shader_type shader, first_shader, last_shader;
assert(svga_have_vgpu10(svga));
- for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) {
+ if (pipe_type == SVGA_PIPE_GRAPHICS) {
+ first_shader = PIPE_SHADER_VERTEX;
+ last_shader = PIPE_SHADER_TESS_EVAL;
+ }
+ else {
+ assert(svga_have_gl43(svga));
+ first_shader = PIPE_SHADER_COMPUTE;
+ last_shader = PIPE_SHADER_COMPUTE;
+ }
+
+ for (shader = first_shader; shader <= last_shader; shader++) {
unsigned count = svga->curr.num_sampler_views[shader];
unsigned i;
struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
@@ -409,14 +425,31 @@ validate_sampler_resources(struct svga_context *svga)
}
-static enum pipe_error
-validate_constant_buffers(struct svga_context *svga)
+/**
+ * A helper function to validate constant buffers to ensure any
+ * pending updates to the buffers will be emitted before they are referenced
+ * at draw or dispatch time. It also rebinds the resources if needed.
+ */
+enum pipe_error
+svga_validate_constant_buffers(struct svga_context *svga,
+ enum svga_pipe_type pipe_type)
{
- enum pipe_shader_type shader;
+ enum pipe_shader_type shader, first_shader, last_shader;
assert(svga_have_vgpu10(svga));
- for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) {
+ if (pipe_type == SVGA_PIPE_GRAPHICS) {
+ first_shader = PIPE_SHADER_VERTEX;
+ last_shader = PIPE_SHADER_TESS_EVAL;
+ }
+ else {
+ assert(svga_have_gl43(svga));
+ first_shader = PIPE_SHADER_COMPUTE;
+ last_shader = PIPE_SHADER_COMPUTE;
+ }
+
+ for (shader = first_shader; shader <= last_shader; shader++) {
+
enum pipe_error ret;
struct svga_buffer *buffer;
@@ -463,6 +496,28 @@ validate_constant_buffers(struct svga_context *svga)
return ret;
}
}
+
+ /* Reference raw constant buffers as they are not included in the
+ * hw constant buffers list.
+ */
+ unsigned enabled_rawbufs = svga->state.hw_draw.enabled_rawbufs[shader] & ~1u;
+ while (enabled_rawbufs) {
+ unsigned i = u_bit_scan(&enabled_rawbufs);
+ buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
+
+ assert(buffer != NULL);
+ handle = svga_buffer_handle(svga, &buffer->b,
+ PIPE_BIND_SAMPLER_VIEW);
+
+ if (svga->rebind.flags.constbufs && handle) {
+ ret = svga->swc->resource_rebind(svga->swc,
+ handle,
+ NULL,
+ SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ }
}
svga->rebind.flags.constbufs = FALSE;
@@ -471,6 +526,94 @@ validate_constant_buffers(struct svga_context *svga)
/**
+ * A helper function to validate image view resources to ensure any
+ * pending updates to buffers will be emitted before they are referenced
+ * at draw or dispatch time. It also rebinds the resources if needed.
+ */
+enum pipe_error
+svga_validate_image_views(struct svga_context *svga,
+                          enum svga_pipe_type pipe_type)
+{
+   /* Read the rebind request once; the flag is cleared only after
+    * every stage has been validated successfully.
+    */
+   const bool need_rebind = svga->rebind.flags.images;
+   const bool is_graphics = (pipe_type == SVGA_PIPE_GRAPHICS);
+   const enum pipe_shader_type stage_begin =
+      is_graphics ? PIPE_SHADER_VERTEX : PIPE_SHADER_COMPUTE;
+   const enum pipe_shader_type stage_end =
+      is_graphics ? PIPE_SHADER_TESS_EVAL : PIPE_SHADER_COMPUTE;
+   enum pipe_shader_type stage;
+
+   assert(svga_have_gl43(svga));
+
+   /* Validate the image views currently recorded in the hw draw state,
+    * stopping at the first stage that fails.
+    */
+   for (stage = stage_begin; stage <= stage_end; stage++) {
+      const enum pipe_error status =
+         svga_validate_image_view_resources(svga,
+            svga->state.hw_draw.num_image_views[stage],
+            &svga->state.hw_draw.image_views[stage][0], need_rebind);
+
+      if (status != PIPE_OK)
+         return status;
+   }
+
+   svga->rebind.flags.images = FALSE;
+   return PIPE_OK;
+}
+
+
+/**
+ * A helper function to validate shader buffer and atomic buffer resources to
+ * ensure any pending updates to buffers will be emitted before they are
+ * referenced at draw or dispatch time. It also rebinds the resources if needed.
+ */
+enum pipe_error
+svga_validate_shader_buffers(struct svga_context *svga,
+                             enum svga_pipe_type pipe_type)
+{
+   const bool need_rebind = svga->rebind.flags.shaderbufs;
+   const bool is_graphics = (pipe_type == SVGA_PIPE_GRAPHICS);
+   const enum pipe_shader_type stage_begin =
+      is_graphics ? PIPE_SHADER_VERTEX : PIPE_SHADER_COMPUTE;
+   const enum pipe_shader_type stage_end =
+      is_graphics ? PIPE_SHADER_TESS_EVAL : PIPE_SHADER_COMPUTE;
+   enum pipe_shader_type stage;
+   enum pipe_error status;
+
+   assert(svga_have_gl43(svga));
+
+   /* Step 1: the per-stage shader buffers held in the hw draw state. */
+   for (stage = stage_begin; stage <= stage_end; stage++) {
+      status = svga_validate_shader_buffer_resources(svga,
+                  svga->state.hw_draw.num_shader_buffers[stage],
+                  &svga->state.hw_draw.shader_buffers[stage][0],
+                  need_rebind);
+
+      if (status != PIPE_OK)
+         return status;
+   }
+
+   svga->rebind.flags.shaderbufs = FALSE;
+
+   /* Step 2: the HW atomic buffers, which are kept in a single list
+    * and have their own rebind flag.
+    */
+   status = svga_validate_shader_buffer_resources(svga,
+               svga->state.hw_draw.num_atomic_buffers,
+               svga->state.hw_draw.atomic_buffers,
+               svga->rebind.flags.atomicbufs);
+
+   if (status != PIPE_OK)
+      return status;
+
+   svga->rebind.flags.atomicbufs = FALSE;
+   return PIPE_OK;
+}
+
+
+/**
* Was the last command put into the command buffer a drawing command?
* We use this to determine if we can skip emitting buffer re-bind
* commands when we have a sequence of drawing commands that use the
@@ -505,9 +648,9 @@ last_command_was_draw(const struct svga_context *svga)
*/
static boolean
vertex_buffers_equal(unsigned count,
- SVGA3dVertexBuffer *pVBufAttr1,
+ SVGA3dVertexBuffer_v2 *pVBufAttr1,
struct pipe_resource **pVBuf1,
- SVGA3dVertexBuffer *pVBufAttr2,
+ SVGA3dVertexBuffer_v2 *pVBufAttr2,
struct pipe_resource **pVBuf2)
{
return (memcmp(pVBufAttr1, pVBufAttr2,
@@ -526,24 +669,58 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl,
struct svga_context *svga = hwtnl->svga;
struct pipe_resource *vbuffers[SVGA3D_INPUTREG_MAX];
struct svga_winsys_surface *vbuffer_handles[SVGA3D_INPUTREG_MAX];
- struct svga_winsys_surface *so_vertex_count_handle;
+ struct svga_winsys_surface *so_vertex_count_handle = NULL;
const unsigned vbuf_count = so_vertex_count ? 1 : hwtnl->cmd.vbuf_count;
+ SVGA3dVertexBuffer_v2 vbuffer_attrs[PIPE_MAX_ATTRIBS];
int last_vbuf = -1;
unsigned i;
assert(svga_have_vgpu10(svga));
+ /* setup vertex attribute input layout */
+ if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
+ enum pipe_error ret =
+ SVGA3D_vgpu10_SetInputLayout(svga->swc,
+ hwtnl->cmd.vdecl_layout_id);
+ if (ret != PIPE_OK)
+ return ret;
+
+ svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
+ }
+
/* Get handle for each referenced vertex buffer, unless we're using a
* stream-out buffer to specify the drawing information (DrawAuto).
+ * Also set up the buffer attributes.
*/
if (so_vertex_count) {
- i = 0;
+ so_vertex_count_handle = svga_buffer_handle(svga,
+ so_vertex_count->buffer,
+ (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT));
+ if (!so_vertex_count_handle)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ /* Set IA slot0 input buffer to the SO buffer */
+ assert(vbuf_count == 1);
+ vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride;
+ vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset;
+ vbuffer_attrs[0].sid = 0;
+ assert(so_vertex_count->buffer != NULL);
+ vbuffer_attrs[0].sizeInBytes = svga_buffer(so_vertex_count->buffer)->size;
+ vbuffers[0] = so_vertex_count->buffer;
+ vbuffer_handles[0] = so_vertex_count_handle;
+
+ i = 1;
}
else {
for (i = 0; i < vbuf_count; i++) {
struct svga_buffer *sbuf =
svga_buffer(hwtnl->cmd.vbufs[i].buffer.resource);
+ vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride;
+ vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
+ vbuffer_attrs[i].sid = 0;
+
if (sbuf) {
vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b,
PIPE_BIND_VERTEX_BUFFER);
@@ -552,17 +729,25 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl,
return PIPE_ERROR_OUT_OF_MEMORY;
vbuffers[i] = &sbuf->b;
last_vbuf = i;
+
+ vbuffer_attrs[i].sizeInBytes = sbuf->size;
}
else {
vbuffers[i] = NULL;
vbuffer_handles[i] = NULL;
+ vbuffer_attrs[i].sizeInBytes = 0;
}
}
}
+ /* Unbind the unreferenced vertex buffer handles */
for (; i < svga->state.hw_draw.num_vbuffers; i++) {
vbuffers[i] = NULL;
vbuffer_handles[i] = NULL;
+ vbuffer_attrs[i].sid = 0;
+ vbuffer_attrs[i].stride = 0;
+ vbuffer_attrs[i].offset = 0;
+ vbuffer_attrs[i].sizeInBytes = 0;
}
/* Get handle for each referenced vertex buffer */
@@ -616,25 +801,6 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl,
/* setup vertex buffers */
{
- SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS];
-
- if (so_vertex_count) {
- /* Set IA slot0 input buffer to the SO buffer */
- assert(vbuf_count == 1);
- vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride;
- vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset;
- vbuffer_attrs[0].sid = 0;
- vbuffers[0] = so_vertex_count->buffer;
- vbuffer_handles[0] = so_vertex_count_handle;
- }
- else {
- for (i = 0; i < vbuf_count; i++) {
- vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride;
- vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
- vbuffer_attrs[i].sid = 0;
- }
- }
-
/* If any of the vertex buffer state has changed, issue
* the SetVertexBuffers command. Otherwise, we will just
* need to rebind the resources.
@@ -654,20 +820,13 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl,
*/
num_vbuffers = MAX2(vbuf_count, svga->state.hw_draw.num_vbuffers);
- /* Zero-out the old buffers we want to unbind (the number of loop
- * iterations here is typically very small, and often zero.)
- */
- for (i = vbuf_count; i < num_vbuffers; i++) {
- vbuffer_attrs[i].sid = 0;
- vbuffer_attrs[i].stride = 0;
- vbuffer_attrs[i].offset = 0;
- vbuffer_handles[i] = NULL;
- }
-
if (num_vbuffers > 0) {
- SVGA3dVertexBuffer *pbufAttrs = vbuffer_attrs;
+ SVGA3dVertexBuffer_v2 *pbufAttrs = vbuffer_attrs;
struct svga_winsys_surface **pbufHandles = vbuffer_handles;
unsigned numVBuf = 0;
+ boolean emitVBufs =
+ !svga_sws(svga)->have_index_vertex_buffer_offset_cmd ||
+ svga->rebind.flags.vertexbufs;
/* Loop through the vertex buffer lists to only emit
* those vertex buffers that are not already in the
@@ -681,6 +840,10 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl,
&svga->state.hw_draw.vbuffer_attrs[i],
&svga->state.hw_draw.vbuffers[i]);
+ /* Check if we can use the SetVertexBuffersOffsetAndSize command */
+ emitVBufs = emitVBufs ||
+ (vbuffers[i] != svga->state.hw_draw.vbuffers[i]);
+
if (!emit && i == num_vbuffers-1) {
/* Include the last vertex buffer in the next emit
* if it is different.
@@ -696,11 +859,23 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl,
* In this case, there is nothing to send yet.
*/
if (numVBuf) {
- enum pipe_error ret =
- SVGA3D_vgpu10_SetVertexBuffers(svga->swc,
- numVBuf,
- i - numVBuf,
- pbufAttrs, pbufHandles);
+ enum pipe_error ret;
+
+ /* If all vertex buffer handles are the same as the ones
+ * in the device, just use the
+ * SetVertexBuffersOffsetAndSize command.
+ */
+ if (emitVBufs) {
+ ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc,
+ numVBuf,
+ i - numVBuf,
+ pbufAttrs, pbufHandles);
+ } else {
+ ret = SVGA3D_vgpu10_SetVertexBuffersOffsetAndSize(svga->swc,
+ numVBuf,
+ i - numVBuf,
+ pbufAttrs);
+ }
if (ret != PIPE_OK)
return ret;
}
@@ -740,6 +915,8 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl,
}
}
+ svga->rebind.flags.vertexbufs = FALSE;
+
return PIPE_OK;
}
@@ -755,6 +932,7 @@ validate_index_buffer(struct svga_hwtnl *hwtnl,
struct svga_context *svga = hwtnl->svga;
struct svga_winsys_surface *ib_handle =
svga_buffer_handle(svga, ib, PIPE_BIND_INDEX_BUFFER);
+ enum pipe_error ret;
if (!ib_handle)
return PIPE_ERROR_OUT_OF_MEMORY;
@@ -770,12 +948,26 @@ validate_index_buffer(struct svga_hwtnl *hwtnl,
range->indexArray.offset != svga->state.hw_draw.ib_offset) {
assert(indexFormat != SVGA3D_FORMAT_INVALID);
- enum pipe_error ret =
- SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
- indexFormat,
- range->indexArray.offset);
- if (ret != PIPE_OK)
- return ret;
+
+ if ((ib == svga->state.hw_draw.ib) &&
+ svga_sws(hwtnl->svga)->have_index_vertex_buffer_offset_cmd &&
+ !svga->rebind.flags.indexbuf) {
+
+ ret = SVGA3D_vgpu10_SetIndexBufferOffsetAndSize(svga->swc,
+ indexFormat,
+ range->indexArray.offset,
+ sbuf->size);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ else {
+
+ ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
+ indexFormat,
+ range->indexArray.offset);
+ if (ret != PIPE_OK)
+ return ret;
+ }
pipe_resource_reference(&svga->state.hw_draw.ib, ib);
svga->state.hw_draw.ib_format = indexFormat;
@@ -795,6 +987,8 @@ validate_index_buffer(struct svga_hwtnl *hwtnl,
}
}
+ svga->rebind.flags.indexbuf = FALSE;
+
return PIPE_OK;
}
@@ -842,14 +1036,30 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
*/
}
- ret = validate_sampler_resources(svga);
+ ret = svga_validate_sampler_resources(svga, SVGA_PIPE_GRAPHICS);
if (ret != PIPE_OK)
return ret;
- ret = validate_constant_buffers(svga);
+ ret = svga_validate_constant_buffers(svga, SVGA_PIPE_GRAPHICS);
if (ret != PIPE_OK)
return ret;
+ if (svga_have_gl43(svga)) {
+ ret = svga_validate_image_views(svga, SVGA_PIPE_GRAPHICS);
+ if (ret != PIPE_OK)
+ return ret;
+
+ ret = svga_validate_shader_buffers(svga, SVGA_PIPE_GRAPHICS);
+ if (ret != PIPE_OK)
+ return ret;
+
+ if (svga->rebind.flags.uav) {
+ ret= svga_rebind_uav(svga);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ }
+
ret = validate_vertex_buffers(hwtnl, so_vertex_count);
if (ret != PIPE_OK)
return ret;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.c b/lib/mesa/src/gallium/drivers/svga/svga_format.c
index 832c50e7c..6cfc92b6f 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_format.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_format.c
@@ -61,9 +61,9 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_B8G8R8X8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM, SVGA3D_B8G8R8X8_UNORM, TF_GEN_MIPS },
[ PIPE_FORMAT_B5G5R5A1_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, SVGA3D_B5G5R5A1_UNORM, TF_GEN_MIPS },
[ PIPE_FORMAT_B5G6R5_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, SVGA3D_B5G6R5_UNORM, TF_GEN_MIPS },
- [ PIPE_FORMAT_R10G10B10A2_UNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R10G10B10A2_UNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_L8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UNORM, TF_XXX1 },
- [ PIPE_FORMAT_A8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS | TF_000X},
+ [ PIPE_FORMAT_A8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS | TF_000X | TF_UAV },
[ PIPE_FORMAT_I8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UNORM, TF_XXXX },
[ PIPE_FORMAT_L8A8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8_UNORM, TF_XXXY },
[ PIPE_FORMAT_L16_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R16_UNORM, TF_XXX1 },
@@ -71,10 +71,10 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_Z32_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, SVGA3D_D32_FLOAT, 0 },
[ PIPE_FORMAT_Z24_UNORM_S8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 },
[ PIPE_FORMAT_Z24X8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 },
- [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS },
- [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS | TF_UAV },
+ [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R32G32B32_FLOAT ] = { SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, TF_GEN_MIPS },
- [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R32_USCALED ] = { SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R32G32_USCALED ] = { SVGA3D_R32G32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R32G32B32_USCALED ] = { SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
@@ -83,45 +83,42 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_R32G32_SSCALED ] = { SVGA3D_R32G32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
[ PIPE_FORMAT_R32G32B32_SSCALED ] = { SVGA3D_R32G32B32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
[ PIPE_FORMAT_R32G32B32A32_SSCALED ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
- [ PIPE_FORMAT_R16_UNORM ] = { SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, TF_GEN_MIPS },
- [ PIPE_FORMAT_R16G16_UNORM ] = { SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R16_UNORM ] = { SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, TF_GEN_MIPS | TF_UAV },
+ [ PIPE_FORMAT_R16G16_UNORM ] = { SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R16G16B16_UNORM ] = { SVGA3D_R16G16B16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R16G16B16A16_UNORM ] = { SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R16G16B16A16_UNORM ] = { SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R16_USCALED ] = { SVGA3D_R16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R16G16_USCALED ] = { SVGA3D_R16G16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R16G16B16_USCALED ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST },
[ PIPE_FORMAT_R16G16B16A16_USCALED ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
- [ PIPE_FORMAT_R16_SNORM ] = { SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, 0 },
- [ PIPE_FORMAT_R16G16_SNORM ] = { SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, 0 },
+ [ PIPE_FORMAT_R16_SNORM ] = { SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, TF_UAV },
+ [ PIPE_FORMAT_R16G16_SNORM ] = { SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, TF_UAV },
[ PIPE_FORMAT_R16G16B16_SNORM ] = { SVGA3D_R16G16B16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R16G16B16A16_SNORM ] = { SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, 0 },
+ [ PIPE_FORMAT_R16G16B16A16_SNORM ] = { SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, TF_UAV },
[ PIPE_FORMAT_R16_SSCALED ] = { SVGA3D_R16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
[ PIPE_FORMAT_R16G16_SSCALED ] = { SVGA3D_R16G16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
[ PIPE_FORMAT_R16G16B16_SSCALED ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST },
[ PIPE_FORMAT_R16G16B16A16_SSCALED ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
- [ PIPE_FORMAT_R8_UNORM ] = { SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS },
- [ PIPE_FORMAT_R8G8_UNORM ] = { SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R8_UNORM ] = { SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS | TF_UAV },
+ [ PIPE_FORMAT_R8G8_UNORM ] = { SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R8G8B8_UNORM ] = { SVGA3D_R8G8B8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R8G8B8A8_UNORM ] = { SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R8G8B8A8_UNORM ] = { SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R8_USCALED ] = { SVGA3D_R8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R8G8_USCALED ] = { SVGA3D_R8G8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R8G8B8_USCALED ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST },
[ PIPE_FORMAT_R8G8B8A8_USCALED ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
-
- [ PIPE_FORMAT_R8_SNORM ] = { SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, 0 },
- [ PIPE_FORMAT_R8G8_SNORM ] = { SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, 0 },
+ [ PIPE_FORMAT_R8_SNORM ] = { SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, TF_UAV },
+ [ PIPE_FORMAT_R8G8_SNORM ] = { SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, TF_UAV },
[ PIPE_FORMAT_R8G8B8_SNORM ] = { SVGA3D_R8G8B8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R8G8B8A8_SNORM ] = { SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, 0 },
-
+ [ PIPE_FORMAT_R8G8B8A8_SNORM ] = { SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, TF_UAV },
[ PIPE_FORMAT_R8_SSCALED ] = { SVGA3D_R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
[ PIPE_FORMAT_R8G8_SSCALED ] = { SVGA3D_R8G8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
[ PIPE_FORMAT_R8G8B8_SSCALED ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST },
[ PIPE_FORMAT_R8G8B8A8_SSCALED ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
-
- [ PIPE_FORMAT_R16_FLOAT ] = { SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, TF_GEN_MIPS },
- [ PIPE_FORMAT_R16G16_FLOAT ] = { SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R16_FLOAT ] = { SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, TF_GEN_MIPS | TF_UAV },
+ [ PIPE_FORMAT_R16G16_FLOAT ] = { SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R16G16B16_FLOAT ] = { SVGA3D_R16G16B16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R16G16B16A16_FLOAT ] = { SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R16G16B16A16_FLOAT ] = { SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_B8G8R8A8_SRGB ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_GEN_MIPS },
[ PIPE_FORMAT_B8G8R8X8_SRGB ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_GEN_MIPS },
[ PIPE_FORMAT_R8G8B8A8_SRGB ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_GEN_MIPS },
@@ -138,7 +135,7 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_RGTC2_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, SVGA3D_FORMAT_INVALID, 0 },
[ PIPE_FORMAT_RGTC2_SNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC5_SNORM, SVGA3D_FORMAT_INVALID, 0 },
[ PIPE_FORMAT_R10G10B10A2_USCALED ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_USCALED },
- [ PIPE_FORMAT_R11G11B10_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R11G11B10_FLOAT, SVGA3D_R11G11B10_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R11G11B10_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R11G11B10_FLOAT, SVGA3D_R11G11B10_FLOAT, TF_GEN_MIPS | TF_UAV },
[ PIPE_FORMAT_R9G9B9E5_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3D_FORMAT_INVALID, 0 },
[ PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, 0 },
[ PIPE_FORMAT_B10G10R10A2_UNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA },
@@ -158,30 +155,30 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_B10G10R10A2_USCALED ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_USCALED },
[ PIPE_FORMAT_B10G10R10A2_SSCALED ] = { SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SSCALED },
[ PIPE_FORMAT_B10G10R10A2_SNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SNORM },
- [ PIPE_FORMAT_R8_UINT ] = { SVGA3D_R8_UINT, SVGA3D_R8_UINT, SVGA3D_R8_UINT, 0 },
- [ PIPE_FORMAT_R8G8_UINT ] = { SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, 0 },
+ [ PIPE_FORMAT_R8_UINT ] = { SVGA3D_R8_UINT, SVGA3D_R8_UINT, SVGA3D_R8_UINT, TF_UAV },
+ [ PIPE_FORMAT_R8G8_UINT ] = { SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, TF_UAV },
[ PIPE_FORMAT_R8G8B8_UINT ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R8G8B8A8_UINT ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, 0 },
- [ PIPE_FORMAT_R8_SINT ] = { SVGA3D_R8_SINT, SVGA3D_R8_SINT, SVGA3D_R8_SINT, 0 },
- [ PIPE_FORMAT_R8G8_SINT ] = { SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, 0 },
+ [ PIPE_FORMAT_R8G8B8A8_UINT ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, TF_UAV },
+ [ PIPE_FORMAT_R8_SINT ] = { SVGA3D_R8_SINT, SVGA3D_R8_SINT, SVGA3D_R8_SINT, TF_UAV },
+ [ PIPE_FORMAT_R8G8_SINT ] = { SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, TF_UAV },
[ PIPE_FORMAT_R8G8B8_SINT ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R8G8B8A8_SINT ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, 0 },
- [ PIPE_FORMAT_R16_UINT ] = { SVGA3D_R16_UINT, SVGA3D_R16_UINT, SVGA3D_R16_UINT, 0 },
- [ PIPE_FORMAT_R16G16_UINT ] = { SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, 0 },
+ [ PIPE_FORMAT_R8G8B8A8_SINT ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, TF_UAV },
+ [ PIPE_FORMAT_R16_UINT ] = { SVGA3D_R16_UINT, SVGA3D_R16_UINT, SVGA3D_R16_UINT, TF_UAV },
+ [ PIPE_FORMAT_R16G16_UINT ] = { SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, TF_UAV },
[ PIPE_FORMAT_R16G16B16_UINT ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R16G16B16A16_UINT ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, 0 },
- [ PIPE_FORMAT_R16_SINT ] = { SVGA3D_R16_SINT, SVGA3D_R16_SINT, SVGA3D_R16_SINT, 0 },
- [ PIPE_FORMAT_R16G16_SINT ] = { SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, 0 },
+ [ PIPE_FORMAT_R16G16B16A16_UINT ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, TF_UAV },
+ [ PIPE_FORMAT_R16_SINT ] = { SVGA3D_R16_SINT, SVGA3D_R16_SINT, SVGA3D_R16_SINT, TF_UAV },
+ [ PIPE_FORMAT_R16G16_SINT ] = { SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, TF_UAV },
[ PIPE_FORMAT_R16G16B16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
- [ PIPE_FORMAT_R16G16B16A16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, 0 },
- [ PIPE_FORMAT_R32_UINT ] = { SVGA3D_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, 0 },
- [ PIPE_FORMAT_R32G32_UINT ] = { SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, 0 },
+ [ PIPE_FORMAT_R16G16B16A16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, TF_UAV },
+ [ PIPE_FORMAT_R32_UINT ] = { SVGA3D_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, TF_UAV },
+ [ PIPE_FORMAT_R32G32_UINT ] = { SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, TF_UAV },
[ PIPE_FORMAT_R32G32B32_UINT ] = { SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, 0 },
- [ PIPE_FORMAT_R32G32B32A32_UINT ] = { SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, 0 },
- [ PIPE_FORMAT_R32_SINT ] = { SVGA3D_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, 0 },
- [ PIPE_FORMAT_R32G32_SINT ] = { SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, 0 },
+ [ PIPE_FORMAT_R32G32B32A32_UINT ] = { SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, TF_UAV },
+ [ PIPE_FORMAT_R32_SINT ] = { SVGA3D_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, TF_UAV },
+ [ PIPE_FORMAT_R32G32_SINT ] = { SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, TF_UAV },
[ PIPE_FORMAT_R32G32B32_SINT ] = { SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, 0 },
- [ PIPE_FORMAT_R32G32B32A32_SINT ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, 0 },
+ [ PIPE_FORMAT_R32G32B32A32_SINT ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, TF_UAV },
[ PIPE_FORMAT_A8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_000X },
[ PIPE_FORMAT_I8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_XXXX },
[ PIPE_FORMAT_L8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_XXX1 },
@@ -206,7 +203,13 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_I32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32_SINT, TF_XXXX },
[ PIPE_FORMAT_L32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32_SINT, TF_XXX1 },
[ PIPE_FORMAT_L32A32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32G32_SINT, TF_XXXY },
- [ PIPE_FORMAT_R10G10B10A2_UINT ] = { SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, 0 },
+ [ PIPE_FORMAT_R10G10B10A2_UINT ] = { SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, TF_UAV },
+ [ PIPE_FORMAT_BPTC_RGBA_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC7_UNORM, SVGA3D_FORMAT_INVALID, TF_SM5 },
+ [ PIPE_FORMAT_BPTC_SRGBA ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC7_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_SM5 },
+ [ PIPE_FORMAT_BPTC_RGB_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC6H_SF16, SVGA3D_FORMAT_INVALID, TF_SM5 },
+ [ PIPE_FORMAT_BPTC_RGB_UFLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC6H_UF16, SVGA3D_FORMAT_INVALID, TF_SM5 },
+ [ PIPE_FORMAT_X24S8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_X24_G8_UINT, SVGA3D_FORMAT_INVALID, 0 },
+ [ PIPE_FORMAT_X32_S8X24_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_X32_G8X24_UINT, SVGA3D_FORMAT_INVALID, 0 },
/* Must specify following entry to give the sense of size of format_conversion_table[] */
[ PIPE_FORMAT_COUNT ] = {SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
};
@@ -298,8 +301,18 @@ svga_translate_format(const struct svga_screen *ss,
else if (bind & PIPE_BIND_SCANOUT) {
return svga_translate_screen_target_format_vgpu10(format);
}
+ else if (bind & PIPE_BIND_SHADER_IMAGE) {
+ if (format_conversion_table[format].flags & TF_UAV)
+ return format_conversion_table[format].pixel_format;
+ else
+ return SVGA3D_FORMAT_INVALID;
+ }
else {
- return entry->pixel_format;
+ if ((format_conversion_table[format].flags & TF_SM5) &&
+ !ss->sws->have_sm5)
+ return SVGA3D_FORMAT_INVALID;
+ else
+ return entry->pixel_format;
}
}
@@ -784,10 +797,8 @@ static const struct format_cap format_cap_table[] = {
0, 0, 0, 0
},
{
- "SVGA3D_AYUV",
- SVGA3D_AYUV,
- SVGA3D_DEVCAP_SURFACEFMT_AYUV,
- 0, 0, 0, 0
+ "SVGA3D_FORMAT_DEAD2",
+ SVGA3D_FORMAT_DEAD2, 0, 0, 0, 0, 0
},
{
"SVGA3D_R32G32B32A32_TYPELESS",
@@ -1369,6 +1380,59 @@ static const struct format_cap format_cap_table[] = {
SVGA3D_BC5_UNORM,
SVGA3D_DEVCAP_DXFMT_BC5_UNORM,
4, 4, 16, 0
+ },
+ {
+ "SVGA3D_B4G4R4A4_UNORM",
+ SVGA3D_B4G4R4A4_UNORM,
+ 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC6H_TYPELESS",
+ SVGA3D_BC6H_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS,
+ 4, 4, 16, 0
+ },
+ {
+ "SVGA3D_BC6H_UF16",
+ SVGA3D_BC6H_UF16,
+ SVGA3D_DEVCAP_DXFMT_BC6H_UF16,
+ 4, 4, 16, 0
+ },
+ {
+ "SVGA3D_BC6H_SF16",
+ SVGA3D_BC6H_SF16,
+ SVGA3D_DEVCAP_DXFMT_BC6H_SF16,
+ 4, 4, 16, 0
+ },
+ {
+ "SVGA3D_BC7_TYPELESS",
+ SVGA3D_BC7_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC7_TYPELESS,
+ 4, 4, 16, 0
+ },
+ {
+ "SVGA3D_BC7_UNORM",
+ SVGA3D_BC7_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS,
+ 4, 4, 16, 0
+ },
+ {
+ "SVGA3D_BC7_UNORM_SRGB",
+ SVGA3D_BC7_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS,
+ 4, 4, 16, 0
+ },
+ {
+ "SVGA3D_AYUV",
+ SVGA3D_AYUV,
+ 0,
+ 1, 1, 4, 0
+ },
+ {
+ "SVGA3D_R11G11B10_TYPELESS",
+ SVGA3D_R11G11B10_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT,
+ 1, 1, 4, 0
}
};
@@ -1470,7 +1534,6 @@ svga_devcap_name(SVGA3dDevCapIndex cap)
DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_UYVY),
DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_YUY2),
DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_NV12),
- DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_AYUV),
DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS),
DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT),
DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT),
@@ -1662,7 +1725,7 @@ svga_get_dx_format_cap(struct svga_screen *ss,
if (entry->devcap) {
sws->get_cap(sws, entry->devcap, caps);
- /* pre-SM41 capabable svga device supports SHADER_SAMPLE capability for
+ /* pre-SM41 capable svga device supports SHADER_SAMPLE capability for
* these formats but does not advertise the devcap.
* So enable this bit here.
*/
@@ -1672,6 +1735,9 @@ svga_get_dx_format_cap(struct svga_screen *ss,
caps->u |= SVGA3D_DXFMT_SHADER_SAMPLE;
}
}
+ else {
+ caps->u = entry->defaultOperations;
+ }
if (0) {
debug_printf("Format %s, devcap %s = 0x%x (%s)\n",
@@ -1853,20 +1919,24 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_R32G32B32A32_UINT:
case SVGA3D_R32G32B32A32_SINT:
case SVGA3D_R32G32B32A32_FLOAT:
+ case SVGA3D_R32G32B32A32_TYPELESS:
return SVGA3D_R32G32B32A32_TYPELESS;
case SVGA3D_R32G32B32_FLOAT:
case SVGA3D_R32G32B32_UINT:
case SVGA3D_R32G32B32_SINT:
+ case SVGA3D_R32G32B32_TYPELESS:
return SVGA3D_R32G32B32_TYPELESS;
case SVGA3D_R16G16B16A16_UINT:
case SVGA3D_R16G16B16A16_UNORM:
case SVGA3D_R16G16B16A16_SNORM:
case SVGA3D_R16G16B16A16_SINT:
case SVGA3D_R16G16B16A16_FLOAT:
+ case SVGA3D_R16G16B16A16_TYPELESS:
return SVGA3D_R16G16B16A16_TYPELESS;
case SVGA3D_R32G32_UINT:
case SVGA3D_R32G32_SINT:
case SVGA3D_R32G32_FLOAT:
+ case SVGA3D_R32G32_TYPELESS:
return SVGA3D_R32G32_TYPELESS;
case SVGA3D_D32_FLOAT_S8X24_UINT:
case SVGA3D_X32_G8X24_UINT:
@@ -1874,6 +1944,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
return SVGA3D_R32G8X24_TYPELESS;
case SVGA3D_R10G10B10A2_UINT:
case SVGA3D_R10G10B10A2_UNORM:
+ case SVGA3D_R10G10B10A2_TYPELESS:
return SVGA3D_R10G10B10A2_TYPELESS;
case SVGA3D_R8G8B8A8_UNORM:
case SVGA3D_R8G8B8A8_SNORM:
@@ -1887,6 +1958,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_R16G16_UNORM:
case SVGA3D_R16G16_SNORM:
case SVGA3D_R16G16_FLOAT:
+ case SVGA3D_R16G16_TYPELESS:
return SVGA3D_R16G16_TYPELESS;
case SVGA3D_D32_FLOAT:
case SVGA3D_R32_FLOAT:
@@ -1903,6 +1975,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_R8G8_SNORM:
case SVGA3D_R8G8_UINT:
case SVGA3D_R8G8_SINT:
+ case SVGA3D_R8G8_TYPELESS:
return SVGA3D_R8G8_TYPELESS;
case SVGA3D_D16_UNORM:
case SVGA3D_R16_UNORM:
@@ -1916,6 +1989,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_R8_UINT:
case SVGA3D_R8_SNORM:
case SVGA3D_R8_SINT:
+ case SVGA3D_R8_TYPELESS:
return SVGA3D_R8_TYPELESS;
case SVGA3D_B8G8R8A8_UNORM_SRGB:
case SVGA3D_B8G8R8A8_UNORM:
@@ -1939,16 +2013,28 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
return SVGA3D_BC3_TYPELESS;
case SVGA3D_BC4_UNORM:
case SVGA3D_BC4_SNORM:
+ case SVGA3D_BC4_TYPELESS:
return SVGA3D_BC4_TYPELESS;
case SVGA3D_BC5_UNORM:
case SVGA3D_BC5_SNORM:
+ case SVGA3D_BC5_TYPELESS:
return SVGA3D_BC5_TYPELESS;
+ case SVGA3D_BC6H_UF16:
+ case SVGA3D_BC6H_SF16:
+ case SVGA3D_BC6H_TYPELESS:
+ return SVGA3D_BC6H_TYPELESS;
+ case SVGA3D_BC7_UNORM:
+ case SVGA3D_BC7_UNORM_SRGB:
+ case SVGA3D_BC7_TYPELESS:
+ return SVGA3D_BC7_TYPELESS;
+ case SVGA3D_R11G11B10_FLOAT:
+ case SVGA3D_R11G11B10_TYPELESS:
+ return SVGA3D_R11G11B10_TYPELESS;
/* Special cases (no corresponding _TYPELESS formats) */
case SVGA3D_A8_UNORM:
case SVGA3D_B5G5R5A1_UNORM:
case SVGA3D_B5G6R5_UNORM:
- case SVGA3D_R11G11B10_FLOAT:
case SVGA3D_R9G9B9E5_SHAREDEXP:
return format;
default:
@@ -2024,6 +2110,8 @@ svga_format_is_typeless(SVGA3dSurfaceFormat format)
case SVGA3D_BC3_TYPELESS:
case SVGA3D_BC4_TYPELESS:
case SVGA3D_BC5_TYPELESS:
+ case SVGA3D_BC6H_TYPELESS:
+ case SVGA3D_BC7_TYPELESS:
case SVGA3D_B8G8R8A8_TYPELESS:
case SVGA3D_B8G8R8X8_TYPELESS:
return true;
@@ -2228,10 +2316,16 @@ svga_is_dx_format_supported(struct pipe_screen *screen,
assert(bindings);
assert(ss->sws->have_vgpu10);
- if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
- return false;
+ /* To support framebuffer without attachments */
+ if ((format == PIPE_FORMAT_NONE) && (bindings == PIPE_BIND_RENDER_TARGET))
+ return (ss->sws->have_gl43 && (sample_count <= ss->forcedSampleCount));
if (sample_count > 1) {
+
+ /* No MSAA support for shader image */
+ if (bindings & PIPE_BIND_SHADER_IMAGE)
+ return false;
+
/* In ms_samples, if bit N is set it means that we support
* multisample with N+1 samples per pixel.
*/
@@ -2246,7 +2340,6 @@ svga_is_dx_format_supported(struct pipe_screen *screen,
*/
if (bindings & PIPE_BIND_VERTEX_BUFFER) {
- SVGA3dSurfaceFormat svga_format;
unsigned flags;
svga_translate_vertex_format_vgpu10(format, &svga_format, &flags);
return svga_format != SVGA3D_FORMAT_INVALID;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.h b/lib/mesa/src/gallium/drivers/svga/svga_format.h
index a2ef47963..dae9a556f 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_format.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_format.h
@@ -60,6 +60,8 @@ struct svga_screen;
#define TF_XXXX (1 << 10) /* swizzle <X, X, X, X> */
#define TF_XXX1 (1 << 11) /* swizzle <X, X, X, 1> */
#define TF_XXXY (1 << 12) /* swizzle <X, X, X, Y> */
+#define TF_UAV (1 << 13) /* supports uav */
+#define TF_SM5 (1 << 14) /* supported in SM5 */
void
svga_translate_vertex_format_vgpu10(enum pipe_format format,
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c
index 9cb53ef0a..d5be5697c 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -103,8 +103,8 @@ intra_surface_copy(struct svga_context *svga, struct pipe_resource *tex,
SVGA_RETRY(svga, SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc, stex->handle,
level, layer_face, &box));
- /* Mark the texture subresource as rendered-to. */
- svga_set_texture_rendered_to(stex, layer_face, level);
+ /* Mark the texture surface as RENDERED. */
+ svga_set_texture_rendered_to(stex);
}
/**
@@ -139,8 +139,8 @@ copy_region_vgpu10(struct svga_context *svga, struct pipe_resource *src_tex,
/* Mark the texture subresource as defined. */
svga_define_texture_level(dtex, dst_layer_face, dst_level);
- /* Mark the texture subresource as rendered-to. */
- svga_set_texture_rendered_to(dtex, dst_layer_face, dst_level);
+ /* Mark the texture surface as RENDERED. */
+ svga_set_texture_rendered_to(dtex);
}
@@ -322,7 +322,7 @@ can_blit_via_svga_copy_region(struct svga_context *svga,
local_blit.dst.format = local_blit.src.format;
if (local_blit.filter == PIPE_TEX_FILTER_LINEAR)
local_blit.filter = PIPE_TEX_FILTER_NEAREST;
- if (!util_can_blit_via_copy_region(&local_blit, TRUE))
+ if (!util_can_blit_via_copy_region(&local_blit, TRUE, svga->render_condition))
return false;
/* For depth+stencil formats, copy with mask != PIPE_MASK_ZS is not
@@ -504,7 +504,8 @@ try_copy_region(struct svga_context *svga,
blit->src.box.depth);
svga_define_texture_level(dtex, dst_layer_face, blit->dst.level);
- svga_set_texture_rendered_to(dtex, dst_layer_face, blit->dst.level);
+ svga_set_texture_rendered_to(dtex);
+
return true;
}
@@ -634,7 +635,7 @@ try_blit(struct svga_context *svga, const struct pipe_blit_info *blit_info)
util_blitter_save_depth_stencil_alpha(svga->blitter,
(void*)svga->curr.depth);
util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
- util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
+ util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask, 0);
util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer);
util_blitter_save_fragment_sampler_states(svga->blitter,
svga->curr.num_samplers[PIPE_SHADER_FRAGMENT],
@@ -752,8 +753,8 @@ static bool
try_cpu_copy_region(struct svga_context *svga,
const struct pipe_blit_info *blit)
{
- if (util_can_blit_via_copy_region(blit, TRUE) ||
- util_can_blit_via_copy_region(blit, FALSE)) {
+ if (util_can_blit_via_copy_region(blit, TRUE, svga->render_condition) ||
+ util_can_blit_via_copy_region(blit, FALSE, svga->render_condition)) {
if (svga->render_condition && blit->render_condition_enable) {
debug_warning("CPU copy_region doesn't support "
@@ -772,6 +773,66 @@ try_cpu_copy_region(struct svga_context *svga,
return false;
}
+/**
+ * Resolve a multisampled surface into a single-sampled surface using the
+ * SVGA ResolveCopy command. Returns TRUE only if the command returned PIPE_OK.
+ */
+static boolean
+try_resolve_copy(struct svga_context *svga,
+ const struct pipe_blit_info *blit)
+{
+ enum pipe_error ret;
+ struct svga_texture *src_tex = svga_texture(blit->src.resource);
+ struct svga_texture *dst_tex = svga_texture(blit->dst.resource);
+
+ /* Bail out unless the source and destination formats are compatible. */
+ if (!formats_compatible(svga_screen(svga->pipe.screen),
+ src_tex->key.format, dst_tex->key.format))
+ return FALSE;
+
+ /* Both boxes must start at the origin and have identical dimensions. */
+ if ((blit->src.box.x || blit->src.box.y || blit->src.box.z) ||
+ (blit->dst.box.x || blit->dst.box.y || blit->dst.box.z) ||
+ (blit->src.box.width != blit->dst.box.width) ||
+ (blit->src.box.height != blit->dst.box.height) ||
+ (blit->src.box.depth != blit->dst.box.depth))
+ return FALSE;
+
+ ret = SVGA3D_vgpu10_ResolveCopy(svga->swc, 0, dst_tex->handle,
+ 0, src_tex->handle, dst_tex->key.format);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL); /* flush, then retry the command once */
+ ret = SVGA3D_vgpu10_ResolveCopy(svga->swc, 0, dst_tex->handle,
+ 0, src_tex->handle, dst_tex->key.format);
+ }
+
+ /* Mark surface state as RENDERED; this happens even if the retry failed. */
+ dst_tex->surface_state = SVGA_SURFACE_STATE_RENDERED;
+
+ return (ret == PIPE_OK);
+}
+
+
+/**
+ * Returns FALSE if the resource does not have data to copy (svga is unused).
+ */
+static boolean
+is_texture_valid_to_copy(struct svga_context *svga,
+ struct pipe_resource *resource)
+{
+ if (resource->target == PIPE_BUFFER) { /* buffer: needs a buffer surface */
+ struct svga_buffer *buf = svga_buffer(resource);
+ struct svga_buffer_surface *bufsurf = buf->bufsurf;
+
+ return (bufsurf &&
+ bufsurf->surface_state >= SVGA_SURFACE_STATE_UPDATED);
+ } else { /* texture: a PIPE_BIND_SHARED resource always passes */
+ struct svga_texture *tex = svga_texture(resource);
+ return ((tex->surface_state >= SVGA_SURFACE_STATE_UPDATED) ||
+ (resource->bind & PIPE_BIND_SHARED));
+ }
+}
+
/**
* The pipe::blit member.
@@ -794,6 +855,20 @@ svga_blit(struct pipe_context *pipe,
SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_BLIT);
+ if (!is_texture_valid_to_copy(svga, blit->src.resource)) {
+ debug_printf("%s: texture is not defined to copy\n",
+ __FUNCTION__);
+ goto done;
+ }
+
+ if (svga_have_sm4_1(svga) &&
+ blit->src.resource->nr_samples > 1 &&
+ blit->dst.resource->nr_samples <=1 &&
+ (blit->dst.resource->bind & PIPE_BIND_DISPLAY_TARGET)) {
+ if (try_resolve_copy(svga, blit))
+ goto done;
+ }
+
if (try_copy_region(svga, blit))
goto done;
@@ -826,6 +901,12 @@ svga_resource_copy_region(struct pipe_context *pipe,
SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_COPYREGION);
+ if (!is_texture_valid_to_copy(svga, src_tex)) {
+ debug_printf("%s: texture is not defined to copy\n",
+ __FUNCTION__);
+ goto done;
+ }
+
if (dst_tex->target == PIPE_BUFFER && src_tex->target == PIPE_BUFFER) {
/* can't copy within the same buffer, unfortunately */
if (svga_have_vgpu10(svga) && src_tex != dst_tex) {
@@ -841,6 +922,10 @@ svga_resource_copy_region(struct pipe_context *pipe,
dst_surf, src_box->x, dstx,
src_box->width));
dbuffer->dirty = TRUE;
+
+ /* Mark the buffer surface as RENDERED */
+ assert(dbuffer->bufsurf);
+ dbuffer->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED;
}
else {
/* use map/memcpy fallback */
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c
index 82b102081..1e58549f4 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -57,7 +57,7 @@ begin_blit(struct svga_context *svga)
util_blitter_save_depth_stencil_alpha(svga->blitter,
(void*)svga->curr.depth);
util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
- util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
+ util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask, 0);
}
@@ -174,14 +174,29 @@ try_clear(struct svga_context *svga,
if (svga_have_vgpu10(svga)) {
if (flags & SVGA3D_CLEAR_COLOR) {
unsigned i;
+ bool int_target = is_integer_target(fb, buffers);
- if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) {
+ if (int_target && !ints_fit_in_floats(color)) {
clear_buffers_with_quad(svga, buffers, color, depth, stencil);
/* We also cleared depth/stencil, so that's done */
flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL);
}
else {
struct pipe_surface *rtv;
+ float rgba[4];
+
+ if (int_target) {
+ rgba[0] = (float) color->i[0];
+ rgba[1] = (float) color->i[1];
+ rgba[2] = (float) color->i[2];
+ rgba[3] = (float) color->i[3];
+ }
+ else {
+ rgba[0] = color->f[0];
+ rgba[1] = color->f[1];
+ rgba[2] = color->f[2];
+ rgba[3] = color->f[3];
+ }
/* Issue VGPU10 Clear commands */
for (i = 0; i < fb->nr_cbufs; i++) {
@@ -194,8 +209,7 @@ try_clear(struct svga_context *svga,
if (!rtv)
return PIPE_ERROR_OUT_OF_MEMORY;
- ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc,
- rtv, color->f);
+ ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, rgba);
if (ret != PIPE_OK)
return ret;
}
@@ -325,7 +339,7 @@ svga_clear_texture(struct pipe_context *pipe,
if (box->x == 0 && box->y == 0 && box->width == surface->width &&
box->height == surface->height) {
/* clearing whole surface, use direct VGPU10 command */
-
+ assert(svga_surface(dsv)->view_id != SVGA3D_INVALID_ID);
SVGA_RETRY(svga, SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv,
clear_flags,
@@ -367,16 +381,32 @@ svga_clear_texture(struct pipe_context *pipe,
if (box->x == 0 && box->y == 0 && box->width == surface->width &&
box->height == surface->height) {
struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+ bool int_target = is_integer_target(curr, PIPE_CLEAR_COLOR);
- if (is_integer_target(curr, PIPE_CLEAR_COLOR) &&
- !ints_fit_in_floats(&color)) {
+ if (int_target && !ints_fit_in_floats(&color)) {
/* To clear full texture with integer format */
clear_buffers_with_quad(svga, PIPE_CLEAR_COLOR, &color, 0.0, 0);
}
else {
+ float rgba[4];
+
+ if (int_target) {
+ rgba[0] = (float) color.i[0];
+ rgba[1] = (float) color.i[1];
+ rgba[2] = (float) color.i[2];
+ rgba[3] = (float) color.i[3];
+ }
+ else {
+ rgba[0] = color.f[0];
+ rgba[1] = color.f[1];
+ rgba[2] = color.f[2];
+ rgba[3] = color.f[3];
+ }
+
/* clearing whole surface using VGPU10 command */
+ assert(svga_surface(rtv)->view_id != SVGA3D_INVALID_ID);
SVGA_RETRY(svga, SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv,
- color.f));
+ rgba));
}
}
else {
@@ -446,6 +476,7 @@ svga_try_clear_render_target(struct svga_context *svga,
if (!rtv)
return PIPE_ERROR_OUT_OF_MEMORY;
+ assert(svga_surface(rtv)->view_id != SVGA3D_INVALID_ID);
return SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, color->f);
}
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c
index feeacd2f2..4d7299da6 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c
@@ -54,6 +54,7 @@ svga_set_constant_buffer(struct pipe_context *pipe,
if (cb) {
buffer_size = cb->buffer_size;
+
if (cb->user_buffer) {
buf = svga_user_buffer_create(pipe->screen,
(void *) cb->user_buffer,
@@ -94,6 +95,8 @@ svga_set_constant_buffer(struct pipe_context *pipe,
svga->dirty |= SVGA_NEW_TCS_CONSTS;
else if (shader == PIPE_SHADER_TESS_EVAL)
svga->dirty |= SVGA_NEW_TES_CONSTS;
+ else if (shader == PIPE_SHADER_COMPUTE)
+ svga->dirty |= SVGA_NEW_CS_CONSTS;
} else {
if (shader == PIPE_SHADER_FRAGMENT)
svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
@@ -105,9 +108,14 @@ svga_set_constant_buffer(struct pipe_context *pipe,
svga->dirty |= SVGA_NEW_TCS_CONST_BUFFER;
else if (shader == PIPE_SHADER_TESS_EVAL)
svga->dirty |= SVGA_NEW_TES_CONST_BUFFER;
+ else if (shader == PIPE_SHADER_COMPUTE)
+ svga->dirty |= SVGA_NEW_CS_CONST_BUFFER;
/* update bitmask of dirty const buffers */
svga->state.dirty_constbufs[shader] |= (1 << index);
+
+ /* purge any stale rawbuf srv */
+ svga_destroy_rawbuf_srv(svga);
}
if (cb && cb->user_buffer) {
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c
index 745fdad64..ffdd3df05 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -318,7 +318,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
if (!svga_update_state_retry(svga, SVGA_STATE_HW_DRAW)) {
static const char *msg = "State update failed, skipping draw call";
debug_printf("%s\n", msg);
- pipe_debug_message(&svga->debug.callback, INFO, "%s", msg);
+ util_debug_message(&svga->debug.callback, INFO, "%s", msg);
goto done;
}
svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c
index 7e809d0cd..df1284c6a 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -50,7 +50,7 @@ static void svga_flush( struct pipe_context *pipe,
svga_context_flush(svga, fence);
SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
- __FUNCTION__, fence ? *fence : 0x0);
+ __FUNCTION__, fence ? *fence : NULL);
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c
index 61b4897c5..f4ed782cd 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -225,7 +225,7 @@ svga_set_viewport_states(struct pipe_context *pipe,
*/
static void
svga_set_debug_callback(struct pipe_context *pipe,
- const struct pipe_debug_callback *cb)
+ const struct util_debug_callback *cb)
{
struct svga_context *svga = svga_context(pipe);
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index 1b823d64e..061cd5520 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -101,9 +101,10 @@ translate_cull_mode(unsigned cull)
}
-static void
-define_rasterizer_object(struct svga_context *svga,
- struct svga_rasterizer_state *rast)
+int
+svga_define_rasterizer_object(struct svga_context *svga,
+ struct svga_rasterizer_state *rast,
+ unsigned samples)
{
struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
unsigned fill_mode = translate_fill_mode(rast->templ.fill_front);
@@ -120,8 +121,10 @@ define_rasterizer_object(struct svga_context *svga,
rast->templ.line_stipple_pattern : 0;
const uint8 pv_last = !rast->templ.flatshade_first &&
svgascreen->haveProvokingVertex;
+ int rastId;
+ enum pipe_error ret;
- rast->id = util_bitmask_add(svga->rast_object_id_bm);
+ rastId = util_bitmask_add(svga->rast_object_id_bm);
if (rast->templ.fill_front != rast->templ.fill_back) {
/* The VGPU10 device can't handle different front/back fill modes.
@@ -131,24 +134,53 @@ define_rasterizer_object(struct svga_context *svga,
fill_mode = SVGA3D_FILLMODE_FILL;
}
- SVGA_RETRY(svga, SVGA3D_vgpu10_DefineRasterizerState
- (svga->swc,
- rast->id,
- fill_mode,
- cull_mode,
- rast->templ.front_ccw,
- depth_bias,
- depth_bias_clamp,
- slope_scaled_depth_bias,
- rast->templ.depth_clip_near,
- rast->templ.scissor,
- rast->templ.multisample,
- rast->templ.line_smooth,
- line_width,
- rast->templ.line_stipple_enable,
- line_factor,
- line_pattern,
- pv_last));
+ if (samples > 1 && svga_have_gl43(svga) &&
+ svgascreen->sws->have_rasterizer_state_v2_cmd) {
+
+ ret = SVGA3D_sm5_DefineRasterizerState_v2(svga->swc,
+ rastId,
+ fill_mode,
+ cull_mode,
+ rast->templ.front_ccw,
+ depth_bias,
+ depth_bias_clamp,
+ slope_scaled_depth_bias,
+ rast->templ.depth_clip_near,
+ rast->templ.scissor,
+ rast->templ.multisample,
+ rast->templ.line_smooth,
+ line_width,
+ rast->templ.line_stipple_enable,
+ line_factor,
+ line_pattern,
+ pv_last,
+ samples);
+ } else {
+ ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+ rastId,
+ fill_mode,
+ cull_mode,
+ rast->templ.front_ccw,
+ depth_bias,
+ depth_bias_clamp,
+ slope_scaled_depth_bias,
+ rast->templ.depth_clip_near,
+ rast->templ.scissor,
+ rast->templ.multisample,
+ rast->templ.line_smooth,
+ line_width,
+ rast->templ.line_stipple_enable,
+ line_factor,
+ line_pattern,
+ pv_last);
+ }
+
+ if (ret != PIPE_OK) {
+ util_bitmask_clear(svga->rast_object_id_bm, rastId);
+ return SVGA3D_INVALID_ID;
+ }
+
+ return rastId;
}
@@ -180,7 +212,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
* though. Our smooth point implementation involves drawing a square,
* computing fragment distance from point center, then attenuating
* the fragment alpha value. We should not attenuate alpha if msaa
- * is enabled. We should kill fragments entirely outside the circle
+ * is enabled. We should discard fragments entirely outside the circle
* and let the GPU compute per-fragment coverage.
* But as-is, our implementation gives acceptable results and passes
* Piglit's MSAA point smooth test.
@@ -191,7 +223,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
if (rast->templ.point_smooth &&
rast->templ.point_size_per_vertex == 0 &&
rast->templ.point_size <= screen->pointSmoothThreshold) {
- /* If the point size is less than the threshold, disable smoothing.
+ /* If the point size is less than the threshold, deactivate smoothing.
* Note that this only effects point rendering when we use the
* pipe_rasterizer_state::point_size value, not when the point size
* is set in the VS.
@@ -359,11 +391,27 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
}
if (svga_have_vgpu10(svga)) {
- define_rasterizer_object(svga, rast);
+ rast->id = svga_define_rasterizer_object(svga, rast, 0);
+ if (rast->id == SVGA3D_INVALID_ID) {
+ svga_context_flush(svga, NULL);
+ rast->id = svga_define_rasterizer_object(svga, rast, 0);
+ assert(rast->id != SVGA3D_INVALID_ID);
+ }
+ }
+
+ if (svga_have_gl43(svga)) {
+ /* initialize the alternate rasterizer state ids.
+ * For 0 and 1 sample count, we can use the same rasterizer object.
+ */
+ rast->altRastIds[0] = rast->altRastIds[1] = rast->id;
+
+ for (unsigned i = 2; i < ARRAY_SIZE(rast->altRastIds); i++) {
+ rast->altRastIds[i] = SVGA3D_INVALID_ID;
+ }
}
if (templ->poly_smooth) {
- pipe_debug_message(&svga->debug.callback, CONFORMANCE,
+ util_debug_message(&svga->debug.callback, CONFORMANCE,
"GL_POLYGON_SMOOTH not supported");
}
@@ -408,6 +456,10 @@ svga_delete_rasterizer_state(struct pipe_context *pipe, void *state)
struct svga_rasterizer_state *raster =
(struct svga_rasterizer_state *) state;
+ /* free any alternate rasterizer state used for point sprite */
+ if (raster->no_cull_rasterizer)
+ svga_delete_rasterizer_state(pipe, (void *)(raster->no_cull_rasterizer));
+
if (svga_have_vgpu10(svga)) {
SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyRasterizerState(svga->swc,
raster->id));
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c
index ad1040c9d..3e0e26c76 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -341,7 +341,7 @@ svga_delete_sampler_state(struct pipe_context *pipe, void *sampler)
if (svga_have_vgpu10(svga)) {
unsigned i;
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < ARRAY_SIZE(ss->id); i++) {
if (ss->id[i] != SVGA3D_INVALID_ID) {
svga_hwtnl_flush_retry(svga);
@@ -537,7 +537,7 @@ svga_cleanup_sampler_state(struct svga_context *svga)
{
enum pipe_shader_type shader;
- for (shader = 0; shader <= PIPE_SHADER_TESS_EVAL; shader++) {
+ for (shader = 0; shader <= PIPE_SHADER_COMPUTE; shader++) {
unsigned i;
for (i = 0; i < svga->state.hw_draw.num_sampler_views[shader]; i++) {
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c
index a74825496..4e3280457 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c
@@ -141,7 +141,7 @@ svga_create_stream_output(struct svga_context *svga,
unsigned i;
enum pipe_error ret;
unsigned id;
- ASSERTED unsigned maxDecls;
+ ASSERTED unsigned maxDecls = 0;
assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
@@ -450,6 +450,7 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
for (i = 0; i < num_targets; i++) {
struct svga_stream_output_target *sot
= svga_stream_output_target(targets[i]);
+ struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
unsigned size;
svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer,
@@ -458,6 +459,10 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
assert(svga_buffer(sot->base.buffer)->key.flags
& SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
+ /* Mark the buffer surface as RENDERED */
+ assert(sbuf->bufsurf);
+ sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED;
+
svga->so_targets[i] = &sot->base;
if (offsets[i] == -1) {
soBindings[i].offset = -1;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c
index aae91e4f4..6fa3af526 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -79,6 +79,26 @@ svga_buffer_needs_hw_storage(const struct svga_screen *ss,
return !!(template->bind & bind_mask);
}
+/**
+ * Decide whether the buffer contents must be read back from the device
+ * before the transfer st is mapped.
+ *
+ * Returns TRUE only when all of the following hold:
+ *  - the transfer usage is exactly PIPE_MAP_READ,
+ *  - the buffer has a current buffer surface (sbuf->bufsurf),
+ *  - the buffer is marked dirty or that surface is in the
+ *    SVGA_SURFACE_STATE_RENDERED state, and
+ *  - neither the surface key nor the winsys context is coherent.
+ *
+ * NOTE(review): the usage test is an equality check, so a mapping that
+ * combines PIPE_MAP_READ with any other usage flag returns FALSE here.
+ * TODO confirm that this is intended.
+ */
+static inline boolean
+need_buf_readback(struct svga_context *svga,
+ struct pipe_transfer *st)
+{
+ struct svga_buffer *sbuf = svga_buffer(st->resource);
+ /* Only a transfer whose usage is exactly PIPE_MAP_READ is considered */
+ if (st->usage != PIPE_MAP_READ)
+ return FALSE;
+
+ /* No buffer surface has been created, so no readback is requested */
+ if (!sbuf->bufsurf)
+ return FALSE;
+ /* Dirty or rendered-to, and not coherent on either the key or context */
+ return ((sbuf->dirty ||
+ sbuf->bufsurf->surface_state == SVGA_SURFACE_STATE_RENDERED) &&
+ !sbuf->key.coherent && !svga->swc->force_coherent);
+}
+
+
/**
* Create a buffer transfer.
*
@@ -131,11 +151,12 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);
}
- if ((usage & PIPE_MAP_READ) && sbuf->dirty &&
- !sbuf->key.coherent && !svga->swc->force_coherent) {
-
- /* Host-side buffers can only be dirtied with vgpu10 features
- * (streamout and buffer copy).
+ /* If it is a read transfer and the buffer is dirty or the buffer is bound
+ * to a uav, we will need to read the subresource content from the device.
+ */
+ if (need_buf_readback(svga, transfer)) {
+ /* Host-side buffers can be dirtied with vgpu10 features
+ * (streamout and buffer copy) and sm5 feature via uav.
*/
assert(svga_have_vgpu10(svga));
@@ -150,13 +171,16 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
assert(sbuf->handle);
- SVGA_RETRY(svga, SVGA3D_vgpu10_ReadbackSubResource(svga->swc,
- sbuf->handle, 0));
+ SVGA_RETRY(svga, SVGA3D_ReadbackGBSurface(svga->swc, sbuf->handle));
svga->hud.num_readbacks++;
svga_context_finish(svga);
sbuf->dirty = FALSE;
+
+ /* Mark the buffer surface state as UPDATED */
+ assert(sbuf->bufsurf);
+ sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_UPDATED;
}
if (usage & PIPE_MAP_WRITE) {
@@ -434,11 +458,13 @@ svga_resource_destroy(struct pipe_screen *screen,
DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
*/
SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle);
- svga_screen_surface_destroy(ss, &tex->key, &tex->handle);
+
+ boolean to_invalidate = svga_was_texture_rendered_to(tex);
+ svga_screen_surface_destroy(ss, &tex->key, to_invalidate, &tex->handle);
/* Destroy the backed surface handle if exists */
if (tex->backed_handle)
- svga_screen_surface_destroy(ss, &tex->backed_key, &tex->backed_handle);
+ svga_screen_surface_destroy(ss, &tex->backed_key, to_invalidate, &tex->backed_handle);
ss->hud.total_resource_bytes -= tex->size;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h
index 97649d972..5652bbcec 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -65,6 +65,7 @@ struct svga_buffer_surface
unsigned bind_flags;
struct svga_host_surface_cache_key key;
struct svga_winsys_surface *handle;
+ enum svga_surface_state surface_state;
};
/**
@@ -120,6 +121,9 @@ struct svga_buffer
*/
struct list_head surfaces;
+ /* Current surface structure */
+ struct svga_buffer_surface *bufsurf;
+
/**
* Information about ongoing and past map operations.
*/
@@ -212,6 +216,7 @@ struct svga_buffer
unsigned size; /**< Approximate size in bytes */
boolean dirty; /**< Need to do a readback before mapping? */
+ boolean uav; /* Set if the buffer is bound to a uav */
/** In some cases we try to keep the results of the translate_indices()
* function from svga_draw_elements.c
@@ -332,6 +337,24 @@ svga_buffer_hw_storage_unmap(struct svga_context *svga,
}
} else
sws->buffer_unmap(sws, sbuf->hwbuf);
+
+ /* Mark the buffer surface as UPDATED */
+ assert(sbuf->bufsurf);
+ sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_UPDATED;
+}
+
+/**
+ * Mark the given buffer surface as rendered to by setting its
+ * surface_state to SVGA_SURFACE_STATE_RENDERED.
+ */
+static inline void
+svga_set_buffer_rendered_to(struct svga_buffer_surface *bufsurf)
+{
+ bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED;
+}
+
+/**
+ * Return TRUE if the surface_state of the given buffer surface is
+ * SVGA_SURFACE_STATE_RENDERED, FALSE otherwise.
+ */
+static inline boolean
+svga_was_buffer_rendered_to(const struct svga_buffer_surface *bufsurf)
+{
+ return (bufsurf->surface_state == SVGA_SURFACE_STATE_RENDERED);
}
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 5bebbb509..1e86b5d12 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -54,6 +54,10 @@ struct svga_3d_invalidate_gb_image {
};
+static void
+svga_buffer_upload_ranges(struct svga_context *, struct svga_buffer *);
+
+
/**
* Allocate a winsys_buffer (ie. DMA, aka GMR memory).
*
@@ -142,8 +146,7 @@ svga_buffer_create_hw_storage(struct svga_screen *ss,
/**
- * Allocate graphics memory for vertex/index/constant/etc buffer (not
- * textures).
+ * Allocate graphics memory for vertex/index/constant/texture buffer.
*/
enum pipe_error
svga_buffer_create_host_surface(struct svga_screen *ss,
@@ -155,7 +158,7 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
assert(!sbuf->user);
if (!sbuf->handle) {
- boolean validated;
+ boolean invalidated;
sbuf->key.flags = 0;
@@ -190,6 +193,15 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
sbuf->key.flags = SVGA3D_SURFACE_TRANSFER_FROM_BUFFER;
}
+ if (ss->sws->have_gl43 &&
+ (bind_flags & (PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE)) &&
+ (!(bind_flags & (PIPE_BIND_STREAM_OUTPUT)))) {
+ /* This surface can be bound to a uav. */
+ assert((bind_flags & PIPE_BIND_CONSTANT_BUFFER) == 0);
+ sbuf->key.flags |= SVGA3D_SURFACE_BIND_UAVIEW |
+ SVGA3D_SURFACE_BIND_RAW_VIEWS;
+ }
+
if (sbuf->b.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
/* This surface can be mapped persistently. We use
* coherent memory to avoid implementing memory barriers for
@@ -213,22 +225,31 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
sbuf->handle = svga_screen_surface_create(ss, bind_flags,
sbuf->b.usage,
- &validated, &sbuf->key);
+ &invalidated, &sbuf->key);
if (!sbuf->handle)
return PIPE_ERROR_OUT_OF_MEMORY;
- /* Always set the discard flag on the first time the buffer is written
+ /* Set the discard flag on the first time the buffer is written
* as svga_screen_surface_create might have passed a recycled host
- * buffer.
+ * buffer. This is only needed for host-backed mode; in guest-backed
+ * mode, the recycled buffer would have been invalidated.
*/
- sbuf->dma.flags.discard = TRUE;
+ if (!ss->sws->have_gb_objects)
+ sbuf->dma.flags.discard = TRUE;
SVGA_DBG(DEBUG_DMA, " --> got sid %p sz %d (buffer)\n",
sbuf->handle, sbuf->b.width0);
/* Add the new surface to the buffer surface list */
- ret = svga_buffer_add_host_surface(sbuf, sbuf->handle, &sbuf->key,
- bind_flags);
+ sbuf->bufsurf = svga_buffer_add_host_surface(sbuf, sbuf->handle,
+ &sbuf->key,
+ bind_flags);
+ if (sbuf->bufsurf == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ sbuf->bufsurf->surface_state =
+ invalidated ? SVGA_SURFACE_STATE_INVALIDATED :
+ SVGA_SURFACE_STATE_CREATED;
if (ss->sws->have_gb_objects) {
/* Initialize the surface with zero */
@@ -263,14 +284,23 @@ svga_buffer_recreate_host_surface(struct svga_context *svga,
if (ret == PIPE_OK) {
/* Copy the surface data */
assert(sbuf->handle);
+ assert(sbuf->bufsurf);
SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, old_handle,
sbuf->handle,
0, 0, sbuf->b.width0));
+
+ /* Mark this surface as RENDERED */
+ sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED;
}
/* Set the new bind flags for this buffer resource */
sbuf->bind_flags = bind_flags;
+ /* Set the dirty bit to signal a read back is needed before the data copied
+ * to this new surface can be referenced.
+ */
+ sbuf->dirty = TRUE;
+
return ret;
}
@@ -286,6 +316,10 @@ compatible_bind_flags(unsigned bind_flags,
return TRUE;
else if ((bind_flags|tobind_flags) & PIPE_BIND_CONSTANT_BUFFER)
return FALSE;
+ else if ((bind_flags & PIPE_BIND_STREAM_OUTPUT) &&
+ (tobind_flags & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SHADER_BUFFER)))
+ /* Stream out cannot be mixed with UAV */
+ return FALSE;
else
return TRUE;
}
@@ -313,7 +347,7 @@ svga_buffer_get_host_surface(struct svga_buffer *sbuf,
/**
* Adds the host surface to the buffer surface list.
*/
-enum pipe_error
+struct svga_buffer_surface *
svga_buffer_add_host_surface(struct svga_buffer *sbuf,
struct svga_winsys_surface *handle,
struct svga_host_surface_cache_key *key,
@@ -323,7 +357,7 @@ svga_buffer_add_host_surface(struct svga_buffer *sbuf,
bufsurf = CALLOC_STRUCT(svga_buffer_surface);
if (!bufsurf)
- return PIPE_ERROR_OUT_OF_MEMORY;
+ return NULL;
bufsurf->bind_flags = bind_flags;
bufsurf->handle = handle;
@@ -335,7 +369,7 @@ svga_buffer_add_host_surface(struct svga_buffer *sbuf,
/* Set the new bind flags for this buffer resource */
sbuf->bind_flags = bind_flags;
- return PIPE_OK;
+ return bufsurf;
}
@@ -358,12 +392,14 @@ svga_buffer_bind_host_surface(struct svga_context *svga,
SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, sbuf->handle,
bufsurf->handle,
0, 0, sbuf->b.width0));
+ bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED;
}
/* Set this surface as the current one */
sbuf->handle = bufsurf->handle;
sbuf->key = bufsurf->key;
sbuf->bind_flags = bufsurf->bind_flags;
+ sbuf->bufsurf = bufsurf;
}
@@ -387,6 +423,9 @@ svga_buffer_validate_host_surface(struct svga_context *svga,
struct svga_buffer_surface *bufsurf;
enum pipe_error ret = PIPE_OK;
+ /* upload any dirty ranges */
+ svga_buffer_upload_ranges(svga, sbuf);
+
/* Flush any pending upload first */
svga_buffer_upload_flush(svga, sbuf);
@@ -409,7 +448,9 @@ svga_buffer_validate_host_surface(struct svga_context *svga,
/* Destroy the old surface */
svga_screen_surface_destroy(svga_screen(sbuf->b.screen),
- &bufsurf->key, &bufsurf->handle);
+ &bufsurf->key,
+ svga_was_buffer_rendered_to(bufsurf),
+ &bufsurf->handle);
list_del(&bufsurf->list);
FREE(bufsurf);
@@ -434,7 +475,9 @@ svga_buffer_destroy_host_surface(struct svga_screen *ss,
LIST_FOR_EACH_ENTRY_SAFE(bufsurf, next, &sbuf->surfaces, list) {
SVGA_DBG(DEBUG_DMA, " ungrab sid %p sz %d\n",
bufsurf->handle, sbuf->b.width0);
- svga_screen_surface_destroy(ss, &bufsurf->key, &bufsurf->handle);
+ svga_screen_surface_destroy(ss, &bufsurf->key,
+ svga_was_buffer_rendered_to(bufsurf),
+ &bufsurf->handle);
FREE(bufsurf);
}
}
@@ -464,57 +507,20 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
assert(numBoxes);
assert(sbuf->dma.updates == NULL);
- if (sbuf->dma.flags.discard) {
- struct svga_3d_invalidate_gb_image *cicmd = NULL;
- SVGA3dCmdInvalidateGBImage *invalidate_cmd;
- const unsigned total_commands_size =
- sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd);
+ /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
+ const unsigned total_commands_size =
+ sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd);
- /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by
- * 'numBoxes' UPDATE_GB_IMAGE commands. Allocate all at once rather
- * than with separate commands because we need to properly deal with
- * filling the command buffer.
- */
- invalidate_cmd = SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
- total_commands_size, 1 + numBoxes);
- if (!invalidate_cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- cicmd = container_of(invalidate_cmd, struct svga_3d_invalidate_gb_image, body);
- cicmd->header.size = sizeof(*invalidate_cmd);
- swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL,
- sbuf->handle,
- (SVGA_RELOC_WRITE |
- SVGA_RELOC_INTERNAL |
- SVGA_RELOC_DMA));
- invalidate_cmd->image.face = 0;
- invalidate_cmd->image.mipmap = 0;
-
- /* The whole_update_command is a SVGA3dCmdHeader plus the
- * SVGA3dCmdUpdateGBImage command.
- */
- whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1];
- /* initialize the first UPDATE_GB_IMAGE command */
- whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
- update_cmd = &whole_update_cmd->body;
-
- } else {
- /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
- const unsigned total_commands_size =
- sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd);
-
- update_cmd = SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_UPDATE_GB_IMAGE,
- total_commands_size, numBoxes);
- if (!update_cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
+ update_cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_UPDATE_GB_IMAGE,
+ total_commands_size, numBoxes);
+ if (!update_cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
- /* The whole_update_command is a SVGA3dCmdHeader plus the
- * SVGA3dCmdUpdateGBImage command.
- */
- whole_update_cmd = container_of(update_cmd, struct svga_3d_update_gb_image, body);
- }
+ /* The whole_update_command is a SVGA3dCmdHeader plus the
+ * SVGA3dCmdUpdateGBImage command.
+ */
+ whole_update_cmd = container_of(update_cmd, struct svga_3d_update_gb_image, body);
/* Init the first UPDATE_GB_IMAGE command */
whole_update_cmd->header.size = sizeof(*update_cmd);
@@ -840,7 +846,6 @@ svga_buffer_add_range(struct svga_buffer *sbuf, unsigned start, unsigned end)
}
-
/**
* Copy the contents of the malloc buffer to a hardware buffer.
*/
@@ -979,6 +984,61 @@ svga_buffer_upload_piecewise(struct svga_screen *ss,
/**
+ * A helper function to add an update command for the dirty ranges if there
+ * isn't already one.
+ *
+ * Does nothing when the buffer has no recorded ranges
+ * (sbuf->map.num_ranges == 0).  Otherwise, when no DMA/update is pending,
+ * the data is migrated to hardware storage and an upload command is
+ * emitted; on success the buffer is flagged as pending and appended to
+ * svga->dirty_buffers.  If the migration fails with
+ * PIPE_ERROR_OUT_OF_MEMORY the buffer is uploaded piecewise instead.  Any
+ * remaining failure asserts and drops the recorded ranges.
+ *
+ * There is no return value, so failures are not reported to the caller.
+ */
+static void
+svga_buffer_upload_ranges(struct svga_context *svga,
+ struct svga_buffer *sbuf)
+{
+ struct pipe_screen *screen = svga->pipe.screen;
+ struct svga_screen *ss = svga_screen(screen);
+ enum pipe_error ret = PIPE_OK;
+
+ if (sbuf->map.num_ranges) {
+ if (!sbuf->dma.pending) {
+ /* No pending DMA/update commands yet. */
+
+ /* Migrate the data from swbuf -> hwbuf if necessary */
+ ret = svga_buffer_update_hw(svga, sbuf, sbuf->bind_flags);
+ if (ret == PIPE_OK) {
+ /* Emit DMA or UpdateGBImage commands */
+ SVGA_RETRY_OOM(svga, ret, svga_buffer_upload_command(svga, sbuf));
+ if (ret == PIPE_OK) {
+ sbuf->dma.pending = TRUE;
+ assert(!sbuf->head.prev && !sbuf->head.next);
+ list_addtail(&sbuf->head, &svga->dirty_buffers);
+ }
+ }
+ else if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
+ /*
+ * The buffer is too big to fit in the GMR aperture, so break it into
+ * smaller pieces.
+ */
+ ret = svga_buffer_upload_piecewise(ss, svga, sbuf);
+ }
+
+ if (ret != PIPE_OK) {
+ /*
+ * Something unexpected happened above. There is very little that
+ * we can do other than proceeding while ignoring the dirty ranges.
+ */
+ assert(0);
+ sbuf->map.num_ranges = 0;
+ }
+ }
+ else {
+ /*
+ * There is a pending DMA already. Make sure it is from this context.
+ */
+ assert(sbuf->dma.svga == svga);
+ }
+ }
+ return;
+}
+
+
+/**
* Get (or create/upload) the winsys surface handle so that we can
* refer to this buffer in fifo commands.
* This function will create the host surface, and in the GB case also the
@@ -1033,48 +1093,12 @@ svga_buffer_handle(struct svga_context *svga, struct pipe_resource *buf,
}
assert(sbuf->handle);
+ assert(sbuf->bufsurf);
if (svga->swc->force_coherent || sbuf->key.coherent)
return sbuf->handle;
- if (sbuf->map.num_ranges) {
- if (!sbuf->dma.pending) {
- /* No pending DMA/update commands yet. */
-
- /* Migrate the data from swbuf -> hwbuf if necessary */
- ret = svga_buffer_update_hw(svga, sbuf, sbuf->bind_flags);
- if (ret == PIPE_OK) {
- /* Emit DMA or UpdateGBImage commands */
- SVGA_RETRY_OOM(svga, ret, svga_buffer_upload_command(svga, sbuf));
- if (ret == PIPE_OK) {
- sbuf->dma.pending = TRUE;
- assert(!sbuf->head.prev && !sbuf->head.next);
- list_addtail(&sbuf->head, &svga->dirty_buffers);
- }
- }
- else if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
- /*
- * The buffer is too big to fit in the GMR aperture, so break it in
- * smaller pieces.
- */
- ret = svga_buffer_upload_piecewise(ss, svga, sbuf);
- }
-
- if (ret != PIPE_OK) {
- /*
- * Something unexpected happened above. There is very little that
- * we can do other than proceeding while ignoring the dirty ranges.
- */
- assert(0);
- sbuf->map.num_ranges = 0;
- }
- }
- else {
- /*
- * There a pending dma already. Make sure it is from this context.
- */
- assert(sbuf->dma.svga == svga);
- }
- }
+ /* upload any dirty ranges */
+ svga_buffer_upload_ranges(svga, sbuf);
assert(sbuf->map.num_ranges == 0 || sbuf->dma.pending);
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h
index c2d749b20..7b15a66d1 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h
@@ -55,7 +55,7 @@ svga_buffer_recreate_host_surface(struct svga_context *svga,
struct svga_buffer *sbuf,
unsigned bind_flags);
-enum pipe_error
+struct svga_buffer_surface *
svga_buffer_add_host_surface(struct svga_buffer *sbuf,
struct svga_winsys_surface *handle,
struct svga_host_surface_cache_key *key,
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c
index 412be0ada..f2ab20edb 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c
@@ -231,8 +231,7 @@ need_tex_readback(struct svga_transfer *st)
if ((st->base.usage & PIPE_MAP_WRITE) &&
((st->base.usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) == 0)) {
- return svga_was_texture_rendered_to(svga_texture(st->base.resource),
- st->slice, st->base.level);
+ return svga_was_texture_rendered_to(svga_texture(st->base.resource));
}
return FALSE;
@@ -240,30 +239,19 @@ need_tex_readback(struct svga_transfer *st)
static void
-readback_image_vgpu9(struct svga_context *svga,
- struct svga_winsys_surface *surf,
- unsigned slice,
- unsigned level)
+readback_texture_surface(struct svga_context *svga,
+ struct svga_texture *tex,
+ struct svga_winsys_surface *surf)
{
- SVGA_RETRY(svga, SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level));
-}
+ SVGA_RETRY(svga, SVGA3D_ReadbackGBSurface(svga->swc, surf));
+ /* Mark the texture surface as UPDATED */
+ tex->surface_state = SVGA_SURFACE_STATE_UPDATED;
-static void
-readback_image_vgpu10(struct svga_context *svga,
- struct svga_winsys_surface *surf,
- unsigned slice,
- unsigned level,
- unsigned numMipLevels)
-{
- unsigned subResource;
-
- subResource = slice * numMipLevels + level;
- SVGA_RETRY(svga, SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf,
- subResource));
+ svga->hud.num_readbacks++;
+ SVGA_STATS_COUNT_INC(svga_sws(svga), SVGA_STATS_COUNT_TEXREADBACK);
}
-
/**
* Use DMA for the transfer request
*/
@@ -346,31 +334,23 @@ svga_texture_transfer_map_direct(struct svga_context *svga,
struct svga_texture *tex = svga_texture(texture);
struct svga_winsys_surface *surf = tex->handle;
unsigned level = st->base.level;
- unsigned w, h, nblocksx, nblocksy, i;
+ unsigned w, h, nblocksx, nblocksy;
unsigned usage = st->base.usage;
if (need_tex_readback(st)) {
svga_surfaces_flush(svga);
if (!svga->swc->force_coherent || tex->imported) {
- for (i = 0; i < st->box.d; i++) {
- if (svga_have_vgpu10(svga)) {
- readback_image_vgpu10(svga, surf, st->slice + i, level,
- tex->b.last_level + 1);
- } else {
- readback_image_vgpu9(svga, surf, st->slice + i, level);
- }
- }
- svga->hud.num_readbacks++;
- SVGA_STATS_COUNT_INC(sws, SVGA_STATS_COUNT_TEXREADBACK);
+ /* Readback the whole surface */
+ readback_texture_surface(svga, tex, surf);
- svga_context_flush(svga, NULL);
+ svga_context_finish(svga);
}
/*
* Note: if PIPE_MAP_DISCARD_WHOLE_RESOURCE were specified
* we could potentially clear the flag for all faces/layers/mips.
*/
- svga_clear_texture_rendered_to(tex, st->slice, level);
+ svga_clear_texture_rendered_to(tex);
}
else {
assert(usage & PIPE_MAP_WRITE);
@@ -427,7 +407,6 @@ svga_texture_transfer_map_direct(struct svga_context *svga,
map = svga->swc->surface_map(svga->swc, surf, usage, &retry, &rebind);
svga_retry_exit(svga);
}
-
if (map && rebind) {
enum pipe_error ret;
@@ -556,10 +535,11 @@ svga_texture_transfer_map(struct pipe_context *pipe,
break;
}
- /* Force direct map for multisample surface */
- if (texture->nr_samples > 1) {
- assert(svga_have_gb_objects(svga));
- assert(sws->have_sm4_1);
+ /* We never want to use DMA transfers on systems with GBObjects because
+ * it causes serialization issues and in SVGAv3 vram is gone which
+ * makes it impossible to support both at the same time.
+ */
+ if (svga_have_gb_objects(svga)) {
use_direct_map = TRUE;
}
@@ -584,8 +564,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
boolean can_use_upload = tex->can_use_upload &&
!(st->base.usage & PIPE_MAP_READ);
boolean was_rendered_to =
- svga_was_texture_rendered_to(svga_texture(texture),
- st->slice, st->base.level);
+ svga_was_texture_rendered_to(svga_texture(texture));
/* If the texture was already rendered to and upload buffer
* is supported, then we will use upload buffer to
@@ -721,7 +700,7 @@ svga_texture_transfer_unmap_dma(struct svga_context *svga,
}
svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags);
- svga_set_texture_rendered_to(tex, st->slice, st->base.level);
+ svga_set_texture_rendered_to(tex);
}
FREE(st->swbuf);
@@ -785,6 +764,9 @@ svga_texture_transfer_unmap_direct(struct svga_context *svga,
transfer->level);
}
}
+
+ /* Mark the texture surface state as UPDATED */
+ tex->surface_state = SVGA_SURFACE_STATE_UPDATED;
}
}
@@ -901,12 +883,6 @@ svga_texture_create(struct pipe_screen *screen,
goto fail_notex;
}
- tex->rendered_to = CALLOC(template->depth0 * template->array_size,
- sizeof(tex->rendered_to[0]));
- if (!tex->rendered_to) {
- goto fail;
- }
-
tex->dirty = CALLOC(template->depth0 * template->array_size,
sizeof(tex->dirty[0]));
if (!tex->dirty) {
@@ -1062,14 +1038,22 @@ svga_texture_create(struct pipe_screen *screen,
goto fail;
}
- /* Use typeless formats for sRGB and depth resources. Typeless
- * formats can be reinterpreted as other formats. For example,
- * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as
- * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM.
- */
- if (svgascreen->sws->have_vgpu10 &&
- (util_format_is_srgb(template->format) ||
- format_has_depth(template->format))) {
+ bool use_typeless = FALSE;
+ if (svgascreen->sws->have_gl43) {
+ /* Do not use typeless for SHARED, SCANOUT or DISPLAY_TARGET surfaces. */
+ use_typeless = !(bindings & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT |
+ PIPE_BIND_DISPLAY_TARGET));
+ } else if (svgascreen->sws->have_vgpu10) {
+ /* For VGPU10 device, use typeless formats only for sRGB and depth resources
+ * if they do not have SHARED, SCANOUT or DISPLAY_TARGET bind flags
+ */
+ use_typeless = (util_format_is_srgb(template->format) ||
+ format_has_depth(template->format)) &&
+ !(bindings & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT |
+ PIPE_BIND_DISPLAY_TARGET));
+ }
+
+ if (use_typeless) {
SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format);
if (0) {
debug_printf("Convert resource type %s -> %s (bind 0x%x)\n",
@@ -1090,13 +1074,35 @@ svga_texture_create(struct pipe_screen *screen,
tex->key.format = typeless;
}
+ if (svgascreen->sws->have_sm5 &&
+ bindings & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET)) {
+ if (template->nr_samples < 2 &&
+ screen->is_format_supported(screen, template->format,
+ template->target,
+ template->nr_samples,
+ template->nr_storage_samples,
+ PIPE_BIND_SHADER_IMAGE)) {
+ /* Any non-multisampled texture that can be used as a render target
+ * or sampler view can be bound to an image unit.
+ * So make sure to set the UAV flag here.
+ */
+ tex->key.flags |= SVGA3D_SURFACE_BIND_UAVIEW;
+ }
+ }
+
SVGA_DBG(DEBUG_DMA, "surface_create for texture\n");
+ boolean invalidated;
tex->handle = svga_screen_surface_create(svgascreen, bindings,
tex->b.usage,
- &tex->validated, &tex->key);
+ &invalidated, &tex->key);
if (!tex->handle) {
goto fail;
}
+ if (invalidated) {
+ tex->surface_state = SVGA_SURFACE_STATE_INVALIDATED;
+ } else {
+ tex->surface_state = SVGA_SURFACE_STATE_CREATED;
+ }
SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle);
@@ -1122,8 +1128,6 @@ svga_texture_create(struct pipe_screen *screen,
fail:
if (tex->dirty)
FREE(tex->dirty);
- if (tex->rendered_to)
- FREE(tex->rendered_to);
if (tex->defined)
FREE(tex->defined);
FREE(tex);
@@ -1181,9 +1185,24 @@ svga_texture_from_handle(struct pipe_screen *screen,
tex->key.format = format;
tex->handle = srf;
- tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
- if (!tex->rendered_to)
- goto out_no_rendered_to;
+
+ /* set bind flags for the imported texture handle according to the bind
+ * flags in the template
+ */
+ if (template->bind & PIPE_BIND_RENDER_TARGET){
+ tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
+ tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET;
+ }
+
+ if (template->bind & PIPE_BIND_DEPTH_STENCIL) {
+ tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+ tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+ }
+
+ if (template->bind & PIPE_BIND_SAMPLER_VIEW) {
+ tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+ tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
+ }
tex->dirty = CALLOC(1, sizeof(tex->dirty[0]));
if (!tex->dirty)
@@ -1196,8 +1215,6 @@ svga_texture_from_handle(struct pipe_screen *screen,
return &tex->b;
out_no_dirty:
- FREE(tex->rendered_to);
-out_no_rendered_to:
FREE(tex->defined);
out_no_defined:
FREE(tex);
@@ -1222,10 +1239,6 @@ svga_texture_generate_mipmap(struct pipe_context *pipe,
assert(svga_have_vgpu10(svga));
- /* Only support 2D texture for now */
- if (pt->target != PIPE_TEXTURE_2D)
- return false;
-
/* Fallback to the mipmap generation utility for those formats that
* do not support hw generate mipmap
*/
@@ -1239,11 +1252,21 @@ svga_texture_generate_mipmap(struct pipe_context *pipe,
return false;
templ.format = format;
+ templ.target = pt->target;
templ.u.tex.first_layer = first_layer;
templ.u.tex.last_layer = last_layer;
templ.u.tex.first_level = base_level;
templ.u.tex.last_level = last_level;
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ /**
+ * state tracker generates mipmap one face at a time.
+ * But SVGA generates mipmap for the entire cubemap.
+ */
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer = 5;
+ }
+
psv = pipe->create_sampler_view(pipe, pt, &templ);
if (psv == NULL)
return false;
@@ -1254,6 +1277,9 @@ svga_texture_generate_mipmap(struct pipe_context *pipe,
SVGA_RETRY(svga, SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle));
pipe_sampler_view_reference(&psv, NULL);
+ /* Mark the texture surface as RENDERED */
+ svga_set_texture_rendered_to(tex);
+
svga->hud.num_generate_mipmap++;
return true;
@@ -1449,11 +1475,11 @@ svga_texture_transfer_unmap_upload(struct svga_context *svga,
dstsurf, subResource,
&st->upload.box));
offset += st->base.layer_stride;
-
- /* Set rendered-to flag */
- svga_set_texture_rendered_to(tex, layer, st->base.level);
}
+ /* Mark the texture surface state as RENDERED */
+ svga_set_texture_rendered_to(tex);
+
pipe_resource_reference(&st->upload.buf, NULL);
}
@@ -1476,8 +1502,12 @@ svga_texture_device_format_has_alpha(struct pipe_resource *texture)
/* the svga_texture() call below is invalid for PIPE_BUFFER resources */
assert(texture->target != PIPE_BUFFER);
- enum svga3d_block_desc block_desc =
- svga3dsurface_get_desc(svga_texture(texture)->key.format)->block_desc;
+ const struct svga3d_surface_desc *surf_desc =
+ svga3dsurface_get_desc(svga_texture(texture)->key.format);
+
+ enum svga3d_block_desc block_desc = surf_desc->block_desc;
- return !!(block_desc & SVGA3DBLOCKDESC_ALPHA);
+ return !!((block_desc & SVGA3DBLOCKDESC_ALPHA) ||
+ ((block_desc == SVGA3DBLOCKDESC_TYPELESS) &&
+ (surf_desc->bitDepth.alpha > 0)));
}
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h
index cbfc46426..e1872faad 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h
@@ -33,6 +33,7 @@
#include "util/u_memory.h"
#include "util/u_transfer.h"
#include "svga_screen_cache.h"
+#include "svga_context.h"
struct pipe_context;
struct pipe_screen;
@@ -74,13 +75,6 @@ struct svga_texture
struct svga_winsys_surface *handle;
/**
- * Whether the host side surface is validated, either through the
- * InvalidateGBSurface command or after the surface is updated
- * or rendered to.
- */
- boolean validated;
-
- /**
* Whether the host side surface is imported and not created by this
* driver.
*/
@@ -101,6 +95,8 @@ struct svga_texture
*/
ushort *dirty;
+ enum svga_surface_state surface_state;
+
/**
* A cached backing host side surface to be used if this texture is being
* used for rendering and sampling at the same time.
@@ -209,7 +205,6 @@ svga_define_texture_level(struct svga_texture *tex,
{
check_face_level(tex, face, level);
tex->defined[face] |= 1 << level;
- tex->validated = TRUE;
}
@@ -223,30 +218,22 @@ svga_is_texture_level_defined(const struct svga_texture *tex,
static inline void
-svga_set_texture_rendered_to(struct svga_texture *tex,
- unsigned face, unsigned level)
+svga_set_texture_rendered_to(struct svga_texture *tex)
{
- check_face_level(tex, face, level);
- tex->rendered_to[face] |= 1 << level;
- tex->validated = TRUE;
+ tex->surface_state = SVGA_SURFACE_STATE_RENDERED;
}
static inline void
-svga_clear_texture_rendered_to(struct svga_texture *tex,
- unsigned face, unsigned level)
+svga_clear_texture_rendered_to(struct svga_texture *tex)
{
- check_face_level(tex, face, level);
- tex->rendered_to[face] &= ~(1 << level);
+ tex->surface_state = SVGA_SURFACE_STATE_UPDATED;
}
-
static inline boolean
-svga_was_texture_rendered_to(const struct svga_texture *tex,
- unsigned face, unsigned level)
+svga_was_texture_rendered_to(const struct svga_texture *tex)
{
- check_face_level(tex, face, level);
- return !!(tex->rendered_to[face] & (1 << level));
+ return (tex->surface_state == SVGA_SURFACE_STATE_RENDERED);
}
static inline void
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c
index fa0c02604..7adbee06d 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c
@@ -223,7 +223,9 @@ svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
if (v->handle != tex->handle) {
struct svga_screen *ss = svga_screen(v->texture->screen);
SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
- svga_screen_surface_destroy(ss, &v->key, &v->handle);
+ svga_screen_surface_destroy(ss, &v->key,
+ svga_was_texture_rendered_to(tex),
+ &v->handle);
}
/* Note: we're not refcounting the texture resource here to avoid
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen.c b/lib/mesa/src/gallium/drivers/svga/svga_screen.c
index 2537ac7a8..22cd21f7e 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_screen.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.c
@@ -74,6 +74,9 @@ static const struct debug_named_value svga_debug_flags[] = {
{ "streamout", DEBUG_STREAMOUT, NULL },
{ "query", DEBUG_QUERY, NULL },
{ "samplers", DEBUG_SAMPLERS, NULL },
+ { "image", DEBUG_IMAGE, NULL },
+ { "uav", DEBUG_UAV, NULL },
+ { "retry", DEBUG_RETRY, NULL },
DEBUG_NAMED_VALUE_END
};
#endif
@@ -153,14 +156,22 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
struct svga_winsys_screen *sws = svgascreen->sws;
switch (param) {
+ case PIPE_CAPF_MIN_LINE_WIDTH:
+ case PIPE_CAPF_MIN_LINE_WIDTH_AA:
+ case PIPE_CAPF_MIN_POINT_SIZE:
+ case PIPE_CAPF_MIN_POINT_SIZE_AA:
+ return 1;
+ case PIPE_CAPF_POINT_SIZE_GRANULARITY:
+ case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
+ return 0.1;
case PIPE_CAPF_MAX_LINE_WIDTH:
return svgascreen->maxLineWidth;
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
return svgascreen->maxLineWidthAA;
- case PIPE_CAPF_MAX_POINT_WIDTH:
+ case PIPE_CAPF_MAX_POINT_SIZE:
FALLTHROUGH;
- case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ case PIPE_CAPF_MAX_POINT_SIZE_AA:
return svgascreen->maxPointSize;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
@@ -212,6 +223,9 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return sws->have_vgpu10;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return sws->have_vgpu10 ? 16 : 0;
+
case PIPE_CAP_TEXTURE_SWIZZLE:
return 1;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -251,11 +265,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
return 1;
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
return 1;
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return sws->have_vgpu10;
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
return !sws->have_vgpu10;
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
@@ -267,7 +281,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1; /* expected for GL_ARB_framebuffer_object */
case PIPE_CAP_GLSL_FEATURE_LEVEL:
- if (sws->have_sm5) {
+ case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
+ if (sws->have_gl43) {
+ return 430;
+ } else if (sws->have_sm5) {
return 410;
} else if (sws->have_vgpu10) {
return 330;
@@ -275,10 +292,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 120;
}
- case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
- return sws->have_sm5 ? 410 : (sws->have_vgpu10 ? 330 : 120);
-
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ case PIPE_CAP_TEXTURE_TRANSFER_MODES:
return 0;
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
@@ -290,7 +304,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_QUERY_TIMESTAMP:
- case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VS_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_FAKE_SW_MSAA:
@@ -360,12 +374,28 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_VERTEX_STREAMS:
return sws->have_sm5 ? 4 : 0;
case PIPE_CAP_COMPUTE:
- return 0;
+ return sws->have_gl43;
case PIPE_CAP_MAX_VARYINGS:
- return sws->have_vgpu10 ? VGPU10_MAX_FS_INPUTS : 10;
+ /* According to the spec, max varyings does not include the components
+ * for position, so remove one count from the max for position.
+ */
+ return sws->have_vgpu10 ? VGPU10_MAX_FS_INPUTS-1 : 10;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
return sws->have_coherent;
+ case PIPE_CAP_START_INSTANCE:
+ return sws->have_sm5;
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ return sws->have_sm5;
+
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ return sws->have_gl43;
+
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ return sws->have_gl43;
+
+ case PIPE_CAP_CLIP_HALFZ:
+ return sws->have_gl43;
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
@@ -374,6 +404,15 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
return 0;
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return sws->have_gl43 ? 16 : 0;
+
+ case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+ case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS:
+ return sws->have_gl43 ? SVGA_MAX_SHADER_BUFFERS : 0;
+ case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS:
+ case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS:
+ return sws->have_gl43 ? SVGA_MAX_ATOMIC_BUFFERS : 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
@@ -393,7 +432,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_VENDOR_ID:
return 0x15ad; /* VMware Inc. */
case PIPE_CAP_DEVICE_ID:
- return 0x0405; /* assume SVGA II */
+ if (sws->device_id) {
+ return sws->device_id;
+ } else {
+ return 0x0405; /* assume SVGA II */
+ }
case PIPE_CAP_ACCELERATED:
return 0; /* XXX: */
case PIPE_CAP_VIDEO_MEMORY:
@@ -418,6 +461,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
/* Verify this once protocol is finalized. Setting it to minimum value. */
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
return sws->have_sm5 ? 30 : 0;
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ return 1;
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ return 1;
+ case PIPE_CAP_IMAGE_STORE_FORMATTED:
+ return sws->have_gl43;
default:
return u_pipe_screen_get_param_defaults(screen, param);
}
@@ -613,7 +662,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
(shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL))
return 0;
- if (shader == PIPE_SHADER_COMPUTE)
+ if ((!sws->have_gl43) && (shader == PIPE_SHADER_COMPUTE))
return 0;
/* NOTE: we do not query the device for any caps/limits at this time */
@@ -631,13 +680,13 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
if (shader == PIPE_SHADER_FRAGMENT)
return VGPU10_MAX_FS_INPUTS;
else if (shader == PIPE_SHADER_GEOMETRY)
- return VGPU10_MAX_GS_INPUTS;
+ return svgascreen->max_gs_inputs;
else if (shader == PIPE_SHADER_TESS_CTRL)
return VGPU11_MAX_HS_INPUT_CONTROL_POINTS;
else if (shader == PIPE_SHADER_TESS_EVAL)
return VGPU11_MAX_DS_INPUT_CONTROL_POINTS;
else
- return VGPU10_MAX_VS_INPUTS;
+ return svgascreen->max_vs_inputs;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
if (shader == PIPE_SHADER_FRAGMENT)
return VGPU10_MAX_FS_OUTPUTS;
@@ -648,7 +697,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
else if (shader == PIPE_SHADER_TESS_EVAL)
return VGPU11_MAX_DS_OUTPUTS;
else
- return VGPU10_MAX_VS_OUTPUTS;
+ return svgascreen->max_vs_outputs;
+
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
@@ -673,23 +723,38 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
return FALSE;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
- return SVGA3D_DX_MAX_SAMPLERS;
+ return sws->have_gl43 ? PIPE_MAX_SAMPLERS : SVGA3D_DX_MAX_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
- return 1 << PIPE_SHADER_IR_TGSI;
+ if (sws->have_gl43)
+ return 1 << PIPE_SHADER_IR_TGSI;
+ else
+ return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+ /* For the above cases, we rely on the GLSL compiler to translate/lower
+ * the TGSI instruction into other instructions we do support.
+ */
+ return 0;
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
- case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return sws->have_sm5;
+
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ return sws->have_gl43 ? SVGA_MAX_IMAGES : 0;
+
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return sws->have_gl43 ? SVGA_MAX_SHADER_BUFFERS : 0;
+
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+ return sws->have_gl43 ? SVGA_MAX_ATOMIC_BUFFERS : 0;
+
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
case PIPE_SHADER_CAP_INT64_ATOMICS:
- case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
- case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -716,6 +781,45 @@ svga_get_shader_param(struct pipe_screen *screen, enum pipe_shader_type shader,
}
+static int
+svga_sm5_get_compute_param(struct pipe_screen *screen,
+ enum pipe_shader_ir ir_type,
+ enum pipe_compute_cap param,
+ void *ret)
+{
+ ASSERTED struct svga_screen *svgascreen = svga_screen(screen);
+ ASSERTED struct svga_winsys_screen *sws = svgascreen->sws;
+ uint64_t *iret = (uint64_t *)ret;
+
+ assert(sws->have_gl43);
+ assert(ir_type == PIPE_SHADER_IR_TGSI);
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ iret[0] = 65535;
+ iret[1] = 65535;
+ iret[2] = 65535;
+ return 3 * sizeof(uint64_t);
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ iret[0] = 1024;
+ iret[1] = 1024;
+ iret[2] = 64;
+ return 3 * sizeof(uint64_t);
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ *iret = 1024;
+ return sizeof(uint64_t);
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+ *iret = 32768;
+ return sizeof(uint64_t);
+ case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+ *iret = 0;
+ return sizeof(uint64_t);
+ default:
+ debug_printf("Unexpected compute param %u\n", param);
+ }
+ return 0;
+}
+
static void
svga_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
@@ -948,6 +1052,9 @@ svga_screen_create(struct svga_winsys_screen *sws)
screen->fence_get_fd = svga_fence_get_fd;
screen->get_driver_query_info = svga_get_driver_query_info;
+
+ screen->get_compute_param = svga_sm5_get_compute_param;
+
svgascreen->sws = sws;
svga_init_screen_resource_functions(svgascreen);
@@ -965,7 +1072,29 @@ svga_screen_create(struct svga_winsys_screen *sws)
goto error2;
}
+ if (sws->have_gl43) {
+ svgascreen->forcedSampleCount =
+ get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FORCED_SAMPLE_COUNT, 0);
+
+ sws->have_gl43 = sws->have_gl43 && (svgascreen->forcedSampleCount >= 4);
+
+ /* Allow a temporary environment variable to enable/disable GL43 support.
+ */
+ sws->have_gl43 =
+ debug_get_bool_option("SVGA_GL43", sws->have_gl43);
+
+ svgascreen->debug.sampler_state_mapping =
+ debug_get_bool_option("SVGA_SAMPLER_STATE_MAPPING", FALSE);
+ }
+ else {
+ /* sampler state mapping code is only enabled with GL43
+ * due to the limitation in SW Renderer. (VMware bug 2825014)
+ */
+ svgascreen->debug.sampler_state_mapping = FALSE;
+ }
+
debug_printf("%s enabled\n",
+ sws->have_gl43 ? "SM5+" :
sws->have_sm5 ? "SM5" :
sws->have_sm4_1 ? "SM4_1" :
sws->have_vgpu10 ? "VGPU10" : "VGPU9");
@@ -1041,10 +1170,15 @@ svga_screen_create(struct svga_winsys_screen *sws)
}
/* Maximum number of constant buffers */
- svgascreen->max_const_buffers =
- get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1);
- svgascreen->max_const_buffers = MIN2(svgascreen->max_const_buffers,
- SVGA_MAX_CONST_BUFS);
+ if (sws->have_gl43) {
+ svgascreen->max_const_buffers = SVGA_MAX_CONST_BUFS;
+ }
+ else {
+ svgascreen->max_const_buffers =
+ get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1);
+ svgascreen->max_const_buffers = MIN2(svgascreen->max_const_buffers,
+ SVGA_MAX_CONST_BUFS);
+ }
svgascreen->haveBlendLogicops =
get_bool_cap(sws, SVGA3D_DEVCAP_LOGIC_BLENDOPS, FALSE);
@@ -1052,6 +1186,18 @@ svga_screen_create(struct svga_winsys_screen *sws)
screen->is_format_supported = svga_is_dx_format_supported;
svgascreen->max_viewports = SVGA3D_DX_MAX_VIEWPORTS;
+
+ /* Shader limits */
+ if (sws->have_sm4_1) {
+ svgascreen->max_vs_inputs = VGPU10_1_MAX_VS_INPUTS;
+ svgascreen->max_vs_outputs = VGPU10_1_MAX_VS_OUTPUTS;
+ svgascreen->max_gs_inputs = VGPU10_1_MAX_GS_INPUTS;
+ }
+ else {
+ svgascreen->max_vs_inputs = VGPU10_MAX_VS_INPUTS;
+ svgascreen->max_vs_outputs = VGPU10_MAX_VS_OUTPUTS;
+ svgascreen->max_gs_inputs = VGPU10_MAX_GS_INPUTS;
+ }
}
else {
/* VGPU9 */
@@ -1089,6 +1235,11 @@ svga_screen_create(struct svga_winsys_screen *sws)
/* Only one viewport */
svgascreen->max_viewports = 1;
+
+ /* Shader limits */
+ svgascreen->max_vs_inputs = 16;
+ svgascreen->max_vs_outputs = 10;
+ svgascreen->max_gs_inputs = 0;
}
/* common VGPU9 / VGPU10 caps */
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen.h b/lib/mesa/src/gallium/drivers/svga/svga_screen.h
index aa0001b11..1d2db59a3 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_screen.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.h
@@ -58,14 +58,21 @@ struct svga_screen
unsigned max_const_buffers;
unsigned max_viewports;
unsigned ms_samples;
+ unsigned forcedSampleCount; /* available with GL43 capable device only */
+ unsigned max_vs_inputs;
+ unsigned max_vs_outputs;
+ unsigned max_gs_inputs;
struct {
- boolean force_level_surface_view;
- boolean force_surface_view;
- boolean no_surface_view;
- boolean force_sampler_view;
- boolean no_sampler_view;
- boolean no_cache_index_buffers;
+ unsigned force_level_surface_view:1;
+ unsigned force_surface_view:1;
+ unsigned no_surface_view:1;
+ unsigned force_sampler_view:1;
+ unsigned no_sampler_view:1;
+ unsigned no_cache_index_buffers:1;
+ unsigned tessellation:1;
+ unsigned sampler_state_mapping:1;
+ unsigned pad:24;
} debug;
unsigned texture_timestamp;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c
index aba6e304f..7765a15aa 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c
@@ -212,6 +212,7 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen,
static void
svga_screen_cache_add(struct svga_screen *svgascreen,
const struct svga_host_surface_cache_key *key,
+ boolean to_invalidate,
struct svga_winsys_surface **p_handle)
{
struct svga_host_surface_cache *cache = &svgascreen->cache;
@@ -293,8 +294,12 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
"cache sid %p\n", entry->handle);
/* If we don't have gb objects, we don't need to invalidate. */
- if (sws->have_gb_objects)
- list_add(&entry->head, &cache->validated);
+ if (sws->have_gb_objects) {
+ if (to_invalidate)
+ list_add(&entry->head, &cache->validated);
+ else
+ list_add(&entry->head, &cache->invalidated);
+ }
else
list_add(&entry->head, &cache->invalidated);
@@ -603,6 +608,7 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
void
svga_screen_surface_destroy(struct svga_screen *svgascreen,
const struct svga_host_surface_cache_key *key,
+ boolean to_invalidate,
struct svga_winsys_surface **p_handle)
{
struct svga_winsys_screen *sws = svgascreen->sws;
@@ -612,7 +618,7 @@ svga_screen_surface_destroy(struct svga_screen *svgascreen,
* that case.
*/
if (SVGA_SURFACE_CACHE_ENABLED && key->cachable) {
- svga_screen_cache_add(svgascreen, key, p_handle);
+ svga_screen_cache_add(svgascreen, key, to_invalidate, p_handle);
}
else {
SVGA_DBG(DEBUG_DMA,
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h
index c2bfc076f..5793869f3 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h
@@ -148,12 +148,13 @@ svga_screen_cache_init(struct svga_screen *svgascreen);
struct svga_winsys_surface *
svga_screen_surface_create(struct svga_screen *svgascreen,
unsigned bind_flags, enum pipe_resource_usage usage,
- boolean *validated,
+ boolean *invalidated,
struct svga_host_surface_cache_key *key);
void
svga_screen_surface_destroy(struct svga_screen *svgascreen,
const struct svga_host_surface_cache_key *key,
+ boolean to_invalidate,
struct svga_winsys_surface **handle);
void
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.c b/lib/mesa/src/gallium/drivers/svga/svga_shader.c
index 3c48d6724..68883a713 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_shader.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.c
@@ -223,6 +223,16 @@ static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
PIPE_SWIZZLE_NONE
};
+static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_NONE
+};
+
static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)
@@ -243,6 +253,17 @@ vgpu10_return_type(enum pipe_format format)
/**
+ * A helper function to return TRUE if the specified format
+ * is a supported format for sample_c instruction.
+ */
+static bool
+isValidSampleCFormat(enum pipe_format format)
+{
+ return util_format_is_depth_or_stencil(format);
+}
+
+
+/**
* Initialize the shader-neutral fields of svga_compile_key from context
* state. This is basically the texture-related state.
*/
@@ -253,15 +274,28 @@ svga_init_shader_key_common(const struct svga_context *svga,
struct svga_compile_key *key)
{
unsigned i, idx = 0;
+ unsigned sampler_slots = 0;
assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
/* In case the number of samplers and sampler_views doesn't match,
- * loop over the lower of the two counts.
+ * loop over the larger of the two counts.
*/
key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
svga->curr.num_samplers[shader_type]);
+ key->num_samplers = 0;
+
+ /* Set sampler_state_mapping only if GL43 is supported and
+ * the number of samplers exceeds SVGA limit or the sampler state
+ * mapping env is set.
+ */
+ boolean sampler_state_mapping =
+ svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);
+
+ key->sampler_state_mapping =
+ key->num_textures && sampler_state_mapping ? 1 : 0;
+
for (i = 0; i < key->num_textures; i++) {
struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
const struct svga_sampler_state
@@ -269,22 +303,21 @@ svga_init_shader_key_common(const struct svga_context *svga,
if (view) {
assert(view->texture);
- assert(view->texture->target < (1 << 4)); /* texture_target:4 */
enum pipe_texture_target target = view->target;
+ assert(target < (1 << 4)); /* texture_target:4 */
key->tex[i].target = target;
key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
key->tex[i].sampler_view = 1;
-
/* 1D/2D array textures with one slice and cube map array textures
* with one cube are treated as non-arrays by the SVGA3D device.
* Set the is_array flag only if we know that we have more than 1
* element. This will be used to select shader instruction/resource
* types during shader translation.
*/
- switch (view->texture->target) {
+ switch (target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
key->tex[i].is_array = view->texture->array_size > 1;
@@ -300,10 +333,12 @@ svga_init_shader_key_common(const struct svga_context *svga,
key->tex[i].num_samples = view->texture->nr_samples;
const enum pipe_swizzle *swizzle_tab;
- if (view->texture->target == PIPE_BUFFER) {
+ if (target == PIPE_BUFFER) {
SVGA3dSurfaceFormat svga_format;
unsigned tf_flags;
+ assert(view->texture->target == PIPE_BUFFER);
+
/* Apply any special swizzle mask for the view format if needed */
svga_translate_texture_buffer_view_format(view->format,
@@ -334,11 +369,24 @@ svga_init_shader_key_common(const struct svga_context *svga,
view->texture->format == PIPE_FORMAT_DXT1_SRGB)
swizzle_tab = set_alpha;
+ if (view->format == PIPE_FORMAT_X24S8_UINT ||
+ view->format == PIPE_FORMAT_X32_S8X24_UINT)
+ swizzle_tab = set_YYYY;
+
/* Save the compare function as we need to handle
* depth compare in the shader.
*/
key->tex[i].compare_mode = sampler->compare_mode;
key->tex[i].compare_func = sampler->compare_func;
+
+ /* Set the compare_in_shader bit if the view format
+ * is not a supported format for shadow compare.
+ * In this case, we'll do the comparison in the shader.
+ */
+ if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
+ !isValidSampleCFormat(view->format)) {
+ key->tex[i].compare_in_shader = TRUE;
+ }
}
key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
@@ -364,6 +412,139 @@ svga_init_shader_key_common(const struct svga_context *svga,
key->tex[i].texel_bias = TRUE;
}
}
+
+ if (!sampler_state_mapping) {
+ /* Use the same index if sampler state mapping is not supported */
+ key->tex[i].sampler_index = i;
+ key->num_samplers = i + 1;
+ }
+ else {
+
+ /* The current samplers list can have redundant entries.
+ * In order to allow the number of bound samplers within the
+ * max limit supported by SVGA, we'll recreate the list with
+ * unique sampler state objects only.
+ */
+
+ /* Check to see if this sampler is already on the list.
+ * If so, set the sampler index of this sampler to the
+ * same sampler index.
+ */
+ for (unsigned j = 0; j <= i; j++) {
+ if (svga->curr.sampler[shader_type][j] == sampler) {
+
+ if (!(sampler_slots & (1 << j))) {
+
+ /* if this sampler is not added to the new list yet,
+ * set its sampler index to the next sampler index,
+ * increment the sampler count, and mark this
+ * sampler as added to the list.
+ */
+
+ unsigned next_index =
+ MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS-1);
+
+ key->tex[i].sampler_index = next_index;
+ key->num_samplers = next_index + 1;
+
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /* reserve one slot for the alternate sampler */
+ key->num_samplers++;
+ }
+
+ sampler_slots |= (1 << j);
+ }
+ else {
+ key->tex[i].sampler_index = key->tex[j].sampler_index;
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if (svga_have_gl43(svga)) {
+ if (shader->info.images_declared || shader->info.hw_atomic_declared ||
+ shader->info.shader_buffers_declared) {
+
+ /* Save the uavSpliceIndex which is the index used for the first uav
+ * in the draw pipeline. For compute, uavSpliceIndex is always 0.
+ */
+ if (shader_type != PIPE_SHADER_COMPUTE)
+ key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;
+
+ unsigned uav_splice_index = key->uav_splice_index;
+
+ /* Also get the texture data type to be used in the uav declaration */
+ const struct svga_image_view *cur_image_view =
+ &svga->curr.image_views[shader_type][0];
+
+ for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
+ i++, cur_image_view++) {
+
+ struct pipe_resource *resource = cur_image_view->desc.resource;
+
+ if (resource) {
+ key->images[i].return_type =
+ svga_get_texture_datatype(cur_image_view->desc.format);
+
+ key->images[i].is_array = resource->array_size > 1;
+
+ /* Save the image resource target in the shader key because
+ * for single layer image view, the resource target in the
+ * tgsi shader is changed to a different texture target.
+ */
+ key->images[i].resource_target = resource->target;
+ if (resource->target == PIPE_TEXTURE_3D ||
+ resource->target == PIPE_TEXTURE_1D_ARRAY ||
+ resource->target == PIPE_TEXTURE_2D_ARRAY ||
+ resource->target == PIPE_TEXTURE_CUBE ||
+ resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ key->images[i].is_single_layer =
+ cur_image_view->desc.u.tex.first_layer ==
+ cur_image_view->desc.u.tex.last_layer;
+ }
+
+ key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index;
+ }
+ else
+ key->images[i].uav_index = SVGA3D_INVALID_ID;
+ }
+
+ const struct svga_shader_buffer *cur_sbuf =
+ &svga->curr.shader_buffers[shader_type][0];
+
+ for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
+ i++, cur_sbuf++) {
+
+ if (cur_sbuf->resource)
+ key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index;
+ else
+ key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
+ }
+
+ const struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0];
+
+ for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
+ i++, cur_buf++) {
+
+ if (cur_buf->resource)
+ key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index;
+ else
+ key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
+ }
+ }
+
+ /* Save info about which constant buffers are to be viewed
+ * as raw buffers in the shader key.
+ */
+ if (shader->info.const_buffers_declared &
+ svga->state.raw_constbufs[shader_type]) {
+ key->raw_buffers = svga->state.raw_constbufs[shader_type];
+
+ /* beginning index for srv for raw buffers */
+ key->srv_raw_buf_index = PIPE_MAX_SAMPLERS;
}
}
@@ -605,6 +786,9 @@ svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
case PIPE_SHADER_TESS_CTRL:
variant = CALLOC(1, sizeof(struct svga_tcs_variant));
break;
+ case PIPE_SHADER_COMPUTE:
+ variant = CALLOC(1, sizeof(struct svga_cs_variant));
+ break;
default:
return NULL;
}
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.h b/lib/mesa/src/gallium/drivers/svga/svga_shader.h
index 472499c91..cd64dc61f 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_shader.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.h
@@ -115,20 +115,24 @@ struct svga_compile_key
/* compute shader */
struct {
unsigned grid_size[3];
+ unsigned mem_size;
} cs;
/* any shader type */
int8_t generic_remap_table[MAX_GENERIC_VARYING];
unsigned num_textures:8;
+ unsigned num_samplers:8;
unsigned num_unnormalized_coords:8;
unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
unsigned last_vertex_stage:1;
unsigned clamp_vertex_color:1;
+ unsigned sampler_state_mapping:1; /* Set if use sampler state mapping */
unsigned sprite_origin_lower_left:1;
uint16_t sprite_coord_enable;
struct {
unsigned compare_mode:1;
unsigned compare_func:3;
+ unsigned compare_in_shader:1;
unsigned unnormalized:1;
unsigned texel_bias:1;
unsigned width_height_idx:5; /**< texture unit */
@@ -141,10 +145,25 @@ struct svga_compile_key
unsigned target:4;
unsigned sampler_return_type:4;
unsigned sampler_view:1;
+ unsigned sampler_index:5;
} tex[PIPE_MAX_SAMPLERS];
- /* Note: svga_compile_keys_equal() depends on the variable-size
- * tex[] array being at the end of this structure.
- */
+
+ unsigned uav_splice_index:4; /* starting uav index */
+ unsigned srv_raw_buf_index:8; /* start index for srv raw buffers */
+ unsigned image_size_used:1;
+
+ uint16_t raw_buffers; /* bitmask of raw buffers */
+
+ struct {
+ enum tgsi_return_type return_type;
+ enum pipe_texture_target resource_target;
+ unsigned is_array:1;
+ unsigned is_single_layer:1;
+ unsigned uav_index;
+ } images[PIPE_MAX_SHADER_IMAGES];
+
+ uint32_t shader_buf_uav_index[PIPE_MAX_SHADER_BUFFERS];
+ uint32_t atomic_buf_uav_index[PIPE_MAX_HW_ATOMIC_BUFFERS];
};
/* A key for a variant of token string of a shader */
@@ -222,7 +241,8 @@ struct svga_fs_variant
unsigned fs_shadow_compare_units;
/** For FS-based polygon stipple */
- unsigned pstipple_sampler_unit;
+ unsigned pstipple_sampler_unit:8;
+ unsigned pstipple_sampler_state_index:8;
};
@@ -360,6 +380,7 @@ struct svga_tes_shader
struct svga_compute_shader
{
struct svga_shader base;
+ unsigned shared_mem_size;
};
@@ -367,8 +388,7 @@ static inline boolean
svga_compile_keys_equal(const struct svga_compile_key *a,
const struct svga_compile_key *b)
{
- unsigned key_size =
- (const char *) &a->tex[a->num_textures] - (const char *) a;
+ unsigned key_size = sizeof(*a);
return memcmp(a, b, key_size) == 0;
}
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state.c b/lib/mesa/src/gallium/drivers/svga/svga_state.c
index 4f6af8052..698f53802 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state.c
@@ -138,6 +138,39 @@ static const struct svga_tracked_state *hw_draw_state_sm5[] =
};
+/**
+ * Atoms to update hardware state prior to emitting a draw packet
+ * for a GL43 device, which includes the uav update.
+ *
+ * The list is NULL-terminated.  svga_init_tracked_state() installs it in
+ * state_levels[2] when svga_have_gl43() is true.
+ */
+static const struct svga_tracked_state *hw_draw_state_gl43[] =
+{
+ &svga_need_tgsi_transform,
+ /* uav state comes first ... */
+ &svga_hw_uav,
+ /* ... then the raw buffer srv check: update_rawbuf is documented as
+ * running after uav state is updated and before shader variants are
+ * bound, which is why it sits between svga_hw_uav and the shader atoms.
+ */
+ &svga_need_rawbuf_srv,
+ /* shader atoms */
+ &svga_hw_fs,
+ &svga_hw_gs,
+ &svga_hw_tes,
+ &svga_hw_tcs,
+ &svga_hw_vs,
+ &svga_hw_rss,
+ &svga_hw_sampler,
+ &svga_hw_sampler_bindings,
+ &svga_hw_clip_planes,
+ &svga_hw_vdecl,
+ /* per-stage constants and constant buffer bindings */
+ &svga_hw_fs_constants,
+ &svga_hw_fs_constbufs,
+ &svga_hw_gs_constants,
+ &svga_hw_gs_constbufs,
+ &svga_hw_tes_constants,
+ &svga_hw_tes_constbufs,
+ &svga_hw_tcs_constants,
+ &svga_hw_tcs_constbufs,
+ &svga_hw_vs_constants,
+ &svga_hw_vs_constbufs,
+ NULL
+};
+
+
static const struct svga_tracked_state *swtnl_draw_state[] =
{
&svga_update_swtnl_draw,
@@ -309,7 +342,6 @@ svga_update_state_retry(struct svga_context *svga, unsigned max_level)
}
-
#define EMIT_RS(_rs, _count, _name, _value) \
do { \
_rs[_count].state = _name; \
@@ -383,7 +415,45 @@ svga_init_tracked_state(struct svga_context *svga)
{
/* Set the hw_draw_state atom list to the one for the particular gpu version.
*/
- state_levels[2] = svga_have_sm5(svga) ? hw_draw_state_sm5 :
- (svga_have_vgpu10(svga) ? hw_draw_state_vgpu10 :
- hw_draw_state_vgpu9);
+ state_levels[2] =
+ svga_have_gl43(svga) ? hw_draw_state_gl43 :
+ (svga_have_sm5(svga) ? hw_draw_state_sm5 :
+ ((svga_have_vgpu10(svga) ? hw_draw_state_vgpu10 :
+ hw_draw_state_vgpu9)));
+}
+
+
+/**
+ * Atoms run by svga_update_compute_state() to bring the compute pipeline
+ * state up to date.  The list is NULL-terminated.
+ *
+ * NOTE(review): svga_cs_need_rawbuf_srv (svga_state_constants.c) is
+ * documented as running after the uav update and before the compute
+ * shader variant is bound, but it is not listed here -- confirm whether
+ * that omission is intentional.
+ */
+static const struct svga_tracked_state *compute_state[] =
+{
+ &svga_hw_cs_uav,
+ &svga_hw_cs_sampler,
+ &svga_hw_cs_sampler_bindings,
+ &svga_hw_cs,
+ &svga_hw_cs_constants,
+ &svga_hw_cs_constbufs,
+ NULL
+};
+
+/**
+ * Bring the compute pipeline state up to date.
+ *
+ * The compute_state atoms are run against a local copy of svga->dirty.
+ * If the first attempt fails, the command buffer is flushed and the update
+ * is retried.
+ *
+ * \return true on success (or when nothing was dirty), false if the
+ *         second attempt fails as well.
+ */
+bool
+svga_update_compute_state(struct svga_context *svga)
+{
+   uint64_t pending = svga->dirty;
+   enum pipe_error ret = PIPE_OK;
+
+   /* Nothing dirty: nothing to emit, and svga->dirty is left untouched. */
+   if (pending == 0)
+      return true;
+
+   SVGA_RETRY_OOM(svga, ret, update_state(svga, compute_state, &pending));
+
+   /* Whatever is still set in the local copy was not processed by the
+    * compute pipeline; hand those bits back to the context.
+    */
+   svga->dirty = pending;
+
+   return ret == PIPE_OK;
}
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c
index be1637d7a..1c5c20a58 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c
@@ -25,6 +25,7 @@
**********************************************************/
#include "util/format/u_format.h"
+#include "util/u_bitmask.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "pipe/p_defines.h"
@@ -42,6 +43,44 @@
#include "svga_hw_reg.h"
+/**
+ * Fill the image-size constants for one shader stage.
+ *
+ * One four-dword constant is reserved per image view slot, starting at
+ * *dest:
+ *   x = width  (for PIPE_BUFFER: width0 divided by the block size of the
+ *               view format, i.e. a count of elements)
+ *   y = height (array_size for PIPE_TEXTURE_1D_ARRAY)
+ *   z = depth  (array_size for PIPE_TEXTURE_2D_ARRAY, array_size / 6 for
+ *               PIPE_TEXTURE_CUBE_ARRAY)
+ *   w = 1      (placeholder, see below)
+ * Slots without a bound resource are skipped without being written, so
+ * that every image view lands in the constant matching its slot index.
+ *
+ * \param svga             not used by this function
+ * \param dest             address of the write cursor; *dest is read as
+ *                         the start of the output but is not advanced
+ * \param shader           shader stage whose image views are reported
+ * \param num_image_views  number of image view slots to process
+ * \param images           image views, indexed [shader][slot]
+ * \return the number of constants consumed, always num_image_views
+ */
+static unsigned
+svga_get_image_size_constant(const struct svga_context *svga, float **dest,
+                             enum pipe_shader_type shader,
+                             unsigned num_image_views,
+                             const struct svga_image_view images[PIPE_SHADER_TYPES][SVGA3D_MAX_UAVIEWS])
+{
+   uint32_t *dest_u = (uint32_t *) *dest;
+
+   for (unsigned i = 0; i < num_image_views; i++) {
+      const struct pipe_resource *res = images[shader][i].desc.resource;
+
+      if (!res) {
+         /* Unbound slot: step over its four dwords.  The pointer itself
+          * must advance here; incrementing the value it points at would
+          * shift every following image size into the wrong constant.
+          */
+         dest_u += 4;
+         continue;
+      }
+
+      /* x: width, in elements for buffers */
+      if (res->target == PIPE_BUFFER) {
+         unsigned bytes_per_element =
+            util_format_get_blocksize(images[shader][i].desc.format);
+         *dest_u++ = res->width0 / bytes_per_element;
+      }
+      else
+         *dest_u++ = res->width0;
+
+      /* y: height, or layer count for 1D arrays */
+      if (res->target == PIPE_TEXTURE_1D_ARRAY)
+         *dest_u++ = res->array_size;
+      else
+         *dest_u++ = res->height0;
+
+      /* z: depth, or layer / cube count for 2D and cube arrays */
+      if (res->target == PIPE_TEXTURE_2D_ARRAY)
+         *dest_u++ = res->array_size;
+      else if (res->target == PIPE_TEXTURE_CUBE_ARRAY)
+         *dest_u++ = res->array_size / 6;
+      else
+         *dest_u++ = res->depth0;
+
+      *dest_u++ = 1;  // Later this can be used for sample counts
+   }
+   return num_image_views;
+}
+
+
/*
* Don't try to send more than 4kb of successive constants.
*/
@@ -104,6 +143,14 @@ svga_get_extra_constants_common(const struct svga_context *svga,
}
}
+ /* image_size */
+ if (variant->key.image_size_used) {
+ count += svga_get_image_size_constant(svga, &dest, shader,
+ svga->state.hw_draw.num_image_views[shader],
+ svga->state.hw_draw.image_views);
+ }
+
+
return count;
}
@@ -572,6 +619,121 @@ emit_consts_vgpu9(struct svga_context *svga, enum pipe_shader_type shader)
/**
+ * A helper function to destroy any pending unused srv.
+ */
+void
+svga_destroy_rawbuf_srv(struct svga_context *svga)
+{
+   unsigned srvid;
+
+   /* Visit every id recorded in the to-free bitmask.  Each visited id is
+    * cleared below, so restarting the search from the same position moves
+    * on to the next pending id.
+    */
+   for (srvid = util_bitmask_get_next_index(svga->sampler_view_to_free_id_bm,
+                                            0);
+        srvid != UTIL_BITMASK_INVALID_INDEX;
+        srvid = util_bitmask_get_next_index(svga->sampler_view_to_free_id_bm,
+                                            srvid)) {
+
+      SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc,
+                                                               srvid));
+
+      /* Release the id itself, then drop it from the pending list. */
+      util_bitmask_clear(svga->sampler_view_id_bm, srvid);
+      util_bitmask_clear(svga->sampler_view_to_free_id_bm, srvid);
+   }
+}
+
+/**
+ * A helper function to emit a constant buffer as a raw buffer shader
+ * resource view (srv).
+ *
+ * The srv is bound at shader resource slot (slot + PIPE_MAX_SAMPLERS).
+ * The view created for a slot is cached in svga->state.hw_draw.rawbufs and
+ * reused as long as buffer, offset and size are unchanged.  When a new
+ * view replaces a cached one, the old view id is queued on
+ * sampler_view_to_free_id_bm for later destruction.
+ *
+ * \param svga           the context
+ * \param slot           constant buffer slot
+ * \param shader         shader stage
+ * \param buffer_offset  start of the range; divided by 4 to obtain the
+ *                       first element of the view
+ * \param buffer_size    size of the range; divided by 4 to obtain the
+ *                       element count of the view
+ * \param buffer         buffer to bind, or NULL to unbind the raw buffer
+ *                       currently enabled in this slot (a no-op when the
+ *                       slot is not enabled)
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if no buffer handle
+ *         could be obtained, or the error returned by the device commands.
+ */
+static enum pipe_error
+emit_rawbuf(struct svga_context *svga,
+            unsigned slot,
+            enum pipe_shader_type shader,
+            unsigned buffer_offset,
+            unsigned buffer_size,
+            void *buffer)
+{
+   enum pipe_error ret = PIPE_OK;
+   struct svga_raw_buffer *rawbuf = &svga->state.hw_draw.rawbufs[shader][slot];
+   struct svga_winsys_surface *buf_handle = NULL;
+   unsigned srvid = SVGA3D_INVALID_ID;
+   unsigned enabled_rawbufs = svga->state.hw_draw.enabled_rawbufs[shader];
+
+   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITRAWBUFFER);
+
+   if (buffer == NULL) {
+      /* Unbind request: nothing to do if the slot is not enabled. */
+      if ((svga->state.hw_draw.enabled_rawbufs[shader] & (1 << slot)) == 0) {
+         goto done;
+      }
+      enabled_rawbufs &= ~(1 << slot);
+   }
+   else {
+      if ((rawbuf->buffer_offset != buffer_offset) ||
+          (rawbuf->buffer_size != buffer_size) ||
+          (rawbuf->buffer != buffer)) {
+
+         buf_handle = svga_buffer_handle(svga, buffer,
+                                         PIPE_BIND_SAMPLER_VIEW);
+         if (!buf_handle) {
+            ret = PIPE_ERROR_OUT_OF_MEMORY;
+            goto done;
+         }
+
+         /* Create a srv for the constant buffer */
+         srvid = util_bitmask_add(svga->sampler_view_id_bm);
+
+         SVGA3dShaderResourceViewDesc viewDesc;
+         viewDesc.bufferex.firstElement = buffer_offset / 4;
+         viewDesc.bufferex.numElements = buffer_size / 4;
+         viewDesc.bufferex.flags = SVGA3D_BUFFEREX_SRV_RAW;
+
+         ret = SVGA3D_vgpu10_DefineShaderResourceView(svga->swc,
+                  srvid, buf_handle, SVGA3D_R32_TYPELESS,
+                  SVGA3D_RESOURCE_BUFFEREX, &viewDesc);
+
+         if (ret != PIPE_OK) {
+            util_bitmask_clear(svga->sampler_view_id_bm, srvid);
+            goto done;
+         }
+
+         /* Retire the previous srv only now that its replacement exists.
+          * On the failure paths above the slot keeps its old, still valid
+          * srv, so the cached attributes never describe a missing view.
+          */
+         if (rawbuf->srvid != SVGA3D_INVALID_ID) {
+            util_bitmask_set(svga->sampler_view_to_free_id_bm, rawbuf->srvid);
+         }
+
+         /* Save the current raw buffer attributes in the slot.  The offset
+          * must be recorded as well: it is part of the comparison above,
+          * and a stale value could let a later bind at a different offset
+          * reuse this view.
+          */
+         rawbuf->srvid = srvid;
+         rawbuf->buffer_offset = buffer_offset;
+         rawbuf->buffer_size = buffer_size;
+         rawbuf->buffer = buffer;
+         rawbuf->handle = buf_handle;
+
+         SVGA_STATS_COUNT_INC(svga_sws(svga), SVGA_STATS_COUNT_RAWBUFFERSRVIEW);
+      }
+      else {
+         /* Same buffer attributes in the slot. Can use the same SRV. */
+         assert(rawbuf->srvid != SVGA3D_INVALID_ID);
+         srvid = rawbuf->srvid;
+         buf_handle = rawbuf->handle;
+      }
+      enabled_rawbufs |= (1 << slot);
+   }
+
+   /* For an unbind, srvid is SVGA3D_INVALID_ID and buf_handle is NULL. */
+   ret = SVGA3D_vgpu10_SetShaderResources(svga->swc,
+                                          svga_shader_type(shader),
+                                          slot + PIPE_MAX_SAMPLERS,
+                                          1,
+                                          &srvid,
+                                          &buf_handle);
+   if (ret != PIPE_OK) {
+      goto done;
+   }
+
+   /* Save the enabled rawbuf state */
+   svga->state.hw_draw.enabled_rawbufs[shader] = enabled_rawbufs;
+
+done:
+   SVGA_STATS_TIME_POP(svga_sws(svga));
+   return ret;
+}
+
+
+/**
* A helper function to emit a constant buffer binding at the
* specified slot for the specified shader type
*/
@@ -678,6 +840,9 @@ emit_constbuf(struct svga_context *svga,
assert(new_buf_size % 16 == 0);
+ /* clamp the buf size before sending the command */
+ new_buf_size = MIN2(new_buf_size, SVGA3D_DX_MAX_CONSTBUF_BINDING_SIZE);
+
const struct svga_screen *screen = svga_screen(svga->pipe.screen);
const struct svga_winsys_screen *sws = screen->sws;
@@ -850,11 +1015,42 @@ emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader)
assert(size % 16 == 0);
- ret = emit_constbuf(svga, index, shader, offset, size, buffer,
- 0, 0, NULL);
- if (ret != PIPE_OK)
- return ret;
+ /**
+ * If the buffer has been bound as an uav buffer, it will
+ * need to be bound as a shader resource raw buffer.
+ */
+ if (svga->state.raw_constbufs[shader] & (1 << index)) {
+ ret = emit_rawbuf(svga, index, shader, offset, size, buffer);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+
+ ret = emit_constbuf(svga, index, shader, 0, 0, NULL,
+ 0, 0, NULL);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ /* Remove the rawbuf from the to-be-enabled constbuf list
+ * so the buffer will not be referenced again as constant buffer
+ * at resource validation time.
+ */
+ enabled_constbufs &= ~(1 << index);
+ }
+ else {
+ if (svga->state.hw_draw.enabled_rawbufs[shader] & (1 << index)) {
+ ret = emit_rawbuf(svga, index, shader, offset, size, NULL);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+
+ ret = emit_constbuf(svga, index, shader, offset, size, buffer,
+ 0, 0, NULL);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
svga->hud.num_const_buf_updates++;
}
@@ -909,7 +1105,8 @@ emit_fs_constbuf(struct svga_context *svga, uint64_t dirty)
struct svga_tracked_state svga_hw_fs_constants =
{
"hw fs params",
- (SVGA_NEW_FS_CONSTS |
+ (SVGA_NEW_IMAGE_VIEW |
+ SVGA_NEW_FS_CONSTS |
SVGA_NEW_FS_VARIANT |
SVGA_NEW_TEXTURE_CONSTS),
emit_fs_consts
@@ -972,6 +1169,7 @@ struct svga_tracked_state svga_hw_vs_constants =
{
"hw vs params",
(SVGA_NEW_PRESCALE |
+ SVGA_NEW_IMAGE_VIEW |
SVGA_NEW_VS_CONSTS |
SVGA_NEW_VS_VARIANT |
SVGA_NEW_TEXTURE_CONSTS),
@@ -1040,6 +1238,7 @@ struct svga_tracked_state svga_hw_gs_constants =
{
"hw gs params",
(SVGA_NEW_PRESCALE |
+ SVGA_NEW_IMAGE_VIEW |
SVGA_NEW_GS_CONSTS |
SVGA_NEW_RAST |
SVGA_NEW_GS_VARIANT |
@@ -1102,7 +1301,8 @@ emit_tcs_constbuf(struct svga_context *svga, uint64_t dirty)
struct svga_tracked_state svga_hw_tcs_constants =
{
"hw tcs params",
- (SVGA_NEW_TCS_CONSTS |
+ (SVGA_NEW_IMAGE_VIEW |
+ SVGA_NEW_TCS_CONSTS |
SVGA_NEW_TCS_VARIANT),
emit_tcs_consts
};
@@ -1161,6 +1361,7 @@ struct svga_tracked_state svga_hw_tes_constants =
{
"hw tes params",
(SVGA_NEW_PRESCALE |
+ SVGA_NEW_IMAGE_VIEW |
SVGA_NEW_TES_CONSTS |
SVGA_NEW_TES_VARIANT),
emit_tes_consts
@@ -1173,3 +1374,169 @@ struct svga_tracked_state svga_hw_tes_constbufs =
SVGA_NEW_TES_CONST_BUFFER,
emit_tes_constbuf
};
+
+
+/**
+ * Emit the compute shader constants.
+ * Does nothing when no compute shader variant is bound.
+ */
+static enum pipe_error
+emit_cs_consts(struct svga_context *svga, uint64_t dirty)
+{
+   assert(svga_have_sm5(svga));
+
+   /* SVGA_NEW_CS_VARIANT */
+   if (svga->state.hw_draw.cs == NULL)
+      return PIPE_OK;
+
+   /* SVGA_NEW_CS_CONST_BUFFER */
+   return emit_consts_vgpu10(svga, PIPE_SHADER_COMPUTE);
+}
+
+
+/**
+ * Emit the constant buffer bindings of the compute shader.
+ * Does nothing when no compute shader variant is bound.
+ */
+static enum pipe_error
+emit_cs_constbuf(struct svga_context *svga, uint64_t dirty)
+{
+   /* SVGA_NEW_CS_VARIANT */
+   if (svga->state.hw_draw.cs == NULL)
+      return PIPE_OK;
+
+   /* SVGA_NEW_CS_CONST_BUFFER */
+   assert(svga_have_vgpu10(svga));
+
+   return emit_constbuf_vgpu10(svga, PIPE_SHADER_COMPUTE);
+}
+
+
+/**
+ * Tracked-state atom for the compute shader constants.
+ * Fields, in order: a name string, a mask of SVGA_NEW_* dirty bits, and
+ * the update callback (emit_cs_consts).  Presumably the callback runs
+ * when any bit of the mask is dirty -- the dispatch code is outside this
+ * file; confirm against update_state().
+ */
+struct svga_tracked_state svga_hw_cs_constants =
+{
+ "hw cs params",
+ (SVGA_NEW_IMAGE_VIEW |
+ SVGA_NEW_CS_CONSTS |
+ SVGA_NEW_CS_VARIANT |
+ SVGA_NEW_TEXTURE_CONSTS),
+ emit_cs_consts
+};
+
+
+/**
+ * Tracked-state atom for the compute shader constant buffer bindings:
+ * name string, SVGA_NEW_CS_CONST_BUFFER dirty bit, and the update
+ * callback (emit_cs_constbuf).
+ *
+ * NOTE(review): the name string is the same as the one used by
+ * svga_hw_cs_constants, so the two atoms cannot be told apart by name --
+ * confirm whether that is intended.
+ */
+struct svga_tracked_state svga_hw_cs_constbufs =
+{
+ "hw cs params",
+ SVGA_NEW_CS_CONST_BUFFER,
+ emit_cs_constbuf
+};
+
+
+/**
+ * Recompute, for one shader stage, which constant buffer slots have to be
+ * accessed as raw buffers.
+ *
+ * Every slot that is either dirty or currently enabled in the hardware
+ * state is examined, except slot 0, which is always left out.  A slot's
+ * bit in svga->state.raw_constbufs[shader] is set when the buffer bound
+ * there has its 'uav' flag set, and cleared otherwise (including when
+ * nothing is bound).
+ */
+static void
+update_rawbuf_mask(struct svga_context *svga, enum pipe_shader_type shader)
+{
+   /* Slots to examine: dirty or enabled, with bit 0 masked off. */
+   unsigned pending =
+      (svga->state.dirty_constbufs[shader] |
+       svga->state.hw_draw.enabled_constbufs[shader] | 1u) & ~1u;
+
+   while (pending) {
+      const unsigned slot = u_bit_scan(&pending);
+      const struct svga_buffer *bound =
+         svga_buffer(svga->curr.constbufs[shader][slot].buffer);
+      const bool is_raw = bound && bound->uav;
+
+      if (is_raw)
+         svga->state.raw_constbufs[shader] |= (1 << slot);
+      else
+         svga->state.raw_constbufs[shader] &= ~(1 << slot);
+   }
+}
+
+
+/**
+ * Hardware state update hook that determines, for each graphics shader
+ * stage (vertex through tessellation evaluation), whether any of the
+ * bound constant buffers need to be bound as raw buffer srv.  It is meant
+ * to run after uav state is updated and before shader variants are bound.
+ *
+ * \return always PIPE_OK
+ */
+static enum pipe_error
+update_rawbuf(struct svga_context *svga, uint64 dirty)
+{
+   /* Dirty bit to raise for each stage, indexed by pipe_shader_type. */
+   const uint64_t stage_dirtybit[] = {
+      SVGA_NEW_VS_RAW_BUFFER,   /* PIPE_SHADER_VERTEX */
+      SVGA_NEW_FS_RAW_BUFFER,   /* PIPE_SHADER_FRAGMENT */
+      SVGA_NEW_GS_RAW_BUFFER,   /* PIPE_SHADER_GEOMETRY */
+      SVGA_NEW_TCS_RAW_BUFFER,  /* PIPE_SHADER_TESS_CTRL */
+      SVGA_NEW_TES_RAW_BUFFER,  /* PIPE_SHADER_TESS_EVAL */
+   };
+   enum pipe_shader_type stage;
+
+   for (stage = PIPE_SHADER_VERTEX; stage <= PIPE_SHADER_TESS_EVAL; stage++) {
+      const unsigned prev_mask = svga->state.raw_constbufs[stage];
+
+      update_rawbuf_mask(svga, stage);
+
+      /* A changed mask means the stage needs a new shader variant that
+       * uses srv for ubo access; flag it through SVGA_NEW_XX_RAW_BUFFER.
+       */
+      if (prev_mask != svga->state.raw_constbufs[stage])
+         svga->dirty |= stage_dirtybit[stage];
+   }
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Tracked-state atom that runs update_rawbuf for the graphics stages.
+ * Its dirty mask covers image views, shader buffers and constant buffers.
+ * It is listed in hw_draw_state_gl43 (svga_state.c).
+ */
+struct svga_tracked_state svga_need_rawbuf_srv =
+{
+ "raw buffer srv",
+ (SVGA_NEW_IMAGE_VIEW |
+ SVGA_NEW_SHADER_BUFFER |
+ SVGA_NEW_CONST_BUFFER),
+ update_rawbuf
+};
+
+
+/**
+ * Compute counterpart of update_rawbuf: determines at compute dispatch
+ * time whether any of the bound constant buffers need to be bound as raw
+ * buffer srv.  It is meant to run after uav state is updated and before a
+ * compute shader variant is bound.
+ *
+ * \return always PIPE_OK
+ */
+static enum pipe_error
+update_cs_rawbuf(struct svga_context *svga, uint64 dirty)
+{
+   const enum pipe_shader_type stage = PIPE_SHADER_COMPUTE;
+   const unsigned prev_mask = svga->state.raw_constbufs[stage];
+
+   update_rawbuf_mask(svga, stage);
+
+   /* A changed mask means a new compute shader variant is needed, one
+    * that uses srv for ubo access; flag it via SVGA_NEW_CS_RAW_BUFFER.
+    */
+   if (prev_mask != svga->state.raw_constbufs[stage])
+      svga->dirty |= SVGA_NEW_CS_RAW_BUFFER;
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Tracked-state atom that runs update_cs_rawbuf for the compute stage.
+ * Its dirty mask covers image views, shader buffers and constant buffers.
+ *
+ * NOTE(review): the name string is the same as the one used by
+ * svga_need_rawbuf_srv -- confirm whether that is intended.
+ */
+struct svga_tracked_state svga_cs_need_rawbuf_srv =
+{
+ "raw buffer srv",
+ (SVGA_NEW_IMAGE_VIEW |
+ SVGA_NEW_SHADER_BUFFER |
+ SVGA_NEW_CONST_BUFFER),
+ update_cs_rawbuf
+};
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c
index dacf86c42..9c6997550 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -87,8 +87,7 @@ emit_fb_vgpu9(struct svga_context *svga)
/* Set the rendered-to flag */
struct pipe_surface *s = curr->cbufs[i];
if (s) {
- svga_set_texture_rendered_to(svga_texture(s->texture),
- s->u.tex.first_layer, s->u.tex.level);
+ svga_set_texture_rendered_to(svga_texture(s->texture));
}
}
@@ -119,8 +118,7 @@ emit_fb_vgpu9(struct svga_context *svga)
/* Set the rendered-to flag */
struct pipe_surface *s = curr->zsbuf;
if (s) {
- svga_set_texture_rendered_to(svga_texture(s->texture),
- s->u.tex.first_layer, s->u.tex.level);
+ svga_set_texture_rendered_to(svga_texture(s->texture));
}
}
@@ -225,8 +223,7 @@ emit_fb_vgpu10(struct svga_context *svga)
last_rtv = i;
/* Set the rendered-to flag */
- svga_set_texture_rendered_to(svga_texture(s->texture),
- s->u.tex.first_layer, s->u.tex.level);
+ svga_set_texture_rendered_to(svga_texture(s->texture));
}
else {
rtv[i] = NULL;
@@ -247,8 +244,7 @@ emit_fb_vgpu10(struct svga_context *svga)
}
/* Set the rendered-to flag */
- svga_set_texture_rendered_to(svga_texture(s->texture),
- s->u.tex.first_layer, s->u.tex.level);
+ svga_set_texture_rendered_to(svga_texture(s->texture));
}
else {
dsv = NULL;
@@ -614,6 +610,10 @@ get_viewport_prescale(struct svga_context *svga,
prescale->translate[2] -= 0.5f;
}
+ /* Clamp depth range, making sure it's between 0 and 1 */
+ range_min = CLAMP(range_min, 0.0f, 1.0f);
+ range_max = CLAMP(range_max, 0.0f, 1.0f);
+
if (prescale->enabled) {
float H[2];
float J[2];
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c
index 5f3df6a2b..a8411186f 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c
@@ -369,6 +369,9 @@ make_fs_key(const struct svga_context *svga,
key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
}
+ if (svga_have_gl43(svga))
+ key->image_size_used = fs->base.info.opcode_count[TGSI_OPCODE_RESQ] ? 1 : 0;
+
return PIPE_OK;
}
@@ -498,7 +501,8 @@ struct svga_tracked_state svga_hw_fs =
SVGA_NEW_SAMPLER |
SVGA_NEW_FRAME_BUFFER |
SVGA_NEW_DEPTH_STENCIL_ALPHA |
- SVGA_NEW_BLEND),
+ SVGA_NEW_BLEND |
+ SVGA_NEW_FS_RAW_BUFFER),
emit_hw_fs
};
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c
index 670b757c4..250a74258 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c
@@ -135,6 +135,9 @@ make_gs_key(struct svga_context *svga, struct svga_compile_key *key)
/* Mark this as the last shader in the vertex processing stage */
key->last_vertex_stage = 1;
+
+ if (svga_have_gl43(svga))
+ key->image_size_used = gs->base.info.opcode_count[TGSI_OPCODE_RESQ] ? 1 : 0;
}
@@ -235,6 +238,7 @@ struct svga_tracked_state svga_hw_gs =
SVGA_NEW_TEXTURE_BINDING |
SVGA_NEW_SAMPLER |
SVGA_NEW_RAST |
- SVGA_NEW_NEED_SWTNL),
+ SVGA_NEW_NEED_SWTNL |
+ SVGA_NEW_GS_RAW_BUFFER),
emit_hw_gs
};
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c
index 5a52c25a4..4fbab1743 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -136,7 +136,7 @@ update_need_pipeline(struct svga_context *svga, uint64_t dirty)
if (svga->state.sw.need_pipeline) {
assert(reason);
- pipe_debug_message(&svga->debug.callback, FALLBACK,
+ util_debug_message(&svga->debug.callback, FALLBACK,
"Using semi-fallback for %s", reason);
}
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c
index 8df0f2eca..8cdf60c34 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c
@@ -321,10 +321,9 @@ emit_rss_vgpu9(struct svga_context *svga, uint64_t dirty)
static struct svga_rasterizer_state *
get_no_cull_rasterizer_state(struct svga_context *svga)
{
- const struct svga_rasterizer_state *r = svga->curr.rast;
- unsigned int aa_point = r->templ.point_smooth;
+ struct svga_rasterizer_state *r = svga->curr.rast;
- if (!svga->rasterizer_no_cull[aa_point]) {
+ if (!r->no_cull_rasterizer) {
struct pipe_rasterizer_state rast;
memset(&rast, 0, sizeof(rast));
@@ -341,10 +340,10 @@ get_no_cull_rasterizer_state(struct svga_context *svga)
rast.bottom_edge_rule = r->templ.bottom_edge_rule;
rast.clip_halfz = r->templ.clip_halfz;
- svga->rasterizer_no_cull[aa_point] =
+ r->no_cull_rasterizer =
svga->pipe.create_rasterizer_state(&svga->pipe, &rast);
}
- return svga->rasterizer_no_cull[aa_point];
+ return r->no_cull_rasterizer;
}
@@ -362,6 +361,29 @@ get_no_depth_stencil_test_state(struct svga_context *svga)
}
+/**
+ * A helper function to create an alternate svga rasterizer state object to use
+ * forcedSampleCount to support multisampled framebuffer without attachments.
+ *
+ * Alternate objects are created on first use and cached in
+ * rast->altRastIds[], indexed by sample count.
+ *
+ * \param svga     the context
+ * \param rast     rasterizer state that owns the cache
+ * \param samples  sample count of the attachment-less framebuffer
+ * \return rast->id when samples <= 1; otherwise the id of the alternate
+ *         object for that sample count.  SVGA3D_INVALID_ID is returned
+ *         when samples exceeds SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES, and
+ *         is also what remains cached if defining the object did not
+ *         yield a valid id; emit_rss_vgpu10 treats it as an error.
+ */
+static SVGA3dRasterizerStateId
+get_alt_rasterizer_state_id(struct svga_context *svga,
+                            struct svga_rasterizer_state *rast,
+                            unsigned samples)
+{
+   /* 'samples' is unsigned, so only the upper bound is worth checking. */
+   assert(samples <= SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES);
+
+   if (samples <= 1)
+      return rast->id;
+
+   /* The assert above is compiled out of release builds; refuse an
+    * out-of-range count here rather than index altRastIds[] with it.
+    */
+   if (samples > SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES)
+      return SVGA3D_INVALID_ID;
+
+   if (rast->altRastIds[samples] == SVGA3D_INVALID_ID) {
+      rast->altRastIds[samples] =
+         svga_define_rasterizer_object(svga, rast, samples);
+   }
+
+   return rast->altRastIds[samples];
+}
+
+
static enum pipe_error
emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty)
{
@@ -457,8 +479,9 @@ emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty)
}
}
- if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) {
- const struct svga_rasterizer_state *rast;
+ if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST |
+ SVGA_NEW_FRAME_BUFFER)) {
+ struct svga_rasterizer_state *rast = svga->curr.rast;
if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
svga->curr.gs && svga->curr.gs->wide_point) {
@@ -468,16 +491,28 @@ emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty)
*/
rast = get_no_cull_rasterizer_state(svga);
}
- else {
- rast = svga->curr.rast;
+
+ int rastId = rast->id;
+
+ /* In the case of no-attachment framebuffer, the sample count will be
+ * specified in forcedSampleCount in the RasterizerState_v2 object.
+ */
+ if ((svga->curr.framebuffer.nr_cbufs == 0) &&
+ (svga->curr.framebuffer.zsbuf == NULL)) {
+ rastId =
+ get_alt_rasterizer_state_id(svga, rast,
+ svga->curr.framebuffer.samples);
+
+ if (rastId == SVGA3D_INVALID_ID)
+ return PIPE_ERROR;
}
- if (svga->state.hw_draw.rasterizer_id != rast->id) {
+ if (svga->state.hw_draw.rasterizer_id != rastId) {
/* Set/bind the rasterizer state object */
- ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id);
+ ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rastId);
if (ret != PIPE_OK)
return ret;
- svga->state.hw_draw.rasterizer_id = rast->id;
+ svga->state.hw_draw.rasterizer_id = rastId;
}
}
svga->state.hw_draw.rasterizer_discard = FALSE;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c
index bbfd889e9..b14a642ed 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c
@@ -136,6 +136,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
SVGA3dResourceType resourceDim;
SVGA3dShaderResourceViewDesc viewDesc;
enum pipe_format viewFormat = sv->base.format;
+ enum pipe_texture_target target = sv->base.target;
/* vgpu10 cannot create a BGRX view for a BGRA resource, so force it to
* create a BGRA view (and vice versa).
@@ -149,8 +150,9 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
viewFormat = PIPE_FORMAT_B8G8R8X8_UNORM;
}
- if (texture->target == PIPE_BUFFER) {
+ if (target == PIPE_BUFFER) {
unsigned pf_flags;
+ assert(texture->target == PIPE_BUFFER);
svga_translate_texture_buffer_view_format(viewFormat,
&format,
&pf_flags);
@@ -168,7 +170,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
assert(format != SVGA3D_FORMAT_INVALID);
- if (texture->target == PIPE_BUFFER) {
+ if (target == PIPE_BUFFER) {
unsigned elem_size = util_format_get_blocksize(sv->base.format);
viewDesc.buffer.firstElement = sv->base.u.buf.offset / elem_size;
@@ -188,11 +190,10 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
* hence we need to set arraySize to 1 explicitly.
*/
viewDesc.tex.arraySize =
- (texture->target == PIPE_TEXTURE_3D ||
- texture->target == PIPE_BUFFER) ? 1 :
+ (target == PIPE_TEXTURE_3D || target == PIPE_BUFFER) ? 1 :
(sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1);
- switch (texture->target) {
+ switch (target) {
case PIPE_BUFFER:
resourceDim = SVGA3D_RESOURCE_BUFFER;
break;
@@ -397,55 +398,100 @@ update_samplers(struct svga_context *svga, uint64_t dirty )
for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) {
const unsigned count = svga->curr.num_samplers[shader];
- SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS];
+ SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS*2];
unsigned i;
- unsigned nsamplers;
+ unsigned nsamplers = 0;
+ boolean sampler_state_mapping =
+ svga_use_sampler_state_mapping(svga, count);
for (i = 0; i < count; i++) {
bool fs_shadow = false;
+ const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
/* _NEW_FS */
if (shader == PIPE_SHADER_FRAGMENT) {
struct svga_fs_variant *fs =
svga_fs_variant(svga->state.hw_draw.fs);
- /* If the fragment shader is doing the shadow comparison
- * for this texture unit, don't enable shadow compare in
- * the texture sampler state.
- */
+
if (fs && (fs->fs_shadow_compare_units & (1 << i))) {
+
+ /* Use the alternate sampler state with the compare
+ * bit disabled when comparison is done in the shader and
+ * sampler state mapping is not enabled.
+ */
fs_shadow = true;
}
}
- if (svga->curr.sampler[shader][i]) {
- ids[i] = svga->curr.sampler[shader][i]->id[fs_shadow];
- assert(ids[i] != SVGA3D_INVALID_ID);
+ if (!sampler_state_mapping) {
+ if (sampler) {
+ SVGA3dSamplerId id = sampler->id[fs_shadow];
+ assert(id != SVGA3D_INVALID_ID);
+ ids[i] = id;
+ }
+ else {
+ ids[i] = SVGA3D_INVALID_ID;
+ }
+ nsamplers++;
}
else {
- ids[i] = SVGA3D_INVALID_ID;
+ if (sampler) {
+ SVGA3dSamplerId id = sampler->id[0];
+ assert(id != SVGA3D_INVALID_ID);
+
+ /* Check if the sampler id is already on the ids list */
+ unsigned k;
+ for (k = 0; k < nsamplers; k++) {
+ if (ids[k] == id)
+ break;
+ }
+
+ /* add the id to the list if it is not already on the list */
+ if (k == nsamplers) {
+ ids[nsamplers++] = id;
+
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /*
+ * add the alternate sampler state as well as the shader
+ * might use this alternate sampler state which has comparison
+ * disabled when the comparison is done in the shader.
+ */
+ ids[nsamplers++] = sampler->id[1];
+ }
+ }
+ }
}
}
- for (; i < svga->state.hw_draw.num_samplers[shader]; i++) {
+ for (i = nsamplers; i < svga->state.hw_draw.num_samplers[shader]; i++) {
ids[i] = SVGA3D_INVALID_ID;
}
- nsamplers = MAX2(svga->state.hw_draw.num_samplers[shader], count);
- if (nsamplers > 0) {
- if (count != svga->state.hw_draw.num_samplers[shader] ||
+ unsigned nsamplerIds =
+ MAX2(nsamplers, svga->state.hw_draw.num_samplers[shader]);
+
+ if (nsamplerIds > 0) {
+
+ if (nsamplers > SVGA3D_DX_MAX_SAMPLERS) {
+ debug_warn_once("Too many sampler states");
+ nsamplers = SVGA3D_DX_MAX_SAMPLERS;
+ }
+
+ if (nsamplers != svga->state.hw_draw.num_samplers[shader] ||
memcmp(ids, svga->state.hw_draw.samplers[shader],
- count * sizeof(ids[0])) != 0) {
+ nsamplerIds * sizeof(ids[0])) != 0) {
+
/* HW state is really changing */
ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
- nsamplers,
+ nsamplerIds,
0, /* start */
svga_shader_type(shader), /* type */
ids);
if (ret != PIPE_OK)
return ret;
memcpy(svga->state.hw_draw.samplers[shader], ids,
- nsamplers * sizeof(ids[0]));
- svga->state.hw_draw.num_samplers[shader] = count;
+ nsamplerIds * sizeof(ids[0]));
+ svga->state.hw_draw.num_samplers[shader] = nsamplers;
}
}
}
@@ -453,7 +499,7 @@ update_samplers(struct svga_context *svga, uint64_t dirty )
/* Handle polygon stipple sampler texture */
if (svga->curr.rast->templ.poly_stipple_enable) {
const unsigned unit =
- svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit;
+ svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_state_index;
struct svga_sampler_state *sampler = svga->polygon_stipple.sampler;
assert(sampler);
@@ -475,6 +521,7 @@ update_samplers(struct svga_context *svga, uint64_t dirty )
svga->state.hw_draw.samplers[PIPE_SHADER_FRAGMENT][unit] =
sampler->id[0];
}
+ svga->state.hw_draw.num_samplers[PIPE_SHADER_FRAGMENT]++;
}
return ret;
@@ -488,3 +535,180 @@ struct svga_tracked_state svga_hw_sampler = {
SVGA_NEW_STIPPLE),
update_samplers
};
+
+
+static enum pipe_error
+update_cs_sampler_resources(struct svga_context *svga, uint64_t dirty)
+{
+ enum pipe_error ret = PIPE_OK;
+ enum pipe_shader_type shader = PIPE_SHADER_COMPUTE;
+
+ assert(svga_have_sm5(svga));
+
+ SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS];
+ struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
+ unsigned count;
+ unsigned nviews;
+ unsigned i;
+
+ count = svga->curr.num_sampler_views[shader];
+ for (i = 0; i < count; i++) {
+ struct svga_pipe_sampler_view *sv =
+ svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+
+ if (sv) {
+ surfaces[i] = svga_resource_handle(sv->base.texture);
+
+ ret = svga_validate_pipe_sampler_view(svga, sv);
+ if (ret != PIPE_OK)
+ return ret;
+
+ assert(sv->id != SVGA3D_INVALID_ID);
+ ids[i] = sv->id;
+ sampler_views[i] = &sv->base;
+ }
+ else {
+ surfaces[i] = NULL;
+ ids[i] = SVGA3D_INVALID_ID;
+ sampler_views[i] = NULL;
+ }
+ }
+
+ for (; i < svga->state.hw_draw.num_sampler_views[shader]; i++) {
+ ids[i] = SVGA3D_INVALID_ID;
+ surfaces[i] = NULL;
+ sampler_views[i] = NULL;
+ }
+
+ /* Number of ShaderResources that need to be modified. This includes
+ * the ones that need to be unbound.
+ */
+ nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count);
+ if (nviews > 0) {
+ if (count != svga->state.hw_draw.num_sampler_views[shader] ||
+ memcmp(sampler_views, svga->state.hw_draw.sampler_views[shader],
+ count * sizeof(sampler_views[0])) != 0) {
+ SVGA3dShaderResourceViewId *pIds = ids;
+ struct svga_winsys_surface **pSurf = surfaces;
+ unsigned numSR = 0;
+
+ /* Loop through the sampler view list to only emit the sampler views
+ * that are not already in the corresponding entries in the device's
+ * shader resource list.
+ */
+ for (i = 0; i < nviews; i++) {
+ boolean emit;
+
+ emit = sampler_views[i] ==
+ svga->state.hw_draw.sampler_views[shader][i];
+
+ if (!emit && i == nviews - 1) {
+ /* Include the last sampler view in the next emit
+ * if it is different.
+ */
+ emit = TRUE;
+ numSR++;
+ i++;
+ }
+
+ if (emit) {
+ /* numSR can only be 0 if the first entry of the list
+ * is the same as the one in the device list.
+ * In this case, there is nothing to send yet.
+ */
+ if (numSR) {
+ ret = SVGA3D_vgpu10_SetShaderResources(svga->swc,
+ svga_shader_type(shader),
+ i - numSR, /* startView */
+ numSR,
+ pIds,
+ pSurf);
+
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ pIds += (numSR + 1);
+ pSurf += (numSR + 1);
+ numSR = 0;
+ }
+ else
+ numSR++;
+ }
+
+ /* Save referenced sampler views in the hw draw state. */
+ svga->state.hw_draw.num_sampler_views[shader] = count;
+ for (i = 0; i < nviews; i++) {
+ pipe_sampler_view_reference(
+ &svga->state.hw_draw.sampler_views[shader][i],
+ sampler_views[i]);
+ }
+ }
+ }
+ return ret;
+}
+
+
+struct svga_tracked_state svga_hw_cs_sampler_bindings = {
+ "cs shader resources emit",
+ SVGA_NEW_TEXTURE_BINDING,
+ update_cs_sampler_resources
+};
+
+static enum pipe_error
+update_cs_samplers(struct svga_context *svga, uint64_t dirty )
+{
+ enum pipe_error ret = PIPE_OK;
+ enum pipe_shader_type shader = PIPE_SHADER_COMPUTE;
+
+ assert(svga_have_sm5(svga));
+
+ const unsigned count = svga->curr.num_samplers[shader];
+ SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS];
+ unsigned i;
+ unsigned nsamplers;
+
+ for (i = 0; i < count; i++) {
+ if (svga->curr.sampler[shader][i]) {
+ ids[i] = svga->curr.sampler[shader][i]->id[0];
+ assert(ids[i] != SVGA3D_INVALID_ID);
+ }
+ else {
+ ids[i] = SVGA3D_INVALID_ID;
+ }
+ }
+
+ for (; i < svga->state.hw_draw.num_samplers[shader]; i++) {
+ ids[i] = SVGA3D_INVALID_ID;
+ }
+
+ nsamplers = MAX2(svga->state.hw_draw.num_samplers[shader], count);
+ if (nsamplers > 0) {
+ if (count != svga->state.hw_draw.num_samplers[shader] ||
+ memcmp(ids, svga->state.hw_draw.samplers[shader],
+ count * sizeof(ids[0])) != 0) {
+ /* HW state is really changing */
+ ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
+ nsamplers,
+ 0, /* start */
+ svga_shader_type(shader), /* type */
+ ids);
+ if (ret != PIPE_OK)
+ return ret;
+
+ memcpy(svga->state.hw_draw.samplers[shader], ids,
+ nsamplers * sizeof(ids[0]));
+ svga->state.hw_draw.num_samplers[shader] = count;
+ }
+ }
+
+ return ret;
+}
+
+
+struct svga_tracked_state svga_hw_cs_sampler = {
+ "texture cs sampler emit",
+ (SVGA_NEW_CS |
+ SVGA_NEW_SAMPLER),
+ update_cs_samplers
+};
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c
index 492a929bd..de10da694 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c
@@ -214,6 +214,9 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
*/
key->last_vertex_stage = !(svga->curr.gs ||
svga->curr.tcs || svga->curr.tes);
+
+ if (svga_have_gl43(svga))
+ key->image_size_used = vs->base.info.opcode_count[TGSI_OPCODE_RESQ] ? 1 : 0;
}
@@ -434,6 +437,7 @@ struct svga_tracked_state svga_hw_vs =
SVGA_NEW_RAST |
SVGA_NEW_PRESCALE |
SVGA_NEW_VELEMENT |
- SVGA_NEW_NEED_SWTNL),
+ SVGA_NEW_NEED_SWTNL |
+ SVGA_NEW_VS_RAW_BUFFER),
emit_hw_vs
};
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_surface.c b/lib/mesa/src/gallium/drivers/svga/svga_surface.c
index 09a6afb68..c80f0a4f9 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_surface.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_surface.c
@@ -194,7 +194,7 @@ svga_texture_view_surface(struct svga_context *svga,
{
struct svga_screen *ss = svga_screen(svga->pipe.screen);
struct svga_winsys_surface *handle = NULL;
- boolean validated;
+ boolean invalidated;
boolean needCopyResource;
SVGA_DBG(DEBUG_PERF,
@@ -241,7 +241,7 @@ svga_texture_view_surface(struct svga_context *svga,
} else {
SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n");
handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT,
- &validated, key);
+ &invalidated, key);
needCopyResource = TRUE;
if (cacheable && !tex->backed_handle) {
@@ -551,7 +551,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
* associated resource. We will then use the cloned surface view for
* render target.
*/
- for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) {
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) {
if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
SVGA_DBG(DEBUG_VIEWS,
"same resource used in shaderResource and renderTarget 0x%x\n",
@@ -582,7 +582,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
SVGA3dRenderTargetViewDesc desc;
struct svga_texture *stex = svga_texture(s->base.texture);
- if (stex->validated == FALSE) {
+ if (stex->surface_state < SVGA_SURFACE_STATE_INVALIDATED) {
assert(stex->handle);
/* We are about to render into a surface that has not been validated.
@@ -591,7 +591,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
* content when the associated mob is first bound to the surface.
*/
SVGA_RETRY(svga, SVGA3D_InvalidateGBSurface(svga->swc, stex->handle));
- stex->validated = TRUE;
+ stex->surface_state = SVGA_SURFACE_STATE_INVALIDATED;
}
desc.tex.mipSlice = s->real_level;
@@ -599,38 +599,48 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
desc.tex.arraySize =
s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1;
- s->view_id = util_bitmask_add(svga->surface_view_id_bm);
-
resType = svga_resource_type(s->base.texture->target);
if (util_format_is_depth_or_stencil(s->base.format)) {
- ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
- s->view_id,
- s->handle,
- s->key.format,
- resType,
- &desc);
- }
- else {
- SVGA3dSurfaceFormat view_format = s->key.format;
- const struct svga_texture *stex = svga_texture(s->base.texture);
- /* Can't create RGBA render target view of a RGBX surface so adjust
- * the view format. We do something similar for texture samplers in
- * svga_validate_pipe_sampler_view().
+ /* Create depth stencil view only if the resource is created
+ * with depth stencil bind flag.
*/
- if (view_format == SVGA3D_B8G8R8A8_UNORM &&
- (stex->key.format == SVGA3D_B8G8R8X8_UNORM ||
- stex->key.format == SVGA3D_B8G8R8X8_TYPELESS)) {
- view_format = SVGA3D_B8G8R8X8_UNORM;
+ if (stex->key.flags & SVGA3D_SURFACE_BIND_DEPTH_STENCIL) {
+ s->view_id = util_bitmask_add(svga->surface_view_id_bm);
+ ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
+ s->view_id,
+ s->handle,
+ s->key.format,
+ resType,
+ &desc);
}
+ }
+ else {
+ /* Create render target view only if the resource is created
+ * with render target bind flag.
+ */
+ if (stex->key.flags & SVGA3D_SURFACE_BIND_RENDER_TARGET) {
+ SVGA3dSurfaceFormat view_format = s->key.format;
+
+ /* Can't create RGBA render target view of a RGBX surface so adjust
+ * the view format. We do something similar for texture samplers in
+ * svga_validate_pipe_sampler_view().
+ */
+ if (view_format == SVGA3D_B8G8R8A8_UNORM &&
+ (stex->key.format == SVGA3D_B8G8R8X8_UNORM ||
+ stex->key.format == SVGA3D_B8G8R8X8_TYPELESS)) {
+ view_format = SVGA3D_B8G8R8X8_UNORM;
+ }
- ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
- s->view_id,
- s->handle,
- view_format,
- resType,
- &desc);
+ s->view_id = util_bitmask_add(svga->surface_view_id_bm);
+ ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
+ s->view_id,
+ s->handle,
+ view_format,
+ resType,
+ &desc);
+ }
}
if (ret != PIPE_OK) {
@@ -669,7 +679,9 @@ svga_surface_destroy(struct pipe_context *pipe,
*/
if (s->handle != t->handle && s->handle != t->backed_handle) {
SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
- svga_screen_surface_destroy(ss, &s->key, &s->handle);
+ svga_screen_surface_destroy(ss, &s->key,
+ svga_was_texture_rendered_to(t),
+ &s->handle);
}
if (s->view_id != SVGA3D_INVALID_ID) {
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c
index e2d0865d9..d2115a263 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -1344,11 +1344,12 @@ emit_ssg(struct svga_shader_emitter *emit,
/**
- * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative).
+ * Translate/emit the conditional discard instruction (discard if
+ * any of X,Y,Z,W are negative).
*/
static boolean
-emit_kill_if(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn)
+emit_cond_discard(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
{
const struct tgsi_full_src_register *reg = &insn->Src[0];
struct src_register src0, srcIn;
@@ -1375,7 +1376,7 @@ emit_kill_if(struct svga_shader_emitter *emit,
src0 = src( temp );
}
- /* Do the texkill by checking if any of the XYZW components are < 0.
+ /* Do the discard by checking if any of the XYZW components are < 0.
* Note that ps_2_0 and later take XYZW in consideration, while ps_1_x
* only used XYZ. The MSDN documentation about this is incorrect.
*/
@@ -1387,12 +1388,12 @@ emit_kill_if(struct svga_shader_emitter *emit,
/**
- * Translate/emit unconditional kill instruction (usually found inside
+ * Translate/emit the unconditional discard instruction (usually found inside
* an IF/ELSE/ENDIF block).
*/
static boolean
-emit_kill(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn)
+emit_discard(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
{
SVGA3dShaderDestToken temp;
struct src_register one = get_one_immediate(emit);
@@ -2044,6 +2045,73 @@ emit_mov(struct svga_shader_emitter *emit,
/**
+ * Translate TGSI SQRT instruction
+ * if src1 == 0
+ * mov dst, src1
+ * else
+ * rsq temp, src1
+ * rcp dst, temp
+ * endif
+ */
+static boolean
+emit_sqrt(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ const struct src_register src1 = translate_src_register(emit, &insn->Src[0]);
+ const struct src_register zero = get_zero_immediate(emit);
+ SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
+ SVGA3dShaderDestToken temp = get_temp(emit);
+ SVGA3dShaderInstToken if_token = inst_token(SVGA3DOP_IFC);
+ boolean ret = TRUE;
+
+ if_token.control = SVGA3DOPCOMP_EQ;
+
+ if (!(emit_instruction(emit, if_token) &&
+ emit_src(emit, src1) &&
+ emit_src(emit, zero))) {
+ ret = FALSE;
+ goto cleanup;
+ }
+
+ if (!submit_op1(emit,
+ inst_token(SVGA3DOP_MOV),
+ dst, src1)) {
+ ret = FALSE;
+ goto cleanup;
+ }
+
+ if (!emit_instruction(emit, inst_token(SVGA3DOP_ELSE))) {
+ ret = FALSE;
+ goto cleanup;
+ }
+
+ if (!submit_op1(emit,
+ inst_token(SVGA3DOP_RSQ),
+ temp, src1)) {
+ ret = FALSE;
+ goto cleanup;
+ }
+
+ if (!submit_op1(emit,
+ inst_token(SVGA3DOP_RCP),
+ dst, src(temp))) {
+ ret = FALSE;
+ goto cleanup;
+ }
+
+ if (!emit_instruction(emit, inst_token(SVGA3DOP_ENDIF))) {
+ ret = FALSE;
+ goto cleanup;
+ }
+
+cleanup:
+ release_temp(emit, temp);
+
+ return ret;
+}
+
+
+/**
* Translate/emit TGSI DDX, DDY instructions.
*/
static boolean
@@ -2820,7 +2888,7 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
return emit_end( emit );
case TGSI_OPCODE_KILL_IF:
- return emit_kill_if( emit, insn );
+ return emit_cond_discard( emit, insn );
/* Selection opcodes. The underlying language is fairly
* non-orthogonal about these.
@@ -2902,7 +2970,7 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
return emit_brk( emit, insn );
case TGSI_OPCODE_KILL:
- return emit_kill( emit, insn );
+ return emit_discard( emit, insn );
case TGSI_OPCODE_DST:
return emit_dst_insn( emit, insn );
@@ -2919,6 +2987,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
case TGSI_OPCODE_MOV:
return emit_mov( emit, insn );
+ case TGSI_OPCODE_SQRT:
+ return emit_sqrt( emit, insn );
+
default:
{
SVGA3dShaderOpCodeType opcode =
@@ -3517,7 +3588,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
- emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
+ emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SQRT] >= 1)
return TRUE;
return FALSE;
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index a9435a098..362bbf637 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -1,5 +1,5 @@
/**********************************************************
- * Copyright 1998-2013 VMware, Inc. All rights reserved.
+ * Copyright 1998-2022 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -66,7 +66,6 @@
(VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64 /* Enough? */
-
/**
* Clipping is complicated. There's four different cases which we
* handle during VS/GS shader translation:
@@ -181,6 +180,18 @@ map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
return tgsi_semantic_to_sgn_name[name];
}
+enum reemit_mode {
+ REEMIT_FALSE = 0,
+ REEMIT_TRUE = 1,
+ REEMIT_IN_PROGRESS = 2
+};
+
+struct svga_raw_buf_tmp {
+ bool indirect;
+ unsigned buffer_index:8;
+ unsigned element_index:8;
+ unsigned element_rel:8;
+};
struct svga_shader_emitter_v10
{
@@ -193,18 +204,21 @@ struct svga_shader_emitter_v10
struct svga_compile_key key;
struct tgsi_shader_info info;
unsigned unit;
- unsigned version; /**< Either 40 or 41 at this time */
+ unsigned version; /**< Either 40, 41, 50 or 51 at this time */
unsigned cur_tgsi_token; /**< current tgsi token position */
unsigned inst_start_token;
boolean discard_instruction; /**< throw away current instruction? */
boolean reemit_instruction; /**< reemit current instruction */
+ boolean reemit_tgsi_instruction; /**< reemit current tgsi instruction */
boolean skip_instruction; /**< skip current instruction */
+ boolean use_sampler_state_mapping; /* use sampler state mapping */
+ enum reemit_mode reemit_rawbuf_instruction;
union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
double (*immediates_dbl)[2];
unsigned num_immediates; /**< Number of immediates emitted */
- unsigned common_immediate_pos[10]; /**< literals for common immediates */
+ unsigned common_immediate_pos[20]; /**< literals for common immediates */
unsigned num_common_immediates;
boolean immediates_emitted;
@@ -235,12 +249,36 @@ struct svga_shader_emitter_v10
*/
unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
+ /* Raw constant buffers */
+ unsigned raw_buf_srv_start_index; /* starting srv index for raw buffers */
+ unsigned raw_bufs; /* raw buffers bitmask */
+ unsigned raw_buf_tmp_index; /* starting temp index for raw buffers */
+ unsigned raw_buf_cur_tmp_index; /* current temp index for raw buffers */
+ struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */
+
/* Samplers */
unsigned num_samplers;
boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/
ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
+ /* Images */
+ unsigned num_images;
+ unsigned image_mask;
+ struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES];
+ unsigned image_size_index; /* starting index to cbuf for image size */
+
+ /* Shader buffers */
+ unsigned num_shader_bufs;
+
+ /* HW atomic buffers */
+ unsigned num_atomic_bufs;
+ unsigned atomic_bufs_mask;
+ unsigned max_atomic_counter_index;
+ VGPU10_OPCODE_TYPE cur_atomic_opcode; /* current atomic opcode */
+
+ boolean uav_declared; /* True if uav is declared */
+
/* Index Range declaration */
struct {
unsigned start_index;
@@ -274,6 +312,11 @@ struct svga_shader_emitter_v10
unsigned have_prescale:1;
} vposition;
+ /* Shader limits */
+ unsigned max_vs_inputs;
+ unsigned max_vs_outputs;
+ unsigned max_gs_inputs;
+
/* For vertex shaders only */
struct {
/* viewport constant */
@@ -299,6 +342,7 @@ struct svga_shader_emitter_v10
unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */
unsigned pstipple_sampler_unit;
+ unsigned pstipple_sampler_state_index;
unsigned fragcoord_input_index; /**< real fragment position input reg */
unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
@@ -311,12 +355,11 @@ struct svga_shader_emitter_v10
/** TGSI index of sample mask input sys value */
unsigned sample_mask_in_sys_index;
- /** Which texture units are doing shadow comparison in the FS code */
- unsigned shadow_compare_units;
-
/* layer */
unsigned layer_input_index; /**< TGSI index of layer */
unsigned layer_imm_index; /**< immediate for default layer 0 */
+
+ boolean forceEarlyDepthStencil; /**< true if Early Depth stencil test is enabled */
} fs;
/* For geometry shaders only */
@@ -383,6 +426,19 @@ struct svga_shader_emitter_v10
} outer;
} tes;
+ struct {
+ unsigned block_width; /* thread group size in x dimension */
+ unsigned block_height; /* thread group size in y dimension */
+ unsigned block_depth; /* thread group size in z dimension */
+ unsigned thread_id_index; /* thread id tgsi index */
+ unsigned block_id_index; /* block id tgsi index */
+ bool shared_memory_declared; /* set if shared memory is declared */
+ struct {
+ unsigned tgsi_index; /* grid size tgsi index */
+ unsigned imm_index; /* grid size imm index */
+ } grid_size;
+ } cs;
+
/* For vertex or geometry shaders */
enum clipping_mode clip_mode;
unsigned clip_dist_out_index; /**< clip distance output register index */
@@ -410,6 +466,9 @@ struct svga_shader_emitter_v10
/* For all shaders: const reg index for texture buffer size */
unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
+ /** Which texture units are doing shadow comparison in the shader code */
+ unsigned shadow_compare_units;
+
/* VS/TCS/TES/GS/FS Linkage info */
struct shader_linkage linkage;
struct tgsi_shader_info *prevShaderInfo;
@@ -419,8 +478,8 @@ struct svga_shader_emitter_v10
bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
- /* For pipe_debug_message */
- struct pipe_debug_callback svga_debug_callback;
+ /* For util_debug_message */
+ struct util_debug_callback svga_debug_callback;
/* current loop depth in shader */
unsigned current_loop_depth;
@@ -435,6 +494,9 @@ static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
+static void emit_image_declarations(struct svga_shader_emitter_v10 *emit);
+static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit);
+static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit);
static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
static boolean
@@ -459,6 +521,11 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
boolean addSignature,
SVGA3dDXSignatureSemanticName sgnName);
+static boolean
+emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
+ unsigned inst_number,
+ const struct tgsi_full_instruction *inst);
+
static void
create_temp_array(struct svga_shader_emitter_v10 *emit,
unsigned arrayID, unsigned first, unsigned count,
@@ -602,9 +669,9 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
if ((emit->unit == PIPE_SHADER_VERTEX &&
- index >= VGPU10_MAX_VS_INPUTS) ||
+ index >= emit->max_vs_inputs) ||
(emit->unit == PIPE_SHADER_GEOMETRY &&
- index >= VGPU10_MAX_GS_INPUTS) ||
+ index >= emit->max_gs_inputs) ||
(emit->unit == PIPE_SHADER_FRAGMENT &&
index >= VGPU10_MAX_FS_INPUTS) ||
(emit->unit == PIPE_SHADER_TESS_CTRL &&
@@ -625,7 +692,7 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
* Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
*/
if ((emit->unit == PIPE_SHADER_VERTEX &&
- index >= VGPU10_MAX_VS_OUTPUTS) ||
+ index >= emit->max_vs_outputs) ||
(emit->unit == PIPE_SHADER_GEOMETRY &&
index >= VGPU10_MAX_GS_OUTPUTS) ||
(emit->unit == PIPE_SHADER_FRAGMENT &&
@@ -655,6 +722,13 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
}
break;
case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
+ case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
+ case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
+ case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT:
+ case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT:
+ case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
+ case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
+ case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
/* nothing */
break;
default:
@@ -960,6 +1034,10 @@ translate_opcode(enum tgsi_opcode opcode)
return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
case TGSI_OPCODE_BARRIER:
return VGPU10_OPCODE_SYNC;
+ case TGSI_OPCODE_DFMA:
+ return VGPU10_OPCODE_DFMA;
+ case TGSI_OPCODE_FMA:
+ return VGPU10_OPCODE_MAD;
/* DX11.1 Opcodes */
case TGSI_OPCODE_DDIV:
@@ -1474,7 +1552,7 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
{
enum tgsi_file_type file = reg->Register.File;
unsigned index = reg->Register.Index;
- const boolean indirect = reg->Register.Indirect;
+ boolean indirect = reg->Register.Indirect;
unsigned tempArrayId = get_temp_array_id(emit, file, index);
boolean index2d = (reg->Register.Dimension ||
tempArrayId > 0 ||
@@ -1528,11 +1606,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
/* Emitted as vCoverage0.x */
/* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
* elements where s is the maximum number of color samples supported
- * by the implementation. With current implementation, we should not
- * have more than one element. So assert if Index != 0
+ * by the implementation.
*/
- assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
- reg->Register.Indirect);
operand0.value = 0;
operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
@@ -1749,12 +1824,93 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
}
}
}
+ else if (emit->unit == PIPE_SHADER_COMPUTE) {
+ if (file == TGSI_FILE_SYSTEM_VALUE) {
+ if (index == emit->cs.thread_id_index) {
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP;
+ index = 0;
+ } else if (index == emit->cs.block_id_index) {
+ operand0.value = 0;
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID;
+ operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+ operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+ operand0.swizzleX = swizzleX;
+ operand0.swizzleY = swizzleY;
+ operand0.swizzleZ = swizzleZ;
+ operand0.swizzleW = swizzleW;
+ emit_dword(emit, operand0.value);
+ return;
+ } else if (index == emit->cs.grid_size.tgsi_index) {
+ file = TGSI_FILE_IMMEDIATE;
+ index = emit->cs.grid_size.imm_index;
+ }
+ }
+ }
if (file == TGSI_FILE_ADDRESS) {
index = emit->address_reg_index[index];
file = TGSI_FILE_TEMPORARY;
}
+ if (file == TGSI_FILE_CONSTANT) {
+ /**
+ * If this constant buffer is to be bound as srv raw buffer,
+ * then we have to load the constant to a temp first before
+ * it can be used as a source in the instruction.
+ * This is accomplished in two passes. The first pass is to
+ * identify if there is any constbuf to rawbuf translation.
+ * If there isn't, emit the instruction as usual.
+ * If there is, then we save the constant buffer reference info,
+ * and then instead of emitting the instruction at the end
+ * of the instruction, it will trigger a second pass of parsing
+ * this instruction. Before it starts the parsing, it will
+ * load the referenced raw buffer elements to temporaries.
+ * Then it will emit the instruction with the constant
+ * buffer references replaced by the corresponding temporaries.
+ */
+ if (emit->raw_bufs & (1 << index2)) {
+ if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) {
+ unsigned tmpIdx = emit->raw_buf_cur_tmp_index;
+
+ emit->raw_buf_tmp[tmpIdx].buffer_index = index2;
+
+ /* Save whether the element index is indirect indexing */
+ emit->raw_buf_tmp[tmpIdx].indirect = indirect;
+
+ /* If it is indirect index, save the temporary
+ * address index, otherwise, save the immediate index.
+ */
+ if (indirect) {
+ emit->raw_buf_tmp[tmpIdx].element_index =
+ emit->address_reg_index[reg->Indirect.Index];
+ emit->raw_buf_tmp[tmpIdx].element_rel =
+ reg->Register.Index;
+ }
+ else {
+ emit->raw_buf_tmp[tmpIdx].element_index = index;
+ emit->raw_buf_tmp[tmpIdx].element_rel = 0;
+ }
+
+ emit->raw_buf_cur_tmp_index++;
+ emit->reemit_rawbuf_instruction = REEMIT_TRUE;
+ emit->discard_instruction = TRUE;
+ emit->reemit_tgsi_instruction = TRUE;
+ }
+ else {
+ /* In the reemitting process, replace the constant buffer
+ * reference with temporary.
+ */
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index;
+ index2d = FALSE;
+ indirect = FALSE;
+ emit->raw_buf_cur_tmp_index++;
+ }
+ }
+ }
+
if (file == TGSI_FILE_TEMPORARY) {
if (need_temp_reg_initialization(emit, index)) {
emit->initialize_temp_index = index;
@@ -1801,6 +1957,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
}
}
+ check_register_index(emit, operand0.operandType, index);
+
/* Emit the operand tokens */
emit_dword(emit, operand0.value);
if (operand0.extended)
@@ -1834,6 +1992,7 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
emit_dword(emit, remap_temp_index(emit, file, index));
if (indirect) {
+ assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP);
emit_indirect_register(emit, reg->Indirect.Index);
}
}
@@ -1873,9 +2032,15 @@ emit_resource_register(struct svga_shader_emitter_v10 *emit,
*/
static void
emit_sampler_register(struct svga_shader_emitter_v10 *emit,
- unsigned sampler_number)
+ unsigned unit)
{
VGPU10OperandToken0 operand0;
+ unsigned sampler_number;
+
+ sampler_number = emit->key.tex[unit].sampler_index;
+
+ if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping)
+ sampler_number++;
check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
@@ -2861,6 +3026,22 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
emit->tes.point_mode = prop->u[0].Data;
break;
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
+ emit->cs.block_width = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
+ emit->cs.block_height = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
+ emit->cs.block_depth = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
+ emit->fs.forceEarlyDepthStencil = TRUE;
+ break;
+
default:
debug_printf("Unexpected TGSI property %s\n",
tgsi_property_names[prop->Property.PropertyName]);
@@ -2986,7 +3167,10 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
}
emit->common_immediate_pos[n++] =
- alloc_immediate_int4(emit, 0, 1, 0, -1);
+ alloc_immediate_int4(emit, 0, 1, 2, -1);
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 3, 4, 5, 6);
if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
@@ -3059,6 +3243,25 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
}
}
+ /** TODO: allocate immediates for all possible element byte offset?
+ */
+ if (emit->raw_bufs) {
+ unsigned i;
+ for (i = 7; i < 12; i+=4) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
+ }
+ }
+
+ if (emit->info.indirect_files &
+ (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) {
+ unsigned i;
+ for (i = 7; i < 8; i+=4) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
+ }
+ }
+
assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
emit->num_common_immediates = n;
}
@@ -3139,6 +3342,8 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
emit_dword(emit, opcode0.value);
end_emit_instruction(emit);
+ alloc_common_immediates(emit);
+
/* Declare constant registers */
emit_constant_declaration(emit);
@@ -3146,7 +3351,14 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
emit_sampler_declarations(emit);
emit_resource_declarations(emit);
- alloc_common_immediates(emit);
+ /* Declare images */
+ emit_image_declarations(emit);
+
+ /* Declare shader buffers */
+ emit_shader_buf_declarations(emit);
+
+ /* Declare atomic buffers */
+ emit_atomic_buf_declarations(emit);
int nVertices = emit->key.tcs.vertices_per_patch;
emit->tcs.imm_index =
@@ -3387,13 +3599,17 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
/* Usually this applies to TCS only. If shader is reading output of
* patch constant in fork phase, we should reemit all instructions
- * which are writting into ouput of patch constant in fork phase
+ * which are writing into output of patch constant in fork phase
* to store results into temporaries.
*/
+ assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction));
if (emit->reemit_instruction) {
assert(emit->unit == PIPE_SHADER_TESS_CTRL);
ret = emit_vgpu10_instruction(emit, inst_number,
&parse->FullToken.FullInstruction);
+ } else if (emit->reemit_rawbuf_instruction) {
+ ret = emit_rawbuf_instruction(emit, inst_number,
+ &parse->FullToken.FullInstruction);
}
if (!ret)
@@ -3405,6 +3621,25 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
/**
+ * Emit the thread group declaration for compute shader.
+ */
+static void
+emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ VGPU10OpcodeToken0 opcode0;
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP;
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dword(emit, emit->cs.block_width);
+ emit_dword(emit, emit->cs.block_height);
+ emit_dword(emit, emit->cs.block_depth);
+ end_emit_instruction(emit);
+}
+
+
+/**
* Emit index range declaration.
*/
static boolean
@@ -4557,6 +4792,41 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
map_tgsi_semantic_to_sgn_name(semantic_name));
}
break;
+ case TGSI_SEMANTIC_THREAD_ID:
+ assert(emit->unit >= PIPE_SHADER_COMPUTE);
+ assert(emit->version >= 50);
+ emit->cs.thread_id_index = index;
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP,
+ VGPU10_OPERAND_INDEX_0D,
+ index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ break;
+ case TGSI_SEMANTIC_BLOCK_ID:
+ assert(emit->unit >= PIPE_SHADER_COMPUTE);
+ assert(emit->version >= 50);
+ emit->cs.block_id_index = index;
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID,
+ VGPU10_OPERAND_INDEX_0D,
+ index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ break;
+ case TGSI_SEMANTIC_GRID_SIZE:
+ assert(emit->unit == PIPE_SHADER_COMPUTE);
+ assert(emit->version >= 50);
+ emit->cs.grid_size.tgsi_index = index;
+ break;
default:
debug_printf("unexpected system value semantic index %u / %s\n",
semantic_name, tgsi_semantic_names[semantic_name]);
@@ -4623,6 +4893,7 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
" but [%u] is the limit.\n",
num_consts,
VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+ emit->register_overflow = TRUE;
}
/* The linker doesn't enforce the max UBO size so we clamp here */
emit->num_shader_consts[constbuf] =
@@ -4669,6 +4940,43 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
}
return TRUE;
+ case TGSI_FILE_IMAGE:
+ {
+ unsigned unit = decl->Range.First;
+ assert(decl->Range.First == decl->Range.Last);
+ assert(unit < PIPE_MAX_SHADER_IMAGES);
+ emit->image[unit] = decl->Image;
+ emit->image_mask |= 1 << unit;
+ emit->num_images++;
+ }
+ return TRUE;
+
+ case TGSI_FILE_HW_ATOMIC:
+ /* Declare the atomic buffer if it is not already declared. */
+ if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) {
+ emit->num_atomic_bufs++;
+ emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D);
+ }
+
+ /* Remember the maximum atomic counter index encountered */
+ emit->max_atomic_counter_index =
+ MAX2(emit->max_atomic_counter_index, decl->Range.Last);
+ return TRUE;
+
+ case TGSI_FILE_MEMORY:
+ /* Record memory has been used. */
+ if (emit->unit == PIPE_SHADER_COMPUTE &&
+ decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) {
+ emit->cs.shared_memory_declared = TRUE;
+ }
+
+ return TRUE;
+
+ case TGSI_FILE_BUFFER:
+ assert(emit->version >= 50);
+ emit->num_shader_bufs++;
+ return TRUE;
+
default:
assert(!"Unexpected type of declaration");
return FALSE;
@@ -4676,7 +4984,6 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
}
-
/**
* Emit input declarations for fragment shader.
*/
@@ -4928,27 +5235,6 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
}
if (emit->tcs.control_point_phase) {
- if (emit->tcs.control_point_input_index == INVALID_INDEX) {
-
- /* Add input control point declaration if it does not exist */
- if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
- emit->linkage.input_map[emit->linkage.num_inputs] =
- emit->linkage.position_index;
- emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
-
- emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
- VGPU10_OPERAND_TYPE_INPUT,
- VGPU10_OPERAND_INDEX_2D,
- emit->linkage.position_index,
- emit->key.tcs.vertices_per_patch,
- VGPU10_NAME_UNDEFINED,
- VGPU10_OPERAND_4_COMPONENT,
- VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
- VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
- VGPU10_INTERPOLATION_UNDEFINED, TRUE,
- SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
- }
- }
/* Also add an address register for the indirection to the
* input control points
@@ -5447,6 +5733,17 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
emit->tcs.invocation_id_tmp_index = total_temps++;
}
+ if (emit->raw_bufs) {
+ /**
+ * Add 3 more temporaries if we need to translate constant buffer
+ * to srv raw buffer. Since we need to load the value to a temporary
+ * before it can be used as a source. There could be three source
+ * register in an instruction.
+ */
+ emit->raw_buf_tmp_index = total_temps;
+ total_temps+=3;
+ }
+
for (i = 0; i < emit->num_address_regs; i++) {
emit->address_reg_index[i] = total_temps++;
}
@@ -5618,8 +5915,20 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
}
}
}
+ if (emit->key.image_size_used) {
+ emit->image_size_index = total_consts;
+ total_consts += emit->num_images;
+ }
if (total_consts > 0) {
+ if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
+ debug_printf("Warning: Too many constants [%u] declared in constant"
+ " buffer 0. %u is the limit.\n",
+ total_consts,
+ VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+ total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
+ emit->register_overflow = TRUE;
+ }
begin_emit_instruction(emit);
emit_dword(emit, opcode0.value);
emit_dword(emit, operand0.value);
@@ -5629,14 +5938,41 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
}
/* Declare remaining constant buffers (UBOs) */
+
for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
if (emit->num_shader_consts[i] > 0) {
- begin_emit_instruction(emit);
- emit_dword(emit, opcode0.value);
- emit_dword(emit, operand0.value);
- emit_dword(emit, i); /* which const buffer slot */
- emit_dword(emit, emit->num_shader_consts[i]);
- end_emit_instruction(emit);
+ if (emit->raw_bufs & (1 << i)) {
+ /* UBO declared as srv raw buffer */
+
+ VGPU10OpcodeToken0 opcode1;
+ VGPU10OperandToken0 operand1;
+
+ opcode1.value = 0;
+ opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW;
+ opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN;
+
+ operand1.value = 0;
+ operand1.numComponents = VGPU10_OPERAND_0_COMPONENT;
+ operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
+ operand1.indexDimension = VGPU10_OPERAND_INDEX_1D;
+ operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode1.value);
+ emit_dword(emit, operand1.value);
+ emit_dword(emit, i + emit->raw_buf_srv_start_index);
+ end_emit_instruction(emit);
+ }
+ else {
+
+ /* UBO declared as const buffer */
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dword(emit, operand0.value);
+ emit_dword(emit, i); /* which const buffer slot */
+ emit_dword(emit, emit->num_shader_consts[i]);
+ end_emit_instruction(emit);
+ }
}
}
@@ -5652,7 +5988,8 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
{
unsigned i;
- for (i = 0; i < emit->num_samplers; i++) {
+ for (i = 0; i < emit->key.num_samplers; i++) {
+
VGPU10OpcodeToken0 opcode0;
VGPU10OperandToken0 operand0;
@@ -5683,7 +6020,8 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
static unsigned
pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
unsigned num_samples,
- boolean is_array)
+ boolean is_array,
+ boolean is_uav)
{
switch (target) {
case PIPE_BUFFER:
@@ -5710,8 +6048,9 @@ pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
else
return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
case PIPE_TEXTURE_CUBE_ARRAY:
- return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
- VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
+ return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
+ (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
+ VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
default:
assert(!"Unexpected resource type");
return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
@@ -5725,7 +6064,8 @@ pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
unsigned num_samples,
- boolean is_array)
+ boolean is_array,
+ boolean is_uav)
{
if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
target = TGSI_TEXTURE_2D;
@@ -5746,7 +6086,8 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_SHADOWCUBE:
- return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
+ return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
+ VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
case TGSI_TEXTURE_SHADOW1D:
return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
case TGSI_TEXTURE_SHADOW2D:
@@ -5766,6 +6107,9 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
: VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
case TGSI_TEXTURE_CUBE_ARRAY:
+ return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
+ (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
+ VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
: VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
@@ -5811,6 +6155,9 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
/* Emit resource decl for each sampler */
for (i = 0; i < emit->num_samplers; i++) {
+ if (!(emit->info.samplers_declared & (1 << i)))
+ continue;
+
VGPU10OpcodeToken0 opcode0;
VGPU10OperandToken0 operand0;
VGPU10ResourceReturnTypeToken return_type;
@@ -5822,13 +6169,15 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
opcode0.resourceDimension =
tgsi_texture_to_resource_dimension(emit->sampler_target[i],
emit->key.tex[i].num_samples,
- emit->key.tex[i].is_array);
+ emit->key.tex[i].is_array,
+ FALSE);
}
else {
opcode0.resourceDimension =
pipe_texture_to_resource_dimension(emit->key.tex[i].target,
emit->key.tex[i].num_samples,
- emit->key.tex[i].is_array);
+ emit->key.tex[i].is_array,
+ FALSE);
}
opcode0.sampleCount = emit->key.tex[i].num_samples;
operand0.value = 0;
@@ -5882,6 +6231,240 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
return TRUE;
}
+
+/**
+ * Emit instruction to declare uav for the shader image
+ */
+static void
+emit_image_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i = 0;
+   unsigned unit = 0;
+   unsigned uav_mask = 0;
+
+   /* Emit uav decl for each image */
+   for (i = 0; i < emit->num_images; i++, unit++) {
+
+      /* Find the unit index of the next declared image.  The scan is
+       * bounded so that a num_images count larger than the number of bits
+       * set in image_mask cannot push the shift count past the width of
+       * the mask (undefined behavior) or spin forever.
+       */
+      while (unit < PIPE_MAX_SHADER_IMAGES &&
+             !(emit->image_mask & (1u << unit))) {
+         unit++;
+      }
+      if (unit >= PIPE_MAX_SHADER_IMAGES) {
+         assert(!"num_images exceeds the images recorded in image_mask");
+         break;
+      }
+
+      VGPU10OpcodeToken0 opcode0;
+      VGPU10OperandToken0 operand0;
+      VGPU10ResourceReturnTypeToken return_type;
+
+      /* Validate the uav index before it is used as a shift count below. */
+      assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID);
+
+      /* If the corresponding uav for the image is already declared,
+       * skip this image declaration.
+       */
+      if (uav_mask & (1u << emit->key.images[unit].uav_index))
+         continue;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED;
+      opcode0.uavResourceDimension =
+         tgsi_texture_to_resource_dimension(emit->image[unit].Resource,
+                                            0, emit->key.images[unit].is_array,
+                                            TRUE);
+
+      /* A single-layer view of a 3D texture is still declared as a
+       * 3D texture resource.
+       */
+      if (emit->key.images[unit].is_single_layer &&
+          emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) {
+         opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
+      }
+
+      /* Declare the uav as global coherent if the shader includes memory
+       * barrier instructions.
+       */
+      opcode0.globallyCoherent =
+         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
+
+      operand0.value = 0;
+      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
+      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+      /* All four components share the return type taken from the key. */
+      return_type.value = 0;
+      return_type.component0 =
+      return_type.component1 =
+      return_type.component2 =
+      return_type.component3 = emit->key.images[unit].return_type + 1;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, emit->key.images[unit].uav_index);
+      emit_dword(emit, return_type.value);
+      end_emit_instruction(emit);
+
+      /* Mark the uav is already declared */
+      uav_mask |= 1u << emit->key.images[unit].uav_index;
+   }
+
+   emit->uav_declared |= uav_mask;
+}
+
+
+/**
+ * Emit instruction to declare uav for the shader buffer
+ */
+static void
+emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+   unsigned uav_mask = 0;
+
+   /* Emit uav decl for each shader buffer */
+   for (i = 0; i < emit->num_shader_bufs; i++) {
+      VGPU10OpcodeToken0 opcode0;
+      VGPU10OperandToken0 operand0;
+
+      /* Validate the uav index before it is used as a shift count below;
+       * asserting only after the shift would let an invalid index reach
+       * the shift first.
+       */
+      assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID);
+
+      /* If the corresponding uav for the shader buf is already declared,
+       * skip this shader buffer declaration.
+       */
+      if (uav_mask & (1u << emit->key.shader_buf_uav_index[i]))
+         continue;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
+
+      /* Declare the uav as global coherent if the shader includes memory
+       * barrier instructions.
+       */
+      opcode0.globallyCoherent =
+         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
+
+      operand0.value = 0;
+      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
+      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, emit->key.shader_buf_uav_index[i]);
+      end_emit_instruction(emit);
+
+      /* Mark the uav is already declared */
+      uav_mask |= 1u << emit->key.shader_buf_uav_index[i];
+   }
+
+   emit->uav_declared |= uav_mask;
+}
+
+
+/**
+ * Emit instruction to declare thread group shared memory(tgsm) for shared memory
+ */
+static void
+emit_memory_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 dcl;
+   VGPU10OperandToken0 tgsm;
+
+   /* Nothing to declare unless a shared memory declaration was recorded. */
+   if (!emit->cs.shared_memory_declared)
+      return;
+
+   dcl.value = 0;
+   dcl.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW;
+
+   /* Set the globallyCoherent flag if the shader includes memory
+    * barrier instructions.
+    */
+   dcl.globallyCoherent =
+      (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
+
+   tgsm.value = 0;
+   tgsm.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   tgsm.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
+   tgsm.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   tgsm.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, dcl.value);
+   emit_dword(emit, tgsm.value);
+
+   /* In current state tracker, TGSI shader declares only one shared memory
+    * TODO: To fix TGSI shader in state tracker to get all shared memory
+    * declarations and then fix following indexing. For now, default index
+    * is 1 as per translated TGSI shader
+    */
+   emit_dword(emit, 1);
+   emit_dword(emit, emit->key.cs.mem_size); /* byte Count */
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Emit instruction to declare uav for atomic buffers
+ */
+static void
+emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned atomic_bufs_mask = emit->atomic_bufs_mask;
+   unsigned uav_mask = 0;
+
+   /* Emit uav decl for each atomic buffer */
+   while (atomic_bufs_mask) {
+      unsigned buf_index = u_bit_scan(&atomic_bufs_mask);
+      unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index];
+
+      /* Validate the uav index before it is used as a shift count below. */
+      assert(uav_index != SVGA3D_INVALID_ID);
+
+      /* If the corresponding uav for the atomic buffer is already declared,
+       * skip this atomic buffer declaration.
+       */
+      if (uav_mask & (1u << uav_index))
+         continue;
+
+      VGPU10OpcodeToken0 opcode0;
+      VGPU10OperandToken0 operand0;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
+      opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
+
+      /* Declare the uav as global coherent if the shader includes memory
+       * barrier instructions.
+       */
+      opcode0.globallyCoherent =
+         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
+      opcode0.uavHasCounter = 1;
+
+      operand0.value = 0;
+      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
+      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, uav_index);
+      end_emit_instruction(emit);
+
+      /* Mark the uav is already declared */
+      uav_mask |= 1u << uav_index;
+   }
+
+   emit->uav_declared |= uav_mask;
+
+   /* Allocate immediates to be used for index to the atomic buffers.
+    * Values are allocated four at a time; j carries on into the next loop.
+    */
+   unsigned j = 0;
+   for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) {
+      alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
+   }
+
+   /* Allocate immediates for the atomic counter index, continuing from the
+    * last value allocated above up to max_atomic_counter_index.
+    */
+   for (; j <= emit->max_atomic_counter_index; j+=4) {
+      alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
+   }
+}
+
+
/**
* Emit instruction with n=1, 2 or 3 source registers.
*/
@@ -6751,12 +7334,12 @@ emit_if(struct svga_shader_emitter_v10 *emit,
/**
- * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
+ * Emit code for conditional discard instruction (discard fragment if any of
* the register components are negative).
*/
static boolean
-emit_kill_if(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
+emit_cond_discard(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
{
unsigned tmp = get_temp_index(emit);
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
@@ -6774,8 +7357,9 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit,
if (!same_swizzle_terms(&inst->Src[0])) {
/* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
- * logically OR the swizzle terms. Most uses of KILL_IF only
- * test one channel so it's good to avoid these extra steps.
+ * logically OR the swizzle terms. Most uses of this conditional
+ * discard instruction only test one channel so it's good to
+ * avoid these extra steps.
*/
struct tgsi_full_src_register tmp_src_yyyy =
scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
@@ -6804,11 +7388,11 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit,
/**
- * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
+ * Emit code for the unconditional discard instruction.
*/
static boolean
-emit_kill(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
+emit_discard(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
{
struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
@@ -7834,7 +8418,7 @@ begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
swz->inst_dst = &inst->Dst[0];
swz->coord_src = &inst->Src[0];
- emit->fs.shadow_compare_units |= shadow_compare << unit;
+ emit->shadow_compare_units |= shadow_compare << unit;
}
@@ -8045,12 +8629,16 @@ emit_tex(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register coord;
int offsets[3];
struct tex_swizzle_info swz_info;
+ boolean compare_in_shader;
/* check that the sampler returns a float */
if (!is_valid_tex_instruction(emit, inst))
return TRUE;
- begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+ compare_in_shader = tgsi_is_shadow_target(target) &&
+ emit->key.tex[unit].compare_in_shader;
+
+ begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
get_texel_offsets(emit, inst, offsets);
@@ -8059,7 +8647,7 @@ emit_tex(struct svga_shader_emitter_v10 *emit,
/* SAMPLE dst, coord(s0), resource, sampler */
begin_emit_instruction(emit);
- if (tgsi_is_shadow_target(target))
+ if (tgsi_is_shadow_target(target) && !compare_in_shader)
opcode = VGPU10_OPCODE_SAMPLE_C;
else
opcode = VGPU10_OPCODE_SAMPLE;
@@ -8182,7 +8770,8 @@ emit_tg4(struct svga_shader_emitter_v10 *emit,
emit_resource_register(emit, unit);
/* sampler */
- sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
+ sampler = make_src_reg(TGSI_FILE_SAMPLER,
+ emit->key.tex[unit].sampler_index);
sampler.Register.SwizzleX =
sampler.Register.SwizzleY =
sampler.Register.SwizzleZ =
@@ -8222,7 +8811,8 @@ emit_tg4(struct svga_shader_emitter_v10 *emit,
emit_resource_register(emit, unit);
/* sampler */
- sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
+ sampler = make_src_reg(TGSI_FILE_SAMPLER,
+ emit->key.tex[unit].sampler_index);
sampler.Register.SwizzleX =
sampler.Register.SwizzleY =
sampler.Register.SwizzleZ =
@@ -8263,12 +8853,20 @@ emit_tex2(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register coord, ref;
int offsets[3];
struct tex_swizzle_info swz_info;
+ VGPU10_OPCODE_TYPE opcode;
+ boolean compare_in_shader;
/* check that the sampler returns a float */
if (!is_valid_tex_instruction(emit, inst))
return TRUE;
- begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+ compare_in_shader = emit->key.tex[unit].compare_in_shader;
+ if (compare_in_shader)
+ opcode = VGPU10_OPCODE_SAMPLE;
+ else
+ opcode = VGPU10_OPCODE_SAMPLE_C;
+
+ begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
get_texel_offsets(emit, inst, offsets);
@@ -8277,13 +8875,15 @@ emit_tex2(struct svga_shader_emitter_v10 *emit,
/* SAMPLE_C dst, coord, resource, sampler, ref */
begin_emit_instruction(emit);
- emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
+ emit_sample_opcode(emit, opcode,
inst->Instruction.Saturate, offsets);
emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
emit_src_register(emit, &coord);
emit_resource_register(emit, unit);
emit_sampler_register(emit, unit);
- emit_tex_compare_refcoord(emit, target, &ref);
+ if (opcode == VGPU10_OPCODE_SAMPLE_C) {
+ emit_tex_compare_refcoord(emit, target, &ref);
+ }
end_emit_instruction(emit);
end_tex_swizzle(emit, &swz_info);
@@ -8312,12 +8912,16 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
struct tgsi_full_src_register coord;
struct tex_swizzle_info swz_info;
+ boolean compare_in_shader;
/* check that the sampler returns a float */
if (!is_valid_tex_instruction(emit, inst))
return TRUE;
- begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+ compare_in_shader = tgsi_is_shadow_target(target) &&
+ emit->key.tex[unit].compare_in_shader;
+
+ begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
get_texel_offsets(emit, inst, offsets);
@@ -8330,7 +8934,7 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
/* SAMPLE dst, coord(tmp), resource, sampler */
begin_emit_instruction(emit);
- if (tgsi_is_shadow_target(target))
+ if (tgsi_is_shadow_target(target) && !compare_in_shader)
/* NOTE: for non-fragment shaders, we should use
* VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
*/
@@ -8604,6 +9208,7 @@ opcode_has_dbl_dst(unsigned opcode)
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_DFMA:
// XXX more TBD
return true;
default:
@@ -8629,6 +9234,7 @@ opcode_has_dbl_src(unsigned opcode)
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_DFMA:
// XXX more TBD
return true;
default:
@@ -8676,6 +9282,7 @@ check_double_dst_writemask(const struct tgsi_full_instruction *inst)
case TGSI_OPCODE_DRCP:
case TGSI_OPCODE_DSQRT:
case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_DFMA:
assert(writemask == TGSI_WRITEMASK_XYZW ||
writemask == TGSI_WRITEMASK_XY ||
writemask == TGSI_WRITEMASK_ZW);
@@ -9263,19 +9870,872 @@ emit_vmware(struct svga_shader_emitter_v10 *emit,
return TRUE;
}
+/**
+ * Emit a memory register
+ */
+
+/* Selects how emit_memory_register() builds the memory operand. */
+typedef enum {
+ /* operand is taken from inst->Dst[] and emitted in mask mode */
+ MEM_STORE = 0,
+ /* operand is taken from inst->Src[] and emitted in swizzle mode */
+ MEM_LOAD = 1,
+ /* 0-component operand whose index is taken from inst->Src[] */
+ MEM_ATOMIC_COUNTER
+} memory_op;
+
+static void
+emit_memory_register(struct svga_shader_emitter_v10 *emit,
+                     memory_op mem_op,
+                     const struct tgsi_full_instruction *inst,
+                     unsigned regIndex, unsigned writemask)
+{
+   VGPU10OperandToken0 token;
+   unsigned mem_index = 0;
+
+   /* Common part: a 1D-indexed, 4-component shared memory operand. */
+   token.value = 0;
+   token.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
+   token.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   token.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   if (mem_op == MEM_ATOMIC_COUNTER) {
+      /* No component selection; the index comes from the source register */
+      token.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      mem_index = inst->Src[regIndex].Register.Index;
+   }
+   else if (mem_op == MEM_STORE) {
+      /* Destination operand: mask mode with the caller's writemask */
+      token.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+      token.mask = writemask;
+      mem_index = inst->Dst[regIndex].Register.Index;
+   }
+   else if (mem_op == MEM_LOAD) {
+      /* Source operand: swizzle mode, swizzle copied from the TGSI source */
+      const struct tgsi_full_src_register *src = &inst->Src[regIndex];
+
+      token.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+      token.swizzleX = src->Register.SwizzleX;
+      token.swizzleY = src->Register.SwizzleY;
+      token.swizzleZ = src->Register.SwizzleZ;
+      token.swizzleW = src->Register.SwizzleW;
+      mem_index = src->Register.Index;
+   }
+   else {
+      assert(!"Unexpected memory opcode");
+   }
+
+   /* Operand token followed by the register index */
+   emit_dword(emit, token.value);
+   emit_dword(emit, mem_index);
+}
+
+
+/* Selects how emit_uav_register() builds the uav operand. */
+typedef enum {
+ /* operand emitted in mask mode with the caller's writemask */
+ UAV_STORE = 0,
+ /* operand emitted in swizzle mode with an XYZW swizzle */
+ UAV_LOAD = 1,
+ /* operand emitted as a 0-component operand */
+ UAV_ATOMIC = 2,
+ /* handled the same way as UAV_LOAD by emit_uav_register() */
+ UAV_RESQ = 3,
+} UAV_OP;
+
/**
- * Translate a single TGSI instruction to VGPU10.
+ * Emit a uav register
+ * \param res_index index of resource register
+ * \param uav_op UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode
+ * \param resourceType resource file type
+ * \param writemask resource writemask
+ */
+
+static void
+emit_uav_register(struct svga_shader_emitter_v10 *emit,
+                  unsigned res_index, UAV_OP uav_op,
+                  enum tgsi_file_type resourceType, unsigned writemask)
+{
+   VGPU10OperandToken0 token;
+   unsigned uav_slot = INVALID_INDEX;
+
+   /* Common part: a 1D-indexed, 4-component uav operand. */
+   token.value = 0;
+   token.operandType = VGPU10_OPERAND_TYPE_UAV;
+   token.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   token.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   /* Translate the resource index to the uav index stored in the key for
+    * that kind of resource.
+    */
+   if (resourceType == TGSI_FILE_IMAGE) {
+      uav_slot = emit->key.images[res_index].uav_index;
+   }
+   else if (resourceType == TGSI_FILE_BUFFER) {
+      uav_slot = emit->key.shader_buf_uav_index[res_index];
+   }
+   else if (resourceType == TGSI_FILE_HW_ATOMIC) {
+      uav_slot = emit->key.atomic_buf_uav_index[res_index];
+   }
+   else {
+      assert(0);
+   }
+
+   /* Component selection depends on how the operand is used; any other
+    * operation keeps the defaults set above.
+    */
+   if (uav_op == UAV_ATOMIC) {
+      token.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   }
+   else if (uav_op == UAV_STORE) {
+      token.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+      token.mask = writemask;
+   }
+   else if (uav_op == UAV_LOAD || uav_op == UAV_RESQ) {
+      token.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+      token.swizzleX = VGPU10_COMPONENT_X;
+      token.swizzleY = VGPU10_COMPONENT_Y;
+      token.swizzleZ = VGPU10_COMPONENT_Z;
+      token.swizzleW = VGPU10_COMPONENT_W;
+   }
+
+   /* Operand token followed by the uav index */
+   emit_dword(emit, token.value);
+   emit_dword(emit, uav_slot);
+}
+
+
+/**
+ * A helper function to emit the uav address.
+ * For memory, buffer, and image resource, it is set to the specified address.
+ * For HW atomic counter, the address is the sum of the address offset and the
+ * offset into the HW atomic buffer as specified by the register index.
+ * It is also possible to specify the counter index as an indirect address.
+ * And in this case, the uav address will be the sum of the address offset and the
+ * counter index specified in the indirect address.
+ */
+static
+struct tgsi_full_src_register
+emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit,
+                     enum tgsi_file_type resourceType,
+                     unsigned resourceIndex,
+                     unsigned resourceIndirect,
+                     unsigned resourceIndirectIndex,
+                     const struct tgsi_full_src_register *addr_reg)
+{
+   unsigned addr_tmp;
+   struct tgsi_full_dst_register addr_dst;
+   struct tgsi_full_src_register addr_src;
+   struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2);
+
+   /* The address is built up in a temporary; addr_src, which reads that
+    * temporary, is what gets returned to the caller.
+    */
+   addr_tmp = get_temp_index(emit);
+   addr_dst = make_dst_temp_reg(addr_tmp);
+   addr_src = make_src_temp_reg(addr_tmp);
+
+   /* specified address offset */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg);
+
+   /* For HW atomic counter, we need to find the index to the
+    * HW atomic buffer.
+    */
+   if (resourceType == TGSI_FILE_HW_ATOMIC) {
+      if (resourceIndirect) {
+
+         /**
+          * uav addr offset = counter layout offset +
+          *                   counter indirect index address + address offset
+          */
+
+         /* counter layout offset */
+         struct tgsi_full_src_register layout_offset;
+         layout_offset =
+            make_immediate_reg_int(emit, resourceIndex);
+
+         /* counter layout offset + address offset */
+         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
+                              &addr_src, &layout_offset);
+
+         /* counter indirect index address */
+         unsigned indirect_addr =
+            emit->address_reg_index[resourceIndirectIndex];
+
+         struct tgsi_full_src_register indirect_addr_src =
+            make_src_temp_reg(indirect_addr);
+
+         indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
+
+         /* counter layout offset + address offset + counter indirect address */
+         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
+                              &addr_src, &indirect_addr_src);
+
+      } else {
+         struct tgsi_full_src_register index_src;
+
+         index_src = make_immediate_reg_int(emit, resourceIndex);
+
+         /* uav addr offset = counter index address + address offset.
+          * Both operands are integers (the index is an integer immediate),
+          * so this must be an integer add like the indirect path above,
+          * not the floating point ADD.
+          */
+         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
+                              &addr_src, &index_src);
+      }
+
+      /* HW atomic buffer is declared as raw buffer, so the buffer address is
+       * the byte offset, so we need to multiply the counter addr offset by 4
+       * (a left shift by 2).
+       */
+      emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst,
+                           &addr_src, &two);
+   }
+   else if (resourceType == TGSI_FILE_IMAGE) {
+      if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D)
+          && emit->key.images[resourceIndex].is_single_layer) {
+
+         struct tgsi_full_dst_register addr_dst_z =
+            writemask_dst(&addr_dst, TGSI_WRITEMASK_Z);
+         struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
+
+         /* For non-layered 3D texture image view, we have to make sure the z
+          * component of the address offset is set to 0.
+          */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z,
+                              &zero);
+      }
+   }
+
+   return addr_src;
+}
+
+
+
+/**
+ * A helper function to expand indirect indexing to uav resource
+ * by looping through the resource array, compare the indirect index and
+ * emit the instruction for each resource in the array.
+ */
+static void
+loop_instruction(unsigned index, unsigned count,
+ struct tgsi_full_src_register *addr_index,
+ void (*fb)(struct svga_shader_emitter_v10 *,
+ const struct tgsi_full_instruction *, unsigned),
+ struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* No resources left: emit nothing, which ends the recursion. */
+ if (count == 0)
+ return;
+
+ /* Every call after the first runs inside the IF opened by the previous
+ * index, so its comparison goes into that IF's ELSE branch.
+ */
+ if (index > 0) {
+ /* ELSE */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
+ }
+
+ struct tgsi_full_src_register index_src =
+ make_immediate_reg_int(emit, index);
+
+ unsigned tmp_index = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
+ struct tgsi_full_src_register tmp_src_x =
+ scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
+
+ /* IEQ tmp, addr_tmp_index, index */
+ emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst,
+ addr_index, &index_src);
+
+ /* IF tmp */
+ emit_if(emit, &tmp_src_x);
+
+ free_temp_indexes(emit);
+
+ /* Emit the instruction for the resource at this index via the callback. */
+ (*fb)(emit, inst, index);
+
+ /* Emit the remaining resources before closing this IF, so the tests end
+ * up nested: IF .. ELSE IF .. ELSE IF .. ENDIF ENDIF ENDIF.
+ */
+ loop_instruction(index+1, count-1, addr_index, fb, emit, inst);
+
+ /* ENDIF */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
+}
+
+
+/**
+ * A helper function to emit the load instruction.
+ */
+static void
+emit_load_instruction(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_instruction *inst,
+                      unsigned resourceIndex)
+{
+   VGPU10OpcodeToken0 token0;
+   struct tgsi_full_src_register addr_src;
+   enum tgsi_file_type resourceType = inst->Src[0].Register.File;
+
+   /* Resolve the resource address for this resource first */
+   addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex,
+                                   inst->Src[0].Register.Indirect,
+                                   inst->Src[0].Indirect.Index,
+                                   &inst->Src[1]);
+
+   /* LOAD resource, address, src */
+   begin_emit_instruction(emit);
+
+   token0.value = 0;
+
+   /* Memory, shader buffers and HW atomic buffers use the raw load with a
+    * scalar (x) address; everything else uses the typed uav load.
+    */
+   if (resourceType == TGSI_FILE_MEMORY ||
+       resourceType == TGSI_FILE_BUFFER ||
+       resourceType == TGSI_FILE_HW_ATOMIC) {
+      token0.opcodeType = VGPU10_OPCODE_LD_RAW;
+      addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
+   }
+   else {
+      token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
+   }
+
+   /* This statement used to end in a comma, which silently fused it with
+    * the emit_dword() call below through the comma operator.
+    */
+   token0.saturate = inst->Instruction.Saturate;
+   emit_dword(emit, token0.value);
+
+   emit_dst_register(emit, &inst->Dst[0]);
+   emit_src_register(emit, &addr_src);
+
+   if (resourceType == TGSI_FILE_MEMORY) {
+      emit_memory_register(emit, MEM_LOAD, inst, 0, 0);
+   } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
+      /* The atomic buffer is selected by the dimension index, not by
+       * resourceIndex.
+       */
+      emit_uav_register(emit, inst->Src[0].Dimension.Index,
+                        UAV_LOAD, inst->Src[0].Register.File, 0);
+   } else {
+      emit_uav_register(emit, resourceIndex,
+                        UAV_LOAD, inst->Src[0].Register.File, 0);
+   }
+
+   end_emit_instruction(emit);
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit uav / memory load instruction.
+ *
+ * When the resource operand (Src[0]) is an indirectly indexed
+ * TGSI_FILE_BUFFER or TGSI_FILE_IMAGE register, the load is expanded via
+ * loop_instruction() over all declared shader buffers (num_shader_bufs)
+ * or images (num_images), with emit_load_instruction() as the
+ * per-resource callback.  Otherwise a single load is emitted for
+ * Src[0].Register.Index.
+ *
+ * Always returns TRUE.
*/
static boolean
-emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
- unsigned inst_number,
- const struct tgsi_full_instruction *inst)
+emit_load(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
{
- const enum tgsi_opcode opcode = inst->Instruction.Opcode;
+ enum tgsi_file_type resourceType = inst->Src[0].Register.File;
+ unsigned resourceIndex = inst->Src[0].Register.Index;
- if (emit->skip_instruction)
+ /* If the resource register has indirect index, we will need
+ * to expand it since SM5 device does not support indirect indexing
+ * for uav.
+ */
+ if (inst->Src[0].Register.Indirect &&
+ (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
+
+ unsigned indirect_index = inst->Src[0].Indirect.Index;
+ unsigned num_resources =
+ resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
+ emit->num_images;
+
+ /* indirect index tmp register: the temporary that backs the TGSI
+ * address register, read through its .x component only.
+ */
+ unsigned indirect_addr = emit->address_reg_index[indirect_index];
+ struct tgsi_full_src_register indirect_addr_src =
+ make_src_temp_reg(indirect_addr);
+ indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
+
+ /* Add offset to the indirect index.
+ * The sum is written back into the address temporary itself.
+ * NOTE(review): the added offset therefore remains in that temporary
+ * after this instruction -- confirm no later use of the same address
+ * register expects the unmodified value.
+ */
+ if (inst->Src[0].Register.Index != 0) {
+ struct tgsi_full_src_register offset =
+ make_immediate_reg_int(emit, inst->Src[0].Register.Index);
+ struct tgsi_full_dst_register indirect_addr_dst =
+ make_dst_temp_reg(indirect_addr);
+ emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
+ &indirect_addr_src, &offset);
+ }
+
+ /* Loop through the resource array to find which resource to use.
+ * Candidates 0 .. num_resources-1 are each tried against the
+ * indirect index.
+ */
+ loop_instruction(0, num_resources, &indirect_addr_src,
+ emit_load_instruction, emit, inst);
+ }
+ else {
+ emit_load_instruction(emit, inst, resourceIndex);
+ }
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * A helper function to emit a store instruction for the resource selected
+ * by resourceIndex.
+ *
+ * The resource file and writemask come from inst->Dst[0], the address
+ * operand from inst->Src[0] and the value to store from inst->Src[1].
+ *
+ * Three strategies are used, depending on the resource file and writemask:
+ *  - TGSI_FILE_IMAGE with a partial writemask: the current texel is loaded
+ *    into a temporary, the written components are overwritten with MOV, and
+ *    the whole temporary is stored back with STORE_UAV_TYPED.
+ *  - TGSI_FILE_BUFFER / TGSI_FILE_MEMORY with a writemask other than
+ *    .x, .xy, .xyz or .xyzw: one STORE_RAW with writemask .x is emitted per
+ *    enabled component, advancing the byte address by 4 per component.
+ *  - everything else: a single store with the original writemask.
+ *
+ * NOTE(review): the address is resolved with inst->Dst[0].Register.Index
+ * rather than resourceIndex (emit_load_instruction passes resourceIndex) --
+ * confirm this is intended when called through loop_instruction().
+ */
+static void
+emit_store_instruction(struct svga_shader_emitter_v10 *emit,
+                       const struct tgsi_full_instruction *inst,
+                       unsigned resourceIndex)
+{
+   VGPU10OpcodeToken0 token0;
+   enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
+   unsigned writemask = inst->Dst[0].Register.WriteMask;
+   struct tgsi_full_src_register addr_src;
+
+   unsigned tmp_index = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
+   struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index);
+   struct tgsi_full_dst_register tmp_dst;
+
+   struct tgsi_full_src_register src = inst->Src[1];
+   struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
+
+   boolean needLoad = FALSE;
+   boolean needPerComponentStore = FALSE;
+   unsigned swizzles = 0;
+
+   /* Resolve the resource address for this resource first */
+   addr_src = emit_uav_addr_offset(emit, resourceType,
+                                   inst->Dst[0].Register.Index,
+                                   inst->Dst[0].Register.Indirect,
+                                   inst->Dst[0].Indirect.Index,
+                                   &inst->Src[0]);
+
+   /* First check the writemask to see if it can be supported
+    * by the store instruction.
+    * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory,
+    * we can adjust the address offset, and do a per-component store.
+    * store_uav_typed only allows .xyzw. In this case, we need to
+    * do a load first, update the temporary and then issue the
+    * store. This does have a small risk that if different threads
+    * update different components of the same address, data might not be
+    * in sync.
+    */
+   if (resourceType == TGSI_FILE_IMAGE) {
+      needLoad = writemask != TGSI_WRITEMASK_XYZW;
+   }
+   else if (resourceType == TGSI_FILE_BUFFER ||
+            resourceType == TGSI_FILE_MEMORY) {
+      if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY ||
+            writemask == TGSI_WRITEMASK_XYZ ||
+            writemask == TGSI_WRITEMASK_XYZW)) {
+         needPerComponentStore = TRUE;
+      }
+   }
+
+   if (needLoad) {
+      assert(resourceType == TGSI_FILE_IMAGE);
+
+      /* LOAD resource, address, src */
+      begin_emit_instruction(emit);
+
+      token0.value = 0;
+      token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
+      /* Was terminated with a comma, chaining emit_dword() onto the
+       * assignment through the comma operator.
+       */
+      token0.saturate = inst->Instruction.Saturate;
+      emit_dword(emit, token0.value);
+
+      emit_dst_register(emit, &tmp_dst_xyzw);
+      emit_src_register(emit, &addr_src);
+      emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0);
+
+      end_emit_instruction(emit);
+
+      /* MOV tmp(writemask) src */
+      tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]);
+
+      /* Now set the writemask to xyzw for the store_uav_typed instruction */
+      writemask = TGSI_WRITEMASK_XYZW;
+   }
+   else if (needPerComponentStore) {
+      /* Save the src swizzles, packed two bits per component (x lowest) */
+      swizzles = src.Register.SwizzleX |
+                 src.Register.SwizzleY << 2 |
+                 src.Register.SwizzleZ << 4 |
+                 src.Register.SwizzleW << 6;
+   }
+
+   boolean storeDone = FALSE;
+   unsigned perComponentWritemask = writemask;
+   unsigned shift = 0;
+   struct tgsi_full_src_register shift_src;
+
+   /* Runs once, or once per enabled component for per-component stores */
+   while (!storeDone) {
+
+      if (needPerComponentStore) {
+         /* Skip over disabled components, counting how many components
+          * the address has to advance.
+          */
+         assert(perComponentWritemask);
+         while (!(perComponentWritemask & TGSI_WRITEMASK_X)) {
+            shift++;
+            perComponentWritemask >>= 1;
+         }
+
+         /* First adjust the addr_src to the next component */
+         if (shift != 0) {
+            struct tgsi_full_dst_register addr_dst =
+               make_dst_temp_reg(addr_src.Register.Index);
+            shift_src = make_immediate_reg_int(emit, shift);
+            /* addr = 4 * shift + addr */
+            emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four,
+                                 &shift_src, &addr_src);
+
+            /* Adjust the src swizzle as well */
+            swizzles >>= (shift * 2);
+         }
+
+         /* Now the address offset is set to the next component,
+          * we can set the writemask to .x and make sure to set
+          * the src swizzle as well.
+          */
+         src.Register.SwizzleX = swizzles & 0x3;
+         writemask = TGSI_WRITEMASK_X;
+
+         /* Shift for the next component check */
+         perComponentWritemask >>= 1;
+         shift = 1;
+      }
+
+      /* STORE resource, address, src */
+      begin_emit_instruction(emit);
+
+      token0.value = 0;
+      token0.saturate = inst->Instruction.Saturate;
+
+      if (resourceType == TGSI_FILE_MEMORY) {
+         token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
+         addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
+         emit_dword(emit, token0.value);
+         emit_memory_register(emit, MEM_STORE, inst, 0, writemask);
+      }
+      else if (resourceType == TGSI_FILE_BUFFER ||
+               resourceType == TGSI_FILE_HW_ATOMIC) {
+         token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
+         addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
+         emit_dword(emit, token0.value);
+         emit_uav_register(emit, resourceIndex, UAV_STORE,
+                           resourceType, writemask);
+      }
+      else {
+         token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED;
+         emit_dword(emit, token0.value);
+         emit_uav_register(emit, resourceIndex, UAV_STORE,
+                           resourceType, writemask);
+      }
+
+      emit_src_register(emit, &addr_src);
+
+      /* After a read-modify-write the merged temporary is what gets
+       * stored; otherwise the (possibly re-swizzled) source value.
+       */
+      if (needLoad)
+         emit_src_register(emit, &tmp_src);
+      else
+         emit_src_register(emit, &src);
+
+      end_emit_instruction(emit);
+
+      if (!needPerComponentStore || !perComponentWritemask)
+         storeDone = TRUE;
+   }
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit uav / memory store instruction.
+ *
+ * The resource operand is inst->Dst[0].  An indirectly indexed
+ * TGSI_FILE_BUFFER or TGSI_FILE_IMAGE resource is expanded through
+ * loop_instruction() with emit_store_instruction() as the per-resource
+ * callback; any other resource is stored to directly.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+emit_store(struct svga_shader_emitter_v10 *emit,
+           const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_dst_register *res = &inst->Dst[0];
+   const enum tgsi_file_type file = res->Register.File;
+
+   /* SM5 device does not support indirect indexing for uav, so an
+    * indirectly indexed buffer or image has to be expanded.
+    */
+   const boolean expand =
+      res->Register.Indirect &&
+      (file == TGSI_FILE_BUFFER || file == TGSI_FILE_IMAGE);
+
+   if (!expand) {
+      emit_store_instruction(emit, inst, res->Register.Index);
+   }
+   else {
+      unsigned count;
+
+      if (file == TGSI_FILE_BUFFER)
+         count = emit->num_shader_bufs;
+      else
+         count = emit->num_images;
+
+      /* Temporary backing the address register, read as a scalar (.x) */
+      const unsigned addr_tmp =
+         emit->address_reg_index[res->Indirect.Index];
+      struct tgsi_full_src_register addr_x = make_src_temp_reg(addr_tmp);
+      addr_x = scalar_src(&addr_x, TGSI_SWIZZLE_X);
+
+      /* Fold the constant part of the index into the address temporary */
+      if (res->Register.Index != 0) {
+         struct tgsi_full_src_register base =
+            make_immediate_reg_int(emit, res->Register.Index);
+         struct tgsi_full_dst_register addr_out =
+            make_dst_temp_reg(addr_tmp);
+
+         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_out,
+                              &addr_x, &base);
+      }
+
+      /* Try every resource in turn to find the one being addressed */
+      loop_instruction(0, count, &addr_x,
+                       emit_store_instruction, emit, inst);
+   }
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * A helper function to emit an atomic instruction for the resource
+ * selected by resourceIndex.
+ *
+ * The VGPU10 opcode is taken from emit->cur_atomic_opcode.  The resource
+ * operand is inst->Src[0], the address operand inst->Src[1] and the value
+ * operand inst->Src[2]; VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH additionally
+ * takes inst->Src[3].  Value operands are emitted with their .x component
+ * replicated.
+ *
+ * The emitted operand order is: dst, resource, address, value(s).
+ *
+ * NOTE(review): the address is resolved with inst->Src[0].Register.Index
+ * rather than resourceIndex (emit_load_instruction passes resourceIndex) --
+ * confirm this is intended when called through loop_instruction().
+ */
+static void
+emit_atomic_instruction(struct svga_shader_emitter_v10 *emit,
+                        const struct tgsi_full_instruction *inst,
+                        unsigned resourceIndex)
+{
+   VGPU10OpcodeToken0 token0;
+   const enum tgsi_file_type resourceType = inst->Src[0].Register.File;
+   struct tgsi_full_src_register addr_src;
+   const VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode;
+
+   /* Resolve the resource address */
+   addr_src = emit_uav_addr_offset(emit, resourceType,
+                                   inst->Src[0].Register.Index,
+                                   inst->Src[0].Register.Indirect,
+                                   inst->Src[0].Indirect.Index,
+                                   &inst->Src[1]);
+
+   /* Emit the atomic operation */
+   begin_emit_instruction(emit);
+
+   token0.value = 0;
+   token0.opcodeType = opcode;
+   /* Was terminated with a comma, chaining emit_dword() onto the
+    * assignment through the comma operator.
+    */
+   token0.saturate = inst->Instruction.Saturate;
+   emit_dword(emit, token0.value);
+
+   emit_dst_register(emit, &inst->Dst[0]);
+
+   if (resourceType == TGSI_FILE_MEMORY) {
+      emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0);
+   } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
+      /* HW atomic resources are selected by the dimension index */
+      assert(inst->Src[0].Register.Dimension == 1);
+      emit_uav_register(emit, inst->Src[0].Dimension.Index,
+                        UAV_ATOMIC, resourceType, 0);
+   } else {
+      emit_uav_register(emit, resourceIndex,
+                        UAV_ATOMIC, resourceType, 0);
+   }
+
+   /* resource address offset */
+   emit_src_register(emit, &addr_src);
+
+   /* value operand, .x replicated */
+   struct tgsi_full_src_register value_x =
+      swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   emit_src_register(emit, &value_x);
+
+   if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) {
+      /* compare-exchange carries one more operand, .x replicated */
+      struct tgsi_full_src_register value2_x =
+         swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+      emit_src_register(emit, &value2_x);
+   }
+
+   end_emit_instruction(emit);
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit atomic instruction.
+ *
+ * Records opcode in emit->cur_atomic_opcode for emit_atomic_instruction()
+ * and then emits the operation on the resource named by inst->Src[0].
+ * An indirectly indexed TGSI_FILE_BUFFER or TGSI_FILE_IMAGE resource is
+ * expanded through loop_instruction(); any other resource is accessed
+ * directly.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+emit_atomic(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst,
+            VGPU10_OPCODE_TYPE opcode)
+{
+   enum tgsi_file_type resourceType = inst->Src[0].Register.File;
+   unsigned resourceIndex = inst->Src[0].Register.Index;
+
+   emit->cur_atomic_opcode = opcode;
+
+   /* If the resource register has indirect index, we will need
+    * to expand it since SM5 device does not support indirect indexing
+    * for uav.
+    *
+    * The resource of an atomic is Src[0] (Dst[0] only receives the
+    * result), so the indirect information must be read from Src[0].
+    */
+   if (inst->Src[0].Register.Indirect &&
+       (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
+
+      unsigned indirect_index = inst->Src[0].Indirect.Index;
+      unsigned num_resources =
+         resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
+                                            emit->num_images;
+
+      /* indirect index tmp register */
+      unsigned indirect_addr = emit->address_reg_index[indirect_index];
+      struct tgsi_full_src_register indirect_addr_src =
+         make_src_temp_reg(indirect_addr);
+      indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
+
+      /* Add offset to the indirect index, as emit_load() and emit_store()
+       * do, so that a non-zero base index selects the right resource.
+       */
+      if (resourceIndex != 0) {
+         struct tgsi_full_src_register offset =
+            make_immediate_reg_int(emit, resourceIndex);
+         struct tgsi_full_dst_register indirect_addr_dst =
+            make_dst_temp_reg(indirect_addr);
+         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
+                              &indirect_addr_src, &offset);
+      }
+
+      /* Loop through the resource array to find which resource to use.
+       */
+      loop_instruction(0, num_resources, &indirect_addr_src,
+                       emit_atomic_instruction, emit, inst);
+   }
+   else {
+      emit_atomic_instruction(emit, inst, resourceIndex);
+   }
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit barrier instruction.
+ *
+ * The barrier is translated to a VGPU10 SYNC token.  In a compute shader
+ * the token always synchronizes the threads in the group, and also covers
+ * thread-group shared memory and/or UAV memory (group scope) when those
+ * have been declared.  In other stages the token requests a global UAV
+ * memory sync.  In a tessellation control shader at version 50 nothing is
+ * emitted and only a debug message is issued.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+emit_barrier(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   assert(emit->version >= 50);
+
+   if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) {
+      /* SM5 device doesn't support BARRIER in tcs . If barrier is used
+       * in shader, don't do anything for this opcode and continue rest
+       * of shader translation
+       */
+      util_debug_message(&emit->svga_debug_callback, INFO,
+                         "barrier instruction is not supported in tessellation control shader\n");
       return TRUE;
+   }
+
+   VGPU10OpcodeToken0 token0;
+
+   token0.value = 0;
+   token0.opcodeType = VGPU10_OPCODE_SYNC;
+
+   if (emit->unit != PIPE_SHADER_COMPUTE) {
+      token0.syncUAVMemoryGlobal = 1;
+   }
+   else {
+      token0.syncThreadsInGroup = 1;
+
+      if (emit->uav_declared)
+         token0.syncUAVMemoryGroup = 1;
+
+      if (emit->cs.shared_memory_declared)
+         token0.syncThreadGroupShared = 1;
+   }
+
+   assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
+          token0.syncThreadGroupShared);
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, token0.value);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+/**
+ * Emit memory barrier instruction.
+ *
+ * The barrier type is read from the immediate referenced by Src[0]
+ * (register index and X swizzle) and translated to a VGPU10 SYNC token:
+ *
+ *  - shader buffer, atomic buffer or image bits request a global UAV
+ *    memory sync in every stage;
+ *  - in a compute shader only, the thread-group bit requests a group
+ *    scope UAV memory sync when none of the bits above is set, and the
+ *    shared-memory bit requests a thread-group shared memory sync.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+emit_memory_barrier(struct svga_shader_emitter_v10 *emit,
+                    const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *type_reg = &inst->Src[0];
+   const unsigned bartype =
+      emit->immediates[type_reg->Register.Index]
+                      [type_reg->Register.SwizzleX].Int;
+
+   /* Barrier bits that map to UAV accesses */
+   const unsigned uav_bits = TGSI_MEMBAR_SHADER_BUFFER |
+                             TGSI_MEMBAR_ATOMIC_BUFFER |
+                             TGSI_MEMBAR_SHADER_IMAGE;
+
+   /* Group-scope and shared-memory options are only used for compute;
+    * for graphics stages, only sync_uglobal is available.
+    */
+   const boolean in_compute = (emit->unit == PIPE_SHADER_COMPUTE);
+
+   VGPU10OpcodeToken0 token0;
+
+   token0.value = 0;
+   token0.opcodeType = VGPU10_OPCODE_SYNC;
+
+   if (bartype & uav_bits) {
+      token0.syncUAVMemoryGlobal = 1;
+   }
+   else if (in_compute && (bartype & TGSI_MEMBAR_THREAD_GROUP)) {
+      token0.syncUAVMemoryGroup = 1;
+   }
+
+   if (in_compute && (bartype & TGSI_MEMBAR_SHARED)) {
+      token0.syncThreadGroupShared = 1;
+   }
+
+   assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
+          token0.syncThreadGroupShared);
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, token0.value);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_RESQ (image size) instruction.
+ *
+ * The image is selected by Src[0].Register.Index and its target is looked
+ * up in emit->image[]:
+ *  - cube array: the size is copied with MOV from the constant register
+ *    at emit->image_size_index + image index;
+ *  - buffer: a BUFINFO instruction is emitted;
+ *  - anything else: a RESINFO instruction returning unsigned integers is
+ *    emitted, with an immediate 0 as its source operand.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+emit_resq(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register imm_zero = make_immediate_reg_int(emit, 0);
+   const unsigned image_index = inst->Src[0].Register.Index;
+   const unsigned target = emit->image[image_index].Resource;
+
+   if (target == TGSI_TEXTURE_CUBE_ARRAY) {
+      struct tgsi_full_src_register size_const =
+         make_src_const_reg(emit->image_size_index + image_index);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                           &size_const);
+      return TRUE;
+   }
+
+   const boolean is_buffer = (target == TGSI_TEXTURE_BUFFER);
+
+   begin_emit_instruction(emit);
+
+   if (is_buffer)
+      emit_opcode(emit, VGPU10_OPCODE_BUFINFO, FALSE);
+   else
+      emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
+
+   emit_dst_register(emit, &inst->Dst[0]);
+
+   /* Only RESINFO takes the extra immediate-zero source operand */
+   if (!is_buffer)
+      emit_src_register(emit, &imm_zero);
+
+   emit_uav_register(emit, image_index,
+                     UAV_RESQ, inst->Src[0].Register.File, 0);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+static boolean
+emit_instruction(struct svga_shader_emitter_v10 *emit,
+ unsigned inst_number,
+ const struct tgsi_full_instruction *inst)
+{
+ const enum tgsi_opcode opcode = inst->Instruction.Opcode;
switch (opcode) {
case TGSI_OPCODE_ADD:
@@ -9397,9 +10857,9 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
case TGSI_OPCODE_IF:
return emit_if(emit, &inst->Src[0]);
case TGSI_OPCODE_KILL:
- return emit_kill(emit, inst);
+ return emit_discard(emit, inst);
case TGSI_OPCODE_KILL_IF:
- return emit_kill_if(emit, inst);
+ return emit_cond_discard(emit, inst);
case TGSI_OPCODE_LG2:
return emit_lg2(emit, inst);
case TGSI_OPCODE_LIT:
@@ -9496,12 +10956,14 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
return emit_simple(emit, inst);
case TGSI_OPCODE_INTERP_OFFSET:
return emit_interp_offset(emit, inst);
+ case TGSI_OPCODE_FMA:
+ case TGSI_OPCODE_DFMA:
+ return emit_simple(emit, inst);
/* The following opcodes should never be seen here. We return zero
* for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
* FMA_SUPPORTED, LDEXP_SUPPORTED queries.
*/
- case TGSI_OPCODE_FMA:
case TGSI_OPCODE_LDEXP:
case TGSI_OPCODE_DSSG:
case TGSI_OPCODE_DFRACEXP:
@@ -9515,31 +10977,49 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
return FALSE;
case TGSI_OPCODE_LOAD:
+ return emit_load(emit, inst);
+
case TGSI_OPCODE_STORE:
+ return emit_store(emit, inst);
+
case TGSI_OPCODE_ATOMAND:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND);
+
case TGSI_OPCODE_ATOMCAS:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
+
case TGSI_OPCODE_ATOMIMAX:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX);
+
case TGSI_OPCODE_ATOMIMIN:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN);
+
case TGSI_OPCODE_ATOMOR:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR);
+
case TGSI_OPCODE_ATOMUADD:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD);
+
case TGSI_OPCODE_ATOMUMAX:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX);
+
case TGSI_OPCODE_ATOMUMIN:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN);
+
case TGSI_OPCODE_ATOMXCHG:
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH);
+
case TGSI_OPCODE_ATOMXOR:
- return FALSE;
+ return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR);
+
case TGSI_OPCODE_BARRIER:
- if (emit->unit == PIPE_SHADER_TESS_CTRL) {
- /* SM5 device doesn't support BARRIER in tcs . If barrier is used
- * in shader, don't do anything for this opcode and continue rest
- * of shader translation
- */
- pipe_debug_message(&emit->svga_debug_callback, INFO,
- "barrier instruction is not supported in tessellation control shader\n");
- return TRUE;
- }
- else {
- return emit_simple(emit, inst);
- }
+ return emit_barrier(emit, inst);
+
+ case TGSI_OPCODE_MEMBAR:
+ return emit_memory_barrier(emit, inst);
+
+ case TGSI_OPCODE_RESQ:
+ return emit_resq(emit, inst);
case TGSI_OPCODE_END:
if (!emit_post_helpers(emit))
@@ -9557,6 +11037,38 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
/**
+ * Translate a single TGSI instruction to VGPU10.
+ *
+ * While emit->skip_instruction is set nothing is emitted and TRUE is
+ * returned.  Otherwise the result of emit_instruction() is returned.  If
+ * the translation left emit->reemit_tgsi_instruction set, the output
+ * pointer is rewound to where this instruction's tokens began and the
+ * flag is cleared again.
+ */
+static boolean
+emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
+                        unsigned inst_number,
+                        const struct tgsi_full_instruction *inst)
+{
+   if (emit->skip_instruction)
+      return TRUE;
+
+   /* Token count before anything is emitted for this instruction */
+   const unsigned first_token = emit_get_num_tokens(emit);
+
+   emit->reemit_tgsi_instruction = FALSE;
+
+   const boolean result = emit_instruction(emit, inst_number, inst);
+
+   if (!emit->reemit_tgsi_instruction)
+      return result;
+
+   /* Reset emit->ptr to where the translation of this tgsi instruction
+    * started.
+    */
+   emit->ptr = (char *) ((VGPU10OpcodeToken0 *) emit->buf + first_token);
+   emit->reemit_tgsi_instruction = FALSE;
+
+   return result;
+}
+
+
+/**
* Emit the extra instructions to adjust the vertex position.
* There are two possible adjustments:
* 1. Converting from Gallium to VGPU10 coordinate space by applying the
@@ -10232,50 +11744,61 @@ emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
{
assert(emit->unit == PIPE_SHADER_TESS_CTRL);
assert(emit->tcs.control_point_phase);
- assert(emit->tcs.control_point_input_index != INVALID_INDEX);
assert(emit->tcs.control_point_out_index != INVALID_INDEX);
assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
- /* UARL ADDR[INDEX].x INVOCATION.xxxx */
+ struct tgsi_full_dst_register output_control_point;
+ output_control_point =
+ make_dst_output_reg(emit->tcs.control_point_out_index);
- struct tgsi_full_src_register invocation_src;
- struct tgsi_full_dst_register addr_dst;
- struct tgsi_full_dst_register addr_dst_x;
- unsigned addr_tmp;
+ if (emit->tcs.control_point_input_index == INVALID_INDEX) {
+ /* MOV OUTPUT 0.0f */
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+ begin_emit_instruction(emit);
+ emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
+ emit_dst_register(emit, &output_control_point);
+ emit_src_register(emit, &zero);
+ end_emit_instruction(emit);
+ }
+ else {
+ /* UARL ADDR[INDEX].x INVOCATION.xxxx */
- addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
- addr_dst = make_dst_temp_reg(addr_tmp);
- addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
+ struct tgsi_full_src_register invocation_src;
+ struct tgsi_full_dst_register addr_dst;
+ struct tgsi_full_dst_register addr_dst_x;
+ unsigned addr_tmp;
- invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
- emit->tcs.invocation_id_sys_index);
+ addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
+ addr_dst = make_dst_temp_reg(addr_tmp);
+ addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
- begin_emit_instruction(emit);
- emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
- emit_dst_register(emit, &addr_dst_x);
- emit_src_register(emit, &invocation_src);
- end_emit_instruction(emit);
+ invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
+ emit->tcs.invocation_id_sys_index);
+ begin_emit_instruction(emit);
+ emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
+ emit_dst_register(emit, &addr_dst_x);
+ emit_src_register(emit, &invocation_src);
+ end_emit_instruction(emit);
- /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
- struct tgsi_full_src_register input_control_point;
- struct tgsi_full_dst_register output_control_point;
+ /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
- input_control_point = make_src_reg(TGSI_FILE_INPUT,
- emit->tcs.control_point_input_index);
- input_control_point.Register.Dimension = 1;
- input_control_point.Dimension.Indirect = 1;
- input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
- input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
- output_control_point =
- make_dst_output_reg(emit->tcs.control_point_out_index);
+ struct tgsi_full_src_register input_control_point;
+ input_control_point = make_src_reg(TGSI_FILE_INPUT,
+ emit->tcs.control_point_input_index);
+ input_control_point.Register.Dimension = 1;
+ input_control_point.Dimension.Indirect = 1;
+ input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
+ input_control_point.DimIndirect.Index =
+ emit->tcs.control_point_addr_index;
- begin_emit_instruction(emit);
- emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
- emit_dst_register(emit, &output_control_point);
- emit_src_register(emit, &input_control_point);
- end_emit_instruction(emit);
+ begin_emit_instruction(emit);
+ emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
+ emit_dst_register(emit, &output_control_point);
+ emit_src_register(emit, &input_control_point);
+ end_emit_instruction(emit);
+ }
}
/**
@@ -10404,6 +11927,9 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
emit_domain_shader_declarations(emit);
}
+ else if (emit->unit == PIPE_SHADER_COMPUTE) {
+ emit_compute_shader_declarations(emit);
+ }
/* Declare inputs */
if (!emit_input_declarations(emit))
@@ -10420,6 +11946,9 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
* will already be declared in hs_decls (emit_hull_shader_declarations)
*/
if (emit->unit != PIPE_SHADER_TESS_CTRL) {
+
+ alloc_common_immediates(emit);
+
/* Declare constant registers */
emit_constant_declaration(emit);
@@ -10427,13 +11956,18 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
emit_sampler_declarations(emit);
emit_resource_declarations(emit);
- alloc_common_immediates(emit);
- /* Now, emit the constant block containing all the immediates
- * declared by shader, as well as the extra ones seen above.
- */
+ /* Declare images */
+ emit_image_declarations(emit);
+
+ /* Declare shader buffers */
+ emit_shader_buf_declarations(emit);
+
+ /* Declare atomic buffers */
+ emit_atomic_buf_declarations(emit);
}
- if (emit->unit != PIPE_SHADER_FRAGMENT) {
+ if (emit->unit != PIPE_SHADER_FRAGMENT &&
+ emit->unit != PIPE_SHADER_COMPUTE) {
/*
* Declare clip distance output registers for ClipVertex or
* user defined planes
@@ -10441,6 +11975,18 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
emit_clip_distance_declarations(emit);
}
+ if (emit->unit == PIPE_SHADER_COMPUTE) {
+ emit_memory_declarations(emit);
+
+ if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) {
+ emit->cs.grid_size.imm_index =
+ alloc_immediate_int4(emit,
+ emit->key.cs.grid_size[0],
+ emit->key.cs.grid_size[1],
+ emit->key.cs.grid_size[2], 0);
+ }
+ }
+
if (emit->unit == PIPE_SHADER_FRAGMENT &&
emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
float alpha = emit->key.fs.alpha_ref;
@@ -10536,7 +12082,9 @@ static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
unsigned fs_color_tmp_index)
{
- /* compare output color's alpha to alpha ref and kill */
+ /* compare output color's alpha to alpha ref and discard if comparison
+ * fails.
+ */
unsigned tmp = get_temp_index(emit);
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
struct tgsi_full_src_register tmp_src_x =
@@ -10671,6 +12219,93 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit)
/**
+ * Reemit rawbuf instruction.
+ *
+ * For each of the emit->raw_buf_cur_tmp_index entries recorded in
+ * emit->raw_buf_tmp[], an LD_RAW is emitted that loads the referenced
+ * element into temporary (emit->raw_buf_tmp_index + entry number).  The
+ * TGSI instruction is then translated again through
+ * emit_vgpu10_instruction() and its result is returned.
+ *
+ * emit->reemit_rawbuf_instruction is REEMIT_IN_PROGRESS for the duration
+ * of the call and REEMIT_FALSE afterwards.
+ */
+static boolean
+emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
+                        unsigned inst_number,
+                        const struct tgsi_full_instruction *inst)
+{
+   boolean ret;
+
+   /* For all the rawbuf references in this instruction,
+    * load the rawbuf reference and assign to the designated temporary.
+    * Then reemit the instruction.
+    */
+   emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS;
+
+   unsigned offset_tmp = get_temp_index(emit);
+   struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp);
+   struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp);
+   struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
+
+   /* The byte offset operand is the same register for every load */
+   struct tgsi_full_src_register offset_x =
+      scalar_src(&offset_src, TGSI_SWIZZLE_X);
+
+   for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) {
+      struct tgsi_full_src_register element_src;
+
+      /* First get the element index register. */
+
+      if (emit->raw_buf_tmp[i].indirect) {
+         unsigned tmp = get_temp_index(emit);
+         struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp);
+         struct tgsi_full_src_register element_index;
+         struct tgsi_full_src_register element_rel =
+            make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel);
+
+         element_src = make_src_temp_reg(tmp);
+         element_src = scalar_src(&element_src, TGSI_SWIZZLE_X);
+         element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X);
+
+         /* element index from the indirect register */
+         element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
+         element_index = scalar_src(&element_index, TGSI_SWIZZLE_X);
+
+         /* IADD element_src element_index element_index_relative */
+         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst,
+                              &element_index, &element_rel);
+      }
+      else {
+         element_src =
+            make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_index);
+      }
+
+      /* byte offset = element index << 4 */
+      emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst,
+                           &element_src, &four);
+
+      struct tgsi_full_dst_register dst_tmp =
+         make_dst_temp_reg(i + emit->raw_buf_tmp_index);
+
+      /* LD_RAW tmp, rawbuf byte offset, rawbuf */
+
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_LD_RAW, FALSE);
+      emit_dst_register(emit, &dst_tmp);
+      emit_src_register(emit, &offset_x);
+
+      /* The rawbuf resource slots start at raw_buf_srv_start_index */
+      emit_resource_register(emit,
+         emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index);
+      end_emit_instruction(emit);
+   }
+
+   emit->raw_buf_cur_tmp_index = 0;
+
+   ret = emit_vgpu10_instruction(emit, inst_number, inst);
+
+   /* reset raw buf state */
+   emit->raw_buf_cur_tmp_index = 0;
+   emit->reemit_rawbuf_instruction = REEMIT_FALSE;
+
+   free_temp_indexes(emit);
+
+   return ret;
+}
+
+
+/**
* Translate the TGSI tokens into VGPU10 tokens.
*/
static boolean
@@ -10730,6 +12365,10 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
ret = emit_vgpu10_instruction(emit, inst_number - 1,
&parse.FullToken.FullInstruction);
}
+ else if (emit->reemit_rawbuf_instruction) {
+ ret = emit_rawbuf_instruction(emit, inst_number - 1,
+ &parse.FullToken.FullInstruction);
+ }
if (!ret)
goto done;
@@ -10765,9 +12404,13 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
VGPU10ProgramToken ptoken;
/* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
+
+ /* Maximum supported shader version is 50 */
+ unsigned version = MIN2(emit->version, 50);
+
ptoken.value = 0; /* init whole token to zero */
- ptoken.majorVersion = emit->version / 10;
- ptoken.minorVersion = emit->version % 10;
+ ptoken.majorVersion = version / 10;
+ ptoken.minorVersion = version % 10;
ptoken.programType = translate_shader_type(emit->unit);
if (!emit_dword(emit, ptoken.value))
return FALSE;
@@ -10840,6 +12483,15 @@ emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
ptoken->refactoringAllowed = 1;
}
+ if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) {
+ /* Replace the reserved token with the forceEarlyDepthStencil global flag */
+ VGPU10OpcodeToken0 *ptoken;
+
+ ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
+ ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
+ ptoken->forceEarlyDepthStencil = 1;
+ }
+
return TRUE;
}
@@ -10884,6 +12536,9 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
emit->fs.pstipple_sampler_unit = unit;
+ /* The new sampler state is appended to the end of the samplers list */
+ emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++;
+
/* Setup texture state for stipple */
emit->sampler_target[unit] = TGSI_TEXTURE_2D;
emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
@@ -10891,6 +12546,7 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
emit->key.tex[unit].target = PIPE_TEXTURE_2D;
+ emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index;
if (0) {
debug_printf("After pstipple ------------------\n");
@@ -11020,6 +12676,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
const struct svga_compile_key *key,
enum pipe_shader_type unit)
{
+ struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
struct svga_shader_variant *variant = NULL;
struct svga_shader_emitter_v10 *emit;
const struct tgsi_token *tokens = shader->tokens;
@@ -11045,7 +12702,9 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
goto done;
emit->unit = unit;
- if (svga_have_sm5(svga)) {
+ if (svga_have_gl43(svga)) {
+ emit->version = 51;
+ } else if (svga_have_sm5(svga)) {
emit->version = 50;
} else if (svga_have_sm4_1(svga)) {
emit->version = 41;
@@ -11053,6 +12712,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
emit->version = 40;
}
+ emit->use_sampler_state_mapping = emit->key.sampler_state_mapping;
+
emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
emit->key = *key;
@@ -11098,7 +12759,6 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
emit->tcs.control_point_tmp_index = INVALID_INDEX;
emit->tcs.control_point_out_count = 0;
emit->tcs.inner.out_index = INVALID_INDEX;
- emit->tcs.inner.out_index = INVALID_INDEX;
emit->tcs.inner.temp_index = INVALID_INDEX;
emit->tcs.inner.tgsi_index = INVALID_INDEX;
emit->tcs.outer.out_index = INVALID_INDEX;
@@ -11118,6 +12778,14 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
emit->tes.outer.tgsi_index = INVALID_INDEX;
emit->tes.prim_id_index = INVALID_INDEX;
+ emit->cs.thread_id_index = INVALID_INDEX;
+ emit->cs.block_id_index = INVALID_INDEX;
+ emit->cs.grid_size.tgsi_index = INVALID_INDEX;
+ emit->cs.grid_size.imm_index = INVALID_INDEX;
+ emit->cs.block_width = 1;
+ emit->cs.block_height = 1;
+ emit->cs.block_depth = 1;
+
emit->clip_dist_out_index = INVALID_INDEX;
emit->clip_dist_tmp_index = INVALID_INDEX;
emit->clip_dist_so_index = INVALID_INDEX;
@@ -11135,6 +12803,11 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
emit->current_loop_depth = 0;
emit->initialize_temp_index = INVALID_INDEX;
+ emit->image_size_index = INVALID_INDEX;
+
+ emit->max_vs_inputs = svgascreen->max_vs_inputs;
+ emit->max_vs_outputs = svgascreen->max_vs_outputs;
+ emit->max_gs_inputs = svgascreen->max_gs_inputs;
if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
@@ -11202,6 +12875,12 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
}
}
+ /* Determine if constbuf to rawbuf translation is needed */
+ if (emit->info.const_buffers_declared) {
+ emit->raw_bufs = emit->key.raw_buffers;
+ emit->raw_buf_srv_start_index = emit->key.srv_raw_buf_index;
+ }
+
/*
* Do actual shader translation.
*/
@@ -11262,6 +12941,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+ fs_variant->pstipple_sampler_state_index =
+ emit->fs.pstipple_sampler_state_index;
/* If there was exactly one write to a fragment shader output register
* and it came from a constant buffer, we know all fragments will have
@@ -11275,7 +12956,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
*/
fs_variant->uses_flat_interp = emit->uses_flat_interp;
- fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+ fs_variant->fs_shadow_compare_units = emit->shadow_compare_units;
}
else if (unit == PIPE_SHADER_TESS_EVAL) {
struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h
index 7b3f439af..6818c2931 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h
@@ -49,7 +49,7 @@ struct svga_winsys_screen;
struct svga_winsys_buffer;
struct pipe_screen;
struct pipe_context;
-struct pipe_debug_callback;
+struct util_debug_callback;
struct pipe_fence_handle;
struct pipe_resource;
struct svga_region;
@@ -109,6 +109,7 @@ enum svga_stats_count {
SVGA_STATS_COUNT_BLITBLITTERCOPY,
SVGA_STATS_COUNT_DEPTHSTENCILSTATE,
SVGA_STATS_COUNT_RASTERIZERSTATE,
+ SVGA_STATS_COUNT_RAWBUFFERSRVIEW,
SVGA_STATS_COUNT_SAMPLER,
SVGA_STATS_COUNT_SAMPLERVIEW,
SVGA_STATS_COUNT_SURFACEWRITEFLUSH,
@@ -131,6 +132,7 @@ enum svga_stats_time {
SVGA_STATS_TIME_CREATEBACKEDSURFACEVIEW,
SVGA_STATS_TIME_CREATEBUFFER,
SVGA_STATS_TIME_CREATECONTEXT,
+ SVGA_STATS_TIME_CREATECS,
SVGA_STATS_TIME_CREATEFS,
SVGA_STATS_TIME_CREATEGS,
SVGA_STATS_TIME_CREATESURFACE,
@@ -144,8 +146,10 @@ enum svga_stats_time {
SVGA_STATS_TIME_DRAWVBO,
SVGA_STATS_TIME_DRAWARRAYS,
SVGA_STATS_TIME_DRAWELEMENTS,
+ SVGA_STATS_TIME_EMITCS,
SVGA_STATS_TIME_EMITFS,
SVGA_STATS_TIME_EMITGS,
+ SVGA_STATS_TIME_EMITRAWBUFFER,
SVGA_STATS_TIME_EMITTCS,
SVGA_STATS_TIME_EMITTES,
SVGA_STATS_TIME_EMITVS,
@@ -156,6 +160,7 @@ enum svga_stats_time {
SVGA_STATS_TIME_HWTNLDRAWELEMENTS,
SVGA_STATS_TIME_HWTNLFLUSH,
SVGA_STATS_TIME_HWTNLPRIM,
+ SVGA_STATS_TIME_LAUNCHGRID,
SVGA_STATS_TIME_PROPAGATESURFACE,
SVGA_STATS_TIME_SETSAMPLERVIEWS,
SVGA_STATS_TIME_SURFACEFLUSH,
@@ -166,7 +171,9 @@ enum svga_stats_time {
SVGA_STATS_TIME_TEXTRANSFERUNMAP,
SVGA_STATS_TIME_TGSIVGPU10TRANSLATE,
SVGA_STATS_TIME_TGSIVGPU9TRANSLATE,
+ SVGA_STATS_TIME_UPDATECSUAV,
SVGA_STATS_TIME_UPDATESTATE,
+ SVGA_STATS_TIME_UPDATEUAV,
SVGA_STATS_TIME_VALIDATESURFACEVIEW,
SVGA_STATS_TIME_VBUFDRAWARRAYS,
SVGA_STATS_TIME_VBUFDRAWELEMENTS,
@@ -184,6 +191,7 @@ enum svga_stats_time {
SVGA_STATS_PREFIX "BlitBlitterCopy", \
SVGA_STATS_PREFIX "DepthStencilState", \
SVGA_STATS_PREFIX "RasterizerState", \
+ SVGA_STATS_PREFIX "RawBufferSRView", \
SVGA_STATS_PREFIX "Sampler", \
SVGA_STATS_PREFIX "SamplerView", \
SVGA_STATS_PREFIX "SurfaceWriteFlush", \
@@ -204,6 +212,7 @@ enum svga_stats_time {
SVGA_STATS_PREFIX "CreateBackedSurfaceView", \
SVGA_STATS_PREFIX "CreateBuffer", \
SVGA_STATS_PREFIX "CreateContext", \
+ SVGA_STATS_PREFIX "CreateCS", \
SVGA_STATS_PREFIX "CreateFS", \
SVGA_STATS_PREFIX "CreateGS", \
SVGA_STATS_PREFIX "CreateSurface", \
@@ -217,8 +226,10 @@ enum svga_stats_time {
SVGA_STATS_PREFIX "DrawVBO", \
SVGA_STATS_PREFIX "DrawArrays", \
SVGA_STATS_PREFIX "DrawElements", \
+ SVGA_STATS_PREFIX "EmitCS", \
SVGA_STATS_PREFIX "EmitFS", \
SVGA_STATS_PREFIX "EmitGS", \
+ SVGA_STATS_PREFIX "EmitRawBuffer", \
SVGA_STATS_PREFIX "EmitTCS", \
SVGA_STATS_PREFIX "EmitTES", \
SVGA_STATS_PREFIX "EmitVS", \
@@ -229,6 +240,7 @@ enum svga_stats_time {
SVGA_STATS_PREFIX "HWtnlDrawElements", \
SVGA_STATS_PREFIX "HWtnlFlush", \
SVGA_STATS_PREFIX "HWtnlPrim", \
+ SVGA_STATS_PREFIX "LaunchGrid", \
SVGA_STATS_PREFIX "PropagateSurface", \
SVGA_STATS_PREFIX "SetSamplerViews", \
SVGA_STATS_PREFIX "SurfaceFlush", \
@@ -239,7 +251,9 @@ enum svga_stats_time {
SVGA_STATS_PREFIX "TextureTransferUnmap", \
SVGA_STATS_PREFIX "TGSIVGPU10Translate", \
SVGA_STATS_PREFIX "TGSIVGPU9Translate", \
+ SVGA_STATS_PREFIX "UpdateCSUAV", \
SVGA_STATS_PREFIX "UpdateState", \
+ SVGA_STATS_PREFIX "UpdateUAV", \
SVGA_STATS_PREFIX "ValidateSurfaceView", \
SVGA_STATS_PREFIX "VbufDrawArrays", \
SVGA_STATS_PREFIX "VbufDrawElements", \
@@ -463,7 +477,7 @@ struct svga_winsys_context
unsigned flags);
/** To report perf/conformance/etc issues to the gallium frontend */
- struct pipe_debug_callback *debug_callback;
+ struct util_debug_callback *debug_callback;
/** The more recent command issued to command buffer */
SVGAFifo3dCmdId last_command;
@@ -803,6 +817,16 @@ struct svga_winsys_screen
boolean have_fence_fd;
boolean have_intra_surface_copy;
boolean have_constant_buffer_offset_cmd;
+ boolean have_index_vertex_buffer_offset_cmd;
+
+ /* Have rasterizer state v2 command support */
+ boolean have_rasterizer_state_v2_cmd;
+
+ /** Have GL43 capable device */
+ boolean have_gl43;
+
+ /** SVGA device_id version we're running on */
+ uint16_t device_id;
};