diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-09-02 05:47:02 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-09-02 05:47:02 +0000 |
commit | 0dbbf1e0708df85a357d70e2708c0a11aeb5480e (patch) | |
tree | 6656ff8eb8b15a2fc1c02888973caf618388cfd0 /lib/mesa/src/gallium/drivers/svga | |
parent | 5f66494d31f735486b8222ecfa0a0c9046e92543 (diff) |
Merge Mesa 22.1.7
Diffstat (limited to 'lib/mesa/src/gallium/drivers/svga')
45 files changed, 5439 insertions, 1535 deletions
diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h index 1eff07d27..8178c467e 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_surfacedefs.h @@ -1,7 +1,7 @@ /************************************************************************** * - * Copyright © 1998-2015 VMware, Inc., Palo Alto, CA., USA - * All Rights Reserved. + * Copyright 1998-2022 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -61,108 +61,292 @@ enum svga3d_block_desc { SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */ SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with red channel data */ - SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel data */ - SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel data */ - SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel data */ - SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with blue channel data */ - SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel data */ - SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel data */ - SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance data */ + SVGA3DBLOCKDESC_W = 1 << 0, + SVGA3DBLOCKDESC_BUMP_L = 1 << 0, + + /* Format contains Green/V data */ + SVGA3DBLOCKDESC_GREEN = 1 << 1, + SVGA3DBLOCKDESC_V = 1 << 1, + + /* Format contains Red/W/Luminance data */ + SVGA3DBLOCKDESC_RED = 1 << 2, + SVGA3DBLOCKDESC_U = 1 << 2, + SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, + SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha channel */ SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel data */ SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of data */ SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of data depending on the compression method used */ - 
SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE floating point - representation in all channels */ - SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video U and V */ - SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store data. */ - SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */ - SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */ - SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */ - SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */ - SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV, e.g., NV12. */ - SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate Y, U, V, e.g., YV12. */ - SVGA3DBLOCKDESC_DEPTH = 1 << 15, /* Block with depth channel */ - SVGA3DBLOCKDESC_STENCIL = 1 << 16, /* Block with a stencil channel */ - - SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN, - SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG | - SVGA3DBLOCKDESC_BLUE, - SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA | - SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_FP = 1 << 6, + + SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 7, + SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 8, + SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 9, + SVGA3DBLOCKDESC_STENCIL = 1 << 11, + SVGA3DBLOCKDESC_TYPELESS = 1 << 12, + SVGA3DBLOCKDESC_SINT = 1 << 13, + SVGA3DBLOCKDESC_UINT = 1 << 14, + SVGA3DBLOCKDESC_NORM = 1 << 15, + SVGA3DBLOCKDESC_SRGB = 1 << 16, + SVGA3DBLOCKDESC_EXP = 1 << 17, + SVGA3DBLOCKDESC_COLOR = 1 << 18, + SVGA3DBLOCKDESC_DEPTH = 1 << 19, + SVGA3DBLOCKDESC_BUMP = 1 << 20, + SVGA3DBLOCKDESC_YUV_VIDEO = 1 << 21, + SVGA3DBLOCKDESC_MIXED = 1 << 22, + SVGA3DBLOCKDESC_CX = 1 << 23, + + /* Different compressed format groups. 
*/ + SVGA3DBLOCKDESC_BC1 = 1 << 24, + SVGA3DBLOCKDESC_BC2 = 1 << 25, + SVGA3DBLOCKDESC_BC3 = 1 << 26, + SVGA3DBLOCKDESC_BC4 = 1 << 27, + SVGA3DBLOCKDESC_BC5 = 1 << 28, + SVGA3DBLOCKDESC_BC6H = 1 << 29, + SVGA3DBLOCKDESC_BC7 = 1 << 30, + SVGA3DBLOCKDESC_COMPRESSED_MASK = SVGA3DBLOCKDESC_BC1 | + SVGA3DBLOCKDESC_BC2 | + SVGA3DBLOCKDESC_BC3 | + SVGA3DBLOCKDESC_BC4 | + SVGA3DBLOCKDESC_BC5 | + SVGA3DBLOCKDESC_BC6H | + SVGA3DBLOCKDESC_BC7, + + SVGA3DBLOCKDESC_A_UINT = SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_A_UNORM = SVGA3DBLOCKDESC_A_UINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_R_UINT = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_R_UNORM = SVGA3DBLOCKDESC_R_UINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_R_SINT = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_R_SNORM = SVGA3DBLOCKDESC_R_SINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_G_UINT = SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RG_UINT = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RG_UNORM = SVGA3DBLOCKDESC_RG_UINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RG_SINT = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RG_SNORM = SVGA3DBLOCKDESC_RG_SINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGB_UINT = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGB_SINT = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGB_UNORM = SVGA3DBLOCKDESC_RGB_UINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGB_UNORM_SRGB = SVGA3DBLOCKDESC_RGB_UNORM | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_RGBA_UINT = 
SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGBA_UNORM = SVGA3DBLOCKDESC_RGBA_UINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGBA_UNORM_SRGB = SVGA3DBLOCKDESC_RGBA_UNORM | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_RGBA_SINT = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGBA_SNORM = SVGA3DBLOCKDESC_RGBA_SINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_FP | + SVGA3DBLOCKDESC_COLOR, SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U | - SVGA3DBLOCKDESC_V, + SVGA3DBLOCKDESC_V | + SVGA3DBLOCKDESC_BUMP, SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_LUMINANCE, + SVGA3DBLOCKDESC_BUMP_L | + SVGA3DBLOCKDESC_MIXED | + SVGA3DBLOCKDESC_BUMP, SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_W, + SVGA3DBLOCKDESC_W | + SVGA3DBLOCKDESC_BUMP, SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW | - SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_MIXED | + SVGA3DBLOCKDESC_BUMP, SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U | SVGA3DBLOCKDESC_V | SVGA3DBLOCKDESC_W | - SVGA3DBLOCKDESC_Q, - SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE | - SVGA3DBLOCKDESC_ALPHA, + SVGA3DBLOCKDESC_Q | + SVGA3DBLOCKDESC_BUMP, + SVGA3DBLOCKDESC_L_UNORM = SVGA3DBLOCKDESC_LUMINANCE | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_NORM | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_LA_UNORM = SVGA3DBLOCKDESC_LUMINANCE | + SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_NORM | + SVGA3DBLOCKDESC_COLOR, SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_IEEE_FP, + SVGA3DBLOCKDESC_FP | + SVGA3DBLOCKDESC_COLOR, SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP | - SVGA3DBLOCKDESC_GREEN, + 
SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_COLOR, SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP | - SVGA3DBLOCKDESC_BLUE, - SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP | - SVGA3DBLOCKDESC_ALPHA, - SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | - SVGA3DBLOCKDESC_STENCIL, - SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO | - SVGA3DBLOCKDESC_Y, + SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_YUV_VIDEO | + SVGA3DBLOCKDESC_COLOR, SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_Y | - SVGA3DBLOCKDESC_U_VIDEO | - SVGA3DBLOCKDESC_V_VIDEO, - SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB | - SVGA3DBLOCKDESC_EXP, - SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED | + SVGA3DBLOCKDESC_YUV_VIDEO | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGB_EXP = SVGA3DBLOCKDESC_RED | + SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_EXP | + SVGA3DBLOCKDESC_COLOR, + + SVGA3DBLOCKDESC_COMP_TYPELESS = SVGA3DBLOCKDESC_COMPRESSED | + SVGA3DBLOCKDESC_TYPELESS, + SVGA3DBLOCKDESC_COMP_UNORM = SVGA3DBLOCKDESC_COMPRESSED | + SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_NORM | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_COMP_SNORM = SVGA3DBLOCKDESC_COMPRESSED | + SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_NORM | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_COMP_UNORM | SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_2PLANAR_YUV, - SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_3PLANAR_YUV, + SVGA3DBLOCKDESC_BC1_COMP_TYPELESS = SVGA3DBLOCKDESC_BC1 | + SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC1_COMP_UNORM = SVGA3DBLOCKDESC_BC1 | + SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC1_COMP_UNORM | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_BC2_COMP_TYPELESS = SVGA3DBLOCKDESC_BC2 | + SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC2_COMP_UNORM = SVGA3DBLOCKDESC_BC2 | + 
SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC2_COMP_UNORM | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_BC3_COMP_TYPELESS = SVGA3DBLOCKDESC_BC3 | + SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC3_COMP_UNORM = SVGA3DBLOCKDESC_BC3 | + SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC3_COMP_UNORM | + SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_BC4_COMP_TYPELESS = SVGA3DBLOCKDESC_BC4 | + SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC4_COMP_UNORM = SVGA3DBLOCKDESC_BC4 | + SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC4_COMP_SNORM = SVGA3DBLOCKDESC_BC4 | + SVGA3DBLOCKDESC_COMP_SNORM, + SVGA3DBLOCKDESC_BC5_COMP_TYPELESS = SVGA3DBLOCKDESC_BC5 | + SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC5_COMP_UNORM = SVGA3DBLOCKDESC_BC5 | + SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC5_COMP_SNORM = SVGA3DBLOCKDESC_BC5 | + SVGA3DBLOCKDESC_COMP_SNORM, + SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS = SVGA3DBLOCKDESC_BC6H | + SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC6H_COMP_UF16 = SVGA3DBLOCKDESC_BC6H | + SVGA3DBLOCKDESC_COMPRESSED, + SVGA3DBLOCKDESC_BC6H_COMP_SF16 = SVGA3DBLOCKDESC_BC6H | + SVGA3DBLOCKDESC_COMPRESSED, + SVGA3DBLOCKDESC_BC7_COMP_TYPELESS = SVGA3DBLOCKDESC_BC7 | + SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC7_COMP_UNORM = SVGA3DBLOCKDESC_BC7 | + SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC7_COMP_UNORM | + SVGA3DBLOCKDESC_SRGB, + + SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_YUV_VIDEO | + SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_2PLANAR_YUV | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_YUV_VIDEO | + SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_3PLANAR_YUV | + SVGA3DBLOCKDESC_COLOR, + + SVGA3DBLOCKDESC_DEPTH_UINT = SVGA3DBLOCKDESC_DEPTH | + SVGA3DBLOCKDESC_UINT, + SVGA3DBLOCKDESC_DEPTH_UNORM = SVGA3DBLOCKDESC_DEPTH_UINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | + 
SVGA3DBLOCKDESC_STENCIL, + SVGA3DBLOCKDESC_DS_UINT = SVGA3DBLOCKDESC_DEPTH | + SVGA3DBLOCKDESC_STENCIL | + SVGA3DBLOCKDESC_UINT, + SVGA3DBLOCKDESC_DS_UNORM = SVGA3DBLOCKDESC_DS_UINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_DEPTH_FP = SVGA3DBLOCKDESC_DEPTH | + SVGA3DBLOCKDESC_FP, + + SVGA3DBLOCKDESC_UV_UINT = SVGA3DBLOCKDESC_UV | + SVGA3DBLOCKDESC_UINT, + SVGA3DBLOCKDESC_UV_SNORM = SVGA3DBLOCKDESC_UV | + SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_UVCX_SNORM = SVGA3DBLOCKDESC_UV_SNORM | + SVGA3DBLOCKDESC_CX, + SVGA3DBLOCKDESC_UVWQ_SNORM = SVGA3DBLOCKDESC_UVWQ | + SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_NORM, }; typedef struct SVGA3dChannelDef { - union { - uint8 blue; - uint8 u; - uint8 uv_video; - uint8 u_video; - }; - union { - uint8 green; - uint8 v; - uint8 stencil; - uint8 v_video; - }; - union { - uint8 red; - uint8 w; - uint8 luminance; - uint8 y; - uint8 depth; - uint8 data; - }; - union { - uint8 alpha; - uint8 q; - uint8 exp; - }; + union { + uint8 blue; + uint8 w_bump; + uint8 l_bump; + uint8 uv_video; + uint8 u_video; + }; + union { + uint8 green; + uint8 stencil; + uint8 v_bump; + uint8 v_video; + }; + union { + uint8 red; + uint8 u_bump; + uint8 luminance; + uint8 y_video; + uint8 depth; + uint8 data; + }; + union { + uint8 alpha; + uint8 q_bump; + uint8 exp; + }; } SVGA3dChannelDef; struct svga3d_surface_desc { @@ -173,739 +357,784 @@ struct svga3d_surface_desc { uint32 bytes_per_block; uint32 pitch_bytes_per_block; - uint32 totalBitDepth; - SVGA3dChannelDef bitDepth; - SVGA3dChannelDef bitOffset; + SVGA3dChannelDef bitDepth; + SVGA3dChannelDef bitOffset; }; static const struct svga3d_surface_desc svga3d_surface_descs[] = { {SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE, {1, 1, 1}, 0, 0, - 0, {{0}, {0}, {0}, {0}}, + {{0}, {0}, {0}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB, + {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB_UNORM, {1, 1, 1}, 4, 4, - 24, {{8}, {8}, {8}, {0}}, + {{8}, {8}, {8}, {0}}, {{0}, {8}, 
{16}, {24}}}, - {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB, + {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB_UNORM, {1, 1, 1}, 2, 2, - 16, {{5}, {6}, {5}, {0}}, + {{5}, {6}, {5}, {0}}, {{0}, {5}, {11}, {0}}}, - {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB, + {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB_UNORM, {1, 1, 1}, 2, 2, - 15, {{5}, {5}, {5}, {0}}, + {{5}, {5}, {5}, {0}}, {{0}, {5}, {10}, {0}}}, - {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 2, 2, - 16, {{5}, {5}, {5}, {1}}, + {{5}, {5}, {5}, {1}}, {{0}, {5}, {10}, {15}}}, - {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 2, 2, - 16, {{4}, {4}, {4}, {4}}, + {{4}, {4}, {4}, {4}}, {{0}, {4}, {8}, {12}}}, - {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH, + {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH, + {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS, + {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {8}, {24}, {0}}, - {{0}, {24}, {0}, {0}}}, + {{0}, {8}, {24}, {0}}, + {{0}, {0}, {8}, {0}}}, - {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS, + {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {1}, {15}, {0}}, - {{0}, {15}, {0}, {0}}}, + {{0}, {1}, {15}, {0}}, + {{0}, {0}, {1}, {0}}}, - {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_LUMINANCE, + {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_L_UNORM, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA, - {1 , 1, 1}, 1, 1, - 8, {{0}, {0}, {4}, {4}}, + 
{SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA_UNORM, + {1, 1, 1}, 1, 1, + {{0}, {0}, {4}, {4}}, {{0}, {0}, {0}, {4}}}, - {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_LUMINANCE, + {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_L_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA, + {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {8}, {8}}, + {{0}, {0}, {8}, {8}}, {{0}, {0}, {0}, {8}}}, - {SVGA3D_DXT1, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_DXT1, SVGA3DBLOCKDESC_BC1_COMP_UNORM, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_DXT2, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_DXT2, SVGA3DBLOCKDESC_BC2_COMP_UNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_DXT3, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_DXT3, SVGA3DBLOCKDESC_BC2_COMP_UNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_DXT4, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_DXT4, SVGA3DBLOCKDESC_BC3_COMP_UNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_DXT5, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_DXT5, SVGA3DBLOCKDESC_BC3_COMP_UNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV, + {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV_SNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {8}, {8}}, - {{0}, {0}, {0}, {8}}}, + {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, {SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL, {1, 1, 1}, 2, 2, - 16, {{5}, {5}, {6}, {0}}, - {{11}, {6}, {0}, {0}}}, + {{6}, {5}, {5}, {0}}, + {{10}, {5}, {0}, {0}}}, {SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {0}}, + {{8}, {8}, {8}, {0}}, {{16}, {8}, {0}, {0}}}, - {SVGA3D_FORMAT_DEAD1, 
SVGA3DBLOCKDESC_UVL, - {0, 0, 0}, 0, 0, - 0, {{0}, {0}, {0}, {0}}, - {{0}, {0}, {0}, {0}}}, + {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_NONE, + {1, 1, 1}, 3, 3, + {{8}, {8}, {8}, {0}}, + {{16}, {8}, {0}, {0}}}, {SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, {SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP, {1, 1, 1}, 16, 16, - 128, {{32}, {32}, {32}, {32}}, + {{32}, {32}, {32}, {32}}, {{64}, {32}, {0}, {96}}}, - {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 4, 4, - 32, {{10}, {10}, {10}, {2}}, + {{10}, {10}, {10}, {2}}, {{0}, {10}, {20}, {30}}}, - {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV, + {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV_SNORM, {1, 1, 1}, 2, 2, - 16, {{8}, {8}, {0}, {0}}, - {{8}, {0}, {0}, {0}}}, + {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, - {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ, + {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ_SNORM, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, - {{24}, {16}, {8}, {0}}}, + {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, - {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UV, + {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UVCX_SNORM, {1, 1, 1}, 2, 2, - 16, {{8}, {8}, {0}, {0}}, - {{8}, {0}, {0}, {0}}}, + {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, {SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL, {1, 1, 1}, 4, 4, - 24, {{8}, {8}, {8}, {0}}, + {{8}, {8}, {8}, {0}}, {{16}, {8}, {0}, {0}}}, {SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA, {1, 1, 1}, 4, 4, - 32, {{10}, {10}, {10}, {2}}, - {{0}, {10}, {20}, {30}}}, + {{10}, {10}, {10}, {2}}, + {{20}, {10}, {0}, {30}}}, - {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_ALPHA, + {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_A_UNORM, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {0}, {8}}, + {{0}, {0}, {0}, {8}}, {{0}, {0}, {0}, {0}}}, {SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, {SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP, {1, 1, 1}, 4, 4, - 32, {{0}, 
{0}, {32}, {0}}, + {{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, {SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP, {1, 1, 1}, 4, 4, - 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {16}, {0}}, {{0}, {16}, {0}, {0}}}, {SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP, {1, 1, 1}, 8, 8, - 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {32}, {0}}, {{0}, {32}, {0}, {0}}}, {SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH, + {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {24}, {0}}, - {{0}, {24}, {0}, {0}}}, + {{0}, {0}, {24}, {0}}, + {{0}, {0}, {8}, {0}}}, - {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV, + {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV_SNORM, {1, 1, 1}, 4, 4, - 32, {{16}, {16}, {0}, {0}}, - {{16}, {0}, {0}, {0}}}, + {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, - {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG, + {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {16}, {16}, {0}}, - {{0}, {0}, {16}, {0}}}, + {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, - {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, {SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV, - {1, 1, 1}, 2, 2, - 16, {{8}, {0}, {8}, {0}}, + {2, 1, 1}, 4, 4, + {{8}, {0}, {8}, {0}}, {{0}, {0}, {8}, {0}}}, {SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV, - {1, 1, 1}, 2, 2, - 16, {{8}, {0}, {8}, {0}}, + {2, 1, 1}, 4, 4, + {{8}, {0}, {8}, {0}}, {{8}, {0}, {0}, {0}}}, {SVGA3D_NV12, SVGA3DBLOCKDESC_NV12, {2, 2, 1}, 6, 2, - 48, {{0}, {0}, {48}, {0}}, + {{0}, {0}, {48}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV, + {SVGA3D_FORMAT_DEAD2, SVGA3DBLOCKDESC_NONE, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R32G32B32A32_TYPELESS, 
SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 16, 16, - 128, {{32}, {32}, {32}, {32}}, + {{32}, {32}, {32}, {32}}, {{64}, {32}, {0}, {96}}}, - {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA_UINT, {1, 1, 1}, 16, 16, - 128, {{32}, {32}, {32}, {32}}, + {{32}, {32}, {32}, {32}}, {{64}, {32}, {0}, {96}}}, - {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_UVWQ, + {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_RGBA_SINT, {1, 1, 1}, 16, 16, - 128, {{32}, {32}, {32}, {32}}, + {{32}, {32}, {32}, {32}}, {{64}, {32}, {0}, {96}}}, - {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_RGB, + {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 12, 12, - 96, {{32}, {32}, {32}, {0}}, + {{32}, {32}, {32}, {0}}, {{64}, {32}, {0}, {0}}}, {SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP, {1, 1, 1}, 12, 12, - 96, {{32}, {32}, {32}, {0}}, + {{32}, {32}, {32}, {0}}, {{64}, {32}, {0}, {0}}}, - {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB, + {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB_UINT, {1, 1, 1}, 12, 12, - 96, {{32}, {32}, {32}, {0}}, + {{32}, {32}, {32}, {0}}, {{64}, {32}, {0}, {0}}}, - {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_UVW, + {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_RGB_SINT, {1, 1, 1}, 12, 12, - 96, {{32}, {32}, {32}, {0}}, + {{32}, {32}, {32}, {0}}, {{64}, {32}, {0}, {0}}}, - {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, - {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA_UINT, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, - {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_UVWQ, + {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_RGBA_SNORM, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, - {SVGA3D_R16G16B16A16_SINT, 
SVGA3DBLOCKDESC_UVWQ, + {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_RGBA_SINT, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, - {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_RG, + {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 8, 8, - 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {32}, {0}}, {{0}, {32}, {0}, {0}}}, - {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG, + {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG_UINT, {1, 1, 1}, 8, 8, - 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {32}, {0}}, {{0}, {32}, {0}, {0}}}, - {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_UV, + {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_RG_SINT, {1, 1, 1}, 8, 8, - 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {32}, {0}}, {{0}, {32}, {0}, {0}}}, - {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_RG, + {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 8, 8, - 64, {{0}, {8}, {32}, {0}}, + {{0}, {8}, {32}, {0}}, {{0}, {32}, {0}, {0}}}, {SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS, {1, 1, 1}, 8, 8, - 64, {{0}, {8}, {32}, {0}}, + {{0}, {8}, {32}, {0}}, {{0}, {32}, {0}, {0}}}, {SVGA3D_R32_FLOAT_X8X24, SVGA3DBLOCKDESC_R_FP, {1, 1, 1}, 8, 8, - 64, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_X32_G8X24_UINT, SVGA3DBLOCKDESC_GREEN, + {SVGA3D_X32_G8X24_UINT, SVGA3DBLOCKDESC_G_UINT, {1, 1, 1}, 8, 8, - 64, {{0}, {8}, {0}, {0}}, + {{0}, {8}, {0}, {0}}, {{0}, {32}, {0}, {0}}}, - {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 4, 4, - 32, {{10}, {10}, {10}, {2}}, - {{0}, {10}, {20}, {30}}}, + {{10}, {10}, {10}, {2}}, + {{20}, {10}, {0}, {30}}}, - {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA_UINT, {1, 1, 1}, 4, 4, - 32, {{10}, {10}, {10}, {2}}, - {{0}, {10}, {20}, {30}}}, + {{10}, {10}, {10}, {2}}, + {{20}, {10}, {0}, {30}}}, {SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP, {1, 1, 1}, 4, 4, - 32, {{10}, {11}, 
{11}, {0}}, - {{0}, {10}, {21}, {0}}}, + {{10}, {11}, {11}, {0}}, + {{22}, {11}, {0}, {0}}}, - {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{16}, {8}, {0}, {24}}}, - {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{16}, {8}, {0}, {24}}}, - {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB, + {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_UNORM_SRGB, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{16}, {8}, {0}, {24}}}, - {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA_UINT, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{16}, {8}, {0}, {24}}}, - {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA_SINT, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{16}, {8}, {0}, {24}}}, - {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_RG, + {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 4, 4, - 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {16}, {0}}, {{0}, {16}, {0}, {0}}}, - {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_FP, + {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_UINT, {1, 1, 1}, 4, 4, - 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {16}, {0}}, {{0}, {16}, {0}, {0}}}, - {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_UV, + {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_RG_SINT, {1, 1, 1}, 4, 4, - 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {16}, {0}}, {{0}, {16}, {0}, {0}}}, - {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_RED, + {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH, + {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH_FP, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {32}, {0}}, + 
{{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_RED, + {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_R_UINT, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_RED, + {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_R_SINT, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_RG, + {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 4, 4, - 32, {{0}, {8}, {24}, {0}}, + {{0}, {8}, {24}, {0}}, {{0}, {24}, {0}, {0}}}, - {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS, + {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {8}, {24}, {0}}, + {{0}, {8}, {24}, {0}}, {{0}, {24}, {0}, {0}}}, - {SVGA3D_R24_UNORM_X8, SVGA3DBLOCKDESC_RED, + {SVGA3D_R24_UNORM_X8, SVGA3DBLOCKDESC_R_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {24}, {0}}, + {{0}, {0}, {24}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_X24_G8_UINT, SVGA3DBLOCKDESC_GREEN, + {SVGA3D_X24_G8_UINT, SVGA3DBLOCKDESC_G_UINT, {1, 1, 1}, 4, 4, - 32, {{0}, {8}, {0}, {0}}, + {{0}, {8}, {0}, {0}}, {{0}, {24}, {0}, {0}}}, - {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_RG, + {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 2, 2, - 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {8}, {0}}, {{0}, {8}, {0}, {0}}}, - {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG, + {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {8}, {0}}, {{0}, {8}, {0}, {0}}}, - {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG, + {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG_UINT, {1, 1, 1}, 2, 2, - 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {8}, {0}}, {{0}, {8}, {0}, {0}}}, - {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_UV, + {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_RG_SINT, {1, 1, 1}, 2, 2, - 16, {{0}, {8}, {8}, {0}}, + {{0}, {8}, {8}, {0}}, {{0}, {8}, {0}, {0}}}, - {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_RED, + {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 
1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_RED, + {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_R_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_RED, + {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_R_UINT, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_U, + {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_R_SNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_U, + {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_R_SINT, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_RED, + {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_RED, + {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_R_UNORM, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_RED, + {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_R_UINT, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_U, + {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_R_SNORM, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_U, + {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_R_SINT, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_P8, SVGA3DBLOCKDESC_RED, + {SVGA3D_P8, SVGA3DBLOCKDESC_NONE, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {8}, {0}}, + {{0}, {0}, {8}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGBE, + {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGB_EXP, {1, 1, 1}, 4, 4, - 32, {{9}, {9}, {9}, 
{5}}, + {{9}, {9}, {9}, {5}}, {{18}, {9}, {0}, {27}}}, - {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, - 16, {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, + {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_NONE, + {2, 1, 1}, 4, 4, + {{0}, {8}, {8}, {0}}, + {{0}, {0}, {8}, {0}}}, - {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_RG, - {1, 1, 1}, 2, 2, - 16, {{0}, {8}, {8}, {0}}, + {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_NONE, + {2, 1, 1}, 4, 4, + {{0}, {8}, {8}, {0}}, {{0}, {8}, {0}, {0}}}, - {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_BC1_COMP_TYPELESS, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_BC2_COMP_TYPELESS, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_BC3_COMP_TYPELESS, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB, + {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_BC4_COMP_TYPELESS, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, {{0}, {0}, {0}, {0}}}, - 
{SVGA3D_ATI1, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_ATI1, SVGA3DBLOCKDESC_BC4_COMP_UNORM, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_BC4_COMP_SNORM, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_BC5_COMP_TYPELESS, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_ATI2, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_ATI2, SVGA3DBLOCKDESC_BC5_COMP_UNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_BC5_COMP_SNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 4, 4, - 32, {{10}, {10}, {10}, {2}}, - {{0}, {10}, {20}, {30}}}, + {{10}, {10}, {10}, {2}}, + {{20}, {10}, {0}, {30}}}, - {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB, + {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_UNORM_SRGB, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_RGB, + {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, {1, 1, 1}, 4, 4, - 24, {{8}, {8}, {8}, {0}}, + {{8}, {8}, {8}, {0}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_SRGB, + {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_UNORM_SRGB, {1, 1, 1}, 4, 4, - 24, 
{{8}, {8}, {8}, {0}}, + {{8}, {8}, {8}, {0}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH, + {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH, + {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {8}, {24}, {0}}, - {{0}, {24}, {0}, {0}}}, + {{0}, {0}, {24}, {0}}, + {{0}, {0}, {8}, {0}}}, - {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS, + {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {8}, {24}, {0}}, - {{0}, {24}, {0}, {0}}}, + {{0}, {8}, {24}, {0}}, + {{0}, {0}, {8}, {0}}}, {SVGA3D_YV12, SVGA3DBLOCKDESC_YV12, {2, 2, 1}, 6, 2, - 48, {{0}, {0}, {48}, {0}}, + {{0}, {0}, {48}, {0}}, {{0}, {0}, {0}, {0}}}, {SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, {1, 1, 1}, 16, 16, - 128, {{32}, {32}, {32}, {32}}, + {{32}, {32}, {32}, {32}}, {{64}, {32}, {0}, {96}}}, {SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, - {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 8, 8, - 64, {{16}, {16}, {16}, {16}}, + {{16}, {16}, {16}, {16}}, {{32}, {16}, {0}, {48}}}, {SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP, {1, 1, 1}, 8, 8, - 64, {{0}, {32}, {32}, {0}}, + {{0}, {32}, {32}, {0}}, {{0}, {32}, {0}, {0}}}, - {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 4, 4, - 32, {{10}, {10}, {10}, {2}}, - {{0}, {10}, {20}, {30}}}, + {{10}, {10}, {10}, {2}}, + {{20}, {10}, {0}, {30}}}, - {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA_SNORM, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, - {{24}, {16}, {8}, {0}}}, + {{8}, {8}, {8}, {8}}, + {{16}, {8}, {0}, {24}}}, {SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP, {1, 1, 1}, 4, 
4, - 32, {{0}, {16}, {16}, {0}}, + {{0}, {16}, {16}, {0}}, {{0}, {16}, {0}, {0}}}, - {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG, + {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG_UNORM, {1, 1, 1}, 4, 4, - 32, {{0}, {16}, {16}, {0}}, - {{0}, {0}, {16}, {0}}}, + {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, - {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG, + {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG_SNORM, {1, 1, 1}, 4, 4, - 32, {{16}, {16}, {0}, {0}}, - {{16}, {0}, {0}, {0}}}, + {{0}, {16}, {16}, {0}}, + {{0}, {16}, {0}, {0}}}, {SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP, {1, 1, 1}, 4, 4, - 32, {{0}, {0}, {32}, {0}}, + {{0}, {0}, {32}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG, + {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG_SNORM, {1, 1, 1}, 2, 2, - 16, {{8}, {8}, {0}, {0}}, - {{8}, {0}, {0}, {0}}}, + {{0}, {8}, {8}, {0}}, + {{0}, {8}, {0}, {0}}}, {SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH, + {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH_UNORM, {1, 1, 1}, 2, 2, - 16, {{0}, {0}, {16}, {0}}, + {{0}, {0}, {16}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_ALPHA, + {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_A_UNORM, {1, 1, 1}, 1, 1, - 8, {{0}, {0}, {0}, {8}}, + {{0}, {0}, {0}, {8}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_BC1_COMP_UNORM, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_BC2_COMP_UNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_BC3_COMP_UNORM, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, {128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_B5G6R5_UNORM, 
SVGA3DBLOCKDESC_RGB, + {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB_UNORM, {1, 1, 1}, 2, 2, - 16, {{5}, {6}, {5}, {0}}, + {{5}, {6}, {5}, {0}}, {{0}, {5}, {11}, {0}}}, - {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 2, 2, - 16, {{5}, {5}, {5}, {1}}, + {{5}, {5}, {5}, {1}}, {{0}, {5}, {10}, {15}}}, - {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA, + {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, {1, 1, 1}, 4, 4, - 32, {{8}, {8}, {8}, {8}}, + {{8}, {8}, {8}, {8}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB, + {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB_UNORM, {1, 1, 1}, 4, 4, - 24, {{8}, {8}, {8}, {0}}, + {{8}, {8}, {8}, {0}}, {{0}, {8}, {16}, {24}}}, - {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_BC4_COMP_UNORM, {4, 4, 1}, 8, 8, - 64, {{0}, {0}, {64}, {0}}, + {{0}, {0}, {64}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_BC5_COMP_UNORM, + {4, 4, 1}, 16, 16, + {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_B4G4R4A4_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, + {1, 1, 1}, 2, 2, + {{4}, {4}, {4}, {4}}, + {{0}, {4}, {8}, {12}}}, + + {SVGA3D_BC6H_TYPELESS, SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS, + {4, 4, 1}, 16, 16, + {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC6H_UF16, SVGA3DBLOCKDESC_BC6H_COMP_UF16, + {4, 4, 1}, 16, 16, + {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC6H_SF16, SVGA3DBLOCKDESC_BC6H_COMP_SF16, + {4, 4, 1}, 16, 16, + {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC7_TYPELESS, SVGA3DBLOCKDESC_BC7_COMP_TYPELESS, + {4, 4, 1}, 16, 16, + {{0}, {0}, {128}, {0}}, + {{0}, {0}, {0}, {0}}}, + + {SVGA3D_BC7_UNORM, SVGA3DBLOCKDESC_BC7_COMP_UNORM, + {4, 4, 1}, 16, 16, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, - {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_COMPRESSED, + {SVGA3D_BC7_UNORM_SRGB, SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB, {4, 4, 1}, 16, 16, - 128, {{0}, {0}, 
{128}, {0}}, + {{0}, {0}, {128}, {0}}, {{0}, {0}, {0}, {0}}}, + + {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV, + {1, 1, 1}, 4, 4, + {{8}, {8}, {8}, {8}}, + {{0}, {8}, {16}, {24}}}, + + {SVGA3D_R11G11B10_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, + {1, 1, 1}, 4, 4, + {{10}, {11}, {11}, {0}}, + {{22}, {11}, {0}, {0}}}, }; + extern const struct svga3d_surface_desc g_SVGA3dSurfaceDescs[]; extern int g_SVGA3dSurfaceDescs_size; diff --git a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h index 94262314e..1807b1dd5 100644 --- a/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h +++ b/lib/mesa/src/gallium/drivers/svga/include/svga3d_types.h @@ -1,5 +1,6 @@ -/********************************************************** - * Copyright 2007-2015 VMware, Inc. All rights reserved. +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2012-2022 VMware, Inc. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -21,14 +22,18 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* - **********************************************************/ + */ /* * svga3d_types.h -- * - * SVGA 3d hardware definitions for basic types + * SVGA 3d hardware definitions for basic types */ + + + + #ifndef _SVGA3D_TYPES_H_ #define _SVGA3D_TYPES_H_ @@ -232,8 +237,7 @@ typedef enum SVGA3dSurfaceFormat { /* Planar video formats */ SVGA3D_NV12 = 44, - /* Video format with alpha */ - SVGA3D_AYUV = 45, + SVGA3D_FORMAT_DEAD2 = 45, SVGA3D_R32G32B32A32_TYPELESS = 46, SVGA3D_R32G32B32A32_UINT = 47, @@ -339,14 +343,22 @@ typedef enum SVGA3dSurfaceFormat { SVGA3D_B8G8R8X8_UNORM = 142, SVGA3D_BC4_UNORM = 143, SVGA3D_BC5_UNORM = 144, + SVGA3D_B4G4R4A4_UNORM = 145, + + + SVGA3D_BC6H_TYPELESS = 146, + SVGA3D_BC6H_UF16 = 147, + SVGA3D_BC6H_SF16 = 148, + SVGA3D_BC7_TYPELESS = 149, + SVGA3D_BC7_UNORM = 150, + SVGA3D_BC7_UNORM_SRGB = 151, + SVGA3D_AYUV = 152, + SVGA3D_R11G11B10_TYPELESS = 153, SVGA3D_FORMAT_MAX } SVGA3dSurfaceFormat; -typedef uint32 SVGA3dSurfaceFlags; -/* - * SVGA3d Surface Flags -- - */ + #define SVGA3D_SURFACE_CUBEMAP (1 << 0) /* @@ -426,27 +438,42 @@ typedef uint32 SVGA3dSurfaceFlags; */ #define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (CONST64U(1) << 30) -/* - * Marker for the last defined bit in SVGA3dSurfaceFlags. - */ -#define SVGA3D_SURFACE_VADECODE (CONST64U(1) << 31) -/* - * Specifies that a surface is multisample, and therefore requires the full - * mob-backing to store all the samples. 
- */ +#define SVGA3D_SURFACE_RESERVED1 (CONST64U(1) << 31) + + #define SVGA3D_SURFACE_MULTISAMPLE (CONST64U(1) << 32) + + +#define SVGA3D_SURFACE_BIND_UAVIEW (CONST64U(1) << 33) + + +#define SVGA3D_SURFACE_TRANSFER_TO_BUFFER (CONST64U(1) << 34) + +#define SVGA3D_SURFACE_BIND_LOGICOPS (CONST64U(1) << 35) + + +#define SVGA3D_SURFACE_BIND_RAW_VIEWS (CONST64U(1) << 36) +#define SVGA3D_SURFACE_BUFFER_STRUCTURED (CONST64U(1) << 37) + #define SVGA3D_SURFACE_DRAWINDIRECT_ARGS (CONST64U(1) << 38) +#define SVGA3D_SURFACE_RESOURCE_CLAMP (CONST64U(1) << 39) + + +#define SVGA3D_SURFACE_STAGING_COPY (CONST64U(1) << 40) + + + + + + + + + + +#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 44) -#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 42) -/* - * Surface flags types: - * - * SVGA3dSurface1Flags: Lower 32-bits of flags. - * SVGA3dSurface2Flags: Upper 32-bits of flags. - * SVGA3dSurfaceAllFlags: Full 64-bits of flags. - */ typedef uint32 SVGA3dSurface1Flags; typedef uint32 SVGA3dSurface2Flags; typedef uint64 SVGA3dSurfaceAllFlags; @@ -464,14 +491,28 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_STAGING_DOWNLOAD | \ SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_VADECODE | \ SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ + SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_LOGICOPS | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_STAGING_COPY | \ + SVGA3D_SURFACE_RESTRICT_UPDATE | \ + SVGA3D_SURFACE_BIND_TENSOR | \ + SVGA3D_SURFACE_LO_STAGING \ + ) + +#define SVGA3D_SURFACE_HB_PRESENT_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_STAGING_COPY \ ) #define SVGA3D_SURFACE_2D_DISALLOWED_MASK \ ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_DEAD2 | \ SVGA3D_SURFACE_AUTOGENMIPMAPS | \ 
SVGA3D_SURFACE_VOLUME | \ SVGA3D_SURFACE_1D | \ @@ -481,9 +522,14 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_VADECODE | \ SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ + SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_BIND_TENSOR \ ) #define SVGA3D_SURFACE_BASICOPS_DISALLOWED_MASK \ @@ -491,7 +537,6 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_AUTOGENMIPMAPS | \ SVGA3D_SURFACE_VOLUME | \ SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_VADECODE | \ SVGA3D_SURFACE_MULTISAMPLE \ ) @@ -510,9 +555,15 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_STAGING_DOWNLOAD | \ SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_VADECODE | \ SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_STAGING_COPY | \ + SVGA3D_SURFACE_BIND_TENSOR | \ + SVGA3D_SURFACE_LO_STAGING \ ) #define SVGA3D_SURFACE_BUFFER_DISALLOWED_MASK \ @@ -523,30 +574,48 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_DEAD2 | \ SVGA3D_SURFACE_ARRAY | \ SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_MOB_PITCH \ + SVGA3D_SURFACE_MOB_PITCH | \ + SVGA3D_SURFACE_RESOURCE_CLAMP \ ) #define SVGA3D_SURFACE_MULTISAMPLE_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + ( SVGA3D_SURFACE_CUBEMAP | \ + SVGA3D_SURFACE_AUTOGENMIPMAPS | \ SVGA3D_SURFACE_VOLUME | \ SVGA3D_SURFACE_1D | \ SVGA3D_SURFACE_SCREENTARGET | \ SVGA3D_SURFACE_MOB_PITCH | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ + 
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ + SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_LOGICOPS | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_STAGING_COPY \ ) -#define SVGA3D_SURFACE_DX_ONLY_MASK \ - ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ +#define SVGA3D_SURFACE_DX_ONLY_MASK \ + ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER \ ) -#define SVGA3D_SURFACE_STAGING_MASK \ +#define SVGA3D_SURFACE_ANY_STAGING_MASK \ ( SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_STAGING_COPY | \ + SVGA3D_SURFACE_LO_STAGING \ ) +#define SVGA3D_SURFACE_ANY_NONHINT_STAGING_MASK \ + (SVGA3D_SURFACE_ANY_STAGING_MASK & \ + ~( \ + SVGA3D_SURFACE_LO_STAGING \ + )) + #define SVGA3D_SURFACE_BIND_MASK \ ( SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ @@ -554,78 +623,29 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \ SVGA3D_SURFACE_BIND_RENDER_TARGET | \ SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_BIND_LOGICOPS | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BIND_TENSOR \ ) -#define SVGA3D_SURFACE_VADECODE_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_HINT_STATIC | \ - SVGA3D_SURFACE_HINT_DYNAMIC | \ - SVGA3D_SURFACE_HINT_INDEXBUFFER | \ - SVGA3D_SURFACE_HINT_VERTEXBUFFER | \ - SVGA3D_SURFACE_HINT_TEXTURE | \ - SVGA3D_SURFACE_HINT_RENDERTARGET | \ - SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \ - SVGA3D_SURFACE_HINT_WRITEONLY | \ - 
SVGA3D_SURFACE_DEAD2 | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_HINT_RT_LOCKABLE | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_RENDER_TARGET | \ - SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_INACTIVE | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_MULTISAMPLE \ - ) - -#define SVGA3D_SURFACE_VAPROCESSFRAME_OUTPUT_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_HINT_INDEXBUFFER | \ - SVGA3D_SURFACE_HINT_VERTEXBUFFER | \ - SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \ - SVGA3D_SURFACE_DEAD2 | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_INACTIVE | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_VADECODE | \ - SVGA3D_SURFACE_MULTISAMPLE \ +#define SVGA3D_SURFACE_STAGING_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_BIND_MASK | \ + SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_SCREENTARGET | \ + SVGA3D_SURFACE_HINT_RENDERTARGET | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ + SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_BIND_TENSOR \ ) -#define SVGA3D_SURFACE_VAPROCESSFRAME_INPUT_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_HINT_INDEXBUFFER | \ - SVGA3D_SURFACE_HINT_VERTEXBUFFER | \ - SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \ - SVGA3D_SURFACE_DEAD2 | \ - SVGA3D_SURFACE_VOLUME | 
\ - SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_MULTISAMPLE \ +#define SVGA3D_SURFACE_STAGING_COPY_DISALLOWED_MASK \ + ( SVGA3D_SURFACE_STAGING_DISALLOWED_MASK | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \ ) #define SVGA3D_SURFACE_LOGICOPS_DISALLOWED_MASK \ @@ -640,18 +660,28 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_VADECODE | \ SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ + SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_STAGING_COPY \ ) +#define SVGA3D_SURFACE_SM5_MASK \ + ( SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_RESOURCE_CLAMP \ + ) + +#define SVGA3D_BUFFER_STRUCTURED_STRIDE_MAX 2048 + + -/* - * These are really the D3DFORMAT_OP defines from the wdk. We need - * them so that we can query the host for what the supported surface - * operations are (when we're using the D3D backend, in particular), - * and so we can send those operations to the guest. 
- */ typedef enum { SVGA3DFORMAT_OP_TEXTURE = 0x00000001, SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002, @@ -661,98 +691,52 @@ typedef enum { SVGA3DFORMAT_OP_ZSTENCIL = 0x00000040, SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH = 0x00000080, -/* - * This format can be used as a render target if the current display mode - * is the same depth if the alpha channel is ignored. e.g. if the device - * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the - * format op list entry for A8R8G8B8 should have this cap. - */ + SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET = 0x00000100, -/* - * This format contains DirectDraw support (including Flip). This flag - * should not to be set on alpha formats. - */ + SVGA3DFORMAT_OP_DISPLAYMODE = 0x00000400, -/* - * The rasterizer can support some level of Direct3D support in this format - * and implies that the driver can create a Context in this mode (for some - * render target format). When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE - * flag must also be set. - */ + SVGA3DFORMAT_OP_3DACCELERATION = 0x00000800, -/* - * This is set for a private format when the driver has put the bpp in - * the structure. - */ + SVGA3DFORMAT_OP_PIXELSIZE = 0x00001000, -/* - * Indicates that this format can be converted to any RGB format for which - * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified. - */ + SVGA3DFORMAT_OP_CONVERT_TO_ARGB = 0x00002000, -/* - * Indicates that this format can be used to create offscreen plain surfaces. - */ + SVGA3DFORMAT_OP_OFFSCREENPLAIN = 0x00004000, -/* - * Indicated that this format can be read as an SRGB texture (meaning that the - * sampler will linearize the looked up data). - */ + SVGA3DFORMAT_OP_SRGBREAD = 0x00008000, -/* - * Indicates that this format can be used in the bumpmap instructions. - */ + SVGA3DFORMAT_OP_BUMPMAP = 0x00010000, -/* - * Indicates that this format can be sampled by the displacement map sampler. 
- */ + SVGA3DFORMAT_OP_DMAP = 0x00020000, -/* - * Indicates that this format cannot be used with texture filtering. - */ + SVGA3DFORMAT_OP_NOFILTER = 0x00040000, -/* - * Indicates that format conversions are supported to this RGB format if - * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format. - */ + SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB = 0x00080000, -/* - * Indicated that this format can be written as an SRGB target - * (meaning that the pixel pipe will DE-linearize data on output to format) - */ + SVGA3DFORMAT_OP_SRGBWRITE = 0x00100000, -/* - * Indicates that this format cannot be used with alpha blending. - */ + SVGA3DFORMAT_OP_NOALPHABLEND = 0x00200000, -/* - * Indicates that the device can auto-generated sublevels for resources - * of this format. - */ + SVGA3DFORMAT_OP_AUTOGENMIPMAP = 0x00400000, -/* - * Indicates that this format can be used by vertex texture sampler. - */ + SVGA3DFORMAT_OP_VERTEXTEXTURE = 0x00800000, -/* - * Indicates that this format supports neither texture coordinate - * wrap modes, nor mipmapping. - */ + SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP = 0x01000000 } SVGA3dFormatOp; @@ -783,10 +767,7 @@ typedef enum { SVGA3DFORMAT_OP_NOALPHABLEND | \ SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP) -/* - * This structure is a conversion of SVGA3DFORMAT_OP_* - * Entries must be located at the same position. - */ + typedef union { uint32 value; struct { @@ -818,123 +799,113 @@ typedef union { }; } SVGA3dSurfaceFormatCaps; -/* - * SVGA_3D_CMD_SETRENDERSTATE Types. All value types - * must fit in a uint32. 
- */ + typedef enum { SVGA3D_RS_INVALID = 0, SVGA3D_RS_MIN = 1, - SVGA3D_RS_ZENABLE = 1, /* SVGA3dBool */ - SVGA3D_RS_ZWRITEENABLE = 2, /* SVGA3dBool */ - SVGA3D_RS_ALPHATESTENABLE = 3, /* SVGA3dBool */ - SVGA3D_RS_DITHERENABLE = 4, /* SVGA3dBool */ - SVGA3D_RS_BLENDENABLE = 5, /* SVGA3dBool */ - SVGA3D_RS_FOGENABLE = 6, /* SVGA3dBool */ - SVGA3D_RS_SPECULARENABLE = 7, /* SVGA3dBool */ - SVGA3D_RS_STENCILENABLE = 8, /* SVGA3dBool */ - SVGA3D_RS_LIGHTINGENABLE = 9, /* SVGA3dBool */ - SVGA3D_RS_NORMALIZENORMALS = 10, /* SVGA3dBool */ - SVGA3D_RS_POINTSPRITEENABLE = 11, /* SVGA3dBool */ - SVGA3D_RS_POINTSCALEENABLE = 12, /* SVGA3dBool */ - SVGA3D_RS_STENCILREF = 13, /* uint32 */ - SVGA3D_RS_STENCILMASK = 14, /* uint32 */ - SVGA3D_RS_STENCILWRITEMASK = 15, /* uint32 */ - SVGA3D_RS_FOGSTART = 16, /* float */ - SVGA3D_RS_FOGEND = 17, /* float */ - SVGA3D_RS_FOGDENSITY = 18, /* float */ - SVGA3D_RS_POINTSIZE = 19, /* float */ - SVGA3D_RS_POINTSIZEMIN = 20, /* float */ - SVGA3D_RS_POINTSIZEMAX = 21, /* float */ - SVGA3D_RS_POINTSCALE_A = 22, /* float */ - SVGA3D_RS_POINTSCALE_B = 23, /* float */ - SVGA3D_RS_POINTSCALE_C = 24, /* float */ - SVGA3D_RS_FOGCOLOR = 25, /* SVGA3dColor */ - SVGA3D_RS_AMBIENT = 26, /* SVGA3dColor */ - SVGA3D_RS_CLIPPLANEENABLE = 27, /* SVGA3dClipPlanes */ - SVGA3D_RS_FOGMODE = 28, /* SVGA3dFogMode */ - SVGA3D_RS_FILLMODE = 29, /* SVGA3dFillMode */ - SVGA3D_RS_SHADEMODE = 30, /* SVGA3dShadeMode */ - SVGA3D_RS_LINEPATTERN = 31, /* SVGA3dLinePattern */ - SVGA3D_RS_SRCBLEND = 32, /* SVGA3dBlendOp */ - SVGA3D_RS_DSTBLEND = 33, /* SVGA3dBlendOp */ - SVGA3D_RS_BLENDEQUATION = 34, /* SVGA3dBlendEquation */ - SVGA3D_RS_CULLMODE = 35, /* SVGA3dFace */ - SVGA3D_RS_ZFUNC = 36, /* SVGA3dCmpFunc */ - SVGA3D_RS_ALPHAFUNC = 37, /* SVGA3dCmpFunc */ - SVGA3D_RS_STENCILFUNC = 38, /* SVGA3dCmpFunc */ - SVGA3D_RS_STENCILFAIL = 39, /* SVGA3dStencilOp */ - SVGA3D_RS_STENCILZFAIL = 40, /* SVGA3dStencilOp */ - SVGA3D_RS_STENCILPASS = 41, /* SVGA3dStencilOp */ - 
SVGA3D_RS_ALPHAREF = 42, /* float (0.0 .. 1.0) */ - SVGA3D_RS_FRONTWINDING = 43, /* SVGA3dFrontWinding */ - SVGA3D_RS_COORDINATETYPE = 44, /* SVGA3dCoordinateType */ - SVGA3D_RS_ZBIAS = 45, /* float */ - SVGA3D_RS_RANGEFOGENABLE = 46, /* SVGA3dBool */ - SVGA3D_RS_COLORWRITEENABLE = 47, /* SVGA3dColorMask */ - SVGA3D_RS_VERTEXMATERIALENABLE = 48, /* SVGA3dBool */ - SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, /* SVGA3dVertexMaterial */ - SVGA3D_RS_SPECULARMATERIALSOURCE = 50, /* SVGA3dVertexMaterial */ - SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, /* SVGA3dVertexMaterial */ - SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, /* SVGA3dVertexMaterial */ - SVGA3D_RS_TEXTUREFACTOR = 53, /* SVGA3dColor */ - SVGA3D_RS_LOCALVIEWER = 54, /* SVGA3dBool */ - SVGA3D_RS_SCISSORTESTENABLE = 55, /* SVGA3dBool */ - SVGA3D_RS_BLENDCOLOR = 56, /* SVGA3dColor */ - SVGA3D_RS_STENCILENABLE2SIDED = 57, /* SVGA3dBool */ - SVGA3D_RS_CCWSTENCILFUNC = 58, /* SVGA3dCmpFunc */ - SVGA3D_RS_CCWSTENCILFAIL = 59, /* SVGA3dStencilOp */ - SVGA3D_RS_CCWSTENCILZFAIL = 60, /* SVGA3dStencilOp */ - SVGA3D_RS_CCWSTENCILPASS = 61, /* SVGA3dStencilOp */ - SVGA3D_RS_VERTEXBLEND = 62, /* SVGA3dVertexBlendFlags */ - SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, /* float */ - SVGA3D_RS_DEPTHBIAS = 64, /* float */ - - - /* - * Output Gamma Level - * - * Output gamma effects the gamma curve of colors that are output from the - * rendering pipeline. A value of 1.0 specifies a linear color space. If the - * value is <= 0.0, gamma correction is ignored and linear color space is - * used. 
- */ - - SVGA3D_RS_OUTPUTGAMMA = 65, /* float */ - SVGA3D_RS_ZVISIBLE = 66, /* SVGA3dBool */ - SVGA3D_RS_LASTPIXEL = 67, /* SVGA3dBool */ - SVGA3D_RS_CLIPPING = 68, /* SVGA3dBool */ - SVGA3D_RS_WRAP0 = 69, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP1 = 70, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP2 = 71, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP3 = 72, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP4 = 73, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP5 = 74, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP6 = 75, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP7 = 76, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP8 = 77, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP9 = 78, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP10 = 79, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP11 = 80, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP12 = 81, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP13 = 82, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP14 = 83, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP15 = 84, /* SVGA3dWrapFlags */ - SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, /* SVGA3dBool */ - SVGA3D_RS_MULTISAMPLEMASK = 86, /* uint32 */ - SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, /* SVGA3dBool */ - SVGA3D_RS_TWEENFACTOR = 88, /* float */ - SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, /* SVGA3dBool */ - SVGA3D_RS_COLORWRITEENABLE1 = 90, /* SVGA3dColorMask */ - SVGA3D_RS_COLORWRITEENABLE2 = 91, /* SVGA3dColorMask */ - SVGA3D_RS_COLORWRITEENABLE3 = 92, /* SVGA3dColorMask */ - SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, /* SVGA3dBool */ - SVGA3D_RS_SRCBLENDALPHA = 94, /* SVGA3dBlendOp */ - SVGA3D_RS_DSTBLENDALPHA = 95, /* SVGA3dBlendOp */ - SVGA3D_RS_BLENDEQUATIONALPHA = 96, /* SVGA3dBlendEquation */ - SVGA3D_RS_TRANSPARENCYANTIALIAS = 97, /* SVGA3dTransparencyAntialiasType */ - SVGA3D_RS_LINEWIDTH = 98, /* float */ + SVGA3D_RS_ZENABLE = 1, + SVGA3D_RS_ZWRITEENABLE = 2, + SVGA3D_RS_ALPHATESTENABLE = 3, + SVGA3D_RS_DITHERENABLE = 4, + SVGA3D_RS_BLENDENABLE = 5, + SVGA3D_RS_FOGENABLE = 6, + SVGA3D_RS_SPECULARENABLE = 7, + SVGA3D_RS_STENCILENABLE = 8, + SVGA3D_RS_LIGHTINGENABLE = 9, + 
SVGA3D_RS_NORMALIZENORMALS = 10, + SVGA3D_RS_POINTSPRITEENABLE = 11, + SVGA3D_RS_POINTSCALEENABLE = 12, + SVGA3D_RS_STENCILREF = 13, + SVGA3D_RS_STENCILMASK = 14, + SVGA3D_RS_STENCILWRITEMASK = 15, + SVGA3D_RS_FOGSTART = 16, + SVGA3D_RS_FOGEND = 17, + SVGA3D_RS_FOGDENSITY = 18, + SVGA3D_RS_POINTSIZE = 19, + SVGA3D_RS_POINTSIZEMIN = 20, + SVGA3D_RS_POINTSIZEMAX = 21, + SVGA3D_RS_POINTSCALE_A = 22, + SVGA3D_RS_POINTSCALE_B = 23, + SVGA3D_RS_POINTSCALE_C = 24, + SVGA3D_RS_FOGCOLOR = 25, + SVGA3D_RS_AMBIENT = 26, + SVGA3D_RS_CLIPPLANEENABLE = 27, + SVGA3D_RS_FOGMODE = 28, + SVGA3D_RS_FILLMODE = 29, + SVGA3D_RS_SHADEMODE = 30, + SVGA3D_RS_LINEPATTERN = 31, + SVGA3D_RS_SRCBLEND = 32, + SVGA3D_RS_DSTBLEND = 33, + SVGA3D_RS_BLENDEQUATION = 34, + SVGA3D_RS_CULLMODE = 35, + SVGA3D_RS_ZFUNC = 36, + SVGA3D_RS_ALPHAFUNC = 37, + SVGA3D_RS_STENCILFUNC = 38, + SVGA3D_RS_STENCILFAIL = 39, + SVGA3D_RS_STENCILZFAIL = 40, + SVGA3D_RS_STENCILPASS = 41, + SVGA3D_RS_ALPHAREF = 42, + SVGA3D_RS_FRONTWINDING = 43, + SVGA3D_RS_COORDINATETYPE = 44, + SVGA3D_RS_ZBIAS = 45, + SVGA3D_RS_RANGEFOGENABLE = 46, + SVGA3D_RS_COLORWRITEENABLE = 47, + SVGA3D_RS_VERTEXMATERIALENABLE = 48, + SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, + SVGA3D_RS_SPECULARMATERIALSOURCE = 50, + SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, + SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, + SVGA3D_RS_TEXTUREFACTOR = 53, + SVGA3D_RS_LOCALVIEWER = 54, + SVGA3D_RS_SCISSORTESTENABLE = 55, + SVGA3D_RS_BLENDCOLOR = 56, + SVGA3D_RS_STENCILENABLE2SIDED = 57, + SVGA3D_RS_CCWSTENCILFUNC = 58, + SVGA3D_RS_CCWSTENCILFAIL = 59, + SVGA3D_RS_CCWSTENCILZFAIL = 60, + SVGA3D_RS_CCWSTENCILPASS = 61, + SVGA3D_RS_VERTEXBLEND = 62, + SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, + SVGA3D_RS_DEPTHBIAS = 64, + + + + + SVGA3D_RS_OUTPUTGAMMA = 65, + SVGA3D_RS_ZVISIBLE = 66, + SVGA3D_RS_LASTPIXEL = 67, + SVGA3D_RS_CLIPPING = 68, + SVGA3D_RS_WRAP0 = 69, + SVGA3D_RS_WRAP1 = 70, + SVGA3D_RS_WRAP2 = 71, + SVGA3D_RS_WRAP3 = 72, + SVGA3D_RS_WRAP4 = 73, + SVGA3D_RS_WRAP5 = 74, + 
SVGA3D_RS_WRAP6 = 75, + SVGA3D_RS_WRAP7 = 76, + SVGA3D_RS_WRAP8 = 77, + SVGA3D_RS_WRAP9 = 78, + SVGA3D_RS_WRAP10 = 79, + SVGA3D_RS_WRAP11 = 80, + SVGA3D_RS_WRAP12 = 81, + SVGA3D_RS_WRAP13 = 82, + SVGA3D_RS_WRAP14 = 83, + SVGA3D_RS_WRAP15 = 84, + SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, + SVGA3D_RS_MULTISAMPLEMASK = 86, + SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, + SVGA3D_RS_TWEENFACTOR = 88, + SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, + SVGA3D_RS_COLORWRITEENABLE1 = 90, + SVGA3D_RS_COLORWRITEENABLE2 = 91, + SVGA3D_RS_COLORWRITEENABLE3 = 92, + SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, + SVGA3D_RS_SRCBLENDALPHA = 94, + SVGA3D_RS_DSTBLENDALPHA = 95, + SVGA3D_RS_BLENDEQUATIONALPHA = 96, + SVGA3D_RS_TRANSPARENCYANTIALIAS = 97, + SVGA3D_RS_LINEWIDTH = 98, SVGA3D_RS_MAX } SVGA3dRenderStateName; @@ -946,9 +917,9 @@ typedef enum { } SVGA3dTransparencyAntialiasType; typedef enum { - SVGA3D_VERTEXMATERIAL_NONE = 0, /* Use the value in the current material */ - SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, /* Use the value in the diffuse component */ - SVGA3D_VERTEXMATERIAL_SPECULAR = 2, /* Use the value in the specular component */ + SVGA3D_VERTEXMATERIAL_NONE = 0, + SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, + SVGA3D_VERTEXMATERIAL_SPECULAR = 2, SVGA3D_VERTEXMATERIAL_MAX = 3, } SVGA3dVertexMaterial; @@ -1068,9 +1039,7 @@ typedef enum { SVGA3D_FACE_MAX } SVGA3dFace; -/* - * The order and the values should not be changed - */ + typedef enum { SVGA3D_CMP_INVALID = 0, @@ -1085,11 +1054,7 @@ typedef enum { SVGA3D_CMP_MAX } SVGA3dCmpFunc; -/* - * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows - * the fog factor to be specified in the alpha component of the specular - * (a.k.a. secondary) vertex color. - */ + typedef enum { SVGA3D_FOGFUNC_INVALID = 0, SVGA3D_FOGFUNC_EXP = 1, @@ -1098,10 +1063,7 @@ typedef enum { SVGA3D_FOGFUNC_PER_VERTEX = 4 } SVGA3dFogFunction; -/* - * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex - * or per-pixel basis. 
- */ + typedef enum { SVGA3D_FOGTYPE_INVALID = 0, SVGA3D_FOGTYPE_VERTEX = 1, @@ -1109,11 +1071,7 @@ typedef enum { SVGA3D_FOGTYPE_MAX = 3 } SVGA3dFogType; -/* - * SVGA3D_FOGBASE_* selects depth or range-based fog. Depth-based fog is - * computed using the eye Z value of each pixel (or vertex), whereas range- - * based fog is computed using the actual distance (range) to the eye. - */ + typedef enum { SVGA3D_FOGBASE_INVALID = 0, SVGA3D_FOGBASE_DEPTHBASED = 1, @@ -1149,11 +1107,7 @@ typedef enum { SVGA3D_CLEAR_DEPTH = 0x2, SVGA3D_CLEAR_STENCIL = 0x4, - /* - * Hint only, must be used together with SVGA3D_CLEAR_COLOR. If - * SVGA3D_CLEAR_DEPTH or SVGA3D_CLEAR_STENCIL bit is set, this - * bit will be ignored. - */ + SVGA3D_CLEAR_COLORFILL = 0x8 } SVGA3dClearFlag; @@ -1205,57 +1159,48 @@ typedef enum { SVGA3D_WRAPCOORD_ALL = 0xF, } SVGA3dWrapFlags; -/* - * SVGA_3D_CMD_TEXTURESTATE Types. All value types - * must fit in a uint32. - */ + typedef enum { SVGA3D_TS_INVALID = 0, SVGA3D_TS_MIN = 1, - SVGA3D_TS_BIND_TEXTURE = 1, /* SVGA3dSurfaceId */ - SVGA3D_TS_COLOROP = 2, /* SVGA3dTextureCombiner */ - SVGA3D_TS_COLORARG1 = 3, /* SVGA3dTextureArgData */ - SVGA3D_TS_COLORARG2 = 4, /* SVGA3dTextureArgData */ - SVGA3D_TS_ALPHAOP = 5, /* SVGA3dTextureCombiner */ - SVGA3D_TS_ALPHAARG1 = 6, /* SVGA3dTextureArgData */ - SVGA3D_TS_ALPHAARG2 = 7, /* SVGA3dTextureArgData */ - SVGA3D_TS_ADDRESSU = 8, /* SVGA3dTextureAddress */ - SVGA3D_TS_ADDRESSV = 9, /* SVGA3dTextureAddress */ - SVGA3D_TS_MIPFILTER = 10, /* SVGA3dTextureFilter */ - SVGA3D_TS_MAGFILTER = 11, /* SVGA3dTextureFilter */ - SVGA3D_TS_MINFILTER = 12, /* SVGA3dTextureFilter */ - SVGA3D_TS_BORDERCOLOR = 13, /* SVGA3dColor */ - SVGA3D_TS_TEXCOORDINDEX = 14, /* uint32 */ - SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, /* SVGA3dTexTransformFlags */ - SVGA3D_TS_TEXCOORDGEN = 16, /* SVGA3dTextureCoordGen */ - SVGA3D_TS_BUMPENVMAT00 = 17, /* float */ - SVGA3D_TS_BUMPENVMAT01 = 18, /* float */ - SVGA3D_TS_BUMPENVMAT10 = 19, /* float */ - 
SVGA3D_TS_BUMPENVMAT11 = 20, /* float */ - SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, /* uint32 */ - SVGA3D_TS_TEXTURE_LOD_BIAS = 22, /* float */ - SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, /* uint32 */ - SVGA3D_TS_ADDRESSW = 24, /* SVGA3dTextureAddress */ - - - /* - * Sampler Gamma Level - * - * Sampler gamma effects the color of samples taken from the sampler. A - * value of 1.0 will produce linear samples. If the value is <= 0.0 the - * gamma value is ignored and a linear space is used. - */ - - SVGA3D_TS_GAMMA = 25, /* float */ - SVGA3D_TS_BUMPENVLSCALE = 26, /* float */ - SVGA3D_TS_BUMPENVLOFFSET = 27, /* float */ - SVGA3D_TS_COLORARG0 = 28, /* SVGA3dTextureArgData */ - SVGA3D_TS_ALPHAARG0 = 29, /* SVGA3dTextureArgData */ - SVGA3D_TS_PREGB_MAX = 30, /* Max value before GBObjects */ - SVGA3D_TS_CONSTANT = 30, /* SVGA3dColor */ - SVGA3D_TS_COLOR_KEY_ENABLE = 31, /* SVGA3dBool */ - SVGA3D_TS_COLOR_KEY = 32, /* SVGA3dColor */ + SVGA3D_TS_BIND_TEXTURE = 1, + SVGA3D_TS_COLOROP = 2, + SVGA3D_TS_COLORARG1 = 3, + SVGA3D_TS_COLORARG2 = 4, + SVGA3D_TS_ALPHAOP = 5, + SVGA3D_TS_ALPHAARG1 = 6, + SVGA3D_TS_ALPHAARG2 = 7, + SVGA3D_TS_ADDRESSU = 8, + SVGA3D_TS_ADDRESSV = 9, + SVGA3D_TS_MIPFILTER = 10, + SVGA3D_TS_MAGFILTER = 11, + SVGA3D_TS_MINFILTER = 12, + SVGA3D_TS_BORDERCOLOR = 13, + SVGA3D_TS_TEXCOORDINDEX = 14, + SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, + SVGA3D_TS_TEXCOORDGEN = 16, + SVGA3D_TS_BUMPENVMAT00 = 17, + SVGA3D_TS_BUMPENVMAT01 = 18, + SVGA3D_TS_BUMPENVMAT10 = 19, + SVGA3D_TS_BUMPENVMAT11 = 20, + SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, + SVGA3D_TS_TEXTURE_LOD_BIAS = 22, + SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, + SVGA3D_TS_ADDRESSW = 24, + + + + + SVGA3D_TS_GAMMA = 25, + SVGA3D_TS_BUMPENVLSCALE = 26, + SVGA3D_TS_BUMPENVLOFFSET = 27, + SVGA3D_TS_COLORARG0 = 28, + SVGA3D_TS_ALPHAARG0 = 29, + SVGA3D_TS_PREGB_MAX = 30, + SVGA3D_TS_CONSTANT = 30, + SVGA3D_TS_COLOR_KEY_ENABLE = 31, + SVGA3D_TS_COLOR_KEY = 32, SVGA3D_TS_MAX } SVGA3dTextureStateName; @@ -1304,20 +1249,17 @@ 
typedef enum { SVGA3D_TEX_ADDRESS_MAX } SVGA3dTextureAddress; -/* - * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is - * disabled, and the rasterizer should use the magnification filter instead. - */ + typedef enum { SVGA3D_TEX_FILTER_NONE = 0, SVGA3D_TEX_FILTER_MIN = 0, SVGA3D_TEX_FILTER_NEAREST = 1, SVGA3D_TEX_FILTER_LINEAR = 2, SVGA3D_TEX_FILTER_ANISOTROPIC = 3, - SVGA3D_TEX_FILTER_FLATCUBIC = 4, /* Deprecated, not implemented */ - SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, /* Deprecated, not implemented */ - SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, /* Not currently implemented */ - SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, /* Not currently implemented */ + SVGA3D_TEX_FILTER_FLATCUBIC = 4, + SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, + SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, + SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, SVGA3D_TEX_FILTER_MAX } SVGA3dTextureFilter; @@ -1339,9 +1281,7 @@ typedef enum { SVGA3D_TEXCOORD_GEN_MAX } SVGA3dTextureCoordGen; -/* - * Texture argument constants for texture combiner - */ + typedef enum { SVGA3D_TA_INVALID = 0, SVGA3D_TA_TFACTOR = 1, @@ -1355,23 +1295,14 @@ typedef enum { #define SVGA3D_TM_MASK_LEN 4 -/* Modifiers for texture argument constants defined above. */ + typedef enum { SVGA3D_TM_NONE = 0, SVGA3D_TM_ALPHA = (1 << SVGA3D_TM_MASK_LEN), SVGA3D_TM_ONE_MINUS = (2 << SVGA3D_TM_MASK_LEN), } SVGA3dTextureArgModifier; -/* - * Vertex declarations - * - * Notes: - * - * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you - * draw with any POSITIONT vertex arrays, the programmable vertex - * pipeline will be implicitly disabled. Drawing will take place as if - * no vertex shader was bound. 
- */ + typedef enum { SVGA3D_DECLUSAGE_POSITION = 0, @@ -1395,11 +1326,10 @@ typedef enum { SVGA3D_DECLMETHOD_DEFAULT = 0, SVGA3D_DECLMETHOD_PARTIALU, SVGA3D_DECLMETHOD_PARTIALV, - SVGA3D_DECLMETHOD_CROSSUV, /* Normal */ + SVGA3D_DECLMETHOD_CROSSUV, SVGA3D_DECLMETHOD_UV, - SVGA3D_DECLMETHOD_LOOKUP, /* Lookup a displacement map */ - SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement */ - /* map */ + SVGA3D_DECLMETHOD_LOOKUP, + SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, } SVGA3dDeclMethod; typedef enum { @@ -1423,28 +1353,16 @@ typedef enum { SVGA3D_DECLTYPE_MAX, } SVGA3dDeclType; -/* - * This structure is used for the divisor for geometry instancing; - * it's a direct translation of the Direct3D equivalent. - */ + typedef union { struct { - /* - * For index data, this number represents the number of instances to draw. - * For instance data, this number represents the number of - * instances/vertex in this stream - */ + uint32 count : 30; - /* - * This is 1 if this is supposed to be the data that is repeated for - * every instance. - */ + uint32 indexedData : 1; - /* - * This is 1 if this is supposed to be the per-instance data. - */ + uint32 instanceData : 1; }; @@ -1452,12 +1370,7 @@ typedef union { } SVGA3dVertexDivisor; typedef enum { - /* - * SVGA3D_PRIMITIVE_INVALID is a valid primitive type. - * - * List MIN second so debuggers will think INVALID is - * the correct name. 
- */ + SVGA3D_PRIMITIVE_INVALID = 0, SVGA3D_PRIMITIVE_MIN = 0, SVGA3D_PRIMITIVE_TRIANGLELIST = 1, @@ -1538,8 +1451,8 @@ typedef enum { SVGA3D_LIGHTTYPE_INVALID = 0, SVGA3D_LIGHTTYPE_MIN = 1, SVGA3D_LIGHTTYPE_POINT = 1, - SVGA3D_LIGHTTYPE_SPOT1 = 2, /* 1-cone, in degrees */ - SVGA3D_LIGHTTYPE_SPOT2 = 3, /* 2-cone, in radians */ + SVGA3D_LIGHTTYPE_SPOT1 = 2, + SVGA3D_LIGHTTYPE_SPOT2 = 3, SVGA3D_LIGHTTYPE_DIRECTIONAL = 4, SVGA3D_LIGHTTYPE_MAX } SVGA3dLightType; @@ -1584,9 +1497,7 @@ typedef enum { SVGA3D_CONST_TYPE_MAX = 3, } SVGA3dShaderConstType; -/* - * Register limits for shader consts. - */ + #define SVGA3D_CONSTREG_MAX 256 #define SVGA3D_CONSTINTREG_MAX 16 #define SVGA3D_CONSTBOOLREG_MAX 16 @@ -1624,10 +1535,7 @@ typedef uint8 SVGA3dQueryTypeUint8; #define SVGA3D_NUM_QUERYTYPE (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN) -/* - * This is the maximum number of queries per context that can be active - * simultaneously between a beginQuery and endQuery. - */ + #define SVGA3D_MAX_QUERY 64 /* @@ -1750,19 +1658,26 @@ typedef enum { SVGA3D_READ_HOST_VRAM = 2, } SVGA3dTransferType; -typedef enum { - SVGA3D_LOGICOP_INVALID = 0, - SVGA3D_LOGICOP_MIN = 1, - SVGA3D_LOGICOP_COPY = 1, - SVGA3D_LOGICOP_NOT = 2, - SVGA3D_LOGICOP_AND = 3, - SVGA3D_LOGICOP_OR = 4, - SVGA3D_LOGICOP_XOR = 5, - SVGA3D_LOGICOP_NXOR = 6, - SVGA3D_LOGICOP_ROP3MIN = 30, /* 7-29 are reserved for future logic ops. 
*/ - SVGA3D_LOGICOP_ROP3MAX = (SVGA3D_LOGICOP_ROP3MIN + 255), - SVGA3D_LOGICOP_MAX = (SVGA3D_LOGICOP_ROP3MAX + 1), -} SVGA3dLogicOp; +#define SVGA3D_LOGICOP_INVALID 0 +#define SVGA3D_LOGICOP_COPY 1 + +#define SVGA3D_LOGICOP_MIN 1 +#define SVGA3D_LOGICOP_NOT 2 +#define SVGA3D_LOGICOP_AND 3 +#define SVGA3D_LOGICOP_OR 4 +#define SVGA3D_LOGICOP_XOR 5 +#define SVGA3D_LOGICOP_NXOR 6 +#define SVGA3D_LOGICOP_ROP3 7 + +#define SVGA3D_LOGICOP_MAX 8 + +typedef uint16 SVGA3dLogicOp; + +#define SVGA3D_LOGICOP_ROP3_INVALID ((uint16) -1) +#define SVGA3D_LOGICOP_ROP3_MIN 0 +#define SVGA3D_LOGICOP_ROP3_MAX 256 + +typedef uint16 SVGA3dLogicOpRop3; typedef #include "vmware_pack_begin.h" @@ -1812,9 +1727,7 @@ struct { #include "vmware_pack_end.h" SVGA3dSize; -/* - * Guest-backed objects definitions. - */ + typedef enum { SVGA_OTABLE_MOB = 0, SVGA_OTABLE_MIN = 0, @@ -1828,22 +1741,16 @@ typedef enum { SVGA_OTABLE_DXCONTEXT = 5, SVGA_OTABLE_DX_MAX = 6, - SVGA_OTABLE_VADECODER = 6, - SVGA_OTABLE_VAPROCESSOR = 7, - SVGA_OTABLE_BUG_1952836_MAX = 8, - /* - * Additions to this table need to be tied to HW-version features and - * checkpointed accordingly. (See SVGACheckpointGuestBackedObjects.) - */ - SVGA_OTABLE_DEVEL_MAX = 8, - SVGA_OTABLE_MAX = 8 -} SVGAOTableType; -/* - * Deprecated. - */ -#define SVGA_OTABLE_COUNT 4 + SVGA_OTABLE_DEVEL_MAX = 6, + SVGA_OTABLE_MAX = 6, + + + SVGA_OTABLE_RESERVED1 = 6, + SVGA_OTABLE_RESERVED2 = 7, + SVGA_OTABLE_BUG_1952836_MAX = 8, +} SVGAOTableType; typedef enum { SVGA_COTABLE_MIN = 0, @@ -1860,27 +1767,29 @@ typedef enum { SVGA_COTABLE_DXSHADER = 10, SVGA_COTABLE_DX10_MAX = 11, SVGA_COTABLE_UAVIEW = 11, - SVGA_COTABLE_MAX + SVGA_COTABLE_MAX = 12, } SVGACOTableType; -/* - * The largest size (number of entries) allowed in a COTable. 
- */ + #define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2) typedef enum SVGAMobFormat { - SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID, - SVGA3D_MOBFMT_PTDEPTH_0 = 0, - SVGA3D_MOBFMT_MIN = 0, - SVGA3D_MOBFMT_PTDEPTH_1 = 1, - SVGA3D_MOBFMT_PTDEPTH_2 = 2, - SVGA3D_MOBFMT_RANGE = 3, - SVGA3D_MOBFMT_PTDEPTH64_0 = 4, - SVGA3D_MOBFMT_PTDEPTH64_1 = 5, - SVGA3D_MOBFMT_PTDEPTH64_2 = 6, - SVGA3D_MOBFMT_PREDX_MAX = 7, - SVGA3D_MOBFMT_EMPTY = 7, + SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID, + SVGA3D_MOBFMT_PT_0 = 0, + SVGA3D_MOBFMT_MIN = 0, + SVGA3D_MOBFMT_PT_1 = 1, + SVGA3D_MOBFMT_PT_2 = 2, + SVGA3D_MOBFMT_RANGE = 3, + SVGA3D_MOBFMT_PT64_0 = 4, + SVGA3D_MOBFMT_PT64_1 = 5, + SVGA3D_MOBFMT_PT64_2 = 6, + SVGA3D_MOBFMT_PREDX_MAX = 7, + SVGA3D_MOBFMT_EMPTY = 7, + SVGA3D_MOBFMT_MAX, + + + SVGA3D_MOBFMT_HB, } SVGAMobFormat; #define SVGA3D_MOB_EMPTY_BASE 1 @@ -1905,7 +1814,18 @@ typedef enum SVGA3dMSQualityLevel { SVGA3D_MS_QUALITY_NONE = 0, SVGA3D_MS_QUALITY_MIN = 0, SVGA3D_MS_QUALITY_FULL = 1, - SVGA3D_MS_QUALITY_MAX = 2, + SVGA3D_MS_QUALITY_RESOLVED = 2, + SVGA3D_MS_QUALITY_MAX = 3, } SVGA3dMSQualityLevel; -#endif // _SVGA3D_TYPES_H_ + + +typedef enum SVGA3dFrameUpdateType { + SVGA3D_FRAME_END = 0, + SVGA3D_FRAME_MIN = 0, + SVGA3D_FRAME_PARTIAL = 1, + SVGA3D_FRAME_UNKNOWN = 2, + SVGA3D_FRAME_MAX = 3, +} SVGA3dFrameUpdateType; + +#endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c index 6577c839c..8347dc175 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_cmd.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.c @@ -439,6 +439,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, unsigned region_flags; unsigned surface_flags; + assert(!swc->have_gb_objects); + if (transfer == SVGA3D_WRITE_HOST_VRAM) { region_flags = SVGA_RELOC_READ; surface_flags = SVGA_RELOC_WRITE; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h index 924729873..86d1120b3 100644 --- 
a/lib/mesa/src/gallium/drivers/svga/svga_cmd.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_cmd.h @@ -636,10 +636,23 @@ enum pipe_error SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc, unsigned count, uint32 startBuffer, - const SVGA3dVertexBuffer *bufferInfo, + const SVGA3dVertexBuffer_v2 *bufferInfo, struct svga_winsys_surface **surfaces); enum pipe_error +SVGA3D_vgpu10_SetVertexBuffers_v2(struct svga_winsys_context *swc, + unsigned count, + uint32 startBuffer, + const SVGA3dVertexBuffer_v2 *bufferInfo, + struct svga_winsys_surface **surfaces); + +enum pipe_error +SVGA3D_vgpu10_SetVertexBuffersOffsetAndSize(struct svga_winsys_context *swc, + unsigned count, + uint32 startBuffer, + const SVGA3dVertexBuffer_v2 *bufferInfo); + +enum pipe_error SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc, SVGA3dPrimitiveType topology); @@ -649,6 +662,17 @@ SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc, SVGA3dSurfaceFormat format, uint32 offset); enum pipe_error +SVGA3D_vgpu10_SetIndexBuffer_v2(struct svga_winsys_context *swc, + struct svga_winsys_surface *indexes, + SVGA3dSurfaceFormat format, uint32 offset, + uint32 sizeInBytes); + +enum pipe_error +SVGA3D_vgpu10_SetIndexBufferOffsetAndSize(struct svga_winsys_context *swc, + SVGA3dSurfaceFormat format, uint32 offset, + uint32 sizeInBytes); + +enum pipe_error SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc, unsigned slot, SVGA3dShaderType type, @@ -714,6 +738,31 @@ SVGA3D_sm5_DrawInstancedIndirect(struct svga_winsys_context *swc, unsigned argOffset); enum pipe_error +SVGA3D_sm5_DefineUAView(struct svga_winsys_context *swc, + SVGA3dUAViewId uaViewId, + struct svga_winsys_surface *surface, + SVGA3dSurfaceFormat format, + SVGA3dResourceType resourceDimension, + const SVGA3dUAViewDesc *desc); + +enum pipe_error +SVGA3D_sm5_DestroyUAView(struct svga_winsys_context *swc, + SVGA3dUAViewId uaViewId); + +enum pipe_error +SVGA3D_sm5_SetUAViews(struct svga_winsys_context *swc, + 
uint32 uavSpliceIndex, + unsigned count, + const SVGA3dUAViewId ids[], + struct svga_winsys_surface **uaViews); + +enum pipe_error +SVGA3D_sm5_SetCSUAViews(struct svga_winsys_context *swc, + unsigned count, + const SVGA3dUAViewId ids[], + struct svga_winsys_surface **uaViews); + +enum pipe_error SVGA3D_sm5_Dispatch(struct svga_winsys_context *swc, const uint32 threadGroupCount[3]); @@ -732,4 +781,23 @@ SVGA3D_sm5_DefineAndBindStreamOutput(struct svga_winsys_context *swc, uint32 rasterizedStream, uint32 sizeInBytes); +enum pipe_error +SVGA3D_sm5_DefineRasterizerState_v2(struct svga_winsys_context *swc, + SVGA3dRasterizerStateId rasterizerId, + uint8 fillMode, + SVGA3dCullMode cullMode, + uint8 frontCounterClockwise, + int32 depthBias, + float depthBiasClamp, + float slopeScaledDepthBias, + uint8 depthClipEnable, + uint8 scissorEnable, + uint8 multisampleEnable, + uint8 antialiasedLineEnable, + float lineWidth, + uint8 lineStippleEnable, + uint8 lineStippleFactor, + uint16 lineStipplePattern, + uint8 provokingVertexLast, + uint32 forcedSampleCount); #endif /* __SVGA3D_H__ */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.c b/lib/mesa/src/gallium/drivers/svga/svga_context.c index d80336cf7..a1e054b7c 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_context.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_context.c @@ -61,13 +61,6 @@ svga_destroy(struct pipe_context *pipe) struct svga_context *svga = svga_context(pipe); unsigned shader, i; - /* free any alternate rasterizer states used for point sprite */ - for (i = 0; i < ARRAY_SIZE(svga->rasterizer_no_cull); i++) { - if (svga->rasterizer_no_cull[i]) { - pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]); - } - } - /* free depthstencil_disable state */ if (svga->depthstencil_disable) { pipe->delete_depth_stencil_alpha_state(pipe, svga->depthstencil_disable); @@ -98,6 +91,7 @@ svga_destroy(struct pipe_context *pipe) svga_cleanup_tss_binding(svga); svga_cleanup_vertex_state(svga); 
svga_cleanup_tcs_state(svga); + svga_cleanup_shader_image_state(svga); svga_destroy_swtnl(svga); svga_hwtnl_destroy(svga->hwtnl); @@ -114,6 +108,9 @@ svga_destroy(struct pipe_context *pipe) util_bitmask_destroy(svga->surface_view_id_bm); util_bitmask_destroy(svga->stream_output_id_bm); util_bitmask_destroy(svga->query_id_bm); + util_bitmask_destroy(svga->uav_id_bm); + util_bitmask_destroy(svga->uav_to_free_id_bm); + u_upload_destroy(svga->const0_upload); u_upload_destroy(svga->pipe.stream_uploader); u_upload_destroy(svga->pipe.const_uploader); @@ -126,6 +123,15 @@ svga_destroy(struct pipe_context *pipe) } } + /* free any pending srvs that were created for rawbuf sr view for + * constant buf. + */ + if (svga_have_gl43(svga)) { + svga_destroy_rawbuf_srv(svga); + util_bitmask_destroy(svga->sampler_view_to_free_id_bm); + pipe_resource_reference(&svga->dummy_resource, NULL); + } + FREE(svga); } @@ -189,6 +195,9 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) svga_init_stream_output_functions(svga); svga_init_clear_functions(svga); svga_init_tracked_state(svga); + svga_init_shader_image_functions(svga); + svga_init_shader_buffer_functions(svga); + svga_init_cs_functions(svga); /* init misc state */ svga->curr.sample_mask = ~0; @@ -230,6 +239,15 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) if (!(svga->query_id_bm = util_bitmask_create())) goto cleanup; + if (!(svga->uav_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->uav_to_free_id_bm = util_bitmask_create())) + goto cleanup; + + if (!(svga->sampler_view_to_free_id_bm = util_bitmask_create())) + goto cleanup; + svga->hwtnl = svga_hwtnl_create(svga); if (svga->hwtnl == NULL) goto cleanup; @@ -275,6 +293,11 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) svga->state.hw_draw.num_backed_views = 0; svga->state.hw_draw.rasterizer_discard = FALSE; + /* Initialize uavs */ + svga->state.hw_draw.uavSpliceIndex = -1; + 
svga->state.hw_draw.num_uavs = 0; + svga->state.hw_draw.num_cs_uavs = 0; + /* Initialize the shader pointers */ svga->state.hw_draw.vs = NULL; svga->state.hw_draw.gs = NULL; @@ -289,6 +312,10 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) sizeof(svga->state.hw_draw.default_constbuf_size)); memset(svga->state.hw_draw.enabled_constbufs, 0, sizeof(svga->state.hw_draw.enabled_constbufs)); + memset(svga->state.hw_draw.enabled_rawbufs, 0, + sizeof(svga->state.hw_draw.enabled_rawbufs)); + memset(svga->state.hw_draw.rawbufs, 0, + sizeof(svga->state.hw_draw.rawbufs)); svga->state.hw_draw.ib = NULL; svga->state.hw_draw.num_vbuffers = 0; memset(svga->state.hw_draw.vbuffers, 0, @@ -296,6 +323,17 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) svga->state.hw_draw.const0_buffer = NULL; svga->state.hw_draw.const0_handle = NULL; + if (svga_have_gl43(svga)) { + for (unsigned shader = 0; shader < PIPE_SHADER_TYPES; ++shader) { + for (unsigned i = 0; + i < ARRAY_SIZE(svga->state.hw_draw.rawbufs[shader]); i++) { + svga->state.hw_draw.rawbufs[shader][i].srvid = SVGA3D_INVALID_ID; + } + } + svga_uav_cache_init(svga); + svga->dummy_resource = NULL; + } + /* Create a no-operation blend state which we will bind whenever the * requested blend state is impossible (e.g. due to having an integer * render target attached). 
@@ -346,11 +384,15 @@ cleanup: util_bitmask_destroy(svga->input_element_object_id_bm); util_bitmask_destroy(svga->rast_object_id_bm); util_bitmask_destroy(svga->sampler_object_id_bm); - util_bitmask_destroy(svga->sampler_view_id_bm); util_bitmask_destroy(svga->shader_id_bm); util_bitmask_destroy(svga->surface_view_id_bm); util_bitmask_destroy(svga->stream_output_id_bm); util_bitmask_destroy(svga->query_id_bm); + + util_bitmask_destroy(svga->uav_id_bm); + util_bitmask_destroy(svga->uav_to_free_id_bm); + util_bitmask_destroy(svga->sampler_view_id_bm); + FREE(svga); svga = NULL; @@ -424,6 +466,11 @@ svga_context_flush(struct svga_context *svga, if (svga_need_to_rebind_resources(svga)) { svga->rebind.flags.query = TRUE; } + + if (svga_sws(svga)->have_index_vertex_buffer_offset_cmd) { + svga->rebind.flags.vertexbufs = TRUE; + svga->rebind.flags.indexbuf = TRUE; + } } if (SVGA_DEBUG & DEBUG_SYNC) { diff --git a/lib/mesa/src/gallium/drivers/svga/svga_context.h b/lib/mesa/src/gallium/drivers/svga/svga_context.h index d5ef4c345..0227d64cf 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_context.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_context.h @@ -43,6 +43,8 @@ #include "svga_winsys.h" #include "svga_hw_reg.h" #include "svga3d_shaderdefs.h" +#include "svga_image_view.h" +#include "svga_shader_buffer.h" #include "svga_debug.h" /** Non-GPU queries for gallium HUD */ @@ -83,10 +85,12 @@ enum svga_hud { SVGA_QUERY_MAX }; + /** * Maximum supported number of constant buffers per shader + * including the zero slot for the default constant buffer. 
*/ -#define SVGA_MAX_CONST_BUFS 14 +#define SVGA_MAX_CONST_BUFS 15 /** * Maximum constant buffer size that can be set in the @@ -96,6 +100,18 @@ enum svga_hud { #define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int)) #define CONST0_UPLOAD_ALIGNMENT 256 +#define SVGA_MAX_IMAGES SVGA3D_MAX_UAVIEWS +#define SVGA_MAX_SHADER_BUFFERS SVGA3D_MAX_UAVIEWS +#define SVGA_MAX_ATOMIC_BUFFERS SVGA3D_MAX_UAVIEWS +#define SVGA_MAX_UAVIEWS SVGA3D_DX11_1_MAX_UAVIEWS + +enum svga_surface_state +{ + SVGA_SURFACE_STATE_CREATED, + SVGA_SURFACE_STATE_INVALIDATED, + SVGA_SURFACE_STATE_UPDATED, + SVGA_SURFACE_STATE_RENDERED, +}; struct draw_vertex_shader; struct draw_fragment_shader; @@ -169,6 +185,8 @@ struct svga_depth_stencil_state { #define SVGA_PIPELINE_FLAG_LINES (1<<PIPE_PRIM_LINES) #define SVGA_PIPELINE_FLAG_TRIS (1<<PIPE_PRIM_TRIANGLES) +#define SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES 4 + struct svga_rasterizer_state { struct pipe_rasterizer_state templ; /* needed for draw module */ @@ -194,6 +212,11 @@ struct svga_rasterizer_state { SVGA3dRasterizerStateId id; /**< vgpu10 */ + /* Alternate SVGA rasterizer state object with forcedSampleCount */ + int altRastIds[SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES+1]; + + struct svga_rasterizer_state *no_cull_rasterizer; + /** For debugging: */ const char* need_pipeline_tris_str; const char* need_pipeline_lines_str; @@ -263,6 +286,14 @@ struct svga_constant_buffer { unsigned size; }; +struct svga_raw_buffer { + struct svga_winsys_surface *handle; + unsigned buffer_offset; + unsigned buffer_size; + struct pipe_resource *buffer; + int32 srvid; +}; + /* Use to calculate differences between state emitted to hardware and * current driver-calculated state. 
*/ @@ -270,10 +301,10 @@ struct svga_state { const struct svga_blend_state *blend; const struct svga_depth_stencil_state *depth; - const struct svga_rasterizer_state *rast; const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; const struct svga_velems_state *velems; + struct svga_rasterizer_state *rast; struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture ID's? */ struct svga_fragment_shader *fs; struct svga_vertex_shader *vs; @@ -291,6 +322,7 @@ struct svga_state * svga_shader_emitter_v10.num_shader_consts. */ struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; + struct svga_raw_buffer rawbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; struct pipe_framebuffer_state framebuffer; float depthscale; @@ -322,12 +354,26 @@ struct svga_state unsigned sample_mask; unsigned vertices_per_patch; float default_tesslevels[6]; /* tessellation (outer[4] + inner[2]) levels */ + + /* Image views */ + unsigned num_image_views[PIPE_SHADER_TYPES]; + struct svga_image_view image_views[PIPE_SHADER_TYPES][SVGA_MAX_IMAGES]; + + /* Shader buffers */ + unsigned num_shader_buffers[PIPE_SHADER_TYPES]; + struct svga_shader_buffer shader_buffers[PIPE_SHADER_TYPES][SVGA_MAX_SHADER_BUFFERS]; + + /* HW atomic buffers */ + unsigned num_atomic_buffers; + struct svga_shader_buffer atomic_buffers[SVGA_MAX_SHADER_BUFFERS]; + struct { /* Determine the layout of the grid (in block units) to be used. 
*/ unsigned size[3]; /* If DispatchIndirect is used, this will has grid size info*/ struct pipe_resource *indirect; } grid_info; + }; struct svga_prescale { @@ -399,6 +445,8 @@ struct svga_hw_draw_state /** Currently bound constant buffer, per shader stage */ struct pipe_resource *constbuf[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; struct svga_constant_buffer constbufoffsets[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; + struct svga_raw_buffer rawbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; + unsigned enabled_rawbufs[PIPE_SHADER_TYPES]; /** Bitmask of enabled constant buffers */ unsigned enabled_constbufs[PIPE_SHADER_TYPES]; @@ -421,7 +469,7 @@ struct svga_hw_draw_state SVGA3dPrimitiveType topology; /** Vertex buffer state */ - SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS]; + SVGA3dVertexBuffer_v2 vbuffer_attrs[PIPE_MAX_ATTRIBS]; struct pipe_resource *vbuffers[PIPE_MAX_ATTRIBS]; unsigned num_vbuffers; @@ -441,6 +489,35 @@ struct svga_hw_draw_state boolean rasterizer_discard; /* set if rasterization is disabled */ boolean has_backed_views; /* set if any of the rtv/dsv is a backed surface view */ + + /* Image Views */ + int uavSpliceIndex; + unsigned num_image_views[PIPE_SHADER_TYPES]; + struct svga_image_view image_views[PIPE_SHADER_TYPES][SVGA_MAX_IMAGES]; + + /* Shader Buffers */ + unsigned num_shader_buffers[PIPE_SHADER_TYPES]; + struct svga_shader_buffer shader_buffers[PIPE_SHADER_TYPES][SVGA_MAX_SHADER_BUFFERS]; + + /* HW Atomic Buffers */ + unsigned num_atomic_buffers; + struct svga_shader_buffer atomic_buffers[SVGA_MAX_SHADER_BUFFERS]; + + /* UAV state */ + unsigned num_uavs; + SVGA3dUAViewId uaViewIds[SVGA_MAX_UAVIEWS]; + struct svga_winsys_surface *uaViews[SVGA_MAX_UAVIEWS]; + + /* Compute UAV state */ + unsigned num_cs_uavs; + SVGA3dUAViewId csUAViewIds[SVGA_MAX_UAVIEWS]; + struct svga_winsys_surface *csUAViews[SVGA_MAX_UAVIEWS]; + + /* starting uav index for each shader */ + unsigned uav_start_index[PIPE_SHADER_TYPES]; + + /* starting uav index for HW 
atomic buffers */ + unsigned uav_atomic_buf_index; }; @@ -468,6 +545,32 @@ struct svga_hw_queue; struct svga_query; struct svga_qmem_alloc_entry; +enum svga_uav_type +{ + SVGA_IMAGE_VIEW = 0, + SVGA_SHADER_BUFFER +}; + +struct svga_uav +{ + enum svga_uav_type type; + union { + struct svga_image_view image_view; + struct svga_shader_buffer shader_buffer; + } desc; + struct pipe_resource *resource; + unsigned next_uaView; + SVGA3dUAViewId uaViewId; + unsigned timestamp[2]; +}; + +struct svga_cache_uav +{ + unsigned num_uaViews; + unsigned next_uaView; + struct svga_uav uaViews[SVGA3D_DX11_1_MAX_UAVIEWS]; +}; + struct svga_context { struct pipe_context pipe; @@ -488,7 +591,7 @@ struct svga_context boolean force_hw_line_stipple; /** To report perf/conformance/etc issues to the gallium frontend */ - struct pipe_debug_callback callback; + struct util_debug_callback callback; } debug; struct { @@ -517,6 +620,9 @@ struct svga_context /* Bitmask of sampler view IDs */ struct util_bitmask *sampler_view_id_bm; + /* Bitmask of to-free sampler view IDs created for raw buffer srv */ + struct util_bitmask *sampler_view_to_free_id_bm; + /* Bitmask of used shader IDs */ struct util_bitmask *shader_id_bm; @@ -529,13 +635,23 @@ struct svga_context /* Bitmask of used query IDs */ struct util_bitmask *query_id_bm; + /* Bitmask of used uav IDs */ + struct util_bitmask *uav_id_bm; + + /* Bitmask of to-free uav IDs */ + struct util_bitmask *uav_to_free_id_bm; + struct { uint64_t dirty[SVGA_STATE_MAX]; /** bitmasks of which const buffers are changed */ unsigned dirty_constbufs[PIPE_SHADER_TYPES]; + /** bitmasks of which const buffers to be bound as raw buffers */ + unsigned raw_constbufs[PIPE_SHADER_TYPES]; + unsigned texture_timestamp; + unsigned uav_timestamp[2]; struct svga_sw_state sw; struct svga_hw_draw_state hw_draw; @@ -557,6 +673,12 @@ struct svga_context unsigned tes:1; unsigned cs:1; unsigned query:1; + unsigned images:1; + unsigned shaderbufs:1; + unsigned atomicbufs:1; + 
unsigned uav:1; + unsigned indexbuf:1; + unsigned vertexbufs:1; } flags; unsigned val; } rebind; @@ -641,9 +763,6 @@ struct svga_context void *sampler; } polygon_stipple; - /** Alternate rasterizer states created for point sprite */ - struct svga_rasterizer_state *rasterizer_no_cull[2]; - /** Depth stencil state created to disable depth stencil test */ struct svga_depth_stencil_state *depthstencil_disable; @@ -665,6 +784,8 @@ struct svga_context boolean passthrough; } tcs; + struct svga_cache_uav cache_uav; + struct pipe_resource *dummy_resource; }; /* A flag for each frontend state object: @@ -707,19 +828,38 @@ struct svga_context #define SVGA_NEW_TCS_CONST_BUFFER ((uint64_t) 0x1000000000) #define SVGA_NEW_TES_CONST_BUFFER ((uint64_t) 0x2000000000) #define SVGA_NEW_TCS_PARAM ((uint64_t) 0x4000000000) -#define SVGA_NEW_FS_CONSTS ((uint64_t) 0x8000000000) -#define SVGA_NEW_VS_CONSTS ((uint64_t) 0x10000000000) -#define SVGA_NEW_GS_CONSTS ((uint64_t) 0x20000000000) -#define SVGA_NEW_TCS_CONSTS ((uint64_t) 0x40000000000) -#define SVGA_NEW_TES_CONSTS ((uint64_t) 0x800000000000) +#define SVGA_NEW_IMAGE_VIEW ((uint64_t) 0x8000000000) +#define SVGA_NEW_SHADER_BUFFER ((uint64_t) 0x10000000000) +#define SVGA_NEW_CS ((uint64_t) 0x20000000000) +#define SVGA_NEW_CS_VARIANT ((uint64_t) 0x40000000000) +#define SVGA_NEW_CS_CONST_BUFFER ((uint64_t) 0x80000000000) +#define SVGA_NEW_FS_CONSTS ((uint64_t) 0x100000000000) +#define SVGA_NEW_VS_CONSTS ((uint64_t) 0x200000000000) +#define SVGA_NEW_GS_CONSTS ((uint64_t) 0x400000000000) +#define SVGA_NEW_TCS_CONSTS ((uint64_t) 0x800000000000) +#define SVGA_NEW_TES_CONSTS ((uint64_t) 0x1000000000000) +#define SVGA_NEW_CS_CONSTS ((uint64_t) 0x2000000000000) +#define SVGA_NEW_FS_RAW_BUFFER ((uint64_t) 0x4000000000000) +#define SVGA_NEW_VS_RAW_BUFFER ((uint64_t) 0x8000000000000) +#define SVGA_NEW_GS_RAW_BUFFER ((uint64_t) 0x10000000000000) +#define SVGA_NEW_TCS_RAW_BUFFER ((uint64_t) 0x20000000000000) +#define SVGA_NEW_TES_RAW_BUFFER 
((uint64_t) 0x40000000000000) +#define SVGA_NEW_CS_RAW_BUFFER ((uint64_t) 0x80000000000000) #define SVGA_NEW_ALL ((uint64_t) 0xFFFFFFFFFFFFFFFF) #define SVGA_NEW_CONST_BUFFER \ (SVGA_NEW_FS_CONST_BUFFER | SVGA_NEW_VS_CONST_BUFFER | \ - SVGA_NEW_GS_CONST_BUFFER | \ + SVGA_NEW_GS_CONST_BUFFER | SVGA_NEW_CS_CONST_BUFFER | \ SVGA_NEW_TCS_CONST_BUFFER | SVGA_NEW_TES_CONST_BUFFER) +/** Program pipelines */ +enum svga_pipe_type +{ + SVGA_PIPE_GRAPHICS = 0, + SVGA_PIPE_COMPUTE = 1 +}; + void svga_init_state_functions( struct svga_context *svga ); void svga_init_flush_functions( struct svga_context *svga ); void svga_init_string_functions( struct svga_context *svga ); @@ -742,6 +882,7 @@ void svga_init_query_functions( struct svga_context *svga ); void svga_init_surface_functions(struct svga_context *svga); void svga_init_stream_output_functions( struct svga_context *svga ); void svga_init_clear_functions( struct svga_context *svga ); +void svga_init_shader_image_functions( struct svga_context *svga ); void svga_cleanup_vertex_state( struct svga_context *svga ); void svga_cleanup_sampler_state( struct svga_context *svga ); @@ -769,6 +910,36 @@ void svga_toggle_render_condition(struct svga_context *svga, boolean render_condition_enabled, boolean on); +int svga_define_rasterizer_object(struct svga_context *svga, + struct svga_rasterizer_state *, + unsigned samples); + +enum pipe_error +svga_validate_sampler_resources(struct svga_context *svga, + enum svga_pipe_type); + +enum pipe_error +svga_validate_constant_buffers(struct svga_context *svga, + enum svga_pipe_type); + +enum pipe_error +svga_validate_image_views(struct svga_context *svga, + enum svga_pipe_type); + +enum pipe_error +svga_validate_shader_buffers(struct svga_context *svga, + enum svga_pipe_type); + +void +svga_destroy_rawbuf_srv(struct svga_context *svga); + +void +svga_uav_cache_init(struct svga_context *svga); + +void +svga_destroy_rawbuf_srv(struct svga_context *svga); + + 
/*********************************************************************** * Inline conversion functions. These are better-typed than the * macros used previously: @@ -816,6 +987,12 @@ svga_have_sm5(const struct svga_context *svga) } static inline boolean +svga_have_gl43(const struct svga_context *svga) +{ + return svga_screen(svga->pipe.screen)->sws->have_gl43; +} + +static inline boolean svga_need_to_rebind_resources(const struct svga_context *svga) { return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources; @@ -827,6 +1004,21 @@ svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2) return memcmp(r1, r2, sizeof(*r1)) == 0; } + +/* A helper function to return TRUE if sampler state mapping is + * to be used. Sampler state mapping is used in GL43 context + * if the number of sampler states exceeds the SVGA device limit or + * the sampler state mapping environment variable is set. + */ +static inline boolean +svga_use_sampler_state_mapping(const struct svga_context *svga, + unsigned num_sampler_states) +{ + return svga_have_gl43(svga) && + (svga_screen(svga->pipe.screen)->debug.sampler_state_mapping || + num_sampler_states > SVGA3D_DX_MAX_SAMPLERS); +} + /** * If the Gallium HUD is enabled, this will return the current time. * Otherwise, just return zero. 
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_debug.h b/lib/mesa/src/gallium/drivers/svga/svga_debug.h index cdad858b0..8b43279a9 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_debug.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_debug.h @@ -46,6 +46,8 @@ #define DEBUG_CACHE 0x8000 #define DEBUG_STREAMOUT 0x10000 #define DEBUG_SAMPLERS 0x20000 +#define DEBUG_IMAGE 0x40000 +#define DEBUG_UAV 0x80000 #define DEBUG_RETRY 0x100000 #ifdef DEBUG diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_draw.c index 0d6fb987e..284622762 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw.c @@ -342,14 +342,30 @@ xlate_index_format(unsigned indexWidth) } -static enum pipe_error -validate_sampler_resources(struct svga_context *svga) +/** + * A helper function to validate sampler view resources to ensure any + * pending updates to buffers will be emitted before they are referenced + * at draw or dispatch time. It also rebinds the resources if needed. 
+ */ +enum pipe_error +svga_validate_sampler_resources(struct svga_context *svga, + enum svga_pipe_type pipe_type) { - enum pipe_shader_type shader; + enum pipe_shader_type shader, first_shader, last_shader; assert(svga_have_vgpu10(svga)); - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) { + if (pipe_type == SVGA_PIPE_GRAPHICS) { + first_shader = PIPE_SHADER_VERTEX; + last_shader = PIPE_SHADER_TESS_EVAL; + } + else { + assert(svga_have_gl43(svga)); + first_shader = PIPE_SHADER_COMPUTE; + last_shader = PIPE_SHADER_COMPUTE; + } + + for (shader = first_shader; shader <= last_shader; shader++) { unsigned count = svga->curr.num_sampler_views[shader]; unsigned i; struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; @@ -409,14 +425,31 @@ validate_sampler_resources(struct svga_context *svga) } -static enum pipe_error -validate_constant_buffers(struct svga_context *svga) +/** + * A helper function to validate constant buffers to ensure any + * pending updates to the buffers will be emitted before they are referenced + * at draw or dispatch time. It also rebinds the resources if needed. + */ +enum pipe_error +svga_validate_constant_buffers(struct svga_context *svga, + enum svga_pipe_type pipe_type) { - enum pipe_shader_type shader; + enum pipe_shader_type shader, first_shader, last_shader; assert(svga_have_vgpu10(svga)); - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) { + if (pipe_type == SVGA_PIPE_GRAPHICS) { + first_shader = PIPE_SHADER_VERTEX; + last_shader = PIPE_SHADER_TESS_EVAL; + } + else { + assert(svga_have_gl43(svga)); + first_shader = PIPE_SHADER_COMPUTE; + last_shader = PIPE_SHADER_COMPUTE; + } + + for (shader = first_shader; shader <= last_shader; shader++) { + enum pipe_error ret; struct svga_buffer *buffer; @@ -463,6 +496,28 @@ validate_constant_buffers(struct svga_context *svga) return ret; } } + + /* Reference raw constant buffers as they are not included in the + * hw constant buffers list. 
+ */ + unsigned enabled_rawbufs = svga->state.hw_draw.enabled_rawbufs[shader] & ~1u; + while (enabled_rawbufs) { + unsigned i = u_bit_scan(&enabled_rawbufs); + buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer); + + assert(buffer != NULL); + handle = svga_buffer_handle(svga, &buffer->b, + PIPE_BIND_SAMPLER_VIEW); + + if (svga->rebind.flags.constbufs && handle) { + ret = svga->swc->resource_rebind(svga->swc, + handle, + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + } } svga->rebind.flags.constbufs = FALSE; @@ -471,6 +526,94 @@ validate_constant_buffers(struct svga_context *svga) /** + * A helper function to validate image view resources to ensure any + * pending updates to buffers will be emitted before they are referenced + * at draw or dispatch time. It also rebinds the resources if needed. + */ +enum pipe_error +svga_validate_image_views(struct svga_context *svga, + enum svga_pipe_type pipe_type) +{ + enum pipe_shader_type shader, first_shader, last_shader; + bool rebind = svga->rebind.flags.images; + enum pipe_error ret; + + assert(svga_have_gl43(svga)); + + if (pipe_type == SVGA_PIPE_GRAPHICS) { + first_shader = PIPE_SHADER_VERTEX; + last_shader = PIPE_SHADER_TESS_EVAL; + } + else { + first_shader = PIPE_SHADER_COMPUTE; + last_shader = PIPE_SHADER_COMPUTE; + } + + for (shader = first_shader; shader <= last_shader; shader++) { + ret = svga_validate_image_view_resources(svga, + svga->state.hw_draw.num_image_views[shader], + &svga->state.hw_draw.image_views[shader][0], rebind); + + if (ret != PIPE_OK) + return ret; + } + + svga->rebind.flags.images = FALSE; + + return PIPE_OK; +} + + +/** + * A helper function to validate shader buffer and atomic buffer resources to + * ensure any pending updates to buffers will be emitted before they are + * referenced at draw or dispatch time. It also rebinds the resources if needed. 
+ */ +enum pipe_error +svga_validate_shader_buffers(struct svga_context *svga, + enum svga_pipe_type pipe_type) +{ + enum pipe_shader_type shader, first_shader, last_shader; + bool rebind = svga->rebind.flags.shaderbufs; + enum pipe_error ret; + + assert(svga_have_gl43(svga)); + + if (pipe_type == SVGA_PIPE_GRAPHICS) { + first_shader = PIPE_SHADER_VERTEX; + last_shader = PIPE_SHADER_TESS_EVAL; + } + else { + first_shader = PIPE_SHADER_COMPUTE; + last_shader = PIPE_SHADER_COMPUTE; + } + + for (shader = first_shader; shader <= last_shader; shader++) { + ret = svga_validate_shader_buffer_resources(svga, + svga->state.hw_draw.num_shader_buffers[shader], + &svga->state.hw_draw.shader_buffers[shader][0], rebind); + + if (ret != PIPE_OK) + return ret; + } + + svga->rebind.flags.shaderbufs = FALSE; + + ret = svga_validate_shader_buffer_resources(svga, + svga->state.hw_draw.num_atomic_buffers, + svga->state.hw_draw.atomic_buffers, + svga->rebind.flags.atomicbufs); + + if (ret != PIPE_OK) + return ret; + + svga->rebind.flags.atomicbufs = FALSE; + + return PIPE_OK; +} + + +/** * Was the last command put into the command buffer a drawing command? 
* We use this to determine if we can skip emitting buffer re-bind * commands when we have a sequence of drawing commands that use the @@ -505,9 +648,9 @@ last_command_was_draw(const struct svga_context *svga) */ static boolean vertex_buffers_equal(unsigned count, - SVGA3dVertexBuffer *pVBufAttr1, + SVGA3dVertexBuffer_v2 *pVBufAttr1, struct pipe_resource **pVBuf1, - SVGA3dVertexBuffer *pVBufAttr2, + SVGA3dVertexBuffer_v2 *pVBufAttr2, struct pipe_resource **pVBuf2) { return (memcmp(pVBufAttr1, pVBufAttr2, @@ -526,24 +669,58 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, struct svga_context *svga = hwtnl->svga; struct pipe_resource *vbuffers[SVGA3D_INPUTREG_MAX]; struct svga_winsys_surface *vbuffer_handles[SVGA3D_INPUTREG_MAX]; - struct svga_winsys_surface *so_vertex_count_handle; + struct svga_winsys_surface *so_vertex_count_handle = NULL; const unsigned vbuf_count = so_vertex_count ? 1 : hwtnl->cmd.vbuf_count; + SVGA3dVertexBuffer_v2 vbuffer_attrs[PIPE_MAX_ATTRIBS]; int last_vbuf = -1; unsigned i; assert(svga_have_vgpu10(svga)); + /* setup vertex attribute input layout */ + if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) { + enum pipe_error ret = + SVGA3D_vgpu10_SetInputLayout(svga->swc, + hwtnl->cmd.vdecl_layout_id); + if (ret != PIPE_OK) + return ret; + + svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id; + } + /* Get handle for each referenced vertex buffer, unless we're using a * stream-out buffer to specify the drawing information (DrawAuto). + * Also set up the buffer attributes. 
*/ if (so_vertex_count) { - i = 0; + so_vertex_count_handle = svga_buffer_handle(svga, + so_vertex_count->buffer, + (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_STREAM_OUTPUT)); + if (!so_vertex_count_handle) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* Set IA slot0 input buffer to the SO buffer */ + assert(vbuf_count == 1); + vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride; + vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset; + vbuffer_attrs[0].sid = 0; + assert(so_vertex_count->buffer != NULL); + vbuffer_attrs[0].sizeInBytes = svga_buffer(so_vertex_count->buffer)->size; + vbuffers[0] = so_vertex_count->buffer; + vbuffer_handles[0] = so_vertex_count_handle; + + i = 1; } else { for (i = 0; i < vbuf_count; i++) { struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer.resource); + vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride; + vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset; + vbuffer_attrs[i].sid = 0; + if (sbuf) { vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b, PIPE_BIND_VERTEX_BUFFER); @@ -552,17 +729,25 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, return PIPE_ERROR_OUT_OF_MEMORY; vbuffers[i] = &sbuf->b; last_vbuf = i; + + vbuffer_attrs[i].sizeInBytes = sbuf->size; } else { vbuffers[i] = NULL; vbuffer_handles[i] = NULL; + vbuffer_attrs[i].sizeInBytes = 0; } } } + /* Unbind the unreferenced the vertex buffer handles */ for (; i < svga->state.hw_draw.num_vbuffers; i++) { vbuffers[i] = NULL; vbuffer_handles[i] = NULL; + vbuffer_attrs[i].sid = 0; + vbuffer_attrs[i].stride = 0; + vbuffer_attrs[i].offset = 0; + vbuffer_attrs[i].sizeInBytes = 0; } /* Get handle for each referenced vertex buffer */ @@ -616,25 +801,6 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, /* setup vertex buffers */ { - SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS]; - - if (so_vertex_count) { - /* Set IA slot0 input buffer to the SO buffer */ - assert(vbuf_count == 1); - vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride; - 
vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset; - vbuffer_attrs[0].sid = 0; - vbuffers[0] = so_vertex_count->buffer; - vbuffer_handles[0] = so_vertex_count_handle; - } - else { - for (i = 0; i < vbuf_count; i++) { - vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride; - vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset; - vbuffer_attrs[i].sid = 0; - } - } - /* If any of the vertex buffer state has changed, issue * the SetVertexBuffers command. Otherwise, we will just * need to rebind the resources. @@ -654,20 +820,13 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, */ num_vbuffers = MAX2(vbuf_count, svga->state.hw_draw.num_vbuffers); - /* Zero-out the old buffers we want to unbind (the number of loop - * iterations here is typically very small, and often zero.) - */ - for (i = vbuf_count; i < num_vbuffers; i++) { - vbuffer_attrs[i].sid = 0; - vbuffer_attrs[i].stride = 0; - vbuffer_attrs[i].offset = 0; - vbuffer_handles[i] = NULL; - } - if (num_vbuffers > 0) { - SVGA3dVertexBuffer *pbufAttrs = vbuffer_attrs; + SVGA3dVertexBuffer_v2 *pbufAttrs = vbuffer_attrs; struct svga_winsys_surface **pbufHandles = vbuffer_handles; unsigned numVBuf = 0; + boolean emitVBufs = + !svga_sws(svga)->have_index_vertex_buffer_offset_cmd || + svga->rebind.flags.vertexbufs; /* Loop through the vertex buffer lists to only emit * those vertex buffers that are not already in the @@ -681,6 +840,10 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, &svga->state.hw_draw.vbuffer_attrs[i], &svga->state.hw_draw.vbuffers[i]); + /* Check if we can use the SetVertexBuffersOffsetAndSize command */ + emitVBufs = emitVBufs || + (vbuffers[i] != svga->state.hw_draw.vbuffers[i]); + if (!emit && i == num_vbuffers-1) { /* Include the last vertex buffer in the next emit * if it is different. @@ -696,11 +859,23 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, * In this case, there is nothing to send yet. 
*/ if (numVBuf) { - enum pipe_error ret = - SVGA3D_vgpu10_SetVertexBuffers(svga->swc, - numVBuf, - i - numVBuf, - pbufAttrs, pbufHandles); + enum pipe_error ret; + + /* If all vertex buffers handle are the same as the one + * in the device, just use the + * SetVertexBuffersOffsetAndSize comand. + */ + if (emitVBufs) { + ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, + numVBuf, + i - numVBuf, + pbufAttrs, pbufHandles); + } else { + ret = SVGA3D_vgpu10_SetVertexBuffersOffsetAndSize(svga->swc, + numVBuf, + i - numVBuf, + pbufAttrs); + } if (ret != PIPE_OK) return ret; } @@ -740,6 +915,8 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, } } + svga->rebind.flags.vertexbufs = FALSE; + return PIPE_OK; } @@ -755,6 +932,7 @@ validate_index_buffer(struct svga_hwtnl *hwtnl, struct svga_context *svga = hwtnl->svga; struct svga_winsys_surface *ib_handle = svga_buffer_handle(svga, ib, PIPE_BIND_INDEX_BUFFER); + enum pipe_error ret; if (!ib_handle) return PIPE_ERROR_OUT_OF_MEMORY; @@ -770,12 +948,26 @@ validate_index_buffer(struct svga_hwtnl *hwtnl, range->indexArray.offset != svga->state.hw_draw.ib_offset) { assert(indexFormat != SVGA3D_FORMAT_INVALID); - enum pipe_error ret = - SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle, - indexFormat, - range->indexArray.offset); - if (ret != PIPE_OK) - return ret; + + if ((ib == svga->state.hw_draw.ib) && + svga_sws(hwtnl->svga)->have_index_vertex_buffer_offset_cmd && + !svga->rebind.flags.indexbuf) { + + ret = SVGA3D_vgpu10_SetIndexBufferOffsetAndSize(svga->swc, + indexFormat, + range->indexArray.offset, + sbuf->size); + if (ret != PIPE_OK) + return ret; + } + else { + + ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle, + indexFormat, + range->indexArray.offset); + if (ret != PIPE_OK) + return ret; + } pipe_resource_reference(&svga->state.hw_draw.ib, ib); svga->state.hw_draw.ib_format = indexFormat; @@ -795,6 +987,8 @@ validate_index_buffer(struct svga_hwtnl *hwtnl, } } + svga->rebind.flags.indexbuf = FALSE; + return 
PIPE_OK; } @@ -842,14 +1036,30 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, */ } - ret = validate_sampler_resources(svga); + ret = svga_validate_sampler_resources(svga, SVGA_PIPE_GRAPHICS); if (ret != PIPE_OK) return ret; - ret = validate_constant_buffers(svga); + ret = svga_validate_constant_buffers(svga, SVGA_PIPE_GRAPHICS); if (ret != PIPE_OK) return ret; + if (svga_have_gl43(svga)) { + ret = svga_validate_image_views(svga, SVGA_PIPE_GRAPHICS); + if (ret != PIPE_OK) + return ret; + + ret = svga_validate_shader_buffers(svga, SVGA_PIPE_GRAPHICS); + if (ret != PIPE_OK) + return ret; + + if (svga->rebind.flags.uav) { + ret= svga_rebind_uav(svga); + if (ret != PIPE_OK) + return ret; + } + } + ret = validate_vertex_buffers(hwtnl, so_vertex_count); if (ret != PIPE_OK) return ret; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.c b/lib/mesa/src/gallium/drivers/svga/svga_format.c index 832c50e7c..6cfc92b6f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_format.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_format.c @@ -61,9 +61,9 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ PIPE_FORMAT_B8G8R8X8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM, SVGA3D_B8G8R8X8_UNORM, TF_GEN_MIPS }, [ PIPE_FORMAT_B5G5R5A1_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, SVGA3D_B5G5R5A1_UNORM, TF_GEN_MIPS }, [ PIPE_FORMAT_B5G6R5_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, SVGA3D_B5G6R5_UNORM, TF_GEN_MIPS }, - [ PIPE_FORMAT_R10G10B10A2_UNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, TF_GEN_MIPS }, + [ PIPE_FORMAT_R10G10B10A2_UNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_L8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UNORM, TF_XXX1 }, - [ PIPE_FORMAT_A8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS | TF_000X}, + [ PIPE_FORMAT_A8_UNORM ] = { 
SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS | TF_000X | TF_UAV }, [ PIPE_FORMAT_I8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UNORM, TF_XXXX }, [ PIPE_FORMAT_L8A8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8_UNORM, TF_XXXY }, [ PIPE_FORMAT_L16_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R16_UNORM, TF_XXX1 }, @@ -71,10 +71,10 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ PIPE_FORMAT_Z32_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, SVGA3D_D32_FLOAT, 0 }, [ PIPE_FORMAT_Z24_UNORM_S8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 }, [ PIPE_FORMAT_Z24X8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 }, - [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS }, - [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS | TF_UAV }, + [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R32G32B32_FLOAT ] = { SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, TF_GEN_MIPS }, - [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R32_USCALED ] = { SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R32G32_USCALED ] = { SVGA3D_R32G32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R32G32B32_USCALED ] = { SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, 
SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, @@ -83,45 +83,42 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ PIPE_FORMAT_R32G32_SSCALED ] = { SVGA3D_R32G32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, [ PIPE_FORMAT_R32G32B32_SSCALED ] = { SVGA3D_R32G32B32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, [ PIPE_FORMAT_R32G32B32A32_SSCALED ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, - [ PIPE_FORMAT_R16_UNORM ] = { SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, TF_GEN_MIPS }, - [ PIPE_FORMAT_R16G16_UNORM ] = { SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, TF_GEN_MIPS }, + [ PIPE_FORMAT_R16_UNORM ] = { SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, TF_GEN_MIPS | TF_UAV }, + [ PIPE_FORMAT_R16G16_UNORM ] = { SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R16G16B16_UNORM ] = { SVGA3D_R16G16B16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R16G16B16A16_UNORM ] = { SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, TF_GEN_MIPS }, + [ PIPE_FORMAT_R16G16B16A16_UNORM ] = { SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R16_USCALED ] = { SVGA3D_R16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R16G16_USCALED ] = { SVGA3D_R16G16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R16G16B16_USCALED ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST }, [ PIPE_FORMAT_R16G16B16A16_USCALED ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, - [ PIPE_FORMAT_R16_SNORM ] = { SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, 0 }, - [ PIPE_FORMAT_R16G16_SNORM ] = { 
SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, 0 }, + [ PIPE_FORMAT_R16_SNORM ] = { SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, TF_UAV }, + [ PIPE_FORMAT_R16G16_SNORM ] = { SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, TF_UAV }, [ PIPE_FORMAT_R16G16B16_SNORM ] = { SVGA3D_R16G16B16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R16G16B16A16_SNORM ] = { SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, 0 }, + [ PIPE_FORMAT_R16G16B16A16_SNORM ] = { SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, TF_UAV }, [ PIPE_FORMAT_R16_SSCALED ] = { SVGA3D_R16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, [ PIPE_FORMAT_R16G16_SSCALED ] = { SVGA3D_R16G16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, [ PIPE_FORMAT_R16G16B16_SSCALED ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST }, [ PIPE_FORMAT_R16G16B16A16_SSCALED ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, - [ PIPE_FORMAT_R8_UNORM ] = { SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS }, - [ PIPE_FORMAT_R8G8_UNORM ] = { SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, TF_GEN_MIPS }, + [ PIPE_FORMAT_R8_UNORM ] = { SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, TF_GEN_MIPS | TF_UAV }, + [ PIPE_FORMAT_R8G8_UNORM ] = { SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R8G8B8_UNORM ] = { SVGA3D_R8G8B8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R8G8B8A8_UNORM ] = { SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, TF_GEN_MIPS }, + [ PIPE_FORMAT_R8G8B8A8_UNORM ] = { SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R8_USCALED ] = { SVGA3D_R8_UINT, 
SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R8G8_USCALED ] = { SVGA3D_R8G8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R8G8B8_USCALED ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST }, [ PIPE_FORMAT_R8G8B8A8_USCALED ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, - - [ PIPE_FORMAT_R8_SNORM ] = { SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, 0 }, - [ PIPE_FORMAT_R8G8_SNORM ] = { SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, 0 }, + [ PIPE_FORMAT_R8_SNORM ] = { SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, TF_UAV }, + [ PIPE_FORMAT_R8G8_SNORM ] = { SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, TF_UAV }, [ PIPE_FORMAT_R8G8B8_SNORM ] = { SVGA3D_R8G8B8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R8G8B8A8_SNORM ] = { SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, 0 }, - + [ PIPE_FORMAT_R8G8B8A8_SNORM ] = { SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, TF_UAV }, [ PIPE_FORMAT_R8_SSCALED ] = { SVGA3D_R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, [ PIPE_FORMAT_R8G8_SSCALED ] = { SVGA3D_R8G8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, [ PIPE_FORMAT_R8G8B8_SSCALED ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST }, [ PIPE_FORMAT_R8G8B8A8_SSCALED ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST }, - - [ PIPE_FORMAT_R16_FLOAT ] = { SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, TF_GEN_MIPS }, - [ PIPE_FORMAT_R16G16_FLOAT ] = { SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R16_FLOAT ] = { SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, TF_GEN_MIPS | TF_UAV }, + [ PIPE_FORMAT_R16G16_FLOAT ] = { 
SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R16G16B16_FLOAT ] = { SVGA3D_R16G16B16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R16G16B16A16_FLOAT ] = { SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R16G16B16A16_FLOAT ] = { SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_B8G8R8A8_SRGB ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_GEN_MIPS }, [ PIPE_FORMAT_B8G8R8X8_SRGB ] = { SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_GEN_MIPS }, [ PIPE_FORMAT_R8G8B8A8_SRGB ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_GEN_MIPS }, @@ -138,7 +135,7 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ PIPE_FORMAT_RGTC2_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, SVGA3D_FORMAT_INVALID, 0 }, [ PIPE_FORMAT_RGTC2_SNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC5_SNORM, SVGA3D_FORMAT_INVALID, 0 }, [ PIPE_FORMAT_R10G10B10A2_USCALED ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_USCALED }, - [ PIPE_FORMAT_R11G11B10_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R11G11B10_FLOAT, SVGA3D_R11G11B10_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R11G11B10_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R11G11B10_FLOAT, SVGA3D_R11G11B10_FLOAT, TF_GEN_MIPS | TF_UAV }, [ PIPE_FORMAT_R9G9B9E5_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3D_FORMAT_INVALID, 0 }, [ PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, 0 }, [ PIPE_FORMAT_B10G10R10A2_UNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA }, @@ -158,30 +155,30 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ 
PIPE_FORMAT_B10G10R10A2_USCALED ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_USCALED }, [ PIPE_FORMAT_B10G10R10A2_SSCALED ] = { SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SSCALED }, [ PIPE_FORMAT_B10G10R10A2_SNORM ] = { SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SNORM }, - [ PIPE_FORMAT_R8_UINT ] = { SVGA3D_R8_UINT, SVGA3D_R8_UINT, SVGA3D_R8_UINT, 0 }, - [ PIPE_FORMAT_R8G8_UINT ] = { SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, 0 }, + [ PIPE_FORMAT_R8_UINT ] = { SVGA3D_R8_UINT, SVGA3D_R8_UINT, SVGA3D_R8_UINT, TF_UAV }, + [ PIPE_FORMAT_R8G8_UINT ] = { SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, TF_UAV }, [ PIPE_FORMAT_R8G8B8_UINT ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R8G8B8A8_UINT ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, 0 }, - [ PIPE_FORMAT_R8_SINT ] = { SVGA3D_R8_SINT, SVGA3D_R8_SINT, SVGA3D_R8_SINT, 0 }, - [ PIPE_FORMAT_R8G8_SINT ] = { SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, 0 }, + [ PIPE_FORMAT_R8G8B8A8_UINT ] = { SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, TF_UAV }, + [ PIPE_FORMAT_R8_SINT ] = { SVGA3D_R8_SINT, SVGA3D_R8_SINT, SVGA3D_R8_SINT, TF_UAV }, + [ PIPE_FORMAT_R8G8_SINT ] = { SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, TF_UAV }, [ PIPE_FORMAT_R8G8B8_SINT ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R8G8B8A8_SINT ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, 0 }, - [ PIPE_FORMAT_R16_UINT ] = { SVGA3D_R16_UINT, SVGA3D_R16_UINT, SVGA3D_R16_UINT, 0 }, - [ PIPE_FORMAT_R16G16_UINT ] = { SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, 0 }, + [ PIPE_FORMAT_R8G8B8A8_SINT ] = { SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, TF_UAV }, + [ 
PIPE_FORMAT_R16_UINT ] = { SVGA3D_R16_UINT, SVGA3D_R16_UINT, SVGA3D_R16_UINT, TF_UAV }, + [ PIPE_FORMAT_R16G16_UINT ] = { SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, TF_UAV }, [ PIPE_FORMAT_R16G16B16_UINT ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R16G16B16A16_UINT ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, 0 }, - [ PIPE_FORMAT_R16_SINT ] = { SVGA3D_R16_SINT, SVGA3D_R16_SINT, SVGA3D_R16_SINT, 0 }, - [ PIPE_FORMAT_R16G16_SINT ] = { SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, 0 }, + [ PIPE_FORMAT_R16G16B16A16_UINT ] = { SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, TF_UAV }, + [ PIPE_FORMAT_R16_SINT ] = { SVGA3D_R16_SINT, SVGA3D_R16_SINT, SVGA3D_R16_SINT, TF_UAV }, + [ PIPE_FORMAT_R16G16_SINT ] = { SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, TF_UAV }, [ PIPE_FORMAT_R16G16B16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_W_TO_1 }, - [ PIPE_FORMAT_R16G16B16A16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, 0 }, - [ PIPE_FORMAT_R32_UINT ] = { SVGA3D_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, 0 }, - [ PIPE_FORMAT_R32G32_UINT ] = { SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, 0 }, + [ PIPE_FORMAT_R16G16B16A16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, TF_UAV }, + [ PIPE_FORMAT_R32_UINT ] = { SVGA3D_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, TF_UAV }, + [ PIPE_FORMAT_R32G32_UINT ] = { SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, TF_UAV }, [ PIPE_FORMAT_R32G32B32_UINT ] = { SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, 0 }, - [ PIPE_FORMAT_R32G32B32A32_UINT ] = { SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, 0 }, - [ PIPE_FORMAT_R32_SINT ] = { SVGA3D_R32_SINT, SVGA3D_R32_SINT, 
SVGA3D_R32_SINT, 0 }, - [ PIPE_FORMAT_R32G32_SINT ] = { SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, 0 }, + [ PIPE_FORMAT_R32G32B32A32_UINT ] = { SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, TF_UAV }, + [ PIPE_FORMAT_R32_SINT ] = { SVGA3D_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, TF_UAV }, + [ PIPE_FORMAT_R32G32_SINT ] = { SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, TF_UAV }, [ PIPE_FORMAT_R32G32B32_SINT ] = { SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, 0 }, - [ PIPE_FORMAT_R32G32B32A32_SINT ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, 0 }, + [ PIPE_FORMAT_R32G32B32A32_SINT ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, TF_UAV }, [ PIPE_FORMAT_A8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_000X }, [ PIPE_FORMAT_I8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_XXXX }, [ PIPE_FORMAT_L8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_XXX1 }, @@ -206,7 +203,13 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ PIPE_FORMAT_I32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32_SINT, TF_XXXX }, [ PIPE_FORMAT_L32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32_SINT, TF_XXX1 }, [ PIPE_FORMAT_L32A32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32G32_SINT, TF_XXXY }, - [ PIPE_FORMAT_R10G10B10A2_UINT ] = { SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, 0 }, + [ PIPE_FORMAT_R10G10B10A2_UINT ] = { SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, TF_UAV }, + [ PIPE_FORMAT_BPTC_RGBA_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC7_UNORM, SVGA3D_FORMAT_INVALID, TF_SM5 }, + [ PIPE_FORMAT_BPTC_SRGBA ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC7_UNORM_SRGB, SVGA3D_FORMAT_INVALID, TF_SM5 }, + [ 
PIPE_FORMAT_BPTC_RGB_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC6H_SF16, SVGA3D_FORMAT_INVALID, TF_SM5 }, + [ PIPE_FORMAT_BPTC_RGB_UFLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_BC6H_UF16, SVGA3D_FORMAT_INVALID, TF_SM5 }, + [ PIPE_FORMAT_X24S8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_X24_G8_UINT, SVGA3D_FORMAT_INVALID, 0 }, + [ PIPE_FORMAT_X32_S8X24_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_X32_G8X24_UINT, SVGA3D_FORMAT_INVALID, 0 }, /* Must specify following entry to give the sense of size of format_conversion_table[] */ [ PIPE_FORMAT_COUNT ] = {SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, }; @@ -298,8 +301,18 @@ svga_translate_format(const struct svga_screen *ss, else if (bind & PIPE_BIND_SCANOUT) { return svga_translate_screen_target_format_vgpu10(format); } + else if (bind & PIPE_BIND_SHADER_IMAGE) { + if (format_conversion_table[format].flags & TF_UAV) + return format_conversion_table[format].pixel_format; + else + return SVGA3D_FORMAT_INVALID; + } else { - return entry->pixel_format; + if ((format_conversion_table[format].flags & TF_SM5) && + !ss->sws->have_sm5) + return SVGA3D_FORMAT_INVALID; + else + return entry->pixel_format; } } @@ -784,10 +797,8 @@ static const struct format_cap format_cap_table[] = { 0, 0, 0, 0 }, { - "SVGA3D_AYUV", - SVGA3D_AYUV, - SVGA3D_DEVCAP_SURFACEFMT_AYUV, - 0, 0, 0, 0 + "SVGA3D_FORMAT_DEAD2", + SVGA3D_FORMAT_DEAD2, 0, 0, 0, 0, 0 }, { "SVGA3D_R32G32B32A32_TYPELESS", @@ -1369,6 +1380,59 @@ static const struct format_cap format_cap_table[] = { SVGA3D_BC5_UNORM, SVGA3D_DEVCAP_DXFMT_BC5_UNORM, 4, 4, 16, 0 + }, + { + "SVGA3D_B4G4R4A4_UNORM", + SVGA3D_B4G4R4A4_UNORM, + 0, 0, 0, 0 + }, + { + "SVGA3D_BC6H_TYPELESS", + SVGA3D_BC6H_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS, + 4, 4, 16, 0 + }, + { + "SVGA3D_BC6H_UF16", + SVGA3D_BC6H_UF16, + SVGA3D_DEVCAP_DXFMT_BC6H_UF16, + 4, 4, 16, 0 + }, + { + "SVGA3D_BC6H_SF16", + SVGA3D_BC6H_SF16, + SVGA3D_DEVCAP_DXFMT_BC6H_SF16, + 4, 4, 16, 0 + }, + { + 
"SVGA3D_BC7_TYPELESS", + SVGA3D_BC7_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC7_TYPELESS, + 4, 4, 16, 0 + }, + { + "SVGA3D_BC7_UNORM", + SVGA3D_BC7_UNORM, + SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS, + 4, 4, 16, 0 + }, + { + "SVGA3D_BC7_UNORM_SRGB", + SVGA3D_BC7_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS, + 4, 4, 16, 0 + }, + { + "SVGA3D_AYUV", + SVGA3D_AYUV, + 0, + 1, 1, 4, 0 + }, + { + "SVGA3D_R11G11B10_TYPELESS", + SVGA3D_R11G11B10_TYPELESS, + SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT, + 1, 1, 4, 0 } }; @@ -1470,7 +1534,6 @@ svga_devcap_name(SVGA3dDevCapIndex cap) DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_UYVY), DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_YUY2), DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_NV12), - DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_AYUV), DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS), DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT), DEBUG_NAMED_VALUE(SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT), @@ -1662,7 +1725,7 @@ svga_get_dx_format_cap(struct svga_screen *ss, if (entry->devcap) { sws->get_cap(sws, entry->devcap, caps); - /* pre-SM41 capabable svga device supports SHADER_SAMPLE capability for + /* pre-SM41 capable svga device supports SHADER_SAMPLE capability for * these formats but does not advertise the devcap. * So enable this bit here. 
*/ @@ -1672,6 +1735,9 @@ svga_get_dx_format_cap(struct svga_screen *ss, caps->u |= SVGA3D_DXFMT_SHADER_SAMPLE; } } + else { + caps->u = entry->defaultOperations; + } if (0) { debug_printf("Format %s, devcap %s = 0x%x (%s)\n", @@ -1853,20 +1919,24 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_R32G32B32A32_UINT: case SVGA3D_R32G32B32A32_SINT: case SVGA3D_R32G32B32A32_FLOAT: + case SVGA3D_R32G32B32A32_TYPELESS: return SVGA3D_R32G32B32A32_TYPELESS; case SVGA3D_R32G32B32_FLOAT: case SVGA3D_R32G32B32_UINT: case SVGA3D_R32G32B32_SINT: + case SVGA3D_R32G32B32_TYPELESS: return SVGA3D_R32G32B32_TYPELESS; case SVGA3D_R16G16B16A16_UINT: case SVGA3D_R16G16B16A16_UNORM: case SVGA3D_R16G16B16A16_SNORM: case SVGA3D_R16G16B16A16_SINT: case SVGA3D_R16G16B16A16_FLOAT: + case SVGA3D_R16G16B16A16_TYPELESS: return SVGA3D_R16G16B16A16_TYPELESS; case SVGA3D_R32G32_UINT: case SVGA3D_R32G32_SINT: case SVGA3D_R32G32_FLOAT: + case SVGA3D_R32G32_TYPELESS: return SVGA3D_R32G32_TYPELESS; case SVGA3D_D32_FLOAT_S8X24_UINT: case SVGA3D_X32_G8X24_UINT: @@ -1874,6 +1944,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format) return SVGA3D_R32G8X24_TYPELESS; case SVGA3D_R10G10B10A2_UINT: case SVGA3D_R10G10B10A2_UNORM: + case SVGA3D_R10G10B10A2_TYPELESS: return SVGA3D_R10G10B10A2_TYPELESS; case SVGA3D_R8G8B8A8_UNORM: case SVGA3D_R8G8B8A8_SNORM: @@ -1887,6 +1958,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_R16G16_UNORM: case SVGA3D_R16G16_SNORM: case SVGA3D_R16G16_FLOAT: + case SVGA3D_R16G16_TYPELESS: return SVGA3D_R16G16_TYPELESS; case SVGA3D_D32_FLOAT: case SVGA3D_R32_FLOAT: @@ -1903,6 +1975,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_R8G8_SNORM: case SVGA3D_R8G8_UINT: case SVGA3D_R8G8_SINT: + case SVGA3D_R8G8_TYPELESS: return SVGA3D_R8G8_TYPELESS; case SVGA3D_D16_UNORM: case SVGA3D_R16_UNORM: @@ -1916,6 +1989,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_R8_UINT: case SVGA3D_R8_SNORM: case SVGA3D_R8_SINT: + case 
SVGA3D_R8_TYPELESS: return SVGA3D_R8_TYPELESS; case SVGA3D_B8G8R8A8_UNORM_SRGB: case SVGA3D_B8G8R8A8_UNORM: @@ -1939,16 +2013,28 @@ svga_typeless_format(SVGA3dSurfaceFormat format) return SVGA3D_BC3_TYPELESS; case SVGA3D_BC4_UNORM: case SVGA3D_BC4_SNORM: + case SVGA3D_BC4_TYPELESS: return SVGA3D_BC4_TYPELESS; case SVGA3D_BC5_UNORM: case SVGA3D_BC5_SNORM: + case SVGA3D_BC5_TYPELESS: return SVGA3D_BC5_TYPELESS; + case SVGA3D_BC6H_UF16: + case SVGA3D_BC6H_SF16: + case SVGA3D_BC6H_TYPELESS: + return SVGA3D_BC6H_TYPELESS; + case SVGA3D_BC7_UNORM: + case SVGA3D_BC7_UNORM_SRGB: + case SVGA3D_BC7_TYPELESS: + return SVGA3D_BC7_TYPELESS; + case SVGA3D_R11G11B10_FLOAT: + case SVGA3D_R11G11B10_TYPELESS: + return SVGA3D_R11G11B10_TYPELESS; /* Special cases (no corresponding _TYPELESS formats) */ case SVGA3D_A8_UNORM: case SVGA3D_B5G5R5A1_UNORM: case SVGA3D_B5G6R5_UNORM: - case SVGA3D_R11G11B10_FLOAT: case SVGA3D_R9G9B9E5_SHAREDEXP: return format; default: @@ -2024,6 +2110,8 @@ svga_format_is_typeless(SVGA3dSurfaceFormat format) case SVGA3D_BC3_TYPELESS: case SVGA3D_BC4_TYPELESS: case SVGA3D_BC5_TYPELESS: + case SVGA3D_BC6H_TYPELESS: + case SVGA3D_BC7_TYPELESS: case SVGA3D_B8G8R8A8_TYPELESS: case SVGA3D_B8G8R8X8_TYPELESS: return true; @@ -2228,10 +2316,16 @@ svga_is_dx_format_supported(struct pipe_screen *screen, assert(bindings); assert(ss->sws->have_vgpu10); - if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) - return false; + /* To support framebuffer without attachments */ + if ((format == PIPE_FORMAT_NONE) && (bindings == PIPE_BIND_RENDER_TARGET)) + return (ss->sws->have_gl43 && (sample_count <= ss->forcedSampleCount)); if (sample_count > 1) { + + /* No MSAA support for shader image */ + if (bindings & PIPE_BIND_SHADER_IMAGE) + return false; + /* In ms_samples, if bit N is set it means that we support * multisample with N+1 samples per pixel. 
*/ @@ -2246,7 +2340,6 @@ svga_is_dx_format_supported(struct pipe_screen *screen, */ if (bindings & PIPE_BIND_VERTEX_BUFFER) { - SVGA3dSurfaceFormat svga_format; unsigned flags; svga_translate_vertex_format_vgpu10(format, &svga_format, &flags); return svga_format != SVGA3D_FORMAT_INVALID; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_format.h b/lib/mesa/src/gallium/drivers/svga/svga_format.h index a2ef47963..dae9a556f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_format.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_format.h @@ -60,6 +60,8 @@ struct svga_screen; #define TF_XXXX (1 << 10) /* swizzle <X, X, X, X> */ #define TF_XXX1 (1 << 11) /* swizzle <X, X, X, 1> */ #define TF_XXXY (1 << 12) /* swizzle <X, X, X, Y> */ +#define TF_UAV (1 << 13) /* supports uav */ +#define TF_SM5 (1 << 14) /* supported in SM5 */ void svga_translate_vertex_format_vgpu10(enum pipe_format format, diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c index 9cb53ef0a..d5be5697c 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blit.c @@ -103,8 +103,8 @@ intra_surface_copy(struct svga_context *svga, struct pipe_resource *tex, SVGA_RETRY(svga, SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc, stex->handle, level, layer_face, &box)); - /* Mark the texture subresource as rendered-to. */ - svga_set_texture_rendered_to(stex, layer_face, level); + /* Mark the texture surface as RENDERED. */ + svga_set_texture_rendered_to(stex); } /** @@ -139,8 +139,8 @@ copy_region_vgpu10(struct svga_context *svga, struct pipe_resource *src_tex, /* Mark the texture subresource as defined. */ svga_define_texture_level(dtex, dst_layer_face, dst_level); - /* Mark the texture subresource as rendered-to. */ - svga_set_texture_rendered_to(dtex, dst_layer_face, dst_level); + /* Mark the texture surface as RENDERED. 
*/ + svga_set_texture_rendered_to(dtex); } @@ -322,7 +322,7 @@ can_blit_via_svga_copy_region(struct svga_context *svga, local_blit.dst.format = local_blit.src.format; if (local_blit.filter == PIPE_TEX_FILTER_LINEAR) local_blit.filter = PIPE_TEX_FILTER_NEAREST; - if (!util_can_blit_via_copy_region(&local_blit, TRUE)) + if (!util_can_blit_via_copy_region(&local_blit, TRUE, svga->render_condition)) return false; /* For depth+stencil formats, copy with mask != PIPE_MASK_ZS is not @@ -504,7 +504,8 @@ try_copy_region(struct svga_context *svga, blit->src.box.depth); svga_define_texture_level(dtex, dst_layer_face, blit->dst.level); - svga_set_texture_rendered_to(dtex, dst_layer_face, blit->dst.level); + svga_set_texture_rendered_to(dtex); + return true; } @@ -634,7 +635,7 @@ try_blit(struct svga_context *svga, const struct pipe_blit_info *blit_info) util_blitter_save_depth_stencil_alpha(svga->blitter, (void*)svga->curr.depth); util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref); - util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask); + util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask, 0); util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer); util_blitter_save_fragment_sampler_states(svga->blitter, svga->curr.num_samplers[PIPE_SHADER_FRAGMENT], @@ -752,8 +753,8 @@ static bool try_cpu_copy_region(struct svga_context *svga, const struct pipe_blit_info *blit) { - if (util_can_blit_via_copy_region(blit, TRUE) || - util_can_blit_via_copy_region(blit, FALSE)) { + if (util_can_blit_via_copy_region(blit, TRUE, svga->render_condition) || + util_can_blit_via_copy_region(blit, FALSE, svga->render_condition)) { if (svga->render_condition && blit->render_condition_enable) { debug_warning("CPU copy_region doesn't support " @@ -772,6 +773,66 @@ try_cpu_copy_region(struct svga_context *svga, return false; } +/** + * A helper function to resolve a multisampled surface to a single-sampled + * surface using SVGA command 
ResolveCopy. + */ +static boolean +try_resolve_copy(struct svga_context *svga, + const struct pipe_blit_info *blit) +{ + enum pipe_error ret; + struct svga_texture *src_tex = svga_texture(blit->src.resource); + struct svga_texture *dst_tex = svga_texture(blit->dst.resource); + + /* check if formats are compatible for resolve copy */ + if (!formats_compatible(svga_screen(svga->pipe.screen), + src_tex->key.format, dst_tex->key.format)) + return FALSE; + + /* check if the copy dimensions are the same */ + if ((blit->src.box.x || blit->src.box.y || blit->src.box.z) || + (blit->dst.box.x || blit->dst.box.y || blit->dst.box.z) || + (blit->src.box.width != blit->dst.box.width) || + (blit->src.box.height != blit->dst.box.height) || + (blit->src.box.depth != blit->dst.box.depth)) + return FALSE; + + ret = SVGA3D_vgpu10_ResolveCopy(svga->swc, 0, dst_tex->handle, + 0, src_tex->handle, dst_tex->key.format); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_ResolveCopy(svga->swc, 0, dst_tex->handle, + 0, src_tex->handle, dst_tex->key.format); + } + + /* Mark surface state as RENDERED */ + dst_tex->surface_state = SVGA_SURFACE_STATE_RENDERED; + + return (ret == PIPE_OK); +} + + +/** + * Returns FALSE if the resource does not have data to copy. + */ +static boolean +is_texture_valid_to_copy(struct svga_context *svga, + struct pipe_resource *resource) +{ + if (resource->target == PIPE_BUFFER) { + struct svga_buffer *buf = svga_buffer(resource); + struct svga_buffer_surface *bufsurf = buf->bufsurf; + + return (bufsurf && + bufsurf->surface_state >= SVGA_SURFACE_STATE_UPDATED); + } else { + struct svga_texture *tex = svga_texture(resource); + return ((tex->surface_state >= SVGA_SURFACE_STATE_UPDATED) || + (resource->bind & PIPE_BIND_SHARED)); + } +} + /** * The pipe::blit member. 
@@ -794,6 +855,20 @@ svga_blit(struct pipe_context *pipe, SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_BLIT); + if (!is_texture_valid_to_copy(svga, blit->src.resource)) { + debug_printf("%s: texture is not defined to copy\n", + __FUNCTION__); + goto done; + } + + if (svga_have_sm4_1(svga) && + blit->src.resource->nr_samples > 1 && + blit->dst.resource->nr_samples <=1 && + (blit->dst.resource->bind & PIPE_BIND_DISPLAY_TARGET)) { + if (try_resolve_copy(svga, blit)) + goto done; + } + if (try_copy_region(svga, blit)) goto done; @@ -826,6 +901,12 @@ svga_resource_copy_region(struct pipe_context *pipe, SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_COPYREGION); + if (!is_texture_valid_to_copy(svga, src_tex)) { + debug_printf("%s: texture is not defined to copy\n", + __FUNCTION__); + goto done; + } + if (dst_tex->target == PIPE_BUFFER && src_tex->target == PIPE_BUFFER) { /* can't copy within the same buffer, unfortunately */ if (svga_have_vgpu10(svga) && src_tex != dst_tex) { @@ -841,6 +922,10 @@ svga_resource_copy_region(struct pipe_context *pipe, dst_surf, src_box->x, dstx, src_box->width)); dbuffer->dirty = TRUE; + + /* Mark the buffer surface as RENDERED */ + assert(dbuffer->bufsurf); + dbuffer->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED; } else { /* use map/memcpy fallback */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c index 82b102081..1e58549f4 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_clear.c @@ -57,7 +57,7 @@ begin_blit(struct svga_context *svga) util_blitter_save_depth_stencil_alpha(svga->blitter, (void*)svga->curr.depth); util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref); - util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask); + util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask, 0); } @@ -174,14 +174,29 @@ try_clear(struct svga_context *svga, if 
(svga_have_vgpu10(svga)) { if (flags & SVGA3D_CLEAR_COLOR) { unsigned i; + bool int_target = is_integer_target(fb, buffers); - if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) { + if (int_target && !ints_fit_in_floats(color)) { clear_buffers_with_quad(svga, buffers, color, depth, stencil); /* We also cleared depth/stencil, so that's done */ flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL); } else { struct pipe_surface *rtv; + float rgba[4]; + + if (int_target) { + rgba[0] = (float) color->i[0]; + rgba[1] = (float) color->i[1]; + rgba[2] = (float) color->i[2]; + rgba[3] = (float) color->i[3]; + } + else { + rgba[0] = color->f[0]; + rgba[1] = color->f[1]; + rgba[2] = color->f[2]; + rgba[3] = color->f[3]; + } /* Issue VGPU10 Clear commands */ for (i = 0; i < fb->nr_cbufs; i++) { @@ -194,8 +209,7 @@ try_clear(struct svga_context *svga, if (!rtv) return PIPE_ERROR_OUT_OF_MEMORY; - ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, - rtv, color->f); + ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, rgba); if (ret != PIPE_OK) return ret; } @@ -325,7 +339,7 @@ svga_clear_texture(struct pipe_context *pipe, if (box->x == 0 && box->y == 0 && box->width == surface->width && box->height == surface->height) { /* clearing whole surface, use direct VGPU10 command */ - + assert(svga_surface(dsv)->view_id != SVGA3D_INVALID_ID); SVGA_RETRY(svga, SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, clear_flags, @@ -367,16 +381,32 @@ svga_clear_texture(struct pipe_context *pipe, if (box->x == 0 && box->y == 0 && box->width == surface->width && box->height == surface->height) { struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; + bool int_target = is_integer_target(curr, PIPE_CLEAR_COLOR); - if (is_integer_target(curr, PIPE_CLEAR_COLOR) && - !ints_fit_in_floats(&color)) { + if (int_target && !ints_fit_in_floats(&color)) { /* To clear full texture with integer format */ clear_buffers_with_quad(svga, PIPE_CLEAR_COLOR, &color, 0.0, 0); } else { 
+ float rgba[4]; + + if (int_target) { + rgba[0] = (float) color.i[0]; + rgba[1] = (float) color.i[1]; + rgba[2] = (float) color.i[2]; + rgba[3] = (float) color.i[3]; + } + else { + rgba[0] = color.f[0]; + rgba[1] = color.f[1]; + rgba[2] = color.f[2]; + rgba[3] = color.f[3]; + } + /* clearing whole surface using VGPU10 command */ + assert(svga_surface(rtv)->view_id != SVGA3D_INVALID_ID); SVGA_RETRY(svga, SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, - color.f)); + rgba)); } } else { @@ -446,6 +476,7 @@ svga_try_clear_render_target(struct svga_context *svga, if (!rtv) return PIPE_ERROR_OUT_OF_MEMORY; + assert(svga_surface(rtv)->view_id != SVGA3D_INVALID_ID); return SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, color->f); } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c index feeacd2f2..4d7299da6 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_constants.c @@ -54,6 +54,7 @@ svga_set_constant_buffer(struct pipe_context *pipe, if (cb) { buffer_size = cb->buffer_size; + if (cb->user_buffer) { buf = svga_user_buffer_create(pipe->screen, (void *) cb->user_buffer, @@ -94,6 +95,8 @@ svga_set_constant_buffer(struct pipe_context *pipe, svga->dirty |= SVGA_NEW_TCS_CONSTS; else if (shader == PIPE_SHADER_TESS_EVAL) svga->dirty |= SVGA_NEW_TES_CONSTS; + else if (shader == PIPE_SHADER_COMPUTE) + svga->dirty |= SVGA_NEW_CS_CONSTS; } else { if (shader == PIPE_SHADER_FRAGMENT) svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; @@ -105,9 +108,14 @@ svga_set_constant_buffer(struct pipe_context *pipe, svga->dirty |= SVGA_NEW_TCS_CONST_BUFFER; else if (shader == PIPE_SHADER_TESS_EVAL) svga->dirty |= SVGA_NEW_TES_CONST_BUFFER; + else if (shader == PIPE_SHADER_COMPUTE) + svga->dirty |= SVGA_NEW_CS_CONST_BUFFER; /* update bitmask of dirty const buffers */ svga->state.dirty_constbufs[shader] |= (1 << index); + + /* purge any stale rawbuf srv 
*/ + svga_destroy_rawbuf_srv(svga); } if (cb && cb->user_buffer) { diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c index 745fdad64..ffdd3df05 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_draw.c @@ -318,7 +318,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, if (!svga_update_state_retry(svga, SVGA_STATE_HW_DRAW)) { static const char *msg = "State update failed, skipping draw call"; debug_printf("%s\n", msg); - pipe_debug_message(&svga->debug.callback, INFO, "%s", msg); + util_debug_message(&svga->debug.callback, INFO, "%s", msg); goto done; } svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c index 7e809d0cd..df1284c6a 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_flush.c @@ -50,7 +50,7 @@ static void svga_flush( struct pipe_context *pipe, svga_context_flush(svga, fence); SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n", - __FUNCTION__, fence ? *fence : 0x0); + __FUNCTION__, fence ? 
*fence : NULL); /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c index 61b4897c5..f4ed782cd 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_misc.c @@ -225,7 +225,7 @@ svga_set_viewport_states(struct pipe_context *pipe, */ static void svga_set_debug_callback(struct pipe_context *pipe, - const struct pipe_debug_callback *cb) + const struct util_debug_callback *cb) { struct svga_context *svga = svga_context(pipe); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 1b823d64e..061cd5520 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -101,9 +101,10 @@ translate_cull_mode(unsigned cull) } -static void -define_rasterizer_object(struct svga_context *svga, - struct svga_rasterizer_state *rast) +int +svga_define_rasterizer_object(struct svga_context *svga, + struct svga_rasterizer_state *rast, + unsigned samples) { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); unsigned fill_mode = translate_fill_mode(rast->templ.fill_front); @@ -120,8 +121,10 @@ define_rasterizer_object(struct svga_context *svga, rast->templ.line_stipple_pattern : 0; const uint8 pv_last = !rast->templ.flatshade_first && svgascreen->haveProvokingVertex; + int rastId; + enum pipe_error ret; - rast->id = util_bitmask_add(svga->rast_object_id_bm); + rastId = util_bitmask_add(svga->rast_object_id_bm); if (rast->templ.fill_front != rast->templ.fill_back) { /* The VGPU10 device can't handle different front/back fill modes. 
@@ -131,24 +134,53 @@ define_rasterizer_object(struct svga_context *svga, fill_mode = SVGA3D_FILLMODE_FILL; } - SVGA_RETRY(svga, SVGA3D_vgpu10_DefineRasterizerState - (svga->swc, - rast->id, - fill_mode, - cull_mode, - rast->templ.front_ccw, - depth_bias, - depth_bias_clamp, - slope_scaled_depth_bias, - rast->templ.depth_clip_near, - rast->templ.scissor, - rast->templ.multisample, - rast->templ.line_smooth, - line_width, - rast->templ.line_stipple_enable, - line_factor, - line_pattern, - pv_last)); + if (samples > 1 && svga_have_gl43(svga) && + svgascreen->sws->have_rasterizer_state_v2_cmd) { + + ret = SVGA3D_sm5_DefineRasterizerState_v2(svga->swc, + rastId, + fill_mode, + cull_mode, + rast->templ.front_ccw, + depth_bias, + depth_bias_clamp, + slope_scaled_depth_bias, + rast->templ.depth_clip_near, + rast->templ.scissor, + rast->templ.multisample, + rast->templ.line_smooth, + line_width, + rast->templ.line_stipple_enable, + line_factor, + line_pattern, + pv_last, + samples); + } else { + ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc, + rastId, + fill_mode, + cull_mode, + rast->templ.front_ccw, + depth_bias, + depth_bias_clamp, + slope_scaled_depth_bias, + rast->templ.depth_clip_near, + rast->templ.scissor, + rast->templ.multisample, + rast->templ.line_smooth, + line_width, + rast->templ.line_stipple_enable, + line_factor, + line_pattern, + pv_last); + } + + if (ret != PIPE_OK) { + util_bitmask_clear(svga->rast_object_id_bm, rastId); + return SVGA3D_INVALID_ID; + } + + return rastId; } @@ -180,7 +212,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, * though. Our smooth point implementation involves drawing a square, * computing fragment distance from point center, then attenuating * the fragment alpha value. We should not attenuate alpha if msaa - * is enabled. We should kill fragments entirely outside the circle + * is enabled. We should discard fragments entirely outside the circle * and let the GPU compute per-fragment coverage. 
* But as-is, our implementation gives acceptable results and passes * Piglit's MSAA point smooth test. @@ -191,7 +223,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, if (rast->templ.point_smooth && rast->templ.point_size_per_vertex == 0 && rast->templ.point_size <= screen->pointSmoothThreshold) { - /* If the point size is less than the threshold, disable smoothing. + /* If the point size is less than the threshold, deactivate smoothing. * Note that this only effects point rendering when we use the * pipe_rasterizer_state::point_size value, not when the point size * is set in the VS. @@ -359,11 +391,27 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } if (svga_have_vgpu10(svga)) { - define_rasterizer_object(svga, rast); + rast->id = svga_define_rasterizer_object(svga, rast, 0); + if (rast->id == SVGA3D_INVALID_ID) { + svga_context_flush(svga, NULL); + rast->id = svga_define_rasterizer_object(svga, rast, 0); + assert(rast->id != SVGA3D_INVALID_ID); + } + } + + if (svga_have_gl43(svga)) { + /* initialize the alternate rasterizer state ids. + * For 0 and 1 sample count, we can use the same rasterizer object. 
+ */ + rast->altRastIds[0] = rast->altRastIds[1] = rast->id; + + for (unsigned i = 2; i < ARRAY_SIZE(rast->altRastIds); i++) { + rast->altRastIds[i] = SVGA3D_INVALID_ID; + } } if (templ->poly_smooth) { - pipe_debug_message(&svga->debug.callback, CONFORMANCE, + util_debug_message(&svga->debug.callback, CONFORMANCE, "GL_POLYGON_SMOOTH not supported"); } @@ -408,6 +456,10 @@ svga_delete_rasterizer_state(struct pipe_context *pipe, void *state) struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *) state; + /* free any alternate rasterizer state used for point sprite */ + if (raster->no_cull_rasterizer) + svga_delete_rasterizer_state(pipe, (void *)(raster->no_cull_rasterizer)); + if (svga_have_vgpu10(svga)) { SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id)); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c index ad1040c9d..3e0e26c76 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -341,7 +341,7 @@ svga_delete_sampler_state(struct pipe_context *pipe, void *sampler) if (svga_have_vgpu10(svga)) { unsigned i; - for (i = 0; i < 2; i++) { + for (i = 0; i < ARRAY_SIZE(ss->id); i++) { if (ss->id[i] != SVGA3D_INVALID_ID) { svga_hwtnl_flush_retry(svga); @@ -537,7 +537,7 @@ svga_cleanup_sampler_state(struct svga_context *svga) { enum pipe_shader_type shader; - for (shader = 0; shader <= PIPE_SHADER_TESS_EVAL; shader++) { + for (shader = 0; shader <= PIPE_SHADER_COMPUTE; shader++) { unsigned i; for (i = 0; i < svga->state.hw_draw.num_sampler_views[shader]; i++) { diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c index a74825496..4e3280457 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_streamout.c @@ -141,7 +141,7 @@ 
svga_create_stream_output(struct svga_context *svga, unsigned i; enum pipe_error ret; unsigned id; - ASSERTED unsigned maxDecls; + ASSERTED unsigned maxDecls = 0; assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS); @@ -450,6 +450,7 @@ svga_set_stream_output_targets(struct pipe_context *pipe, for (i = 0; i < num_targets; i++) { struct svga_stream_output_target *sot = svga_stream_output_target(targets[i]); + struct svga_buffer *sbuf = svga_buffer(sot->base.buffer); unsigned size; svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer, @@ -458,6 +459,10 @@ svga_set_stream_output_targets(struct pipe_context *pipe, assert(svga_buffer(sot->base.buffer)->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT); + /* Mark the buffer surface as RENDERED */ + assert(sbuf->bufsurf); + sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED; + svga->so_targets[i] = &sot->base; if (offsets[i] == -1) { soBindings[i].offset = -1; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c index aae91e4f4..6fa3af526 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.c @@ -79,6 +79,26 @@ svga_buffer_needs_hw_storage(const struct svga_screen *ss, return !!(template->bind & bind_mask); } + +static inline boolean +need_buf_readback(struct svga_context *svga, + struct pipe_transfer *st) +{ + struct svga_buffer *sbuf = svga_buffer(st->resource); + + if (st->usage != PIPE_MAP_READ) + return FALSE; + + /* No buffer surface has been created */ + if (!sbuf->bufsurf) + return FALSE; + + return ((sbuf->dirty || + sbuf->bufsurf->surface_state == SVGA_SURFACE_STATE_RENDERED) && + !sbuf->key.coherent && !svga->swc->force_coherent); +} + + /** * Create a buffer transfer. 
* @@ -131,11 +151,12 @@ svga_buffer_transfer_map(struct pipe_context *pipe, pipe_resource_reference(&sbuf->translated_indices.buffer, NULL); } - if ((usage & PIPE_MAP_READ) && sbuf->dirty && - !sbuf->key.coherent && !svga->swc->force_coherent) { - - /* Host-side buffers can only be dirtied with vgpu10 features - * (streamout and buffer copy). + /* If it is a read transfer and the buffer is dirty or the buffer is bound + * to a uav, we will need to read the subresource content from the device. + */ + if (need_buf_readback(svga, transfer)) { + /* Host-side buffers can be dirtied with vgpu10 features + * (streamout and buffer copy) and sm5 feature via uav. */ assert(svga_have_vgpu10(svga)); @@ -150,13 +171,16 @@ svga_buffer_transfer_map(struct pipe_context *pipe, assert(sbuf->handle); - SVGA_RETRY(svga, SVGA3D_vgpu10_ReadbackSubResource(svga->swc, - sbuf->handle, 0)); + SVGA_RETRY(svga, SVGA3D_ReadbackGBSurface(svga->swc, sbuf->handle)); svga->hud.num_readbacks++; svga_context_finish(svga); sbuf->dirty = FALSE; + + /* Mark the buffer surface state as UPDATED */ + assert(sbuf->bufsurf); + sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_UPDATED; } if (usage & PIPE_MAP_WRITE) { @@ -434,11 +458,13 @@ svga_resource_destroy(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle); - svga_screen_surface_destroy(ss, &tex->key, &tex->handle); + + boolean to_invalidate = svga_was_texture_rendered_to(tex); + svga_screen_surface_destroy(ss, &tex->key, to_invalidate, &tex->handle); /* Destroy the backed surface handle if exists */ if (tex->backed_handle) - svga_screen_surface_destroy(ss, &tex->backed_key, &tex->backed_handle); + svga_screen_surface_destroy(ss, &tex->backed_key, to_invalidate, &tex->backed_handle); ss->hud.total_resource_bytes -= tex->size; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h index 
97649d972..5652bbcec 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer.h @@ -65,6 +65,7 @@ struct svga_buffer_surface unsigned bind_flags; struct svga_host_surface_cache_key key; struct svga_winsys_surface *handle; + enum svga_surface_state surface_state; }; /** @@ -120,6 +121,9 @@ struct svga_buffer */ struct list_head surfaces; + /* Current surface structure */ + struct svga_buffer_surface *bufsurf; + /** * Information about ongoing and past map operations. */ @@ -212,6 +216,7 @@ struct svga_buffer unsigned size; /**< Approximate size in bytes */ boolean dirty; /**< Need to do a readback before mapping? */ + boolean uav; /* Set if the buffer is bound to a uav */ /** In some cases we try to keep the results of the translate_indices() * function from svga_draw_elements.c @@ -332,6 +337,24 @@ svga_buffer_hw_storage_unmap(struct svga_context *svga, } } else sws->buffer_unmap(sws, sbuf->hwbuf); + + /* Mark the buffer surface as UPDATED */ + assert(sbuf->bufsurf); + sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_UPDATED; +} + + +static inline void +svga_set_buffer_rendered_to(struct svga_buffer_surface *bufsurf) +{ + bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED; +} + + +static inline boolean +svga_was_buffer_rendered_to(const struct svga_buffer_surface *bufsurf) +{ + return (bufsurf->surface_state == SVGA_SURFACE_STATE_RENDERED); } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 5bebbb509..1e86b5d12 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -54,6 +54,10 @@ struct svga_3d_invalidate_gb_image { }; +static void +svga_buffer_upload_ranges(struct svga_context *, struct svga_buffer *); + + /** * Allocate a winsys_buffer (ie. DMA, aka GMR memory). 
* @@ -142,8 +146,7 @@ svga_buffer_create_hw_storage(struct svga_screen *ss, /** - * Allocate graphics memory for vertex/index/constant/etc buffer (not - * textures). + * Allocate graphics memory for vertex/index/constant/texture buffer. */ enum pipe_error svga_buffer_create_host_surface(struct svga_screen *ss, @@ -155,7 +158,7 @@ svga_buffer_create_host_surface(struct svga_screen *ss, assert(!sbuf->user); if (!sbuf->handle) { - boolean validated; + boolean invalidated; sbuf->key.flags = 0; @@ -190,6 +193,15 @@ svga_buffer_create_host_surface(struct svga_screen *ss, sbuf->key.flags = SVGA3D_SURFACE_TRANSFER_FROM_BUFFER; } + if (ss->sws->have_gl43 && + (bind_flags & (PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE)) && + (!(bind_flags & (PIPE_BIND_STREAM_OUTPUT)))) { + /* This surface can be bound to a uav. */ + assert((bind_flags & PIPE_BIND_CONSTANT_BUFFER) == 0); + sbuf->key.flags |= SVGA3D_SURFACE_BIND_UAVIEW | + SVGA3D_SURFACE_BIND_RAW_VIEWS; + } + if (sbuf->b.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) { /* This surface can be mapped persistently. We use * coherent memory to avoid implementing memory barriers for @@ -213,22 +225,31 @@ svga_buffer_create_host_surface(struct svga_screen *ss, sbuf->handle = svga_screen_surface_create(ss, bind_flags, sbuf->b.usage, - &validated, &sbuf->key); + &invalidated, &sbuf->key); if (!sbuf->handle) return PIPE_ERROR_OUT_OF_MEMORY; - /* Always set the discard flag on the first time the buffer is written + /* Set the discard flag on the first time the buffer is written * as svga_screen_surface_create might have passed a recycled host - * buffer. + * buffer. This is only needed for host-backed mode. As in guest-backed + * mode, the recycled buffer would have been invalidated. 
*/ - sbuf->dma.flags.discard = TRUE; + if (!ss->sws->have_gb_objects) + sbuf->dma.flags.discard = TRUE; SVGA_DBG(DEBUG_DMA, " --> got sid %p sz %d (buffer)\n", sbuf->handle, sbuf->b.width0); /* Add the new surface to the buffer surface list */ - ret = svga_buffer_add_host_surface(sbuf, sbuf->handle, &sbuf->key, - bind_flags); + sbuf->bufsurf = svga_buffer_add_host_surface(sbuf, sbuf->handle, + &sbuf->key, + bind_flags); + if (sbuf->bufsurf == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; + + sbuf->bufsurf->surface_state = + invalidated ? SVGA_SURFACE_STATE_INVALIDATED : + SVGA_SURFACE_STATE_CREATED; if (ss->sws->have_gb_objects) { /* Initialize the surface with zero */ @@ -263,14 +284,23 @@ svga_buffer_recreate_host_surface(struct svga_context *svga, if (ret == PIPE_OK) { /* Copy the surface data */ assert(sbuf->handle); + assert(sbuf->bufsurf); SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, old_handle, sbuf->handle, 0, 0, sbuf->b.width0)); + + /* Mark this surface as RENDERED */ + sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED; } /* Set the new bind flags for this buffer resource */ sbuf->bind_flags = bind_flags; + /* Set the dirty bit to signal a read back is needed before the data copied + * to this new surface can be referenced. + */ + sbuf->dirty = TRUE; + return ret; } @@ -286,6 +316,10 @@ compatible_bind_flags(unsigned bind_flags, return TRUE; else if ((bind_flags|tobind_flags) & PIPE_BIND_CONSTANT_BUFFER) return FALSE; + else if ((bind_flags & PIPE_BIND_STREAM_OUTPUT) && + (tobind_flags & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SHADER_BUFFER))) + /* Stream out cannot be mixed with UAV */ + return FALSE; else return TRUE; } @@ -313,7 +347,7 @@ svga_buffer_get_host_surface(struct svga_buffer *sbuf, /** * Adds the host surface to the buffer surface list. 
*/ -enum pipe_error +struct svga_buffer_surface * svga_buffer_add_host_surface(struct svga_buffer *sbuf, struct svga_winsys_surface *handle, struct svga_host_surface_cache_key *key, @@ -323,7 +357,7 @@ svga_buffer_add_host_surface(struct svga_buffer *sbuf, bufsurf = CALLOC_STRUCT(svga_buffer_surface); if (!bufsurf) - return PIPE_ERROR_OUT_OF_MEMORY; + return NULL; bufsurf->bind_flags = bind_flags; bufsurf->handle = handle; @@ -335,7 +369,7 @@ svga_buffer_add_host_surface(struct svga_buffer *sbuf, /* Set the new bind flags for this buffer resource */ sbuf->bind_flags = bind_flags; - return PIPE_OK; + return bufsurf; } @@ -358,12 +392,14 @@ svga_buffer_bind_host_surface(struct svga_context *svga, SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, sbuf->handle, bufsurf->handle, 0, 0, sbuf->b.width0)); + bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED; } /* Set this surface as the current one */ sbuf->handle = bufsurf->handle; sbuf->key = bufsurf->key; sbuf->bind_flags = bufsurf->bind_flags; + sbuf->bufsurf = bufsurf; } @@ -387,6 +423,9 @@ svga_buffer_validate_host_surface(struct svga_context *svga, struct svga_buffer_surface *bufsurf; enum pipe_error ret = PIPE_OK; + /* upload any dirty ranges */ + svga_buffer_upload_ranges(svga, sbuf); + /* Flush any pending upload first */ svga_buffer_upload_flush(svga, sbuf); @@ -409,7 +448,9 @@ svga_buffer_validate_host_surface(struct svga_context *svga, /* Destroy the old surface */ svga_screen_surface_destroy(svga_screen(sbuf->b.screen), - &bufsurf->key, &bufsurf->handle); + &bufsurf->key, + svga_was_buffer_rendered_to(bufsurf), + &bufsurf->handle); list_del(&bufsurf->list); FREE(bufsurf); @@ -434,7 +475,9 @@ svga_buffer_destroy_host_surface(struct svga_screen *ss, LIST_FOR_EACH_ENTRY_SAFE(bufsurf, next, &sbuf->surfaces, list) { SVGA_DBG(DEBUG_DMA, " ungrab sid %p sz %d\n", bufsurf->handle, sbuf->b.width0); - svga_screen_surface_destroy(ss, &bufsurf->key, &bufsurf->handle); + svga_screen_surface_destroy(ss, &bufsurf->key, 
+ svga_was_buffer_rendered_to(bufsurf), + &bufsurf->handle); FREE(bufsurf); } } @@ -464,57 +507,20 @@ svga_buffer_upload_gb_command(struct svga_context *svga, assert(numBoxes); assert(sbuf->dma.updates == NULL); - if (sbuf->dma.flags.discard) { - struct svga_3d_invalidate_gb_image *cicmd = NULL; - SVGA3dCmdInvalidateGBImage *invalidate_cmd; - const unsigned total_commands_size = - sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd); + /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */ + const unsigned total_commands_size = + sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd); - /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by - * 'numBoxes' UPDATE_GB_IMAGE commands. Allocate all at once rather - * than with separate commands because we need to properly deal with - * filling the command buffer. - */ - invalidate_cmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_INVALIDATE_GB_IMAGE, - total_commands_size, 1 + numBoxes); - if (!invalidate_cmd) - return PIPE_ERROR_OUT_OF_MEMORY; - - cicmd = container_of(invalidate_cmd, struct svga_3d_invalidate_gb_image, body); - cicmd->header.size = sizeof(*invalidate_cmd); - swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL, - sbuf->handle, - (SVGA_RELOC_WRITE | - SVGA_RELOC_INTERNAL | - SVGA_RELOC_DMA)); - invalidate_cmd->image.face = 0; - invalidate_cmd->image.mipmap = 0; - - /* The whole_update_command is a SVGA3dCmdHeader plus the - * SVGA3dCmdUpdateGBImage command. 
- */ - whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1]; - /* initialize the first UPDATE_GB_IMAGE command */ - whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - update_cmd = &whole_update_cmd->body; - - } else { - /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */ - const unsigned total_commands_size = - sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd); - - update_cmd = SVGA3D_FIFOReserve(swc, - SVGA_3D_CMD_UPDATE_GB_IMAGE, - total_commands_size, numBoxes); - if (!update_cmd) - return PIPE_ERROR_OUT_OF_MEMORY; + update_cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_UPDATE_GB_IMAGE, + total_commands_size, numBoxes); + if (!update_cmd) + return PIPE_ERROR_OUT_OF_MEMORY; - /* The whole_update_command is a SVGA3dCmdHeader plus the - * SVGA3dCmdUpdateGBImage command. - */ - whole_update_cmd = container_of(update_cmd, struct svga_3d_update_gb_image, body); - } + /* The whole_update_command is a SVGA3dCmdHeader plus the + * SVGA3dCmdUpdateGBImage command. + */ + whole_update_cmd = container_of(update_cmd, struct svga_3d_update_gb_image, body); /* Init the first UPDATE_GB_IMAGE command */ whole_update_cmd->header.size = sizeof(*update_cmd); @@ -840,7 +846,6 @@ svga_buffer_add_range(struct svga_buffer *sbuf, unsigned start, unsigned end) } - /** * Copy the contents of the malloc buffer to a hardware buffer. */ @@ -979,6 +984,61 @@ svga_buffer_upload_piecewise(struct svga_screen *ss, /** + * A helper function to add an update command for the dirty ranges if there + * isn't already one. + */ +static void +svga_buffer_upload_ranges(struct svga_context *svga, + struct svga_buffer *sbuf) +{ + struct pipe_screen *screen = svga->pipe.screen; + struct svga_screen *ss = svga_screen(screen); + enum pipe_error ret = PIPE_OK; + + if (sbuf->map.num_ranges) { + if (!sbuf->dma.pending) { + /* No pending DMA/update commands yet. 
*/ + + /* Migrate the data from swbuf -> hwbuf if necessary */ + ret = svga_buffer_update_hw(svga, sbuf, sbuf->bind_flags); + if (ret == PIPE_OK) { + /* Emit DMA or UpdateGBImage commands */ + SVGA_RETRY_OOM(svga, ret, svga_buffer_upload_command(svga, sbuf)); + if (ret == PIPE_OK) { + sbuf->dma.pending = TRUE; + assert(!sbuf->head.prev && !sbuf->head.next); + list_addtail(&sbuf->head, &svga->dirty_buffers); + } + } + else if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + /* + * The buffer is too big to fit in the GMR aperture, so break it in + * smaller pieces. + */ + ret = svga_buffer_upload_piecewise(ss, svga, sbuf); + } + + if (ret != PIPE_OK) { + /* + * Something unexpected happened above. There is very little that + * we can do other than proceeding while ignoring the dirty ranges. + */ + assert(0); + sbuf->map.num_ranges = 0; + } + } + else { + /* + * There a pending dma already. Make sure it is from this context. + */ + assert(sbuf->dma.svga == svga); + } + } + return; +} + + +/** * Get (or create/upload) the winsys surface handle so that we can * refer to this buffer in fifo commands. * This function will create the host surface, and in the GB case also the @@ -1033,48 +1093,12 @@ svga_buffer_handle(struct svga_context *svga, struct pipe_resource *buf, } assert(sbuf->handle); + assert(sbuf->bufsurf); if (svga->swc->force_coherent || sbuf->key.coherent) return sbuf->handle; - if (sbuf->map.num_ranges) { - if (!sbuf->dma.pending) { - /* No pending DMA/update commands yet. 
*/ - - /* Migrate the data from swbuf -> hwbuf if necessary */ - ret = svga_buffer_update_hw(svga, sbuf, sbuf->bind_flags); - if (ret == PIPE_OK) { - /* Emit DMA or UpdateGBImage commands */ - SVGA_RETRY_OOM(svga, ret, svga_buffer_upload_command(svga, sbuf)); - if (ret == PIPE_OK) { - sbuf->dma.pending = TRUE; - assert(!sbuf->head.prev && !sbuf->head.next); - list_addtail(&sbuf->head, &svga->dirty_buffers); - } - } - else if (ret == PIPE_ERROR_OUT_OF_MEMORY) { - /* - * The buffer is too big to fit in the GMR aperture, so break it in - * smaller pieces. - */ - ret = svga_buffer_upload_piecewise(ss, svga, sbuf); - } - - if (ret != PIPE_OK) { - /* - * Something unexpected happened above. There is very little that - * we can do other than proceeding while ignoring the dirty ranges. - */ - assert(0); - sbuf->map.num_ranges = 0; - } - } - else { - /* - * There a pending dma already. Make sure it is from this context. - */ - assert(sbuf->dma.svga == svga); - } - } + /* upload any dirty ranges */ + svga_buffer_upload_ranges(svga, sbuf); assert(sbuf->map.num_ranges == 0 || sbuf->dma.pending); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h index c2d749b20..7b15a66d1 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_buffer_upload.h @@ -55,7 +55,7 @@ svga_buffer_recreate_host_surface(struct svga_context *svga, struct svga_buffer *sbuf, unsigned bind_flags); -enum pipe_error +struct svga_buffer_surface * svga_buffer_add_host_surface(struct svga_buffer *sbuf, struct svga_winsys_surface *handle, struct svga_host_surface_cache_key *key, diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c index 412be0ada..f2ab20edb 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c +++ 
b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.c @@ -231,8 +231,7 @@ need_tex_readback(struct svga_transfer *st) if ((st->base.usage & PIPE_MAP_WRITE) && ((st->base.usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) == 0)) { - return svga_was_texture_rendered_to(svga_texture(st->base.resource), - st->slice, st->base.level); + return svga_was_texture_rendered_to(svga_texture(st->base.resource)); } return FALSE; @@ -240,30 +239,19 @@ need_tex_readback(struct svga_transfer *st) static void -readback_image_vgpu9(struct svga_context *svga, - struct svga_winsys_surface *surf, - unsigned slice, - unsigned level) +readback_texture_surface(struct svga_context *svga, + struct svga_texture *tex, + struct svga_winsys_surface *surf) { - SVGA_RETRY(svga, SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level)); -} + SVGA_RETRY(svga, SVGA3D_ReadbackGBSurface(svga->swc, surf)); + /* Mark the texture surface as UPDATED */ + tex->surface_state = SVGA_SURFACE_STATE_UPDATED; -static void -readback_image_vgpu10(struct svga_context *svga, - struct svga_winsys_surface *surf, - unsigned slice, - unsigned level, - unsigned numMipLevels) -{ - unsigned subResource; - - subResource = slice * numMipLevels + level; - SVGA_RETRY(svga, SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, - subResource)); + svga->hud.num_readbacks++; + SVGA_STATS_COUNT_INC(svga_sws(svga), SVGA_STATS_COUNT_TEXREADBACK); } - /** * Use DMA for the transfer request */ @@ -346,31 +334,23 @@ svga_texture_transfer_map_direct(struct svga_context *svga, struct svga_texture *tex = svga_texture(texture); struct svga_winsys_surface *surf = tex->handle; unsigned level = st->base.level; - unsigned w, h, nblocksx, nblocksy, i; + unsigned w, h, nblocksx, nblocksy; unsigned usage = st->base.usage; if (need_tex_readback(st)) { svga_surfaces_flush(svga); if (!svga->swc->force_coherent || tex->imported) { - for (i = 0; i < st->box.d; i++) { - if (svga_have_vgpu10(svga)) { - readback_image_vgpu10(svga, surf, st->slice + i, level, - 
tex->b.last_level + 1); - } else { - readback_image_vgpu9(svga, surf, st->slice + i, level); - } - } - svga->hud.num_readbacks++; - SVGA_STATS_COUNT_INC(sws, SVGA_STATS_COUNT_TEXREADBACK); + /* Readback the whole surface */ + readback_texture_surface(svga, tex, surf); - svga_context_flush(svga, NULL); + svga_context_finish(svga); } /* * Note: if PIPE_MAP_DISCARD_WHOLE_RESOURCE were specified * we could potentially clear the flag for all faces/layers/mips. */ - svga_clear_texture_rendered_to(tex, st->slice, level); + svga_clear_texture_rendered_to(tex); } else { assert(usage & PIPE_MAP_WRITE); @@ -427,7 +407,6 @@ svga_texture_transfer_map_direct(struct svga_context *svga, map = svga->swc->surface_map(svga->swc, surf, usage, &retry, &rebind); svga_retry_exit(svga); } - if (map && rebind) { enum pipe_error ret; @@ -556,10 +535,11 @@ svga_texture_transfer_map(struct pipe_context *pipe, break; } - /* Force direct map for multisample surface */ - if (texture->nr_samples > 1) { - assert(svga_have_gb_objects(svga)); - assert(sws->have_sm4_1); + /* We never want to use DMA transfers on systems with GBObjects because + * it causes serialization issues and in SVGAv3 vram is gone which + * makes it impossible to support both at the same time. 
+ */ + if (svga_have_gb_objects(svga)) { use_direct_map = TRUE; } @@ -584,8 +564,7 @@ svga_texture_transfer_map(struct pipe_context *pipe, boolean can_use_upload = tex->can_use_upload && !(st->base.usage & PIPE_MAP_READ); boolean was_rendered_to = - svga_was_texture_rendered_to(svga_texture(texture), - st->slice, st->base.level); + svga_was_texture_rendered_to(svga_texture(texture)); /* If the texture was already rendered to and upload buffer * is supported, then we will use upload buffer to @@ -721,7 +700,7 @@ svga_texture_transfer_unmap_dma(struct svga_context *svga, } svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags); - svga_set_texture_rendered_to(tex, st->slice, st->base.level); + svga_set_texture_rendered_to(tex); } FREE(st->swbuf); @@ -785,6 +764,9 @@ svga_texture_transfer_unmap_direct(struct svga_context *svga, transfer->level); } } + + /* Mark the texture surface state as UPDATED */ + tex->surface_state = SVGA_SURFACE_STATE_UPDATED; } } @@ -901,12 +883,6 @@ svga_texture_create(struct pipe_screen *screen, goto fail_notex; } - tex->rendered_to = CALLOC(template->depth0 * template->array_size, - sizeof(tex->rendered_to[0])); - if (!tex->rendered_to) { - goto fail; - } - tex->dirty = CALLOC(template->depth0 * template->array_size, sizeof(tex->dirty[0])); if (!tex->dirty) { @@ -1062,14 +1038,22 @@ svga_texture_create(struct pipe_screen *screen, goto fail; } - /* Use typeless formats for sRGB and depth resources. Typeless - * formats can be reinterpreted as other formats. For example, - * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as - * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM. - */ - if (svgascreen->sws->have_vgpu10 && - (util_format_is_srgb(template->format) || - format_has_depth(template->format))) { + bool use_typeless = FALSE; + if (svgascreen->sws->have_gl43) { + /* Do not use typeless for SHARED, SCANOUT or DISPLAY_TARGET surfaces. 
*/ + use_typeless = !(bindings & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT | + PIPE_BIND_DISPLAY_TARGET)); + } else if (svgascreen->sws->have_vgpu10) { + /* For VGPU10 device, use typeless formats only for sRGB and depth resources + * if they do not have SHARED, SCANOUT or DISPLAY_TARGET bind flags + */ + use_typeless = (util_format_is_srgb(template->format) || + format_has_depth(template->format)) && + !(bindings & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT | + PIPE_BIND_DISPLAY_TARGET)); + } + + if (use_typeless) { SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format); if (0) { debug_printf("Convert resource type %s -> %s (bind 0x%x)\n", @@ -1090,13 +1074,35 @@ svga_texture_create(struct pipe_screen *screen, tex->key.format = typeless; } + if (svgascreen->sws->have_sm5 && + bindings & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET)) { + if (template->nr_samples < 2 && + screen->is_format_supported(screen, template->format, + template->target, + template->nr_samples, + template->nr_storage_samples, + PIPE_BIND_SHADER_IMAGE)) { + /* Any non multi-samples texture that can be used as a render target + * or sampler view can be bound to an image unit. + * So make sure to set the UAV flag here. 
+ */ + tex->key.flags |= SVGA3D_SURFACE_BIND_UAVIEW; + } + } + SVGA_DBG(DEBUG_DMA, "surface_create for texture\n"); + boolean invalidated; tex->handle = svga_screen_surface_create(svgascreen, bindings, tex->b.usage, - &tex->validated, &tex->key); + &invalidated, &tex->key); if (!tex->handle) { goto fail; } + if (invalidated) { + tex->surface_state = SVGA_SURFACE_STATE_INVALIDATED; + } else { + tex->surface_state = SVGA_SURFACE_STATE_CREATED; + } SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); @@ -1122,8 +1128,6 @@ svga_texture_create(struct pipe_screen *screen, fail: if (tex->dirty) FREE(tex->dirty); - if (tex->rendered_to) - FREE(tex->rendered_to); if (tex->defined) FREE(tex->defined); FREE(tex); @@ -1181,9 +1185,24 @@ svga_texture_from_handle(struct pipe_screen *screen, tex->key.format = format; tex->handle = srf; - tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0])); - if (!tex->rendered_to) - goto out_no_rendered_to; + + /* set bind flags for the imported texture handle according to the bind + * flags in the template + */ + if (template->bind & PIPE_BIND_RENDER_TARGET){ + tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; + tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET; + } + + if (template->bind & PIPE_BIND_DEPTH_STENCIL) { + tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL; + } + + if (template->bind & PIPE_BIND_SAMPLER_VIEW) { + tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; + tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE; + } tex->dirty = CALLOC(1, sizeof(tex->dirty[0])); if (!tex->dirty) @@ -1196,8 +1215,6 @@ svga_texture_from_handle(struct pipe_screen *screen, return &tex->b; out_no_dirty: - FREE(tex->rendered_to); -out_no_rendered_to: FREE(tex->defined); out_no_defined: FREE(tex); @@ -1222,10 +1239,6 @@ svga_texture_generate_mipmap(struct pipe_context *pipe, assert(svga_have_vgpu10(svga)); - /* Only support 2D texture for now */ - if (pt->target != 
PIPE_TEXTURE_2D) - return false; - /* Fallback to the mipmap generation utility for those formats that * do not support hw generate mipmap */ @@ -1239,11 +1252,21 @@ svga_texture_generate_mipmap(struct pipe_context *pipe, return false; templ.format = format; + templ.target = pt->target; templ.u.tex.first_layer = first_layer; templ.u.tex.last_layer = last_layer; templ.u.tex.first_level = base_level; templ.u.tex.last_level = last_level; + if (pt->target == PIPE_TEXTURE_CUBE) { + /** + * state tracker generates mipmap one face at a time. + * But SVGA generates mipmap for the entire cubemap. + */ + templ.u.tex.first_layer = 0; + templ.u.tex.last_layer = 5; + } + psv = pipe->create_sampler_view(pipe, pt, &templ); if (psv == NULL) return false; @@ -1254,6 +1277,9 @@ svga_texture_generate_mipmap(struct pipe_context *pipe, SVGA_RETRY(svga, SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle)); pipe_sampler_view_reference(&psv, NULL); + /* Mark the texture surface as RENDERED */ + svga_set_texture_rendered_to(tex); + svga->hud.num_generate_mipmap++; return true; @@ -1449,11 +1475,11 @@ svga_texture_transfer_unmap_upload(struct svga_context *svga, dstsurf, subResource, &st->upload.box)); offset += st->base.layer_stride; - - /* Set rendered-to flag */ - svga_set_texture_rendered_to(tex, layer, st->base.level); } + /* Mark the texture surface state as RENDERED */ + svga_set_texture_rendered_to(tex); + pipe_resource_reference(&st->upload.buf, NULL); } @@ -1476,8 +1502,12 @@ svga_texture_device_format_has_alpha(struct pipe_resource *texture) /* the svga_texture() call below is invalid for PIPE_BUFFER resources */ assert(texture->target != PIPE_BUFFER); - enum svga3d_block_desc block_desc = - svga3dsurface_get_desc(svga_texture(texture)->key.format)->block_desc; + const struct svga3d_surface_desc *surf_desc = + svga3dsurface_get_desc(svga_texture(texture)->key.format); + + enum svga3d_block_desc block_desc = surf_desc->block_desc; - return !!(block_desc & SVGA3DBLOCKDESC_ALPHA); 
+ return !!((block_desc & SVGA3DBLOCKDESC_ALPHA) || + ((block_desc == SVGA3DBLOCKDESC_TYPELESS) && + (surf_desc->bitDepth.alpha > 0))); } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h index cbfc46426..e1872faad 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_resource_texture.h @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_transfer.h" #include "svga_screen_cache.h" +#include "svga_context.h" struct pipe_context; struct pipe_screen; @@ -74,13 +75,6 @@ struct svga_texture struct svga_winsys_surface *handle; /** - * Whether the host side surface is validated, either through the - * InvalidateGBSurface command or after the surface is updated - * or rendered to. - */ - boolean validated; - - /** * Whether the host side surface is imported and not created by this * driver. */ @@ -101,6 +95,8 @@ struct svga_texture */ ushort *dirty; + enum svga_surface_state surface_state; + /** * A cached backing host side surface to be used if this texture is being * used for rendering and sampling at the same time. 
@@ -209,7 +205,6 @@ svga_define_texture_level(struct svga_texture *tex, { check_face_level(tex, face, level); tex->defined[face] |= 1 << level; - tex->validated = TRUE; } @@ -223,30 +218,22 @@ svga_is_texture_level_defined(const struct svga_texture *tex, static inline void -svga_set_texture_rendered_to(struct svga_texture *tex, - unsigned face, unsigned level) +svga_set_texture_rendered_to(struct svga_texture *tex) { - check_face_level(tex, face, level); - tex->rendered_to[face] |= 1 << level; - tex->validated = TRUE; + tex->surface_state = SVGA_SURFACE_STATE_RENDERED; } static inline void -svga_clear_texture_rendered_to(struct svga_texture *tex, - unsigned face, unsigned level) +svga_clear_texture_rendered_to(struct svga_texture *tex) { - check_face_level(tex, face, level); - tex->rendered_to[face] &= ~(1 << level); + tex->surface_state = SVGA_SURFACE_STATE_UPDATED; } - static inline boolean -svga_was_texture_rendered_to(const struct svga_texture *tex, - unsigned face, unsigned level) +svga_was_texture_rendered_to(const struct svga_texture *tex) { - check_face_level(tex, face, level); - return !!(tex->rendered_to[face] & (1 << level)); + return (tex->surface_state == SVGA_SURFACE_STATE_RENDERED); } static inline void diff --git a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c index fa0c02604..7adbee06d 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_sampler_view.c @@ -223,7 +223,9 @@ svga_destroy_sampler_view_priv(struct svga_sampler_view *v) if (v->handle != tex->handle) { struct svga_screen *ss = svga_screen(v->texture->screen); SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); - svga_screen_surface_destroy(ss, &v->key, &v->handle); + svga_screen_surface_destroy(ss, &v->key, + svga_was_texture_rendered_to(tex), + &v->handle); } /* Note: we're not refcounting the texture resource here to avoid diff --git 
a/lib/mesa/src/gallium/drivers/svga/svga_screen.c b/lib/mesa/src/gallium/drivers/svga/svga_screen.c index 2537ac7a8..22cd21f7e 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.c @@ -74,6 +74,9 @@ static const struct debug_named_value svga_debug_flags[] = { { "streamout", DEBUG_STREAMOUT, NULL }, { "query", DEBUG_QUERY, NULL }, { "samplers", DEBUG_SAMPLERS, NULL }, + { "image", DEBUG_IMAGE, NULL }, + { "uav", DEBUG_UAV, NULL }, + { "retry", DEBUG_RETRY, NULL }, DEBUG_NAMED_VALUE_END }; #endif @@ -153,14 +156,22 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param) struct svga_winsys_screen *sws = svgascreen->sws; switch (param) { + case PIPE_CAPF_MIN_LINE_WIDTH: + case PIPE_CAPF_MIN_LINE_WIDTH_AA: + case PIPE_CAPF_MIN_POINT_SIZE: + case PIPE_CAPF_MIN_POINT_SIZE_AA: + return 1; + case PIPE_CAPF_POINT_SIZE_GRANULARITY: + case PIPE_CAPF_LINE_WIDTH_GRANULARITY: + return 0.1; case PIPE_CAPF_MAX_LINE_WIDTH: return svgascreen->maxLineWidth; case PIPE_CAPF_MAX_LINE_WIDTH_AA: return svgascreen->maxLineWidthAA; - case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_SIZE: FALLTHROUGH; - case PIPE_CAPF_MAX_POINT_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_SIZE_AA: return svgascreen->maxPointSize; case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: @@ -212,6 +223,9 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return sws->have_vgpu10; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return sws->have_vgpu10 ? 16 : 0; + case PIPE_CAP_TEXTURE_SWIZZLE: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -251,11 +265,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. 
for GL 1.5 */ return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT: return 1; - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return sws->have_vgpu10; - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: return !sws->have_vgpu10; case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: @@ -267,7 +281,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; /* expected for GL_ARB_framebuffer_object */ case PIPE_CAP_GLSL_FEATURE_LEVEL: - if (sws->have_sm5) { + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: + if (sws->have_gl43) { + return 430; + } else if (sws->have_sm5) { return 410; } else if (sws->have_vgpu10) { return 330; @@ -275,10 +292,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 120; } - case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: - return sws->have_sm5 ? 410 : (sws->have_vgpu10 ? 330 : 120); - - case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_TEXTURE_TRANSFER_MODES: return 0; case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: @@ -290,7 +304,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_QUERY_TIMESTAMP: - case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VS_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_FAKE_SW_MSAA: @@ -360,12 +374,28 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_VERTEX_STREAMS: return sws->have_sm5 ? 4 : 0; case PIPE_CAP_COMPUTE: - return 0; + return sws->have_gl43; case PIPE_CAP_MAX_VARYINGS: - return sws->have_vgpu10 ? VGPU10_MAX_FS_INPUTS : 10; + /* According to the spec, max varyings does not include the components + * for position, so remove one count from the max for position. + */ + return sws->have_vgpu10 ? 
VGPU10_MAX_FS_INPUTS-1 : 10; case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: return sws->have_coherent; + case PIPE_CAP_START_INSTANCE: + return sws->have_sm5; + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: + return sws->have_sm5; + + case PIPE_CAP_SAMPLER_VIEW_TARGET: + return sws->have_gl43; + + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + return sws->have_gl43; + + case PIPE_CAP_CLIP_HALFZ: + return sws->have_gl43; case PIPE_CAP_SHAREABLE_SHADERS: return 0; @@ -374,6 +404,15 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: return 0; + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return sws->have_gl43 ? 16 : 0; + + case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: + case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: + return sws->have_gl43 ? SVGA_MAX_SHADER_BUFFERS : 0; + case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: + case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + return sws->have_gl43 ? SVGA_MAX_ATOMIC_BUFFERS : 0; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return 64; case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: @@ -393,7 +432,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VENDOR_ID: return 0x15ad; /* VMware Inc. */ case PIPE_CAP_DEVICE_ID: - return 0x0405; /* assume SVGA II */ + if (sws->device_id) { + return sws->device_id; + } else { + return 0x0405; /* assume SVGA II */ + } case PIPE_CAP_ACCELERATED: return 0; /* XXX: */ case PIPE_CAP_VIDEO_MEMORY: @@ -418,6 +461,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) /* Verify this once protocol is finalized. Setting it to minimum value. */ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: return sws->have_sm5 ? 
30 : 0; + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + return 1; + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + return 1; + case PIPE_CAP_IMAGE_STORE_FORMATTED: + return sws->have_gl43; default: return u_pipe_screen_get_param_defaults(screen, param); } @@ -613,7 +662,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen, (shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL)) return 0; - if (shader == PIPE_SHADER_COMPUTE) + if ((!sws->have_gl43) && (shader == PIPE_SHADER_COMPUTE)) return 0; /* NOTE: we do not query the device for any caps/limits at this time */ @@ -631,13 +680,13 @@ vgpu10_get_shader_param(struct pipe_screen *screen, if (shader == PIPE_SHADER_FRAGMENT) return VGPU10_MAX_FS_INPUTS; else if (shader == PIPE_SHADER_GEOMETRY) - return VGPU10_MAX_GS_INPUTS; + return svgascreen->max_gs_inputs; else if (shader == PIPE_SHADER_TESS_CTRL) return VGPU11_MAX_HS_INPUT_CONTROL_POINTS; else if (shader == PIPE_SHADER_TESS_EVAL) return VGPU11_MAX_DS_INPUT_CONTROL_POINTS; else - return VGPU10_MAX_VS_INPUTS; + return svgascreen->max_vs_inputs; case PIPE_SHADER_CAP_MAX_OUTPUTS: if (shader == PIPE_SHADER_FRAGMENT) return VGPU10_MAX_FS_OUTPUTS; @@ -648,7 +697,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen, else if (shader == PIPE_SHADER_TESS_EVAL) return VGPU11_MAX_DS_OUTPUTS; else - return VGPU10_MAX_VS_OUTPUTS; + return svgascreen->max_vs_outputs; + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: @@ -673,23 +723,38 @@ vgpu10_get_shader_param(struct pipe_screen *screen, return FALSE; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - return SVGA3D_DX_MAX_SAMPLERS; + return sws->have_gl43 ? 
PIPE_MAX_SAMPLERS : SVGA3D_DX_MAX_SAMPLERS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: - return 1 << PIPE_SHADER_IR_TGSI; + if (sws->have_gl43) + return 1 << PIPE_SHADER_IR_TGSI; + else + return 0; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + /* For the above cases, we rely on the GLSL compiler to translate/lower + * the TGSI instruction into other instructions we do support. + */ + return 0; case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: - case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return sws->have_sm5; + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return sws->have_gl43 ? SVGA_MAX_IMAGES : 0; + + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return sws->have_gl43 ? SVGA_MAX_SHADER_BUFFERS : 0; + + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + return sws->have_gl43 ? 
SVGA_MAX_ATOMIC_BUFFERS : 0; + + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: case PIPE_SHADER_CAP_INT64_ATOMICS: - case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: - case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -716,6 +781,45 @@ svga_get_shader_param(struct pipe_screen *screen, enum pipe_shader_type shader, } +static int +svga_sm5_get_compute_param(struct pipe_screen *screen, + enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, + void *ret) +{ + ASSERTED struct svga_screen *svgascreen = svga_screen(screen); + ASSERTED struct svga_winsys_screen *sws = svgascreen->sws; + uint64_t *iret = (uint64_t *)ret; + + assert(sws->have_gl43); + assert(ir_type == PIPE_SHADER_IR_TGSI); + + switch (param) { + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + iret[0] = 65535; + iret[1] = 65535; + iret[2] = 65535; + return 3 * sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + iret[0] = 1024; + iret[1] = 1024; + iret[2] = 64; + return 3 * sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + *iret = 1024; + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + *iret = 32768; + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: + *iret = 0; + return sizeof(uint64_t); + default: + debug_printf("Unexpected compute param %u\n", param); + } + return 0; +} + static void svga_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, @@ -948,6 +1052,9 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->fence_get_fd = svga_fence_get_fd; screen->get_driver_query_info = svga_get_driver_query_info; + + screen->get_compute_param = svga_sm5_get_compute_param; + svgascreen->sws = sws; svga_init_screen_resource_functions(svgascreen); @@ -965,7 +1072,29 @@ svga_screen_create(struct svga_winsys_screen *sws) goto error2; } + if (sws->have_gl43) { 
+ svgascreen->forcedSampleCount = + get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FORCED_SAMPLE_COUNT, 0); + + sws->have_gl43 = sws->have_gl43 && (svgascreen->forcedSampleCount >= 4); + + /* Allow a temporary environment variable to enable/disable GL43 support. + */ + sws->have_gl43 = + debug_get_bool_option("SVGA_GL43", sws->have_gl43); + + svgascreen->debug.sampler_state_mapping = + debug_get_bool_option("SVGA_SAMPLER_STATE_MAPPING", FALSE); + } + else { + /* sampler state mapping code is only enabled with GL43 + * due to the limitation in SW Renderer. (VMware bug 2825014) + */ + svgascreen->debug.sampler_state_mapping = FALSE; + } + debug_printf("%s enabled\n", + sws->have_gl43 ? "SM5+" : sws->have_sm5 ? "SM5" : sws->have_sm4_1 ? "SM4_1" : sws->have_vgpu10 ? "VGPU10" : "VGPU9"); @@ -1041,10 +1170,15 @@ svga_screen_create(struct svga_winsys_screen *sws) } /* Maximum number of constant buffers */ - svgascreen->max_const_buffers = - get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1); - svgascreen->max_const_buffers = MIN2(svgascreen->max_const_buffers, - SVGA_MAX_CONST_BUFS); + if (sws->have_gl43) { + svgascreen->max_const_buffers = SVGA_MAX_CONST_BUFS; + } + else { + svgascreen->max_const_buffers = + get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1); + svgascreen->max_const_buffers = MIN2(svgascreen->max_const_buffers, + SVGA_MAX_CONST_BUFS); + } svgascreen->haveBlendLogicops = get_bool_cap(sws, SVGA3D_DEVCAP_LOGIC_BLENDOPS, FALSE); @@ -1052,6 +1186,18 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->is_format_supported = svga_is_dx_format_supported; svgascreen->max_viewports = SVGA3D_DX_MAX_VIEWPORTS; + + /* Shader limits */ + if (sws->have_sm4_1) { + svgascreen->max_vs_inputs = VGPU10_1_MAX_VS_INPUTS; + svgascreen->max_vs_outputs = VGPU10_1_MAX_VS_OUTPUTS; + svgascreen->max_gs_inputs = VGPU10_1_MAX_GS_INPUTS; + } + else { + svgascreen->max_vs_inputs = VGPU10_MAX_VS_INPUTS; + svgascreen->max_vs_outputs = VGPU10_MAX_VS_OUTPUTS; + 
svgascreen->max_gs_inputs = VGPU10_MAX_GS_INPUTS; + } } else { /* VGPU9 */ @@ -1089,6 +1235,11 @@ svga_screen_create(struct svga_winsys_screen *sws) /* Only one viewport */ svgascreen->max_viewports = 1; + + /* Shader limits */ + svgascreen->max_vs_inputs = 16; + svgascreen->max_vs_outputs = 10; + svgascreen->max_gs_inputs = 0; } /* common VGPU9 / VGPU10 caps */ diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen.h b/lib/mesa/src/gallium/drivers/svga/svga_screen.h index aa0001b11..1d2db59a3 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen.h @@ -58,14 +58,21 @@ struct svga_screen unsigned max_const_buffers; unsigned max_viewports; unsigned ms_samples; + unsigned forcedSampleCount; /* available with GL43 capable device only */ + unsigned max_vs_inputs; + unsigned max_vs_outputs; + unsigned max_gs_inputs; struct { - boolean force_level_surface_view; - boolean force_surface_view; - boolean no_surface_view; - boolean force_sampler_view; - boolean no_sampler_view; - boolean no_cache_index_buffers; + unsigned force_level_surface_view:1; + unsigned force_surface_view:1; + unsigned no_surface_view:1; + unsigned force_sampler_view:1; + unsigned no_sampler_view:1; + unsigned no_cache_index_buffers:1; + unsigned tessellation:1; + unsigned sampler_state_mapping:1; + unsigned pad:24; } debug; unsigned texture_timestamp; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c index aba6e304f..7765a15aa 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.c @@ -212,6 +212,7 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen, static void svga_screen_cache_add(struct svga_screen *svgascreen, const struct svga_host_surface_cache_key *key, + boolean to_invalidate, struct svga_winsys_surface **p_handle) { struct svga_host_surface_cache *cache = &svgascreen->cache; @@ 
-293,8 +294,12 @@ svga_screen_cache_add(struct svga_screen *svgascreen, "cache sid %p\n", entry->handle); /* If we don't have gb objects, we don't need to invalidate. */ - if (sws->have_gb_objects) - list_add(&entry->head, &cache->validated); + if (sws->have_gb_objects) { + if (to_invalidate) + list_add(&entry->head, &cache->validated); + else + list_add(&entry->head, &cache->invalidated); + } else list_add(&entry->head, &cache->invalidated); @@ -603,6 +608,7 @@ svga_screen_surface_create(struct svga_screen *svgascreen, void svga_screen_surface_destroy(struct svga_screen *svgascreen, const struct svga_host_surface_cache_key *key, + boolean to_invalidate, struct svga_winsys_surface **p_handle) { struct svga_winsys_screen *sws = svgascreen->sws; @@ -612,7 +618,7 @@ svga_screen_surface_destroy(struct svga_screen *svgascreen, * that case. */ if (SVGA_SURFACE_CACHE_ENABLED && key->cachable) { - svga_screen_cache_add(svgascreen, key, p_handle); + svga_screen_cache_add(svgascreen, key, to_invalidate, p_handle); } else { SVGA_DBG(DEBUG_DMA, diff --git a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h index c2bfc076f..5793869f3 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_screen_cache.h @@ -148,12 +148,13 @@ svga_screen_cache_init(struct svga_screen *svgascreen); struct svga_winsys_surface * svga_screen_surface_create(struct svga_screen *svgascreen, unsigned bind_flags, enum pipe_resource_usage usage, - boolean *validated, + boolean *invalidated, struct svga_host_surface_cache_key *key); void svga_screen_surface_destroy(struct svga_screen *svgascreen, const struct svga_host_surface_cache_key *key, + boolean to_invalidate, struct svga_winsys_surface **handle); void diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.c b/lib/mesa/src/gallium/drivers/svga/svga_shader.c index 3c48d6724..68883a713 100644 --- 
a/lib/mesa/src/gallium/drivers/svga/svga_shader.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.c @@ -223,6 +223,16 @@ static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = { PIPE_SWIZZLE_NONE }; +static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = { + PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_0, + PIPE_SWIZZLE_1, + PIPE_SWIZZLE_NONE +}; + static VGPU10_RESOURCE_RETURN_TYPE vgpu10_return_type(enum pipe_format format) @@ -243,6 +253,17 @@ vgpu10_return_type(enum pipe_format format) /** + * A helper function to return TRUE if the specified format + * is a supported format for sample_c instruction. + */ +static bool +isValidSampleCFormat(enum pipe_format format) +{ + return util_format_is_depth_or_stencil(format); +} + + +/** * Initialize the shader-neutral fields of svga_compile_key from context * state. This is basically the texture-related state. */ @@ -253,15 +274,28 @@ svga_init_shader_key_common(const struct svga_context *svga, struct svga_compile_key *key) { unsigned i, idx = 0; + unsigned sampler_slots = 0; assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views)); /* In case the number of samplers and sampler_views doesn't match, - * loop over the lower of the two counts. + * loop over the upper of the two counts. */ key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type], svga->curr.num_samplers[shader_type]); + key->num_samplers = 0; + + /* Set sampler_state_mapping only if GL43 is supported and + * the number of samplers exceeds SVGA limit or the sampler state + * mapping env is set. + */ + boolean sampler_state_mapping = + svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]); + + key->sampler_state_mapping = + key->num_textures && sampler_state_mapping ? 
1 : 0; + for (i = 0; i < key->num_textures; i++) { struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i]; const struct svga_sampler_state @@ -269,22 +303,21 @@ svga_init_shader_key_common(const struct svga_context *svga, if (view) { assert(view->texture); - assert(view->texture->target < (1 << 4)); /* texture_target:4 */ enum pipe_texture_target target = view->target; + assert(target < (1 << 4)); /* texture_target:4 */ key->tex[i].target = target; key->tex[i].sampler_return_type = vgpu10_return_type(view->format); key->tex[i].sampler_view = 1; - /* 1D/2D array textures with one slice and cube map array textures * with one cube are treated as non-arrays by the SVGA3D device. * Set the is_array flag only if we know that we have more than 1 * element. This will be used to select shader instruction/resource * types during shader translation. */ - switch (view->texture->target) { + switch (target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: key->tex[i].is_array = view->texture->array_size > 1; @@ -300,10 +333,12 @@ svga_init_shader_key_common(const struct svga_context *svga, key->tex[i].num_samples = view->texture->nr_samples; const enum pipe_swizzle *swizzle_tab; - if (view->texture->target == PIPE_BUFFER) { + if (target == PIPE_BUFFER) { SVGA3dSurfaceFormat svga_format; unsigned tf_flags; + assert(view->texture->target == PIPE_BUFFER); + /* Apply any special swizzle mask for the view format if needed */ svga_translate_texture_buffer_view_format(view->format, @@ -334,11 +369,24 @@ svga_init_shader_key_common(const struct svga_context *svga, view->texture->format == PIPE_FORMAT_DXT1_SRGB) swizzle_tab = set_alpha; + if (view->format == PIPE_FORMAT_X24S8_UINT || + view->format == PIPE_FORMAT_X32_S8X24_UINT) + swizzle_tab = set_YYYY; + /* Save the compare function as we need to handle * depth compare in the shader. 
*/ key->tex[i].compare_mode = sampler->compare_mode; key->tex[i].compare_func = sampler->compare_func; + + /* Set the compare_in_shader bit if the view format + * is not a supported format for shadow compare. + * In this case, we'll do the comparison in the shader. + */ + if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) && + !isValidSampleCFormat(view->format)) { + key->tex[i].compare_in_shader = TRUE; + } } key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r]; @@ -364,6 +412,139 @@ svga_init_shader_key_common(const struct svga_context *svga, key->tex[i].texel_bias = TRUE; } } + + if (!sampler_state_mapping) { + /* Use the same index if sampler state mapping is not supported */ + key->tex[i].sampler_index = i; + key->num_samplers = i + 1; + } + else { + + /* The current samplers list can have redundant entries. + * In order to allow the number of bound samplers within the + * max limit supported by SVGA, we'll recreate the list with + * unique sampler state objects only. + */ + + /* Check to see if this sampler is already on the list. + * If so, set the sampler index of this sampler to the + * same sampler index. + */ + for (unsigned j = 0; j <= i; j++) { + if (svga->curr.sampler[shader_type][j] == sampler) { + + if (!(sampler_slots & (1 << j))) { + + /* if this sampler is not added to the new list yet, + * set its sampler index to the next sampler index, + * increment the sampler count, and mark this + * sampler as added to the list. 
+ */ + + unsigned next_index = + MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS-1); + + key->tex[i].sampler_index = next_index; + key->num_samplers = next_index + 1; + + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* reserve one slot for the alternate sampler */ + key->num_samplers++; + } + + sampler_slots |= (1 << j); + } + else { + key->tex[i].sampler_index = key->tex[j].sampler_index; + } + break; + } + } + } + } + } + + if (svga_have_gl43(svga)) { + if (shader->info.images_declared || shader->info.hw_atomic_declared || + shader->info.shader_buffers_declared) { + + /* Save the uavSpliceIndex which is the index used for the first uav + * in the draw pipeline. For compute, uavSpliceIndex is always 0. + */ + if (shader_type != PIPE_SHADER_COMPUTE) + key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex; + + unsigned uav_splice_index = key->uav_splice_index; + + /* Also get the texture data type to be used in the uav declaration */ + const struct svga_image_view *cur_image_view = + &svga->curr.image_views[shader_type][0]; + + for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]); + i++, cur_image_view++) { + + struct pipe_resource *resource = cur_image_view->desc.resource; + + if (resource) { + key->images[i].return_type = + svga_get_texture_datatype(cur_image_view->desc.format); + + key->images[i].is_array = resource->array_size > 1; + + /* Save the image resource target in the shader key because + * for single layer image view, the resource target in the + * tgsi shader is changed to a different texture target. 
+ */ + key->images[i].resource_target = resource->target; + if (resource->target == PIPE_TEXTURE_3D || + resource->target == PIPE_TEXTURE_1D_ARRAY || + resource->target == PIPE_TEXTURE_2D_ARRAY || + resource->target == PIPE_TEXTURE_CUBE || + resource->target == PIPE_TEXTURE_CUBE_ARRAY) { + key->images[i].is_single_layer = + cur_image_view->desc.u.tex.first_layer == + cur_image_view->desc.u.tex.last_layer; + } + + key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index; + } + else + key->images[i].uav_index = SVGA3D_INVALID_ID; + } + + const struct svga_shader_buffer *cur_sbuf = + &svga->curr.shader_buffers[shader_type][0]; + + for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]); + i++, cur_sbuf++) { + + if (cur_sbuf->resource) + key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index; + else + key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID; + } + + const struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0]; + + for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers); + i++, cur_buf++) { + + if (cur_buf->resource) + key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index; + else + key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID; + } + } + + /* Save info about which constant buffers are to be viewed + * as raw buffers in the shader key. 
+ */ + if (shader->info.const_buffers_declared & + svga->state.raw_constbufs[shader_type]) { + key->raw_buffers = svga->state.raw_constbufs[shader_type]; + + /* beginning index for srv for raw buffers */ + key->srv_raw_buf_index = PIPE_MAX_SAMPLERS; } } @@ -605,6 +786,9 @@ svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type) case PIPE_SHADER_TESS_CTRL: variant = CALLOC(1, sizeof(struct svga_tcs_variant)); break; + case PIPE_SHADER_COMPUTE: + variant = CALLOC(1, sizeof(struct svga_cs_variant)); + break; default: return NULL; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_shader.h b/lib/mesa/src/gallium/drivers/svga/svga_shader.h index 472499c91..cd64dc61f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_shader.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_shader.h @@ -115,20 +115,24 @@ struct svga_compile_key /* compute shader */ struct { unsigned grid_size[3]; + unsigned mem_size; } cs; /* any shader type */ int8_t generic_remap_table[MAX_GENERIC_VARYING]; unsigned num_textures:8; + unsigned num_samplers:8; unsigned num_unnormalized_coords:8; unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES; unsigned last_vertex_stage:1; unsigned clamp_vertex_color:1; + unsigned sampler_state_mapping:1; /* Set if use sampler state mapping */ unsigned sprite_origin_lower_left:1; uint16_t sprite_coord_enable; struct { unsigned compare_mode:1; unsigned compare_func:3; + unsigned compare_in_shader:1; unsigned unnormalized:1; unsigned texel_bias:1; unsigned width_height_idx:5; /**< texture unit */ @@ -141,10 +145,25 @@ struct svga_compile_key unsigned target:4; unsigned sampler_return_type:4; unsigned sampler_view:1; + unsigned sampler_index:5; } tex[PIPE_MAX_SAMPLERS]; - /* Note: svga_compile_keys_equal() depends on the variable-size - * tex[] array being at the end of this structure. 
- */ + + unsigned uav_splice_index:4; /* starting uav index */ + unsigned srv_raw_buf_index:8; /* start index for srv raw buffers */ + unsigned image_size_used:1; + + uint16_t raw_buffers; /* bitmask of raw buffers */ + + struct { + enum tgsi_return_type return_type; + enum pipe_texture_target resource_target; + unsigned is_array:1; + unsigned is_single_layer:1; + unsigned uav_index; + } images[PIPE_MAX_SHADER_IMAGES]; + + uint32_t shader_buf_uav_index[PIPE_MAX_SHADER_BUFFERS]; + uint32_t atomic_buf_uav_index[PIPE_MAX_HW_ATOMIC_BUFFERS]; }; /* A key for a variant of token string of a shader */ @@ -222,7 +241,8 @@ struct svga_fs_variant unsigned fs_shadow_compare_units; /** For FS-based polygon stipple */ - unsigned pstipple_sampler_unit; + unsigned pstipple_sampler_unit:8; + unsigned pstipple_sampler_state_index:8; }; @@ -360,6 +380,7 @@ struct svga_tes_shader struct svga_compute_shader { struct svga_shader base; + unsigned shared_mem_size; }; @@ -367,8 +388,7 @@ static inline boolean svga_compile_keys_equal(const struct svga_compile_key *a, const struct svga_compile_key *b) { - unsigned key_size = - (const char *) &a->tex[a->num_textures] - (const char *) a; + unsigned key_size = sizeof(*a); return memcmp(a, b, key_size) == 0; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state.c b/lib/mesa/src/gallium/drivers/svga/svga_state.c index 4f6af8052..698f53802 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state.c @@ -138,6 +138,39 @@ static const struct svga_tracked_state *hw_draw_state_sm5[] = }; +/** + * Atoms to update hardware state prior to emitting a draw packet + * for GL43 device which includes uav update. 
+ */ +static const struct svga_tracked_state *hw_draw_state_gl43[] = +{ + &svga_need_tgsi_transform, + &svga_hw_uav, + &svga_need_rawbuf_srv, + &svga_hw_fs, + &svga_hw_gs, + &svga_hw_tes, + &svga_hw_tcs, + &svga_hw_vs, + &svga_hw_rss, + &svga_hw_sampler, + &svga_hw_sampler_bindings, + &svga_hw_clip_planes, + &svga_hw_vdecl, + &svga_hw_fs_constants, + &svga_hw_fs_constbufs, + &svga_hw_gs_constants, + &svga_hw_gs_constbufs, + &svga_hw_tes_constants, + &svga_hw_tes_constbufs, + &svga_hw_tcs_constants, + &svga_hw_tcs_constbufs, + &svga_hw_vs_constants, + &svga_hw_vs_constbufs, + NULL +}; + + static const struct svga_tracked_state *swtnl_draw_state[] = { &svga_update_swtnl_draw, @@ -309,7 +342,6 @@ svga_update_state_retry(struct svga_context *svga, unsigned max_level) } - #define EMIT_RS(_rs, _count, _name, _value) \ do { \ _rs[_count].state = _name; \ @@ -383,7 +415,45 @@ svga_init_tracked_state(struct svga_context *svga) { /* Set the hw_draw_state atom list to the one for the particular gpu version. */ - state_levels[2] = svga_have_sm5(svga) ? hw_draw_state_sm5 : - (svga_have_vgpu10(svga) ? hw_draw_state_vgpu10 : - hw_draw_state_vgpu9); + state_levels[2] = + svga_have_gl43(svga) ? hw_draw_state_gl43 : + (svga_have_sm5(svga) ? hw_draw_state_sm5 : + ((svga_have_vgpu10(svga) ? hw_draw_state_vgpu10 : + hw_draw_state_vgpu9))); +} + + +static const struct svga_tracked_state *compute_state[] = +{ + &svga_hw_cs_uav, + &svga_hw_cs_sampler, + &svga_hw_cs_sampler_bindings, + &svga_hw_cs, + &svga_hw_cs_constants, + &svga_hw_cs_constbufs, + NULL +}; + +/** + * Update compute state. + * If the first attempt fails, flush the command buffer and retry. + * \return true if success, false if second attempt fails. 
+ */ +bool +svga_update_compute_state(struct svga_context *svga) +{ + enum pipe_error ret = PIPE_OK; + uint64_t compute_dirty = svga->dirty; + + if (compute_dirty) { + SVGA_RETRY_OOM(svga, ret, update_state(svga, compute_state, + &compute_dirty)); + + /* Set the dirty flag to the remaining dirty bits which are + * not processed in the compute pipeline. + */ + svga->dirty = compute_dirty; + } + + return ret == PIPE_OK; } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c index be1637d7a..1c5c20a58 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_constants.c @@ -25,6 +25,7 @@ **********************************************************/ #include "util/format/u_format.h" +#include "util/u_bitmask.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "pipe/p_defines.h" @@ -42,6 +43,44 @@ #include "svga_hw_reg.h" +static unsigned +svga_get_image_size_constant(const struct svga_context *svga, float **dest, + enum pipe_shader_type shader, + unsigned num_image_views, + const struct svga_image_view images[PIPE_SHADER_TYPES][SVGA3D_MAX_UAVIEWS]) +{ + uint32_t *dest_u = (uint32_t *) *dest; + + for (int i = 0; i < num_image_views; i++) { + if (images[shader][i].desc.resource) { + if (images[shader][i].desc.resource->target == PIPE_BUFFER) { + unsigned bytes_per_element = util_format_get_blocksize(images[shader][i].desc.format); + *dest_u++ = images[shader][i].desc.resource->width0 / bytes_per_element; + } + else + *dest_u++ = images[shader][i].desc.resource->width0; + + if (images[shader][i].desc.resource->target == PIPE_TEXTURE_1D_ARRAY) + *dest_u++ = images[shader][i].desc.resource->array_size; + else + *dest_u++ = images[shader][i].desc.resource->height0; + + if (images[shader][i].desc.resource->target == PIPE_TEXTURE_2D_ARRAY) + *dest_u++ = images[shader][i].desc.resource->array_size; + else if 
(images[shader][i].desc.resource->target == PIPE_TEXTURE_CUBE_ARRAY) + *dest_u++ = images[shader][i].desc.resource->array_size / 6; + else + *dest_u++ = images[shader][i].desc.resource->depth0; + *dest_u++ = 1; // Later this can be used for sample counts + } + else { + *dest_u += 4; + } + } + return num_image_views; +} + + /* * Don't try to send more than 4kb of successive constants. */ @@ -104,6 +143,14 @@ svga_get_extra_constants_common(const struct svga_context *svga, } } + /* image_size */ + if (variant->key.image_size_used) { + count += svga_get_image_size_constant(svga, &dest, shader, + svga->state.hw_draw.num_image_views[shader], + svga->state.hw_draw.image_views); + } + + return count; } @@ -572,6 +619,121 @@ emit_consts_vgpu9(struct svga_context *svga, enum pipe_shader_type shader) /** + * A helper function to destroy any pending unused srv. + */ +void +svga_destroy_rawbuf_srv(struct svga_context *svga) +{ + unsigned index = 0; + + while ((index = util_bitmask_get_next_index( + svga->sampler_view_to_free_id_bm, index)) + != UTIL_BITMASK_INVALID_INDEX) { + + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, + index)); + util_bitmask_clear(svga->sampler_view_id_bm, index); + util_bitmask_clear(svga->sampler_view_to_free_id_bm, index); + } +} + +/** + * A helper function to emit constant buffer as srv raw buffer. 
+ */ +static enum pipe_error +emit_rawbuf(struct svga_context *svga, + unsigned slot, + enum pipe_shader_type shader, + unsigned buffer_offset, + unsigned buffer_size, + void *buffer) +{ + enum pipe_error ret = PIPE_OK; + struct svga_raw_buffer *rawbuf = &svga->state.hw_draw.rawbufs[shader][slot]; + struct svga_winsys_surface *buf_handle = NULL; + unsigned srvid = SVGA3D_INVALID_ID; + unsigned enabled_rawbufs = svga->state.hw_draw.enabled_rawbufs[shader]; + + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITRAWBUFFER); + + if (buffer == NULL) { + if ((svga->state.hw_draw.enabled_rawbufs[shader] & (1 << slot)) == 0) { + goto done; + } + enabled_rawbufs &= ~(1 << slot); + } + else { + if ((rawbuf->buffer_offset != buffer_offset) || + (rawbuf->buffer_size != buffer_size) || + (rawbuf->buffer != buffer)) { + + /* Add the current srvid to the delete list */ + if (rawbuf->srvid != SVGA3D_INVALID_ID) { + util_bitmask_set(svga->sampler_view_to_free_id_bm, rawbuf->srvid); + rawbuf->srvid = SVGA3D_INVALID_ID; + } + + buf_handle = svga_buffer_handle(svga, buffer, + PIPE_BIND_SAMPLER_VIEW); + if (!buf_handle) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto done; + } + + /* Create a srv for the constant buffer */ + srvid = util_bitmask_add(svga->sampler_view_id_bm); + + SVGA3dShaderResourceViewDesc viewDesc; + viewDesc.bufferex.firstElement = buffer_offset / 4; + viewDesc.bufferex.numElements = buffer_size / 4; + viewDesc.bufferex.flags = SVGA3D_BUFFEREX_SRV_RAW; + + ret = SVGA3D_vgpu10_DefineShaderResourceView(svga->swc, + srvid, buf_handle, SVGA3D_R32_TYPELESS, + SVGA3D_RESOURCE_BUFFEREX, &viewDesc); + + if (ret != PIPE_OK) { + util_bitmask_clear(svga->sampler_view_id_bm, srvid); + goto done; + } + + /* Save the current raw buffer attributes in the slot */ + rawbuf->srvid = srvid; + rawbuf->buffer_size = buffer_size; + rawbuf->buffer = buffer; + rawbuf->handle = buf_handle; + + SVGA_STATS_COUNT_INC(svga_sws(svga), SVGA_STATS_COUNT_RAWBUFFERSRVIEW); + } + else { + /* Same 
buffer attributes in the slot. Can use the same SRV. */ + assert(rawbuf->srvid != SVGA3D_INVALID_ID); + srvid = rawbuf->srvid; + buf_handle = rawbuf->handle; + } + enabled_rawbufs |= (1 << slot); + } + + ret = SVGA3D_vgpu10_SetShaderResources(svga->swc, + svga_shader_type(shader), + slot + PIPE_MAX_SAMPLERS, + 1, + &srvid, + &buf_handle); + if (ret != PIPE_OK) { + goto done; + } + + /* Save the enabled rawbuf state */ + svga->state.hw_draw.enabled_rawbufs[shader] = enabled_rawbufs; + +done: + SVGA_STATS_TIME_POP(svga_sws(svga)); + return ret; +} + + +/** * A helper function to emit a constant buffer binding at the * specified slot for the specified shader type */ @@ -678,6 +840,9 @@ emit_constbuf(struct svga_context *svga, assert(new_buf_size % 16 == 0); + /* clamp the buf size before sending the command */ + new_buf_size = MIN2(new_buf_size, SVGA3D_DX_MAX_CONSTBUF_BINDING_SIZE); + const struct svga_screen *screen = svga_screen(svga->pipe.screen); const struct svga_winsys_screen *sws = screen->sws; @@ -850,11 +1015,42 @@ emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) assert(size % 16 == 0); - ret = emit_constbuf(svga, index, shader, offset, size, buffer, - 0, 0, NULL); - if (ret != PIPE_OK) - return ret; + /** + * If the buffer has been bound as an uav buffer, it will + * need to be bound as a shader resource raw buffer. + */ + if (svga->state.raw_constbufs[shader] & (1 << index)) { + ret = emit_rawbuf(svga, index, shader, offset, size, buffer); + if (ret != PIPE_OK) { + return ret; + } + + ret = emit_constbuf(svga, index, shader, 0, 0, NULL, + 0, 0, NULL); + if (ret != PIPE_OK) { + return ret; + } + /* Remove the rawbuf from the to-be-enabled constbuf list + * so the buffer will not be referenced again as constant buffer + * at resource validation time. 
+ */ + enabled_constbufs &= ~(1 << index); + } + else { + if (svga->state.hw_draw.enabled_rawbufs[shader] & (1 << index)) { + ret = emit_rawbuf(svga, index, shader, offset, size, NULL); + if (ret != PIPE_OK) { + return ret; + } + } + + ret = emit_constbuf(svga, index, shader, offset, size, buffer, + 0, 0, NULL); + if (ret != PIPE_OK) { + return ret; + } + } svga->hud.num_const_buf_updates++; } @@ -909,7 +1105,8 @@ emit_fs_constbuf(struct svga_context *svga, uint64_t dirty) struct svga_tracked_state svga_hw_fs_constants = { "hw fs params", - (SVGA_NEW_FS_CONSTS | + (SVGA_NEW_IMAGE_VIEW | + SVGA_NEW_FS_CONSTS | SVGA_NEW_FS_VARIANT | SVGA_NEW_TEXTURE_CONSTS), emit_fs_consts @@ -972,6 +1169,7 @@ struct svga_tracked_state svga_hw_vs_constants = { "hw vs params", (SVGA_NEW_PRESCALE | + SVGA_NEW_IMAGE_VIEW | SVGA_NEW_VS_CONSTS | SVGA_NEW_VS_VARIANT | SVGA_NEW_TEXTURE_CONSTS), @@ -1040,6 +1238,7 @@ struct svga_tracked_state svga_hw_gs_constants = { "hw gs params", (SVGA_NEW_PRESCALE | + SVGA_NEW_IMAGE_VIEW | SVGA_NEW_GS_CONSTS | SVGA_NEW_RAST | SVGA_NEW_GS_VARIANT | @@ -1102,7 +1301,8 @@ emit_tcs_constbuf(struct svga_context *svga, uint64_t dirty) struct svga_tracked_state svga_hw_tcs_constants = { "hw tcs params", - (SVGA_NEW_TCS_CONSTS | + (SVGA_NEW_IMAGE_VIEW | + SVGA_NEW_TCS_CONSTS | SVGA_NEW_TCS_VARIANT), emit_tcs_consts }; @@ -1161,6 +1361,7 @@ struct svga_tracked_state svga_hw_tes_constants = { "hw tes params", (SVGA_NEW_PRESCALE | + SVGA_NEW_IMAGE_VIEW | SVGA_NEW_TES_CONSTS | SVGA_NEW_TES_VARIANT), emit_tes_consts @@ -1173,3 +1374,169 @@ struct svga_tracked_state svga_hw_tes_constbufs = SVGA_NEW_TES_CONST_BUFFER, emit_tes_constbuf }; + + +/** + * Emit constant buffer for compute shader + */ +static enum pipe_error +emit_cs_consts(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.cs; + enum pipe_error ret = PIPE_OK; + + assert(svga_have_sm5(svga)); + + /* SVGA_NEW_CS_VARIANT */ + if (!variant) + return 
PIPE_OK; + + /* SVGA_NEW_CS_CONST_BUFFER */ + ret = emit_consts_vgpu10(svga, PIPE_SHADER_COMPUTE); + + return ret; +} + + +static enum pipe_error +emit_cs_constbuf(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.cs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_CS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_CS_CONSTBUF + */ + assert(svga_have_vgpu10(svga)); + ret = emit_constbuf_vgpu10(svga, PIPE_SHADER_COMPUTE); + + return ret; +} + + +struct svga_tracked_state svga_hw_cs_constants = +{ + "hw cs params", + (SVGA_NEW_IMAGE_VIEW | + SVGA_NEW_CS_CONSTS | + SVGA_NEW_CS_VARIANT | + SVGA_NEW_TEXTURE_CONSTS), + emit_cs_consts +}; + + +struct svga_tracked_state svga_hw_cs_constbufs = +{ + "hw cs params", + SVGA_NEW_CS_CONST_BUFFER, + emit_cs_constbuf +}; + + +/** + * A helper function to update the rawbuf for constbuf mask + */ +static void +update_rawbuf_mask(struct svga_context *svga, enum pipe_shader_type shader) +{ + unsigned dirty_constbufs; + unsigned enabled_constbufs; + + enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u; + dirty_constbufs = (svga->state.dirty_constbufs[shader]|enabled_constbufs) & ~1u; + + while (dirty_constbufs) { + unsigned index = u_bit_scan(&dirty_constbufs); + struct svga_buffer *sbuf = + svga_buffer(svga->curr.constbufs[shader][index].buffer); + + if (sbuf && sbuf->uav) { + svga->state.raw_constbufs[shader] |= (1 << index); + } else { + svga->state.raw_constbufs[shader] &= ~(1 << index); + } + } +} + + +/** + * update_rawbuf is called at hw state update time to determine + * if any of the bound constant buffers need to be bound as + * raw buffer srv. This function is called after uav state is + * updated and before shader variants are bound. 
+ */ +static enum pipe_error +update_rawbuf(struct svga_context *svga, uint64 dirty) +{ + uint64_t rawbuf_dirtybit[] = { + SVGA_NEW_VS_RAW_BUFFER, /* PIPE_SHADER_VERTEX */ + SVGA_NEW_FS_RAW_BUFFER, /* PIPE_SHADER_FRAGMENT */ + SVGA_NEW_GS_RAW_BUFFER, /* PIPE_SHADER_GEOMETRY */ + SVGA_NEW_TCS_RAW_BUFFER, /* PIPE_SHADER_TESS_CTRL */ + SVGA_NEW_TES_RAW_BUFFER, /* PIPE_SHADER_TESS_EVAL */ + }; + + for (enum pipe_shader_type shader = PIPE_SHADER_VERTEX; + shader <= PIPE_SHADER_TESS_EVAL; shader++) { + unsigned rawbuf_mask = svga->state.raw_constbufs[shader]; + + update_rawbuf_mask(svga, shader); + + /* If the rawbuf state is different for the shader stage, + * send SVGA_NEW_XX_RAW_BUFFER to trigger a new shader + * variant that will use srv for ubo access. + */ + if (svga->state.raw_constbufs[shader] != rawbuf_mask) + svga->dirty |= rawbuf_dirtybit[shader]; + } + + return PIPE_OK; +} + + +struct svga_tracked_state svga_need_rawbuf_srv = +{ + "raw buffer srv", + (SVGA_NEW_IMAGE_VIEW | + SVGA_NEW_SHADER_BUFFER | + SVGA_NEW_CONST_BUFFER), + update_rawbuf +}; + + +/** + * update_cs_rawbuf is called at compute dispatch time to determine + * if any of the bound constant buffers need to be bound as + * raw buffer srv. This function is called after uav state is + * updated and before a compute shader variant is bound. + */ +static enum pipe_error +update_cs_rawbuf(struct svga_context *svga, uint64 dirty) +{ + unsigned rawbuf_mask = svga->state.raw_constbufs[PIPE_SHADER_COMPUTE]; + + update_rawbuf_mask(svga, PIPE_SHADER_COMPUTE); + + /* if the rawbuf state is different for the shader stage, + * send SVGA_NEW_RAW_BUFFER to trigger a new shader + * variant to use srv for ubo access. 
+ */ + if (svga->state.raw_constbufs[PIPE_SHADER_COMPUTE] != rawbuf_mask) + svga->dirty |= SVGA_NEW_CS_RAW_BUFFER; + + return PIPE_OK; +} + + +struct svga_tracked_state svga_cs_need_rawbuf_srv = +{ + "raw buffer srv", + (SVGA_NEW_IMAGE_VIEW | + SVGA_NEW_SHADER_BUFFER | + SVGA_NEW_CONST_BUFFER), + update_cs_rawbuf +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c index dacf86c42..9c6997550 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -87,8 +87,7 @@ emit_fb_vgpu9(struct svga_context *svga) /* Set the rendered-to flag */ struct pipe_surface *s = curr->cbufs[i]; if (s) { - svga_set_texture_rendered_to(svga_texture(s->texture), - s->u.tex.first_layer, s->u.tex.level); + svga_set_texture_rendered_to(svga_texture(s->texture)); } } @@ -119,8 +118,7 @@ emit_fb_vgpu9(struct svga_context *svga) /* Set the rendered-to flag */ struct pipe_surface *s = curr->zsbuf; if (s) { - svga_set_texture_rendered_to(svga_texture(s->texture), - s->u.tex.first_layer, s->u.tex.level); + svga_set_texture_rendered_to(svga_texture(s->texture)); } } @@ -225,8 +223,7 @@ emit_fb_vgpu10(struct svga_context *svga) last_rtv = i; /* Set the rendered-to flag */ - svga_set_texture_rendered_to(svga_texture(s->texture), - s->u.tex.first_layer, s->u.tex.level); + svga_set_texture_rendered_to(svga_texture(s->texture)); } else { rtv[i] = NULL; @@ -247,8 +244,7 @@ emit_fb_vgpu10(struct svga_context *svga) } /* Set the rendered-to flag */ - svga_set_texture_rendered_to(svga_texture(s->texture), - s->u.tex.first_layer, s->u.tex.level); + svga_set_texture_rendered_to(svga_texture(s->texture)); } else { dsv = NULL; @@ -614,6 +610,10 @@ get_viewport_prescale(struct svga_context *svga, prescale->translate[2] -= 0.5f; } + /* Clamp depth range, making sure it's between 0 and 1 */ + range_min = CLAMP(range_min, 0.0f, 1.0f); + range_max = 
CLAMP(range_max, 0.0f, 1.0f); + if (prescale->enabled) { float H[2]; float J[2]; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c index 5f3df6a2b..a8411186f 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_fs.c @@ -369,6 +369,9 @@ make_fs_key(const struct svga_context *svga, key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; } + if (svga_have_gl43(svga)) + key->image_size_used = fs->base.info.opcode_count[TGSI_OPCODE_RESQ] ? 1 : 0; + return PIPE_OK; } @@ -498,7 +501,8 @@ struct svga_tracked_state svga_hw_fs = SVGA_NEW_SAMPLER | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_DEPTH_STENCIL_ALPHA | - SVGA_NEW_BLEND), + SVGA_NEW_BLEND | + SVGA_NEW_FS_RAW_BUFFER), emit_hw_fs }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c index 670b757c4..250a74258 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_gs.c @@ -135,6 +135,9 @@ make_gs_key(struct svga_context *svga, struct svga_compile_key *key) /* Mark this as the last shader in the vertex processing stage */ key->last_vertex_stage = 1; + + if (svga_have_gl43(svga)) + key->image_size_used = gs->base.info.opcode_count[TGSI_OPCODE_RESQ] ? 
1 : 0; } @@ -235,6 +238,7 @@ struct svga_tracked_state svga_hw_gs = SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER | SVGA_NEW_RAST | - SVGA_NEW_NEED_SWTNL), + SVGA_NEW_NEED_SWTNL | + SVGA_NEW_GS_RAW_BUFFER), emit_hw_gs }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c index 5a52c25a4..4fbab1743 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -136,7 +136,7 @@ update_need_pipeline(struct svga_context *svga, uint64_t dirty) if (svga->state.sw.need_pipeline) { assert(reason); - pipe_debug_message(&svga->debug.callback, FALLBACK, + util_debug_message(&svga->debug.callback, FALLBACK, "Using semi-fallback for %s", reason); } diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c index 8df0f2eca..8cdf60c34 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_rss.c @@ -321,10 +321,9 @@ emit_rss_vgpu9(struct svga_context *svga, uint64_t dirty) static struct svga_rasterizer_state * get_no_cull_rasterizer_state(struct svga_context *svga) { - const struct svga_rasterizer_state *r = svga->curr.rast; - unsigned int aa_point = r->templ.point_smooth; + struct svga_rasterizer_state *r = svga->curr.rast; - if (!svga->rasterizer_no_cull[aa_point]) { + if (!r->no_cull_rasterizer) { struct pipe_rasterizer_state rast; memset(&rast, 0, sizeof(rast)); @@ -341,10 +340,10 @@ get_no_cull_rasterizer_state(struct svga_context *svga) rast.bottom_edge_rule = r->templ.bottom_edge_rule; rast.clip_halfz = r->templ.clip_halfz; - svga->rasterizer_no_cull[aa_point] = + r->no_cull_rasterizer = svga->pipe.create_rasterizer_state(&svga->pipe, &rast); } - return svga->rasterizer_no_cull[aa_point]; + return r->no_cull_rasterizer; } @@ -362,6 +361,29 @@ get_no_depth_stencil_test_state(struct svga_context *svga) } +/** 
+ * A helper function to create an alternate svga rasterizer state object to use + * forcedSampleCount to support multisampled framebuffer without attachments. + */ +static SVGA3dRasterizerStateId +get_alt_rasterizer_state_id(struct svga_context *svga, + struct svga_rasterizer_state *rast, + unsigned samples) +{ + assert(samples <= SVGA_MAX_FRAMEBUFFER_DEFAULT_SAMPLES); + assert(samples >= 0); + + if (samples <= 1) + return rast->id; + + if (rast->altRastIds[samples] == SVGA3D_INVALID_ID) { + rast->altRastIds[samples] = svga_define_rasterizer_object(svga, rast, samples); + } + + return rast->altRastIds[samples]; +} + + static enum pipe_error emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty) { @@ -457,8 +479,9 @@ emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty) } } - if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) { - const struct svga_rasterizer_state *rast; + if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST | + SVGA_NEW_FRAME_BUFFER)) { + struct svga_rasterizer_state *rast = svga->curr.rast; if (svga->curr.reduced_prim == PIPE_PRIM_POINTS && svga->curr.gs && svga->curr.gs->wide_point) { @@ -468,16 +491,28 @@ emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty) */ rast = get_no_cull_rasterizer_state(svga); } - else { - rast = svga->curr.rast; + + int rastId = rast->id; + + /* In the case of no-attachment framebuffer, the sample count will be + * specified in forcedSampleCount in the RasterizerState_v2 object. 
+ */ + if ((svga->curr.framebuffer.nr_cbufs == 0) && + (svga->curr.framebuffer.zsbuf == NULL)) { + rastId = + get_alt_rasterizer_state_id(svga, rast, + svga->curr.framebuffer.samples); + + if (rastId == SVGA3D_INVALID_ID) + return PIPE_ERROR; } - if (svga->state.hw_draw.rasterizer_id != rast->id) { + if (svga->state.hw_draw.rasterizer_id != rastId) { /* Set/bind the rasterizer state object */ - ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id); + ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rastId); if (ret != PIPE_OK) return ret; - svga->state.hw_draw.rasterizer_id = rast->id; + svga->state.hw_draw.rasterizer_id = rastId; } } svga->state.hw_draw.rasterizer_discard = FALSE; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c index bbfd889e9..b14a642ed 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_sampler.c @@ -136,6 +136,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, SVGA3dResourceType resourceDim; SVGA3dShaderResourceViewDesc viewDesc; enum pipe_format viewFormat = sv->base.format; + enum pipe_texture_target target = sv->base.target; /* vgpu10 cannot create a BGRX view for a BGRA resource, so force it to * create a BGRA view (and vice versa). 
@@ -149,8 +150,9 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, viewFormat = PIPE_FORMAT_B8G8R8X8_UNORM; } - if (texture->target == PIPE_BUFFER) { + if (target == PIPE_BUFFER) { unsigned pf_flags; + assert(texture->target == PIPE_BUFFER); svga_translate_texture_buffer_view_format(viewFormat, &format, &pf_flags); @@ -168,7 +170,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, assert(format != SVGA3D_FORMAT_INVALID); - if (texture->target == PIPE_BUFFER) { + if (target == PIPE_BUFFER) { unsigned elem_size = util_format_get_blocksize(sv->base.format); viewDesc.buffer.firstElement = sv->base.u.buf.offset / elem_size; @@ -188,11 +190,10 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, * hence we need to set arraySize to 1 explicitly. */ viewDesc.tex.arraySize = - (texture->target == PIPE_TEXTURE_3D || - texture->target == PIPE_BUFFER) ? 1 : + (target == PIPE_TEXTURE_3D || target == PIPE_BUFFER) ? 1 : (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1); - switch (texture->target) { + switch (target) { case PIPE_BUFFER: resourceDim = SVGA3D_RESOURCE_BUFFER; break; @@ -397,55 +398,100 @@ update_samplers(struct svga_context *svga, uint64_t dirty ) for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) { const unsigned count = svga->curr.num_samplers[shader]; - SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS]; + SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS*2]; unsigned i; - unsigned nsamplers; + unsigned nsamplers = 0; + boolean sampler_state_mapping = + svga_use_sampler_state_mapping(svga, count); for (i = 0; i < count; i++) { bool fs_shadow = false; + const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; /* _NEW_FS */ if (shader == PIPE_SHADER_FRAGMENT) { struct svga_fs_variant *fs = svga_fs_variant(svga->state.hw_draw.fs); - /* If the fragment shader is doing the shadow comparison - * for this texture unit, don't enable shadow compare in - * the texture sampler state. 
- */ + if (fs && (fs->fs_shadow_compare_units & (1 << i))) { + + /* Use the alternate sampler state with the compare + * bit disabled when comparison is done in the shader and + * sampler state mapping is not enabled. + */ fs_shadow = true; } } - if (svga->curr.sampler[shader][i]) { - ids[i] = svga->curr.sampler[shader][i]->id[fs_shadow]; - assert(ids[i] != SVGA3D_INVALID_ID); + if (!sampler_state_mapping) { + if (sampler) { + SVGA3dSamplerId id = sampler->id[fs_shadow]; + assert(id != SVGA3D_INVALID_ID); + ids[i] = id; + } + else { + ids[i] = SVGA3D_INVALID_ID; + } + nsamplers++; } else { - ids[i] = SVGA3D_INVALID_ID; + if (sampler) { + SVGA3dSamplerId id = sampler->id[0]; + assert(id != SVGA3D_INVALID_ID); + + /* Check if the sampler id is already on the ids list */ + unsigned k; + for (k = 0; k < nsamplers; k++) { + if (ids[k] == id) + break; + } + + /* add the id to the list if it is not already on the list */ + if (k == nsamplers) { + ids[nsamplers++] = id; + + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* + * add the alternate sampler state as well as the shader + * might use this alternate sampler state which has comparison + * disabled when the comparison is done in the shader. 
+ */ + ids[nsamplers++] = sampler->id[1]; + } + } + } } } - for (; i < svga->state.hw_draw.num_samplers[shader]; i++) { + for (i = nsamplers; i < svga->state.hw_draw.num_samplers[shader]; i++) { ids[i] = SVGA3D_INVALID_ID; } - nsamplers = MAX2(svga->state.hw_draw.num_samplers[shader], count); - if (nsamplers > 0) { - if (count != svga->state.hw_draw.num_samplers[shader] || + unsigned nsamplerIds = + MAX2(nsamplers, svga->state.hw_draw.num_samplers[shader]); + + if (nsamplerIds > 0) { + + if (nsamplers > SVGA3D_DX_MAX_SAMPLERS) { + debug_warn_once("Too many sampler states"); + nsamplers = SVGA3D_DX_MAX_SAMPLERS; + } + + if (nsamplers != svga->state.hw_draw.num_samplers[shader] || memcmp(ids, svga->state.hw_draw.samplers[shader], - count * sizeof(ids[0])) != 0) { + nsamplerIds * sizeof(ids[0])) != 0) { + /* HW state is really changing */ ret = SVGA3D_vgpu10_SetSamplers(svga->swc, - nsamplers, + nsamplerIds, 0, /* start */ svga_shader_type(shader), /* type */ ids); if (ret != PIPE_OK) return ret; memcpy(svga->state.hw_draw.samplers[shader], ids, - nsamplers * sizeof(ids[0])); - svga->state.hw_draw.num_samplers[shader] = count; + nsamplerIds * sizeof(ids[0])); + svga->state.hw_draw.num_samplers[shader] = nsamplers; } } } @@ -453,7 +499,7 @@ update_samplers(struct svga_context *svga, uint64_t dirty ) /* Handle polygon stipple sampler texture */ if (svga->curr.rast->templ.poly_stipple_enable) { const unsigned unit = - svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit; + svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_state_index; struct svga_sampler_state *sampler = svga->polygon_stipple.sampler; assert(sampler); @@ -475,6 +521,7 @@ update_samplers(struct svga_context *svga, uint64_t dirty ) svga->state.hw_draw.samplers[PIPE_SHADER_FRAGMENT][unit] = sampler->id[0]; } + svga->state.hw_draw.num_samplers[PIPE_SHADER_FRAGMENT]++; } return ret; @@ -488,3 +535,180 @@ struct svga_tracked_state svga_hw_sampler = { SVGA_NEW_STIPPLE), update_samplers }; + + 
+static enum pipe_error +update_cs_sampler_resources(struct svga_context *svga, uint64_t dirty) +{ + enum pipe_error ret = PIPE_OK; + enum pipe_shader_type shader = PIPE_SHADER_COMPUTE; + + assert(svga_have_sm5(svga)); + + SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS]; + struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + unsigned count; + unsigned nviews; + unsigned i; + + count = svga->curr.num_sampler_views[shader]; + for (i = 0; i < count; i++) { + struct svga_pipe_sampler_view *sv = + svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); + + if (sv) { + surfaces[i] = svga_resource_handle(sv->base.texture); + + ret = svga_validate_pipe_sampler_view(svga, sv); + if (ret != PIPE_OK) + return ret; + + assert(sv->id != SVGA3D_INVALID_ID); + ids[i] = sv->id; + sampler_views[i] = &sv->base; + } + else { + surfaces[i] = NULL; + ids[i] = SVGA3D_INVALID_ID; + sampler_views[i] = NULL; + } + } + + for (; i < svga->state.hw_draw.num_sampler_views[shader]; i++) { + ids[i] = SVGA3D_INVALID_ID; + surfaces[i] = NULL; + sampler_views[i] = NULL; + } + + /* Number of ShaderResources that need to be modified. This includes + * the one that need to be unbound. + */ + nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count); + if (nviews > 0) { + if (count != svga->state.hw_draw.num_sampler_views[shader] || + memcmp(sampler_views, svga->state.hw_draw.sampler_views[shader], + count * sizeof(sampler_views[0])) != 0) { + SVGA3dShaderResourceViewId *pIds = ids; + struct svga_winsys_surface **pSurf = surfaces; + unsigned numSR = 0; + + /* Loop through the sampler view list to only emit the sampler views + * that are not already in the corresponding entries in the device's + * shader resource list. 
+ */ + for (i = 0; i < nviews; i++) { + boolean emit; + + emit = sampler_views[i] == + svga->state.hw_draw.sampler_views[shader][i]; + + if (!emit && i == nviews - 1) { + /* Include the last sampler view in the next emit + * if it is different. + */ + emit = TRUE; + numSR++; + i++; + } + + if (emit) { + /* numSR can only be 0 if the first entry of the list + * is the same as the one in the device list. + * In this case, * there is nothing to send yet. + */ + if (numSR) { + ret = SVGA3D_vgpu10_SetShaderResources(svga->swc, + svga_shader_type(shader), + i - numSR, /* startView */ + numSR, + pIds, + pSurf); + + if (ret != PIPE_OK) + return ret; + } + pIds += (numSR + 1); + pSurf += (numSR + 1); + numSR = 0; + } + else + numSR++; + } + + /* Save referenced sampler views in the hw draw state. */ + svga->state.hw_draw.num_sampler_views[shader] = count; + for (i = 0; i < nviews; i++) { + pipe_sampler_view_reference( + &svga->state.hw_draw.sampler_views[shader][i], + sampler_views[i]); + } + } + } + return ret; +} + + +struct svga_tracked_state svga_hw_cs_sampler_bindings = { + "cs shader resources emit", + SVGA_NEW_TEXTURE_BINDING, + update_cs_sampler_resources +}; + +static enum pipe_error +update_cs_samplers(struct svga_context *svga, uint64_t dirty ) +{ + enum pipe_error ret = PIPE_OK; + enum pipe_shader_type shader = PIPE_SHADER_COMPUTE; + + assert(svga_have_sm5(svga)); + + const unsigned count = svga->curr.num_samplers[shader]; + SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS]; + unsigned i; + unsigned nsamplers; + + for (i = 0; i < count; i++) { + if (svga->curr.sampler[shader][i]) { + ids[i] = svga->curr.sampler[shader][i]->id[0]; + assert(ids[i] != SVGA3D_INVALID_ID); + } + else { + ids[i] = SVGA3D_INVALID_ID; + } + } + + for (; i < svga->state.hw_draw.num_samplers[shader]; i++) { + ids[i] = SVGA3D_INVALID_ID; + } + + nsamplers = MAX2(svga->state.hw_draw.num_samplers[shader], count); + if (nsamplers > 0) { + if (count != svga->state.hw_draw.num_samplers[shader] || + 
memcmp(ids, svga->state.hw_draw.samplers[shader], + count * sizeof(ids[0])) != 0) { + /* HW state is really changing */ + ret = SVGA3D_vgpu10_SetSamplers(svga->swc, + nsamplers, + 0, /* start */ + svga_shader_type(shader), /* type */ + ids); + if (ret != PIPE_OK) + return ret; + + memcpy(svga->state.hw_draw.samplers[shader], ids, + nsamplers * sizeof(ids[0])); + svga->state.hw_draw.num_samplers[shader] = count; + } + } + + return ret; +} + + +struct svga_tracked_state svga_hw_cs_sampler = { + "texture cs sampler emit", + (SVGA_NEW_CS | + SVGA_NEW_SAMPLER), + update_cs_samplers +}; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c index 492a929bd..de10da694 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_state_vs.c @@ -214,6 +214,9 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key) */ key->last_vertex_stage = !(svga->curr.gs || svga->curr.tcs || svga->curr.tes); + + if (svga_have_gl43(svga)) + key->image_size_used = vs->base.info.opcode_count[TGSI_OPCODE_RESQ] ? 
1 : 0; } @@ -434,6 +437,7 @@ struct svga_tracked_state svga_hw_vs = SVGA_NEW_RAST | SVGA_NEW_PRESCALE | SVGA_NEW_VELEMENT | - SVGA_NEW_NEED_SWTNL), + SVGA_NEW_NEED_SWTNL | + SVGA_NEW_VS_RAW_BUFFER), emit_hw_vs }; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_surface.c b/lib/mesa/src/gallium/drivers/svga/svga_surface.c index 09a6afb68..c80f0a4f9 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_surface.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_surface.c @@ -194,7 +194,7 @@ svga_texture_view_surface(struct svga_context *svga, { struct svga_screen *ss = svga_screen(svga->pipe.screen); struct svga_winsys_surface *handle = NULL; - boolean validated; + boolean invalidated; boolean needCopyResource; SVGA_DBG(DEBUG_PERF, @@ -241,7 +241,7 @@ svga_texture_view_surface(struct svga_context *svga, } else { SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n"); handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT, - &validated, key); + &invalidated, key); needCopyResource = TRUE; if (cacheable && !tex->backed_handle) { @@ -551,7 +551,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) * associated resource. We will then use the cloned surface view for * render target. */ - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) { + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) { if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { SVGA_DBG(DEBUG_VIEWS, "same resource used in shaderResource and renderTarget 0x%x\n", @@ -582,7 +582,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) SVGA3dRenderTargetViewDesc desc; struct svga_texture *stex = svga_texture(s->base.texture); - if (stex->validated == FALSE) { + if (stex->surface_state < SVGA_SURFACE_STATE_INVALIDATED) { assert(stex->handle); /* We are about to render into a surface that has not been validated. 
@@ -591,7 +591,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) * content when the associated mob is first bound to the surface. */ SVGA_RETRY(svga, SVGA3D_InvalidateGBSurface(svga->swc, stex->handle)); - stex->validated = TRUE; + stex->surface_state = SVGA_SURFACE_STATE_INVALIDATED; } desc.tex.mipSlice = s->real_level; @@ -599,38 +599,48 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) desc.tex.arraySize = s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1; - s->view_id = util_bitmask_add(svga->surface_view_id_bm); - resType = svga_resource_type(s->base.texture->target); if (util_format_is_depth_or_stencil(s->base.format)) { - ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc, - s->view_id, - s->handle, - s->key.format, - resType, - &desc); - } - else { - SVGA3dSurfaceFormat view_format = s->key.format; - const struct svga_texture *stex = svga_texture(s->base.texture); - /* Can't create RGBA render target view of a RGBX surface so adjust - * the view format. We do something similar for texture samplers in - * svga_validate_pipe_sampler_view(). + /* Create depth stencil view only if the resource is created + * with depth stencil bind flag. */ - if (view_format == SVGA3D_B8G8R8A8_UNORM && - (stex->key.format == SVGA3D_B8G8R8X8_UNORM || - stex->key.format == SVGA3D_B8G8R8X8_TYPELESS)) { - view_format = SVGA3D_B8G8R8X8_UNORM; + if (stex->key.flags & SVGA3D_SURFACE_BIND_DEPTH_STENCIL) { + s->view_id = util_bitmask_add(svga->surface_view_id_bm); + ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc, + s->view_id, + s->handle, + s->key.format, + resType, + &desc); } + } + else { + /* Create render target view only if the resource is created + * with render target bind flag. + */ + if (stex->key.flags & SVGA3D_SURFACE_BIND_RENDER_TARGET) { + SVGA3dSurfaceFormat view_format = s->key.format; + + /* Can't create RGBA render target view of a RGBX surface so adjust + * the view format. 
We do something similar for texture samplers in + * svga_validate_pipe_sampler_view(). + */ + if (view_format == SVGA3D_B8G8R8A8_UNORM && + (stex->key.format == SVGA3D_B8G8R8X8_UNORM || + stex->key.format == SVGA3D_B8G8R8X8_TYPELESS)) { + view_format = SVGA3D_B8G8R8X8_UNORM; + } - ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc, - s->view_id, - s->handle, - view_format, - resType, - &desc); + s->view_id = util_bitmask_add(svga->surface_view_id_bm); + ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc, + s->view_id, + s->handle, + view_format, + resType, + &desc); + } } if (ret != PIPE_OK) { @@ -669,7 +679,9 @@ svga_surface_destroy(struct pipe_context *pipe, */ if (s->handle != t->handle && s->handle != t->backed_handle) { SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); - svga_screen_surface_destroy(ss, &s->key, &s->handle); + svga_screen_surface_destroy(ss, &s->key, + svga_was_texture_rendered_to(t), + &s->handle); } if (s->view_id != SVGA3D_INVALID_ID) { diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c index e2d0865d9..d2115a263 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -1344,11 +1344,12 @@ emit_ssg(struct svga_shader_emitter *emit, /** - * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative). + * Translate/emit the conditional discard instruction (discard if + * any of X,Y,Z,W are negative). */ static boolean -emit_kill_if(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +emit_cond_discard(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) { const struct tgsi_full_src_register *reg = &insn->Src[0]; struct src_register src0, srcIn; @@ -1375,7 +1376,7 @@ emit_kill_if(struct svga_shader_emitter *emit, src0 = src( temp ); } - /* Do the texkill by checking if any of the XYZW components are < 0. 
+ /* Do the discard by checking if any of the XYZW components are < 0. * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x * only used XYZ. The MSDN documentation about this is incorrect. */ @@ -1387,12 +1388,12 @@ emit_kill_if(struct svga_shader_emitter *emit, /** - * Translate/emit unconditional kill instruction (usually found inside + * Translate/emit the unconditional discard instruction (usually found inside * an IF/ELSE/ENDIF block). */ static boolean -emit_kill(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +emit_discard(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) { SVGA3dShaderDestToken temp; struct src_register one = get_one_immediate(emit); @@ -2044,6 +2045,73 @@ emit_mov(struct svga_shader_emitter *emit, /** + * Translate TGSI SQRT instruction + * if src1 == 0 + * mov dst, src1 + * else + * rsq temp, src1 + * rcp dst, temp + * endif + */ +static boolean +emit_sqrt(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct src_register src1 = translate_src_register(emit, &insn->Src[0]); + const struct src_register zero = get_zero_immediate(emit); + SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); + SVGA3dShaderDestToken temp = get_temp(emit); + SVGA3dShaderInstToken if_token = inst_token(SVGA3DOP_IFC); + boolean ret = TRUE; + + if_token.control = SVGA3DOPCOMP_EQ; + + if (!(emit_instruction(emit, if_token) && + emit_src(emit, src1) && + emit_src(emit, zero))) { + ret = FALSE; + goto cleanup; + } + + if (!submit_op1(emit, + inst_token(SVGA3DOP_MOV), + dst, src1)) { + ret = FALSE; + goto cleanup; + } + + if (!emit_instruction(emit, inst_token(SVGA3DOP_ELSE))) { + ret = FALSE; + goto cleanup; + } + + if (!submit_op1(emit, + inst_token(SVGA3DOP_RSQ), + temp, src1)) { + ret = FALSE; + goto cleanup; + } + + if (!submit_op1(emit, + inst_token(SVGA3DOP_RCP), + dst, src(temp))) { + ret = FALSE; + goto cleanup; + } + + if 
(!emit_instruction(emit, inst_token(SVGA3DOP_ENDIF))) { + ret = FALSE; + goto cleanup; + } + +cleanup: + release_temp(emit, temp); + + return ret; +} + + +/** * Translate/emit TGSI DDX, DDY instructions. */ static boolean @@ -2820,7 +2888,7 @@ svga_emit_instruction(struct svga_shader_emitter *emit, return emit_end( emit ); case TGSI_OPCODE_KILL_IF: - return emit_kill_if( emit, insn ); + return emit_cond_discard( emit, insn ); /* Selection opcodes. The underlying language is fairly * non-orthogonal about these. @@ -2902,7 +2970,7 @@ svga_emit_instruction(struct svga_shader_emitter *emit, return emit_brk( emit, insn ); case TGSI_OPCODE_KILL: - return emit_kill( emit, insn ); + return emit_discard( emit, insn ); case TGSI_OPCODE_DST: return emit_dst_insn( emit, insn ); @@ -2919,6 +2987,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_MOV: return emit_mov( emit, insn ); + case TGSI_OPCODE_SQRT: + return emit_sqrt( emit, insn ); + default: { SVGA3dShaderOpCodeType opcode = @@ -3517,7 +3588,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 || emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || - emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1) + emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SQRT] >= 1) return TRUE; return FALSE; diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index a9435a098..362bbf637 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2013 VMware, Inc. All rights reserved. + * Copyright 1998-2022 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -66,7 +66,6 @@ (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) #define MAX_TEMP_ARRAYS 64 /* Enough? */ - /** * Clipping is complicated. There's four different cases which we * handle during VS/GS shader translation: @@ -181,6 +180,18 @@ map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name) return tgsi_semantic_to_sgn_name[name]; } +enum reemit_mode { + REEMIT_FALSE = 0, + REEMIT_TRUE = 1, + REEMIT_IN_PROGRESS = 2 +}; + +struct svga_raw_buf_tmp { + bool indirect; + unsigned buffer_index:8; + unsigned element_index:8; + unsigned element_rel:8; +}; struct svga_shader_emitter_v10 { @@ -193,18 +204,21 @@ struct svga_shader_emitter_v10 struct svga_compile_key key; struct tgsi_shader_info info; unsigned unit; - unsigned version; /**< Either 40 or 41 at this time */ + unsigned version; /**< Either 40, 41, 50 or 51 at this time */ unsigned cur_tgsi_token; /**< current tgsi token position */ unsigned inst_start_token; boolean discard_instruction; /**< throw away current instruction? 
*/ boolean reemit_instruction; /**< reemit current instruction */ + boolean reemit_tgsi_instruction; /**< reemit current tgsi instruction */ boolean skip_instruction; /**< skip current instruction */ + boolean use_sampler_state_mapping; /* use sampler state mapping */ + enum reemit_mode reemit_rawbuf_instruction; union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; double (*immediates_dbl)[2]; unsigned num_immediates; /**< Number of immediates emitted */ - unsigned common_immediate_pos[10]; /**< literals for common immediates */ + unsigned common_immediate_pos[20]; /**< literals for common immediates */ unsigned num_common_immediates; boolean immediates_emitted; @@ -235,12 +249,36 @@ struct svga_shader_emitter_v10 */ unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; + /* Raw constant buffers */ + unsigned raw_buf_srv_start_index; /* starting srv index for raw buffers */ + unsigned raw_bufs; /* raw buffers bitmask */ + unsigned raw_buf_tmp_index; /* starting temp index for raw buffers */ + unsigned raw_buf_cur_tmp_index; /* current temp index for raw buffers */ + struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */ + /* Samplers */ unsigned num_samplers; boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/ ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ + /* Images */ + unsigned num_images; + unsigned image_mask; + struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES]; + unsigned image_size_index; /* starting index to cbuf for image size */ + + /* Shader buffers */ + unsigned num_shader_bufs; + + /* HW atomic buffers */ + unsigned num_atomic_bufs; + unsigned atomic_bufs_mask; + unsigned max_atomic_counter_index; + VGPU10_OPCODE_TYPE cur_atomic_opcode; /* current atomic opcode */ + + boolean uav_declared; /* True if uav is declared */ + /* Index Range declaration */ struct { unsigned start_index; @@ -274,6 +312,11 @@ 
struct svga_shader_emitter_v10 unsigned have_prescale:1; } vposition; + /* Shader limits */ + unsigned max_vs_inputs; + unsigned max_vs_outputs; + unsigned max_gs_inputs; + /* For vertex shaders only */ struct { /* viewport constant */ @@ -299,6 +342,7 @@ struct svga_shader_emitter_v10 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ unsigned pstipple_sampler_unit; + unsigned pstipple_sampler_state_index; unsigned fragcoord_input_index; /**< real fragment position input reg */ unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ @@ -311,12 +355,11 @@ struct svga_shader_emitter_v10 /** TGSI index of sample mask input sys value */ unsigned sample_mask_in_sys_index; - /** Which texture units are doing shadow comparison in the FS code */ - unsigned shadow_compare_units; - /* layer */ unsigned layer_input_index; /**< TGSI index of layer */ unsigned layer_imm_index; /**< immediate for default layer 0 */ + + boolean forceEarlyDepthStencil; /**< true if Early Depth stencil test is enabled */ } fs; /* For geometry shaders only */ @@ -383,6 +426,19 @@ struct svga_shader_emitter_v10 } outer; } tes; + struct { + unsigned block_width; /* thread group size in x dimension */ + unsigned block_height; /* thread group size in y dimension */ + unsigned block_depth; /* thread group size in z dimension */ + unsigned thread_id_index; /* thread id tgsi index */ + unsigned block_id_index; /* block id tgsi index */ + bool shared_memory_declared; /* set if shared memory is declared */ + struct { + unsigned tgsi_index; /* grid size tgsi index */ + unsigned imm_index; /* grid size imm index */ + } grid_size; + } cs; + /* For vertex or geometry shaders */ enum clipping_mode clip_mode; unsigned clip_dist_out_index; /**< clip distance output register index */ @@ -410,6 +466,9 @@ struct svga_shader_emitter_v10 /* For all shaders: const reg index for texture buffer size */ unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; + /** Which texture units are doing 
shadow comparison in the shader code */ + unsigned shadow_compare_units; + /* VS/TCS/TES/GS/FS Linkage info */ struct shader_linkage linkage; struct tgsi_shader_info *prevShaderInfo; @@ -419,8 +478,8 @@ struct svga_shader_emitter_v10 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ - /* For pipe_debug_message */ - struct pipe_debug_callback svga_debug_callback; + /* For util_debug_message */ + struct util_debug_callback svga_debug_callback; /* current loop depth in shader */ unsigned current_loop_depth; @@ -435,6 +494,9 @@ static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit); static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit); static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit); static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit); +static void emit_image_declarations(struct svga_shader_emitter_v10 *emit); +static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit); +static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit); static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit); static boolean @@ -459,6 +521,11 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit, boolean addSignature, SVGA3dDXSignatureSemanticName sgnName); +static boolean +emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst); + static void create_temp_array(struct svga_shader_emitter_v10 *emit, unsigned arrayID, unsigned first, unsigned count, @@ -602,9 +669,9 @@ check_register_index(struct svga_shader_emitter_v10 *emit, case VGPU10_OPCODE_DCL_INPUT_PS_SGV: case VGPU10_OPCODE_DCL_INPUT_PS_SIV: if ((emit->unit == PIPE_SHADER_VERTEX && - index >= VGPU10_MAX_VS_INPUTS) || + index >= emit->max_vs_inputs) || (emit->unit == PIPE_SHADER_GEOMETRY && - index >= VGPU10_MAX_GS_INPUTS) || + index >= emit->max_gs_inputs) 
|| (emit->unit == PIPE_SHADER_FRAGMENT && index >= VGPU10_MAX_FS_INPUTS) || (emit->unit == PIPE_SHADER_TESS_CTRL && @@ -625,7 +692,7 @@ check_register_index(struct svga_shader_emitter_v10 *emit, * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2 */ if ((emit->unit == PIPE_SHADER_VERTEX && - index >= VGPU10_MAX_VS_OUTPUTS) || + index >= emit->max_vs_outputs) || (emit->unit == PIPE_SHADER_GEOMETRY && index >= VGPU10_MAX_GS_OUTPUTS) || (emit->unit == PIPE_SHADER_FRAGMENT && @@ -655,6 +722,13 @@ check_register_index(struct svga_shader_emitter_v10 *emit, } break; case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT: + case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT: + case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT: + case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: /* nothing */ break; default: @@ -960,6 +1034,10 @@ translate_opcode(enum tgsi_opcode opcode) return VGPU10_OPCODE_EVAL_SAMPLE_INDEX; case TGSI_OPCODE_BARRIER: return VGPU10_OPCODE_SYNC; + case TGSI_OPCODE_DFMA: + return VGPU10_OPCODE_DFMA; + case TGSI_OPCODE_FMA: + return VGPU10_OPCODE_MAD; /* DX11.1 Opcodes */ case TGSI_OPCODE_DDIV: @@ -1474,7 +1552,7 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, { enum tgsi_file_type file = reg->Register.File; unsigned index = reg->Register.Index; - const boolean indirect = reg->Register.Indirect; + boolean indirect = reg->Register.Indirect; unsigned tempArrayId = get_temp_array_id(emit, file, index); boolean index2d = (reg->Register.Dimension || tempArrayId > 0 || @@ -1528,11 +1606,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, /* Emitted as vCoverage0.x */ /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32) * elements where s is the maximum number of color samples supported - * by the implementation. 
With current implementation, we should not - * have more than one element. So assert if Index != 0 + * by the implementation. */ - assert((!reg->Register.Indirect && reg->Register.Index == 0) || - reg->Register.Indirect); operand0.value = 0; operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK; operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; @@ -1749,12 +1824,93 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } } } + else if (emit->unit == PIPE_SHADER_COMPUTE) { + if (file == TGSI_FILE_SYSTEM_VALUE) { + if (index == emit->cs.thread_id_index) { + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP; + index = 0; + } else if (index == emit->cs.block_id_index) { + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = swizzleX; + operand0.swizzleY = swizzleY; + operand0.swizzleZ = swizzleZ; + operand0.swizzleW = swizzleW; + emit_dword(emit, operand0.value); + return; + } else if (index == emit->cs.grid_size.tgsi_index) { + file = TGSI_FILE_IMMEDIATE; + index = emit->cs.grid_size.imm_index; + } + } + } if (file == TGSI_FILE_ADDRESS) { index = emit->address_reg_index[index]; file = TGSI_FILE_TEMPORARY; } + if (file == TGSI_FILE_CONSTANT) { + /** + * If this constant buffer is to be bound as srv raw buffer, + * then we have to load the constant to a temp first before + * it can be used as a source in the instruction. + * This is accomplished in two passes. The first pass is to + * identify if there is any constbuf to rawbuf translation. + * If there isn't, emit the instruction as usual. 
+ * If there is, then we save the constant buffer reference info, + * and then instead of emitting the instruction at the end + * of the instruction, it will trigger a second pass of parsing + * this instruction. Before it starts the parsing, it will + * load the referenced raw buffer elements to temporaries. + * Then it will emit the instruction that replaces the + * constant buffer replaces with the corresponding temporaries. + */ + if (emit->raw_bufs & (1 << index2)) { + if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) { + unsigned tmpIdx = emit->raw_buf_cur_tmp_index; + + emit->raw_buf_tmp[tmpIdx].buffer_index = index2; + + /* Save whether the element index is indirect indexing */ + emit->raw_buf_tmp[tmpIdx].indirect = indirect; + + /* If it is indirect index, save the temporary + * address index, otherwise, save the immediate index. + */ + if (indirect) { + emit->raw_buf_tmp[tmpIdx].element_index = + emit->address_reg_index[reg->Indirect.Index]; + emit->raw_buf_tmp[tmpIdx].element_rel = + reg->Register.Index; + } + else { + emit->raw_buf_tmp[tmpIdx].element_index = index; + emit->raw_buf_tmp[tmpIdx].element_rel = 0; + } + + emit->raw_buf_cur_tmp_index++; + emit->reemit_rawbuf_instruction = REEMIT_TRUE; + emit->discard_instruction = TRUE; + emit->reemit_tgsi_instruction = TRUE; + } + else { + /* In the reemitting process, replace the constant buffer + * reference with temporary. 
+ */ + file = TGSI_FILE_TEMPORARY; + index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index; + index2d = FALSE; + indirect = FALSE; + emit->raw_buf_cur_tmp_index++; + } + } + } + if (file == TGSI_FILE_TEMPORARY) { if (need_temp_reg_initialization(emit, index)) { emit->initialize_temp_index = index; @@ -1801,6 +1957,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } } + check_register_index(emit, operand0.operandType, index); + /* Emit the operand tokens */ emit_dword(emit, operand0.value); if (operand0.extended) @@ -1834,6 +1992,7 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, emit_dword(emit, remap_temp_index(emit, file, index)); if (indirect) { + assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP); emit_indirect_register(emit, reg->Indirect.Index); } } @@ -1873,9 +2032,15 @@ emit_resource_register(struct svga_shader_emitter_v10 *emit, */ static void emit_sampler_register(struct svga_shader_emitter_v10 *emit, - unsigned sampler_number) + unsigned unit) { VGPU10OperandToken0 operand0; + unsigned sampler_number; + + sampler_number = emit->key.tex[unit].sampler_index; + + if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping) + sampler_number++; check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); @@ -2861,6 +3026,22 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, emit->tes.point_mode = prop->u[0].Data; break; + case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH: + emit->cs.block_width = prop->u[0].Data; + break; + + case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT: + emit->cs.block_height = prop->u[0].Data; + break; + + case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH: + emit->cs.block_depth = prop->u[0].Data; + break; + + case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL: + emit->fs.forceEarlyDepthStencil = TRUE; + break; + default: debug_printf("Unexpected TGSI property %s\n", tgsi_property_names[prop->Property.PropertyName]); @@ -2986,7 +3167,10 @@ alloc_common_immediates(struct svga_shader_emitter_v10 
*emit) } emit->common_immediate_pos[n++] = - alloc_immediate_int4(emit, 0, 1, 0, -1); + alloc_immediate_int4(emit, 0, 1, 2, -1); + + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 3, 4, 5, 6); if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 || emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) { @@ -3059,6 +3243,25 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit) } } + /** TODO: allocate immediates for all possible element byte offset? + */ + if (emit->raw_bufs) { + unsigned i; + for (i = 7; i < 12; i+=4) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3)); + } + } + + if (emit->info.indirect_files & + (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) { + unsigned i; + for (i = 7; i < 8; i+=4) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3)); + } + } + assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); emit->num_common_immediates = n; } @@ -3139,6 +3342,8 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) emit_dword(emit, opcode0.value); end_emit_instruction(emit); + alloc_common_immediates(emit); + /* Declare constant registers */ emit_constant_declaration(emit); @@ -3146,7 +3351,14 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) emit_sampler_declarations(emit); emit_resource_declarations(emit); - alloc_common_immediates(emit); + /* Declare images */ + emit_image_declarations(emit); + + /* Declare shader buffers */ + emit_shader_buf_declarations(emit); + + /* Declare atomic buffers */ + emit_atomic_buf_declarations(emit); int nVertices = emit->key.tcs.vertices_per_patch; emit->tcs.imm_index = @@ -3387,13 +3599,17 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, /* Usually this applies to TCS only. 
If shader is reading output of * patch constant in fork phase, we should reemit all instructions - * which are writting into ouput of patch constant in fork phase + * which are writting into output of patch constant in fork phase * to store results into temporaries. */ + assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction)); if (emit->reemit_instruction) { assert(emit->unit == PIPE_SHADER_TESS_CTRL); ret = emit_vgpu10_instruction(emit, inst_number, &parse->FullToken.FullInstruction); + } else if (emit->reemit_rawbuf_instruction) { + ret = emit_rawbuf_instruction(emit, inst_number, + &parse->FullToken.FullInstruction); } if (!ret) @@ -3405,6 +3621,25 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, /** + * Emit the thread group declaration for compute shader. + */ +static void +emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, emit->cs.block_width); + emit_dword(emit, emit->cs.block_height); + emit_dword(emit, emit->cs.block_depth); + end_emit_instruction(emit); +} + + +/** * Emit index range declaration. 
*/ static boolean @@ -4557,6 +4792,41 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, map_tgsi_semantic_to_sgn_name(semantic_name)); } break; + case TGSI_SEMANTIC_THREAD_ID: + assert(emit->unit >= PIPE_SHADER_COMPUTE); + assert(emit->version >= 50); + emit->cs.thread_id_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_BLOCK_ID: + assert(emit->unit >= PIPE_SHADER_COMPUTE); + assert(emit->version >= 50); + emit->cs.block_id_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_GRID_SIZE: + assert(emit->unit == PIPE_SHADER_COMPUTE); + assert(emit->version >= 50); + emit->cs.grid_size.tgsi_index = index; + break; default: debug_printf("unexpected system value semantic index %u / %s\n", semantic_name, tgsi_semantic_names[semantic_name]); @@ -4623,6 +4893,7 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, " but [%u] is the limit.\n", num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + emit->register_overflow = TRUE; } /* The linker doesn't enforce the max UBO size so we clamp here */ emit->num_shader_consts[constbuf] = @@ -4669,6 +4940,43 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, } return TRUE; + case TGSI_FILE_IMAGE: + { + unsigned unit = decl->Range.First; + assert(decl->Range.First == 
decl->Range.Last); + assert(unit < PIPE_MAX_SHADER_IMAGES); + emit->image[unit] = decl->Image; + emit->image_mask |= 1 << unit; + emit->num_images++; + } + return TRUE; + + case TGSI_FILE_HW_ATOMIC: + /* Declare the atomic buffer if it is not already declared. */ + if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) { + emit->num_atomic_bufs++; + emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D); + } + + /* Remember the maximum atomic counter index encountered */ + emit->max_atomic_counter_index = + MAX2(emit->max_atomic_counter_index, decl->Range.Last); + return TRUE; + + case TGSI_FILE_MEMORY: + /* Record memory has been used. */ + if (emit->unit == PIPE_SHADER_COMPUTE && + decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) { + emit->cs.shared_memory_declared = TRUE; + } + + return TRUE; + + case TGSI_FILE_BUFFER: + assert(emit->version >= 50); + emit->num_shader_bufs++; + return TRUE; + default: assert(!"Unexpected type of declaration"); return FALSE; @@ -4676,7 +4984,6 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, } - /** * Emit input declarations for fragment shader. 
*/ @@ -4928,27 +5235,6 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) } if (emit->tcs.control_point_phase) { - if (emit->tcs.control_point_input_index == INVALID_INDEX) { - - /* Add input control point declaration if it does not exist */ - if ((indicesMask & (1 << emit->linkage.position_index)) == 0) { - emit->linkage.input_map[emit->linkage.num_inputs] = - emit->linkage.position_index; - emit->tcs.control_point_input_index = emit->linkage.num_inputs++; - - emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, - VGPU10_OPERAND_TYPE_INPUT, - VGPU10_OPERAND_INDEX_2D, - emit->linkage.position_index, - emit->key.tcs.vertices_per_patch, - VGPU10_NAME_UNDEFINED, - VGPU10_OPERAND_4_COMPONENT, - VGPU10_OPERAND_4_COMPONENT_MASK_MODE, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, - SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); - } - } /* Also add an address register for the indirection to the * input control points @@ -5447,6 +5733,17 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) emit->tcs.invocation_id_tmp_index = total_temps++; } + if (emit->raw_bufs) { + /** + * Add 3 more temporaries if we need to translate constant buffer + * to srv raw buffer. Since we need to load the value to a temporary + * before it can be used as a source. There could be three source + * register in an instruction. + */ + emit->raw_buf_tmp_index = total_temps; + total_temps+=3; + } + for (i = 0; i < emit->num_address_regs; i++) { emit->address_reg_index[i] = total_temps++; } @@ -5618,8 +5915,20 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) } } } + if (emit->key.image_size_used) { + emit->image_size_index = total_consts; + total_consts += emit->num_images; + } if (total_consts > 0) { + if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + debug_printf("Warning: Too many constants [%u] declared in constant" + " buffer 0. 
%u is the limit.\n", + total_consts, + VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT; + emit->register_overflow = TRUE; + } begin_emit_instruction(emit); emit_dword(emit, opcode0.value); emit_dword(emit, operand0.value); @@ -5629,14 +5938,41 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) } /* Declare remaining constant buffers (UBOs) */ + for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) { if (emit->num_shader_consts[i] > 0) { - begin_emit_instruction(emit); - emit_dword(emit, opcode0.value); - emit_dword(emit, operand0.value); - emit_dword(emit, i); /* which const buffer slot */ - emit_dword(emit, emit->num_shader_consts[i]); - end_emit_instruction(emit); + if (emit->raw_bufs & (1 << i)) { + /* UBO declared as srv raw buffer */ + + VGPU10OpcodeToken0 opcode1; + VGPU10OperandToken0 operand1; + + opcode1.value = 0; + opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW; + opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN; + + operand1.value = 0; + operand1.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand1.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode1.value); + emit_dword(emit, operand1.value); + emit_dword(emit, i + emit->raw_buf_srv_start_index); + end_emit_instruction(emit); + } + else { + + /* UBO declared as const buffer */ + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); /* which const buffer slot */ + emit_dword(emit, emit->num_shader_consts[i]); + end_emit_instruction(emit); + } } } @@ -5652,7 +5988,8 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) { unsigned i; - for (i = 0; i < emit->num_samplers; i++) { + for (i = 0; i < emit->key.num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; 
VGPU10OperandToken0 operand0; @@ -5683,7 +6020,8 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) static unsigned pipe_texture_to_resource_dimension(enum tgsi_texture_type target, unsigned num_samples, - boolean is_array) + boolean is_array, + boolean is_uav) { switch (target) { case PIPE_BUFFER: @@ -5710,8 +6048,9 @@ pipe_texture_to_resource_dimension(enum tgsi_texture_type target, else return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; case PIPE_TEXTURE_CUBE_ARRAY: - return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : - VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : + (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE); default: assert(!"Unexpected resource type"); return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; @@ -5725,7 +6064,8 @@ pipe_texture_to_resource_dimension(enum tgsi_texture_type target, static unsigned tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, unsigned num_samples, - boolean is_array) + boolean is_array, + boolean is_uav) { if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) { target = TGSI_TEXTURE_2D; @@ -5746,7 +6086,8 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: - return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; case TGSI_TEXTURE_SHADOW1D: return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; case TGSI_TEXTURE_SHADOW2D: @@ -5766,6 +6107,9 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; case TGSI_TEXTURE_CUBE_ARRAY: + return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : + (is_array ? 
VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE); case TGSI_TEXTURE_SHADOWCUBE_ARRAY: return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; @@ -5811,6 +6155,9 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) /* Emit resource decl for each sampler */ for (i = 0; i < emit->num_samplers; i++) { + if (!(emit->info.samplers_declared & (1 << i))) + continue; + VGPU10OpcodeToken0 opcode0; VGPU10OperandToken0 operand0; VGPU10ResourceReturnTypeToken return_type; @@ -5822,13 +6169,15 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) opcode0.resourceDimension = tgsi_texture_to_resource_dimension(emit->sampler_target[i], emit->key.tex[i].num_samples, - emit->key.tex[i].is_array); + emit->key.tex[i].is_array, + FALSE); } else { opcode0.resourceDimension = pipe_texture_to_resource_dimension(emit->key.tex[i].target, emit->key.tex[i].num_samples, - emit->key.tex[i].is_array); + emit->key.tex[i].is_array, + FALSE); } opcode0.sampleCount = emit->key.tex[i].num_samples; operand0.value = 0; @@ -5882,6 +6231,240 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) return TRUE; } + +/** + * Emit instruction to declare uav for the shader image + */ +static void +emit_image_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i = 0; + unsigned unit = 0; + unsigned uav_mask = 0; + + /* Emit uav decl for each image */ + for (i = 0; i < emit->num_images; i++, unit++) { + + /* Find the unit index of the next declared image. + */ + while (!(emit->image_mask & (1 << unit))) { + unit++; + } + + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10ResourceReturnTypeToken return_type; + + /* If the corresponding uav for the image is already declared, + * skip this image declaration. 
+ */ + if (uav_mask & (1 << emit->key.images[unit].uav_index)) + continue; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED; + opcode0.uavResourceDimension = + tgsi_texture_to_resource_dimension(emit->image[unit].Resource, + 0, emit->key.images[unit].is_array, + TRUE); + + if (emit->key.images[unit].is_single_layer && + emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) { + opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D; + } + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + return_type.value = 0; + return_type.component0 = + return_type.component1 = + return_type.component2 = + return_type.component3 = emit->key.images[unit].return_type + 1; + + assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID); + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, emit->key.images[unit].uav_index); + emit_dword(emit, return_type.value); + end_emit_instruction(emit); + + /* Mark the uav is already declared */ + uav_mask |= 1 << emit->key.images[unit].uav_index; + } + + emit->uav_declared |= uav_mask; +} + + +/** + * Emit instruction to declare uav for the shader buffer + */ +static void +emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + unsigned uav_mask = 0; + + /* Emit uav decl for each shader buffer */ + for (i = 0; i < emit->num_shader_bufs; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + /* If the corresponding uav for the shader buf is already declared, + * skip this shader buffer declaration. 
+ */ + if (uav_mask & (1 << emit->key.shader_buf_uav_index[i])) + continue; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW; + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID); + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, emit->key.shader_buf_uav_index[i]); + end_emit_instruction(emit); + + /* Mark the uav is already declared */ + uav_mask |= 1 << emit->key.shader_buf_uav_index[i]; + } + + emit->uav_declared |= uav_mask; +} + + +/** + * Emit instruction to declare thread group shared memory(tgsm) for shared memory + */ +static void +emit_memory_declarations(struct svga_shader_emitter_v10 *emit) +{ + if (emit->cs.shared_memory_declared) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW; + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 
1 : 0; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + + /* In current state tracker, TGSI shader declares only one shared memory + * TODO: To fix TGSI shader in state tracker to get all shared memory + * declarations and then fix following indexing. For now, default index + * is 1 as per translated TGSI shader + */ + emit_dword(emit, 1); + emit_dword(emit, emit->key.cs.mem_size); /* byte Count */ + end_emit_instruction(emit); + } +} + + +/** + * Emit instruction to declare uav for atomic buffers + */ +static void +emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned atomic_bufs_mask = emit->atomic_bufs_mask; + unsigned uav_mask = 0; + + /* Emit uav decl for each atomic buffer */ + while (atomic_bufs_mask) { + unsigned buf_index = u_bit_scan(&atomic_bufs_mask); + unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index]; + + /* If the corresponding uav for the shader buf is already declared, + * skip this shader buffer declaration. + */ + if (uav_mask & (1 << uav_index)) + continue; + + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + assert(uav_index != SVGA3D_INVALID_ID); + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW; + opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER; + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 
1 : 0; + opcode0.uavHasCounter = 1; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, uav_index); + end_emit_instruction(emit); + + /* Mark the uav is already declared */ + uav_mask |= 1 << uav_index; + } + + emit->uav_declared |= uav_mask; + + /* Allocate immediates to be used for index to the atomic buffers */ + unsigned j = 0; + for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) { + alloc_immediate_int4(emit, j+0, j+1, j+2, j+3); + } + + /* Allocate immediates for the atomic counter index */ + for (; j <= emit->max_atomic_counter_index; j+=4) { + alloc_immediate_int4(emit, j+0, j+1, j+2, j+3); + } +} + + /** * Emit instruction with n=1, 2 or 3 source registers. */ @@ -6751,12 +7334,12 @@ emit_if(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of + * Emit code for conditional discard instruction (discard fragment if any of * the register components are negative). */ static boolean -emit_kill_if(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_cond_discard(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); @@ -6774,8 +7357,9 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit, if (!same_swizzle_terms(&inst->Src[0])) { /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to - * logically OR the swizzle terms. Most uses of KILL_IF only - * test one channel so it's good to avoid these extra steps. + * logically OR the swizzle terms. 
Most uses of this conditional + * discard instruction only test one channel so it's good to + * avoid these extra steps. */ struct tgsi_full_src_register tmp_src_yyyy = scalar_src(&tmp_src, TGSI_SWIZZLE_Y); @@ -6804,11 +7388,11 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). + * Emit code for the unconditional discard instruction. */ static boolean -emit_kill(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_discard(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); @@ -7834,7 +8418,7 @@ begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, swz->inst_dst = &inst->Dst[0]; swz->coord_src = &inst->Src[0]; - emit->fs.shadow_compare_units |= shadow_compare << unit; + emit->shadow_compare_units |= shadow_compare << unit; } @@ -8045,12 +8629,16 @@ emit_tex(struct svga_shader_emitter_v10 *emit, struct tgsi_full_src_register coord; int offsets[3]; struct tex_swizzle_info swz_info; + boolean compare_in_shader; /* check that the sampler returns a float */ if (!is_valid_tex_instruction(emit, inst)) return TRUE; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + compare_in_shader = tgsi_is_shadow_target(target) && + emit->key.tex[unit].compare_in_shader; + + begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -8059,7 +8647,7 @@ emit_tex(struct svga_shader_emitter_v10 *emit, /* SAMPLE dst, coord(s0), resource, sampler */ begin_emit_instruction(emit); - if (tgsi_is_shadow_target(target)) + if (tgsi_is_shadow_target(target) && !compare_in_shader) opcode = VGPU10_OPCODE_SAMPLE_C; else opcode = VGPU10_OPCODE_SAMPLE; @@ -8182,7 +8770,8 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, emit_resource_register(emit, unit); /* sampler */ - sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); + sampler 
= make_src_reg(TGSI_FILE_SAMPLER, + emit->key.tex[unit].sampler_index); sampler.Register.SwizzleX = sampler.Register.SwizzleY = sampler.Register.SwizzleZ = @@ -8222,7 +8811,8 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, emit_resource_register(emit, unit); /* sampler */ - sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); + sampler = make_src_reg(TGSI_FILE_SAMPLER, + emit->key.tex[unit].sampler_index); sampler.Register.SwizzleX = sampler.Register.SwizzleY = sampler.Register.SwizzleZ = @@ -8263,12 +8853,20 @@ emit_tex2(struct svga_shader_emitter_v10 *emit, struct tgsi_full_src_register coord, ref; int offsets[3]; struct tex_swizzle_info swz_info; + VGPU10_OPCODE_TYPE opcode; + boolean compare_in_shader; /* check that the sampler returns a float */ if (!is_valid_tex_instruction(emit, inst)) return TRUE; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + compare_in_shader = emit->key.tex[unit].compare_in_shader; + if (compare_in_shader) + opcode = VGPU10_OPCODE_SAMPLE; + else + opcode = VGPU10_OPCODE_SAMPLE_C; + + begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -8277,13 +8875,15 @@ emit_tex2(struct svga_shader_emitter_v10 *emit, /* SAMPLE_C dst, coord, resource, sampler, ref */ begin_emit_instruction(emit); - emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C, + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); emit_src_register(emit, &coord); emit_resource_register(emit, unit); emit_sampler_register(emit, unit); - emit_tex_compare_refcoord(emit, target, &ref); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &ref); + } end_emit_instruction(emit); end_tex_swizzle(emit, &swz_info); @@ -8312,12 +8912,16 @@ emit_txp(struct svga_shader_emitter_v10 *emit, scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); struct tgsi_full_src_register coord; struct tex_swizzle_info swz_info; + boolean 
compare_in_shader; /* check that the sampler returns a float */ if (!is_valid_tex_instruction(emit, inst)) return TRUE; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + compare_in_shader = tgsi_is_shadow_target(target) && + emit->key.tex[unit].compare_in_shader; + + begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -8330,7 +8934,7 @@ emit_txp(struct svga_shader_emitter_v10 *emit, /* SAMPLE dst, coord(tmp), resource, sampler */ begin_emit_instruction(emit); - if (tgsi_is_shadow_target(target)) + if (tgsi_is_shadow_target(target) && !compare_in_shader) /* NOTE: for non-fragment shaders, we should use * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is. */ @@ -8604,6 +9208,7 @@ opcode_has_dbl_dst(unsigned opcode) case TGSI_OPCODE_DNEG: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: + case TGSI_OPCODE_DFMA: // XXX more TBD return true; default: @@ -8629,6 +9234,7 @@ opcode_has_dbl_src(unsigned opcode) case TGSI_OPCODE_DNEG: case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: + case TGSI_OPCODE_DFMA: // XXX more TBD return true; default: @@ -8676,6 +9282,7 @@ check_double_dst_writemask(const struct tgsi_full_instruction *inst) case TGSI_OPCODE_DRCP: case TGSI_OPCODE_DSQRT: case TGSI_OPCODE_F2D: + case TGSI_OPCODE_DFMA: assert(writemask == TGSI_WRITEMASK_XYZW || writemask == TGSI_WRITEMASK_XY || writemask == TGSI_WRITEMASK_ZW); @@ -9263,19 +9870,872 @@ emit_vmware(struct svga_shader_emitter_v10 *emit, return TRUE; } +/** + * Emit a memory register + */ + +typedef enum { + MEM_STORE = 0, + MEM_LOAD = 1, + MEM_ATOMIC_COUNTER +} memory_op; + +static void +emit_memory_register(struct svga_shader_emitter_v10 *emit, + memory_op mem_op, + const struct tgsi_full_instruction *inst, + unsigned regIndex, unsigned writemask) +{ + VGPU10OperandToken0 operand0; + unsigned resIndex = 0; + + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY; + operand0.indexDimension = 
VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + switch (mem_op) { + case MEM_ATOMIC_COUNTER: + { + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + resIndex = inst->Src[regIndex].Register.Index; + break; + } + case MEM_STORE: + { + const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex]; + + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = writemask; + resIndex = reg->Register.Index; + break; + } + case MEM_LOAD: + { + const struct tgsi_full_src_register *reg = &inst->Src[regIndex]; + + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = reg->Register.SwizzleX; + operand0.swizzleY = reg->Register.SwizzleY; + operand0.swizzleZ = reg->Register.SwizzleZ; + operand0.swizzleW = reg->Register.SwizzleW; + resIndex = reg->Register.Index; + break; + } + default: + assert(!"Unexpected memory opcode"); + break; + } + + emit_dword(emit, operand0.value); + emit_dword(emit, resIndex); +} + + +typedef enum { + UAV_STORE = 0, + UAV_LOAD = 1, + UAV_ATOMIC = 2, + UAV_RESQ = 3, +} UAV_OP; + /** - * Translate a single TGSI instruction to VGPU10. 
+ * Emit a uav register + * \param uav_index index of resource register + * \param uav_op UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode + * \param resourceType resource file type + * \param writemask resource writemask + */ + +static void +emit_uav_register(struct svga_shader_emitter_v10 *emit, + unsigned res_index, UAV_OP uav_op, + enum tgsi_file_type resourceType, unsigned writemask) +{ + VGPU10OperandToken0 operand0; + unsigned uav_index = INVALID_INDEX; + + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + switch (resourceType) { + case TGSI_FILE_IMAGE: + uav_index = emit->key.images[res_index].uav_index; + break; + case TGSI_FILE_BUFFER: + uav_index = emit->key.shader_buf_uav_index[res_index]; + break; + case TGSI_FILE_HW_ATOMIC: + uav_index = emit->key.atomic_buf_uav_index[res_index]; + break; + default: + assert(0); + } + + switch (uav_op) { + case UAV_ATOMIC: + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + break; + + case UAV_STORE: + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = writemask; + break; + + case UAV_LOAD: + case UAV_RESQ: + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_Y; + operand0.swizzleZ = VGPU10_COMPONENT_Z; + operand0.swizzleW = VGPU10_COMPONENT_W; + break; + + default: + break; + } + + emit_dword(emit, operand0.value); + emit_dword(emit, uav_index); +} + + +/** + * A helper function to emit the uav address. + * For memory, buffer, and image resource, it is set to the specified address. + * For HW atomic counter, the address is the sum of the address offset and the + * offset into the HW atomic buffer as specified by the register index. + * It is also possible to specify the counter index as an indirect address. 
+ * And in this case, the uav address will be the sum of the address offset and the + * counter index specified in the indirect address. + */ +static +struct tgsi_full_src_register +emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit, + enum tgsi_file_type resourceType, + unsigned resourceIndex, + unsigned resourceIndirect, + unsigned resourceIndirectIndex, + const struct tgsi_full_src_register *addr_reg) +{ + unsigned addr_tmp; + struct tgsi_full_dst_register addr_dst; + struct tgsi_full_src_register addr_src; + struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2); + + addr_tmp = get_temp_index(emit); + addr_dst = make_dst_temp_reg(addr_tmp); + addr_src = make_src_temp_reg(addr_tmp); + + /* specified address offset */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg); + + /* For HW atomic counter, we need to find the index to the + * HW atomic buffer. + */ + if (resourceType == TGSI_FILE_HW_ATOMIC) { + if (resourceIndirect) { + + /** + * uav addr offset = counter layout offset + + * counter indirect index address + address offset + */ + + /* counter layout offset */ + struct tgsi_full_src_register layout_offset; + layout_offset = + make_immediate_reg_int(emit, resourceIndex); + + /* counter layout offset + address offset */ + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst, + &addr_src, &layout_offset); + + /* counter indirect index address */ + unsigned indirect_addr = + emit->address_reg_index[resourceIndirectIndex]; + + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* counter layout offset + address offset + counter indirect address */ + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst, + &addr_src, &indirect_addr_src); + + } else { + struct tgsi_full_src_register index_src; + + index_src = make_immediate_reg_int(emit, resourceIndex); + + /* uav addr offset = counter index address + address 
offset */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst, + &addr_src, &index_src); + } + + /* HW atomic buffer is declared as raw buffer, so the buffer address is + * the byte offset, so we need to multiple the counter addr offset by 4. + */ + emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst, + &addr_src, &two); + } + else if (resourceType == TGSI_FILE_IMAGE) { + if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D) + && emit->key.images[resourceIndex].is_single_layer) { + + struct tgsi_full_dst_register addr_dst_z = + writemask_dst(&addr_dst, TGSI_WRITEMASK_Z); + struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); + + /* For non-layered 3D texture image view, we have to make sure the z + * component of the address offset is set to 0. + */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z, + &zero); + } + } + + return addr_src; +} + + + +/** + * A helper function to expand indirect indexing to uav resource + * by looping through the resource array, compare the indirect index and + * emit the instruction for each resource in the array. 
+ */ +static void +loop_instruction(unsigned index, unsigned count, + struct tgsi_full_src_register *addr_index, + void (*fb)(struct svga_shader_emitter_v10 *, + const struct tgsi_full_instruction *, unsigned), + struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + if (count == 0) + return; + + if (index > 0) { + /* ELSE */ + emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); + } + + struct tgsi_full_src_register index_src = + make_immediate_reg_int(emit, index); + + unsigned tmp_index = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); + struct tgsi_full_src_register tmp_src_x = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index); + + /* IEQ tmp, addr_tmp_index, index */ + emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst, + addr_index, &index_src); + + /* IF tmp */ + emit_if(emit, &tmp_src_x); + + free_temp_indexes(emit); + + (*fb)(emit, inst, index); + + loop_instruction(index+1, count-1, addr_index, fb, emit, inst); + + /* ENDIF */ + emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); +} + + +/** + * A helper function to emit the load instruction. 
+ */ +static void +emit_load_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned resourceIndex) +{ + VGPU10OpcodeToken0 token0; + struct tgsi_full_src_register addr_src; + enum tgsi_file_type resourceType = inst->Src[0].Register.File; + + /* Resolve the resource address for this resource first */ + addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex, + inst->Src[0].Register.Indirect, + inst->Src[0].Indirect.Index, + &inst->Src[1]); + + /* LOAD resource, address, src */ + begin_emit_instruction(emit); + + token0.value = 0; + + if (resourceType == TGSI_FILE_MEMORY || + resourceType == TGSI_FILE_BUFFER || + resourceType == TGSI_FILE_HW_ATOMIC) { + token0.opcodeType = VGPU10_OPCODE_LD_RAW; + addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); + } + else { + token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED; + } + + token0.saturate = inst->Instruction.Saturate, + emit_dword(emit, token0.value); + + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &addr_src); + + if (resourceType == TGSI_FILE_MEMORY) { + emit_memory_register(emit, MEM_LOAD, inst, 0, 0); + } else if (resourceType == TGSI_FILE_HW_ATOMIC) { + emit_uav_register(emit, inst->Src[0].Dimension.Index, + UAV_LOAD, inst->Src[0].Register.File, 0); + } else { + emit_uav_register(emit, resourceIndex, + UAV_LOAD, inst->Src[0].Register.File, 0); + } + + end_emit_instruction(emit); + + free_temp_indexes(emit); +} + + +/** + * Emit uav / memory load instruction */ static boolean -emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, - unsigned inst_number, - const struct tgsi_full_instruction *inst) +emit_load(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const enum tgsi_opcode opcode = inst->Instruction.Opcode; + enum tgsi_file_type resourceType = inst->Src[0].Register.File; + unsigned resourceIndex = inst->Src[0].Register.Index; - if (emit->skip_instruction) + /* If the resource register has 
indirect index, we will need + * to expand it since SM5 device does not support indirect indexing + * for uav. + */ + if (inst->Src[0].Register.Indirect && + (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { + + unsigned indirect_index = inst->Src[0].Indirect.Index; + unsigned num_resources = + resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : + emit->num_images; + + /* indirect index tmp register */ + unsigned indirect_addr = emit->address_reg_index[indirect_index]; + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* Add offset to the indirect index */ + if (inst->Src[0].Register.Index != 0) { + struct tgsi_full_src_register offset = + make_immediate_reg_int(emit, inst->Src[0].Register.Index); + struct tgsi_full_dst_register indirect_addr_dst = + make_dst_temp_reg(indirect_addr); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst, + &indirect_addr_src, &offset); + } + + /* Loop through the resource array to find which resource to use. + */ + loop_instruction(0, num_resources, &indirect_addr_src, + emit_load_instruction, emit, inst); + } + else { + emit_load_instruction(emit, inst, resourceIndex); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * A helper function to emit a store instruction. 
+ */ +static void +emit_store_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned resourceIndex) +{ + VGPU10OpcodeToken0 token0; + enum tgsi_file_type resourceType = inst->Dst[0].Register.File; + unsigned writemask = inst->Dst[0].Register.WriteMask; + struct tgsi_full_src_register addr_src; + + unsigned tmp_index = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); + struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index); + struct tgsi_full_dst_register tmp_dst; + + struct tgsi_full_src_register src = inst->Src[1]; + struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4); + + boolean needLoad = FALSE; + boolean needPerComponentStore = FALSE; + unsigned swizzles = 0; + + /* Resolve the resource address for this resource first */ + addr_src = emit_uav_addr_offset(emit, resourceType, + inst->Dst[0].Register.Index, + inst->Dst[0].Register.Indirect, + inst->Dst[0].Indirect.Index, + &inst->Src[0]); + + /* First check the writemask to see if it can be supported + * by the store instruction. + * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory, + * we can adjust the address offset, and do a per-component store. + * store_uav_typed only allows .xyzw. In this case, we need to + * do a load first, update the temporary and then issue the + * store. This does have a small risk that if different threads + * update different components of the same address, data might not be + * in sync. + */ + if (resourceType == TGSI_FILE_IMAGE) { + needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? 
FALSE : TRUE; + } + else if (resourceType == TGSI_FILE_BUFFER || + resourceType == TGSI_FILE_MEMORY) { + if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY || + writemask == TGSI_WRITEMASK_XYZ || + writemask == TGSI_WRITEMASK_XYZW)) { + needPerComponentStore = TRUE; + } + } + + if (needLoad) { + assert(resourceType == TGSI_FILE_IMAGE); + + /* LOAD resource, address, src */ + begin_emit_instruction(emit); + + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED; + token0.saturate = inst->Instruction.Saturate, + emit_dword(emit, token0.value); + + emit_dst_register(emit, &tmp_dst_xyzw); + emit_src_register(emit, &addr_src); + emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0); + + end_emit_instruction(emit); + + /* MOV tmp(writemask) src */ + tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]); + + /* Now set the writemask to xyzw for the store_uav_typed instruction */ + writemask = TGSI_WRITEMASK_XYZW; + } + else if (needPerComponentStore) { + /* Save the src swizzles */ + swizzles = src.Register.SwizzleX | + src.Register.SwizzleY << 2 | + src.Register.SwizzleZ << 4 | + src.Register.SwizzleW << 6; + } + + boolean storeDone = FALSE; + unsigned perComponentWritemask = writemask; + unsigned shift = 0; + struct tgsi_full_src_register shift_src; + + while (!storeDone) { + + if (needPerComponentStore) { + assert(perComponentWritemask); + while (!(perComponentWritemask & TGSI_WRITEMASK_X)) { + shift++; + perComponentWritemask >>= 1; + } + + /* First adjust the addr_src to the next component */ + if (shift != 0) { + struct tgsi_full_dst_register addr_dst = + make_dst_temp_reg(addr_src.Register.Index); + shift_src = make_immediate_reg_int(emit, shift); + emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four, + &shift_src, &addr_src); + + /* Adjust the src swizzle as well */ + swizzles >>= (shift * 2); + } + + /* Now the address offset is set to 
the next component, + * we can set the writemask to .x and make sure to set + * the src swizzle as well. + */ + src.Register.SwizzleX = swizzles & 0x3; + writemask = TGSI_WRITEMASK_X; + + /* Shift for the next component check */ + perComponentWritemask >>= 1; + shift = 1; + } + + /* STORE resource, address, src */ + begin_emit_instruction(emit); + + token0.value = 0; + token0.saturate = inst->Instruction.Saturate; + + if (resourceType == TGSI_FILE_MEMORY) { + token0.opcodeType = VGPU10_OPCODE_STORE_RAW; + addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); + emit_dword(emit, token0.value); + emit_memory_register(emit, MEM_STORE, inst, 0, writemask); + } + else if (resourceType == TGSI_FILE_BUFFER || + resourceType == TGSI_FILE_HW_ATOMIC) { + token0.opcodeType = VGPU10_OPCODE_STORE_RAW; + addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); + emit_dword(emit, token0.value); + emit_uav_register(emit, resourceIndex, UAV_STORE, + resourceType, writemask); + } + else { + token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED; + emit_dword(emit, token0.value); + emit_uav_register(emit, resourceIndex, UAV_STORE, + resourceType, writemask); + } + + emit_src_register(emit, &addr_src); + + if (needLoad) + emit_src_register(emit, &tmp_src); + else + emit_src_register(emit, &src); + + end_emit_instruction(emit); + + if (!needPerComponentStore || !perComponentWritemask) + storeDone = TRUE; + } + + free_temp_indexes(emit); +} + + +/** + * Emit uav / memory store instruction + */ +static boolean +emit_store(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + enum tgsi_file_type resourceType = inst->Dst[0].Register.File; + unsigned resourceIndex = inst->Dst[0].Register.Index; + + /* If the resource register has indirect index, we will need + * to expand it since SM5 device does not support indirect indexing + * for uav. 
+ */ + if (inst->Dst[0].Register.Indirect && + (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { + + unsigned indirect_index = inst->Dst[0].Indirect.Index; + unsigned num_resources = + resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : + emit->num_images; + + /* Indirect index tmp register */ + unsigned indirect_addr = emit->address_reg_index[indirect_index]; + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* Add offset to the indirect index */ + if (inst->Dst[0].Register.Index != 0) { + struct tgsi_full_src_register offset = + make_immediate_reg_int(emit, inst->Dst[0].Register.Index); + struct tgsi_full_dst_register indirect_addr_dst = + make_dst_temp_reg(indirect_addr); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst, + &indirect_addr_src, &offset); + } + + /* Loop through the resource array to find which resource to use. + */ + loop_instruction(0, num_resources, &indirect_addr_src, + emit_store_instruction, emit, inst); + } + else { + emit_store_instruction(emit, inst, resourceIndex); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * A helper function to emit an atomic instruction. 
+ */ + +static void +emit_atomic_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned resourceIndex) +{ + VGPU10OpcodeToken0 token0; + enum tgsi_file_type resourceType = inst->Src[0].Register.File; + struct tgsi_full_src_register addr_src; + VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode; + + /* Resolve the resource address */ + addr_src = emit_uav_addr_offset(emit, resourceType, + inst->Src[0].Register.Index, + inst->Src[0].Register.Indirect, + inst->Src[0].Indirect.Index, + &inst->Src[1]); + + /* Emit the atomic operation */ + begin_emit_instruction(emit); + + token0.value = 0; + token0.opcodeType = opcode; + token0.saturate = inst->Instruction.Saturate, + emit_dword(emit, token0.value); + + emit_dst_register(emit, &inst->Dst[0]); + + if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { + emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0); + } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) { + assert(inst->Src[0].Register.Dimension == 1); + emit_uav_register(emit, inst->Src[0].Dimension.Index, + UAV_ATOMIC, inst->Src[0].Register.File, 0); + } else { + emit_uav_register(emit, resourceIndex, + UAV_ATOMIC, inst->Src[0].Register.File, 0); + } + + /* resource address offset */ + emit_src_register(emit, &addr_src); + + struct tgsi_full_src_register src0_x = + swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + emit_src_register(emit, &src0_x); + + if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) { + struct tgsi_full_src_register src1_x = + swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + emit_src_register(emit, &src1_x); + } + + end_emit_instruction(emit); + + free_temp_indexes(emit); +} + + +/** + * Emit atomic instruction + */ +static boolean +emit_atomic(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + VGPU10_OPCODE_TYPE opcode) +{ + enum tgsi_file_type 
resourceType = inst->Src[0].Register.File; + unsigned resourceIndex = inst->Src[0].Register.Index; + + emit->cur_atomic_opcode = opcode; + + /* If the resource register has indirect index, we will need + * to expand it since SM5 device does not support indirect indexing + * for uav. + */ + if (inst->Dst[0].Register.Indirect && + (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { + + unsigned indirect_index = inst->Dst[0].Indirect.Index; + unsigned num_resources = + resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : + emit->num_images; + + /* indirect index tmp register */ + unsigned indirect_addr = emit->address_reg_index[indirect_index]; + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* Loop through the resource array to find which resource to use. + */ + loop_instruction(0, num_resources, &indirect_addr_src, + emit_atomic_instruction, emit, inst); + } + else { + emit_atomic_instruction(emit, inst, resourceIndex); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit barrier instruction + */ +static boolean +emit_barrier(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + VGPU10OpcodeToken0 token0; + + assert(emit->version >= 50); + + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_SYNC; + + if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) { + /* SM5 device doesn't support BARRIER in tcs . 
If barrier is used + * in shader, don't do anything for this opcode and continue rest + * of shader translation + */ + util_debug_message(&emit->svga_debug_callback, INFO, + "barrier instruction is not supported in tessellation control shader\n"); return TRUE; + } + else if (emit->unit == PIPE_SHADER_COMPUTE) { + if (emit->cs.shared_memory_declared) + token0.syncThreadGroupShared = 1; + + if (emit->uav_declared) + token0.syncUAVMemoryGroup = 1; + + token0.syncThreadsInGroup = 1; + } else { + token0.syncUAVMemoryGlobal = 1; + } + + assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup || + token0.syncThreadGroupShared); + + begin_emit_instruction(emit); + emit_dword(emit, token0.value); + end_emit_instruction(emit); + + return TRUE; +} + +/** + * Emit memory barrier instruction + */ +static boolean +emit_memory_barrier(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned index = inst->Src[0].Register.Index; + unsigned swizzle = inst->Src[0].Register.SwizzleX; + unsigned bartype = emit->immediates[index][swizzle].Int; + VGPU10OpcodeToken0 token0; + + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_SYNC; + + if (emit->unit == PIPE_SHADER_COMPUTE) { + + /* For compute shader, issue sync opcode with different options + * depending on the memory barrier type. + * + * Bit 0: Shader storage buffers + * Bit 1: Atomic buffers + * Bit 2: Images + * Bit 3: Shared memory + * Bit 4: Thread group + */ + + if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE)) + token0.syncUAVMemoryGlobal = 1; + else if (bartype & TGSI_MEMBAR_THREAD_GROUP) + token0.syncUAVMemoryGroup = 1; + + if (bartype & TGSI_MEMBAR_SHARED) + token0.syncThreadGroupShared = 1; + } + else { + /** + * For graphics stages, only sync_uglobal is available. 
+ */ + if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE)) + token0.syncUAVMemoryGlobal = 1; + } + + assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup || + token0.syncThreadGroupShared); + + begin_emit_instruction(emit); + emit_dword(emit, token0.value); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RESQ (image size) instruction. + */ +static boolean +emit_resq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register zero = + make_immediate_reg_int(emit, 0); + + unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource; + + if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) { + struct tgsi_full_src_register image_src; + + image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src); + return TRUE; + } + + begin_emit_instruction(emit); + if (uav_resource == TGSI_TEXTURE_BUFFER) { + emit_opcode(emit, VGPU10_OPCODE_BUFINFO, FALSE); + emit_dst_register(emit, &inst->Dst[0]); + } + else { + emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &zero); + } + emit_uav_register(emit, inst->Src[0].Register.Index, + UAV_RESQ, inst->Src[0].Register.File, 0); + end_emit_instruction(emit); + + return TRUE; +} + + +static boolean +emit_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + const enum tgsi_opcode opcode = inst->Instruction.Opcode; switch (opcode) { case TGSI_OPCODE_ADD: @@ -9397,9 +10857,9 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_IF: return emit_if(emit, &inst->Src[0]); case TGSI_OPCODE_KILL: - return emit_kill(emit, inst); + return emit_discard(emit, inst); case TGSI_OPCODE_KILL_IF: - return emit_kill_if(emit, inst); 
+ return emit_cond_discard(emit, inst); case TGSI_OPCODE_LG2: return emit_lg2(emit, inst); case TGSI_OPCODE_LIT: @@ -9496,12 +10956,14 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_simple(emit, inst); case TGSI_OPCODE_INTERP_OFFSET: return emit_interp_offset(emit, inst); + case TGSI_OPCODE_FMA: + case TGSI_OPCODE_DFMA: + return emit_simple(emit, inst); /* The following opcodes should never be seen here. We return zero * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED, * FMA_SUPPORTED, LDEXP_SUPPORTED queries. */ - case TGSI_OPCODE_FMA: case TGSI_OPCODE_LDEXP: case TGSI_OPCODE_DSSG: case TGSI_OPCODE_DFRACEXP: @@ -9515,31 +10977,49 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return FALSE; case TGSI_OPCODE_LOAD: + return emit_load(emit, inst); + case TGSI_OPCODE_STORE: + return emit_store(emit, inst); + case TGSI_OPCODE_ATOMAND: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND); + case TGSI_OPCODE_ATOMCAS: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH); + case TGSI_OPCODE_ATOMIMAX: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX); + case TGSI_OPCODE_ATOMIMIN: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN); + case TGSI_OPCODE_ATOMOR: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR); + case TGSI_OPCODE_ATOMUADD: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD); + case TGSI_OPCODE_ATOMUMAX: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX); + case TGSI_OPCODE_ATOMUMIN: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN); + case TGSI_OPCODE_ATOMXCHG: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH); + case TGSI_OPCODE_ATOMXOR: - return FALSE; + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR); + case TGSI_OPCODE_BARRIER: - if (emit->unit == PIPE_SHADER_TESS_CTRL) { - /* SM5 device doesn't support BARRIER in tcs . 
If barrier is used - * in shader, don't do anything for this opcode and continue rest - * of shader translation - */ - pipe_debug_message(&emit->svga_debug_callback, INFO, - "barrier instruction is not supported in tessellation control shader\n"); - return TRUE; - } - else { - return emit_simple(emit, inst); - } + return emit_barrier(emit, inst); + + case TGSI_OPCODE_MEMBAR: + return emit_memory_barrier(emit, inst); + + case TGSI_OPCODE_RESQ: + return emit_resq(emit, inst); case TGSI_OPCODE_END: if (!emit_post_helpers(emit)) @@ -9557,6 +11037,38 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, /** + * Translate a single TGSI instruction to VGPU10. + */ +static boolean +emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + if (emit->skip_instruction) + return TRUE; + + boolean ret = TRUE; + unsigned start_token = emit_get_num_tokens(emit); + + emit->reemit_tgsi_instruction = FALSE; + + ret = emit_instruction(emit, inst_number, inst); + + if (emit->reemit_tgsi_instruction) { + /** + * Reset emit->ptr to where the translation of this tgsi instruction + * started. + */ + VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; + emit->ptr = (char *) (tokens + start_token); + + emit->reemit_tgsi_instruction = FALSE; + } + return ret; +} + + +/** * Emit the extra instructions to adjust the vertex position. * There are two possible adjustments: * 1. 
Converting from Gallium to VGPU10 coordinate space by applying the @@ -10232,50 +11744,61 @@ emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit) { assert(emit->unit == PIPE_SHADER_TESS_CTRL); assert(emit->tcs.control_point_phase); - assert(emit->tcs.control_point_input_index != INVALID_INDEX); assert(emit->tcs.control_point_out_index != INVALID_INDEX); assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX); - /* UARL ADDR[INDEX].x INVOCATION.xxxx */ + struct tgsi_full_dst_register output_control_point; + output_control_point = + make_dst_output_reg(emit->tcs.control_point_out_index); - struct tgsi_full_src_register invocation_src; - struct tgsi_full_dst_register addr_dst; - struct tgsi_full_dst_register addr_dst_x; - unsigned addr_tmp; + if (emit->tcs.control_point_input_index == INVALID_INDEX) { + /* MOV OUTPUT 0.0f */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); + emit_dst_register(emit, &output_control_point); + emit_src_register(emit, &zero); + end_emit_instruction(emit); + } + else { + /* UARL ADDR[INDEX].x INVOCATION.xxxx */ - addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; - addr_dst = make_dst_temp_reg(addr_tmp); - addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register invocation_src; + struct tgsi_full_dst_register addr_dst; + struct tgsi_full_dst_register addr_dst_x; + unsigned addr_tmp; - invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, - emit->tcs.invocation_id_sys_index); + addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; + addr_dst = make_dst_temp_reg(addr_tmp); + addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); - begin_emit_instruction(emit); - emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); - emit_dst_register(emit, &addr_dst_x); - emit_src_register(emit, &invocation_src); - 
end_emit_instruction(emit); + invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, + emit->tcs.invocation_id_sys_index); + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); + emit_dst_register(emit, &addr_dst_x); + emit_src_register(emit, &invocation_src); + end_emit_instruction(emit); - /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ - struct tgsi_full_src_register input_control_point; - struct tgsi_full_dst_register output_control_point; + /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ - input_control_point = make_src_reg(TGSI_FILE_INPUT, - emit->tcs.control_point_input_index); - input_control_point.Register.Dimension = 1; - input_control_point.Dimension.Indirect = 1; - input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; - input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index; - output_control_point = - make_dst_output_reg(emit->tcs.control_point_out_index); + struct tgsi_full_src_register input_control_point; + input_control_point = make_src_reg(TGSI_FILE_INPUT, + emit->tcs.control_point_input_index); + input_control_point.Register.Dimension = 1; + input_control_point.Dimension.Indirect = 1; + input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; + input_control_point.DimIndirect.Index = + emit->tcs.control_point_addr_index; - begin_emit_instruction(emit); - emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); - emit_dst_register(emit, &output_control_point); - emit_src_register(emit, &input_control_point); - end_emit_instruction(emit); + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); + emit_dst_register(emit, &output_control_point); + emit_src_register(emit, &input_control_point); + end_emit_instruction(emit); + } } /** @@ -10404,6 +11927,9 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) else if (emit->unit == PIPE_SHADER_TESS_EVAL) { emit_domain_shader_declarations(emit); } + else if (emit->unit == PIPE_SHADER_COMPUTE) { + 
emit_compute_shader_declarations(emit); + } /* Declare inputs */ if (!emit_input_declarations(emit)) @@ -10420,6 +11946,9 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) * will already be declared in hs_decls (emit_hull_shader_declarations) */ if (emit->unit != PIPE_SHADER_TESS_CTRL) { + + alloc_common_immediates(emit); + /* Declare constant registers */ emit_constant_declaration(emit); @@ -10427,13 +11956,18 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) emit_sampler_declarations(emit); emit_resource_declarations(emit); - alloc_common_immediates(emit); - /* Now, emit the constant block containing all the immediates - * declared by shader, as well as the extra ones seen above. - */ + /* Declare images */ + emit_image_declarations(emit); + + /* Declare shader buffers */ + emit_shader_buf_declarations(emit); + + /* Declare atomic buffers */ + emit_atomic_buf_declarations(emit); } - if (emit->unit != PIPE_SHADER_FRAGMENT) { + if (emit->unit != PIPE_SHADER_FRAGMENT && + emit->unit != PIPE_SHADER_COMPUTE) { /* * Declare clip distance output registers for ClipVertex or * user defined planes @@ -10441,6 +11975,18 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) emit_clip_distance_declarations(emit); } + if (emit->unit == PIPE_SHADER_COMPUTE) { + emit_memory_declarations(emit); + + if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) { + emit->cs.grid_size.imm_index = + alloc_immediate_int4(emit, + emit->key.cs.grid_size[0], + emit->key.cs.grid_size[1], + emit->key.cs.grid_size[2], 0); + } + } + if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { float alpha = emit->key.fs.alpha_ref; @@ -10536,7 +12082,9 @@ static void emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, unsigned fs_color_tmp_index) { - /* compare output color's alpha to alpha ref and kill */ + /* compare output color's alpha to alpha ref and discard if comparison + * fails. 
+ */ unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); struct tgsi_full_src_register tmp_src_x = @@ -10671,6 +12219,93 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit) /** + * Reemit rawbuf instruction + */ +static boolean +emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + boolean ret; + + /* For all the rawbuf references in this instruction, + * load the rawbuf reference and assign to the designated temporary. + * Then reeemit the instruction. + */ + emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS; + + unsigned offset_tmp = get_temp_index(emit); + struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp); + struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp); + struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4); + + for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) { + struct tgsi_full_src_register element_src; + + /* First get the element index register. 
*/ + + if (emit->raw_buf_tmp[i].indirect) { + unsigned tmp = get_temp_index(emit); + struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register element_index = + make_src_temp_reg(emit->raw_buf_tmp[i].element_index); + struct tgsi_full_src_register element_rel = + make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel); + + element_src = make_src_temp_reg(tmp); + element_src = scalar_src(&element_src, TGSI_SWIZZLE_X); + element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X); + + /* element index from the indirect register */ + element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index); + element_index = scalar_src(&element_index, TGSI_SWIZZLE_X); + + /* IADD element_src element_index element_index_relative */ + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst, + &element_index, &element_rel); + } + else { + element_src = + make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_index); + } + + /* byte offset = element index << 4 */ + emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst, + &element_src, &four); + + struct tgsi_full_dst_register dst_tmp = + make_dst_temp_reg(i + emit->raw_buf_tmp_index); + + /* LD_RAW tmp, rawbuf byte offset, rawbuf */ + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_LD_RAW, FALSE); + emit_dst_register(emit, &dst_tmp); + + struct tgsi_full_src_register offset_x = + scalar_src(&offset_src, TGSI_SWIZZLE_X); + emit_src_register(emit, &offset_x); + + emit_resource_register(emit, + emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index); + end_emit_instruction(emit); + } + + emit->raw_buf_cur_tmp_index = 0; + + ret = emit_vgpu10_instruction(emit, inst_number, inst); + + /* reset raw buf state */ + emit->raw_buf_cur_tmp_index = 0; + emit->reemit_rawbuf_instruction = REEMIT_FALSE; + + free_temp_indexes(emit); + + return ret; +} + + +/** * Translate the TGSI tokens into VGPU10 tokens. 
*/ static boolean @@ -10730,6 +12365,10 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, ret = emit_vgpu10_instruction(emit, inst_number - 1, &parse.FullToken.FullInstruction); } + else if (emit->reemit_rawbuf_instruction) { + ret = emit_rawbuf_instruction(emit, inst_number - 1, + &parse.FullToken.FullInstruction); + } if (!ret) goto done; @@ -10765,9 +12404,13 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) VGPU10ProgramToken ptoken; /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ + + /* Maximum supported shader version is 50 */ + unsigned version = MIN2(emit->version, 50); + ptoken.value = 0; /* init whole token to zero */ - ptoken.majorVersion = emit->version / 10; - ptoken.minorVersion = emit->version % 10; + ptoken.majorVersion = version / 10; + ptoken.minorVersion = version % 10; ptoken.programType = translate_shader_type(emit->unit); if (!emit_dword(emit, ptoken.value)) return FALSE; @@ -10840,6 +12483,15 @@ emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) ptoken->refactoringAllowed = 1; } + if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) { + /* Replace the reserved token with the forceEarlyDepthStencil global flag */ + VGPU10OpcodeToken0 *ptoken; + + ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token]; + ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; + ptoken->forceEarlyDepthStencil = 1; + } + return TRUE; } @@ -10884,6 +12536,9 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, emit->fs.pstipple_sampler_unit = unit; + /* The new sampler state is appended to the end of the samplers list */ + emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++; + /* Setup texture state for stipple */ emit->sampler_target[unit] = TGSI_TEXTURE_2D; emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; @@ -10891,6 +12546,7 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; emit->key.tex[unit].swizzle_a = 
TGSI_SWIZZLE_W; emit->key.tex[unit].target = PIPE_TEXTURE_2D; + emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index; if (0) { debug_printf("After pstipple ------------------\n"); @@ -11020,6 +12676,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, const struct svga_compile_key *key, enum pipe_shader_type unit) { + struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct svga_shader_variant *variant = NULL; struct svga_shader_emitter_v10 *emit; const struct tgsi_token *tokens = shader->tokens; @@ -11045,7 +12702,9 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, goto done; emit->unit = unit; - if (svga_have_sm5(svga)) { + if (svga_have_gl43(svga)) { + emit->version = 51; + } else if (svga_have_sm5(svga)) { emit->version = 50; } else if (svga_have_sm4_1(svga)) { emit->version = 41; @@ -11053,6 +12712,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->version = 40; } + emit->use_sampler_state_mapping = emit->key.sampler_state_mapping; + emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0; emit->key = *key; @@ -11098,7 +12759,6 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->tcs.control_point_tmp_index = INVALID_INDEX; emit->tcs.control_point_out_count = 0; emit->tcs.inner.out_index = INVALID_INDEX; - emit->tcs.inner.out_index = INVALID_INDEX; emit->tcs.inner.temp_index = INVALID_INDEX; emit->tcs.inner.tgsi_index = INVALID_INDEX; emit->tcs.outer.out_index = INVALID_INDEX; @@ -11118,6 +12778,14 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->tes.outer.tgsi_index = INVALID_INDEX; emit->tes.prim_id_index = INVALID_INDEX; + emit->cs.thread_id_index = INVALID_INDEX; + emit->cs.block_id_index = INVALID_INDEX; + emit->cs.grid_size.tgsi_index = INVALID_INDEX; + emit->cs.grid_size.imm_index = INVALID_INDEX; + emit->cs.block_width = 1; + emit->cs.block_height = 1; + emit->cs.block_depth = 1; + emit->clip_dist_out_index = INVALID_INDEX; emit->clip_dist_tmp_index 
= INVALID_INDEX; emit->clip_dist_so_index = INVALID_INDEX; @@ -11135,6 +12803,11 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->current_loop_depth = 0; emit->initialize_temp_index = INVALID_INDEX; + emit->image_size_index = INVALID_INDEX; + + emit->max_vs_inputs = svgascreen->max_vs_inputs; + emit->max_vs_outputs = svgascreen->max_vs_outputs; + emit->max_gs_inputs = svgascreen->max_gs_inputs; if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; @@ -11202,6 +12875,12 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, } } + /* Determine if constbuf to rawbuf translation is needed */ + if (emit->info.const_buffers_declared) { + emit->raw_bufs = emit->key.raw_buffers; + emit->raw_buf_srv_start_index = emit->key.srv_raw_buf_index; + } + /* * Do actual shader translation. */ @@ -11262,6 +12941,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, struct svga_fs_variant *fs_variant = svga_fs_variant(variant); fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; + fs_variant->pstipple_sampler_state_index = + emit->fs.pstipple_sampler_state_index; /* If there was exactly one write to a fragment shader output register * and it came from a constant buffer, we know all fragments will have @@ -11275,7 +12956,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, */ fs_variant->uses_flat_interp = emit->uses_flat_interp; - fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units; + fs_variant->fs_shadow_compare_units = emit->shadow_compare_units; } else if (unit == PIPE_SHADER_TESS_EVAL) { struct svga_tes_variant *tes_variant = svga_tes_variant(variant); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h index 7b3f439af..6818c2931 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_winsys.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_winsys.h @@ -49,7 +49,7 @@ struct svga_winsys_screen; struct svga_winsys_buffer; struct 
pipe_screen; struct pipe_context; -struct pipe_debug_callback; +struct util_debug_callback; struct pipe_fence_handle; struct pipe_resource; struct svga_region; @@ -109,6 +109,7 @@ enum svga_stats_count { SVGA_STATS_COUNT_BLITBLITTERCOPY, SVGA_STATS_COUNT_DEPTHSTENCILSTATE, SVGA_STATS_COUNT_RASTERIZERSTATE, + SVGA_STATS_COUNT_RAWBUFFERSRVIEW, SVGA_STATS_COUNT_SAMPLER, SVGA_STATS_COUNT_SAMPLERVIEW, SVGA_STATS_COUNT_SURFACEWRITEFLUSH, @@ -131,6 +132,7 @@ enum svga_stats_time { SVGA_STATS_TIME_CREATEBACKEDSURFACEVIEW, SVGA_STATS_TIME_CREATEBUFFER, SVGA_STATS_TIME_CREATECONTEXT, + SVGA_STATS_TIME_CREATECS, SVGA_STATS_TIME_CREATEFS, SVGA_STATS_TIME_CREATEGS, SVGA_STATS_TIME_CREATESURFACE, @@ -144,8 +146,10 @@ enum svga_stats_time { SVGA_STATS_TIME_DRAWVBO, SVGA_STATS_TIME_DRAWARRAYS, SVGA_STATS_TIME_DRAWELEMENTS, + SVGA_STATS_TIME_EMITCS, SVGA_STATS_TIME_EMITFS, SVGA_STATS_TIME_EMITGS, + SVGA_STATS_TIME_EMITRAWBUFFER, SVGA_STATS_TIME_EMITTCS, SVGA_STATS_TIME_EMITTES, SVGA_STATS_TIME_EMITVS, @@ -156,6 +160,7 @@ enum svga_stats_time { SVGA_STATS_TIME_HWTNLDRAWELEMENTS, SVGA_STATS_TIME_HWTNLFLUSH, SVGA_STATS_TIME_HWTNLPRIM, + SVGA_STATS_TIME_LAUNCHGRID, SVGA_STATS_TIME_PROPAGATESURFACE, SVGA_STATS_TIME_SETSAMPLERVIEWS, SVGA_STATS_TIME_SURFACEFLUSH, @@ -166,7 +171,9 @@ enum svga_stats_time { SVGA_STATS_TIME_TEXTRANSFERUNMAP, SVGA_STATS_TIME_TGSIVGPU10TRANSLATE, SVGA_STATS_TIME_TGSIVGPU9TRANSLATE, + SVGA_STATS_TIME_UPDATECSUAV, SVGA_STATS_TIME_UPDATESTATE, + SVGA_STATS_TIME_UPDATEUAV, SVGA_STATS_TIME_VALIDATESURFACEVIEW, SVGA_STATS_TIME_VBUFDRAWARRAYS, SVGA_STATS_TIME_VBUFDRAWELEMENTS, @@ -184,6 +191,7 @@ enum svga_stats_time { SVGA_STATS_PREFIX "BlitBlitterCopy", \ SVGA_STATS_PREFIX "DepthStencilState", \ SVGA_STATS_PREFIX "RasterizerState", \ + SVGA_STATS_PREFIX "RawBufferSRView", \ SVGA_STATS_PREFIX "Sampler", \ SVGA_STATS_PREFIX "SamplerView", \ SVGA_STATS_PREFIX "SurfaceWriteFlush", \ @@ -204,6 +212,7 @@ enum svga_stats_time { SVGA_STATS_PREFIX "CreateBackedSurfaceView", 
\ SVGA_STATS_PREFIX "CreateBuffer", \ SVGA_STATS_PREFIX "CreateContext", \ + SVGA_STATS_PREFIX "CreateCS", \ SVGA_STATS_PREFIX "CreateFS", \ SVGA_STATS_PREFIX "CreateGS", \ SVGA_STATS_PREFIX "CreateSurface", \ @@ -217,8 +226,10 @@ enum svga_stats_time { SVGA_STATS_PREFIX "DrawVBO", \ SVGA_STATS_PREFIX "DrawArrays", \ SVGA_STATS_PREFIX "DrawElements", \ + SVGA_STATS_PREFIX "EmitCS", \ SVGA_STATS_PREFIX "EmitFS", \ SVGA_STATS_PREFIX "EmitGS", \ + SVGA_STATS_PREFIX "EmitRawBuffer", \ SVGA_STATS_PREFIX "EmitTCS", \ SVGA_STATS_PREFIX "EmitTES", \ SVGA_STATS_PREFIX "EmitVS", \ @@ -229,6 +240,7 @@ enum svga_stats_time { SVGA_STATS_PREFIX "HWtnlDrawElements", \ SVGA_STATS_PREFIX "HWtnlFlush", \ SVGA_STATS_PREFIX "HWtnlPrim", \ + SVGA_STATS_PREFIX "LaunchGrid", \ SVGA_STATS_PREFIX "PropagateSurface", \ SVGA_STATS_PREFIX "SetSamplerViews", \ SVGA_STATS_PREFIX "SurfaceFlush", \ @@ -239,7 +251,9 @@ enum svga_stats_time { SVGA_STATS_PREFIX "TextureTransferUnmap", \ SVGA_STATS_PREFIX "TGSIVGPU10Translate", \ SVGA_STATS_PREFIX "TGSIVGPU9Translate", \ + SVGA_STATS_PREFIX "UpdateCSUAV", \ SVGA_STATS_PREFIX "UpdateState", \ + SVGA_STATS_PREFIX "UpdateUAV", \ SVGA_STATS_PREFIX "ValidateSurfaceView", \ SVGA_STATS_PREFIX "VbufDrawArrays", \ SVGA_STATS_PREFIX "VbufDrawElements", \ @@ -463,7 +477,7 @@ struct svga_winsys_context unsigned flags); /** To report perf/conformance/etc issues to the gallium frontend */ - struct pipe_debug_callback *debug_callback; + struct util_debug_callback *debug_callback; /** The more recent command issued to command buffer */ SVGAFifo3dCmdId last_command; @@ -803,6 +817,16 @@ struct svga_winsys_screen boolean have_fence_fd; boolean have_intra_surface_copy; boolean have_constant_buffer_offset_cmd; + boolean have_index_vertex_buffer_offset_cmd; + + /* Have rasterizer state v2 command support */ + boolean have_rasterizer_state_v2_cmd; + + /** Have GL43 capable device */ + boolean have_gl43; + + /** SVGA device_id version we're running on */ + uint16_t 
device_id; }; |