diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2017-12-31 07:12:27 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2017-12-31 07:12:27 +0000 |
commit | 051645c92924bf915d82bf219f2ed67309b5577a (patch) | |
tree | 4aae126dd8e5a18c6a9926a5468d1561e6038a07 /lib/mesa/src/intel/isl | |
parent | 2dae6fe6f74cf7fb9fd65285302c0331d9786b00 (diff) |
Merge Mesa 17.2.8
Diffstat (limited to 'lib/mesa/src/intel/isl')
-rw-r--r-- | lib/mesa/src/intel/isl/isl.c | 1327 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl.h | 484 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl_emit_depth_stencil.c | 225 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl_format.c | 658 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl_gen7.c | 224 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl_gen8.c | 173 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl_priv.h | 55 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl_storage_image.c | 10 | ||||
-rw-r--r-- | lib/mesa/src/intel/isl/isl_surface_state.c | 112 |
9 files changed, 2324 insertions, 944 deletions
diff --git a/lib/mesa/src/intel/isl/isl.c b/lib/mesa/src/intel/isl/isl.c index 32463b129..133986782 100644 --- a/lib/mesa/src/intel/isl/isl.c +++ b/lib/mesa/src/intel/isl/isl.c @@ -25,6 +25,8 @@ #include <stdarg.h> #include <stdio.h> +#include "genxml/genX_bits.h" + #include "isl.h" #include "isl_gen4.h" #include "isl_gen6.h" @@ -67,6 +69,54 @@ isl_device_init(struct isl_device *dev, assert(info->has_hiz_and_separate_stencil); if (info->must_use_separate_stencil) assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); + + dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4; + dev->ss.align = isl_align(dev->ss.size, 32); + + dev->ss.clear_value_size = + isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) + + RENDER_SURFACE_STATE_GreenClearColor_bits(info) + + RENDER_SURFACE_STATE_BlueClearColor_bits(info) + + RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8; + + dev->ss.clear_value_offset = + RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4; + + assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0); + dev->ss.addr_offset = + RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8; + + /* The "Auxiliary Surface Base Address" field starts a bit higher up + * because the bottom 12 bits are used for other things. Round down to + * the nearest dword before. 
+ */ + dev->ss.aux_addr_offset = + (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8; + + dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4; + assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); + dev->ds.depth_offset = + _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; + + if (dev->use_separate_stencil) { + dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 + + _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 + + _3DSTATE_CLEAR_PARAMS_length(info) * 4; + + assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); + dev->ds.stencil_offset = + _3DSTATE_DEPTH_BUFFER_length(info) * 4 + + _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8; + + assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); + dev->ds.hiz_offset = + _3DSTATE_DEPTH_BUFFER_length(info) * 4 + + _3DSTATE_STENCIL_BUFFER_length(info) * 4 + + _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; + } else { + dev->ds.stencil_offset = 0; + dev->ds.hiz_offset = 0; + } } /** @@ -104,9 +154,8 @@ isl_device_get_sample_counts(struct isl_device *dev) /** * @param[out] info is written only on success */ -static bool -isl_tiling_get_info(const struct isl_device *dev, - enum isl_tiling tiling, +static void +isl_tiling_get_info(enum isl_tiling tiling, uint32_t format_bpb, struct isl_tile_info *tile_info) { @@ -121,7 +170,8 @@ isl_tiling_get_info(const struct isl_device *dev, */ assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0); assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3)); - return isl_tiling_get_info(dev, tiling, format_bpb / 3, tile_info); + isl_tiling_get_info(tiling, format_bpb / 3, tile_info); + return; } switch (tiling) { @@ -162,12 +212,6 @@ isl_tiling_get_info(const struct isl_device *dev, case ISL_TILING_Yf: case ISL_TILING_Ys: { - if (ISL_DEV_GEN(dev) < 9) - return false; - - if (!isl_is_pow2(bs)) - return false; - bool is_Ys = tiling == ISL_TILING_Ys; assert(bs > 0); @@ -222,6 
+266,31 @@ isl_tiling_get_info(const struct isl_device *dev, .logical_extent_el = logical_el, .phys_extent_B = phys_B, }; +} + +bool +isl_color_value_is_zero_one(union isl_color_value value, + enum isl_format format) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(format); + +#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \ + if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \ + return false + + if (isl_format_has_int_channel(format)) { + RETURN_FALSE_IF_NOT_0_1(r, 0, u32); + RETURN_FALSE_IF_NOT_0_1(g, 1, u32); + RETURN_FALSE_IF_NOT_0_1(b, 2, u32); + RETURN_FALSE_IF_NOT_0_1(a, 3, u32); + } else { + RETURN_FALSE_IF_NOT_0_1(r, 0, f32); + RETURN_FALSE_IF_NOT_0_1(g, 1, f32); + RETURN_FALSE_IF_NOT_0_1(b, 2, f32); + RETURN_FALSE_IF_NOT_0_1(a, 3, f32); + } + +#undef RETURN_FALSE_IF_NOT_0_1 return true; } @@ -255,8 +324,7 @@ isl_surf_choose_tiling(const struct isl_device *dev, if (ISL_DEV_GEN(dev) >= 6) { isl_gen6_filter_tiling(dev, info, &tiling_flags); } else { - isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); - isl_gen6_filter_tiling(dev, info, &tiling_flags); + isl_gen4_filter_tiling(dev, info, &tiling_flags); } #define CHOOSE(__tiling) \ @@ -415,7 +483,6 @@ isl_choose_array_pitch_span(const struct isl_device *dev, * the storage for LODs other than LOD 0 is not needed. */ assert(info->levels == 1); - assert(phys_level0_sa->array_len == 1); return ISL_ARRAY_PITCH_SPAN_COMPACT; } else { if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && @@ -447,6 +514,12 @@ isl_choose_array_pitch_span(const struct isl_device *dev, * compact QPitch possible in order to conserve memory. */ return ISL_ARRAY_PITCH_SPAN_COMPACT; + + case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: + /* Each array image in the gen6 stencil of HiZ surface is compact in the + * sense that every LOD is a compact array of the same size as LOD0. 
+ */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; } unreachable("bad isl_dim_layout"); @@ -461,12 +534,32 @@ isl_choose_image_alignment_el(const struct isl_device *dev, enum isl_msaa_layout msaa_layout, struct isl_extent3d *image_align_el) { - if (info->format == ISL_FORMAT_HIZ) { - assert(ISL_DEV_GEN(dev) >= 6); - /* HiZ surfaces are always aligned to 16x8 pixels in the primary surface - * which works out to 2x2 HiZ elments. + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + if (fmtl->txc == ISL_TXC_MCS) { + assert(tiling == ISL_TILING_Y0); + + /* + * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": + * + * Height, width, and layout of MCS buffer in this case must match with + * Render Target height, width, and layout. MCS buffer is tiledY. + * + * To avoid wasting memory, choose the smallest alignment possible: + * HALIGN_4 and VALIGN_4. */ - *image_align_el = isl_extent3d(2, 2, 1); + *image_align_el = isl_extent3d(4, 4, 1); + return; + } else if (info->format == ISL_FORMAT_HIZ) { + assert(ISL_DEV_GEN(dev) >= 6); + if (ISL_DEV_GEN(dev) == 6) { + /* HiZ surfaces on Sandy Bridge are packed tightly. */ + *image_align_el = isl_extent3d(1, 1, 1); + } else { + /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the + * primary surface which works out to 2x2 HiZ elments. + */ + *image_align_el = isl_extent3d(2, 2, 1); + } return; } @@ -491,8 +584,14 @@ isl_choose_image_alignment_el(const struct isl_device *dev, static enum isl_dim_layout isl_surf_choose_dim_layout(const struct isl_device *dev, enum isl_surf_dim logical_dim, - enum isl_tiling tiling) + enum isl_tiling tiling, + isl_surf_usage_flags_t usage) { + /* Sandy bridge needs a special layout for HiZ and stencil. 
*/ + if (ISL_DEV_GEN(dev) == 6 && + (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ)) + return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ; + if (ISL_DEV_GEN(dev) >= 9) { switch (logical_dim) { case ISL_SURF_DIM_1D: @@ -522,6 +621,16 @@ isl_surf_choose_dim_layout(const struct isl_device *dev, switch (logical_dim) { case ISL_SURF_DIM_1D: case ISL_SURF_DIM_2D: + /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": + * + * The cube face textures are stored in the same way as 3D surfaces + * are stored (see section 6.17.5 for details). For cube surfaces, + * however, the depth is equal to the number of faces (always 6) and + * is not reduced for each MIP. + */ + if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT)) + return ISL_DIM_LAYOUT_GEN4_3D; + return ISL_DIM_LAYOUT_GEN4_2D; case ISL_SURF_DIM_3D: return ISL_DIM_LAYOUT_GEN4_3D; @@ -561,6 +670,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN9_1D: case ISL_DIM_LAYOUT_GEN4_2D: + case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: *phys_level0_sa = (struct isl_extent4d) { .w = isl_align_npot(info->width, fmtl->bw), .h = fmtl->bh, @@ -572,7 +682,11 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, break; case ISL_SURF_DIM_2D: - assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); + if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT)) + assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D); + else + assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D || + dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ); if (tiling == ISL_TILING_Ys && info->samples > 1) isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); @@ -637,6 +751,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, switch (dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: + case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: unreachable("bad isl_dim_layout"); case ISL_DIM_LAYOUT_GEN4_2D: @@ -665,6 +780,108 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, } /** + * Calculate the pitch between 
physical array slices, in units of rows of + * surface elements. + */ +static uint32_t +isl_calc_array_pitch_el_rows_gen4_2d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + enum isl_array_pitch_span array_pitch_span, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + uint32_t pitch_sa_rows = 0; + + switch (array_pitch_span) { + case ISL_ARRAY_PITCH_SPAN_COMPACT: + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + break; + case ISL_ARRAY_PITCH_SPAN_FULL: { + /* The QPitch equation is found in the Broadwell PRM >> Volume 5: + * Memory Views >> Common Surface Formats >> Surface Layout >> 2D + * Surfaces >> Surface Arrays. + */ + uint32_t H0_sa = phys_level0_sa->h; + uint32_t H1_sa = isl_minify(H0_sa, 1); + + uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); + uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); + + uint32_t m; + if (ISL_DEV_GEN(dev) >= 7) { + /* The QPitch equation changed slightly in Ivybridge. */ + m = 12; + } else { + m = 11; + } + + pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); + + if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && + (info->height % 4 == 1)) { + /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than + * the value calculated in the equation above , for every + * other odd Surface Height starting from 1 i.e. 1,5,9,13. + * + * XXX(chadv): Is the errata natural corollary of the physical + * layout of interleaved samples? 
+ */ + pitch_sa_rows += 4; + } + + pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); + } /* end case */ + break; + } + + assert(pitch_sa_rows % fmtl->bh == 0); + uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; + + if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) { + /* + * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632): + * + * "Mip-mapped and arrayed surfaces are supported with MCS buffer + * layout with these alignments in the RT space: Horizontal + * Alignment = 128 and Vertical Alignment = 64." + * + * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435): + * + * "For non-multisampled render target's CCS auxiliary surface, + * QPitch must be computed with Horizontal Alignment = 128 and + * Surface Vertical Alignment = 256. These alignments are only for + * CCS buffer and not for associated render target." + * + * The first restriction is already handled by isl_choose_image_alignment_el + * but the second restriction, which is an extension of the first, only + * applies to qpitch and must be applied here. + */ + assert(fmtl->bh == 4); + pitch_el_rows = isl_align(pitch_el_rows, 256 / 4); + } + + if (ISL_DEV_GEN(dev) >= 9 && + info->dim == ISL_SURF_DIM_3D && + tile_info->tiling != ISL_TILING_LINEAR) { + /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: + * + * Tile Mode != Linear: This field must be set to an integer multiple + * of the tile height + */ + pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height); + } + + return pitch_el_rows; +} + +/** * A variant of isl_calc_phys_slice0_extent_sa() specific to * ISL_DIM_LAYOUT_GEN4_2D. 
*/ @@ -740,43 +957,158 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( }; } +static void +isl_calc_phys_total_extent_el_gen4_2d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + enum isl_array_pitch_span array_pitch_span, + uint32_t *array_pitch_el_rows, + struct isl_extent2d *total_extent_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + struct isl_extent2d phys_slice0_sa; + isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, + image_align_sa, phys_level0_sa, + &phys_slice0_sa); + *array_pitch_el_rows = + isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info, + image_align_sa, phys_level0_sa, + array_pitch_span, + &phys_slice0_sa); + *total_extent_el = (struct isl_extent2d) { + .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw), + .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) + + isl_assert_div(phys_slice0_sa.h, fmtl->bh), + }; +} + /** * A variant of isl_calc_phys_slice0_extent_sa() specific to * ISL_DIM_LAYOUT_GEN4_3D. */ static void -isl_calc_phys_slice0_extent_sa_gen4_3d( +isl_calc_phys_total_extent_el_gen4_3d( const struct isl_device *dev, const struct isl_surf_init_info *restrict info, const struct isl_extent3d *image_align_sa, const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) + uint32_t *array_pitch_el_rows, + struct isl_extent2d *phys_total_el) { + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + assert(info->samples == 1); - assert(phys_level0_sa->array_len == 1); - uint32_t slice_w = 0; - uint32_t slice_h = 0; + if (info->dim != ISL_SURF_DIM_3D) { + /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": + * + * The cube face textures are stored in the same way as 3D surfaces + * are stored (see section 6.17.5 for details). 
For cube surfaces, + * however, the depth is equal to the number of faces (always 6) and + * is not reduced for each MIP. + */ + assert(ISL_DEV_GEN(dev) == 4); + assert(info->usage & ISL_SURF_USAGE_CUBE_BIT); + assert(phys_level0_sa->array_len == 6); + } else { + assert(phys_level0_sa->array_len == 1); + } + + uint32_t total_w = 0; + uint32_t total_h = 0; uint32_t W0 = phys_level0_sa->w; uint32_t H0 = phys_level0_sa->h; uint32_t D0 = phys_level0_sa->d; + uint32_t A0 = phys_level0_sa->a; for (uint32_t l = 0; l < info->levels; ++l) { uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); - uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); + uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0; uint32_t max_layers_horiz = MIN(level_d, 1u << l); uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); - slice_w = MAX(slice_w, level_w * max_layers_horiz); - slice_h += level_h * max_layers_vert; + total_w = MAX(total_w, level_w * max_layers_horiz); + total_h += level_h * max_layers_vert; } - *phys_slice0_sa = (struct isl_extent2d) { - .w = slice_w, - .h = slice_h, + /* GEN4_3D layouts don't really have an array pitch since each LOD has a + * different number of horizontal and vertical layers. We have to set it + * to something, so at least make it true for LOD0. + */ + *array_pitch_el_rows = + isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw; + *phys_total_el = (struct isl_extent2d) { + .w = isl_assert_div(total_w, fmtl->bw), + .h = isl_assert_div(total_h, fmtl->bh), + }; +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ. 
+ */ +static void +isl_calc_phys_total_extent_el_gen6_stencil_hiz( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + uint32_t *array_pitch_el_rows, + struct isl_extent2d *phys_total_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + const struct isl_extent2d tile_extent_sa = { + .w = tile_info->logical_extent_el.w * fmtl->bw, + .h = tile_info->logical_extent_el.h * fmtl->bh, + }; + /* Tile size is a multiple of image alignment */ + assert(tile_extent_sa.w % image_align_sa->w == 0); + assert(tile_extent_sa.h % image_align_sa->h == 0); + + const uint32_t W0 = phys_level0_sa->w; + const uint32_t H0 = phys_level0_sa->h; + + /* Each image has the same height as LOD0 because the hardware thinks + * everything is LOD0 + */ + const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a; + + uint32_t total_top_w = 0; + uint32_t total_bottom_w = 0; + uint32_t total_h = 0; + + for (uint32_t l = 0; l < info->levels; ++l) { + const uint32_t W = isl_minify(W0, l); + + const uint32_t w = isl_align(W, tile_extent_sa.w); + const uint32_t h = isl_align(H, tile_extent_sa.h); + + if (l == 0) { + total_top_w = w; + total_h = h; + } else if (l == 1) { + total_bottom_w = w; + total_h += h; + } else { + total_bottom_w += w; + } + } + + *array_pitch_el_rows = + isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh); + *phys_total_el = (struct isl_extent2d) { + .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw), + .h = isl_assert_div(total_h, fmtl->bh), }; } @@ -785,16 +1117,17 @@ isl_calc_phys_slice0_extent_sa_gen4_3d( * ISL_DIM_LAYOUT_GEN9_1D. 
*/ static void -isl_calc_phys_slice0_extent_sa_gen9_1d( +isl_calc_phys_total_extent_el_gen9_1d( const struct isl_device *dev, const struct isl_surf_init_info *restrict info, const struct isl_extent3d *image_align_sa, const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) + uint32_t *array_pitch_el_rows, + struct isl_extent2d *phys_total_el) { MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - assert(phys_level0_sa->height == 1); + assert(phys_level0_sa->height / fmtl->bh == 1); assert(phys_level0_sa->depth == 1); assert(info->samples == 1); assert(image_align_sa->w >= fmtl->bw); @@ -809,217 +1142,68 @@ isl_calc_phys_slice0_extent_sa_gen9_1d( slice_w += w; } - *phys_slice0_sa = isl_extent2d(slice_w, 1); + *array_pitch_el_rows = 1; + *phys_total_el = (struct isl_extent2d) { + .w = isl_assert_div(slice_w, fmtl->bw), + .h = phys_level0_sa->array_len, + }; } /** - * Calculate the physical extent of the surface's first array slice, in units - * of surface samples. If the surface is multi-leveled, then the result will - * be aligned to \a image_align_sa. + * Calculate the two-dimensional total physical extent of the surface, in + * units of surface elements. 
*/ static void -isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - enum isl_msaa_layout msaa_layout, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) +isl_calc_phys_total_extent_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + enum isl_dim_layout dim_layout, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + enum isl_array_pitch_span array_pitch_span, + uint32_t *array_pitch_el_rows, + struct isl_extent2d *total_extent_el) { switch (dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: - isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info, - image_align_sa, phys_level0_sa, - phys_slice0_sa); + assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); + isl_calc_phys_total_extent_el_gen9_1d(dev, info, + image_align_sa, phys_level0_sa, + array_pitch_el_rows, + total_extent_el); return; case ISL_DIM_LAYOUT_GEN4_2D: - isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, - image_align_sa, phys_level0_sa, - phys_slice0_sa); + isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout, + image_align_sa, phys_level0_sa, + array_pitch_span, + array_pitch_el_rows, + total_extent_el); return; - case ISL_DIM_LAYOUT_GEN4_3D: - isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, - phys_level0_sa, phys_slice0_sa); + case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: + assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); + isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info, + image_align_sa, + phys_level0_sa, + array_pitch_el_rows, + total_extent_el); return; - } -} - -/** - * Calculate the pitch between physical array slices, in units of rows of - * surface elements. 
- */ -static uint32_t -isl_calc_array_pitch_el_rows(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_tile_info *tile_info, - enum isl_dim_layout dim_layout, - enum isl_array_pitch_span array_pitch_span, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - const struct isl_extent2d *phys_slice0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - uint32_t pitch_sa_rows = 0; - - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - /* Each row is an array slice */ - pitch_sa_rows = 1; - break; - case ISL_DIM_LAYOUT_GEN4_2D: - switch (array_pitch_span) { - case ISL_ARRAY_PITCH_SPAN_COMPACT: - pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); - break; - case ISL_ARRAY_PITCH_SPAN_FULL: { - /* The QPitch equation is found in the Broadwell PRM >> Volume 5: - * Memory Views >> Common Surface Formats >> Surface Layout >> 2D - * Surfaces >> Surface Arrays. - */ - uint32_t H0_sa = phys_level0_sa->h; - uint32_t H1_sa = isl_minify(H0_sa, 1); - - uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); - uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); - - uint32_t m; - if (ISL_DEV_GEN(dev) >= 7) { - /* The QPitch equation changed slightly in Ivybridge. */ - m = 12; - } else { - m = 11; - } - - pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); - - if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && - (info->height % 4 == 1)) { - /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: - * Graphics Core >> Section 7.18.3.7: Surface Arrays: - * - * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than - * the value calculated in the equation above , for every - * other odd Surface Height starting from 1 i.e. 1,5,9,13. - * - * XXX(chadv): Is the errata natural corollary of the physical - * layout of interleaved samples? 
- */ - pitch_sa_rows += 4; - } - - pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); - } /* end case */ - break; - } - break; case ISL_DIM_LAYOUT_GEN4_3D: assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); - pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); - break; - default: - unreachable("bad isl_dim_layout"); - break; - } - - assert(pitch_sa_rows % fmtl->bh == 0); - uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; - - if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) { - /* - * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632): - * - * "Mip-mapped and arrayed surfaces are supported with MCS buffer - * layout with these alignments in the RT space: Horizontal - * Alignment = 128 and Vertical Alignment = 64." - * - * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435): - * - * "For non-multisampled render target's CCS auxiliary surface, - * QPitch must be computed with Horizontal Alignment = 128 and - * Surface Vertical Alignment = 256. These alignments are only for - * CCS buffer and not for associated render target." - * - * The first restriction is already handled by isl_choose_image_alignment_el - * but the second restriction, which is an extension of the first, only - * applies to qpitch and must be applied here. - */ - assert(fmtl->bh == 4); - pitch_el_rows = isl_align(pitch_el_rows, 256 / 4); - } - - if (ISL_DEV_GEN(dev) >= 9 && - info->dim == ISL_SURF_DIM_3D && - tile_info->tiling != ISL_TILING_LINEAR) { - /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: - * - * Tile Mode != Linear: This field must be set to an integer multiple - * of the tile height - */ - pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height); + isl_calc_phys_total_extent_el_gen4_3d(dev, info, + image_align_sa, phys_level0_sa, + array_pitch_el_rows, + total_extent_el); + return; } - - return pitch_el_rows; } -/** - * Calculate the pitch of each surface row, in bytes. 
- */ static uint32_t -isl_calc_linear_row_pitch(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_extent2d *phys_slice0_sa) +isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info, + const struct isl_tile_info *tile_info) { - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - uint32_t row_pitch = info->min_pitch; - - /* First, align the surface to a cache line boundary, as the PRM explains - * below. - * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Render Target and Media - * Surfaces: - * - * The data port accesses data (pixels) outside of the surface if they - * are contained in the same cache request as pixels that are within the - * surface. These pixels will not be returned by the requesting message, - * however if these pixels lie outside of defined pages in the GTT, - * a GTT error will result when the cache request is processed. In order - * to avoid these GTT errors, “padding” at the bottom of the surface is - * sometimes necessary. - * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: - * - * The sampling engine accesses texels outside of the surface if they - * are contained in the same cache line as texels that are within the - * surface. These texels will not participate in any calculation - * performed by the sampling engine and will not affect the result of - * any sampling engine operation, however if these texels lie outside of - * defined pages in the GTT, a GTT error will result when the cache line - * is accessed. In order to avoid these GTT errors, “padding” at the - * bottom and right side of a sampling engine surface is sometimes - * necessary. - * - * It is possible that a cache line will straddle a page boundary if the - * base address or pitch is not aligned. 
All pages included in the cache - * lines that are part of the surface must map to valid GTT entries to - * avoid errors. To determine the necessary padding on the bottom and - * right side of the surface, refer to the table in Alignment Unit Size - * section for the i and j parameters for the surface format in use. The - * surface must then be extended to the next multiple of the alignment - * unit size in each dimension, and all texels contained in this - * extended surface must have valid GTT entries. - * - * For example, suppose the surface size is 15 texels by 10 texels and - * the alignment parameters are i=4 and j=2. In this case, the extended - * surface would be 16 by 10. Note that these calculations are done in - * texels, and must be converted to bytes based on the surface format - * being used to determine whether additional pages need to be defined. - */ - assert(phys_slice0_sa->w % fmtl->bw == 0); - const uint32_t bs = fmtl->bpb / 8; - row_pitch = MAX(row_pitch, bs * (phys_slice0_sa->w / fmtl->bw)); + if (tile_info->tiling != ISL_TILING_LINEAR) + return tile_info->phys_extent_B.width; /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> * RENDER_SURFACE_STATE Surface Pitch (p349): @@ -1035,125 +1219,165 @@ isl_calc_linear_row_pitch(const struct isl_device *dev, * - For other linear surfaces, the pitch can be any multiple of * bytes. 
*/ - if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { - if (isl_format_is_yuv(info->format)) { - row_pitch = isl_align_npot(row_pitch, 2 * bs); + const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); + const uint32_t bs = fmtl->bpb / 8; + + if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { + if (isl_format_is_yuv(surf_info->format)) { + return 2 * bs; } else { - row_pitch = isl_align_npot(row_pitch, bs); + return bs; } } - return row_pitch; + return 1; +} + +static uint32_t +isl_calc_linear_min_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *info, + const struct isl_extent2d *phys_total_el, + uint32_t alignment) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + const uint32_t bs = fmtl->bpb / 8; + + return isl_align_npot(bs * phys_total_el->w, alignment); +} + +static uint32_t +isl_calc_tiled_min_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *surf_info, + const struct isl_tile_info *tile_info, + const struct isl_extent2d *phys_total_el, + uint32_t alignment) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); + + assert(fmtl->bpb % tile_info->format_bpb == 0); + + const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb; + const uint32_t total_w_tl = + isl_align_div(phys_total_el->w * tile_el_scale, + tile_info->logical_extent_el.width); + + assert(alignment == tile_info->phys_extent_B.width); + return total_w_tl * tile_info->phys_extent_B.width; +} + +static uint32_t +isl_calc_min_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *surf_info, + const struct isl_tile_info *tile_info, + const struct isl_extent2d *phys_total_el, + uint32_t alignment) +{ + if (tile_info->tiling == ISL_TILING_LINEAR) { + return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el, + alignment); + } else { + return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info, + phys_total_el, 
alignment); + } } /** - * Calculate and apply any padding required for the surface. + * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's + * size is `bits` bits? * - * @param[inout] total_h_el is updated with the new height - * @param[out] pad_bytes is overwritten with additional padding requirements. + * Hardware pitch fields are offset by 1. For example, if the size of + * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid + * pitches is [1, 2^b] inclusive. If the surface pitch is N, then + * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1. */ -static void -isl_apply_surface_padding(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_tile_info *tile_info, - uint32_t *total_h_el, - uint32_t *pad_bytes) +static bool +pitch_in_range(uint32_t n, uint32_t bits) { - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + assert(n != 0); + return likely(bits != 0 && 1 <= n && n <= (1 << bits)); +} - *pad_bytes = 0; +static bool +isl_calc_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *surf_info, + const struct isl_tile_info *tile_info, + enum isl_dim_layout dim_layout, + const struct isl_extent2d *phys_total_el, + uint32_t *out_row_pitch) +{ + uint32_t alignment = + isl_calc_row_pitch_alignment(surf_info, tile_info); - /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Render Target and Media - * Surfaces: - * - * The data port accesses data (pixels) outside of the surface if they - * are contained in the same cache request as pixels that are within the - * surface. These pixels will not be returned by the requesting message, - * however if these pixels lie outside of defined pages in the GTT, - * a GTT error will result when the cache request is processed. In - * order to avoid these GTT errors, “padding” at the bottom of the - * surface is sometimes necessary. 
- * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: - * - * ... Lots of padding requirements, all listed separately below. + /* If pitch isn't given and it can be chosen freely, align it by cache line + * allowing one to use blit engine on the surface. */ + if (surf_info->row_pitch == 0 && tile_info->tiling == ISL_TILING_LINEAR) { + /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress: + * + * "Base address of the destination surface: X=0, Y=0. Lower 32bits + * of the 48bit addressing. When Src Tiling is enabled (Bit_15 + * enabled), this address must be 4KB-aligned. When Tiling is not + * enabled, this address should be CL (64byte) aligned." + */ + alignment = MAX2(alignment, 64); + } - /* We can safely ignore the first padding requirement, quoted below, - * because isl doesn't do buffers. - * - * - [pre-BDW] For buffers, which have no inherent “height,” padding - * requirements are different. A buffer must be padded to the next - * multiple of 256 array elements, with an additional 16 bytes added - * beyond that to account for the L1 cache line. - */ + const uint32_t min_row_pitch = + isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el, + alignment); - /* - * - For compressed textures [...], padding at the bottom of the surface - * is to an even compressed row. - */ - if (isl_format_is_compressed(info->format)) - *total_h_el = isl_align(*total_h_el, 2); + uint32_t row_pitch = min_row_pitch; - /* - * - For cube surfaces, an additional two rows of padding are required - * at the bottom of the surface. - */ - if (info->usage & ISL_SURF_USAGE_CUBE_BIT) - *total_h_el += 2; - - /* - * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, - * additional padding is required. These surfaces require an extra row - * plus 16 bytes of padding at the bottom in addition to the general - * padding requirements. 
- */ - if (isl_format_is_yuv(info->format) && - (fmtl->bpb == 96 || fmtl->bpb == 48|| fmtl->bpb == 24)) { - *total_h_el += 1; - *pad_bytes += 16; + if (surf_info->row_pitch != 0) { + row_pitch = surf_info->row_pitch; + + if (row_pitch < min_row_pitch) + return false; + + if (row_pitch % alignment != 0) + return false; } - /* - * - For linear surfaces, additional padding of 64 bytes is required at - * the bottom of the surface. This is in addition to the padding - * required above. - */ - if (tile_info->tiling == ISL_TILING_LINEAR) - *pad_bytes += 64; + const uint32_t row_pitch_tiles = row_pitch / tile_info->phys_extent_B.width; - /* The below text weakens, not strengthens, the padding requirements for - * linear surfaces. Therefore we can safely ignore it. - * - * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, - * non-MSAA, non-mip-mapped surfaces in linear memory, the only - * padding requirement is to the next aligned 64-byte boundary beyond - * the end of the surface. The rest of the padding requirements - * documented above do not apply to these surfaces. - */ + if (row_pitch == 0) + return false; - /* - * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and - * height % 4 != 0, the surface must be padded with - * 4-(height % 4)*Surface Pitch # of bytes. - */ - if (ISL_DEV_GEN(dev) >= 9 && - tile_info->tiling == ISL_TILING_LINEAR && - (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { - *total_h_el = isl_align(*total_h_el, 4); + if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { + /* SurfacePitch is ignored for this layout. 
*/ + goto done; } - /* - * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded - * to 4 times the Surface Pitch # of bytes - */ - if (ISL_DEV_GEN(dev) >= 9 && - tile_info->tiling == ISL_TILING_LINEAR && - info->dim == ISL_SURF_DIM_1D) { - *total_h_el += 4; - } + if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | + ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_STORAGE_BIT)) && + !pitch_in_range(row_pitch, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) + return false; + + if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT | + ISL_SURF_USAGE_MCS_BIT)) && + !pitch_in_range(row_pitch_tiles, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) + return false; + + if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + !pitch_in_range(row_pitch, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) + return false; + + if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) && + !pitch_in_range(row_pitch, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) + return false; + + const uint32_t stencil_pitch_bits = dev->use_separate_stencil ? 
+ _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) : + _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info); + + if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) && + !pitch_in_range(row_pitch, stencil_pitch_bits)) + return false; + + done: + *out_row_pitch = row_pitch; + return true; } bool @@ -1175,11 +1399,10 @@ isl_surf_init_s(const struct isl_device *dev, return false; struct isl_tile_info tile_info; - if (!isl_tiling_get_info(dev, tiling, fmtl->bpb, &tile_info)) - return false; + isl_tiling_get_info(tiling, fmtl->bpb, &tile_info); const enum isl_dim_layout dim_layout = - isl_surf_choose_dim_layout(dev, info->dim, tiling); + isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage); enum isl_msaa_layout msaa_layout; if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) @@ -1201,27 +1424,23 @@ isl_surf_init_s(const struct isl_device *dev, enum isl_array_pitch_span array_pitch_span = isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); - struct isl_extent2d phys_slice0_sa; - isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, - &image_align_sa, &phys_level0_sa, - &phys_slice0_sa); - assert(phys_slice0_sa.w % fmtl->bw == 0); - assert(phys_slice0_sa.h % fmtl->bh == 0); - - const uint32_t array_pitch_el_rows = - isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout, - array_pitch_span, &image_align_sa, - &phys_level0_sa, &phys_slice0_sa); - - uint32_t total_h_el = phys_level0_sa.array_len * array_pitch_el_rows; - - uint32_t pad_bytes; - isl_apply_surface_padding(dev, info, &tile_info, &total_h_el, &pad_bytes); + uint32_t array_pitch_el_rows; + struct isl_extent2d phys_total_el; + isl_calc_phys_total_extent_el(dev, info, &tile_info, + dim_layout, msaa_layout, + &image_align_sa, &phys_level0_sa, + array_pitch_span, &array_pitch_el_rows, + &phys_total_el); + + uint32_t row_pitch; + if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout, + &phys_total_el, &row_pitch)) + return false; - uint32_t row_pitch, 
size, base_alignment; + uint32_t base_alignment; + uint64_t size; if (tiling == ISL_TILING_LINEAR) { - row_pitch = isl_calc_linear_row_pitch(dev, info, &phys_slice0_sa); - size = row_pitch * total_h_el + pad_bytes; + size = (uint64_t) row_pitch * phys_total_el.h; /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress: * @@ -1242,26 +1461,10 @@ isl_surf_init_s(const struct isl_device *dev, } base_alignment = isl_round_up_to_power_of_two(base_alignment); } else { - assert(fmtl->bpb % tile_info.format_bpb == 0); - const uint32_t tile_el_scale = fmtl->bpb / tile_info.format_bpb; - - assert(phys_slice0_sa.w % fmtl->bw == 0); - const uint32_t total_w_el = phys_slice0_sa.width / fmtl->bw; - const uint32_t total_w_tl = - isl_align_div(total_w_el * tile_el_scale, - tile_info.logical_extent_el.width); - - row_pitch = total_w_tl * tile_info.phys_extent_B.width; - if (row_pitch < info->min_pitch) { - row_pitch = isl_align_npot(info->min_pitch, - tile_info.phys_extent_B.width); - } - - total_h_el += isl_align_div_npot(pad_bytes, row_pitch); const uint32_t total_h_tl = - isl_align_div(total_h_el, tile_info.logical_extent_el.height); + isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height); - size = total_h_tl * tile_info.phys_extent_B.height * row_pitch; + size = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch; const uint32_t tile_size = tile_info.phys_extent_B.width * tile_info.phys_extent_B.height; @@ -1269,6 +1472,28 @@ isl_surf_init_s(const struct isl_device *dev, base_alignment = MAX(info->min_alignment, tile_size); } + if (ISL_DEV_GEN(dev) < 9) { + /* From the Broadwell PRM Vol 5, Surface Layout: + * + * "In addition to restrictions on maximum height, width, and depth, + * surfaces are also restricted to a maximum size in bytes. This + * maximum is 2 GB for all products and all surface types." + * + * This comment is applicable to all Pre-gen9 platforms. 
+ */ + if (size > (uint64_t) 1 << 31) + return false; + } else { + /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes: + * "In addition to restrictions on maximum height, width, and depth, + * surfaces are also restricted to a maximum size of 2^38 bytes. + * All pixels within the surface must be contained within 2^38 bytes + * of the base address." + */ + if (size > (uint64_t) 1 << 38) + return false; + } + *surf = (struct isl_surf) { .dim = info->dim, .dim_layout = dim_layout, @@ -1296,15 +1521,14 @@ isl_surf_init_s(const struct isl_device *dev, } void -isl_surf_get_tile_info(const struct isl_device *dev, - const struct isl_surf *surf, +isl_surf_get_tile_info(const struct isl_surf *surf, struct isl_tile_info *tile_info) { const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - isl_tiling_get_info(dev, surf->tiling, fmtl->bpb, tile_info); + isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info); } -void +bool isl_surf_get_hiz_surf(const struct isl_device *dev, const struct isl_surf *surf, struct isl_surf *hiz_surf) @@ -1372,20 +1596,20 @@ isl_surf_get_hiz_surf(const struct isl_device *dev, */ const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 
1 : surf->samples; - isl_surf_init(dev, hiz_surf, - .dim = surf->dim, - .format = ISL_FORMAT_HIZ, - .width = surf->logical_level0_px.width, - .height = surf->logical_level0_px.height, - .depth = surf->logical_level0_px.depth, - .levels = surf->levels, - .array_len = surf->logical_level0_px.array_len, - .samples = samples, - .usage = ISL_SURF_USAGE_HIZ_BIT, - .tiling_flags = ISL_TILING_HIZ_BIT); + return isl_surf_init(dev, hiz_surf, + .dim = surf->dim, + .format = ISL_FORMAT_HIZ, + .width = surf->logical_level0_px.width, + .height = surf->logical_level0_px.height, + .depth = surf->logical_level0_px.depth, + .levels = surf->levels, + .array_len = surf->logical_level0_px.array_len, + .samples = samples, + .usage = ISL_SURF_USAGE_HIZ_BIT, + .tiling_flags = ISL_TILING_HIZ_BIT); } -void +bool isl_surf_get_mcs_surf(const struct isl_device *dev, const struct isl_surf *surf, struct isl_surf *mcs_surf) @@ -1398,6 +1622,16 @@ isl_surf_get_mcs_surf(const struct isl_device *dev, assert(surf->levels == 1); assert(surf->logical_level0_px.depth == 1); + /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9 + * bits which means the maximum pitch of a compression surface is 512 + * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is + * 64bpp, this gives us a maximum width of 8192 pixels. We can create + * larger multisampled surfaces, we just can't compress them. For 2x, 4x, + * and 8x, we have enough room for the full 16k supported by the hardware. 
+ */ + if (surf->samples == 16 && surf->logical_level0_px.width > 8192) + return false; + enum isl_format mcs_format; switch (surf->samples) { case 2: mcs_format = ISL_FORMAT_MCS_2X; break; @@ -1408,30 +1642,57 @@ isl_surf_get_mcs_surf(const struct isl_device *dev, unreachable("Invalid sample count"); } - isl_surf_init(dev, mcs_surf, - .dim = ISL_SURF_DIM_2D, - .format = mcs_format, - .width = surf->logical_level0_px.width, - .height = surf->logical_level0_px.height, - .depth = 1, - .levels = 1, - .array_len = surf->logical_level0_px.array_len, - .samples = 1, /* MCS surfaces are really single-sampled */ - .usage = ISL_SURF_USAGE_MCS_BIT, - .tiling_flags = ISL_TILING_Y0_BIT); + return isl_surf_init(dev, mcs_surf, + .dim = ISL_SURF_DIM_2D, + .format = mcs_format, + .width = surf->logical_level0_px.width, + .height = surf->logical_level0_px.height, + .depth = 1, + .levels = 1, + .array_len = surf->logical_level0_px.array_len, + .samples = 1, /* MCS surfaces are really single-sampled */ + .usage = ISL_SURF_USAGE_MCS_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); } bool isl_surf_get_ccs_surf(const struct isl_device *dev, const struct isl_surf *surf, - struct isl_surf *ccs_surf) + struct isl_surf *ccs_surf, + uint32_t row_pitch) { assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE); assert(ISL_DEV_GEN(dev) >= 7); - assert(ISL_DEV_GEN(dev) >= 8 || surf->dim == ISL_SURF_DIM_2D); + if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) + return false; - assert(surf->logical_level0_px.depth == 1); + /* The PRM doesn't say this explicitly, but fast-clears don't appear to + * work for 3D textures until gen9 where the layout of 3D textures changes + * to match 2D array textures. + */ + if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D) + return false; + + /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of + * Non-MultiSampler Render Target Restrictions): + * + * "Support is for non-mip-mapped and non-array surface types only." 
+ * + * This restriction is lifted on gen8+. Technically, it may be possible to + * create a CCS for an arrayed or mipmapped image and only enable CCS_D + * when rendering to the base slice. However, there is no documentation + * telling us what the hardware would do in that case or what it does if you + * walk off the base slice. (Does it ignore CCS or does it start + * scribbling over random memory?) We play it safe and just follow the + * docs and don't allow CCS_D for arrayed or mip-mapped surfaces. + */ + if (ISL_DEV_GEN(dev) <= 7 && + (surf->levels > 1 || surf->logical_level0_px.array_len > 1)) + return false; + + if (isl_format_is_compressed(surf->format)) + return false; /* TODO: More conditions where it can fail. */ @@ -1467,19 +1728,18 @@ isl_surf_get_ccs_surf(const struct isl_device *dev, return false; } - isl_surf_init(dev, ccs_surf, - .dim = ISL_SURF_DIM_2D, - .format = ccs_format, - .width = surf->logical_level0_px.width, - .height = surf->logical_level0_px.height, - .depth = 1, - .levels = surf->levels, - .array_len = surf->logical_level0_px.array_len, - .samples = 1, - .usage = ISL_SURF_USAGE_CCS_BIT, - .tiling_flags = ISL_TILING_CCS_BIT); - - return true; + return isl_surf_init(dev, ccs_surf, + .dim = surf->dim, + .format = ccs_format, + .width = surf->logical_level0_px.width, + .height = surf->logical_level0_px.height, + .depth = surf->logical_level0_px.depth, + .levels = surf->levels, + .array_len = surf->logical_level0_px.array_len, + .samples = 1, + .row_pitch = row_pitch, + .usage = ISL_SURF_USAGE_CCS_BIT, + .tiling_flags = ISL_TILING_CCS_BIT); } void @@ -1533,6 +1793,9 @@ isl_surf_fill_state_s(const struct isl_device *dev, void *state, case 9: isl_gen9_surf_fill_state_s(dev, state, info); break; + case 10: + isl_gen10_surf_fill_state_s(dev, state, info); + break; default: assert(!"Cannot fill surface state for this gen"); } @@ -1564,6 +1827,79 @@ isl_buffer_fill_state_s(const struct isl_device *dev, void *state, case 9:
isl_gen9_buffer_fill_state_s(state, info); break; + case 10: + isl_gen10_buffer_fill_state_s(state, info); + break; + default: + assert(!"Cannot fill surface state for this gen"); + } +} + +void +isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info) +{ + if (info->depth_surf && info->stencil_surf) { + if (!dev->info->has_hiz_and_separate_stencil) { + assert(info->depth_surf == info->stencil_surf); + assert(info->depth_address == info->stencil_address); + } + assert(info->depth_surf->dim == info->stencil_surf->dim); + } + + if (info->depth_surf) { + assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT)); + if (info->depth_surf->dim == ISL_SURF_DIM_3D) { + assert(info->view->base_array_layer + info->view->array_len <= + info->depth_surf->logical_level0_px.depth); + } else { + assert(info->view->base_array_layer + info->view->array_len <= + info->depth_surf->logical_level0_px.array_len); + } + } + + if (info->stencil_surf) { + assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT)); + if (info->stencil_surf->dim == ISL_SURF_DIM_3D) { + assert(info->view->base_array_layer + info->view->array_len <= + info->stencil_surf->logical_level0_px.depth); + } else { + assert(info->view->base_array_layer + info->view->array_len <= + info->stencil_surf->logical_level0_px.array_len); + } + } + + switch (ISL_DEV_GEN(dev)) { + case 4: + if (ISL_DEV_IS_G4X(dev)) { + /* G45 surface state is the same as gen5 */ + isl_gen5_emit_depth_stencil_hiz_s(dev, batch, info); + } else { + isl_gen4_emit_depth_stencil_hiz_s(dev, batch, info); + } + break; + case 5: + isl_gen5_emit_depth_stencil_hiz_s(dev, batch, info); + break; + case 6: + isl_gen6_emit_depth_stencil_hiz_s(dev, batch, info); + break; + case 7: + if (ISL_DEV_IS_HASWELL(dev)) { + isl_gen75_emit_depth_stencil_hiz_s(dev, batch, info); + } else { + isl_gen7_emit_depth_stencil_hiz_s(dev, batch, info); + } + break; + case 8: + 
isl_gen8_emit_depth_stencil_hiz_s(dev, batch, info); + break; + case 9: + isl_gen9_emit_depth_stencil_hiz_s(dev, batch, info); + break; + case 10: + isl_gen10_emit_depth_stencil_hiz_s(dev, batch, info); + break; default: assert(!"Cannot fill surface state for this gen"); } @@ -1622,8 +1958,15 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf, uint32_t *y_offset_sa) { assert(level < surf->levels); - assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); - assert(surf->phys_level0_sa.array_len == 1); + if (surf->dim == ISL_SURF_DIM_3D) { + assert(surf->phys_level0_sa.array_len == 1); + assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); + } else { + assert(surf->dim == ISL_SURF_DIM_2D); + assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT); + assert(surf->phys_level0_sa.array_len == 6); + assert(logical_z_offset_px < surf->phys_level0_sa.array_len); + } const struct isl_extent3d image_align_sa = isl_surf_get_image_alignment_sa(surf); @@ -1631,13 +1974,16 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf, const uint32_t W0 = surf->phys_level0_sa.width; const uint32_t H0 = surf->phys_level0_sa.height; const uint32_t D0 = surf->phys_level0_sa.depth; + const uint32_t AL = surf->phys_level0_sa.array_len; uint32_t x = 0; uint32_t y = 0; for (uint32_t l = 0; l < level; ++l) { const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); - const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d); + const uint32_t level_d = + isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? 
isl_minify(D0, l) : AL, + image_align_sa.d); const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); y += level_h * max_layers_vert; @@ -1645,7 +1991,9 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf, const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); - const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d); + const uint32_t level_d = + isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL, + image_align_sa.d); const uint32_t max_layers_horiz = MIN(level_d, 1u << level); @@ -1656,6 +2004,65 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf, *y_offset_sa = y; } +static void +get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(surf->logical_level0_px.depth == 1); + assert(logical_array_layer < surf->logical_level0_px.array_len); + + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + struct isl_tile_info tile_info; + isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info); + const struct isl_extent2d tile_extent_sa = { + .w = tile_info.logical_extent_el.w * fmtl->bw, + .h = tile_info.logical_extent_el.h * fmtl->bh, + }; + /* Tile size is a multiple of image alignment */ + assert(tile_extent_sa.w % image_align_sa.w == 0); + assert(tile_extent_sa.h % image_align_sa.h == 0); + + const uint32_t W0 = surf->phys_level0_sa.w; + const uint32_t H0 = surf->phys_level0_sa.h; + + /* Each image has the same height as LOD0 because the hardware thinks + * everything is LOD0 + */ + const uint32_t H = isl_align(H0, image_align_sa.h); + + /* Quick sanity check for consistency */ + if (surf->phys_level0_sa.array_len > 1) + 
assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh)); + + uint32_t x = 0, y = 0; + for (uint32_t l = 0; l < level; ++l) { + const uint32_t W = isl_minify(W0, l); + + const uint32_t w = isl_align(W, tile_extent_sa.w); + const uint32_t h = isl_align(H * surf->phys_level0_sa.a, + tile_extent_sa.h); + + if (l == 0) { + y += h; + } else { + x += w; + } + } + + y += H * logical_array_layer; + + *x_offset_sa = x; + *y_offset_sa = y; +} + /** * A variant of isl_surf_get_image_offset_sa() specific to * ISL_DIM_LAYOUT_GEN9_1D. @@ -1721,9 +2128,15 @@ isl_surf_get_image_offset_sa(const struct isl_surf *surf, x_offset_sa, y_offset_sa); break; case ISL_DIM_LAYOUT_GEN4_3D: - get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px, + get_image_offset_sa_gen4_3d(surf, level, logical_array_layer + + logical_z_offset_px, x_offset_sa, y_offset_sa); break; + case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: + get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer + + logical_z_offset_px, + x_offset_sa, y_offset_sa); + break; default: unreachable("not reached"); @@ -1757,9 +2170,88 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, } void -isl_tiling_get_intratile_offset_el(const struct isl_device *dev, - enum isl_tiling tiling, - uint8_t bs, +isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *offset_B, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + uint32_t total_x_offset_el, total_y_offset_el; + isl_surf_get_image_offset_el(surf, level, logical_array_layer, + logical_z_offset_px, + &total_x_offset_el, + &total_y_offset_el); + + uint32_t x_offset_el, y_offset_el; + isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb, + surf->row_pitch, + total_x_offset_el, + total_y_offset_el, + offset_B, + &x_offset_el, + &y_offset_el); + + if (x_offset_sa) { + 
*x_offset_sa = x_offset_el * fmtl->bw; + } else { + assert(x_offset_el == 0); + } + + if (y_offset_sa) { + *y_offset_sa = y_offset_el * fmtl->bh; + } else { + assert(y_offset_el == 0); + } +} + +void +isl_surf_get_image_surf(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + struct isl_surf *image_surf, + uint32_t *offset_B, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + isl_surf_get_image_offset_B_tile_sa(surf, + level, + logical_array_layer, + logical_z_offset_px, + offset_B, + x_offset_sa, + y_offset_sa); + + /* Even for cube maps there will be only single face, therefore drop the + * corresponding flag if present. + */ + const isl_surf_usage_flags_t usage = + surf->usage & (~ISL_SURF_USAGE_CUBE_BIT); + + bool ok UNUSED; + ok = isl_surf_init(dev, image_surf, + .dim = ISL_SURF_DIM_2D, + .format = surf->format, + .width = isl_minify(surf->logical_level0_px.w, level), + .height = isl_minify(surf->logical_level0_px.h, level), + .depth = 1, + .levels = 1, + .array_len = 1, + .samples = surf->samples, + .row_pitch = surf->row_pitch, + .usage = usage, + .tiling_flags = (1 << surf->tiling)); + assert(ok); +} + +void +isl_tiling_get_intratile_offset_el(enum isl_tiling tiling, + uint32_t bpb, uint32_t row_pitch, uint32_t total_x_offset_el, uint32_t total_y_offset_el, @@ -1768,17 +2260,16 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev, uint32_t *y_offset_el) { if (tiling == ISL_TILING_LINEAR) { + assert(bpb % 8 == 0); *base_address_offset = total_y_offset_el * row_pitch + - total_x_offset_el * bs; + total_x_offset_el * (bpb / 8); *x_offset_el = 0; *y_offset_el = 0; return; } - const uint32_t bpb = bs * 8; - struct isl_tile_info tile_info; - isl_tiling_get_info(dev, tiling, bpb, &tile_info); + isl_tiling_get_info(tiling, bpb, &tile_info); assert(row_pitch % tile_info.phys_extent_B.width == 0); diff --git a/lib/mesa/src/intel/isl/isl.h 
b/lib/mesa/src/intel/isl/isl.h index 11ad8919e..dafe95229 100644 --- a/lib/mesa/src/intel/isl/isl.h +++ b/lib/mesa/src/intel/isl/isl.h @@ -353,6 +353,20 @@ enum isl_format { ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16 = 630, ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16 = 638, ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16 = 639, + ISL_FORMAT_ASTC_HDR_2D_4X4_FLT16 = 832, + ISL_FORMAT_ASTC_HDR_2D_5X4_FLT16 = 840, + ISL_FORMAT_ASTC_HDR_2D_5X5_FLT16 = 841, + ISL_FORMAT_ASTC_HDR_2D_6X5_FLT16 = 849, + ISL_FORMAT_ASTC_HDR_2D_6X6_FLT16 = 850, + ISL_FORMAT_ASTC_HDR_2D_8X5_FLT16 = 865, + ISL_FORMAT_ASTC_HDR_2D_8X6_FLT16 = 866, + ISL_FORMAT_ASTC_HDR_2D_8X8_FLT16 = 868, + ISL_FORMAT_ASTC_HDR_2D_10X5_FLT16 = 881, + ISL_FORMAT_ASTC_HDR_2D_10X6_FLT16 = 882, + ISL_FORMAT_ASTC_HDR_2D_10X8_FLT16 = 884, + ISL_FORMAT_ASTC_HDR_2D_10X10_FLT16 = 886, + ISL_FORMAT_ASTC_HDR_2D_12X10_FLT16 = 894, + ISL_FORMAT_ASTC_HDR_2D_12X12_FLT16 = 895, /* The formats that follow are internal to ISL and as such don't have an * explicit number. We'll just let the C compiler assign it for us. Any @@ -514,6 +528,46 @@ enum isl_dim_layout { ISL_DIM_LAYOUT_GEN4_3D, /** + * Special layout used for HiZ and stencil on Sandy Bridge to work around + * the hardware's lack of mipmap support. On gen6, HiZ and stencil buffers + * work the same as on gen7+ except that they don't technically support + * mipmapping. That does not, however, stop us from doing it. As far as + * Sandy Bridge hardware is concerned, HiZ and stencil always operates on a + * single miplevel 2D (possibly array) image. The dimensions of that image + * are NOT minified. + * + * In order to implement HiZ and stencil on Sandy Bridge, we create one + * full-sized 2D (possibly array) image for every LOD with every image + * aligned to a page boundary. When the surface is used with the stencil + * or HiZ hardware, we manually offset to the image for the given LOD. 
+ * + * As a memory saving measure, we pretend that the width of each miplevel + * is minified and we place LOD1 and above below LOD0 but horizontally + * adjacent to each other. When considered as full-sized images, LOD1 and + * above technically overlap. However, since we only write to part of that + * image, the hardware will never notice the overlap. + * + * This layout looks something like this: + * + * +---------+ + * | | + * | | + * +---------+ + * | | + * | | + * +---------+ + * + * +----+ +-+ . + * | | +-+ + * +----+ + * + * +----+ +-+ . + * | | +-+ + * +----+ + */ + ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ, + + /** * For details, see the Skylake BSpec >> Memory Views >> Common Surface * Formats >> Surface Layout and Tiling >> » 1D Surfaces. */ @@ -546,6 +600,193 @@ enum isl_aux_usage { ISL_AUX_USAGE_CCS_E, }; +/** + * Enum for keeping track of the state of an auxiliary compressed surface. + * + * For any given auxiliary surface compression format (HiZ, CCS, or MCS), any + * given slice (lod + array layer) can be in one of the seven states described + * by this enum. Draw and resolve operations may cause the slice to change + * from one state to another. The seven valid states are: + * + * 1) Clear: In this state, each block in the auxiliary surface contains a + * magic value that indicates that the block is in the clear state. If + * a block is in the clear state, its values in the primary surface are + * ignored and the color of the samples in the block is taken from either the + * RENDER_SURFACE_STATE packet for color or 3DSTATE_CLEAR_PARAMS for + * depth. Since neither the primary surface nor the auxiliary surface + * contains the clear value, the surface can be cleared to a different + * color by simply changing the clear color without modifying either + * surface. + * + * 2) Partial Clear: In this state, each block in the auxiliary surface + * contains either the magic clear or pass-through value. See Clear and + * Pass-through for more details.
+ * + * 3) Compressed w/ Clear: In this state, neither the auxiliary surface + * nor the primary surface has a complete representation of the data. + * Instead, both surfaces must be used together or else rendering + * corruption may occur. Depending on the auxiliary compression format + * and the data, any given block in the primary surface may contain all, + * some, or none of the data required to reconstruct the actual sample + * values. Blocks may also be in the clear state (see Clear) and have + * their value taken from outside the surface. + * + * 4) Compressed w/o Clear: This state is identical to the state above + * except that no blocks are in the clear state. In this state, all of + * the data required to reconstruct the final sample values is contained + * in the auxiliary and primary surface and the clear value is not + * considered. + * + * 5) Resolved: In this state, the primary surface contains 100% of the + * data. The auxiliary surface is also valid so the surface can be + * validly used with or without aux enabled. The auxiliary surface may, + * however, contain non-trivial data and any update to the primary + * surface with aux disabled will cause the two to get out of sync. + * + * 6) Pass-through: In this state, the primary surface contains 100% of the + * data and every block in the auxiliary surface contains a magic value + * which indicates that the auxiliary surface should be ignored and + * only the primary surface should be considered. Updating the primary + * surface without aux works fine and can be done repeatedly in this + * mode. Writing to a surface in pass-through mode with aux enabled may + * cause the auxiliary buffer to contain non-trivial data and no longer + * be in the pass-through state. + * + * 7) Aux Invalid: In this state, the primary surface contains 100% of the + * data and the auxiliary surface is completely bogus. Any attempt to + * use the auxiliary surface is liable to result in rendering + * corruption.
The only thing that one can do to re-enable aux once + * this state is reached is to use an ambiguate pass to transition into + * the pass-through state. + * + * Drawing with or without aux enabled may implicitly cause the surface to + * transition between these states. There are also four types of auxiliary + * compression operations which cause an explicit transition: + * + * 1) Fast Clear: This operation writes the magic "clear" value to the + * auxiliary surface. This operation will safely transition any slice + * of a surface from any state to the clear state so long as the entire + * slice is fast cleared at once. A fast clear that only covers part of + * a slice of a surface is called a partial fast clear. + * + * 2) Full Resolve: This operation combines the auxiliary surface data + * with the primary surface data and writes the result to the primary. + * For HiZ, the docs call this a depth resolve. For CCS, the hardware + * full resolve operation does both a full resolve and an ambiguate so + * it actually takes you all the way to the pass-through state. + * + * 3) Partial Resolve: This operation considers blocks which are in the + * "clear" state and writes the clear value directly into the primary or + * auxiliary surface. Once this operation completes, the surface is + * still compressed but no longer references the clear color. This + * operation is only available for CCS. + * + * 4) Ambiguate: This operation throws away the current auxiliary data and + * replaces it with the magic pass-through value. If an ambiguate + * operation is performed when the primary surface does not contain 100% + * of the data, data will be lost. This operation is only implemented + * in hardware for depth where it is called a HiZ resolve. + * + * Not all operations are valid or useful in all states. The diagram below + * contains a complete description of the states and all valid and useful + * transitions except clear. 
+ * + * Draw w/ Aux + * +----------+ + * | | + * | +-------------+ Draw w/ Aux +-------------+ + * +------>| Compressed |<-------------------| Clear | + * | w/ Clear |----->----+ | | + * +-------------+ | +-------------+ + * | /|\ | | | + * | | | | | + * | | +------<-----+ | Draw w/ + * | | | | Clear Only + * | | Full | | +----------+ + * Partial | | Resolve | \|/ | | + * Resolve | | | +-------------+ | + * | | | | Partial |<------+ + * | | | | Clear |<----------+ + * | | | +-------------+ | + * | | | | | + * | | +------>---------+ Full | + * | | | Resolve | + * Draw w/ aux | | Partial Fast Clear | | + * +----------+ | +--------------------------+ | | + * | | \|/ | \|/ | + * | +-------------+ Full Resolve +-------------+ | + * +------>| Compressed |------------------->| Resolved | | + * | w/o Clear |<-------------------| | | + * +-------------+ Draw w/ Aux +-------------+ | + * /|\ | | | + * | Draw | | Draw | + * | w/ Aux | | w/o Aux | + * | Ambiguate | | | + * | +--------------------------+ | | + * Draw w/o Aux | | | Draw w/o Aux | + * +----------+ | | | +----------+ | + * | | | \|/ \|/ | | | + * | +-------------+ Ambiguate +-------------+ | | + * +------>| Pass- |<-------------------| Aux |<------+ | + * +------>| through | | Invalid | | + * | +-------------+ +-------------+ | + * | | | | + * +----------+ +-----------------------------------------------------+ + * Draw w/ Partial Fast Clear + * Clear Only + * + * + * While the above general theory applies to all forms of auxiliary + * compression on Intel hardware, not all states and operations are available + * on all compression types. However, each of the auxiliary states and + * operations can be fairly easily mapped onto the above diagram: + * + * HiZ: Hierarchical depth compression is capable of being in any of the + * states above. Hardware provides three HiZ operations: "Depth + * Clear", "Depth Resolve", and "HiZ Resolve" which map to "Fast + * Clear", "Full Resolve", and "Ambiguate" respectively. 
The + * hardware provides no HiZ partial resolve operation so the only way + * to get into the "Compressed w/o Clear" state is to render with HiZ + * when the surface is in the resolved or pass-through states. + * + * MCS: Multisample compression is technically capable of being in any of + * the states above except that most of them aren't useful. Both the + * render engine and the sampler support MCS compression and, apart + * from clear color, MCS is format-unaware so we leave the surface + * compressed 100% of the time. The hardware provides no MCS + * operations. + * + * CCS_D: Single-sample fast-clears (also called CCS_D in ISL) are one of + * the simplest forms of compression since they don't do anything + * beyond clear color tracking. They really only support three of + * the seven states: Clear, Partial Clear, and Pass-through. The + * only CCS_D operation is "Resolve" which maps to a full resolve + * followed by an ambiguate. + * + * CCS_E: Single-sample render target compression (also called CCS_E in ISL) + * is capable of being in almost all of the above states. The only + * exception is that it does not have separate resolved and pass- + * through states. Instead, the CCS_E full resolve operation does + * both a resolve and an ambiguate so it goes directly into the + * pass-through state. CCS_E also provides fast clear and partial + * resolve operations which work as described above. + * + * While it is technically possible to perform a CCS_E ambiguate, it + * is not provided by Sky Lake hardware so we choose to avoid the aux + * invalid state. If the aux invalid state were determined to be + * useful, a CCS ambiguate could be done by carefully rendering to + * the CCS and filling it with zeros.
+ */ +enum isl_aux_state { + ISL_AUX_STATE_CLEAR = 0, + ISL_AUX_STATE_PARTIAL_CLEAR, + ISL_AUX_STATE_COMPRESSED_CLEAR, + ISL_AUX_STATE_COMPRESSED_NO_CLEAR, + ISL_AUX_STATE_RESOLVED, + ISL_AUX_STATE_PASS_THROUGH, + ISL_AUX_STATE_AUX_INVALID, +}; + /* TODO(chadv): Explain */ enum isl_array_pitch_span { ISL_ARRAY_PITCH_SPAN_FULL, @@ -576,6 +817,21 @@ typedef uint64_t isl_surf_usage_flags_t; /** @} */ /** + * @defgroup Channel Mask + * + * These #define values are chosen to match the values of + * RENDER_SURFACE_STATE::Color Buffer Component Write Disables + * + * @{ + */ +typedef uint8_t isl_channel_mask_t; +#define ISL_CHANNEL_BLUE_BIT (1 << 0) +#define ISL_CHANNEL_GREEN_BIT (1 << 1) +#define ISL_CHANNEL_RED_BIT (1 << 2) +#define ISL_CHANNEL_ALPHA_BIT (1 << 3) +/** @} */ + +/** * @brief A channel select (also known as texture swizzle) value */ enum isl_channel_select { @@ -671,6 +927,32 @@ struct isl_device { const struct gen_device_info *info; bool use_separate_stencil; bool has_bit6_swizzling; + + /** + * Describes the layout of a RENDER_SURFACE_STATE structure for the + * current gen. + */ + struct { + uint8_t size; + uint8_t align; + uint8_t addr_offset; + uint8_t aux_addr_offset; + + /* Rounded up to the nearest dword to simplify GPU memcpy operations. */ + uint8_t clear_value_size; + uint8_t clear_value_offset; + } ss; + + /** + * Describes the layout of the depth/stencil/hiz commands as emitted by + * isl_emit_depth_stencil_hiz. + */ + struct { + uint8_t size; + uint8_t depth_offset; + uint8_t stencil_offset; + uint8_t hiz_offset; + } ds; }; struct isl_extent2d { @@ -772,6 +1054,25 @@ struct isl_tile_info { }; /** + * Metadata about a DRM format modifier. 
+ */ +struct isl_drm_modifier_info { + uint64_t modifier; + + /** Text name of the modifier */ + const char *name; + + /** ISL tiling implied by this modifier */ + enum isl_tiling tiling; + + /** ISL aux usage implied by this modifier */ + enum isl_aux_usage aux_usage; + + /** Whether or not this modifier supports clear color */ + bool supports_clear_color; +}; + +/** * @brief Input to surface initialization * * @invariant width >= 1 @@ -799,8 +1100,11 @@ struct isl_surf_init_info { /** Lower bound for isl_surf::alignment, in bytes. */ uint32_t min_alignment; - /** Lower bound for isl_surf::pitch, in bytes. */ - uint32_t min_pitch; + /** + * Exact value for isl_surf::row_pitch. Ignored if zero. isl_surf_init() + * will fail if this is misaligned or out of bounds. + */ + uint32_t row_pitch; isl_surf_usage_flags_t usage; @@ -843,7 +1147,7 @@ struct isl_surf { uint32_t samples; /** Total size of the surface, in bytes. */ - uint32_t size; + uint64_t size; /** Required alignment for the surface's base address. */ uint32_t alignment; @@ -923,6 +1227,12 @@ struct isl_view { * for texturing, they are ignored. */ uint32_t base_array_layer; + + /** + * Array Length + * + * Indicates the number of array elements starting at Base Array Layer. + */ uint32_t array_len; struct isl_swizzle swizzle; @@ -964,6 +1274,11 @@ struct isl_surf_fill_state_info { */ union isl_color_value clear_color; + /** + * Surface write disables for gen4-5 + */ + isl_channel_mask_t write_disables; + /* Intra-tile offset */ uint16_t x_offset_sa, y_offset_sa; }; @@ -997,6 +1312,61 @@ struct isl_buffer_fill_state_info { uint32_t stride; }; +struct isl_depth_stencil_hiz_emit_info { + /** + * The depth surface + */ + const struct isl_surf *depth_surf; + + /** + * The stencil surface + * + * If separate stencil is not available, this must point to the same + * isl_surf as depth_surf. + */ + const struct isl_surf *stencil_surf; + + /** + * The view into the depth and stencil surfaces. 
+ * + * This view applies to both surfaces simultaneously. + */ + const struct isl_view *view; + + /** + * The address of the depth surface in GPU memory + */ + uint64_t depth_address; + + /** + * The address of the stencil surface in GPU memory + * + * If separate stencil is not available, this must have the same value as + * depth_address. + */ + uint64_t stencil_address; + + /** + * The Memory Object Control state for depth and stencil buffers + * + * Both depth and stencil will get the same MOCS value. The exact format + * of this value depends on hardware generation. + */ + uint32_t mocs; + + /** + * The HiZ surface or NULL if HiZ is disabled. + */ + const struct isl_surf *hiz_surf; + enum isl_aux_usage hiz_usage; + uint64_t hiz_address; + + /** + * The depth clear value + */ + float depth_clear_value; +}; + extern const struct isl_format_layout isl_format_layouts[]; void @@ -1029,11 +1399,21 @@ bool isl_format_supports_filtering(const struct gen_device_info *devinfo, enum isl_format format); bool isl_format_supports_vertex_fetch(const struct gen_device_info *devinfo, enum isl_format format); -bool isl_format_supports_lossless_compression(const struct gen_device_info *devinfo, - enum isl_format format); +bool isl_format_supports_typed_writes(const struct gen_device_info *devinfo, + enum isl_format format); +bool isl_format_supports_typed_reads(const struct gen_device_info *devinfo, + enum isl_format format); +bool isl_format_supports_ccs_d(const struct gen_device_info *devinfo, + enum isl_format format); +bool isl_format_supports_ccs_e(const struct gen_device_info *devinfo, + enum isl_format format); bool isl_format_supports_multisampling(const struct gen_device_info *devinfo, enum isl_format format); +bool isl_formats_are_ccs_e_compatible(const struct gen_device_info *devinfo, + enum isl_format format1, + enum isl_format format2); + bool isl_format_has_unorm_channel(enum isl_format fmt) ATTRIBUTE_CONST; bool isl_format_has_snorm_channel(enum isl_format fmt) 
ATTRIBUTE_CONST; bool isl_format_has_ufloat_channel(enum isl_format fmt) ATTRIBUTE_CONST; @@ -1119,6 +1499,14 @@ isl_format_block_is_1x1x1(enum isl_format fmt) } static inline bool +isl_format_is_srgb(enum isl_format fmt) +{ + return isl_format_layouts[fmt].colorspace == ISL_COLORSPACE_SRGB; +} + +enum isl_format isl_format_srgb_to_linear(enum isl_format fmt); + +static inline bool isl_format_is_rgb(enum isl_format fmt) { return isl_format_layouts[fmt].channels.r.bits > 0 && @@ -1155,6 +1543,15 @@ isl_tiling_is_std_y(enum isl_tiling tiling) return (1u << tiling) & ISL_TILING_STD_Y_MASK; } +uint32_t +isl_tiling_to_i915_tiling(enum isl_tiling tiling); + +enum isl_tiling +isl_tiling_from_i915_tiling(uint32_t tiling); + +const struct isl_drm_modifier_info * ATTRIBUTE_CONST +isl_drm_modifier_get_info(uint64_t modifier); + struct isl_extent2d ATTRIBUTE_CONST isl_get_interleaved_msaa_px_size_sa(uint32_t samples); @@ -1240,6 +1637,9 @@ isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, return e; } +bool isl_color_value_is_zero_one(union isl_color_value value, + enum isl_format format); + #define isl_surf_init(dev, surf, ...) 
\ isl_surf_init_s((dev), (surf), \ &(struct isl_surf_init_info) { __VA_ARGS__ }); @@ -1250,16 +1650,15 @@ isl_surf_init_s(const struct isl_device *dev, const struct isl_surf_init_info *restrict info); void -isl_surf_get_tile_info(const struct isl_device *dev, - const struct isl_surf *surf, +isl_surf_get_tile_info(const struct isl_surf *surf, struct isl_tile_info *tile_info); -void +bool isl_surf_get_hiz_surf(const struct isl_device *dev, const struct isl_surf *surf, struct isl_surf *hiz_surf); -void +bool isl_surf_get_mcs_surf(const struct isl_device *dev, const struct isl_surf *surf, struct isl_surf *mcs_surf); @@ -1267,7 +1666,8 @@ isl_surf_get_mcs_surf(const struct isl_device *dev, bool isl_surf_get_ccs_surf(const struct isl_device *dev, const struct isl_surf *surf, - struct isl_surf *ccs_surf); + struct isl_surf *ccs_surf, + uint32_t row_pitch /**< Ignored if 0 */); #define isl_surf_fill_state(dev, state, ...) \ isl_surf_fill_state_s((dev), (state), \ @@ -1285,6 +1685,14 @@ void isl_buffer_fill_state_s(const struct isl_device *dev, void *state, const struct isl_buffer_fill_state_info *restrict info); +#define isl_emit_depth_stencil_hiz(dev, batch, ...) \ + isl_emit_depth_stencil_hiz_s((dev), (batch), \ + &(struct isl_depth_stencil_hiz_emit_info) { __VA_ARGS__ }) + +void +isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + void isl_surf_fill_image_param(const struct isl_device *dev, struct brw_image_param *param, @@ -1413,6 +1821,50 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, uint32_t *y_offset_el); /** + * Calculate the offset, in bytes and intratile surface samples, to a + * subimage in the surface. + * + * This is equivalent to calling isl_surf_get_image_offset_el, passing the + * result to isl_tiling_get_intratile_offset_el, and converting the tile + * offsets to samples. 
+ * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *offset_B, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa); + +/** + * Create an isl_surf that represents a particular subimage in the surface. + * + * The newly created surface will have a single miplevel and array slice. The + * surface lives at the returned byte and intratile offsets, in samples. + * + * It is safe to call this function with surf == image_surf. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_surf(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + struct isl_surf *image_surf, + uint32_t *offset_B, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa); + +/** * @brief Calculate the intratile offsets to a surface. * * In @a base_address_offset return the offset from the base of the surface to @@ -1423,9 +1875,8 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, * surface's tiling format. 
*/ void -isl_tiling_get_intratile_offset_el(const struct isl_device *dev, - enum isl_tiling tiling, - uint8_t bs, +isl_tiling_get_intratile_offset_el(enum isl_tiling tiling, + uint32_t bpb, uint32_t row_pitch, uint32_t total_x_offset_el, uint32_t total_y_offset_el, @@ -1434,8 +1885,7 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev, uint32_t *y_offset_el); static inline void -isl_tiling_get_intratile_offset_sa(const struct isl_device *dev, - enum isl_tiling tiling, +isl_tiling_get_intratile_offset_sa(enum isl_tiling tiling, enum isl_format format, uint32_t row_pitch, uint32_t total_x_offset_sa, @@ -1446,8 +1896,6 @@ isl_tiling_get_intratile_offset_sa(const struct isl_device *dev, { const struct isl_format_layout *fmtl = isl_format_get_layout(format); - assert(fmtl->bpb % 8 == 0); - /* For computing the intratile offsets, we actually want a strange unit * which is samples for multisampled surfaces but elements for compressed * surfaces. @@ -1457,7 +1905,7 @@ isl_tiling_get_intratile_offset_sa(const struct isl_device *dev, const uint32_t total_x_offset = total_x_offset_sa / fmtl->bw; const uint32_t total_y_offset = total_y_offset_sa / fmtl->bh; - isl_tiling_get_intratile_offset_el(dev, tiling, fmtl->bpb / 8, row_pitch, + isl_tiling_get_intratile_offset_el(tiling, fmtl->bpb, row_pitch, total_x_offset, total_y_offset, base_address_offset, x_offset_sa, y_offset_sa); diff --git a/lib/mesa/src/intel/isl/isl_emit_depth_stencil.c b/lib/mesa/src/intel/isl/isl_emit_depth_stencil.c new file mode 100644 index 000000000..0d541fd1c --- /dev/null +++ b/lib/mesa/src/intel/isl/isl_emit_depth_stencil.c @@ -0,0 +1,225 @@ +/* + * Copyright 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdint.h> + +#define __gen_address_type uint64_t +#define __gen_user_data void + +static inline uint64_t +__gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta) +{ + return addr + delta; +} + +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" + +#include "isl_priv.h" + +#define __PASTE2(x, y) x ## y +#define __PASTE(x, y) __PASTE2(x, y) +#define isl_genX(x) __PASTE(isl_, genX(x)) + +static const uint32_t isl_to_gen_ds_surftype[] = { +#if GEN_GEN >= 9 + /* From the SKL PRM, "3DSTATE_DEPTH_STENCIL::SurfaceType": + * + * "If depth/stencil is enabled with 1D render target, depth/stencil + * surface type needs to be set to 2D surface type and height set to 1. + * Depth will use (legacy) TileY and stencil will use TileW. For this + * case only, the Surface Type of the depth buffer can be 2D while the + * Surface Type of the render target(s) are 1D, representing an + * exception to a programming note above. 
+ */ + [ISL_SURF_DIM_1D] = SURFTYPE_2D, +#else + [ISL_SURF_DIM_1D] = SURFTYPE_1D, +#endif + [ISL_SURF_DIM_2D] = SURFTYPE_2D, + [ISL_SURF_DIM_3D] = SURFTYPE_3D, +}; + +void +isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info) +{ + struct GENX(3DSTATE_DEPTH_BUFFER) db = { + GENX(3DSTATE_DEPTH_BUFFER_header), + }; + + if (info->depth_surf) { + db.SurfaceType = isl_to_gen_ds_surftype[info->depth_surf->dim]; + db.SurfaceFormat = isl_surf_get_depth_format(dev, info->depth_surf); + db.Width = info->depth_surf->logical_level0_px.width - 1; + db.Height = info->depth_surf->logical_level0_px.height - 1; + } else if (info->stencil_surf) { + db.SurfaceType = isl_to_gen_ds_surftype[info->stencil_surf->dim]; + db.SurfaceFormat = D32_FLOAT; + db.Width = info->stencil_surf->logical_level0_px.width - 1; + db.Height = info->stencil_surf->logical_level0_px.height - 1; + } else { + db.SurfaceType = SURFTYPE_NULL; + db.SurfaceFormat = D32_FLOAT; + } + + if (info->depth_surf || info->stencil_surf) { + /* These are based entirely on the view */ + db.Depth = db.RenderTargetViewExtent = info->view->array_len - 1; + db.LOD = info->view->base_level; + db.MinimumArrayElement = info->view->base_array_layer; + } + + if (info->depth_surf) { +#if GEN_GEN >= 7 + db.DepthWriteEnable = true; +#endif + db.SurfaceBaseAddress = info->depth_address; +#if GEN_GEN >= 6 + db.DepthBufferMOCS = info->mocs; +#endif + +#if GEN_GEN <= 6 + db.TiledSurface = info->depth_surf->tiling != ISL_TILING_LINEAR; + db.TileWalk = info->depth_surf->tiling == ISL_TILING_Y0 ? 
TILEWALK_YMAJOR : + TILEWALK_XMAJOR; + db.MIPMapLayoutMode = MIPLAYOUT_BELOW; +#endif + + db.SurfacePitch = info->depth_surf->row_pitch - 1; +#if GEN_GEN >= 8 + db.SurfaceQPitch = + isl_surf_get_array_pitch_el_rows(info->depth_surf) >> 2; +#endif + } + +#if GEN_GEN == 5 || GEN_GEN == 6 + const bool separate_stencil = + info->stencil_surf && info->stencil_surf->format == ISL_FORMAT_R8_UINT; + if (separate_stencil || info->hiz_usage == ISL_AUX_USAGE_HIZ) { + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); + db.SeparateStencilBufferEnable = true; + db.HierarchicalDepthBufferEnable = true; + } +#endif + +#if GEN_GEN >= 6 + struct GENX(3DSTATE_STENCIL_BUFFER) sb = { + GENX(3DSTATE_STENCIL_BUFFER_header), + }; +#else +# define sb db +#endif + + if (info->stencil_surf) { +#if GEN_GEN >= 7 + db.StencilWriteEnable = true; +#endif +#if GEN_GEN >= 8 || GEN_IS_HASWELL + sb.StencilBufferEnable = true; +#endif + sb.SurfaceBaseAddress = info->stencil_address; +#if GEN_GEN >= 6 + sb.StencilBufferMOCS = info->mocs; +#endif + sb.SurfacePitch = info->stencil_surf->row_pitch - 1; +#if GEN_GEN >= 8 + sb.SurfaceQPitch = + isl_surf_get_array_pitch_el_rows(info->stencil_surf) >> 2; +#endif + } + +#if GEN_GEN >= 6 + struct GENX(3DSTATE_HIER_DEPTH_BUFFER) hiz = { + GENX(3DSTATE_HIER_DEPTH_BUFFER_header), + }; + struct GENX(3DSTATE_CLEAR_PARAMS) clear = { + GENX(3DSTATE_CLEAR_PARAMS_header), + }; + + assert(info->hiz_usage == ISL_AUX_USAGE_NONE || + info->hiz_usage == ISL_AUX_USAGE_HIZ); + if (info->hiz_usage == ISL_AUX_USAGE_HIZ) { + db.HierarchicalDepthBufferEnable = true; + + hiz.SurfaceBaseAddress = info->hiz_address; + hiz.HierarchicalDepthBufferMOCS = info->mocs; + hiz.SurfacePitch = info->hiz_surf->row_pitch - 1; +#if GEN_GEN >= 8 + /* From the SKL PRM Vol2a: + * + * The interpretation of this field is dependent on Surface Type + * as follows: + * - SURFTYPE_1D: distance in pixels between array slices + * - SURFTYPE_2D/CUBE: distance in rows between array slices + * - SURFTYPE_3D: distance 
in rows between R - slices + * + * Unfortunately, the docs aren't 100% accurate here. They fail to + * mention that the 1-D rule only applies to linear 1-D images. + * Since depth and HiZ buffers are always tiled, they are treated as + * 2-D images. Prior to Sky Lake, this field is always in rows. + */ + hiz.SurfaceQPitch = + isl_surf_get_array_pitch_sa_rows(info->hiz_surf) >> 2; +#endif + + clear.DepthClearValueValid = true; +#if GEN_GEN >= 8 + clear.DepthClearValue = info->depth_clear_value; +#else + switch (info->depth_surf->format) { + case ISL_FORMAT_R32_FLOAT: { + union { float f; uint32_t u; } fu; + fu.f = info->depth_clear_value; + clear.DepthClearValue = fu.u; + break; + } + case ISL_FORMAT_R24_UNORM_X8_TYPELESS: + clear.DepthClearValue = info->depth_clear_value * ((1u << 24) - 1); + break; + case ISL_FORMAT_R16_UNORM: + clear.DepthClearValue = info->depth_clear_value * ((1u << 16) - 1); + break; + default: + unreachable("Invalid depth type"); + } +#endif + } +#endif /* GEN_GEN >= 6 */ + + /* Pack everything into the batch */ + uint32_t *dw = batch; + GENX(3DSTATE_DEPTH_BUFFER_pack)(NULL, dw, &db); + dw += GENX(3DSTATE_DEPTH_BUFFER_length); + +#if GEN_GEN >= 6 + GENX(3DSTATE_STENCIL_BUFFER_pack)(NULL, dw, &sb); + dw += GENX(3DSTATE_STENCIL_BUFFER_length); + + GENX(3DSTATE_HIER_DEPTH_BUFFER_pack)(NULL, dw, &hiz); + dw += GENX(3DSTATE_HIER_DEPTH_BUFFER_length); + + GENX(3DSTATE_CLEAR_PARAMS_pack)(NULL, dw, &clear); + dw += GENX(3DSTATE_CLEAR_PARAMS_length); +#endif +} diff --git a/lib/mesa/src/intel/isl/isl_format.c b/lib/mesa/src/intel/isl/isl_format.c index 1a6727b50..435b0d003 100644 --- a/lib/mesa/src/intel/isl/isl_format.c +++ b/lib/mesa/src/intel/isl/isl_format.c @@ -37,14 +37,16 @@ struct surface_format_info { uint8_t input_vb; uint8_t streamed_output_vb; uint8_t color_processing; - uint8_t lossless_compression; + uint8_t typed_write; + uint8_t typed_read; + uint8_t ccs_e; }; /* This macro allows us to write the table almost as it appears in the PRM, 
* while restructuring it to turn it into the C code we want. */ -#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, sf) \ - [ISL_FORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e}, +#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, tw, tr, ccs_e, sf) \ + [ISL_FORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, tw, tr, ccs_e}, #define Y 0 #define x 255 @@ -86,259 +88,273 @@ struct surface_format_info { * - Render Target Surface Types [SKL+] */ static const struct surface_format_info format_info[] = { -/* smpl filt shad CK RT AB VB SO color ccs_e */ - SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_UINT) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R64G64_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_USCALED) - SF( x, x, x, x, x, x, 75, x, x, x, R32G32B32A32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) - SF( Y, 50, x, x, x, x, Y, Y, x, x, R32G32B32_FLOAT) - SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_SINT) - SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_UINT) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_USCALED) - SF( x, x, x, x, x, x, 75, x, x, x, R32G32B32_SFIXED) - SF( Y, Y, x, x, Y, 45, Y, x, 60, 90, R16G16B16A16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16B16A16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16B16A16_FLOAT) - SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32_FLOAT) 
- SF( Y, 70, x, x, Y, Y, Y, Y, x, x, R32G32_FLOAT_LD) - SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_UINT) - SF( Y, 50, Y, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) - SF( Y, 50, x, x, x, x, x, x, x, x, L32A32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, 90, R16G16B16X16_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, x, A32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, x, L32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, x, I32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R32G32_USCALED) - SF( x, x, x, x, x, x, 75, x, x, x, R32G32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, x, R64_PASSTHRU) - SF( Y, Y, x, Y, Y, Y, Y, x, 60, 90, B8G8R8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, x, B8G8R8A8_UNORM_SRGB) -/* smpl filt shad CK RT AB VB SO color ccs_e */ - SF( Y, Y, x, x, Y, Y, Y, x, 60, x, R10G10B10A2_UNORM) - SF( Y, Y, x, x, x, x, x, x, 60, x, R10G10B10A2_UNORM_SRGB) - SF( Y, x, x, x, Y, x, Y, x, x, x, R10G10B10A2_UINT) - SF( Y, Y, x, x, x, x, Y, x, x, x, R10G10B10_SNORM_A2_UNORM) - SF( Y, Y, x, x, Y, Y, Y, x, 60, 90, R8G8B8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, x, R8G8B8A8_UNORM_SRGB) - SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R8G8B8A8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_UINT) - SF( Y, Y, x, x, Y, 45, Y, x, x, 90, R16G16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16_FLOAT) - SF( Y, Y, x, x, 
Y, Y, 75, x, 60, x, B10G10R10A2_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, x, R11G11B10_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_UINT) - SF( Y, 50, Y, x, Y, Y, Y, Y, x, 90, R32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) - SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, x, I24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, x, L24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, x, A24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, x, I32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, x, L32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, x, A32_FLOAT) - SF( Y, Y, x, Y, 80, 80, x, x, 60, 90, B8G8R8X8_UNORM) - SF( Y, Y, x, x, 80, 80, x, x, x, x, B8G8R8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) - SF( Y, Y, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, x, R32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R32_SNORM) -/* smpl filt shad CK RT AB VB SO color ccs_e */ - SF( x, x, x, x, x, x, Y, x, x, x, R10G10B10X2_USCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R16G16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R16G16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R32_USCALED) - SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G6R5_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G6R5_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G5R5A1_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G5R5A1_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, x, B4G4R4A4_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, x, B4G4R4A4_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, 
Y, x, x, x, R8G8_UNORM) - SF( Y, Y, x, Y, Y, 60, Y, x, x, x, R8G8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_UINT) - SF( Y, Y, Y, x, Y, 45, Y, x, 70, x, R16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, x, R16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, x, R16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, x, R16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, x, R16_FLOAT) - SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) - SF( Y, Y, Y, x, x, x, x, x, x, x, I16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, x, L16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, x, A16_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, x, L8A8_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, x, I16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, x, L16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, x, A16_FLOAT) - SF(45, 45, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) - SF( Y, Y, x, Y, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM_SRGB) - SF( x, x, x, x, x, x, Y, x, x, x, R8G8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R8G8_USCALED) -/* smpl filt shad CK RT AB VB SO color ccs_e */ - SF( x, x, x, x, x, x, Y, x, x, x, R16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R16_USCALED) - SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) +/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */ + SF( Y, 50, x, x, Y, Y, Y, Y, x, 70, 90, 90, R32G32B32A32_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32B32A32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32B32A32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32A32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32A32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64G64_FLOAT) + SF( Y, 50, x, x, 100, 100, x, x, x, x, x, 100, R32G32B32X32_FLOAT) + SF( x, x, x, x, 
x, x, Y, x, x, x, x, x, R32G32B32A32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64G64_PASSTHRU) + SF( Y, 50, x, x, x, x, Y, Y, x, x, x, x, R32G32B32_FLOAT) + SF( Y, x, x, x, x, x, Y, Y, x, x, x, x, R32G32B32_SINT) + SF( Y, x, x, x, x, x, Y, Y, x, x, x, x, R32G32B32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_USCALED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32G32B32_SFIXED) + SF( Y, Y, x, x, Y, 45, Y, x, 60, 70, x, 90, R16G16B16A16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, 90, R16G16B16A16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, 90, R16G16B16A16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, 90, R16G16B16A16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 70, 90, 90, R16G16B16A16_FLOAT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, 70, 90, 90, R32G32_FLOAT) + SF( Y, 70, x, x, Y, Y, Y, Y, x, x, x, x, R32G32_FLOAT_LD) + SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32_UINT) + SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) + SF( Y, 50, x, x, x, x, x, x, x, x, x, x, L32A32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM) + SF( Y, Y, x, x, 90, 90, x, x, x, x, x, 90, R16G16B16X16_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, x, x, A32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, x, x, L32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, x, x, I32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16B16A16_SSCALED) + SF( x, 
x, x, x, x, x, Y, x, x, x, x, x, R16G16B16A16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_USCALED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32G32_SFIXED) + SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64_PASSTHRU) + SF( Y, Y, x, Y, Y, Y, Y, x, 60, 70, x, 90, B8G8R8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, x, 100, B8G8R8A8_UNORM_SRGB) +/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */ + SF( Y, Y, x, x, Y, Y, Y, x, 60, 70, x, 100, R10G10B10A2_UNORM) + SF( Y, Y, x, x, x, x, x, x, 60, x, x, x, R10G10B10A2_UNORM_SRGB) + SF( Y, x, x, x, Y, x, Y, x, x, 70, x, 100, R10G10B10A2_UINT) + SF( Y, Y, x, x, x, x, Y, x, x, x, x, x, R10G10B10_SNORM_A2_UNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, 70, x, 90, R8G8B8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, x, 100, R8G8B8A8_UNORM_SRGB) + SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, 90, R8G8B8A8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, 90, R8G8B8A8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, 90, R8G8B8A8_UINT) + SF( Y, Y, x, x, Y, 45, Y, x, x, 70, x, 90, R16G16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, 90, R16G16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, 90, R16G16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, 90, R16G16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 70, 90, 90, R16G16_FLOAT) + SF( Y, Y, x, x, Y, Y, 75, x, 60, 70, x, 100, B10G10R10A2_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, x, 100, B10G10R10A2_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, 70, x, 100, R11G11B10_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 70, 70, 90, R32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 70, 70, 90, R32_UINT) + SF( Y, 50, Y, x, Y, Y, Y, Y, x, 70, 70, 90, R32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, L16A16_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, I24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, L24X8_UNORM) + SF( 
Y, 50, Y, x, x, x, x, x, x, x, x, x, A24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, I32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, L32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, A32_FLOAT) + SF( Y, Y, x, Y, 80, 80, x, x, 60, x, x, 90, B8G8R8X8_UNORM) + SF( Y, Y, x, x, 80, 80, x, x, x, x, x, 100, B8G8R8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, L16A16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_SNORM) +/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */ + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R10G10B10X2_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8A8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8A8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_USCALED) + SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, B5G6R5_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, x, x, B5G6R5_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, B5G5R5A1_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, x, x, B5G5R5A1_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, B4G4R4A4_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, x, x, B4G4R4A4_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, 70, x, x, R8G8_UNORM) + SF( Y, Y, x, Y, Y, 60, Y, x, x, 70, x, x, R8G8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, x, R8G8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, x, R8G8_UINT) + SF( Y, Y, Y, x, Y, 45, Y, x, 70, 70, x, x, R16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, x, R16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, x, R16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, 
x, R16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 70, 90, x, R16_FLOAT) + SF( 50, 50, x, x, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) + SF( 50, 50, x, x, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) + SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, I16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, L16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, A16_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, L8A8_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, I16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, L16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, A16_FLOAT) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) + SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, 70, x, x, B5G5R5X1_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, x, x, x, B5G5R5X1_UNORM_SRGB) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8_USCALED) +/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */ + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16_USCALED) + SF( 50, 50, x, x, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) + SF( 50, 50, x, x, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) /* According to the PRM, A4B4G4R4_UNORM isn't supported until Sky Lake * but empirical testing indicates that at least sampling works just fine * on Broadwell. 
*/ - SF(80, 80, x, x, 90, x, x, x, x, x, A4B4G4R4_UNORM) - SF(90, x, x, x, x, x, x, x, x, x, L8A8_UINT) - SF(90, x, x, x, x, x, x, x, x, x, L8A8_SINT) - SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, x, R8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, x, R8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, x, R8_UINT) - SF( Y, Y, x, Y, Y, Y, x, x, x, x, A8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, I8_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, x, L8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE0) - SF( Y, Y, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE0) - SF( x, x, x, x, x, x, Y, x, x, x, R8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R8_USCALED) - SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, x, L8_UNORM_SRGB) - SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, x, Y8_UNORM) - SF(90, x, x, x, x, x, x, x, x, x, L8_UINT) - SF(90, x, x, x, x, x, x, x, x, x, L8_SINT) - SF(90, x, x, x, x, x, x, x, x, x, I8_UINT) - SF(90, x, x, x, x, x, x, x, x, x, I8_SINT) - SF(45, 45, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, x, R1_UNORM) - SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_NORMAL) - SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_SWAPUVY) - SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) - SF( Y, Y, x, Y, x, x, x, x, x, x, BC1_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, x, BC2_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, x, BC3_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, BC4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, BC5_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB) - SF( Y, x, x, x, x, x, x, x, x, x, MONO8) - SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPUV) - SF( Y, Y, 
x, x, Y, x, x, x, 60, x, YCRCB_SWAPY) - SF( Y, Y, x, x, x, x, x, x, x, x, DXT1_RGB) -/* smpl filt shad CK RT AB VB SO color ccs_e */ - SF( Y, Y, x, x, x, x, x, x, x, x, FXT1) - SF(75, 75, x, x, x, x, Y, x, x, x, R8G8B8_UNORM) - SF(75, 75, x, x, x, x, Y, x, x, x, R8G8B8_SNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64A64_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, x, BC4_SNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, BC5_SNORM) - SF(50, 50, x, x, x, x, 60, x, x, x, R16G16B16_FLOAT) - SF(75, 75, x, x, x, x, Y, x, x, x, R16G16B16_UNORM) - SF(75, 75, x, x, x, x, Y, x, x, x, R16G16B16_SNORM) - SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_USCALED) - SF(70, 70, x, x, x, x, x, x, x, x, BC6H_SF16) - SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM) - SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB) - SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16) - SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) - SF(75, 75, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ETC1_RGB8) - SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8) - SF(80, 80, x, x, x, x, x, x, x, x, EAC_R11) - SF(80, 80, x, x, x, x, x, x, x, x, EAC_RG11) - SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) - SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) - SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8) - SF(90, x, x, x, x, x, 75, x, x, x, R16G16B16_UINT) - SF(90, x, x, x, x, x, 75, x, x, x, R16G16B16_SINT) - SF( x, x, x, x, x, x, 75, x, x, x, R32_SFIXED) - SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_SNORM) - SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_USCALED) - SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_SSCALED) - SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_SINT) - SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_SNORM) - SF( x, x, x, x, x, x, 75, x, x, x, 
B10G10R10A2_USCALED) - SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_SSCALED) - SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_UINT) - SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_SINT) - SF( x, x, x, x, x, x, 80, x, x, x, R64G64B64A64_PASSTHRU) - SF( x, x, x, x, x, x, 80, x, x, x, R64G64B64_PASSTHRU) - SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) - SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) - SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) - SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) - SF(90, x, x, x, x, x, 75, x, x, x, R8G8B8_UINT) - SF(90, x, x, x, x, x, 75, x, x, x, R8G8B8_SINT) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_FLT16) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, 
ASTC_LDR_2D_10X5_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_U8SRGB) - SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_U8SRGB) + SF( 80, 80, x, x, 90, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8A8_UINT) + SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8A8_SINT) + SF( Y, Y, x, 45, Y, Y, Y, x, x, 70, x, x, R8_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, x, R8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, x, R8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, x, R8_UINT) + SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, A8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, I8_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, L8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE0) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE0) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8_USCALED) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, L8_UNORM_SRGB) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, x, x, Y8_UNORM) + SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8_UINT) + SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8_SINT) + SF( 90, x, x, x, x, x, x, x, x, x, x, x, I8_UINT) + SF( 90, x, x, x, x, x, x, x, x, x, x, x, I8_SINT) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R1_UNORM) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, x, x, YCRCB_NORMAL) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, x, x, YCRCB_SWAPUVY) + SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) + SF( 45, 45, x, x, x, 
x, x, x, x, x, x, x, P2_UNORM_PALETTE1) + SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, BC1_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, BC2_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, BC3_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC5_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB) + SF( Y, x, x, x, x, x, x, x, x, x, x, x, MONO8) + SF( Y, Y, x, x, Y, x, x, x, 60, x, x, x, YCRCB_SWAPUV) + SF( Y, Y, x, x, Y, x, x, x, 60, x, x, x, YCRCB_SWAPY) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, DXT1_RGB) +/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */ + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, FXT1) + SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R8G8B8_UNORM) + SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R8G8B8_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64G64B64A64_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64G64B64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC4_SNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC5_SNORM) + SF( 50, 50, x, x, x, x, 60, x, x, x, x, x, R16G16B16_FLOAT) + SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R16G16B16_UNORM) + SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R16G16B16_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16B16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16B16_USCALED) + SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC6H_SF16) + SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC7_UNORM) + SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB) + SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC6H_UF16) + SF( x, x, x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) + SF( 75, 75, x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF( 80, 80, x, x, x, x, x, x, x, x, 
x, x, ETC2_RGB8) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_R11) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_RG11) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R16G16B16_UINT) + SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R16G16B16_SINT) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32_SFIXED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_SINT) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_UINT) + SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_SINT) + SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64G64B64A64_PASSTHRU) + SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64G64B64_PASSTHRU) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R8G8B8_UINT) + SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R8G8B8_SINT) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_FLT16) + SF( 
90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X5_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_FLT16) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X5_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_U8SRGB) + SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_U8SRGB) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_4X4_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_5X4_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_5X5_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_6X5_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_6X6_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_8X5_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_8X6_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_8X8_FLT16) + SF(100, 100, x, x, 
x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X5_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X6_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X8_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X10_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_12X10_FLT16) + SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_12X12_FLT16) }; #undef x #undef Y @@ -383,6 +399,20 @@ isl_format_supports_sampling(const struct gen_device_info *devinfo, */ if (fmtl->txc == ISL_TXC_ETC1 || fmtl->txc == ISL_TXC_ETC2) return true; + } else if (devinfo->is_cherryview) { + const struct isl_format_layout *fmtl = isl_format_get_layout(format); + /* Support for ASTC LDR exists on Cherry View even though big-core + * GPUs didn't get it until Skylake. + */ + if (fmtl->txc == ISL_TXC_ASTC) + return format < ISL_FORMAT_ASTC_HDR_2D_4X4_FLT16; + } else if (gen_device_info_is_9lp(devinfo)) { + const struct isl_format_layout *fmtl = isl_format_get_layout(format); + /* Support for ASTC HDR exists on Broxton even though big-core + * GPUs didn't get it until Cannonlake. + */ + if (fmtl->txc == ISL_TXC_ASTC) + return true; } return format_gen(devinfo) >= format_info[format].sampling; @@ -402,6 +432,20 @@ isl_format_supports_filtering(const struct gen_device_info *devinfo, */ if (fmtl->txc == ISL_TXC_ETC1 || fmtl->txc == ISL_TXC_ETC2) return true; + } else if (devinfo->is_cherryview) { + const struct isl_format_layout *fmtl = isl_format_get_layout(format); + /* Support for ASTC LDR exists on Cherry View even though big-core + * GPUs didn't get it until Skylake. + */ + if (fmtl->txc == ISL_TXC_ASTC) + return format < ISL_FORMAT_ASTC_HDR_2D_4X4_FLT16; + } else if (gen_device_info_is_9lp(devinfo)) { + const struct isl_format_layout *fmtl = isl_format_get_layout(format); + /* Support for ASTC HDR exists on Broxton even though big-core + * GPUs didn't get it until Cannonlake. 
+ */ + if (fmtl->txc == ISL_TXC_ASTC) + return true; } return format_gen(devinfo) >= format_info[format].filtering; @@ -423,14 +467,76 @@ isl_format_supports_vertex_fetch(const struct gen_device_info *devinfo, return format_gen(devinfo) >= format_info[format].input_vb; } +/** + * Returns true if the given format can support typed writes. + */ +bool +isl_format_supports_typed_writes(const struct gen_device_info *devinfo, + enum isl_format format) +{ + if (!format_info[format].exists) + return false; + + return format_gen(devinfo) >= format_info[format].typed_write; +} + + +/** + * Returns true if the given format can support typed reads with format + * conversion fully handled by hardware. On Sky Lake, all formats which are + * supported for typed writes also support typed reads but some of them return + * the raw image data and don't provide format conversion. + * + * For anyone looking to find this data in the PRM, the easiest way to find + * format tables is to search for R11G11B10. There are only a few + * occurrences. + */ +bool +isl_format_supports_typed_reads(const struct gen_device_info *devinfo, + enum isl_format format) +{ + if (!format_info[format].exists) + return false; + + return format_gen(devinfo) >= format_info[format].typed_read; +} + +/** + * Returns true if the given format can support single-sample fast clears. + * This function only checks the format. In order to determine if a surface + * supports CCS_E, several other factors need to be considered such as tiling + * and sample count. See isl_surf_get_ccs_surf for details. 
+ */ +bool +isl_format_supports_ccs_d(const struct gen_device_info *devinfo, + enum isl_format format) +{ + /* Fast clears were first added on Ivy Bridge */ + if (devinfo->gen < 7) + return false; + + if (!isl_format_supports_rendering(devinfo, format)) + return false; + + const struct isl_format_layout *fmtl = isl_format_get_layout(format); + + return fmtl->bpb == 32 || fmtl->bpb == 64 || fmtl->bpb == 128; +} + +/** + * Returns true if the given format can support single-sample color + * compression. This function only checks the format. In order to determine + * if a surface supports CCS_E, several other factors need to be considered + * such as tiling and sample count. See isl_surf_get_ccs_surf for details. + */ bool -isl_format_supports_lossless_compression(const struct gen_device_info *devinfo, - enum isl_format format) +isl_format_supports_ccs_e(const struct gen_device_info *devinfo, + enum isl_format format) { if (!format_info[format].exists) return false; - return format_gen(devinfo) >= format_info[format].lossless_compression; + return format_gen(devinfo) >= format_info[format].ccs_e; } bool @@ -448,16 +554,19 @@ isl_format_supports_multisampling(const struct gen_device_info *devinfo, * - any compressed texture format (BC*) * - any YCRCB* format * - * The restriction on the format's size is removed on Broadwell. Also, - * there is an exception for HiZ which we treat as a compressed format and - * is allowed to be multisampled on Broadwell and earlier. + * The restriction on the format's size is removed on Broadwell. Moreover, + * empirically it looks that even IvyBridge can handle multisampled surfaces + * with format sizes all the way to 128-bits (RGBA32F, RGBA32I, RGBA32UI). + * + * Also, there is an exception for HiZ which we treat as a compressed + * format and is allowed to be multisampled on Broadwell and earlier. */ if (format == ISL_FORMAT_HIZ) { /* On SKL+, HiZ is always single-sampled even when the primary surface * is multisampled. 
See also isl_surf_get_hiz_surf(). */ return devinfo->gen <= 8; - } else if (devinfo->gen < 8 && isl_format_get_layout(format)->bpb > 64) { + } else if (devinfo->gen < 7 && isl_format_get_layout(format)->bpb > 64) { return false; } else if (isl_format_is_compressed(format)) { return false; @@ -468,6 +577,37 @@ isl_format_supports_multisampling(const struct gen_device_info *devinfo, } } +/** + * Returns true if the two formats are "CCS_E compatible" meaning that you can + * render in one format with CCS_E enabled and then texture using the other + * format without needing a resolve. + * + * Note: Even if the formats are compatible, special care must be taken if a + * clear color is involved because the encoding of the clear color is heavily + * format-dependent. + */ +bool +isl_formats_are_ccs_e_compatible(const struct gen_device_info *devinfo, + enum isl_format format1, + enum isl_format format2) +{ + /* They must support CCS_E */ + if (!isl_format_supports_ccs_e(devinfo, format1) || + !isl_format_supports_ccs_e(devinfo, format2)) + return false; + + const struct isl_format_layout *fmtl1 = isl_format_get_layout(format1); + const struct isl_format_layout *fmtl2 = isl_format_get_layout(format2); + + /* The compression used by CCS is not dependent on the actual data encoding + * of the format but only depends on the bit-layout of the channels. 
+ */ + return fmtl1->channels.r.bits == fmtl2->channels.r.bits && + fmtl1->channels.g.bits == fmtl2->channels.g.bits && + fmtl1->channels.b.bits == fmtl2->channels.b.bits && + fmtl1->channels.a.bits == fmtl2->channels.a.bits; +} + static inline bool isl_format_has_channel_type(enum isl_format fmt, enum isl_base_type type) { diff --git a/lib/mesa/src/intel/isl/isl_gen7.c b/lib/mesa/src/intel/isl/isl_gen7.c index b6a86d23f..24d411f51 100644 --- a/lib/mesa/src/intel/isl/isl_gen7.c +++ b/lib/mesa/src/intel/isl/isl_gen7.c @@ -24,6 +24,25 @@ #include "isl_gen7.h" #include "isl_priv.h" +static bool +gen7_format_needs_valign2(const struct isl_device *dev, + enum isl_format format) +{ + assert(ISL_DEV_GEN(dev) == 7); + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + * + * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. + */ + return isl_format_is_yuv(format) || + format == ISL_FORMAT_R32G32B32_FLOAT; +} + bool isl_gen7_choose_msaa_layout(const struct isl_device *dev, const struct isl_surf_init_info *info, @@ -76,8 +95,13 @@ isl_gen7_choose_msaa_layout(const struct isl_device *dev, * Note that the above SINT restrictions apply only to *MSRTs* (that is, * *multisampled* render targets). The restrictions seem to permit an MCS * if the render target is singlesampled. + * + * Moreover, empirically it looks that hardware can render multisampled + * surfaces with RGBA8I, RGBA16I and RGBA32I. */ - if (isl_format_has_sint_channel(info->format)) + + /* Multisampling requires vertical alignment of four. 
*/ + if (info->samples > 1 && gen7_format_needs_valign2(dev, info->format)) return false; /* More obvious restrictions */ @@ -151,25 +175,6 @@ isl_gen7_choose_msaa_layout(const struct isl_device *dev, return true; } -static bool -gen7_format_needs_valign2(const struct isl_device *dev, - enum isl_format format) -{ - assert(ISL_DEV_GEN(dev) == 7); - - /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, - * RENDER_SURFACE_STATE Surface Vertical Alignment: - * - * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL - * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY - * (0x190) - * - * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. - */ - return isl_format_is_yuv(format) || - format == ISL_FORMAT_R32G32B32_FLOAT; -} - /** * @brief Filter out tiling flags that are incompatible with the surface. * @@ -215,6 +220,12 @@ isl_gen6_filter_tiling(const struct isl_device *dev, *flags &= ~ISL_TILING_W_BIT; } + /* From the SKL+ PRMs, RENDER_SURFACE_STATE:TileMode, + * If Surface Format is ASTC*, this field must be TILEMODE_YMAJOR. + */ + if (isl_format_get_layout(info->format)->txc == ISL_TXC_ASTC) + *flags &= ISL_TILING_Y0_BIT; + /* MCS buffers are always Y-tiled */ if (isl_format_get_layout(info->format)->txc == ISL_TXC_MCS) *flags &= ISL_TILING_Y0_BIT; @@ -283,123 +294,96 @@ isl_gen6_filter_tiling(const struct isl_device *dev, *flags &= ~ISL_TILING_Y0_BIT; } -/** - * Choose horizontal subimage alignment, in units of surface elements. 
- */ -static uint32_t -gen7_choose_halign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) +void +isl_gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_dim_layout dim_layout, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) { - if (isl_format_is_compressed(info->format)) - return 1; + assert(ISL_DEV_GEN(dev) == 7); - /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, - * RENDER_SURFACE_STATE Surface Hoizontal Alignment: + /* Handled by isl_choose_image_alignment_el */ + assert(info->format != ISL_FORMAT_HIZ); + + /* IVB+ does not support combined depthstencil. */ + assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); + + /* From the Ivy Bridge PRM, Vol. 2, Part 2, Section 6.18.4.4, + * "Alignment unit size", the alignment parameters are summarized in the + * following table: * - * - This field is intended to be set to HALIGN_8 only if the surface - * was rendered as a depth buffer with Z16 format or a stencil buffer, - * since these surfaces support only alignment of 8. Use of HALIGN_8 - * for other surfaces is supported, but uses more memory. 
+ * Surface Defined By | Surface Format | Align Width | Align Height + * --------------------+-----------------+-------------+-------------- + * DEPTH_BUFFER | D16_UNORM | 8 | 4 + * | other | 4 | 4 + * --------------------+-----------------+-------------+-------------- + * STENCIL_BUFFER | N/A | 8 | 8 + * --------------------+-----------------+-------------+-------------- + * SURFACE_STATE | BC*, ETC*, EAC* | 4 | 4 + * | FXT1 | 8 | 4 + * | all others | HALIGN | VALIGN + * ------------------------------------------------------------------- */ - if (isl_surf_info_is_z16(info) || - isl_surf_usage_is_stencil(info->usage)) - return 8; - - return 4; -} + if (isl_surf_usage_is_depth(info->usage)) { + *image_align_el = info->format == ISL_FORMAT_R16_UNORM ? + isl_extent3d(8, 4, 1) : isl_extent3d(4, 4, 1); + return; + } else if (isl_surf_usage_is_stencil(info->usage)) { + *image_align_el = isl_extent3d(8, 8, 1); + return; + } else if (isl_format_is_compressed(info->format)) { + /* Compressed formats all have alignment equal to block size. */ + *image_align_el = isl_extent3d(1, 1, 1); + return; + } -/** - * Choose vertical subimage alignment, in units of surface elements. - */ -static uint32_t -gen7_choose_valign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling) -{ - MAYBE_UNUSED bool require_valign2 = false; - bool require_valign4 = false; + /* Everything after this point is in the "set by Surface Horizontal or + * Vertical Alignment" case. Now it's just a matter of applying + * restrictions. + */ - if (isl_format_is_compressed(info->format)) - return 1; + /* There are no restrictions on halign beyond what's given in the table + * above. We set it to the minimum value of 4 because that uses the least + * memory. 
+ */ + const uint32_t halign = 4; - if (gen7_format_needs_valign2(dev, info->format)) - require_valign2 = true; + bool require_valign4 = false; /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1: * RENDER_SURFACE_STATE Surface Vertical Alignment: * - * - This field is intended to be set to VALIGN_4 if the surface was - * rendered as a depth buffer, for a multisampled (4x) render target, - * or for a multisampled (8x) render target, since these surfaces - * support only alignment of 4. Use of VALIGN_4 for other surfaces is - * supported, but uses more memory. This field must be set to - * VALIGN_4 for all tiled Y Render Target surfaces. + * * This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, * + * * for a multisampled (4x) render target, or for a multisampled (8x) + * render target, since these surfaces support only alignment of 4. + * + * * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces + * + * * Value of 1 is not supported for format YCRCB_NORMAL (0x182), + * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190) + * + * * If Number of Multisamples is not MULTISAMPLECOUNT_1, this field + * must be set to VALIGN_4." + * + * The first restriction is already handled by the table above and the + * second restriction is redundant with the fifth. */ - if (isl_surf_usage_is_depth(info->usage) || - info->samples > 1 || - ((info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && - tiling == ISL_TILING_Y0)) { + if (info->samples > 1) require_valign4 = true; - } - if (isl_surf_usage_is_stencil(info->usage)) { - /* The Ivybridge PRM states that the stencil buffer's vertical alignment - * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment - * Unit Size]. However, valign=8 is outside the set of valid values of - * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2 - * (0x0) and VALIGN_4 (0x1). 
- * - * The PRM is generally confused about the width, height, and alignment - * of the stencil buffer; and this confusion appears elsewhere. For - * example, the following PRM text effectively converts the stencil - * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM, - * Volume 1, Part 1, Section - * 6.18.4.2 Base Address and LOD Calculation]: - * - * For separate stencil buffer, the width must be mutiplied by 2 and - * height divided by 2 as follows: - * - * w_L = 2*i*ceil(W_L/i) - * h_L = 1/2*j*ceil(H_L/j) - * - * The root of the confusion is that, in W tiling, each pair of rows is - * interleaved into one. - * - * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API - * is more polished. - */ + if (tiling == ISL_TILING_Y0 && + (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)) require_valign4 = true; - } - assert(!require_valign2 || !require_valign4); + assert(!(require_valign4 && gen7_format_needs_valign2(dev, info->format))); - if (require_valign4) - return 4; - - /* Prefer VALIGN_2 because it conserves memory. */ - return 2; -} - -void -isl_gen7_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_dim_layout dim_layout, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - assert(ISL_DEV_GEN(dev) == 7); - - /* Handled by isl_choose_image_alignment_el */ - assert(info->format != ISL_FORMAT_HIZ); - - /* IVB+ does not support combined depthstencil. */ - assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); + /* We default to VALIGN_2 because it uses the least memory. */ + const uint32_t valign = require_valign4 ? 
4 : 2; - *image_align_el = (struct isl_extent3d) { - .w = gen7_choose_halign_el(dev, info), - .h = gen7_choose_valign_el(dev, info, tiling), - .d = 1, - }; + *image_align_el = isl_extent3d(halign, valign, 1); } diff --git a/lib/mesa/src/intel/isl/isl_gen8.c b/lib/mesa/src/intel/isl/isl_gen8.c index 81c69dc13..2199b8d22 100644 --- a/lib/mesa/src/intel/isl/isl_gen8.c +++ b/lib/mesa/src/intel/isl/isl_gen8.c @@ -87,98 +87,6 @@ isl_gen8_choose_msaa_layout(const struct isl_device *dev, return true; } -/** - * Choose horizontal subimage alignment, in units of surface elements. - */ -static uint32_t -gen8_choose_halign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - if (isl_format_is_compressed(info->format)) - return 1; - - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * - This field is intended to be set to HALIGN_8 only if the surface - * was rendered as a depth buffer with Z16 format or a stencil buffer. - * In this case it must be set to HALIGN_8 since these surfaces - * support only alignment of 8. [...] - */ - if (isl_surf_info_is_z16(info)) - return 8; - if (isl_surf_usage_is_stencil(info->usage)) - return 8; - - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * [...] For Z32 formats it must be set to HALIGN_4. - */ - if (isl_surf_usage_is_depth(info->usage)) - return 4; - - if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, - * HALIGN 16 must be used. - * - * This case handles color surfaces that may own an auxiliary MCS, CCS_D, - * or CCS_E. 
Depth buffers, including those that own an auxiliary HiZ - * surface, are handled above and do not require HALIGN_16. - */ - assert(!isl_surf_usage_is_depth(info->usage)); - return 16; - } - - /* XXX(chadv): I believe the hardware requires each image to be - * cache-aligned. If that's true, then defaulting to halign=4 is wrong for - * many formats. Depending on the format's block size, we may need to - * increase halign to 8. - */ - return 4; -} - -/** - * Choose vertical subimage alignment, in units of surface elements. - */ -static uint32_t -gen8_choose_valign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - /* From the Broadwell PRM > Volume 2d: Command Reference: Structures - * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325): - * - * - For Sampling Engine and Render Target Surfaces: This field - * specifies the vertical alignment requirement in elements for the - * surface. [...] An element is defined as a pixel in uncompresed - * surface formats, and as a compression block in compressed surface - * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an - * element is a sample. - * - * - This field is intended to be set to VALIGN_4 if the surface was - * rendered as a depth buffer, for a multisampled (4x) render target, - * or for a multisampled (8x) render target, since these surfaces - * support only alignment of 4. Use of VALIGN_4 for other surfaces is - * supported, but increases memory usage. - * - * - This field is intended to be set to VALIGN_8 only if the surface - * was rendered as a stencil buffer, since stencil buffer surfaces - * support only alignment of 8. If set to VALIGN_8, Surface Format - * must be R8_UINT. 
- */ - - if (isl_format_is_compressed(info->format)) - return 1; - - if (isl_surf_usage_is_stencil(info->usage)) - return 8; - - return 4; -} - void isl_gen8_choose_image_alignment_el(const struct isl_device *dev, const struct isl_surf_init_info *restrict info, @@ -205,30 +113,65 @@ isl_gen8_choose_image_alignment_el(const struct isl_device *dev, return; } - /* The below text from the Broadwell PRM provides some insight into the - * hardware's requirements for LOD alignment. From the Broadwell PRM >> - * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces: + /* From the Broadwell PRM, Volume 4, "Memory Views" p. 186, the alignment + * parameters are summarized in the following table: * - * These [2D surfaces] must adhere to the following memory organization - * rules: - * - * - For non-compressed texture formats, each mipmap must start on an - * even row within the monolithic rectangular area. For - * 1-texel-high mipmaps, this may require a row of padding below - * the previous mipmap. This restriction does not apply to any - * compressed texture formats; each subsequent (lower-res) - * compressed mipmap is positioned directly below the previous - * mipmap. - * - * - Vertical alignment restrictions vary with memory tiling type: - * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled - * mipmaps are not required to start at the left edge of a tile - * row.) 
+ * Surface Defined By | Surface Format | Align Width | Align Height + * --------------------+-----------------+-------------+-------------- + * DEPTH_BUFFER | D16_UNORM | 8 | 4 + * | other | 4 | 4 + * --------------------+-----------------+-------------+-------------- + * STENCIL_BUFFER | N/A | 8 | 8 + * --------------------+-----------------+-------------+-------------- + * SURFACE_STATE | BC*, ETC*, EAC* | 4 | 4 + * | FXT1 | 8 | 4 + * | all others | HALIGN | VALIGN + * ------------------------------------------------------------------- + */ + if (isl_surf_usage_is_depth(info->usage)) { + *image_align_el = info->format == ISL_FORMAT_R16_UNORM ? + isl_extent3d(8, 4, 1) : isl_extent3d(4, 4, 1); + return; + } else if (isl_surf_usage_is_stencil(info->usage)) { + *image_align_el = isl_extent3d(8, 8, 1); + return; + } else if (isl_format_is_compressed(info->format)) { + /* Compressed formats all have alignment equal to block size. */ + *image_align_el = isl_extent3d(1, 1, 1); + return; + } + + /* For all other formats, the alignment is determined by the horizontal and + * vertical alignment fields of RENDER_SURFACE_STATE. There are a few + * restrictions, but we generally have a choice. + */ + + /* Vertical alignment is unrestricted so we choose the smallest allowed + * alignment because that will use the least memory + */ + const uint32_t valign = 4; + + bool needs_halign16 = false; + if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, + * HALIGN 16 must be used. + * + * This case handles color surfaces that may own an auxiliary MCS, CCS_D, + * or CCS_E. Depth buffers, including those that own an auxiliary HiZ + * surface, are handled above and do not require HALIGN_16. 
+ */ + needs_halign16 = true; + } + + /* XXX(chadv): I believe the hardware requires each image to be + * cache-aligned. If that's true, then defaulting to halign=4 is wrong for + * many formats. Depending on the format's block size, we may need to + * increase halign to 8. */ + const uint32_t halign = needs_halign16 ? 16 : 4; - *image_align_el = (struct isl_extent3d) { - .w = gen8_choose_halign_el(dev, info), - .h = gen8_choose_valign_el(dev, info), - .d = 1, - }; + *image_align_el = isl_extent3d(halign, valign, 1); } diff --git a/lib/mesa/src/intel/isl/isl_priv.h b/lib/mesa/src/intel/isl/isl_priv.h index dc3975d3c..525d8a206 100644 --- a/lib/mesa/src/intel/isl/isl_priv.h +++ b/lib/mesa/src/intel/isl/isl_priv.h @@ -33,7 +33,13 @@ #include "isl.h" #define isl_finishme(format, ...) \ - __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__) + do { \ + static bool reported = false; \ + if (!reported) { \ + __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \ + reported = true; \ + } \ + } while (0) void PRINTFLIKE(3, 4) UNUSED __isl_finishme(const char *file, int line, const char *fmt, ...); @@ -74,6 +80,13 @@ isl_align_npot(uintmax_t n, uintmax_t a) return ((n + a - 1) / a) * a; } +static inline uintmax_t +isl_assert_div(uintmax_t n, uintmax_t a) +{ + assert(n % a == 0); + return n / a; +} + /** * Alignment must be a power of 2. 
*/ @@ -172,6 +185,10 @@ isl_gen9_surf_fill_state_s(const struct isl_device *dev, void *state, const struct isl_surf_fill_state_info *restrict info); void +isl_gen10_surf_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info); + +void isl_gen4_buffer_fill_state_s(void *state, const struct isl_buffer_fill_state_info *restrict info); @@ -199,4 +216,40 @@ void isl_gen9_buffer_fill_state_s(void *state, const struct isl_buffer_fill_state_info *restrict info); +void +isl_gen10_buffer_fill_state_s(void *state, + const struct isl_buffer_fill_state_info *restrict info); + +void +isl_gen4_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_gen5_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_gen6_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_gen7_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_gen75_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_gen8_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_gen9_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_gen10_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + #endif /* ISL_PRIV_H */ diff --git a/lib/mesa/src/intel/isl/isl_storage_image.c b/lib/mesa/src/intel/isl/isl_storage_image.c index ffd03e4ad..a8aebce6d 100644 --- 
a/lib/mesa/src/intel/isl/isl_storage_image.c +++ b/lib/mesa/src/intel/isl/isl_storage_image.c @@ -22,7 +22,7 @@ */ #include "isl_priv.h" -#include "brw_compiler.h" +#include "compiler/brw_compiler.h" bool isl_is_storage_image_format(enum isl_format format) @@ -226,8 +226,12 @@ isl_surf_fill_image_param(const struct isl_device *dev, view->base_array_layer; } - isl_surf_get_image_offset_el(surf, view->base_level, view->base_array_layer, - 0, &param->offset[0], &param->offset[1]); + isl_surf_get_image_offset_el(surf, view->base_level, + surf->dim == ISL_SURF_DIM_3D ? + 0 : view->base_array_layer, + surf->dim == ISL_SURF_DIM_3D ? + view->base_array_layer : 0, + &param->offset[0], &param->offset[1]); const int cpp = isl_format_get_layout(surf->format)->bpb / 8; param->stride[0] = cpp; diff --git a/lib/mesa/src/intel/isl/isl_surface_state.c b/lib/mesa/src/intel/isl/isl_surface_state.c index 3bb0abd5a..e8bdb6596 100644 --- a/lib/mesa/src/intel/isl/isl_surface_state.c +++ b/lib/mesa/src/intel/isl/isl_surface_state.c @@ -113,12 +113,14 @@ get_surftype(enum isl_surf_dim dim, isl_surf_usage_flags_t usage) assert(!(usage & ISL_SURF_USAGE_CUBE_BIT)); return SURFTYPE_1D; case ISL_SURF_DIM_2D: - if (usage & ISL_SURF_USAGE_STORAGE_BIT) { - /* Storage images are always plain 2-D, not cube */ - return SURFTYPE_2D; - } else if (usage & ISL_SURF_USAGE_CUBE_BIT) { + if ((usage & ISL_SURF_USAGE_CUBE_BIT) && + (usage & ISL_SURF_USAGE_TEXTURE_BIT)) { + /* We need SURFTYPE_CUBE to make cube sampling work */ return SURFTYPE_CUBE; } else { + /* Everything else (render and storage) treat cubes as plain + * 2D array textures + */ return SURFTYPE_2D; } case ISL_SURF_DIM_3D: @@ -252,8 +254,32 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, if (info->surf->dim == ISL_SURF_DIM_1D) assert(!isl_format_is_compressed(info->view->format)); + if (isl_format_is_compressed(info->surf->format)) { + /* You're not allowed to make a view of a compressed format with any + * format other than the 
surface format. None of the userspace APIs + * allow for this directly and doing so would mess up a number of + * surface parameters such as Width, Height, and alignments. Ideally, + * we'd like to assert that the two formats match. However, we have an + * S3TC workaround that requires us to do reinterpretation. So assert + * that they're at least the same bpb and block size. + */ + MAYBE_UNUSED const struct isl_format_layout *surf_fmtl = + isl_format_get_layout(info->surf->format); + MAYBE_UNUSED const struct isl_format_layout *view_fmtl = + isl_format_get_layout(info->surf->format); + assert(surf_fmtl->bpb == view_fmtl->bpb); + assert(surf_fmtl->bw == view_fmtl->bw); + assert(surf_fmtl->bh == view_fmtl->bh); + } + s.SurfaceFormat = info->view->format; +#if GEN_GEN <= 5 + s.ColorBufferComponentWriteDisables = info->write_disables; +#else + assert(info->write_disables == 0); +#endif + #if GEN_IS_HASWELL s.IntegerSurfaceFormat = isl_format_has_int_channel(s.SurfaceFormat); #endif @@ -451,6 +477,38 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, #endif #if (GEN_GEN >= 8 || GEN_IS_HASWELL) + if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { + /* From the Sky Lake PRM Vol. 2d, + * RENDER_SURFACE_STATE::Shader Channel Select Red + * + * "For Render Target, Red, Green and Blue Shader Channel Selects + * MUST be such that only valid components can be swapped i.e. only + * change the order of components in the pixel. Any other values for + * these Shader Channel Select fields are not valid for Render + * Targets. This also means that there MUST not be multiple shader + * channels mapped to the same RT channel." 
+ */ + assert(info->view->swizzle.r == ISL_CHANNEL_SELECT_RED || + info->view->swizzle.r == ISL_CHANNEL_SELECT_GREEN || + info->view->swizzle.r == ISL_CHANNEL_SELECT_BLUE); + assert(info->view->swizzle.g == ISL_CHANNEL_SELECT_RED || + info->view->swizzle.g == ISL_CHANNEL_SELECT_GREEN || + info->view->swizzle.g == ISL_CHANNEL_SELECT_BLUE); + assert(info->view->swizzle.b == ISL_CHANNEL_SELECT_RED || + info->view->swizzle.b == ISL_CHANNEL_SELECT_GREEN || + info->view->swizzle.b == ISL_CHANNEL_SELECT_BLUE); + assert(info->view->swizzle.r != info->view->swizzle.g); + assert(info->view->swizzle.r != info->view->swizzle.b); + assert(info->view->swizzle.g != info->view->swizzle.b); + + /* From the Sky Lake PRM Vol. 2d, + * RENDER_SURFACE_STATE::Shader Channel Select Alpha + * + * "For Render Target, this field MUST be programmed to + * value = SCS_ALPHA." + */ + assert(info->view->swizzle.a == ISL_CHANNEL_SELECT_ALPHA); + } s.ShaderChannelSelectRed = info->view->swizzle.r; s.ShaderChannelSelectGreen = info->view->swizzle.g; s.ShaderChannelSelectBlue = info->view->swizzle.b; @@ -503,27 +561,52 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, #if GEN_GEN >= 7 if (info->aux_surf && info->aux_usage != ISL_AUX_USAGE_NONE) { + /* The docs don't appear to say anything whatsoever about compression + * and the data port. Testing seems to indicate that the data port + * completely ignores the AuxiliarySurfaceMode field. 
+ */ + assert(!(info->view->usage & ISL_SURF_USAGE_STORAGE_BIT)); + struct isl_tile_info tile_info; - isl_surf_get_tile_info(dev, info->aux_surf, &tile_info); + isl_surf_get_tile_info(info->aux_surf, &tile_info); uint32_t pitch_in_tiles = info->aux_surf->row_pitch / tile_info.phys_extent_B.width; + s.AuxiliarySurfaceBaseAddress = info->aux_address; + s.AuxiliarySurfacePitch = pitch_in_tiles - 1; + #if GEN_GEN >= 8 assert(GEN_GEN >= 9 || info->aux_usage != ISL_AUX_USAGE_CCS_E); - s.AuxiliarySurfacePitch = pitch_in_tiles - 1; /* Auxiliary surfaces in ISL have compressed formats but the hardware * doesn't expect our definition of the compression, it expects qpitch * in units of samples on the main surface. */ s.AuxiliarySurfaceQPitch = isl_surf_get_array_pitch_sa_rows(info->aux_surf) >> 2; - s.AuxiliarySurfaceBaseAddress = info->aux_address; + + if (info->aux_usage == ISL_AUX_USAGE_HIZ) { + /* The number of samples must be 1 */ + assert(info->surf->samples == 1); + + /* The dimension must not be 3D */ + assert(info->surf->dim != ISL_SURF_DIM_3D); + + /* The format must be one of the following: */ + switch (info->view->format) { + case ISL_FORMAT_R32_FLOAT: + case ISL_FORMAT_R24_UNORM_X8_TYPELESS: + case ISL_FORMAT_R16_UNORM: + break; + default: + assert(!"Incompatible HiZ Sampling format"); + break; + } + } + s.AuxiliarySurfaceMode = isl_to_gen_aux_mode[info->aux_usage]; #else assert(info->aux_usage == ISL_AUX_USAGE_MCS || info->aux_usage == ISL_AUX_USAGE_CCS_D); - s.MCSBaseAddress = info->aux_address, - s.MCSSurfacePitch = pitch_in_tiles - 1; s.MCSEnable = true; #endif } @@ -546,6 +629,15 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, s.SamplerL2BypassModeDisable = true; break; default: + /* From the SKL PRM, Programming Note under Sampler Output Channel + * Mapping: + * + * If a surface has an associated HiZ Auxilliary surface, the + * Sampler L2 Bypass Mode Disable field in the RENDER_SURFACE_STATE + * must be set. 
+ */ + if (GEN_GEN >= 9 && info->aux_usage == ISL_AUX_USAGE_HIZ) + s.SamplerL2BypassModeDisable = true; break; } } @@ -602,7 +694,7 @@ isl_genX(buffer_fill_state_s)(void *state, */ if (info->format == ISL_FORMAT_RAW) { assert(num_elements <= (1ull << 30)); - assert((num_elements & 3) == 0); + assert(num_elements > 0); } else { assert(num_elements <= (1ull << 27)); } |