summaryrefslogtreecommitdiff
path: root/lib/mesa/src/intel/isl
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2017-12-31 07:12:27 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2017-12-31 07:12:27 +0000
commit051645c92924bf915d82bf219f2ed67309b5577a (patch)
tree4aae126dd8e5a18c6a9926a5468d1561e6038a07 /lib/mesa/src/intel/isl
parent2dae6fe6f74cf7fb9fd65285302c0331d9786b00 (diff)
Merge Mesa 17.2.8
Diffstat (limited to 'lib/mesa/src/intel/isl')
-rw-r--r--lib/mesa/src/intel/isl/isl.c1327
-rw-r--r--lib/mesa/src/intel/isl/isl.h484
-rw-r--r--lib/mesa/src/intel/isl/isl_emit_depth_stencil.c225
-rw-r--r--lib/mesa/src/intel/isl/isl_format.c658
-rw-r--r--lib/mesa/src/intel/isl/isl_gen7.c224
-rw-r--r--lib/mesa/src/intel/isl/isl_gen8.c173
-rw-r--r--lib/mesa/src/intel/isl/isl_priv.h55
-rw-r--r--lib/mesa/src/intel/isl/isl_storage_image.c10
-rw-r--r--lib/mesa/src/intel/isl/isl_surface_state.c112
9 files changed, 2324 insertions, 944 deletions
diff --git a/lib/mesa/src/intel/isl/isl.c b/lib/mesa/src/intel/isl/isl.c
index 32463b129..133986782 100644
--- a/lib/mesa/src/intel/isl/isl.c
+++ b/lib/mesa/src/intel/isl/isl.c
@@ -25,6 +25,8 @@
#include <stdarg.h>
#include <stdio.h>
+#include "genxml/genX_bits.h"
+
#include "isl.h"
#include "isl_gen4.h"
#include "isl_gen6.h"
@@ -67,6 +69,54 @@ isl_device_init(struct isl_device *dev,
assert(info->has_hiz_and_separate_stencil);
if (info->must_use_separate_stencil)
assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
+
+ dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
+ dev->ss.align = isl_align(dev->ss.size, 32);
+
+ dev->ss.clear_value_size =
+ isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
+ RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
+ RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
+ RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
+
+ dev->ss.clear_value_offset =
+ RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
+
+ assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
+ dev->ss.addr_offset =
+ RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
+
+ /* The "Auxiliary Surface Base Address" field starts a bit higher up
+ * because the bottom 12 bits are used for other things. Round down to
+ * the nearest dword before.
+ */
+ dev->ss.aux_addr_offset =
+ (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
+
+ dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
+ assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
+ dev->ds.depth_offset =
+ _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
+
+ if (dev->use_separate_stencil) {
+ dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
+ _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
+ _3DSTATE_CLEAR_PARAMS_length(info) * 4;
+
+ assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
+ dev->ds.stencil_offset =
+ _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
+ _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
+
+ assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
+ dev->ds.hiz_offset =
+ _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
+ _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
+ _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
+ } else {
+ dev->ds.stencil_offset = 0;
+ dev->ds.hiz_offset = 0;
+ }
}
/**
@@ -104,9 +154,8 @@ isl_device_get_sample_counts(struct isl_device *dev)
/**
* @param[out] info is written only on success
*/
-static bool
-isl_tiling_get_info(const struct isl_device *dev,
- enum isl_tiling tiling,
+static void
+isl_tiling_get_info(enum isl_tiling tiling,
uint32_t format_bpb,
struct isl_tile_info *tile_info)
{
@@ -121,7 +170,8 @@ isl_tiling_get_info(const struct isl_device *dev,
*/
assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
- return isl_tiling_get_info(dev, tiling, format_bpb / 3, tile_info);
+ isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
+ return;
}
switch (tiling) {
@@ -162,12 +212,6 @@ isl_tiling_get_info(const struct isl_device *dev,
case ISL_TILING_Yf:
case ISL_TILING_Ys: {
- if (ISL_DEV_GEN(dev) < 9)
- return false;
-
- if (!isl_is_pow2(bs))
- return false;
-
bool is_Ys = tiling == ISL_TILING_Ys;
assert(bs > 0);
@@ -222,6 +266,31 @@ isl_tiling_get_info(const struct isl_device *dev,
.logical_extent_el = logical_el,
.phys_extent_B = phys_B,
};
+}
+
+/**
+ * Report whether every channel that `format` actually stores holds either
+ * 0 or 1 in `value`.
+ *
+ * Channels whose bit count in the format layout is zero are skipped. When
+ * isl_format_has_int_channel() is true for the format, the u32 view of the
+ * color value is compared; otherwise the f32 view is compared.
+ *
+ * @return false as soon as a stored channel holds any other value, true
+ * otherwise.
+ */
+bool
+isl_color_value_is_zero_one(union isl_color_value value,
+ enum isl_format format)
+{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+
+ /* Returns false from the enclosing function when channel `c` has a
+ * non-zero bit count and element `i` of value.field is neither 0 nor 1.
+ */
+#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
+ if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
+ return false
+
+ if (isl_format_has_int_channel(format)) {
+ RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
+ RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
+ RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
+ RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
+ } else {
+ RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
+ RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
+ RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
+ RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
+ }
+
+#undef RETURN_FALSE_IF_NOT_0_1
return true;
}
@@ -255,8 +324,7 @@ isl_surf_choose_tiling(const struct isl_device *dev,
if (ISL_DEV_GEN(dev) >= 6) {
isl_gen6_filter_tiling(dev, info, &tiling_flags);
} else {
- isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev));
- isl_gen6_filter_tiling(dev, info, &tiling_flags);
+ isl_gen4_filter_tiling(dev, info, &tiling_flags);
}
#define CHOOSE(__tiling) \
@@ -415,7 +483,6 @@ isl_choose_array_pitch_span(const struct isl_device *dev,
* the storage for LODs other than LOD 0 is not needed.
*/
assert(info->levels == 1);
- assert(phys_level0_sa->array_len == 1);
return ISL_ARRAY_PITCH_SPAN_COMPACT;
} else {
if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
@@ -447,6 +514,12 @@ isl_choose_array_pitch_span(const struct isl_device *dev,
* compact QPitch possible in order to conserve memory.
*/
return ISL_ARRAY_PITCH_SPAN_COMPACT;
+
+ case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
+ /* Each array image in the gen6 stencil or HiZ surface is compact in the
+ * sense that every LOD is a compact array of the same size as LOD0.
+ */
+ return ISL_ARRAY_PITCH_SPAN_COMPACT;
}
unreachable("bad isl_dim_layout");
@@ -461,12 +534,32 @@ isl_choose_image_alignment_el(const struct isl_device *dev,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el)
{
- if (info->format == ISL_FORMAT_HIZ) {
- assert(ISL_DEV_GEN(dev) >= 6);
- /* HiZ surfaces are always aligned to 16x8 pixels in the primary surface
- * which works out to 2x2 HiZ elments.
+ const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
+ if (fmtl->txc == ISL_TXC_MCS) {
+ assert(tiling == ISL_TILING_Y0);
+
+ /*
+ * Ivy Bridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
+ *
+ * Height, width, and layout of MCS buffer in this case must match with
+ * Render Target height, width, and layout. MCS buffer is tiledY.
+ *
+ * To avoid wasting memory, choose the smallest alignment possible:
+ * HALIGN_4 and VALIGN_4.
*/
- *image_align_el = isl_extent3d(2, 2, 1);
+ *image_align_el = isl_extent3d(4, 4, 1);
+ return;
+ } else if (info->format == ISL_FORMAT_HIZ) {
+ assert(ISL_DEV_GEN(dev) >= 6);
+ if (ISL_DEV_GEN(dev) == 6) {
+ /* HiZ surfaces on Sandy Bridge are packed tightly. */
+ *image_align_el = isl_extent3d(1, 1, 1);
+ } else {
+ /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
+ * primary surface which works out to 2x2 HiZ elements.
+ */
+ *image_align_el = isl_extent3d(2, 2, 1);
+ }
return;
}
@@ -491,8 +584,14 @@ isl_choose_image_alignment_el(const struct isl_device *dev,
static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device *dev,
enum isl_surf_dim logical_dim,
- enum isl_tiling tiling)
+ enum isl_tiling tiling,
+ isl_surf_usage_flags_t usage)
{
+ /* Sandy bridge needs a special layout for HiZ and stencil. */
+ if (ISL_DEV_GEN(dev) == 6 &&
+ (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
+ return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
+
if (ISL_DEV_GEN(dev) >= 9) {
switch (logical_dim) {
case ISL_SURF_DIM_1D:
@@ -522,6 +621,16 @@ isl_surf_choose_dim_layout(const struct isl_device *dev,
switch (logical_dim) {
case ISL_SURF_DIM_1D:
case ISL_SURF_DIM_2D:
+ /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
+ *
+ * The cube face textures are stored in the same way as 3D surfaces
+ * are stored (see section 6.17.5 for details). For cube surfaces,
+ * however, the depth is equal to the number of faces (always 6) and
+ * is not reduced for each MIP.
+ */
+ if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
+ return ISL_DIM_LAYOUT_GEN4_3D;
+
return ISL_DIM_LAYOUT_GEN4_2D;
case ISL_SURF_DIM_3D:
return ISL_DIM_LAYOUT_GEN4_3D;
@@ -561,6 +670,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
case ISL_DIM_LAYOUT_GEN9_1D:
case ISL_DIM_LAYOUT_GEN4_2D:
+ case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
*phys_level0_sa = (struct isl_extent4d) {
.w = isl_align_npot(info->width, fmtl->bw),
.h = fmtl->bh,
@@ -572,7 +682,11 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
break;
case ISL_SURF_DIM_2D:
- assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D);
+ if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
+ assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D);
+ else
+ assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
+ dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
if (tiling == ISL_TILING_Ys && info->samples > 1)
isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
@@ -637,6 +751,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
switch (dim_layout) {
case ISL_DIM_LAYOUT_GEN9_1D:
+ case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
unreachable("bad isl_dim_layout");
case ISL_DIM_LAYOUT_GEN4_2D:
@@ -665,6 +780,108 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
}
/**
+ * Calculate the pitch between physical array slices, in units of rows of
+ * surface elements.
+ *
+ * The pitch is first computed in rows of samples and then divided by
+ * fmtl->bh:
+ *
+ * - ISL_ARRAY_PITCH_SPAN_COMPACT: the height of slice 0 rounded up to
+ * image_align_sa->h.
+ * - ISL_ARRAY_PITCH_SPAN_FULL: h0 + h1 + m * image_align_sa->h, where h0
+ * and h1 are the aligned heights of LOD0 and LOD1 and m is 12 on gen7+
+ * and 11 otherwise; 4 more rows are added for the gen6 multisample
+ * erratum, and the sum is rounded up to a multiple of fmtl->bh.
+ *
+ * On gen9+ the result in element rows is then rounded up to 64 rows for
+ * CCS formats, and to the tile's logical height for tiled 3D surfaces.
+ *
+ * @return the array pitch in rows of surface elements.
+ */
+static uint32_t
+isl_calc_array_pitch_el_rows_gen4_2d(
+ const struct isl_device *dev,
+ const struct isl_surf_init_info *restrict info,
+ const struct isl_tile_info *tile_info,
+ const struct isl_extent3d *image_align_sa,
+ const struct isl_extent4d *phys_level0_sa,
+ enum isl_array_pitch_span array_pitch_span,
+ const struct isl_extent2d *phys_slice0_sa)
+{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
+ uint32_t pitch_sa_rows = 0;
+
+ switch (array_pitch_span) {
+ case ISL_ARRAY_PITCH_SPAN_COMPACT:
+ pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
+ break;
+ case ISL_ARRAY_PITCH_SPAN_FULL: {
+ /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
+ * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
+ * Surfaces >> Surface Arrays.
+ */
+ uint32_t H0_sa = phys_level0_sa->h;
+ uint32_t H1_sa = isl_minify(H0_sa, 1);
+
+ uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
+ uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
+
+ uint32_t m;
+ if (ISL_DEV_GEN(dev) >= 7) {
+ /* The QPitch equation changed slightly in Ivybridge. */
+ m = 12;
+ } else {
+ m = 11;
+ }
+
+ pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
+
+ if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
+ (info->height % 4 == 1)) {
+ /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
+ * Graphics Core >> Section 7.18.3.7: Surface Arrays:
+ *
+ * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
+ * the value calculated in the equation above , for every
+ * other odd Surface Height starting from 1 i.e. 1,5,9,13.
+ *
+ * XXX(chadv): Is the errata natural corollary of the physical
+ * layout of interleaved samples?
+ */
+ pitch_sa_rows += 4;
+ }
+
+ pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
+ } /* end case */
+ break;
+ }
+
+ /* Convert from rows of samples to rows of elements. */
+ assert(pitch_sa_rows % fmtl->bh == 0);
+ uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
+
+ if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) {
+ /*
+ * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
+ *
+ * "Mip-mapped and arrayed surfaces are supported with MCS buffer
+ * layout with these alignments in the RT space: Horizontal
+ * Alignment = 128 and Vertical Alignment = 64."
+ *
+ * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
+ *
+ * "For non-multisampled render target's CCS auxiliary surface,
+ * QPitch must be computed with Horizontal Alignment = 128 and
+ * Surface Vertical Alignment = 256. These alignments are only for
+ * CCS buffer and not for associated render target."
+ *
+ * The first restriction is already handled by isl_choose_image_alignment_el
+ * but the second restriction, which is an extension of the first, only
+ * applies to qpitch and must be applied here.
+ */
+ assert(fmtl->bh == 4);
+ pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
+ }
+
+ if (ISL_DEV_GEN(dev) >= 9 &&
+ info->dim == ISL_SURF_DIM_3D &&
+ tile_info->tiling != ISL_TILING_LINEAR) {
+ /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
+ *
+ * Tile Mode != Linear: This field must be set to an integer multiple
+ * of the tile height
+ */
+ pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
+ }
+
+ return pitch_el_rows;
+}
+
+/**
* A variant of isl_calc_phys_slice0_extent_sa() specific to
* ISL_DIM_LAYOUT_GEN4_2D.
*/
@@ -740,43 +957,158 @@ isl_calc_phys_slice0_extent_sa_gen4_2d(
};
}
+/**
+ * Compute the array pitch and the total two-dimensional extent of a surface
+ * for the ISL_DIM_LAYOUT_GEN4_2D case.
+ *
+ * The extent of slice 0 comes from isl_calc_phys_slice0_extent_sa_gen4_2d()
+ * and the array pitch from isl_calc_array_pitch_el_rows_gen4_2d().
+ *
+ * @param[out] array_pitch_el_rows receives the array pitch, in rows of
+ * surface elements.
+ * @param[out] total_extent_el receives slice 0's width divided by fmtl->bw,
+ * and a height of (array_len - 1) array pitches plus slice 0's height
+ * divided by fmtl->bh.
+ */
+static void
+isl_calc_phys_total_extent_el_gen4_2d(
+ const struct isl_device *dev,
+ const struct isl_surf_init_info *restrict info,
+ const struct isl_tile_info *tile_info,
+ enum isl_msaa_layout msaa_layout,
+ const struct isl_extent3d *image_align_sa,
+ const struct isl_extent4d *phys_level0_sa,
+ enum isl_array_pitch_span array_pitch_span,
+ uint32_t *array_pitch_el_rows,
+ struct isl_extent2d *total_extent_el)
+{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
+
+ struct isl_extent2d phys_slice0_sa;
+ isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
+ image_align_sa, phys_level0_sa,
+ &phys_slice0_sa);
+ *array_pitch_el_rows =
+ isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
+ image_align_sa, phys_level0_sa,
+ array_pitch_span,
+ &phys_slice0_sa);
+ /* Only the last slice contributes its own height; every earlier slice
+ * occupies one full array pitch.
+ */
+ *total_extent_el = (struct isl_extent2d) {
+ .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
+ .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
+ isl_assert_div(phys_slice0_sa.h, fmtl->bh),
+ };
+}
+
/**
* A variant of isl_calc_phys_slice0_extent_sa() specific to
* ISL_DIM_LAYOUT_GEN4_3D.
*/
static void
-isl_calc_phys_slice0_extent_sa_gen4_3d(
+isl_calc_phys_total_extent_el_gen4_3d(
const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
const struct isl_extent3d *image_align_sa,
const struct isl_extent4d *phys_level0_sa,
- struct isl_extent2d *phys_slice0_sa)
+ uint32_t *array_pitch_el_rows,
+ struct isl_extent2d *phys_total_el)
{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
+
assert(info->samples == 1);
- assert(phys_level0_sa->array_len == 1);
- uint32_t slice_w = 0;
- uint32_t slice_h = 0;
+ if (info->dim != ISL_SURF_DIM_3D) {
+ /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
+ *
+ * The cube face textures are stored in the same way as 3D surfaces
+ * are stored (see section 6.17.5 for details). For cube surfaces,
+ * however, the depth is equal to the number of faces (always 6) and
+ * is not reduced for each MIP.
+ */
+ assert(ISL_DEV_GEN(dev) == 4);
+ assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
+ assert(phys_level0_sa->array_len == 6);
+ } else {
+ assert(phys_level0_sa->array_len == 1);
+ }
+
+ /* Accumulated in samples; converted to elements when stored below. */
+ uint32_t total_w = 0;
+ uint32_t total_h = 0;
uint32_t W0 = phys_level0_sa->w;
uint32_t H0 = phys_level0_sa->h;
uint32_t D0 = phys_level0_sa->d;
+ uint32_t A0 = phys_level0_sa->a;
for (uint32_t l = 0; l < info->levels; ++l) {
uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
- uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d);
+ /* Only a true 3D depth is minified per level; the cube case keeps all
+ * A0 layers at every level.
+ */
+ uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
uint32_t max_layers_horiz = MIN(level_d, 1u << l);
uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
- slice_w = MAX(slice_w, level_w * max_layers_horiz);
- slice_h += level_h * max_layers_vert;
+ total_w = MAX(total_w, level_w * max_layers_horiz);
+ total_h += level_h * max_layers_vert;
}
- *phys_slice0_sa = (struct isl_extent2d) {
- .w = slice_w,
- .h = slice_h,
+ /* GEN4_3D layouts don't really have an array pitch since each LOD has a
+ * different number of horizontal and vertical layers. We have to set it
+ * to something, so at least make it true for LOD0.
+ *
+ * This is a count of rows, so it is converted from samples to elements
+ * with fmtl->bh, as every other height in this file is; fmtl->bw is only
+ * used for widths.
+ */
+ *array_pitch_el_rows =
+ isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bh;
+ *phys_total_el = (struct isl_extent2d) {
+ .w = isl_assert_div(total_w, fmtl->bw),
+ .h = isl_assert_div(total_h, fmtl->bh),
+ };
+}
+
+/**
+ * A variant of isl_calc_phys_total_extent_el() specific to
+ * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
+ *
+ * Every level is given the same height H: the LOD0 height rounded up to
+ * image_align_sa->h, times phys_level0_sa->a. Each level's width and H are
+ * rounded up to whole tiles. LOD0's width is the top-row width; the widths
+ * of LOD1 and above are summed into the bottom-row width. The total height
+ * is one tile-aligned H for LOD0 plus one more when LOD1 exists.
+ *
+ * @param[out] array_pitch_el_rows receives the aligned LOD0 height divided
+ * by fmtl->bh.
+ * @param[out] phys_total_el receives the wider of the two rows and the
+ * total height, both converted to surface elements.
+ */
+static void
+isl_calc_phys_total_extent_el_gen6_stencil_hiz(
+ const struct isl_device *dev,
+ const struct isl_surf_init_info *restrict info,
+ const struct isl_tile_info *tile_info,
+ const struct isl_extent3d *image_align_sa,
+ const struct isl_extent4d *phys_level0_sa,
+ uint32_t *array_pitch_el_rows,
+ struct isl_extent2d *phys_total_el)
+{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
+
+ const struct isl_extent2d tile_extent_sa = {
+ .w = tile_info->logical_extent_el.w * fmtl->bw,
+ .h = tile_info->logical_extent_el.h * fmtl->bh,
+ };
+ /* Tile size is a multiple of image alignment */
+ assert(tile_extent_sa.w % image_align_sa->w == 0);
+ assert(tile_extent_sa.h % image_align_sa->h == 0);
+
+ const uint32_t W0 = phys_level0_sa->w;
+ const uint32_t H0 = phys_level0_sa->h;
+
+ /* Each image has the same height as LOD0 because the hardware thinks
+ * everything is LOD0
+ */
+ const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
+
+ uint32_t total_top_w = 0;
+ uint32_t total_bottom_w = 0;
+ uint32_t total_h = 0;
+
+ for (uint32_t l = 0; l < info->levels; ++l) {
+ const uint32_t W = isl_minify(W0, l);
+
+ const uint32_t w = isl_align(W, tile_extent_sa.w);
+ const uint32_t h = isl_align(H, tile_extent_sa.h);
+
+ /* Levels 2 and up only widen the bottom row; they add no height. */
+ if (l == 0) {
+ total_top_w = w;
+ total_h = h;
+ } else if (l == 1) {
+ total_bottom_w = w;
+ total_h += h;
+ } else {
+ total_bottom_w += w;
+ }
+ }
+
+ *array_pitch_el_rows =
+ isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
+ *phys_total_el = (struct isl_extent2d) {
+ .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
+ .h = isl_assert_div(total_h, fmtl->bh),
+ };
}
@@ -785,16 +1117,17 @@ isl_calc_phys_slice0_extent_sa_gen4_3d(
* ISL_DIM_LAYOUT_GEN9_1D.
*/
static void
-isl_calc_phys_slice0_extent_sa_gen9_1d(
+isl_calc_phys_total_extent_el_gen9_1d(
const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
const struct isl_extent3d *image_align_sa,
const struct isl_extent4d *phys_level0_sa,
- struct isl_extent2d *phys_slice0_sa)
+ uint32_t *array_pitch_el_rows,
+ struct isl_extent2d *phys_total_el)
{
MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
- assert(phys_level0_sa->height == 1);
+ assert(phys_level0_sa->height / fmtl->bh == 1);
assert(phys_level0_sa->depth == 1);
assert(info->samples == 1);
assert(image_align_sa->w >= fmtl->bw);
@@ -809,217 +1142,68 @@ isl_calc_phys_slice0_extent_sa_gen9_1d(
slice_w += w;
}
- *phys_slice0_sa = isl_extent2d(slice_w, 1);
+ *array_pitch_el_rows = 1;
+ *phys_total_el = (struct isl_extent2d) {
+ .w = isl_assert_div(slice_w, fmtl->bw),
+ .h = phys_level0_sa->array_len,
+ };
}
/**
- * Calculate the physical extent of the surface's first array slice, in units
- * of surface samples. If the surface is multi-leveled, then the result will
- * be aligned to \a image_align_sa.
+ * Calculate the two-dimensional total physical extent of the surface, in
+ * units of surface elements.
*/
static void
-isl_calc_phys_slice0_extent_sa(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info,
- enum isl_dim_layout dim_layout,
- enum isl_msaa_layout msaa_layout,
- const struct isl_extent3d *image_align_sa,
- const struct isl_extent4d *phys_level0_sa,
- struct isl_extent2d *phys_slice0_sa)
+isl_calc_phys_total_extent_el(const struct isl_device *dev,
+ const struct isl_surf_init_info *restrict info,
+ const struct isl_tile_info *tile_info,
+ enum isl_dim_layout dim_layout,
+ enum isl_msaa_layout msaa_layout,
+ const struct isl_extent3d *image_align_sa,
+ const struct isl_extent4d *phys_level0_sa,
+ enum isl_array_pitch_span array_pitch_span,
+ uint32_t *array_pitch_el_rows,
+ struct isl_extent2d *total_extent_el)
{
switch (dim_layout) {
case ISL_DIM_LAYOUT_GEN9_1D:
- isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info,
- image_align_sa, phys_level0_sa,
- phys_slice0_sa);
+ assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
+ isl_calc_phys_total_extent_el_gen9_1d(dev, info,
+ image_align_sa, phys_level0_sa,
+ array_pitch_el_rows,
+ total_extent_el);
return;
case ISL_DIM_LAYOUT_GEN4_2D:
- isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
- image_align_sa, phys_level0_sa,
- phys_slice0_sa);
+ isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
+ image_align_sa, phys_level0_sa,
+ array_pitch_span,
+ array_pitch_el_rows,
+ total_extent_el);
return;
- case ISL_DIM_LAYOUT_GEN4_3D:
- isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa,
- phys_level0_sa, phys_slice0_sa);
+ case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
+ assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
+ isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
+ image_align_sa,
+ phys_level0_sa,
+ array_pitch_el_rows,
+ total_extent_el);
return;
- }
-}
-
-/**
- * Calculate the pitch between physical array slices, in units of rows of
- * surface elements.
- */
-static uint32_t
-isl_calc_array_pitch_el_rows(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info,
- const struct isl_tile_info *tile_info,
- enum isl_dim_layout dim_layout,
- enum isl_array_pitch_span array_pitch_span,
- const struct isl_extent3d *image_align_sa,
- const struct isl_extent4d *phys_level0_sa,
- const struct isl_extent2d *phys_slice0_sa)
-{
- const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
- uint32_t pitch_sa_rows = 0;
-
- switch (dim_layout) {
- case ISL_DIM_LAYOUT_GEN9_1D:
- /* Each row is an array slice */
- pitch_sa_rows = 1;
- break;
- case ISL_DIM_LAYOUT_GEN4_2D:
- switch (array_pitch_span) {
- case ISL_ARRAY_PITCH_SPAN_COMPACT:
- pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
- break;
- case ISL_ARRAY_PITCH_SPAN_FULL: {
- /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
- * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
- * Surfaces >> Surface Arrays.
- */
- uint32_t H0_sa = phys_level0_sa->h;
- uint32_t H1_sa = isl_minify(H0_sa, 1);
-
- uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
- uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
-
- uint32_t m;
- if (ISL_DEV_GEN(dev) >= 7) {
- /* The QPitch equation changed slightly in Ivybridge. */
- m = 12;
- } else {
- m = 11;
- }
-
- pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
-
- if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
- (info->height % 4 == 1)) {
- /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
- * Graphics Core >> Section 7.18.3.7: Surface Arrays:
- *
- * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
- * the value calculated in the equation above , for every
- * other odd Surface Height starting from 1 i.e. 1,5,9,13.
- *
- * XXX(chadv): Is the errata natural corollary of the physical
- * layout of interleaved samples?
- */
- pitch_sa_rows += 4;
- }
-
- pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
- } /* end case */
- break;
- }
- break;
case ISL_DIM_LAYOUT_GEN4_3D:
assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
- pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
- break;
- default:
- unreachable("bad isl_dim_layout");
- break;
- }
-
- assert(pitch_sa_rows % fmtl->bh == 0);
- uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
-
- if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) {
- /*
- * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
- *
- * "Mip-mapped and arrayed surfaces are supported with MCS buffer
- * layout with these alignments in the RT space: Horizontal
- * Alignment = 128 and Vertical Alignment = 64."
- *
- * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
- *
- * "For non-multisampled render target's CCS auxiliary surface,
- * QPitch must be computed with Horizontal Alignment = 128 and
- * Surface Vertical Alignment = 256. These alignments are only for
- * CCS buffer and not for associated render target."
- *
- * The first restriction is already handled by isl_choose_image_alignment_el
- * but the second restriction, which is an extension of the first, only
- * applies to qpitch and must be applied here.
- */
- assert(fmtl->bh == 4);
- pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
- }
-
- if (ISL_DEV_GEN(dev) >= 9 &&
- info->dim == ISL_SURF_DIM_3D &&
- tile_info->tiling != ISL_TILING_LINEAR) {
- /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
- *
- * Tile Mode != Linear: This field must be set to an integer multiple
- * of the tile height
- */
- pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
+ isl_calc_phys_total_extent_el_gen4_3d(dev, info,
+ image_align_sa, phys_level0_sa,
+ array_pitch_el_rows,
+ total_extent_el);
+ return;
}
-
- return pitch_el_rows;
}
-/**
- * Calculate the pitch of each surface row, in bytes.
- */
static uint32_t
-isl_calc_linear_row_pitch(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info,
- const struct isl_extent2d *phys_slice0_sa)
+isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info,
+ const struct isl_tile_info *tile_info)
{
- const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
-
- uint32_t row_pitch = info->min_pitch;
-
- /* First, align the surface to a cache line boundary, as the PRM explains
- * below.
- *
- * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
- * Formats >> Surface Padding Requirements >> Render Target and Media
- * Surfaces:
- *
- * The data port accesses data (pixels) outside of the surface if they
- * are contained in the same cache request as pixels that are within the
- * surface. These pixels will not be returned by the requesting message,
- * however if these pixels lie outside of defined pages in the GTT,
- * a GTT error will result when the cache request is processed. In order
- * to avoid these GTT errors, “padding” at the bottom of the surface is
- * sometimes necessary.
- *
- * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
- * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
- *
- * The sampling engine accesses texels outside of the surface if they
- * are contained in the same cache line as texels that are within the
- * surface. These texels will not participate in any calculation
- * performed by the sampling engine and will not affect the result of
- * any sampling engine operation, however if these texels lie outside of
- * defined pages in the GTT, a GTT error will result when the cache line
- * is accessed. In order to avoid these GTT errors, “padding” at the
- * bottom and right side of a sampling engine surface is sometimes
- * necessary.
- *
- * It is possible that a cache line will straddle a page boundary if the
- * base address or pitch is not aligned. All pages included in the cache
- * lines that are part of the surface must map to valid GTT entries to
- * avoid errors. To determine the necessary padding on the bottom and
- * right side of the surface, refer to the table in Alignment Unit Size
- * section for the i and j parameters for the surface format in use. The
- * surface must then be extended to the next multiple of the alignment
- * unit size in each dimension, and all texels contained in this
- * extended surface must have valid GTT entries.
- *
- * For example, suppose the surface size is 15 texels by 10 texels and
- * the alignment parameters are i=4 and j=2. In this case, the extended
- * surface would be 16 by 10. Note that these calculations are done in
- * texels, and must be converted to bytes based on the surface format
- * being used to determine whether additional pages need to be defined.
- */
- assert(phys_slice0_sa->w % fmtl->bw == 0);
- const uint32_t bs = fmtl->bpb / 8;
- row_pitch = MAX(row_pitch, bs * (phys_slice0_sa->w / fmtl->bw));
+ if (tile_info->tiling != ISL_TILING_LINEAR)
+ return tile_info->phys_extent_B.width;
/* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
* RENDER_SURFACE_STATE Surface Pitch (p349):
@@ -1035,125 +1219,165 @@ isl_calc_linear_row_pitch(const struct isl_device *dev,
* - For other linear surfaces, the pitch can be any multiple of
* bytes.
*/
- if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
- if (isl_format_is_yuv(info->format)) {
- row_pitch = isl_align_npot(row_pitch, 2 * bs);
+ const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
+ const uint32_t bs = fmtl->bpb / 8;
+
+ if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
+ if (isl_format_is_yuv(surf_info->format)) {
+ return 2 * bs;
} else {
- row_pitch = isl_align_npot(row_pitch, bs);
+ return bs;
}
}
- return row_pitch;
+ return 1;
+}
+
+static uint32_t
+isl_calc_linear_min_row_pitch(const struct isl_device *dev,
+ const struct isl_surf_init_info *info,
+ const struct isl_extent2d *phys_total_el,
+ uint32_t alignment)
+{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
+ const uint32_t bs = fmtl->bpb / 8;
+
+ return isl_align_npot(bs * phys_total_el->w, alignment);
+}
+
+static uint32_t
+isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
+ const struct isl_surf_init_info *surf_info,
+ const struct isl_tile_info *tile_info,
+ const struct isl_extent2d *phys_total_el,
+ uint32_t alignment)
+{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
+
+ assert(fmtl->bpb % tile_info->format_bpb == 0);
+
+ const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
+ const uint32_t total_w_tl =
+ isl_align_div(phys_total_el->w * tile_el_scale,
+ tile_info->logical_extent_el.width);
+
+ assert(alignment == tile_info->phys_extent_B.width);
+ return total_w_tl * tile_info->phys_extent_B.width;
+}
+
+static uint32_t
+isl_calc_min_row_pitch(const struct isl_device *dev,
+ const struct isl_surf_init_info *surf_info,
+ const struct isl_tile_info *tile_info,
+ const struct isl_extent2d *phys_total_el,
+ uint32_t alignment)
+{
+ if (tile_info->tiling == ISL_TILING_LINEAR) {
+ return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
+ alignment);
+ } else {
+ return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
+ phys_total_el, alignment);
+ }
}
/**
- * Calculate and apply any padding required for the surface.
+ * Is the pitch `n` in the valid range for a hardware bitfield, if the bitfield's
+ * size is `bits` bits?
*
- * @param[inout] total_h_el is updated with the new height
- * @param[out] pad_bytes is overwritten with additional padding requirements.
+ * Hardware pitch fields are offset by 1. For example, if the size of
+ * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
+ * pitches is [1, 2^B] inclusive. If the surface pitch is N, then
+ * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
*/
-static void
-isl_apply_surface_padding(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info,
- const struct isl_tile_info *tile_info,
- uint32_t *total_h_el,
- uint32_t *pad_bytes)
+static bool
+pitch_in_range(uint32_t n, uint32_t bits)
{
- const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
+ assert(n != 0);
+ return likely(bits != 0 && 1 <= n && n <= (1 << bits));
+}
- *pad_bytes = 0;
+static bool
+isl_calc_row_pitch(const struct isl_device *dev,
+ const struct isl_surf_init_info *surf_info,
+ const struct isl_tile_info *tile_info,
+ enum isl_dim_layout dim_layout,
+ const struct isl_extent2d *phys_total_el,
+ uint32_t *out_row_pitch)
+{
+ uint32_t alignment =
+ isl_calc_row_pitch_alignment(surf_info, tile_info);
- /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
- * Formats >> Surface Padding Requirements >> Render Target and Media
- * Surfaces:
- *
- * The data port accesses data (pixels) outside of the surface if they
- * are contained in the same cache request as pixels that are within the
- * surface. These pixels will not be returned by the requesting message,
- * however if these pixels lie outside of defined pages in the GTT,
- * a GTT error will result when the cache request is processed. In
- * order to avoid these GTT errors, “padding” at the bottom of the
- * surface is sometimes necessary.
- *
- * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
- * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
- *
- * ... Lots of padding requirements, all listed separately below.
+ /* If the pitch isn't given and can be chosen freely, align it to a cache
+ * line so that the blit engine can be used on the surface.
*/
+ if (surf_info->row_pitch == 0 && tile_info->tiling == ISL_TILING_LINEAR) {
+ /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress:
+ *
+ * "Base address of the destination surface: X=0, Y=0. Lower 32bits
+ * of the 48bit addressing. When Src Tiling is enabled (Bit_15
+ * enabled), this address must be 4KB-aligned. When Tiling is not
+ * enabled, this address should be CL (64byte) aligned."
+ */
+ alignment = MAX2(alignment, 64);
+ }
- /* We can safely ignore the first padding requirement, quoted below,
- * because isl doesn't do buffers.
- *
- * - [pre-BDW] For buffers, which have no inherent “height,” padding
- * requirements are different. A buffer must be padded to the next
- * multiple of 256 array elements, with an additional 16 bytes added
- * beyond that to account for the L1 cache line.
- */
+ const uint32_t min_row_pitch =
+ isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
+ alignment);
- /*
- * - For compressed textures [...], padding at the bottom of the surface
- * is to an even compressed row.
- */
- if (isl_format_is_compressed(info->format))
- *total_h_el = isl_align(*total_h_el, 2);
+ uint32_t row_pitch = min_row_pitch;
- /*
- * - For cube surfaces, an additional two rows of padding are required
- * at the bottom of the surface.
- */
- if (info->usage & ISL_SURF_USAGE_CUBE_BIT)
- *total_h_el += 2;
-
- /*
- * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
- * additional padding is required. These surfaces require an extra row
- * plus 16 bytes of padding at the bottom in addition to the general
- * padding requirements.
- */
- if (isl_format_is_yuv(info->format) &&
- (fmtl->bpb == 96 || fmtl->bpb == 48|| fmtl->bpb == 24)) {
- *total_h_el += 1;
- *pad_bytes += 16;
+ if (surf_info->row_pitch != 0) {
+ row_pitch = surf_info->row_pitch;
+
+ if (row_pitch < min_row_pitch)
+ return false;
+
+ if (row_pitch % alignment != 0)
+ return false;
}
- /*
- * - For linear surfaces, additional padding of 64 bytes is required at
- * the bottom of the surface. This is in addition to the padding
- * required above.
- */
- if (tile_info->tiling == ISL_TILING_LINEAR)
- *pad_bytes += 64;
+ const uint32_t row_pitch_tiles = row_pitch / tile_info->phys_extent_B.width;
- /* The below text weakens, not strengthens, the padding requirements for
- * linear surfaces. Therefore we can safely ignore it.
- *
- * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array,
- * non-MSAA, non-mip-mapped surfaces in linear memory, the only
- * padding requirement is to the next aligned 64-byte boundary beyond
- * the end of the surface. The rest of the padding requirements
- * documented above do not apply to these surfaces.
- */
+ if (row_pitch == 0)
+ return false;
- /*
- * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and
- * height % 4 != 0, the surface must be padded with
- * 4-(height % 4)*Surface Pitch # of bytes.
- */
- if (ISL_DEV_GEN(dev) >= 9 &&
- tile_info->tiling == ISL_TILING_LINEAR &&
- (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) {
- *total_h_el = isl_align(*total_h_el, 4);
+ if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
+ /* SurfacePitch is ignored for this layout. */
+ goto done;
}
- /*
- * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded
- * to 4 times the Surface Pitch # of bytes
- */
- if (ISL_DEV_GEN(dev) >= 9 &&
- tile_info->tiling == ISL_TILING_LINEAR &&
- info->dim == ISL_SURF_DIM_1D) {
- *total_h_el += 4;
- }
+ if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
+ ISL_SURF_USAGE_TEXTURE_BIT |
+ ISL_SURF_USAGE_STORAGE_BIT)) &&
+ !pitch_in_range(row_pitch, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
+ return false;
+
+ if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
+ ISL_SURF_USAGE_MCS_BIT)) &&
+ !pitch_in_range(row_pitch_tiles, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
+ return false;
+
+ if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
+ !pitch_in_range(row_pitch, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
+ return false;
+
+ if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
+ !pitch_in_range(row_pitch, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
+ return false;
+
+ const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
+ _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
+ _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
+
+ if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
+ !pitch_in_range(row_pitch, stencil_pitch_bits))
+ return false;
+
+ done:
+ *out_row_pitch = row_pitch;
+ return true;
}
bool
@@ -1175,11 +1399,10 @@ isl_surf_init_s(const struct isl_device *dev,
return false;
struct isl_tile_info tile_info;
- if (!isl_tiling_get_info(dev, tiling, fmtl->bpb, &tile_info))
- return false;
+ isl_tiling_get_info(tiling, fmtl->bpb, &tile_info);
const enum isl_dim_layout dim_layout =
- isl_surf_choose_dim_layout(dev, info->dim, tiling);
+ isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
enum isl_msaa_layout msaa_layout;
if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
@@ -1201,27 +1424,23 @@ isl_surf_init_s(const struct isl_device *dev,
enum isl_array_pitch_span array_pitch_span =
isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
- struct isl_extent2d phys_slice0_sa;
- isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout,
- &image_align_sa, &phys_level0_sa,
- &phys_slice0_sa);
- assert(phys_slice0_sa.w % fmtl->bw == 0);
- assert(phys_slice0_sa.h % fmtl->bh == 0);
-
- const uint32_t array_pitch_el_rows =
- isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout,
- array_pitch_span, &image_align_sa,
- &phys_level0_sa, &phys_slice0_sa);
-
- uint32_t total_h_el = phys_level0_sa.array_len * array_pitch_el_rows;
-
- uint32_t pad_bytes;
- isl_apply_surface_padding(dev, info, &tile_info, &total_h_el, &pad_bytes);
+ uint32_t array_pitch_el_rows;
+ struct isl_extent2d phys_total_el;
+ isl_calc_phys_total_extent_el(dev, info, &tile_info,
+ dim_layout, msaa_layout,
+ &image_align_sa, &phys_level0_sa,
+ array_pitch_span, &array_pitch_el_rows,
+ &phys_total_el);
+
+ uint32_t row_pitch;
+ if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
+ &phys_total_el, &row_pitch))
+ return false;
- uint32_t row_pitch, size, base_alignment;
+ uint32_t base_alignment;
+ uint64_t size;
if (tiling == ISL_TILING_LINEAR) {
- row_pitch = isl_calc_linear_row_pitch(dev, info, &phys_slice0_sa);
- size = row_pitch * total_h_el + pad_bytes;
+ size = (uint64_t) row_pitch * phys_total_el.h;
/* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
*
@@ -1242,26 +1461,10 @@ isl_surf_init_s(const struct isl_device *dev,
}
base_alignment = isl_round_up_to_power_of_two(base_alignment);
} else {
- assert(fmtl->bpb % tile_info.format_bpb == 0);
- const uint32_t tile_el_scale = fmtl->bpb / tile_info.format_bpb;
-
- assert(phys_slice0_sa.w % fmtl->bw == 0);
- const uint32_t total_w_el = phys_slice0_sa.width / fmtl->bw;
- const uint32_t total_w_tl =
- isl_align_div(total_w_el * tile_el_scale,
- tile_info.logical_extent_el.width);
-
- row_pitch = total_w_tl * tile_info.phys_extent_B.width;
- if (row_pitch < info->min_pitch) {
- row_pitch = isl_align_npot(info->min_pitch,
- tile_info.phys_extent_B.width);
- }
-
- total_h_el += isl_align_div_npot(pad_bytes, row_pitch);
const uint32_t total_h_tl =
- isl_align_div(total_h_el, tile_info.logical_extent_el.height);
+ isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
- size = total_h_tl * tile_info.phys_extent_B.height * row_pitch;
+ size = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch;
const uint32_t tile_size = tile_info.phys_extent_B.width *
tile_info.phys_extent_B.height;
@@ -1269,6 +1472,28 @@ isl_surf_init_s(const struct isl_device *dev,
base_alignment = MAX(info->min_alignment, tile_size);
}
+ if (ISL_DEV_GEN(dev) < 9) {
+ /* From the Broadwell PRM Vol 5, Surface Layout:
+ *
+ * "In addition to restrictions on maximum height, width, and depth,
+ * surfaces are also restricted to a maximum size in bytes. This
+ * maximum is 2 GB for all products and all surface types."
+ *
+ * This comment is applicable to all Pre-gen9 platforms.
+ */
+ if (size > (uint64_t) 1 << 31)
+ return false;
+ } else {
+ /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
+ * "In addition to restrictions on maximum height, width, and depth,
+ * surfaces are also restricted to a maximum size of 2^38 bytes.
+ * All pixels within the surface must be contained within 2^38 bytes
+ * of the base address."
+ */
+ if (size > (uint64_t) 1 << 38)
+ return false;
+ }
+
*surf = (struct isl_surf) {
.dim = info->dim,
.dim_layout = dim_layout,
@@ -1296,15 +1521,14 @@ isl_surf_init_s(const struct isl_device *dev,
}
void
-isl_surf_get_tile_info(const struct isl_device *dev,
- const struct isl_surf *surf,
+isl_surf_get_tile_info(const struct isl_surf *surf,
struct isl_tile_info *tile_info)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
- isl_tiling_get_info(dev, surf->tiling, fmtl->bpb, tile_info);
+ isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info);
}
-void
+bool
isl_surf_get_hiz_surf(const struct isl_device *dev,
const struct isl_surf *surf,
struct isl_surf *hiz_surf)
@@ -1372,20 +1596,20 @@ isl_surf_get_hiz_surf(const struct isl_device *dev,
*/
const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
- isl_surf_init(dev, hiz_surf,
- .dim = surf->dim,
- .format = ISL_FORMAT_HIZ,
- .width = surf->logical_level0_px.width,
- .height = surf->logical_level0_px.height,
- .depth = surf->logical_level0_px.depth,
- .levels = surf->levels,
- .array_len = surf->logical_level0_px.array_len,
- .samples = samples,
- .usage = ISL_SURF_USAGE_HIZ_BIT,
- .tiling_flags = ISL_TILING_HIZ_BIT);
+ return isl_surf_init(dev, hiz_surf,
+ .dim = surf->dim,
+ .format = ISL_FORMAT_HIZ,
+ .width = surf->logical_level0_px.width,
+ .height = surf->logical_level0_px.height,
+ .depth = surf->logical_level0_px.depth,
+ .levels = surf->levels,
+ .array_len = surf->logical_level0_px.array_len,
+ .samples = samples,
+ .usage = ISL_SURF_USAGE_HIZ_BIT,
+ .tiling_flags = ISL_TILING_HIZ_BIT);
}
-void
+bool
isl_surf_get_mcs_surf(const struct isl_device *dev,
const struct isl_surf *surf,
struct isl_surf *mcs_surf)
@@ -1398,6 +1622,16 @@ isl_surf_get_mcs_surf(const struct isl_device *dev,
assert(surf->levels == 1);
assert(surf->logical_level0_px.depth == 1);
+ /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
+ * bits which means the maximum pitch of a compression surface is 512
+ * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is
+ * 64bpp, this gives us a maximum width of 8192 pixels. We can create
+ * larger multisampled surfaces, we just can't compress them. For 2x, 4x,
+ * and 8x, we have enough room for the full 16k supported by the hardware.
+ */
+ if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
+ return false;
+
enum isl_format mcs_format;
switch (surf->samples) {
case 2: mcs_format = ISL_FORMAT_MCS_2X; break;
@@ -1408,30 +1642,57 @@ isl_surf_get_mcs_surf(const struct isl_device *dev,
unreachable("Invalid sample count");
}
- isl_surf_init(dev, mcs_surf,
- .dim = ISL_SURF_DIM_2D,
- .format = mcs_format,
- .width = surf->logical_level0_px.width,
- .height = surf->logical_level0_px.height,
- .depth = 1,
- .levels = 1,
- .array_len = surf->logical_level0_px.array_len,
- .samples = 1, /* MCS surfaces are really single-sampled */
- .usage = ISL_SURF_USAGE_MCS_BIT,
- .tiling_flags = ISL_TILING_Y0_BIT);
+ return isl_surf_init(dev, mcs_surf,
+ .dim = ISL_SURF_DIM_2D,
+ .format = mcs_format,
+ .width = surf->logical_level0_px.width,
+ .height = surf->logical_level0_px.height,
+ .depth = 1,
+ .levels = 1,
+ .array_len = surf->logical_level0_px.array_len,
+ .samples = 1, /* MCS surfaces are really single-sampled */
+ .usage = ISL_SURF_USAGE_MCS_BIT,
+ .tiling_flags = ISL_TILING_Y0_BIT);
}
bool
isl_surf_get_ccs_surf(const struct isl_device *dev,
const struct isl_surf *surf,
- struct isl_surf *ccs_surf)
+ struct isl_surf *ccs_surf,
+ uint32_t row_pitch)
{
assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE);
assert(ISL_DEV_GEN(dev) >= 7);
- assert(ISL_DEV_GEN(dev) >= 8 || surf->dim == ISL_SURF_DIM_2D);
+ if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
+ return false;
- assert(surf->logical_level0_px.depth == 1);
+ /* The PRM doesn't say this explicitly, but fast-clears don't appear to
+ * work for 3D textures until gen9 where the layout of 3D textures changes
+ * to match 2D array textures.
+ */
+ if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
+ return false;
+
+ /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
+ * Non-MultiSampler Render Target Restrictions):
+ *
+ * "Support is for non-mip-mapped and non-array surface types only."
+ *
+ * This restriction is lifted on gen8+. Technically, it may be possible to
+ * create a CCS for an arrayed or mipmapped image and only enable CCS_D
+ * when rendering to the base slice. However, there is no documentation
+ * telling us what the hardware would do in that case or what it does if you
+ * walk off the base slice. (Does it ignore CCS or does it start
+ * scribbling over random memory?) We play it safe and just follow the
+ * docs and don't allow CCS_D for arrayed or mip-mapped surfaces.
+ */
+ if (ISL_DEV_GEN(dev) <= 7 &&
+ (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
+ return false;
+
+ if (isl_format_is_compressed(surf->format))
+ return false;
/* TODO: More conditions where it can fail. */
@@ -1467,19 +1728,18 @@ isl_surf_get_ccs_surf(const struct isl_device *dev,
return false;
}
- isl_surf_init(dev, ccs_surf,
- .dim = ISL_SURF_DIM_2D,
- .format = ccs_format,
- .width = surf->logical_level0_px.width,
- .height = surf->logical_level0_px.height,
- .depth = 1,
- .levels = surf->levels,
- .array_len = surf->logical_level0_px.array_len,
- .samples = 1,
- .usage = ISL_SURF_USAGE_CCS_BIT,
- .tiling_flags = ISL_TILING_CCS_BIT);
-
- return true;
+ return isl_surf_init(dev, ccs_surf,
+ .dim = surf->dim,
+ .format = ccs_format,
+ .width = surf->logical_level0_px.width,
+ .height = surf->logical_level0_px.height,
+ .depth = surf->logical_level0_px.depth,
+ .levels = surf->levels,
+ .array_len = surf->logical_level0_px.array_len,
+ .samples = 1,
+ .row_pitch = row_pitch,
+ .usage = ISL_SURF_USAGE_CCS_BIT,
+ .tiling_flags = ISL_TILING_CCS_BIT);
}
void
@@ -1533,6 +1793,9 @@ isl_surf_fill_state_s(const struct isl_device *dev, void *state,
case 9:
isl_gen9_surf_fill_state_s(dev, state, info);
break;
+ case 10:
+ isl_gen10_surf_fill_state_s(dev, state, info);
+ break;
default:
assert(!"Cannot fill surface state for this gen");
}
@@ -1564,6 +1827,79 @@ isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
case 9:
isl_gen9_buffer_fill_state_s(state, info);
break;
+ case 10:
+ isl_gen10_buffer_fill_state_s(state, info);
+ break;
+ default:
+ assert(!"Cannot fill surface state for this gen");
+ }
+}
+
+void
+isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info)
+{
+ if (info->depth_surf && info->stencil_surf) {
+ if (!dev->info->has_hiz_and_separate_stencil) {
+ assert(info->depth_surf == info->stencil_surf);
+ assert(info->depth_address == info->stencil_address);
+ }
+ assert(info->depth_surf->dim == info->stencil_surf->dim);
+ }
+
+ if (info->depth_surf) {
+ assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
+ if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
+ assert(info->view->base_array_layer + info->view->array_len <=
+ info->depth_surf->logical_level0_px.depth);
+ } else {
+ assert(info->view->base_array_layer + info->view->array_len <=
+ info->depth_surf->logical_level0_px.array_len);
+ }
+ }
+
+ if (info->stencil_surf) {
+ assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
+ if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
+ assert(info->view->base_array_layer + info->view->array_len <=
+ info->stencil_surf->logical_level0_px.depth);
+ } else {
+ assert(info->view->base_array_layer + info->view->array_len <=
+ info->stencil_surf->logical_level0_px.array_len);
+ }
+ }
+
+ switch (ISL_DEV_GEN(dev)) {
+ case 4:
+ if (ISL_DEV_IS_G4X(dev)) {
+ /* G45 surface state is the same as gen5 */
+ isl_gen5_emit_depth_stencil_hiz_s(dev, batch, info);
+ } else {
+ isl_gen4_emit_depth_stencil_hiz_s(dev, batch, info);
+ }
+ break;
+ case 5:
+ isl_gen5_emit_depth_stencil_hiz_s(dev, batch, info);
+ break;
+ case 6:
+ isl_gen6_emit_depth_stencil_hiz_s(dev, batch, info);
+ break;
+ case 7:
+ if (ISL_DEV_IS_HASWELL(dev)) {
+ isl_gen75_emit_depth_stencil_hiz_s(dev, batch, info);
+ } else {
+ isl_gen7_emit_depth_stencil_hiz_s(dev, batch, info);
+ }
+ break;
+ case 8:
+ isl_gen8_emit_depth_stencil_hiz_s(dev, batch, info);
+ break;
+ case 9:
+ isl_gen9_emit_depth_stencil_hiz_s(dev, batch, info);
+ break;
+ case 10:
+ isl_gen10_emit_depth_stencil_hiz_s(dev, batch, info);
+ break;
default:
assert(!"Cannot fill surface state for this gen");
}
@@ -1622,8 +1958,15 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
uint32_t *y_offset_sa)
{
assert(level < surf->levels);
- assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
- assert(surf->phys_level0_sa.array_len == 1);
+ if (surf->dim == ISL_SURF_DIM_3D) {
+ assert(surf->phys_level0_sa.array_len == 1);
+ assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
+ } else {
+ assert(surf->dim == ISL_SURF_DIM_2D);
+ assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
+ assert(surf->phys_level0_sa.array_len == 6);
+ assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
+ }
const struct isl_extent3d image_align_sa =
isl_surf_get_image_alignment_sa(surf);
@@ -1631,13 +1974,16 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
const uint32_t W0 = surf->phys_level0_sa.width;
const uint32_t H0 = surf->phys_level0_sa.height;
const uint32_t D0 = surf->phys_level0_sa.depth;
+ const uint32_t AL = surf->phys_level0_sa.array_len;
uint32_t x = 0;
uint32_t y = 0;
for (uint32_t l = 0; l < level; ++l) {
const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
- const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d);
+ const uint32_t level_d =
+ isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
+ image_align_sa.d);
const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
y += level_h * max_layers_vert;
@@ -1645,7 +1991,9 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
- const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d);
+ const uint32_t level_d =
+ isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
+ image_align_sa.d);
const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
@@ -1656,6 +2004,65 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
*y_offset_sa = y;
}
+static void
+get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
+ uint32_t level,
+ uint32_t logical_array_layer,
+ uint32_t *x_offset_sa,
+ uint32_t *y_offset_sa)
+{
+ assert(level < surf->levels);
+ assert(surf->logical_level0_px.depth == 1);
+ assert(logical_array_layer < surf->logical_level0_px.array_len);
+
+ const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
+
+ const struct isl_extent3d image_align_sa =
+ isl_surf_get_image_alignment_sa(surf);
+
+ struct isl_tile_info tile_info;
+ isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info);
+ const struct isl_extent2d tile_extent_sa = {
+ .w = tile_info.logical_extent_el.w * fmtl->bw,
+ .h = tile_info.logical_extent_el.h * fmtl->bh,
+ };
+ /* Tile size is a multiple of image alignment */
+ assert(tile_extent_sa.w % image_align_sa.w == 0);
+ assert(tile_extent_sa.h % image_align_sa.h == 0);
+
+ const uint32_t W0 = surf->phys_level0_sa.w;
+ const uint32_t H0 = surf->phys_level0_sa.h;
+
+ /* Each image has the same height as LOD0 because the hardware thinks
+ * everything is LOD0
+ */
+ const uint32_t H = isl_align(H0, image_align_sa.h);
+
+ /* Quick sanity check for consistency */
+ if (surf->phys_level0_sa.array_len > 1)
+ assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
+
+ uint32_t x = 0, y = 0;
+ for (uint32_t l = 0; l < level; ++l) {
+ const uint32_t W = isl_minify(W0, l);
+
+ const uint32_t w = isl_align(W, tile_extent_sa.w);
+ const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
+ tile_extent_sa.h);
+
+ if (l == 0) {
+ y += h;
+ } else {
+ x += w;
+ }
+ }
+
+ y += H * logical_array_layer;
+
+ *x_offset_sa = x;
+ *y_offset_sa = y;
+}
+
/**
* A variant of isl_surf_get_image_offset_sa() specific to
* ISL_DIM_LAYOUT_GEN9_1D.
@@ -1721,9 +2128,15 @@ isl_surf_get_image_offset_sa(const struct isl_surf *surf,
x_offset_sa, y_offset_sa);
break;
case ISL_DIM_LAYOUT_GEN4_3D:
- get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px,
+ get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
+ logical_z_offset_px,
x_offset_sa, y_offset_sa);
break;
+ case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
+ get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
+ logical_z_offset_px,
+ x_offset_sa, y_offset_sa);
+ break;
default:
unreachable("not reached");
@@ -1757,9 +2170,88 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf,
}
void
-isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
- enum isl_tiling tiling,
- uint8_t bs,
+isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
+ uint32_t level,
+ uint32_t logical_array_layer,
+ uint32_t logical_z_offset_px,
+ uint32_t *offset_B,
+ uint32_t *x_offset_sa,
+ uint32_t *y_offset_sa)
+{
+ const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
+
+ uint32_t total_x_offset_el, total_y_offset_el;
+ isl_surf_get_image_offset_el(surf, level, logical_array_layer,
+ logical_z_offset_px,
+ &total_x_offset_el,
+ &total_y_offset_el);
+
+ uint32_t x_offset_el, y_offset_el;
+ isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
+ surf->row_pitch,
+ total_x_offset_el,
+ total_y_offset_el,
+ offset_B,
+ &x_offset_el,
+ &y_offset_el);
+
+ if (x_offset_sa) {
+ *x_offset_sa = x_offset_el * fmtl->bw;
+ } else {
+ assert(x_offset_el == 0);
+ }
+
+ if (y_offset_sa) {
+ *y_offset_sa = y_offset_el * fmtl->bh;
+ } else {
+ assert(y_offset_el == 0);
+ }
+}
+
+void
+isl_surf_get_image_surf(const struct isl_device *dev,
+ const struct isl_surf *surf,
+ uint32_t level,
+ uint32_t logical_array_layer,
+ uint32_t logical_z_offset_px,
+ struct isl_surf *image_surf,
+ uint32_t *offset_B,
+ uint32_t *x_offset_sa,
+ uint32_t *y_offset_sa)
+{
+ isl_surf_get_image_offset_B_tile_sa(surf,
+ level,
+ logical_array_layer,
+ logical_z_offset_px,
+ offset_B,
+ x_offset_sa,
+ y_offset_sa);
+
+ /* Even for cube maps there will be only a single face, therefore drop the
+ * corresponding flag if present.
+ */
+ const isl_surf_usage_flags_t usage =
+ surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
+
+ bool ok UNUSED;
+ ok = isl_surf_init(dev, image_surf,
+ .dim = ISL_SURF_DIM_2D,
+ .format = surf->format,
+ .width = isl_minify(surf->logical_level0_px.w, level),
+ .height = isl_minify(surf->logical_level0_px.h, level),
+ .depth = 1,
+ .levels = 1,
+ .array_len = 1,
+ .samples = surf->samples,
+ .row_pitch = surf->row_pitch,
+ .usage = usage,
+ .tiling_flags = (1 << surf->tiling));
+ assert(ok);
+}
+
+void
+isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
+ uint32_t bpb,
uint32_t row_pitch,
uint32_t total_x_offset_el,
uint32_t total_y_offset_el,
@@ -1768,17 +2260,16 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
uint32_t *y_offset_el)
{
if (tiling == ISL_TILING_LINEAR) {
+ assert(bpb % 8 == 0);
*base_address_offset = total_y_offset_el * row_pitch +
- total_x_offset_el * bs;
+ total_x_offset_el * (bpb / 8);
*x_offset_el = 0;
*y_offset_el = 0;
return;
}
- const uint32_t bpb = bs * 8;
-
struct isl_tile_info tile_info;
- isl_tiling_get_info(dev, tiling, bpb, &tile_info);
+ isl_tiling_get_info(tiling, bpb, &tile_info);
assert(row_pitch % tile_info.phys_extent_B.width == 0);
diff --git a/lib/mesa/src/intel/isl/isl.h b/lib/mesa/src/intel/isl/isl.h
index 11ad8919e..dafe95229 100644
--- a/lib/mesa/src/intel/isl/isl.h
+++ b/lib/mesa/src/intel/isl/isl.h
@@ -353,6 +353,20 @@ enum isl_format {
ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16 = 630,
ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16 = 638,
ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16 = 639,
+ ISL_FORMAT_ASTC_HDR_2D_4X4_FLT16 = 832,
+ ISL_FORMAT_ASTC_HDR_2D_5X4_FLT16 = 840,
+ ISL_FORMAT_ASTC_HDR_2D_5X5_FLT16 = 841,
+ ISL_FORMAT_ASTC_HDR_2D_6X5_FLT16 = 849,
+ ISL_FORMAT_ASTC_HDR_2D_6X6_FLT16 = 850,
+ ISL_FORMAT_ASTC_HDR_2D_8X5_FLT16 = 865,
+ ISL_FORMAT_ASTC_HDR_2D_8X6_FLT16 = 866,
+ ISL_FORMAT_ASTC_HDR_2D_8X8_FLT16 = 868,
+ ISL_FORMAT_ASTC_HDR_2D_10X5_FLT16 = 881,
+ ISL_FORMAT_ASTC_HDR_2D_10X6_FLT16 = 882,
+ ISL_FORMAT_ASTC_HDR_2D_10X8_FLT16 = 884,
+ ISL_FORMAT_ASTC_HDR_2D_10X10_FLT16 = 886,
+ ISL_FORMAT_ASTC_HDR_2D_12X10_FLT16 = 894,
+ ISL_FORMAT_ASTC_HDR_2D_12X12_FLT16 = 895,
/* The formats that follow are internal to ISL and as such don't have an
* explicit number. We'll just let the C compiler assign it for us. Any
@@ -514,6 +528,46 @@ enum isl_dim_layout {
ISL_DIM_LAYOUT_GEN4_3D,
/**
+ * Special layout used for HiZ and stencil on Sandy Bridge to work around
+ * the hardware's lack of mipmap support. On gen6, HiZ and stencil buffers
+ * work the same as on gen7+ except that they don't technically support
+ * mipmapping. That does not, however, stop us from doing it. As far as
+ * Sandy Bridge hardware is concerned, HiZ and stencil always operate on a
+ * single miplevel 2D (possibly array) image. The dimensions of that image
+ * are NOT minified.
+ *
+ * In order to implement HiZ and stencil on Sandy Bridge, we create one
+ * full-sized 2D (possibly array) image for every LOD with every image
+ * aligned to a page boundary. When the surface is used with the stencil
+ * or HiZ hardware, we manually offset to the image for the given LOD.
+ *
+ * As a memory saving measure, we pretend that the width of each miplevel
+ * is minified and we place LOD1 and above below LOD0 but horizontally
+ * adjacent to each other. When considered as full-sized images, LOD1 and
+ * above technically overlap. However, since we only write to part of that
+ * image, the hardware will never notice the overlap.
+ *
+ * This layout looks something like this:
+ *
+ * +---------+
+ * | |
+ * | |
+ * +---------+
+ * | |
+ * | |
+ * +---------+
+ *
+ * +----+ +-+ .
+ * | | +-+
+ * +----+
+ *
+ * +----+ +-+ .
+ * | | +-+
+ * +----+
+ */
+ ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ,
+
+ /**
* For details, see the Skylake BSpec >> Memory Views >> Common Surface
* Formats >> Surface Layout and Tiling >> » 1D Surfaces.
*/
@@ -546,6 +600,193 @@ enum isl_aux_usage {
ISL_AUX_USAGE_CCS_E,
};
+/**
+ * Enum for keeping track of the state of an auxiliary compressed surface.
+ *
+ * For any given auxiliary surface compression format (HiZ, CCS, or MCS), any
+ * given slice (lod + array layer) can be in one of the seven states described
+ * by this enum. Draw and resolve operations may cause the slice to change
+ * from one state to another. The seven valid states are:
+ *
+ * 1) Clear: In this state, each block in the auxiliary surface contains a
+ * magic value that indicates that the block is in the clear state. If
+ * a block is in the clear state, its values in the primary surface are
+ * ignored and the color of the samples in the block is taken from either the
+ * RENDER_SURFACE_STATE packet for color or 3DSTATE_CLEAR_PARAMS for
+ * depth. Since neither the primary surface nor the auxiliary surface
+ * contains the clear value, the surface can be cleared to a different
+ * color by simply changing the clear color without modifying either
+ * surface.
+ *
+ * 2) Partial Clear: In this state, each block in the auxiliary surface
+ * contains either the magic clear or pass-through value. See Clear and
+ * Pass-through for more details.
+ *
+ * 3) Compressed w/ Clear: In this state, neither the auxiliary surface
+ * nor the primary surface has a complete representation of the data.
+ * Instead, both surfaces must be used together or else rendering
+ * corruption may occur. Depending on the auxiliary compression format
+ * and the data, any given block in the primary surface may contain all,
+ * some, or none of the data required to reconstruct the actual sample
+ * values. Blocks may also be in the clear state (see Clear) and have
+ * their value taken from outside the surface.
+ *
+ * 4) Compressed w/o Clear: This state is identical to the state above
+ * except that no blocks are in the clear state. In this state, all of
+ * the data required to reconstruct the final sample values is contained
+ * in the auxiliary and primary surface and the clear value is not
+ * considered.
+ *
+ * 5) Resolved: In this state, the primary surface contains 100% of the
+ * data. The auxiliary surface is also valid so the surface can be
+ * validly used with or without aux enabled. The auxiliary surface may,
+ * however, contain non-trivial data and any update to the primary
+ * surface with aux disabled will cause the two to get out of sync.
+ *
+ * 6) Pass-through: In this state, the primary surface contains 100% of the
+ * data and every block in the auxiliary surface contains a magic value
+ * which indicates that the auxiliary surface should be ignored and
+ * only the primary surface should be considered. Updating the primary
+ * surface without aux works fine and can be done repeatedly in this
+ * mode. Writing to a surface in pass-through mode with aux enabled may
+ * cause the auxiliary buffer to contain non-trivial data and no longer
+ * be in the pass-through state.
+ *
+ * 7) Aux Invalid: In this state, the primary surface contains 100% of the
+ * data and the auxiliary surface is completely bogus. Any attempt to
+ * use the auxiliary surface is liable to result in rendering
+ * corruption. The only thing that one can do to re-enable aux once
+ * this state is reached is to use an ambiguate pass to transition into
+ * the pass-through state.
+ *
+ * Drawing with or without aux enabled may implicitly cause the surface to
+ * transition between these states. There are also four types of auxiliary
+ * compression operations which cause an explicit transition:
+ *
+ * 1) Fast Clear: This operation writes the magic "clear" value to the
+ * auxiliary surface. This operation will safely transition any slice
+ * of a surface from any state to the clear state so long as the entire
+ * slice is fast cleared at once. A fast clear that only covers part of
+ * a slice of a surface is called a partial fast clear.
+ *
+ * 2) Full Resolve: This operation combines the auxiliary surface data
+ * with the primary surface data and writes the result to the primary.
+ * For HiZ, the docs call this a depth resolve. For CCS, the hardware
+ * full resolve operation does both a full resolve and an ambiguate so
+ * it actually takes you all the way to the pass-through state.
+ *
+ * 3) Partial Resolve: This operation considers blocks which are in the
+ * "clear" state and writes the clear value directly into the primary or
+ * auxiliary surface. Once this operation completes, the surface is
+ * still compressed but no longer references the clear color. This
+ * operation is only available for CCS.
+ *
+ * 4) Ambiguate: This operation throws away the current auxiliary data and
+ * replaces it with the magic pass-through value. If an ambiguate
+ * operation is performed when the primary surface does not contain 100%
+ * of the data, data will be lost. This operation is only implemented
+ * in hardware for depth where it is called a HiZ resolve.
+ *
+ * Not all operations are valid or useful in all states. The diagram below
+ * contains a complete description of the states and all valid and useful
+ * transitions except clear.
+ *
+ * Draw w/ Aux
+ * +----------+
+ * | |
+ * | +-------------+ Draw w/ Aux +-------------+
+ * +------>| Compressed |<-------------------| Clear |
+ * | w/ Clear |----->----+ | |
+ * +-------------+ | +-------------+
+ * | /|\ | | |
+ * | | | | |
+ * | | +------<-----+ | Draw w/
+ * | | | | Clear Only
+ * | | Full | | +----------+
+ * Partial | | Resolve | \|/ | |
+ * Resolve | | | +-------------+ |
+ * | | | | Partial |<------+
+ * | | | | Clear |<----------+
+ * | | | +-------------+ |
+ * | | | | |
+ * | | +------>---------+ Full |
+ * | | | Resolve |
+ * Draw w/ aux | | Partial Fast Clear | |
+ * +----------+ | +--------------------------+ | |
+ * | | \|/ | \|/ |
+ * | +-------------+ Full Resolve +-------------+ |
+ * +------>| Compressed |------------------->| Resolved | |
+ * | w/o Clear |<-------------------| | |
+ * +-------------+ Draw w/ Aux +-------------+ |
+ * /|\ | | |
+ * | Draw | | Draw |
+ * | w/ Aux | | w/o Aux |
+ * | Ambiguate | | |
+ * | +--------------------------+ | |
+ * Draw w/o Aux | | | Draw w/o Aux |
+ * +----------+ | | | +----------+ |
+ * | | | \|/ \|/ | | |
+ * | +-------------+ Ambiguate +-------------+ | |
+ * +------>| Pass- |<-------------------| Aux |<------+ |
+ * +------>| through | | Invalid | |
+ * | +-------------+ +-------------+ |
+ * | | | |
+ * +----------+ +-----------------------------------------------------+
+ * Draw w/ Partial Fast Clear
+ * Clear Only
+ *
+ *
+ * While the above general theory applies to all forms of auxiliary
+ * compression on Intel hardware, not all states and operations are available
+ * on all compression types. However, each of the auxiliary states and
+ * operations can be fairly easily mapped onto the above diagram:
+ *
+ * HiZ: Hierarchical depth compression is capable of being in any of the
+ * states above. Hardware provides three HiZ operations: "Depth
+ * Clear", "Depth Resolve", and "HiZ Resolve" which map to "Fast
+ * Clear", "Full Resolve", and "Ambiguate" respectively. The
+ * hardware provides no HiZ partial resolve operation so the only way
+ * to get into the "Compressed w/o Clear" state is to render with HiZ
+ * when the surface is in the resolved or pass-through states.
+ *
+ * MCS: Multisample compression is technically capable of being in any of
+ * the states above except that most of them aren't useful. Both the
+ * render engine and the sampler support MCS compression and, apart
+ * from clear color, MCS is format-unaware so we leave the surface
+ * compressed 100% of the time. The hardware provides no MCS
+ * operations.
+ *
+ * CCS_D: Single-sample fast-clears (also called CCS_D in ISL) are one of
+ * the simplest forms of compression since they don't do anything
+ * beyond clear color tracking. They really only support three of
+ * the seven states: Clear, Partial Clear, and Pass-through. The
+ * only CCS_D operation is "Resolve" which maps to a full resolve
+ * followed by an ambiguate.
+ *
+ * CCS_E: Single-sample render target compression (also called CCS_E in ISL)
+ * is capable of being in almost all of the above states. The only
+ * exception is that it does not have separate resolved and pass-
+ * through states. Instead, the CCS_E full resolve operation does
+ * both a resolve and an ambiguate so it goes directly into the
+ * pass-through state. CCS_E also provides fast clear and partial
+ * resolve operations which work as described above.
+ *
+ * While it is technically possible to perform a CCS_E ambiguate, it
+ * is not provided by Sky Lake hardware so we choose to avoid the aux
+ * invalid state. If the aux invalid state were determined to be
+ * useful, a CCS ambiguate could be done by carefully rendering to
+ * the CCS and filling it with zeros.
+ */
+enum isl_aux_state {
+ ISL_AUX_STATE_CLEAR = 0,
+ ISL_AUX_STATE_PARTIAL_CLEAR,
+ ISL_AUX_STATE_COMPRESSED_CLEAR,
+ ISL_AUX_STATE_COMPRESSED_NO_CLEAR,
+ ISL_AUX_STATE_RESOLVED,
+ ISL_AUX_STATE_PASS_THROUGH,
+ ISL_AUX_STATE_AUX_INVALID,
+};
+
/* TODO(chadv): Explain */
enum isl_array_pitch_span {
ISL_ARRAY_PITCH_SPAN_FULL,
@@ -576,6 +817,21 @@ typedef uint64_t isl_surf_usage_flags_t;
/** @} */
/**
+ * @defgroup Channel Mask
+ *
+ * These #define values are chosen to match the values of
+ * RENDER_SURFACE_STATE::Color Buffer Component Write Disables
+ *
+ * @{
+ */
+typedef uint8_t isl_channel_mask_t;
+#define ISL_CHANNEL_BLUE_BIT (1 << 0)
+#define ISL_CHANNEL_GREEN_BIT (1 << 1)
+#define ISL_CHANNEL_RED_BIT (1 << 2)
+#define ISL_CHANNEL_ALPHA_BIT (1 << 3)
+/** @} */
+
+/**
* @brief A channel select (also known as texture swizzle) value
*/
enum isl_channel_select {
@@ -671,6 +927,32 @@ struct isl_device {
const struct gen_device_info *info;
bool use_separate_stencil;
bool has_bit6_swizzling;
+
+ /**
+ * Describes the layout of a RENDER_SURFACE_STATE structure for the
+ * current gen.
+ */
+ struct {
+ uint8_t size;
+ uint8_t align;
+ uint8_t addr_offset;
+ uint8_t aux_addr_offset;
+
+ /* Rounded up to the nearest dword to simplify GPU memcpy operations. */
+ uint8_t clear_value_size;
+ uint8_t clear_value_offset;
+ } ss;
+
+ /**
+ * Describes the layout of the depth/stencil/hiz commands as emitted by
+ * isl_emit_depth_stencil_hiz.
+ */
+ struct {
+ uint8_t size;
+ uint8_t depth_offset;
+ uint8_t stencil_offset;
+ uint8_t hiz_offset;
+ } ds;
};
struct isl_extent2d {
@@ -772,6 +1054,25 @@ struct isl_tile_info {
};
/**
+ * Metadata about a DRM format modifier.
+ */
+struct isl_drm_modifier_info {
+ uint64_t modifier;
+
+ /** Text name of the modifier */
+ const char *name;
+
+ /** ISL tiling implied by this modifier */
+ enum isl_tiling tiling;
+
+ /** ISL aux usage implied by this modifier */
+ enum isl_aux_usage aux_usage;
+
+ /** Whether or not this modifier supports clear color */
+ bool supports_clear_color;
+};
+
+/**
* @brief Input to surface initialization
*
* @invariant width >= 1
@@ -799,8 +1100,11 @@ struct isl_surf_init_info {
/** Lower bound for isl_surf::alignment, in bytes. */
uint32_t min_alignment;
- /** Lower bound for isl_surf::pitch, in bytes. */
- uint32_t min_pitch;
+ /**
+ * Exact value for isl_surf::row_pitch. Ignored if zero. isl_surf_init()
+ * will fail if this is misaligned or out of bounds.
+ */
+ uint32_t row_pitch;
isl_surf_usage_flags_t usage;
@@ -843,7 +1147,7 @@ struct isl_surf {
uint32_t samples;
/** Total size of the surface, in bytes. */
- uint32_t size;
+ uint64_t size;
/** Required alignment for the surface's base address. */
uint32_t alignment;
@@ -923,6 +1227,12 @@ struct isl_view {
* for texturing, they are ignored.
*/
uint32_t base_array_layer;
+
+ /**
+ * Array Length
+ *
+ * Indicates the number of array elements starting at Base Array Layer.
+ */
uint32_t array_len;
struct isl_swizzle swizzle;
@@ -964,6 +1274,11 @@ struct isl_surf_fill_state_info {
*/
union isl_color_value clear_color;
+ /**
+ * Surface write disables for gen4-5
+ */
+ isl_channel_mask_t write_disables;
+
/* Intra-tile offset */
uint16_t x_offset_sa, y_offset_sa;
};
@@ -997,6 +1312,61 @@ struct isl_buffer_fill_state_info {
uint32_t stride;
};
+struct isl_depth_stencil_hiz_emit_info {
+ /**
+ * The depth surface
+ */
+ const struct isl_surf *depth_surf;
+
+ /**
+ * The stencil surface
+ *
+ * If separate stencil is not available, this must point to the same
+ * isl_surf as depth_surf.
+ */
+ const struct isl_surf *stencil_surf;
+
+ /**
+ * The view into the depth and stencil surfaces.
+ *
+ * This view applies to both surfaces simultaneously.
+ */
+ const struct isl_view *view;
+
+ /**
+ * The address of the depth surface in GPU memory
+ */
+ uint64_t depth_address;
+
+ /**
+ * The address of the stencil surface in GPU memory
+ *
+ * If separate stencil is not available, this must have the same value as
+ * depth_address.
+ */
+ uint64_t stencil_address;
+
+ /**
+ * The Memory Object Control state for depth and stencil buffers
+ *
+ * Both depth and stencil will get the same MOCS value. The exact format
+ * of this value depends on hardware generation.
+ */
+ uint32_t mocs;
+
+ /**
+ * The HiZ surface or NULL if HiZ is disabled.
+ */
+ const struct isl_surf *hiz_surf;
+ enum isl_aux_usage hiz_usage;
+ uint64_t hiz_address;
+
+ /**
+ * The depth clear value
+ */
+ float depth_clear_value;
+};
+
extern const struct isl_format_layout isl_format_layouts[];
void
@@ -1029,11 +1399,21 @@ bool isl_format_supports_filtering(const struct gen_device_info *devinfo,
enum isl_format format);
bool isl_format_supports_vertex_fetch(const struct gen_device_info *devinfo,
enum isl_format format);
-bool isl_format_supports_lossless_compression(const struct gen_device_info *devinfo,
- enum isl_format format);
+bool isl_format_supports_typed_writes(const struct gen_device_info *devinfo,
+ enum isl_format format);
+bool isl_format_supports_typed_reads(const struct gen_device_info *devinfo,
+ enum isl_format format);
+bool isl_format_supports_ccs_d(const struct gen_device_info *devinfo,
+ enum isl_format format);
+bool isl_format_supports_ccs_e(const struct gen_device_info *devinfo,
+ enum isl_format format);
bool isl_format_supports_multisampling(const struct gen_device_info *devinfo,
enum isl_format format);
+bool isl_formats_are_ccs_e_compatible(const struct gen_device_info *devinfo,
+ enum isl_format format1,
+ enum isl_format format2);
+
bool isl_format_has_unorm_channel(enum isl_format fmt) ATTRIBUTE_CONST;
bool isl_format_has_snorm_channel(enum isl_format fmt) ATTRIBUTE_CONST;
bool isl_format_has_ufloat_channel(enum isl_format fmt) ATTRIBUTE_CONST;
@@ -1119,6 +1499,14 @@ isl_format_block_is_1x1x1(enum isl_format fmt)
}
static inline bool
+isl_format_is_srgb(enum isl_format fmt)
+{
+ return isl_format_layouts[fmt].colorspace == ISL_COLORSPACE_SRGB;
+}
+
+enum isl_format isl_format_srgb_to_linear(enum isl_format fmt);
+
+static inline bool
isl_format_is_rgb(enum isl_format fmt)
{
return isl_format_layouts[fmt].channels.r.bits > 0 &&
@@ -1155,6 +1543,15 @@ isl_tiling_is_std_y(enum isl_tiling tiling)
return (1u << tiling) & ISL_TILING_STD_Y_MASK;
}
+uint32_t
+isl_tiling_to_i915_tiling(enum isl_tiling tiling);
+
+enum isl_tiling
+isl_tiling_from_i915_tiling(uint32_t tiling);
+
+const struct isl_drm_modifier_info * ATTRIBUTE_CONST
+isl_drm_modifier_get_info(uint64_t modifier);
+
struct isl_extent2d ATTRIBUTE_CONST
isl_get_interleaved_msaa_px_size_sa(uint32_t samples);
@@ -1240,6 +1637,9 @@ isl_extent4d(uint32_t width, uint32_t height, uint32_t depth,
return e;
}
+bool isl_color_value_is_zero_one(union isl_color_value value,
+ enum isl_format format);
+
#define isl_surf_init(dev, surf, ...) \
isl_surf_init_s((dev), (surf), \
&(struct isl_surf_init_info) { __VA_ARGS__ });
@@ -1250,16 +1650,15 @@ isl_surf_init_s(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info);
void
-isl_surf_get_tile_info(const struct isl_device *dev,
- const struct isl_surf *surf,
+isl_surf_get_tile_info(const struct isl_surf *surf,
struct isl_tile_info *tile_info);
-void
+bool
isl_surf_get_hiz_surf(const struct isl_device *dev,
const struct isl_surf *surf,
struct isl_surf *hiz_surf);
-void
+bool
isl_surf_get_mcs_surf(const struct isl_device *dev,
const struct isl_surf *surf,
struct isl_surf *mcs_surf);
@@ -1267,7 +1666,8 @@ isl_surf_get_mcs_surf(const struct isl_device *dev,
bool
isl_surf_get_ccs_surf(const struct isl_device *dev,
const struct isl_surf *surf,
- struct isl_surf *ccs_surf);
+ struct isl_surf *ccs_surf,
+ uint32_t row_pitch /**< Ignored if 0 */);
#define isl_surf_fill_state(dev, state, ...) \
isl_surf_fill_state_s((dev), (state), \
@@ -1285,6 +1685,14 @@ void
isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
const struct isl_buffer_fill_state_info *restrict info);
+#define isl_emit_depth_stencil_hiz(dev, batch, ...) \
+ isl_emit_depth_stencil_hiz_s((dev), (batch), \
+ &(struct isl_depth_stencil_hiz_emit_info) { __VA_ARGS__ })
+
+void
+isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
void
isl_surf_fill_image_param(const struct isl_device *dev,
struct brw_image_param *param,
@@ -1413,6 +1821,50 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf,
uint32_t *y_offset_el);
/**
+ * Calculate the offset, in bytes and intratile surface samples, to a
+ * subimage in the surface.
+ *
+ * This is equivalent to calling isl_surf_get_image_offset_el, passing the
+ * result to isl_tiling_get_intratile_offset_el, and converting the tile
+ * offsets to samples.
+ *
+ * @invariant level < surface levels
+ * @invariant logical_array_layer < logical array length of surface
+ * @invariant logical_z_offset_px < logical depth of surface at level
+ */
+void
+isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
+ uint32_t level,
+ uint32_t logical_array_layer,
+ uint32_t logical_z_offset_px,
+ uint32_t *offset_B,
+ uint32_t *x_offset_sa,
+ uint32_t *y_offset_sa);
+
+/**
+ * Create an isl_surf that represents a particular subimage in the surface.
+ *
+ * The newly created surface will have a single miplevel and array slice. The
+ * surface lives at the returned byte and intratile offsets, in samples.
+ *
+ * It is safe to call this function with surf == image_surf.
+ *
+ * @invariant level < surface levels
+ * @invariant logical_array_layer < logical array length of surface
+ * @invariant logical_z_offset_px < logical depth of surface at level
+ */
+void
+isl_surf_get_image_surf(const struct isl_device *dev,
+ const struct isl_surf *surf,
+ uint32_t level,
+ uint32_t logical_array_layer,
+ uint32_t logical_z_offset_px,
+ struct isl_surf *image_surf,
+ uint32_t *offset_B,
+ uint32_t *x_offset_sa,
+ uint32_t *y_offset_sa);
+
+/**
* @brief Calculate the intratile offsets to a surface.
*
* In @a base_address_offset return the offset from the base of the surface to
@@ -1423,9 +1875,8 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf,
* surface's tiling format.
*/
void
-isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
- enum isl_tiling tiling,
- uint8_t bs,
+isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
+ uint32_t bpb,
uint32_t row_pitch,
uint32_t total_x_offset_el,
uint32_t total_y_offset_el,
@@ -1434,8 +1885,7 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
uint32_t *y_offset_el);
static inline void
-isl_tiling_get_intratile_offset_sa(const struct isl_device *dev,
- enum isl_tiling tiling,
+isl_tiling_get_intratile_offset_sa(enum isl_tiling tiling,
enum isl_format format,
uint32_t row_pitch,
uint32_t total_x_offset_sa,
@@ -1446,8 +1896,6 @@ isl_tiling_get_intratile_offset_sa(const struct isl_device *dev,
{
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
- assert(fmtl->bpb % 8 == 0);
-
/* For computing the intratile offsets, we actually want a strange unit
* which is samples for multisampled surfaces but elements for compressed
* surfaces.
@@ -1457,7 +1905,7 @@ isl_tiling_get_intratile_offset_sa(const struct isl_device *dev,
const uint32_t total_x_offset = total_x_offset_sa / fmtl->bw;
const uint32_t total_y_offset = total_y_offset_sa / fmtl->bh;
- isl_tiling_get_intratile_offset_el(dev, tiling, fmtl->bpb / 8, row_pitch,
+ isl_tiling_get_intratile_offset_el(tiling, fmtl->bpb, row_pitch,
total_x_offset, total_y_offset,
base_address_offset,
x_offset_sa, y_offset_sa);
diff --git a/lib/mesa/src/intel/isl/isl_emit_depth_stencil.c b/lib/mesa/src/intel/isl/isl_emit_depth_stencil.c
new file mode 100644
index 000000000..0d541fd1c
--- /dev/null
+++ b/lib/mesa/src/intel/isl/isl_emit_depth_stencil.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#define __gen_address_type uint64_t
+#define __gen_user_data void
+
+static inline uint64_t
+__gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta)
+{
+ return addr + delta;
+}
+
+#include "genxml/gen_macros.h"
+#include "genxml/genX_pack.h"
+
+#include "isl_priv.h"
+
+#define __PASTE2(x, y) x ## y
+#define __PASTE(x, y) __PASTE2(x, y)
+#define isl_genX(x) __PASTE(isl_, genX(x))
+
+static const uint32_t isl_to_gen_ds_surftype[] = {
+#if GEN_GEN >= 9
+ /* From the SKL PRM, "3DSTATE_DEPTH_BUFFER::SurfaceType":
+ *
+ * "If depth/stencil is enabled with 1D render target, depth/stencil
+ * surface type needs to be set to 2D surface type and height set to 1.
+ * Depth will use (legacy) TileY and stencil will use TileW. For this
+ * case only, the Surface Type of the depth buffer can be 2D while the
+ * Surface Type of the render target(s) are 1D, representing an
+ * exception to a programming note above."
+ */
+ [ISL_SURF_DIM_1D] = SURFTYPE_2D,
+#else
+ [ISL_SURF_DIM_1D] = SURFTYPE_1D,
+#endif
+ [ISL_SURF_DIM_2D] = SURFTYPE_2D,
+ [ISL_SURF_DIM_3D] = SURFTYPE_3D,
+};
+
+void
+isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info)
+{
+ struct GENX(3DSTATE_DEPTH_BUFFER) db = {
+ GENX(3DSTATE_DEPTH_BUFFER_header),
+ };
+
+ if (info->depth_surf) {
+ db.SurfaceType = isl_to_gen_ds_surftype[info->depth_surf->dim];
+ db.SurfaceFormat = isl_surf_get_depth_format(dev, info->depth_surf);
+ db.Width = info->depth_surf->logical_level0_px.width - 1;
+ db.Height = info->depth_surf->logical_level0_px.height - 1;
+ } else if (info->stencil_surf) {
+ db.SurfaceType = isl_to_gen_ds_surftype[info->stencil_surf->dim];
+ db.SurfaceFormat = D32_FLOAT;
+ db.Width = info->stencil_surf->logical_level0_px.width - 1;
+ db.Height = info->stencil_surf->logical_level0_px.height - 1;
+ } else {
+ db.SurfaceType = SURFTYPE_NULL;
+ db.SurfaceFormat = D32_FLOAT;
+ }
+
+ if (info->depth_surf || info->stencil_surf) {
+ /* These are based entirely on the view */
+ db.Depth = db.RenderTargetViewExtent = info->view->array_len - 1;
+ db.LOD = info->view->base_level;
+ db.MinimumArrayElement = info->view->base_array_layer;
+ }
+
+ if (info->depth_surf) {
+#if GEN_GEN >= 7
+ db.DepthWriteEnable = true;
+#endif
+ db.SurfaceBaseAddress = info->depth_address;
+#if GEN_GEN >= 6
+ db.DepthBufferMOCS = info->mocs;
+#endif
+
+#if GEN_GEN <= 6
+ db.TiledSurface = info->depth_surf->tiling != ISL_TILING_LINEAR;
+ db.TileWalk = info->depth_surf->tiling == ISL_TILING_Y0 ? TILEWALK_YMAJOR :
+ TILEWALK_XMAJOR;
+ db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
+#endif
+
+ db.SurfacePitch = info->depth_surf->row_pitch - 1;
+#if GEN_GEN >= 8
+ db.SurfaceQPitch =
+ isl_surf_get_array_pitch_el_rows(info->depth_surf) >> 2;
+#endif
+ }
+
+#if GEN_GEN == 5 || GEN_GEN == 6
+ const bool separate_stencil =
+ info->stencil_surf && info->stencil_surf->format == ISL_FORMAT_R8_UINT;
+ if (separate_stencil || info->hiz_usage == ISL_AUX_USAGE_HIZ) {
+ assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
+ db.SeparateStencilBufferEnable = true;
+ db.HierarchicalDepthBufferEnable = true;
+ }
+#endif
+
+#if GEN_GEN >= 6
+ struct GENX(3DSTATE_STENCIL_BUFFER) sb = {
+ GENX(3DSTATE_STENCIL_BUFFER_header),
+ };
+#else
+# define sb db
+#endif
+
+ if (info->stencil_surf) {
+#if GEN_GEN >= 7
+ db.StencilWriteEnable = true;
+#endif
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+ sb.StencilBufferEnable = true;
+#endif
+ sb.SurfaceBaseAddress = info->stencil_address;
+#if GEN_GEN >= 6
+ sb.StencilBufferMOCS = info->mocs;
+#endif
+ sb.SurfacePitch = info->stencil_surf->row_pitch - 1;
+#if GEN_GEN >= 8
+ sb.SurfaceQPitch =
+ isl_surf_get_array_pitch_el_rows(info->stencil_surf) >> 2;
+#endif
+ }
+
+#if GEN_GEN >= 6
+ struct GENX(3DSTATE_HIER_DEPTH_BUFFER) hiz = {
+ GENX(3DSTATE_HIER_DEPTH_BUFFER_header),
+ };
+ struct GENX(3DSTATE_CLEAR_PARAMS) clear = {
+ GENX(3DSTATE_CLEAR_PARAMS_header),
+ };
+
+ assert(info->hiz_usage == ISL_AUX_USAGE_NONE ||
+ info->hiz_usage == ISL_AUX_USAGE_HIZ);
+ if (info->hiz_usage == ISL_AUX_USAGE_HIZ) {
+ db.HierarchicalDepthBufferEnable = true;
+
+ hiz.SurfaceBaseAddress = info->hiz_address;
+ hiz.HierarchicalDepthBufferMOCS = info->mocs;
+ hiz.SurfacePitch = info->hiz_surf->row_pitch - 1;
+#if GEN_GEN >= 8
+ /* From the SKL PRM Vol2a:
+ *
+ * The interpretation of this field is dependent on Surface Type
+ * as follows:
+ * - SURFTYPE_1D: distance in pixels between array slices
+ * - SURFTYPE_2D/CUBE: distance in rows between array slices
+ * - SURFTYPE_3D: distance in rows between R - slices
+ *
+ * Unfortunately, the docs aren't 100% accurate here. They fail to
+ * mention that the 1-D rule only applies to linear 1-D images.
+ * Since depth and HiZ buffers are always tiled, they are treated as
+ * 2-D images. Prior to Sky Lake, this field is always in rows.
+ */
+ hiz.SurfaceQPitch =
+ isl_surf_get_array_pitch_sa_rows(info->hiz_surf) >> 2;
+#endif
+
+ clear.DepthClearValueValid = true;
+#if GEN_GEN >= 8
+ clear.DepthClearValue = info->depth_clear_value;
+#else
+ switch (info->depth_surf->format) {
+ case ISL_FORMAT_R32_FLOAT: {
+ union { float f; uint32_t u; } fu;
+ fu.f = info->depth_clear_value;
+ clear.DepthClearValue = fu.u;
+ break;
+ }
+ case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
+ clear.DepthClearValue = info->depth_clear_value * ((1u << 24) - 1);
+ break;
+ case ISL_FORMAT_R16_UNORM:
+ clear.DepthClearValue = info->depth_clear_value * ((1u << 16) - 1);
+ break;
+ default:
+ unreachable("Invalid depth type");
+ }
+#endif
+ }
+#endif /* GEN_GEN >= 6 */
+
+ /* Pack everything into the batch */
+ uint32_t *dw = batch;
+ GENX(3DSTATE_DEPTH_BUFFER_pack)(NULL, dw, &db);
+ dw += GENX(3DSTATE_DEPTH_BUFFER_length);
+
+#if GEN_GEN >= 6
+ GENX(3DSTATE_STENCIL_BUFFER_pack)(NULL, dw, &sb);
+ dw += GENX(3DSTATE_STENCIL_BUFFER_length);
+
+ GENX(3DSTATE_HIER_DEPTH_BUFFER_pack)(NULL, dw, &hiz);
+ dw += GENX(3DSTATE_HIER_DEPTH_BUFFER_length);
+
+ GENX(3DSTATE_CLEAR_PARAMS_pack)(NULL, dw, &clear);
+ dw += GENX(3DSTATE_CLEAR_PARAMS_length);
+#endif
+}
diff --git a/lib/mesa/src/intel/isl/isl_format.c b/lib/mesa/src/intel/isl/isl_format.c
index 1a6727b50..435b0d003 100644
--- a/lib/mesa/src/intel/isl/isl_format.c
+++ b/lib/mesa/src/intel/isl/isl_format.c
@@ -37,14 +37,16 @@ struct surface_format_info {
uint8_t input_vb;
uint8_t streamed_output_vb;
uint8_t color_processing;
- uint8_t lossless_compression;
+ uint8_t typed_write;
+ uint8_t typed_read;
+ uint8_t ccs_e;
};
/* This macro allows us to write the table almost as it appears in the PRM,
* while restructuring it to turn it into the C code we want.
*/
-#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, sf) \
- [ISL_FORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e},
+#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, tw, tr, ccs_e, sf) \
+ [ISL_FORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, tw, tr, ccs_e},
#define Y 0
#define x 255
@@ -86,259 +88,273 @@ struct surface_format_info {
* - Render Target Surface Types [SKL+]
*/
static const struct surface_format_info format_info[] = {
-/* smpl filt shad CK RT AB VB SO color ccs_e */
- SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT)
- SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_UINT)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R64G64_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, x, R32G32B32X32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_USCALED)
- SF( x, x, x, x, x, x, 75, x, x, x, R32G32B32A32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU)
- SF( Y, 50, x, x, x, x, Y, Y, x, x, R32G32B32_FLOAT)
- SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_SINT)
- SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_UINT)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_USCALED)
- SF( x, x, x, x, x, x, 75, x, x, x, R32G32B32_SFIXED)
- SF( Y, Y, x, x, Y, 45, Y, x, 60, 90, R16G16B16A16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16B16A16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16B16A16_FLOAT)
- SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32_FLOAT)
- SF( Y, 70, x, x, Y, Y, Y, Y, x, x, R32G32_FLOAT_LD)
- SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_UINT)
- SF( Y, 50, Y, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS)
- SF( Y, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT)
- SF( Y, 50, x, x, x, x, x, x, x, x, L32A32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R64_FLOAT)
- SF( Y, Y, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, 90, R16G16B16X16_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, x, A32X32_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, x, L32X32_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, x, I32X32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R32G32_USCALED)
- SF( x, x, x, x, x, x, 75, x, x, x, R32G32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, x, R64_PASSTHRU)
- SF( Y, Y, x, Y, Y, Y, Y, x, 60, 90, B8G8R8A8_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, x, B8G8R8A8_UNORM_SRGB)
-/* smpl filt shad CK RT AB VB SO color ccs_e */
- SF( Y, Y, x, x, Y, Y, Y, x, 60, x, R10G10B10A2_UNORM)
- SF( Y, Y, x, x, x, x, x, x, 60, x, R10G10B10A2_UNORM_SRGB)
- SF( Y, x, x, x, Y, x, Y, x, x, x, R10G10B10A2_UINT)
- SF( Y, Y, x, x, x, x, Y, x, x, x, R10G10B10_SNORM_A2_UNORM)
- SF( Y, Y, x, x, Y, Y, Y, x, 60, 90, R8G8B8A8_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, 60, x, R8G8B8A8_UNORM_SRGB)
- SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R8G8B8A8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_UINT)
- SF( Y, Y, x, x, Y, 45, Y, x, x, 90, R16G16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16_FLOAT)
- SF( Y, Y, x, x, Y, Y, 75, x, 60, x, B10G10R10A2_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM_SRGB)
- SF( Y, Y, x, x, Y, Y, Y, x, x, x, R11G11B10_FLOAT)
- SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_UINT)
- SF( Y, 50, Y, x, Y, Y, Y, Y, x, 90, R32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS)
- SF( Y, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT)
- SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, x, I24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, x, L24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, x, A24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, x, I32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, x, L32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, x, A32_FLOAT)
- SF( Y, Y, x, Y, 80, 80, x, x, 60, 90, B8G8R8X8_UNORM)
- SF( Y, Y, x, x, 80, 80, x, x, x, x, B8G8R8X8_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP)
- SF( Y, Y, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, x, R32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R32_SNORM)
-/* smpl filt shad CK RT AB VB SO color ccs_e */
- SF( x, x, x, x, x, x, Y, x, x, x, R10G10B10X2_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R16G16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R16G16_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R32_USCALED)
- SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G6R5_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G6R5_UNORM_SRGB)
- SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G5R5A1_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G5R5A1_UNORM_SRGB)
- SF( Y, Y, x, Y, Y, Y, x, x, x, x, B4G4R4A4_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, x, B4G4R4A4_UNORM_SRGB)
- SF( Y, Y, x, x, Y, Y, Y, x, x, x, R8G8_UNORM)
- SF( Y, Y, x, Y, Y, 60, Y, x, x, x, R8G8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_UINT)
- SF( Y, Y, Y, x, Y, 45, Y, x, 70, x, R16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, x, R16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, x, R16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, x, R16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, x, R16_FLOAT)
- SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0)
- SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1)
- SF( Y, Y, Y, x, x, x, x, x, x, x, I16_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, x, L16_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, x, A16_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, x, L8A8_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, x, I16_FLOAT)
- SF( Y, Y, Y, x, x, x, x, x, x, x, L16_FLOAT)
- SF( Y, Y, Y, x, x, x, x, x, x, x, A16_FLOAT)
- SF(45, 45, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB)
- SF( Y, Y, x, Y, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM)
- SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM)
- SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM_SRGB)
- SF( x, x, x, x, x, x, Y, x, x, x, R8G8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R8G8_USCALED)
-/* smpl filt shad CK RT AB VB SO color ccs_e */
- SF( x, x, x, x, x, x, Y, x, x, x, R16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R16_USCALED)
- SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0)
- SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1)
- SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM)
+/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */
+ SF( Y, 50, x, x, Y, Y, Y, Y, x, 70, 90, 90, R32G32B32A32_FLOAT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32B32A32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32B32A32_UINT)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32A32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32A32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64G64_FLOAT)
+ SF( Y, 50, x, x, 100, 100, x, x, x, x, x, 100, R32G32B32X32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32A32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32A32_USCALED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32G32B32A32_SFIXED)
+ SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64G64_PASSTHRU)
+ SF( Y, 50, x, x, x, x, Y, Y, x, x, x, x, R32G32B32_FLOAT)
+ SF( Y, x, x, x, x, x, Y, Y, x, x, x, x, R32G32B32_SINT)
+ SF( Y, x, x, x, x, x, Y, Y, x, x, x, x, R32G32B32_UINT)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32B32_USCALED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32G32B32_SFIXED)
+ SF( Y, Y, x, x, Y, 45, Y, x, 60, 70, x, 90, R16G16B16A16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, 90, R16G16B16A16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, 90, R16G16B16A16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, 90, R16G16B16A16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, 70, 90, 90, R16G16B16A16_FLOAT)
+ SF( Y, 50, x, x, Y, Y, Y, Y, x, 70, 90, 90, R32G32_FLOAT)
+ SF( Y, 70, x, x, Y, Y, Y, Y, x, x, x, x, R32G32_FLOAT_LD)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 70, 90, 90, R32G32_UINT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS)
+ SF( Y, x, x, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, x, x, L32A32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64_FLOAT)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM)
+ SF( Y, Y, x, x, 90, 90, x, x, x, x, x, 90, R16G16B16X16_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, x, x, A32X32_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, x, x, L32X32_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, x, x, I32X32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16B16A16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16B16A16_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32G32_USCALED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32G32_SFIXED)
+ SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64_PASSTHRU)
+ SF( Y, Y, x, Y, Y, Y, Y, x, 60, 70, x, 90, B8G8R8A8_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, x, 100, B8G8R8A8_UNORM_SRGB)
+/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */
+ SF( Y, Y, x, x, Y, Y, Y, x, 60, 70, x, 100, R10G10B10A2_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, 60, x, x, x, R10G10B10A2_UNORM_SRGB)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, x, 100, R10G10B10A2_UINT)
+ SF( Y, Y, x, x, x, x, Y, x, x, x, x, x, R10G10B10_SNORM_A2_UNORM)
+ SF( Y, Y, x, x, Y, Y, Y, x, 60, 70, x, 90, R8G8B8A8_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, x, x, 100, R8G8B8A8_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, 90, R8G8B8A8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, 90, R8G8B8A8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, 90, R8G8B8A8_UINT)
+ SF( Y, Y, x, x, Y, 45, Y, x, x, 70, x, 90, R16G16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, 90, R16G16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, 90, R16G16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, 90, R16G16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, 70, 90, 90, R16G16_FLOAT)
+ SF( Y, Y, x, x, Y, Y, 75, x, 60, 70, x, 100, B10G10R10A2_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, x, x, 100, B10G10R10A2_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, 70, x, 100, R11G11B10_FLOAT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 70, 70, 90, R32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 70, 70, 90, R32_UINT)
+ SF( Y, 50, Y, x, Y, Y, Y, Y, x, 70, 70, 90, R32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS)
+ SF( Y, x, x, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, L16A16_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, I24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, L24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, A24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, I32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, L32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, x, x, A32_FLOAT)
+ SF( Y, Y, x, Y, 80, 80, x, x, 60, x, x, 90, B8G8R8X8_UNORM)
+ SF( Y, Y, x, x, 80, 80, x, x, x, x, x, 100, B8G8R8X8_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, L16A16_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_SNORM)
+/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R10G10B10X2_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8A8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8A8_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R32_USCALED)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, B5G6R5_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, x, x, B5G6R5_UNORM_SRGB)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, B5G5R5A1_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, x, x, B5G5R5A1_UNORM_SRGB)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, B4G4R4A4_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, x, x, B4G4R4A4_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, 70, x, x, R8G8_UNORM)
+ SF( Y, Y, x, Y, Y, 60, Y, x, x, 70, x, x, R8G8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, x, R8G8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, x, R8G8_UINT)
+ SF( Y, Y, Y, x, Y, 45, Y, x, 70, 70, x, x, R16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, x, R16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, x, R16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, x, R16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, 70, 90, x, R16_FLOAT)
+ SF( 50, 50, x, x, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0)
+ SF( 50, 50, x, x, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, I16_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, L16_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, A16_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, L8A8_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, I16_FLOAT)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, L16_FLOAT)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, x, x, A16_FLOAT)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM)
+ SF( x, x, x, x, Y, Y, x, x, x, 70, x, x, B5G5R5X1_UNORM)
+ SF( x, x, x, x, Y, Y, x, x, x, x, x, x, B5G5R5X1_UNORM_SRGB)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8_USCALED)
+/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16_USCALED)
+ SF( 50, 50, x, x, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0)
+ SF( 50, 50, x, x, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1)
+ SF( x, x, x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM)
/* According to the PRM, A4B4G4R4_UNORM isn't supported until Sky Lake
* but empirical testing indicates that at least sampling works just fine
* on Broadwell.
*/
- SF(80, 80, x, x, 90, x, x, x, x, x, A4B4G4R4_UNORM)
- SF(90, x, x, x, x, x, x, x, x, x, L8A8_UINT)
- SF(90, x, x, x, x, x, x, x, x, x, L8A8_SINT)
- SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, x, R8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, x, R8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, x, R8_UINT)
- SF( Y, Y, x, Y, Y, Y, x, x, x, x, A8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, I8_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, x, L8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE0)
- SF( Y, Y, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE0)
- SF( x, x, x, x, x, x, Y, x, x, x, R8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R8_USCALED)
- SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0)
- SF(45, 45, x, x, x, x, x, x, x, x, L8_UNORM_SRGB)
- SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1)
- SF(45, 45, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1)
- SF(45, 45, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1)
- SF( x, x, x, x, x, x, x, x, x, x, Y8_UNORM)
- SF(90, x, x, x, x, x, x, x, x, x, L8_UINT)
- SF(90, x, x, x, x, x, x, x, x, x, L8_SINT)
- SF(90, x, x, x, x, x, x, x, x, x, I8_UINT)
- SF(90, x, x, x, x, x, x, x, x, x, I8_SINT)
- SF(45, 45, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, x, R1_UNORM)
- SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_NORMAL)
- SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_SWAPUVY)
- SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0)
- SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE1)
- SF( Y, Y, x, Y, x, x, x, x, x, x, BC1_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, x, BC2_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, x, BC3_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, BC4_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, BC5_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB)
- SF( Y, x, x, x, x, x, x, x, x, x, MONO8)
- SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPUV)
- SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPY)
- SF( Y, Y, x, x, x, x, x, x, x, x, DXT1_RGB)
-/* smpl filt shad CK RT AB VB SO color ccs_e */
- SF( Y, Y, x, x, x, x, x, x, x, x, FXT1)
- SF(75, 75, x, x, x, x, Y, x, x, x, R8G8B8_UNORM)
- SF(75, 75, x, x, x, x, Y, x, x, x, R8G8B8_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64A64_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64_FLOAT)
- SF( Y, Y, x, x, x, x, x, x, x, x, BC4_SNORM)
- SF( Y, Y, x, x, x, x, x, x, x, x, BC5_SNORM)
- SF(50, 50, x, x, x, x, 60, x, x, x, R16G16B16_FLOAT)
- SF(75, 75, x, x, x, x, Y, x, x, x, R16G16B16_UNORM)
- SF(75, 75, x, x, x, x, Y, x, x, x, R16G16B16_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_USCALED)
- SF(70, 70, x, x, x, x, x, x, x, x, BC6H_SF16)
- SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM)
- SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB)
- SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16)
- SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8)
- SF(75, 75, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ETC1_RGB8)
- SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8)
- SF(80, 80, x, x, x, x, x, x, x, x, EAC_R11)
- SF(80, 80, x, x, x, x, x, x, x, x, EAC_RG11)
- SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_R11)
- SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11)
- SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8)
- SF(90, x, x, x, x, x, 75, x, x, x, R16G16B16_UINT)
- SF(90, x, x, x, x, x, 75, x, x, x, R16G16B16_SINT)
- SF( x, x, x, x, x, x, 75, x, x, x, R32_SFIXED)
- SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_SNORM)
- SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_USCALED)
- SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_SSCALED)
- SF( x, x, x, x, x, x, 75, x, x, x, R10G10B10A2_SINT)
- SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_SNORM)
- SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_USCALED)
- SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_SSCALED)
- SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_UINT)
- SF( x, x, x, x, x, x, 75, x, x, x, B10G10R10A2_SINT)
- SF( x, x, x, x, x, x, 80, x, x, x, R64G64B64A64_PASSTHRU)
- SF( x, x, x, x, x, x, 80, x, x, x, R64G64B64_PASSTHRU)
- SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA)
- SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA)
- SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8)
- SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
- SF(90, x, x, x, x, x, 75, x, x, x, R8G8B8_UINT)
- SF(90, x, x, x, x, x, 75, x, x, x, R8G8B8_SINT)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X5_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_U8SRGB)
- SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_U8SRGB)
+ SF( 80, 80, x, x, 90, x, x, x, x, x, x, x, A4B4G4R4_UNORM)
+ SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8A8_UINT)
+ SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8A8_SINT)
+ SF( Y, Y, x, 45, Y, Y, Y, x, x, 70, x, x, R8_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 70, x, x, R8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 90, x, R8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 70, 75, x, R8_UINT)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, 70, x, x, A8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, I8_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, L8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE0)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE0)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8_USCALED)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, L8_UNORM_SRGB)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1)
+ SF( x, x, x, x, x, x, x, x, x, x, x, x, Y8_UNORM)
+ SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8_UINT)
+ SF( 90, x, x, x, x, x, x, x, x, x, x, x, L8_SINT)
+ SF( 90, x, x, x, x, x, x, x, x, x, x, x, I8_UINT)
+ SF( 90, x, x, x, x, x, x, x, x, x, x, x, I8_SINT)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, R1_UNORM)
+ SF( Y, Y, x, Y, Y, x, x, x, 60, x, x, x, YCRCB_NORMAL)
+ SF( Y, Y, x, Y, Y, x, x, x, 60, x, x, x, YCRCB_SWAPUVY)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0)
+ SF( 45, 45, x, x, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE1)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, BC1_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, BC2_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, x, x, BC3_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC4_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC5_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB)
+ SF( Y, x, x, x, x, x, x, x, x, x, x, x, MONO8)
+ SF( Y, Y, x, x, Y, x, x, x, 60, x, x, x, YCRCB_SWAPUV)
+ SF( Y, Y, x, x, Y, x, x, x, 60, x, x, x, YCRCB_SWAPY)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, DXT1_RGB)
+/* smpl filt shad CK RT AB VB SO color TW TR ccs_e */
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, FXT1)
+ SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R8G8B8_UNORM)
+ SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R8G8B8_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R8G8B8_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64G64B64A64_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R64G64B64_FLOAT)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC4_SNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, x, x, BC5_SNORM)
+ SF( 50, 50, x, x, x, x, 60, x, x, x, x, x, R16G16B16_FLOAT)
+ SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R16G16B16_UNORM)
+ SF( 75, 75, x, x, x, x, Y, x, x, x, x, x, R16G16B16_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16B16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, x, x, R16G16B16_USCALED)
+ SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC6H_SF16)
+ SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC7_UNORM)
+ SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB)
+ SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC6H_UF16)
+ SF( x, x, x, x, x, x, x, x, x, x, x, x, PLANAR_420_8)
+ SF( 75, 75, x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC1_RGB8)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_RGB8)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_R11)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_RG11)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8)
+ SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R16G16B16_UINT)
+ SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R16G16B16_SINT)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R32_SFIXED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_SNORM)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_USCALED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_SSCALED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, R10G10B10A2_SINT)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_SNORM)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_USCALED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_SSCALED)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_UINT)
+ SF( x, x, x, x, x, x, 75, x, x, x, x, x, B10G10R10A2_SINT)
+ SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64G64B64A64_PASSTHRU)
+ SF( x, x, x, x, x, x, 80, x, x, x, x, x, R64G64B64_PASSTHRU)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8)
+ SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
+ SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R8G8B8_UINT)
+ SF( 90, x, x, x, x, x, 75, x, x, x, x, x, R8G8B8_SINT)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X5_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_FLT16)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4X4_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X4_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5X5_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X5_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6X6_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X5_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X6_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8X8_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X5_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X6_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X8_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10X10_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X10_U8SRGB)
+ SF( 90, 90, x, x, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12X12_U8SRGB)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_4X4_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_5X4_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_5X5_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_6X5_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_6X6_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_8X5_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_8X6_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_8X8_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X5_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X6_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X8_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_10X10_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_12X10_FLT16)
+ SF(100, 100, x, x, x, x, x, x, x, x, x, x, ASTC_HDR_2D_12X12_FLT16)
};
#undef x
#undef Y
@@ -383,6 +399,20 @@ isl_format_supports_sampling(const struct gen_device_info *devinfo,
*/
if (fmtl->txc == ISL_TXC_ETC1 || fmtl->txc == ISL_TXC_ETC2)
return true;
+ } else if (devinfo->is_cherryview) {
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+ /* Support for ASTC LDR exists on Cherry View even though big-core
+ * GPUs didn't get it until Skylake.
+ */
+ if (fmtl->txc == ISL_TXC_ASTC)
+ return format < ISL_FORMAT_ASTC_HDR_2D_4X4_FLT16;
+ } else if (gen_device_info_is_9lp(devinfo)) {
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+ /* Support for ASTC HDR exists on Broxton even though big-core
+ * GPUs didn't get it until Cannonlake.
+ */
+ if (fmtl->txc == ISL_TXC_ASTC)
+ return true;
}
return format_gen(devinfo) >= format_info[format].sampling;
@@ -402,6 +432,20 @@ isl_format_supports_filtering(const struct gen_device_info *devinfo,
*/
if (fmtl->txc == ISL_TXC_ETC1 || fmtl->txc == ISL_TXC_ETC2)
return true;
+ } else if (devinfo->is_cherryview) {
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+ /* Support for ASTC LDR exists on Cherry View even though big-core
+ * GPUs didn't get it until Skylake.
+ */
+ if (fmtl->txc == ISL_TXC_ASTC)
+ return format < ISL_FORMAT_ASTC_HDR_2D_4X4_FLT16;
+ } else if (gen_device_info_is_9lp(devinfo)) {
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+ /* Support for ASTC HDR exists on Broxton even though big-core
+ * GPUs didn't get it until Cannonlake.
+ */
+ if (fmtl->txc == ISL_TXC_ASTC)
+ return true;
}
return format_gen(devinfo) >= format_info[format].filtering;
@@ -423,14 +467,76 @@ isl_format_supports_vertex_fetch(const struct gen_device_info *devinfo,
return format_gen(devinfo) >= format_info[format].input_vb;
}
+/**
+ * Returns true if the given format can support typed writes.
+ *
+ * A format whose format_info entry is not flagged as existing is never
+ * supported. Otherwise the value returned by format_gen() for the device
+ * must be at least the format's typed_write threshold.
+ */
+bool
+isl_format_supports_typed_writes(const struct gen_device_info *devinfo,
+ enum isl_format format)
+{
+ if (!format_info[format].exists)
+ return false;
+
+ return format_gen(devinfo) >= format_info[format].typed_write;
+}
+
+
+/**
+ * Returns true if the given format can support typed reads with format
+ * conversion fully handled by hardware. On Sky Lake, all formats which are
+ * supported for typed writes also support typed reads but some of them return
+ * the raw image data and don't provide format conversion.
+ *
+ * For anyone looking to find this data in the PRM, the easiest way to find
+ * format tables is to search for R11G11B10. There are only a few
+ * occurrences.
+ */
+bool
+isl_format_supports_typed_reads(const struct gen_device_info *devinfo,
+ enum isl_format format)
+{
+ /* Formats not flagged as existing in format_info are rejected outright. */
+ if (!format_info[format].exists)
+ return false;
+
+ return format_gen(devinfo) >= format_info[format].typed_read;
+}
+
+/**
+ * Returns true if the given format can support single-sample fast clears.
+ * This function only checks the format. In order to determine if a surface
+ * supports CCS_D, several other factors need to be considered such as tiling
+ * and sample count. See isl_surf_get_ccs_surf for details.
+ */
+bool
+isl_format_supports_ccs_d(const struct gen_device_info *devinfo,
+ enum isl_format format)
+{
+ /* Fast clears were first added on Ivy Bridge */
+ if (devinfo->gen < 7)
+ return false;
+
+ if (!isl_format_supports_rendering(devinfo, format))
+ return false;
+
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+
+ /* Only formats with 32, 64 or 128 bits per block qualify. */
+ return fmtl->bpb == 32 || fmtl->bpb == 64 || fmtl->bpb == 128;
+}
+
+/**
+ * Returns true if the given format can support single-sample color
+ * compression. This function only checks the format. In order to determine
+ * if a surface supports CCS_E, several other factors need to be considered
+ * such as tiling and sample count. See isl_surf_get_ccs_surf for details.
+ */
bool
-isl_format_supports_lossless_compression(const struct gen_device_info *devinfo,
- enum isl_format format)
+isl_format_supports_ccs_e(const struct gen_device_info *devinfo,
+ enum isl_format format)
{
if (!format_info[format].exists)
return false;
- return format_gen(devinfo) >= format_info[format].lossless_compression;
+ return format_gen(devinfo) >= format_info[format].ccs_e;
}
bool
@@ -448,16 +554,19 @@ isl_format_supports_multisampling(const struct gen_device_info *devinfo,
* - any compressed texture format (BC*)
* - any YCRCB* format
*
- * The restriction on the format's size is removed on Broadwell. Also,
- * there is an exception for HiZ which we treat as a compressed format and
- * is allowed to be multisampled on Broadwell and earlier.
+ * The restriction on the format's size is removed on Broadwell. Moreover,
+ * empirically it appears that even IvyBridge can handle multisampled surfaces
+ * with format sizes all the way to 128-bits (RGBA32F, RGBA32I, RGBA32UI).
+ *
+ * Also, there is an exception for HiZ which we treat as a compressed
+ * format and is allowed to be multisampled on Broadwell and earlier.
*/
if (format == ISL_FORMAT_HIZ) {
/* On SKL+, HiZ is always single-sampled even when the primary surface
* is multisampled. See also isl_surf_get_hiz_surf().
*/
return devinfo->gen <= 8;
- } else if (devinfo->gen < 8 && isl_format_get_layout(format)->bpb > 64) {
+ } else if (devinfo->gen < 7 && isl_format_get_layout(format)->bpb > 64) {
return false;
} else if (isl_format_is_compressed(format)) {
return false;
@@ -468,6 +577,37 @@ isl_format_supports_multisampling(const struct gen_device_info *devinfo,
}
}
+/**
+ * Returns true if the two formats are "CCS_E compatible" meaning that you can
+ * render in one format with CCS_E enabled and then texture using the other
+ * format without needing a resolve.
+ *
+ * Returns false if either format does not support CCS_E on the given device;
+ * otherwise the result is decided by comparing the bit widths of the r, g, b
+ * and a channels of the two format layouts.
+ *
+ * Note: Even if the formats are compatible, special care must be taken if a
+ * clear color is involved because the encoding of the clear color is heavily
+ * format-dependent.
+ */
+bool
+isl_formats_are_ccs_e_compatible(const struct gen_device_info *devinfo,
+ enum isl_format format1,
+ enum isl_format format2)
+{
+ /* They must support CCS_E */
+ if (!isl_format_supports_ccs_e(devinfo, format1) ||
+ !isl_format_supports_ccs_e(devinfo, format2))
+ return false;
+
+ const struct isl_format_layout *fmtl1 = isl_format_get_layout(format1);
+ const struct isl_format_layout *fmtl2 = isl_format_get_layout(format2);
+
+ /* The compression used by CCS is not dependent on the actual data encoding
+ * of the format but only depends on the bit-layout of the channels.
+ */
+ return fmtl1->channels.r.bits == fmtl2->channels.r.bits &&
+ fmtl1->channels.g.bits == fmtl2->channels.g.bits &&
+ fmtl1->channels.b.bits == fmtl2->channels.b.bits &&
+ fmtl1->channels.a.bits == fmtl2->channels.a.bits;
+}
+
static inline bool
isl_format_has_channel_type(enum isl_format fmt, enum isl_base_type type)
{
diff --git a/lib/mesa/src/intel/isl/isl_gen7.c b/lib/mesa/src/intel/isl/isl_gen7.c
index b6a86d23f..24d411f51 100644
--- a/lib/mesa/src/intel/isl/isl_gen7.c
+++ b/lib/mesa/src/intel/isl/isl_gen7.c
@@ -24,6 +24,25 @@
#include "isl_gen7.h"
#include "isl_priv.h"
+/**
+ * Returns true if the given format cannot use VALIGN_4 on gen7, i.e. it is a
+ * YUV format or R32G32B32_FLOAT. Asserts that the device is gen7.
+ */
+static bool
+gen7_format_needs_valign2(const struct isl_device *dev,
+ enum isl_format format)
+{
+ assert(ISL_DEV_GEN(dev) == 7);
+
+ /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
+ * RENDER_SURFACE_STATE Surface Vertical Alignment:
+ *
+ * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
+ * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
+ * (0x190)
+ *
+ * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
+ */
+ return isl_format_is_yuv(format) ||
+ format == ISL_FORMAT_R32G32B32_FLOAT;
+}
+
bool
isl_gen7_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
@@ -76,8 +95,13 @@ isl_gen7_choose_msaa_layout(const struct isl_device *dev,
* Note that the above SINT restrictions apply only to *MSRTs* (that is,
* *multisampled* render targets). The restrictions seem to permit an MCS
* if the render target is singlesampled.
+ *
+ * Moreover, empirically it appears that hardware can render multisampled
+ * surfaces with RGBA8I, RGBA16I and RGBA32I.
*/
- if (isl_format_has_sint_channel(info->format))
+
+ /* Multisampling requires vertical alignment of four. */
+ if (info->samples > 1 && gen7_format_needs_valign2(dev, info->format))
return false;
/* More obvious restrictions */
@@ -151,25 +175,6 @@ isl_gen7_choose_msaa_layout(const struct isl_device *dev,
return true;
}
-static bool
-gen7_format_needs_valign2(const struct isl_device *dev,
- enum isl_format format)
-{
- assert(ISL_DEV_GEN(dev) == 7);
-
- /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
- * RENDER_SURFACE_STATE Surface Vertical Alignment:
- *
- * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
- * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
- * (0x190)
- *
- * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
- */
- return isl_format_is_yuv(format) ||
- format == ISL_FORMAT_R32G32B32_FLOAT;
-}
-
/**
* @brief Filter out tiling flags that are incompatible with the surface.
*
@@ -215,6 +220,12 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
*flags &= ~ISL_TILING_W_BIT;
}
+ /* From the SKL+ PRMs, RENDER_SURFACE_STATE:TileMode,
+ * If Surface Format is ASTC*, this field must be TILEMODE_YMAJOR.
+ */
+ if (isl_format_get_layout(info->format)->txc == ISL_TXC_ASTC)
+ *flags &= ISL_TILING_Y0_BIT;
+
/* MCS buffers are always Y-tiled */
if (isl_format_get_layout(info->format)->txc == ISL_TXC_MCS)
*flags &= ISL_TILING_Y0_BIT;
@@ -283,123 +294,96 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
*flags &= ~ISL_TILING_Y0_BIT;
}
-/**
- * Choose horizontal subimage alignment, in units of surface elements.
- */
-static uint32_t
-gen7_choose_halign_el(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info)
+void
+isl_gen7_choose_image_alignment_el(const struct isl_device *dev,
+ const struct isl_surf_init_info *restrict info,
+ enum isl_tiling tiling,
+ enum isl_dim_layout dim_layout,
+ enum isl_msaa_layout msaa_layout,
+ struct isl_extent3d *image_align_el)
{
- if (isl_format_is_compressed(info->format))
- return 1;
+ assert(ISL_DEV_GEN(dev) == 7);
- /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
- * RENDER_SURFACE_STATE Surface Hoizontal Alignment:
+ /* Handled by isl_choose_image_alignment_el */
+ assert(info->format != ISL_FORMAT_HIZ);
+
+ /* IVB+ does not support combined depthstencil. */
+ assert(!isl_surf_usage_is_depth_and_stencil(info->usage));
+
+ /* From the Ivy Bridge PRM, Vol. 2, Part 2, Section 6.18.4.4,
+ * "Alignment unit size", the alignment parameters are summarized in the
+ * following table:
*
- * - This field is intended to be set to HALIGN_8 only if the surface
- * was rendered as a depth buffer with Z16 format or a stencil buffer,
- * since these surfaces support only alignment of 8. Use of HALIGN_8
- * for other surfaces is supported, but uses more memory.
+ * Surface Defined By | Surface Format | Align Width | Align Height
+ * --------------------+-----------------+-------------+--------------
+ * DEPTH_BUFFER | D16_UNORM | 8 | 4
+ * | other | 4 | 4
+ * --------------------+-----------------+-------------+--------------
+ * STENCIL_BUFFER | N/A | 8 | 8
+ * --------------------+-----------------+-------------+--------------
+ * SURFACE_STATE | BC*, ETC*, EAC* | 4 | 4
+ * | FXT1 | 8 | 4
+ * | all others | HALIGN | VALIGN
+ * -------------------------------------------------------------------
*/
- if (isl_surf_info_is_z16(info) ||
- isl_surf_usage_is_stencil(info->usage))
- return 8;
-
- return 4;
-}
+ if (isl_surf_usage_is_depth(info->usage)) {
+ *image_align_el = info->format == ISL_FORMAT_R16_UNORM ?
+ isl_extent3d(8, 4, 1) : isl_extent3d(4, 4, 1);
+ return;
+ } else if (isl_surf_usage_is_stencil(info->usage)) {
+ *image_align_el = isl_extent3d(8, 8, 1);
+ return;
+ } else if (isl_format_is_compressed(info->format)) {
+ /* Compressed formats all have alignment equal to block size. */
+ *image_align_el = isl_extent3d(1, 1, 1);
+ return;
+ }
-/**
- * Choose vertical subimage alignment, in units of surface elements.
- */
-static uint32_t
-gen7_choose_valign_el(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info,
- enum isl_tiling tiling)
-{
- MAYBE_UNUSED bool require_valign2 = false;
- bool require_valign4 = false;
+ /* Everything after this point is in the "set by Surface Horizontal or
+ * Vertical Alignment" case. Now it's just a matter of applying
+ * restrictions.
+ */
- if (isl_format_is_compressed(info->format))
- return 1;
+ /* There are no restrictions on halign beyond what's given in the table
+ * above. We set it to the minimum value of 4 because that uses the least
+ * memory.
+ */
+ const uint32_t halign = 4;
- if (gen7_format_needs_valign2(dev, info->format))
- require_valign2 = true;
+ bool require_valign4 = false;
/* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1:
* RENDER_SURFACE_STATE Surface Vertical Alignment:
*
- * - This field is intended to be set to VALIGN_4 if the surface was
- * rendered as a depth buffer, for a multisampled (4x) render target,
- * or for a multisampled (8x) render target, since these surfaces
- * support only alignment of 4. Use of VALIGN_4 for other surfaces is
- * supported, but uses more memory. This field must be set to
- * VALIGN_4 for all tiled Y Render Target surfaces.
+ * * This field is intended to be set to VALIGN_4 if the surface was
+ * rendered as a depth buffer,
*
+ * * for a multisampled (4x) render target, or for a multisampled (8x)
+ * render target, since these surfaces support only alignment of 4.
+ *
+ * * This field must be set to VALIGN_4 for all tiled Y Render Target
+ * surfaces
+ *
+ * * Value of 1 is not supported for format YCRCB_NORMAL (0x182),
+ * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
+ *
+ * * If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
+ * must be set to VALIGN_4."
+ *
+ * The first restriction is already handled by the table above and the
+ * second restriction is redundant with the fifth.
*/
- if (isl_surf_usage_is_depth(info->usage) ||
- info->samples > 1 ||
- ((info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
- tiling == ISL_TILING_Y0)) {
+ if (info->samples > 1)
require_valign4 = true;
- }
- if (isl_surf_usage_is_stencil(info->usage)) {
- /* The Ivybridge PRM states that the stencil buffer's vertical alignment
- * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment
- * Unit Size]. However, valign=8 is outside the set of valid values of
- * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2
- * (0x0) and VALIGN_4 (0x1).
- *
- * The PRM is generally confused about the width, height, and alignment
- * of the stencil buffer; and this confusion appears elsewhere. For
- * example, the following PRM text effectively converts the stencil
- * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM,
- * Volume 1, Part 1, Section
- * 6.18.4.2 Base Address and LOD Calculation]:
- *
- * For separate stencil buffer, the width must be mutiplied by 2 and
- * height divided by 2 as follows:
- *
- * w_L = 2*i*ceil(W_L/i)
- * h_L = 1/2*j*ceil(H_L/j)
- *
- * The root of the confusion is that, in W tiling, each pair of rows is
- * interleaved into one.
- *
- * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API
- * is more polished.
- */
+ if (tiling == ISL_TILING_Y0 &&
+ (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT))
require_valign4 = true;
- }
- assert(!require_valign2 || !require_valign4);
+ assert(!(require_valign4 && gen7_format_needs_valign2(dev, info->format)));
- if (require_valign4)
- return 4;
-
- /* Prefer VALIGN_2 because it conserves memory. */
- return 2;
-}
-
-void
-isl_gen7_choose_image_alignment_el(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info,
- enum isl_tiling tiling,
- enum isl_dim_layout dim_layout,
- enum isl_msaa_layout msaa_layout,
- struct isl_extent3d *image_align_el)
-{
- assert(ISL_DEV_GEN(dev) == 7);
-
- /* Handled by isl_choose_image_alignment_el */
- assert(info->format != ISL_FORMAT_HIZ);
-
- /* IVB+ does not support combined depthstencil. */
- assert(!isl_surf_usage_is_depth_and_stencil(info->usage));
+ /* We default to VALIGN_2 because it uses the least memory. */
+ const uint32_t valign = require_valign4 ? 4 : 2;
- *image_align_el = (struct isl_extent3d) {
- .w = gen7_choose_halign_el(dev, info),
- .h = gen7_choose_valign_el(dev, info, tiling),
- .d = 1,
- };
+ *image_align_el = isl_extent3d(halign, valign, 1);
}
diff --git a/lib/mesa/src/intel/isl/isl_gen8.c b/lib/mesa/src/intel/isl/isl_gen8.c
index 81c69dc13..2199b8d22 100644
--- a/lib/mesa/src/intel/isl/isl_gen8.c
+++ b/lib/mesa/src/intel/isl/isl_gen8.c
@@ -87,98 +87,6 @@ isl_gen8_choose_msaa_layout(const struct isl_device *dev,
return true;
}
-/**
- * Choose horizontal subimage alignment, in units of surface elements.
- */
-static uint32_t
-gen8_choose_halign_el(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info)
-{
- if (isl_format_is_compressed(info->format))
- return 1;
-
- /* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
- * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
- *
- * - This field is intended to be set to HALIGN_8 only if the surface
- * was rendered as a depth buffer with Z16 format or a stencil buffer.
- * In this case it must be set to HALIGN_8 since these surfaces
- * support only alignment of 8. [...]
- */
- if (isl_surf_info_is_z16(info))
- return 8;
- if (isl_surf_usage_is_stencil(info->usage))
- return 8;
-
- /* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
- * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
- *
- * [...] For Z32 formats it must be set to HALIGN_4.
- */
- if (isl_surf_usage_is_depth(info->usage))
- return 4;
-
- if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
- /* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
- * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
- *
- * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
- * HALIGN 16 must be used.
- *
- * This case handles color surfaces that may own an auxiliary MCS, CCS_D,
- * or CCS_E. Depth buffers, including those that own an auxiliary HiZ
- * surface, are handled above and do not require HALIGN_16.
- */
- assert(!isl_surf_usage_is_depth(info->usage));
- return 16;
- }
-
- /* XXX(chadv): I believe the hardware requires each image to be
- * cache-aligned. If that's true, then defaulting to halign=4 is wrong for
- * many formats. Depending on the format's block size, we may need to
- * increase halign to 8.
- */
- return 4;
-}
-
-/**
- * Choose vertical subimage alignment, in units of surface elements.
- */
-static uint32_t
-gen8_choose_valign_el(const struct isl_device *dev,
- const struct isl_surf_init_info *restrict info)
-{
- /* From the Broadwell PRM > Volume 2d: Command Reference: Structures
- * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325):
- *
- * - For Sampling Engine and Render Target Surfaces: This field
- * specifies the vertical alignment requirement in elements for the
- * surface. [...] An element is defined as a pixel in uncompresed
- * surface formats, and as a compression block in compressed surface
- * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
- * element is a sample.
- *
- * - This field is intended to be set to VALIGN_4 if the surface was
- * rendered as a depth buffer, for a multisampled (4x) render target,
- * or for a multisampled (8x) render target, since these surfaces
- * support only alignment of 4. Use of VALIGN_4 for other surfaces is
- * supported, but increases memory usage.
- *
- * - This field is intended to be set to VALIGN_8 only if the surface
- * was rendered as a stencil buffer, since stencil buffer surfaces
- * support only alignment of 8. If set to VALIGN_8, Surface Format
- * must be R8_UINT.
- */
-
- if (isl_format_is_compressed(info->format))
- return 1;
-
- if (isl_surf_usage_is_stencil(info->usage))
- return 8;
-
- return 4;
-}
-
void
isl_gen8_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
@@ -205,30 +113,65 @@ isl_gen8_choose_image_alignment_el(const struct isl_device *dev,
return;
}
- /* The below text from the Broadwell PRM provides some insight into the
- * hardware's requirements for LOD alignment. From the Broadwell PRM >>
- * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces:
+ /* From the Broadwell PRM, Volume 4, "Memory Views" p. 186, the alignment
+ * parameters are summarized in the following table:
*
- * These [2D surfaces] must adhere to the following memory organization
- * rules:
- *
- * - For non-compressed texture formats, each mipmap must start on an
- * even row within the monolithic rectangular area. For
- * 1-texel-high mipmaps, this may require a row of padding below
- * the previous mipmap. This restriction does not apply to any
- * compressed texture formats; each subsequent (lower-res)
- * compressed mipmap is positioned directly below the previous
- * mipmap.
- *
- * - Vertical alignment restrictions vary with memory tiling type:
- * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled
- * mipmaps are not required to start at the left edge of a tile
- * row.)
+ * Surface Defined By | Surface Format | Align Width | Align Height
+ * --------------------+-----------------+-------------+--------------
+ * DEPTH_BUFFER | D16_UNORM | 8 | 4
+ * | other | 4 | 4
+ * --------------------+-----------------+-------------+--------------
+ * STENCIL_BUFFER | N/A | 8 | 8
+ * --------------------+-----------------+-------------+--------------
+ * SURFACE_STATE | BC*, ETC*, EAC* | 4 | 4
+ * | FXT1 | 8 | 4
+ * | all others | HALIGN | VALIGN
+ * -------------------------------------------------------------------
+ */
+ if (isl_surf_usage_is_depth(info->usage)) {
+ *image_align_el = info->format == ISL_FORMAT_R16_UNORM ?
+ isl_extent3d(8, 4, 1) : isl_extent3d(4, 4, 1);
+ return;
+ } else if (isl_surf_usage_is_stencil(info->usage)) {
+ *image_align_el = isl_extent3d(8, 8, 1);
+ return;
+ } else if (isl_format_is_compressed(info->format)) {
+ /* Compressed formats all have alignment equal to block size. */
+ *image_align_el = isl_extent3d(1, 1, 1);
+ return;
+ }
+
+ /* For all other formats, the alignment is determined by the horizontal and
+ * vertical alignment fields of RENDER_SURFACE_STATE. There are a few
+ * restrictions, but we generally have a choice.
+ */
+
+ /* Vertical alignment is unrestricted so we choose the smallest allowed
+ * alignment because that will use the least memory
+ */
+ const uint32_t valign = 4;
+
+ bool needs_halign16 = false;
+ if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
+ /* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
+ * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
+ *
+ * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
+ * HALIGN 16 must be used.
+ *
+ * This case handles color surfaces that may own an auxiliary MCS, CCS_D,
+ * or CCS_E. Depth buffers, including those that own an auxiliary HiZ
+ * surface, are handled above and do not require HALIGN_16.
+ */
+ needs_halign16 = true;
+ }
+
+ /* XXX(chadv): I believe the hardware requires each image to be
+ * cache-aligned. If that's true, then defaulting to halign=4 is wrong for
+ * many formats. Depending on the format's block size, we may need to
+ * increase halign to 8.
*/
+ const uint32_t halign = needs_halign16 ? 16 : 4;
- *image_align_el = (struct isl_extent3d) {
- .w = gen8_choose_halign_el(dev, info),
- .h = gen8_choose_valign_el(dev, info),
- .d = 1,
- };
+ *image_align_el = isl_extent3d(halign, valign, 1);
}
diff --git a/lib/mesa/src/intel/isl/isl_priv.h b/lib/mesa/src/intel/isl/isl_priv.h
index dc3975d3c..525d8a206 100644
--- a/lib/mesa/src/intel/isl/isl_priv.h
+++ b/lib/mesa/src/intel/isl/isl_priv.h
@@ -33,7 +33,13 @@
#include "isl.h"
#define isl_finishme(format, ...) \
- __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__)
+ do { \
+ static bool reported = false; \
+ if (!reported) { \
+ __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
+ reported = true; \
+ } \
+ } while (0)
void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char *file, int line, const char *fmt, ...);
@@ -74,6 +80,13 @@ isl_align_npot(uintmax_t n, uintmax_t a)
return ((n + a - 1) / a) * a;
}
+/**
+ * Returns n / a, asserting that a divides n with no remainder.
+ */
+static inline uintmax_t
+isl_assert_div(uintmax_t n, uintmax_t a)
+{
+ assert(n % a == 0);
+ return n / a;
+}
+
/**
* Alignment must be a power of 2.
*/
@@ -172,6 +185,10 @@ isl_gen9_surf_fill_state_s(const struct isl_device *dev, void *state,
const struct isl_surf_fill_state_info *restrict info);
void
+isl_gen10_surf_fill_state_s(const struct isl_device *dev, void *state,
+ const struct isl_surf_fill_state_info *restrict info);
+
+void
isl_gen4_buffer_fill_state_s(void *state,
const struct isl_buffer_fill_state_info *restrict info);
@@ -199,4 +216,40 @@ void
isl_gen9_buffer_fill_state_s(void *state,
const struct isl_buffer_fill_state_info *restrict info);
+void
+isl_gen10_buffer_fill_state_s(void *state,
+ const struct isl_buffer_fill_state_info *restrict info);
+
+void
+isl_gen4_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_gen5_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_gen6_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_gen7_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_gen75_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_gen8_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_gen9_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_gen10_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
#endif /* ISL_PRIV_H */
diff --git a/lib/mesa/src/intel/isl/isl_storage_image.c b/lib/mesa/src/intel/isl/isl_storage_image.c
index ffd03e4ad..a8aebce6d 100644
--- a/lib/mesa/src/intel/isl/isl_storage_image.c
+++ b/lib/mesa/src/intel/isl/isl_storage_image.c
@@ -22,7 +22,7 @@
*/
#include "isl_priv.h"
-#include "brw_compiler.h"
+#include "compiler/brw_compiler.h"
bool
isl_is_storage_image_format(enum isl_format format)
@@ -226,8 +226,12 @@ isl_surf_fill_image_param(const struct isl_device *dev,
view->base_array_layer;
}
- isl_surf_get_image_offset_el(surf, view->base_level, view->base_array_layer,
- 0, &param->offset[0], &param->offset[1]);
+ isl_surf_get_image_offset_el(surf, view->base_level,
+ surf->dim == ISL_SURF_DIM_3D ?
+ 0 : view->base_array_layer,
+ surf->dim == ISL_SURF_DIM_3D ?
+ view->base_array_layer : 0,
+ &param->offset[0], &param->offset[1]);
const int cpp = isl_format_get_layout(surf->format)->bpb / 8;
param->stride[0] = cpp;
diff --git a/lib/mesa/src/intel/isl/isl_surface_state.c b/lib/mesa/src/intel/isl/isl_surface_state.c
index 3bb0abd5a..e8bdb6596 100644
--- a/lib/mesa/src/intel/isl/isl_surface_state.c
+++ b/lib/mesa/src/intel/isl/isl_surface_state.c
@@ -113,12 +113,14 @@ get_surftype(enum isl_surf_dim dim, isl_surf_usage_flags_t usage)
assert(!(usage & ISL_SURF_USAGE_CUBE_BIT));
return SURFTYPE_1D;
case ISL_SURF_DIM_2D:
- if (usage & ISL_SURF_USAGE_STORAGE_BIT) {
- /* Storage images are always plain 2-D, not cube */
- return SURFTYPE_2D;
- } else if (usage & ISL_SURF_USAGE_CUBE_BIT) {
+ if ((usage & ISL_SURF_USAGE_CUBE_BIT) &&
+ (usage & ISL_SURF_USAGE_TEXTURE_BIT)) {
+ /* We need SURFTYPE_CUBE to make cube sampling work */
return SURFTYPE_CUBE;
} else {
+ /* Everything else (render and storage) treats cubes as plain
+ * 2D array textures
+ */
return SURFTYPE_2D;
}
case ISL_SURF_DIM_3D:
@@ -252,8 +254,32 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
if (info->surf->dim == ISL_SURF_DIM_1D)
assert(!isl_format_is_compressed(info->view->format));
+ if (isl_format_is_compressed(info->surf->format)) {
+ /* You're not allowed to make a view of a compressed format with any
+ * format other than the surface format. None of the userspace APIs
+ * allow for this directly and doing so would mess up a number of
+ * surface parameters such as Width, Height, and alignments. Ideally,
+ * we'd like to assert that the two formats match. However, we have an
+ * S3TC workaround that requires us to do reinterpretation. So assert
+ * that they're at least the same bpb and block size.
+ */
+ MAYBE_UNUSED const struct isl_format_layout *surf_fmtl =
+ isl_format_get_layout(info->surf->format);
+ MAYBE_UNUSED const struct isl_format_layout *view_fmtl =
+ isl_format_get_layout(info->view->format);
+ assert(surf_fmtl->bpb == view_fmtl->bpb);
+ assert(surf_fmtl->bw == view_fmtl->bw);
+ assert(surf_fmtl->bh == view_fmtl->bh);
+ }
+
s.SurfaceFormat = info->view->format;
+#if GEN_GEN <= 5
+ s.ColorBufferComponentWriteDisables = info->write_disables;
+#else
+ assert(info->write_disables == 0);
+#endif
+
#if GEN_IS_HASWELL
s.IntegerSurfaceFormat = isl_format_has_int_channel(s.SurfaceFormat);
#endif
@@ -451,6 +477,38 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
#endif
#if (GEN_GEN >= 8 || GEN_IS_HASWELL)
+ if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
+ /* From the Sky Lake PRM Vol. 2d,
+ * RENDER_SURFACE_STATE::Shader Channel Select Red
+ *
+ * "For Render Target, Red, Green and Blue Shader Channel Selects
+ * MUST be such that only valid components can be swapped i.e. only
+ * change the order of components in the pixel. Any other values for
+ * these Shader Channel Select fields are not valid for Render
+ * Targets. This also means that there MUST not be multiple shader
+ * channels mapped to the same RT channel."
+ */
+ assert(info->view->swizzle.r == ISL_CHANNEL_SELECT_RED ||
+ info->view->swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
+ info->view->swizzle.r == ISL_CHANNEL_SELECT_BLUE);
+ assert(info->view->swizzle.g == ISL_CHANNEL_SELECT_RED ||
+ info->view->swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
+ info->view->swizzle.g == ISL_CHANNEL_SELECT_BLUE);
+ assert(info->view->swizzle.b == ISL_CHANNEL_SELECT_RED ||
+ info->view->swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
+ info->view->swizzle.b == ISL_CHANNEL_SELECT_BLUE);
+ assert(info->view->swizzle.r != info->view->swizzle.g);
+ assert(info->view->swizzle.r != info->view->swizzle.b);
+ assert(info->view->swizzle.g != info->view->swizzle.b);
+
+ /* From the Sky Lake PRM Vol. 2d,
+ * RENDER_SURFACE_STATE::Shader Channel Select Alpha
+ *
+ * "For Render Target, this field MUST be programmed to
+ * value = SCS_ALPHA."
+ */
+ assert(info->view->swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
+ }
s.ShaderChannelSelectRed = info->view->swizzle.r;
s.ShaderChannelSelectGreen = info->view->swizzle.g;
s.ShaderChannelSelectBlue = info->view->swizzle.b;
@@ -503,27 +561,52 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
#if GEN_GEN >= 7
if (info->aux_surf && info->aux_usage != ISL_AUX_USAGE_NONE) {
+ /* The docs don't appear to say anything whatsoever about compression
+ * and the data port. Testing seems to indicate that the data port
+ * completely ignores the AuxiliarySurfaceMode field.
+ */
+ assert(!(info->view->usage & ISL_SURF_USAGE_STORAGE_BIT));
+
struct isl_tile_info tile_info;
- isl_surf_get_tile_info(dev, info->aux_surf, &tile_info);
+ isl_surf_get_tile_info(info->aux_surf, &tile_info);
uint32_t pitch_in_tiles =
info->aux_surf->row_pitch / tile_info.phys_extent_B.width;
+ s.AuxiliarySurfaceBaseAddress = info->aux_address;
+ s.AuxiliarySurfacePitch = pitch_in_tiles - 1;
+
#if GEN_GEN >= 8
assert(GEN_GEN >= 9 || info->aux_usage != ISL_AUX_USAGE_CCS_E);
- s.AuxiliarySurfacePitch = pitch_in_tiles - 1;
/* Auxiliary surfaces in ISL have compressed formats but the hardware
* doesn't expect our definition of the compression, it expects qpitch
* in units of samples on the main surface.
*/
s.AuxiliarySurfaceQPitch =
isl_surf_get_array_pitch_sa_rows(info->aux_surf) >> 2;
- s.AuxiliarySurfaceBaseAddress = info->aux_address;
+
+ if (info->aux_usage == ISL_AUX_USAGE_HIZ) {
+ /* The number of samples must be 1 */
+ assert(info->surf->samples == 1);
+
+ /* The dimension must not be 3D */
+ assert(info->surf->dim != ISL_SURF_DIM_3D);
+
+ /* The format must be one of the following: */
+ switch (info->view->format) {
+ case ISL_FORMAT_R32_FLOAT:
+ case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
+ case ISL_FORMAT_R16_UNORM:
+ break;
+ default:
+ assert(!"Incompatible HiZ Sampling format");
+ break;
+ }
+ }
+
s.AuxiliarySurfaceMode = isl_to_gen_aux_mode[info->aux_usage];
#else
assert(info->aux_usage == ISL_AUX_USAGE_MCS ||
info->aux_usage == ISL_AUX_USAGE_CCS_D);
- s.MCSBaseAddress = info->aux_address,
- s.MCSSurfacePitch = pitch_in_tiles - 1;
s.MCSEnable = true;
#endif
}
@@ -546,6 +629,15 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
s.SamplerL2BypassModeDisable = true;
break;
default:
+ /* From the SKL PRM, Programming Note under Sampler Output Channel
+ * Mapping:
+ *
+ * If a surface has an associated HiZ Auxilliary surface, the
+ * Sampler L2 Bypass Mode Disable field in the RENDER_SURFACE_STATE
+ * must be set.
+ */
+ if (GEN_GEN >= 9 && info->aux_usage == ISL_AUX_USAGE_HIZ)
+ s.SamplerL2BypassModeDisable = true;
break;
}
}
@@ -602,7 +694,7 @@ isl_genX(buffer_fill_state_s)(void *state,
*/
if (info->format == ISL_FORMAT_RAW) {
assert(num_elements <= (1ull << 30));
- assert((num_elements & 3) == 0);
+ assert(num_elements > 0);
} else {
assert(num_elements <= (1ull << 27));
}