summaryrefslogtreecommitdiff
path: root/lib/mesa/src
diff options
context:
space:
mode:
author Jonathan Gray <jsg@cvs.openbsd.org> 2019-01-29 10:58:00 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org> 2019-01-29 10:58:00 +0000
commit ee4f0b0cddedf0afced2ceb23523bf373cbd4847 (patch)
tree 539e540aca51a4f17ba7adcb821538699091a5b9 /lib/mesa/src
parent 7cb5aacfe509bd505d403a38471c35da6ba95200 (diff)
Import Mesa 18.3.2
Diffstat (limited to 'lib/mesa/src')
-rw-r--r-- lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c | 216
-rw-r--r-- lib/mesa/src/intel/dev/gen_device_info.h | 132
-rw-r--r-- lib/mesa/src/intel/tools/gen_context.h | 63
3 files changed, 226 insertions, 185 deletions
diff --git a/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c b/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c
index f96698b08..1a7671b74 100644
--- a/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c
+++ b/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c
@@ -27,6 +27,60 @@
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
+/* The higher compiler layers use the GL enums for image formats even if
+ * they come in from SPIR-V or Vulkan. We need to turn them into an ISL
+ * enum before we can use them.
+ *
+ * Each GL sized internal format listed below maps to the ISL format with
+ * the same channel layout and numeric type. GL_NONE is accepted and maps
+ * to ISL_FORMAT_UNSUPPORTED without asserting. Any value not listed trips
+ * the assert in the default case and also yields ISL_FORMAT_UNSUPPORTED.
+ */
+static enum isl_format
+isl_format_for_gl_format(uint32_t gl_format)
+{
+ switch (gl_format) {
+ case GL_R8: return ISL_FORMAT_R8_UNORM;
+ case GL_R8_SNORM: return ISL_FORMAT_R8_SNORM;
+ case GL_R8UI: return ISL_FORMAT_R8_UINT;
+ case GL_R8I: return ISL_FORMAT_R8_SINT;
+ case GL_RG8: return ISL_FORMAT_R8G8_UNORM;
+ case GL_RG8_SNORM: return ISL_FORMAT_R8G8_SNORM;
+ case GL_RG8UI: return ISL_FORMAT_R8G8_UINT;
+ case GL_RG8I: return ISL_FORMAT_R8G8_SINT;
+ case GL_RGBA8: return ISL_FORMAT_R8G8B8A8_UNORM;
+ case GL_RGBA8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM;
+ case GL_RGBA8UI: return ISL_FORMAT_R8G8B8A8_UINT;
+ case GL_RGBA8I: return ISL_FORMAT_R8G8B8A8_SINT;
+ case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT;
+ case GL_RGB10_A2: return ISL_FORMAT_R10G10B10A2_UNORM;
+ case GL_RGB10_A2UI: return ISL_FORMAT_R10G10B10A2_UINT;
+ case GL_R16: return ISL_FORMAT_R16_UNORM;
+ case GL_R16_SNORM: return ISL_FORMAT_R16_SNORM;
+ case GL_R16F: return ISL_FORMAT_R16_FLOAT;
+ case GL_R16UI: return ISL_FORMAT_R16_UINT;
+ case GL_R16I: return ISL_FORMAT_R16_SINT;
+ case GL_RG16: return ISL_FORMAT_R16G16_UNORM;
+ case GL_RG16_SNORM: return ISL_FORMAT_R16G16_SNORM;
+ case GL_RG16F: return ISL_FORMAT_R16G16_FLOAT;
+ case GL_RG16UI: return ISL_FORMAT_R16G16_UINT;
+ case GL_RG16I: return ISL_FORMAT_R16G16_SINT;
+ case GL_RGBA16: return ISL_FORMAT_R16G16B16A16_UNORM;
+ case GL_RGBA16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM;
+ case GL_RGBA16F: return ISL_FORMAT_R16G16B16A16_FLOAT;
+ case GL_RGBA16UI: return ISL_FORMAT_R16G16B16A16_UINT;
+ case GL_RGBA16I: return ISL_FORMAT_R16G16B16A16_SINT;
+ case GL_R32F: return ISL_FORMAT_R32_FLOAT;
+ case GL_R32UI: return ISL_FORMAT_R32_UINT;
+ case GL_R32I: return ISL_FORMAT_R32_SINT;
+ case GL_RG32F: return ISL_FORMAT_R32G32_FLOAT;
+ case GL_RG32UI: return ISL_FORMAT_R32G32_UINT;
+ case GL_RG32I: return ISL_FORMAT_R32G32_SINT;
+ case GL_RGBA32F: return ISL_FORMAT_R32G32B32A32_FLOAT;
+ case GL_RGBA32UI: return ISL_FORMAT_R32G32B32A32_UINT;
+ case GL_RGBA32I: return ISL_FORMAT_R32G32B32A32_SINT;
+ case GL_NONE: return ISL_FORMAT_UNSUPPORTED;
+ default:
+ assert(!"Invalid image format");
+ return ISL_FORMAT_UNSUPPORTED; /* only reached when assert() is compiled out */
+ }
+}
+
static nir_ssa_def *
_load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
{
@@ -137,7 +191,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo,
* by passing in the miplevel as tile.z for 3-D textures and 0 in
* tile.z for 2-D array textures.
*
- * See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface
+ * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
* Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
* of the hardware 3D texture and 2D array layouts.
*/
@@ -200,7 +254,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo,
/* Multiply by the Bpp value. */
addr = nir_imul(b, idx, nir_channel(b, stride, 0));
- if (devinfo->ver < 8 && !devinfo->is_baytrail) {
+ if (devinfo->gen < 8 && !devinfo->is_baytrail) {
/* Take into account the two dynamically specified shifts. Both are
* used to implement swizzling of X-tiled surfaces. For Y-tiled
* surfaces only one bit needs to be XOR-ed with bit 6 of the memory
@@ -259,6 +313,15 @@ get_format_info(enum isl_format fmt)
}
static nir_ssa_def *
+nir_zero_vec(nir_builder *b, unsigned num_components)
+{ /* Emits an immediate with num_components components, every byte zero. */
+ nir_const_value v;
+ memset(&v, 0, sizeof(v)); /* clear the whole constant, not just the used components */
+
+ return nir_build_imm(b, num_components, 32, v); /* 32 is passed as the bit size */
+}
+
+static nir_ssa_def *
convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
nir_ssa_def *color,
enum isl_format image_fmt, enum isl_format lower_fmt,
@@ -299,7 +362,7 @@ convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
* their least significant bits. However, the data in the high bits is
* garbage so we have to discard it.
*/
- if (devinfo->ver == 7 && !devinfo->is_haswell &&
+ if (devinfo->gen == 7 && !devinfo->is_haswell &&
(lower_fmt == ISL_FORMAT_R16_UINT ||
lower_fmt == ISL_FORMAT_R8_UINT))
color = nir_format_mask_uvec(b, color, lower.bits);
@@ -368,7 +431,7 @@ lower_image_load_instr(nir_builder *b,
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
nir_variable *var = nir_deref_instr_get_variable(deref);
const enum isl_format image_fmt =
- isl_format_for_pipe_format(var->data.image.format);
+ isl_format_for_gl_format(var->data.image.format);
if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
const enum isl_format lower_fmt =
@@ -379,7 +442,7 @@ lower_image_load_instr(nir_builder *b,
* conversion.
*/
nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
intrin->num_components = isl_format_get_num_channels(lower_fmt);
intrin->dest.ssa.num_components = intrin->num_components;
@@ -391,7 +454,7 @@ lower_image_load_instr(nir_builder *b,
image_fmt, lower_fmt,
dest_components);
- nir_ssa_def_rewrite_uses(placeholder, color);
+ nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color));
nir_instr_remove(placeholder->parent_instr);
} else {
const struct isl_format_layout *image_fmtl =
@@ -408,9 +471,9 @@ lower_image_load_instr(nir_builder *b,
nir_ssa_def *coord = intrin->src[1].ssa;
nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
- if (devinfo->ver == 7 && !devinfo->is_haswell) {
+ if (devinfo->gen == 7 && !devinfo->is_haswell) {
/* Check whether the first stride component (i.e. the Bpp value)
- * is greater than four, what on Gfx7 indicates that a surface of
+ * is greater than four, which on Gen7 indicates that a surface of
* type RAW has been bound for untyped access. Reading or writing
* to a surface of type other than RAW using untyped surface
* messages causes a hang on IVB and VLV.
@@ -423,23 +486,29 @@ lower_image_load_instr(nir_builder *b,
nir_push_if(b, do_load);
nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
- nir_ssa_def *load =
- nir_image_deref_load_raw_intel(b, image_fmtl->bpb / 32, 32,
- &deref->dest.ssa, addr);
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_image_deref_load_raw_intel);
+ load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+ load->src[1] = nir_src_for_ssa(addr);
+ load->num_components = image_fmtl->bpb / 32;
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ load->num_components, 32, NULL);
+ nir_builder_instr_insert(b, &load->instr);
nir_push_else(b, NULL);
- nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32);
+ nir_ssa_def *zero = nir_zero_vec(b, load->num_components);
nir_pop_if(b, NULL);
- nir_ssa_def *value = nir_if_phi(b, load, zero);
+ nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero);
nir_ssa_def *color = convert_color_for_load(b, devinfo, value,
image_fmt, raw_fmt,
dest_components);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa, color);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color));
}
return true;
@@ -475,16 +544,38 @@ convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo,
break;
case ISL_SFLOAT:
- if (image.bits[0] == 16)
- color = nir_format_float_to_half(b, color);
+ if (image.bits[0] == 16) {
+ nir_ssa_def *f16comps[4];
+ for (unsigned i = 0; i < image.chans; i++) {
+ f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, color, i),
+ nir_imm_float(b, 0));
+ }
+ color = nir_vec(b, f16comps, image.chans);
+ }
break;
case ISL_UINT:
- color = nir_format_clamp_uint(b, color, image.bits);
+ if (image.bits[0] < 32) {
+ nir_const_value max;
+ for (unsigned i = 0; i < image.chans; i++) {
+ assert(image.bits[i] < 32);
+ max.u32[i] = (1u << image.bits[i]) - 1;
+ }
+ color = nir_umin(b, color, nir_build_imm(b, image.chans, 32, max));
+ }
break;
case ISL_SINT:
- color = nir_format_clamp_sint(b, color, image.bits);
+ if (image.bits[0] < 32) {
+ nir_const_value min, max;
+ for (unsigned i = 0; i < image.chans; i++) {
+ assert(image.bits[i] < 32);
+ max.i32[i] = (1 << (image.bits[i] - 1)) - 1;
+ min.i32[i] = -(1 << (image.bits[i] - 1));
+ }
+ color = nir_imin(b, color, nir_build_imm(b, image.chans, 32, max));
+ color = nir_imax(b, color, nir_build_imm(b, image.chans, 32, min));
+ }
break;
default:
@@ -523,11 +614,11 @@ lower_image_store_instr(nir_builder *b,
/* For write-only surfaces, we trust that the hardware can just do the
* conversion for us.
*/
- if (var->data.access & ACCESS_NON_READABLE)
+ if (var->data.image.access & ACCESS_NON_READABLE)
return false;
const enum isl_format image_fmt =
- isl_format_for_pipe_format(var->data.image.format);
+ isl_format_for_gl_format(var->data.image.format);
if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
const enum isl_format lower_fmt =
@@ -556,9 +647,9 @@ lower_image_store_instr(nir_builder *b,
nir_ssa_def *coord = intrin->src[1].ssa;
nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
- if (devinfo->ver == 7 && !devinfo->is_haswell) {
+ if (devinfo->gen == 7 && !devinfo->is_haswell) {
/* Check whether the first stride component (i.e. the Bpp value)
- * is greater than four, what on Gfx7 indicates that a surface of
+ * is greater than four, which on Gen7 indicates that a surface of
* type RAW has been bound for untyped access. Reading or writing
* to a surface of type other than RAW using untyped surface
* messages causes a hang on IVB and VLV.
@@ -595,7 +686,7 @@ lower_image_atomic_instr(nir_builder *b,
const struct gen_device_info *devinfo,
nir_intrinsic_instr *intrin)
{
- if (devinfo->is_haswell || devinfo->ver >= 8)
+ if (devinfo->is_haswell || devinfo->gen >= 8)
return false;
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
@@ -604,7 +695,7 @@ lower_image_atomic_instr(nir_builder *b,
/* Use an undef to hold the uses of the load conversion. */
nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
/* Check the first component of the size field to find out if the
* image is bound. Necessary on IVB for typed atomics because
@@ -620,7 +711,7 @@ lower_image_atomic_instr(nir_builder *b,
nir_pop_if(b, NULL);
nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero);
- nir_ssa_def_rewrite_uses(placeholder, result);
+ nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result));
return true;
}
@@ -636,19 +727,17 @@ lower_image_size_instr(nir_builder *b,
/* For write-only images, we have an actual image surface so we fall back
* and let the back-end emit a TXS for this.
*/
- if (var->data.access & ACCESS_NON_READABLE)
+ if (var->data.image.access & ACCESS_NON_READABLE)
return false;
/* If we have a matching typed format, then we have an actual image surface
* so we fall back and let the back-end emit a TXS for this.
*/
const enum isl_format image_fmt =
- isl_format_for_pipe_format(var->data.image.format);
+ isl_format_for_gl_format(var->data.image.format);
if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
return false;
- assert(nir_src_as_uint(intrin->src[1]) == 0);
-
b->cursor = nir_instr_remove(&intrin->instr);
nir_ssa_def *size = load_image_param(b, deref, SIZE);
@@ -669,15 +758,14 @@ lower_image_size_instr(nir_builder *b,
comps[c] = nir_imm_int(b, 1);
nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec));
return true;
}
bool
brw_nir_lower_image_load_store(nir_shader *shader,
- const struct gen_device_info *devinfo,
- bool *uses_atomic_load_store)
+ const struct gen_device_info *devinfo)
{
bool progress = false;
@@ -685,7 +773,6 @@ brw_nir_lower_image_load_store(nir_shader *shader,
if (function->impl == NULL)
continue;
- bool impl_progress = false;
nir_foreach_block_safe(block, function->impl) {
nir_builder b;
nir_builder_init(&b, function->impl);
@@ -698,33 +785,29 @@ brw_nir_lower_image_load_store(nir_shader *shader,
switch (intrin->intrinsic) {
case nir_intrinsic_image_deref_load:
if (lower_image_load_instr(&b, devinfo, intrin))
- impl_progress = true;
+ progress = true;
break;
case nir_intrinsic_image_deref_store:
if (lower_image_store_instr(&b, devinfo, intrin))
- impl_progress = true;
+ progress = true;
break;
case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_min:
+ case nir_intrinsic_image_deref_atomic_max:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_image_deref_atomic_comp_swap:
- if (uses_atomic_load_store)
- *uses_atomic_load_store = true;
if (lower_image_atomic_instr(&b, devinfo, intrin))
- impl_progress = true;
+ progress = true;
break;
case nir_intrinsic_image_deref_size:
if (lower_image_size_instr(&b, devinfo, intrin))
- impl_progress = true;
+ progress = true;
break;
default:
@@ -734,13 +817,50 @@ brw_nir_lower_image_load_store(nir_shader *shader,
}
}
- if (impl_progress) {
- progress = true;
+ if (progress)
nir_metadata_preserve(function->impl, nir_metadata_none);
- } else {
- nir_metadata_preserve(function->impl, nir_metadata_all);
- }
}
return progress;
}
+/* Rewrite an image_deref_* intrinsic in place into the matching image_*
+ * intrinsic. The opcode is swapped via the CASE() table below, the image
+ * dimension, array-ness, access flags and format are stored on the
+ * intrinsic, and source 0 is replaced with the caller-supplied index.
+ * An intrinsic that is not listed in the table hits unreachable().
+ */
+void
+brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+ nir_ssa_def *index)
+{
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); /* source 0 is read as the image deref */
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ switch (intrin->intrinsic) {
+#define CASE(op) \
+ case nir_intrinsic_image_deref_##op: \
+ intrin->intrinsic = nir_intrinsic_image_##op; \
+ break;
+ CASE(load)
+ CASE(store)
+ CASE(atomic_add)
+ CASE(atomic_min)
+ CASE(atomic_max)
+ CASE(atomic_and)
+ CASE(atomic_or)
+ CASE(atomic_xor)
+ CASE(atomic_exchange)
+ CASE(atomic_comp_swap)
+ CASE(atomic_fadd)
+ CASE(size)
+ CASE(samples)
+ CASE(load_raw_intel)
+ CASE(store_raw_intel)
+#undef CASE
+ default:
+ unreachable("Unhanded image intrinsic");
+ }
+ /* Record the image properties on the intrinsic itself, taken from the
+ * deref type and the variable, before source 0 stops pointing at the deref.
+ */
+ nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
+ nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
+ nir_intrinsic_set_access(intrin, var->data.image.access);
+ nir_intrinsic_set_format(intrin, var->data.image.format);
+ /* Point source 0 at the supplied index instead of the deref. */
+ nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+ nir_src_for_ssa(index));
+}
diff --git a/lib/mesa/src/intel/dev/gen_device_info.h b/lib/mesa/src/intel/dev/gen_device_info.h
index fe5ac11a3..291a3cce8 100644
--- a/lib/mesa/src/intel/dev/gen_device_info.h
+++ b/lib/mesa/src/intel/dev/gen_device_info.h
@@ -36,20 +36,16 @@ extern "C" {
struct drm_i915_query_topology_info;
-#define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gfx10 */
-#define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gfx11 */
-#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
-#define GEN_DEVICE_MAX_PIXEL_PIPES (3) /* Maximum on gfx12 */
+#define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gen10 */
+#define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gen11 */
+#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (10) /* Maximum on Haswell */
/**
* Intel hardware information and quirks
*/
struct gen_device_info
{
- /* Driver internal numbers used to differentiate platforms. */
- int ver;
- int verx10;
- int revision;
+ int gen; /**< Generation number: 4, 5, 6, 7, ... */
int gt;
bool is_g4x;
@@ -63,11 +59,7 @@ struct gen_device_info
bool is_kabylake;
bool is_geminilake;
bool is_coffeelake;
- bool is_elkhartlake;
- bool is_tigerlake;
- bool is_rocketlake;
- bool is_dg1;
- bool is_alderlake;
+ bool is_cannonlake;
bool has_hiz_and_separate_stencil;
bool must_use_separate_stencil;
@@ -75,16 +67,12 @@ struct gen_device_info
bool has_llc;
bool has_pln;
- bool has_64bit_float;
- bool has_64bit_int;
+ bool has_64bit_types;
bool has_integer_dword_mul;
bool has_compr4;
bool has_surface_tile_offset;
bool supports_simd16_3src;
- bool disable_ccs_repack;
- bool has_aux_map;
- bool has_tiling_uapi;
- bool has_ray_tracing;
+ bool has_resource_streamer;
/**
* \name Intel hardware quirks
@@ -135,11 +123,6 @@ struct gen_device_info
unsigned num_subslices[GEN_DEVICE_MAX_SUBSLICES];
/**
- * Number of subslices on each pixel pipe (ICL).
- */
- unsigned ppipe_subslices[GEN_DEVICE_MAX_PIXEL_PIPES];
-
- /**
* Upper bound of number of EU per subslice (some SKUs might have just 1 EU
* fused across all subslices, like 47 EUs, in which case this number won't
* be accurate for one subslice).
@@ -194,7 +177,7 @@ struct gen_device_info
* automatically scale pixel shader thread count, based on a single value
* programmed into 3DSTATE_PS.
*
- * To calculate the maximum number of threads for Gfx8 beyond (which have
+ * To calculate the maximum number of threads for Gen8 and beyond (which have
* multiple Pixel Shader Dispatchers):
*
* - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
@@ -213,14 +196,14 @@ struct gen_device_info
struct {
/**
- * Fixed size of the URB.
+ * Hardware default URB size.
*
- * On Gfx6 and DG1, this is measured in KB. Gfx4-5 instead measure
- * this in 512b blocks, as that's more convenient there.
+ * The units this is expressed in are somewhat inconsistent: 512b units
+ * on Gen4-5, KB on Gen6-7, and KB times the slice count on Gen8+.
*
- * On most Gfx7+ platforms, the URB is a section of the L3 cache,
- * and can be resized based on the L3 programming. For those platforms,
- * simply leave this field blank (zero) - it isn't used.
+ * Look up "URB Size" in the "Device Attributes" page, and take the
+ * maximum. Look up the slice count for each GT SKU on the same page.
+ * urb.size = URB Size (kbytes) / slice count
*/
unsigned size;
@@ -236,17 +219,11 @@ struct gen_device_info
} urb;
/**
- * Size of the command streamer prefetch. This is important to know for
- * self modifying batches.
- */
- unsigned cs_prefetch_size;
-
- /**
* For the longest time the timestamp frequency for Gen's timestamp counter
* could be assumed to be 12.5MHz, where the least significant bit neatly
* corresponded to 80 nanoseconds.
*
- * Since Gfx9 the numbers aren't so round, with a a frequency of 12MHz for
+ * Since Gen9 the numbers aren't so round, with a frequency of 12MHz for
* SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for
* BXT.
*
@@ -264,37 +241,17 @@ struct gen_device_info
*/
uint64_t timestamp_frequency;
- uint64_t aperture_bytes;
-
/**
* ID to put into the .aub files.
*/
int simulator_id;
- /**
- * holds the pci device id
- */
- uint32_t chipset_id;
-
- /**
- * no_hw is true when the chipset_id pci device id has been overridden
- */
- bool no_hw;
/** @} */
};
-#ifdef GFX_VER
-
-#define gen_device_info_is_9lp(devinfo) \
- (GFX_VER == 9 && ((devinfo)->is_broxton || (devinfo)->is_geminilake))
-
-#else
-
#define gen_device_info_is_9lp(devinfo) \
((devinfo)->is_broxton || (devinfo)->is_geminilake)
-#endif
-
static inline bool
gen_device_info_subslice_available(const struct gen_device_info *devinfo,
int slice, int subslice)
@@ -303,58 +260,19 @@ gen_device_info_subslice_available(const struct gen_device_info *devinfo,
subslice / 8] & (1U << (subslice % 8))) != 0;
}
-static inline bool
-gen_device_info_eu_available(const struct gen_device_info *devinfo,
- int slice, int subslice, int eu)
-{
- unsigned subslice_offset = slice * devinfo->eu_slice_stride +
- subslice * devinfo->eu_subslice_stride;
-
- return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
-}
-
-static inline uint32_t
-gen_device_info_subslice_total(const struct gen_device_info *devinfo)
-{
- uint32_t total = 0;
-
- for (uint32_t i = 0; i < devinfo->num_slices; i++)
- total += __builtin_popcount(devinfo->subslice_masks[i]);
-
- return total;
-}
-
-static inline uint32_t
-gen_device_info_eu_total(const struct gen_device_info *devinfo)
-{
- uint32_t total = 0;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(devinfo->eu_masks); i++)
- total += __builtin_popcount(devinfo->eu_masks[i]);
-
- return total;
-}
-
-static inline unsigned
-gen_device_info_num_dual_subslices(UNUSED const struct gen_device_info *devinfo)
-{
- unreachable("TODO");
-}
-
+int gen_get_pci_device_id_override(void);
int gen_device_name_to_pci_device_id(const char *name);
+bool gen_get_device_info(int devid, struct gen_device_info *devinfo);
const char *gen_get_device_name(int devid);
-static inline uint64_t
-gen_device_info_timebase_scale(const struct gen_device_info *devinfo,
- uint64_t gpu_timestamp)
-{
- return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
-}
-
-bool gen_get_device_info_from_fd(int fh, struct gen_device_info *devinfo);
-bool gen_get_device_info_from_pci_id(int pci_id,
- struct gen_device_info *devinfo);
-int gen_get_aperture_size(int fd, uint64_t *size);
+/* Used with SLICE_MASK/SUBSLICE_MASK values from DRM_I915_GETPARAM. */
+void gen_device_info_update_from_masks(struct gen_device_info *devinfo,
+ uint32_t slice_mask,
+ uint32_t subslice_mask,
+ uint32_t n_eus);
+/* Used with DRM_IOCTL_I915_QUERY & DRM_I915_QUERY_TOPOLOGY_INFO. */
+void gen_device_info_update_from_topology(struct gen_device_info *devinfo,
+ const struct drm_i915_query_topology_info *topology);
#ifdef __cplusplus
}
diff --git a/lib/mesa/src/intel/tools/gen_context.h b/lib/mesa/src/intel/tools/gen_context.h
index 39041408b..3f488c07c 100644
--- a/lib/mesa/src/intel/tools/gen_context.h
+++ b/lib/mesa/src/intel/tools/gen_context.h
@@ -21,22 +21,22 @@
* IN THE SOFTWARE.
*/
-#ifndef INTEL_CONTEXT_H
-#define INTEL_CONTEXT_H
+#ifndef GEN_CONTEXT_H
+#define GEN_CONTEXT_H
#include <stdint.h>
#define RING_SIZE (1 * 4096)
#define PPHWSP_SIZE (1 * 4096)
-#define GFX11_LR_CONTEXT_RENDER_SIZE (14 * 4096)
-#define GFX10_LR_CONTEXT_RENDER_SIZE (19 * 4096)
-#define GFX9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
-#define GFX8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
-#define GFX8_LR_CONTEXT_OTHER_SIZE (2 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * 4096)
-#define CONTEXT_RENDER_SIZE GFX9_LR_CONTEXT_RENDER_SIZE /* largest size */
-#define CONTEXT_OTHER_SIZE GFX8_LR_CONTEXT_OTHER_SIZE
+#define CONTEXT_RENDER_SIZE GEN9_LR_CONTEXT_RENDER_SIZE /* largest size */
+#define CONTEXT_OTHER_SIZE GEN8_LR_CONTEXT_OTHER_SIZE
#define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
#define MI_LRI_FORCE_POSTED (1<<12)
@@ -70,10 +70,26 @@
#define MEMORY_MAP_SIZE (64 /* MiB */ * 1024 * 1024)
#define PTE_SIZE 4
-#define GFX8_PTE_SIZE 8
+#define GEN8_PTE_SIZE 8
#define NUM_PT_ENTRIES (ALIGN(MEMORY_MAP_SIZE, 4096) / 4096)
-#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GFX8_PTE_SIZE, 4096)
+#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GEN8_PTE_SIZE, 4096)
+
+#define STATIC_GGTT_MAP_START 0
+
+#define RENDER_RING_ADDR STATIC_GGTT_MAP_START
+#define RENDER_CONTEXT_ADDR (RENDER_RING_ADDR + RING_SIZE)
+
+#define BLITTER_RING_ADDR (RENDER_CONTEXT_ADDR + PPHWSP_SIZE + GEN9_LR_CONTEXT_RENDER_SIZE)
+#define BLITTER_CONTEXT_ADDR (BLITTER_RING_ADDR + RING_SIZE)
+
+#define VIDEO_RING_ADDR (BLITTER_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE)
+#define VIDEO_CONTEXT_ADDR (VIDEO_RING_ADDR + RING_SIZE)
+
+#define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE)
+#define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
+
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
#define CONTEXT_FLAGS (0x339) /* Normal Priority | L3-LLC Coherency |
* PPGTT Enabled |
@@ -81,24 +97,11 @@
* Valid
*/
-#define MI_LOAD_REGISTER_IMM_vals(data, flags, ...) do { \
- uint32_t __regs[] = { __VA_ARGS__ }; \
- assert((ARRAY_SIZE(__regs) % 2) == 0); \
- *(data)++ = MI_LOAD_REGISTER_IMM_n(ARRAY_SIZE(__regs) / 2) | (flags); \
- for (unsigned __e = 0; __e < ARRAY_SIZE(__regs); __e++) \
- *(data)++ = __regs[__e]; \
- } while (0)
-
-
-struct gen_context_parameters {
- uint64_t pml4_addr;
- uint64_t ring_addr;
- uint32_t ring_size;
-};
-
-typedef void (*gen_context_init_t)(const struct gen_context_parameters *, uint32_t *, uint32_t *);
+#define RENDER_CONTEXT_DESCRIPTOR ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR | CONTEXT_FLAGS)
+#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS)
+#define VIDEO_CONTEXT_DESCRIPTOR ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR | CONTEXT_FLAGS)
-#include "gfx8_context.h"
-#include "gfx10_context.h"
+#include "gen8_context.h"
+#include "gen10_context.h"
-#endif /* INTEL_CONTEXT_H */
+#endif /* GEN_CONTEXT_H */