From ee4f0b0cddedf0afced2ceb23523bf373cbd4847 Mon Sep 17 00:00:00 2001 From: Jonathan Gray Date: Tue, 29 Jan 2019 10:58:00 +0000 Subject: Import Mesa 18.3.2 --- .../compiler/brw_nir_lower_image_load_store.c | 216 ++++++++++++++++----- lib/mesa/src/intel/dev/gen_device_info.h | 132 +++---------- lib/mesa/src/intel/tools/gen_context.h | 63 +++--- 3 files changed, 226 insertions(+), 185 deletions(-) (limited to 'lib/mesa/src') diff --git a/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c b/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c index f96698b08..1a7671b74 100644 --- a/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c +++ b/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c @@ -27,6 +27,60 @@ #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_format_convert.h" +/* The higher compiler layers use the GL enums for image formats even if + * they come in from SPIR-V or Vulkan. We need to turn them into an ISL + * enum before we can use them. + */ +static enum isl_format +isl_format_for_gl_format(uint32_t gl_format) +{ + switch (gl_format) { + case GL_R8: return ISL_FORMAT_R8_UNORM; + case GL_R8_SNORM: return ISL_FORMAT_R8_SNORM; + case GL_R8UI: return ISL_FORMAT_R8_UINT; + case GL_R8I: return ISL_FORMAT_R8_SINT; + case GL_RG8: return ISL_FORMAT_R8G8_UNORM; + case GL_RG8_SNORM: return ISL_FORMAT_R8G8_SNORM; + case GL_RG8UI: return ISL_FORMAT_R8G8_UINT; + case GL_RG8I: return ISL_FORMAT_R8G8_SINT; + case GL_RGBA8: return ISL_FORMAT_R8G8B8A8_UNORM; + case GL_RGBA8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM; + case GL_RGBA8UI: return ISL_FORMAT_R8G8B8A8_UINT; + case GL_RGBA8I: return ISL_FORMAT_R8G8B8A8_SINT; + case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT; + case GL_RGB10_A2: return ISL_FORMAT_R10G10B10A2_UNORM; + case GL_RGB10_A2UI: return ISL_FORMAT_R10G10B10A2_UINT; + case GL_R16: return ISL_FORMAT_R16_UNORM; + case GL_R16_SNORM: return ISL_FORMAT_R16_SNORM; + case GL_R16F: return ISL_FORMAT_R16_FLOAT; + case GL_R16UI: return ISL_FORMAT_R16_UINT; + case GL_R16I: return ISL_FORMAT_R16_SINT; + case GL_RG16: return ISL_FORMAT_R16G16_UNORM; + case GL_RG16_SNORM: return ISL_FORMAT_R16G16_SNORM; + case GL_RG16F: return ISL_FORMAT_R16G16_FLOAT; + case GL_RG16UI: return ISL_FORMAT_R16G16_UINT; + case GL_RG16I: return ISL_FORMAT_R16G16_SINT; + case GL_RGBA16: return ISL_FORMAT_R16G16B16A16_UNORM; + case GL_RGBA16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM; + case GL_RGBA16F: return ISL_FORMAT_R16G16B16A16_FLOAT; + case GL_RGBA16UI: return ISL_FORMAT_R16G16B16A16_UINT; + case GL_RGBA16I: return ISL_FORMAT_R16G16B16A16_SINT; + case GL_R32F: return ISL_FORMAT_R32_FLOAT; + case GL_R32UI: return ISL_FORMAT_R32_UINT; + case GL_R32I: return ISL_FORMAT_R32_SINT; + case GL_RG32F: return ISL_FORMAT_R32G32_FLOAT; + case GL_RG32UI: return ISL_FORMAT_R32G32_UINT; + case GL_RG32I: return ISL_FORMAT_R32G32_SINT; + case GL_RGBA32F: return ISL_FORMAT_R32G32B32A32_FLOAT; + case GL_RGBA32UI: return ISL_FORMAT_R32G32B32A32_UINT; + case GL_RGBA32I: return ISL_FORMAT_R32G32B32A32_SINT; + case GL_NONE: return ISL_FORMAT_UNSUPPORTED; + default: + assert(!"Invalid image format"); + return ISL_FORMAT_UNSUPPORTED; + } +} + static nir_ssa_def * _load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset) { @@ -137,7 +191,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo, * by passing in the miplevel as tile.z for 3-D textures and 0 in * tile.z for 2-D array textures. * - * See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface + * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion * of the hardware 3D texture and 2D array layouts. */ @@ -200,7 +254,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo, /* Multiply by the Bpp value. */ addr = nir_imul(b, idx, nir_channel(b, stride, 0)); - if (devinfo->ver < 8 && !devinfo->is_baytrail) { + if (devinfo->gen < 8 && !devinfo->is_baytrail) { /* Take into account the two dynamically specified shifts. Both are * used to implement swizzling of X-tiled surfaces. For Y-tiled * surfaces only one bit needs to be XOR-ed with bit 6 of the memory @@ -258,6 +312,15 @@ get_format_info(enum isl_format fmt) }; } +static nir_ssa_def * +nir_zero_vec(nir_builder *b, unsigned num_components) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + return nir_build_imm(b, num_components, 32, v); +} + static nir_ssa_def * convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo, nir_ssa_def *color, @@ -299,7 +362,7 @@ convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo, * their least significant bits. However, the data in the high bits is * garbage so we have to discard it. */ - if (devinfo->ver == 7 && !devinfo->is_haswell && + if (devinfo->gen == 7 && !devinfo->is_haswell && (lower_fmt == ISL_FORMAT_R16_UINT || lower_fmt == ISL_FORMAT_R8_UINT)) color = nir_format_mask_uvec(b, color, lower.bits); @@ -368,7 +431,7 @@ lower_image_load_instr(nir_builder *b, nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); nir_variable *var = nir_deref_instr_get_variable(deref); const enum isl_format image_fmt = - isl_format_for_pipe_format(var->data.image.format); + isl_format_for_gl_format(var->data.image.format); if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) { const enum isl_format lower_fmt = @@ -379,7 +442,7 @@ lower_image_load_instr(nir_builder *b, * conversion. */ nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder)); intrin->num_components = isl_format_get_num_channels(lower_fmt); intrin->dest.ssa.num_components = intrin->num_components; @@ -391,7 +454,7 @@ lower_image_load_instr(nir_builder *b, image_fmt, lower_fmt, dest_components); - nir_ssa_def_rewrite_uses(placeholder, color); + nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color)); nir_instr_remove(placeholder->parent_instr); } else { const struct isl_format_layout *image_fmtl = @@ -408,9 +471,9 @@ lower_image_load_instr(nir_builder *b, nir_ssa_def *coord = intrin->src[1].ssa; nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord); - if (devinfo->ver == 7 && !devinfo->is_haswell) { + if (devinfo->gen == 7 && !devinfo->is_haswell) { /* Check whether the first stride component (i.e. the Bpp value) - * is greater than four, what on Gfx7 indicates that a surface of + * is greater than four, what on Gen7 indicates that a surface of * type RAW has been bound for untyped access. Reading or writing * to a surface of type other than RAW using untyped surface * messages causes a hang on IVB and VLV. @@ -423,23 +486,29 @@ lower_image_load_instr(nir_builder *b, nir_push_if(b, do_load); nir_ssa_def *addr = image_address(b, devinfo, deref, coord); - nir_ssa_def *load = - nir_image_deref_load_raw_intel(b, image_fmtl->bpb / 32, 32, - &deref->dest.ssa, addr); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_image_deref_load_raw_intel); + load->src[0] = nir_src_for_ssa(&deref->dest.ssa); + load->src[1] = nir_src_for_ssa(addr); + load->num_components = image_fmtl->bpb / 32; + nir_ssa_dest_init(&load->instr, &load->dest, + load->num_components, 32, NULL); + nir_builder_instr_insert(b, &load->instr); nir_push_else(b, NULL); - nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32); + nir_ssa_def *zero = nir_zero_vec(b, load->num_components); nir_pop_if(b, NULL); - nir_ssa_def *value = nir_if_phi(b, load, zero); + nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero); nir_ssa_def *color = convert_color_for_load(b, devinfo, value, image_fmt, raw_fmt, dest_components); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, color); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color)); } return true; @@ -475,16 +544,38 @@ convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo, break; case ISL_SFLOAT: - if (image.bits[0] == 16) - color = nir_format_float_to_half(b, color); + if (image.bits[0] == 16) { + nir_ssa_def *f16comps[4]; + for (unsigned i = 0; i < image.chans; i++) { + f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, color, i), + nir_imm_float(b, 0)); + } + color = nir_vec(b, f16comps, image.chans); + } break; case ISL_UINT: - color = nir_format_clamp_uint(b, color, image.bits); + if (image.bits[0] < 32) { + nir_const_value max; + for (unsigned i = 0; i < image.chans; i++) { + assert(image.bits[i] < 32); + max.u32[i] = (1u << image.bits[i]) - 1; + } + color = nir_umin(b, color, nir_build_imm(b, image.chans, 32, max)); + } break; case ISL_SINT: - color = nir_format_clamp_sint(b, color, image.bits); + if (image.bits[0] < 32) { + nir_const_value min, max; + for (unsigned i = 0; i < image.chans; i++) { + assert(image.bits[i] < 32); + max.i32[i] = (1 << (image.bits[i] - 1)) - 1; + min.i32[i] = -(1 << (image.bits[i] - 1)); + } + color = nir_imin(b, color, nir_build_imm(b, image.chans, 32, max)); + color = nir_imax(b, color, nir_build_imm(b, image.chans, 32, min)); + } break; default: @@ -523,11 +614,11 @@ lower_image_store_instr(nir_builder *b, /* For write-only surfaces, we trust that the hardware can just do the * conversion for us. */ - if (var->data.access & ACCESS_NON_READABLE) + if (var->data.image.access & ACCESS_NON_READABLE) return false; const enum isl_format image_fmt = - isl_format_for_pipe_format(var->data.image.format); + isl_format_for_gl_format(var->data.image.format); if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) { const enum isl_format lower_fmt = @@ -556,9 +647,9 @@ lower_image_store_instr(nir_builder *b, nir_ssa_def *coord = intrin->src[1].ssa; nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord); - if (devinfo->ver == 7 && !devinfo->is_haswell) { + if (devinfo->gen == 7 && !devinfo->is_haswell) { /* Check whether the first stride component (i.e. the Bpp value) - * is greater than four, what on Gfx7 indicates that a surface of + * is greater than four, what on Gen7 indicates that a surface of * type RAW has been bound for untyped access. Reading or writing * to a surface of type other than RAW using untyped surface * messages causes a hang on IVB and VLV. @@ -595,7 +686,7 @@ lower_image_atomic_instr(nir_builder *b, const struct gen_device_info *devinfo, nir_intrinsic_instr *intrin) { - if (devinfo->is_haswell || devinfo->ver >= 8) + if (devinfo->is_haswell || devinfo->gen >= 8) return false; nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); @@ -604,7 +695,7 @@ lower_image_atomic_instr(nir_builder *b, /* Use an undef to hold the uses of the load conversion. */ nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder)); /* Check the first component of the size field to find out if the * image is bound. Necessary on IVB for typed atomics because @@ -620,7 +711,7 @@ lower_image_atomic_instr(nir_builder *b, nir_pop_if(b, NULL); nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero); - nir_ssa_def_rewrite_uses(placeholder, result); + nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result)); return true; } @@ -636,19 +727,17 @@ lower_image_size_instr(nir_builder *b, /* For write-only images, we have an actual image surface so we fall back * and let the back-end emit a TXS for this. */ - if (var->data.access & ACCESS_NON_READABLE) + if (var->data.image.access & ACCESS_NON_READABLE) return false; /* If we have a matching typed format, then we have an actual image surface * so we fall back and let the back-end emit a TXS for this. */ const enum isl_format image_fmt = - isl_format_for_pipe_format(var->data.image.format); + isl_format_for_gl_format(var->data.image.format); if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) return false; - assert(nir_src_as_uint(intrin->src[1]) == 0); - b->cursor = nir_instr_remove(&intrin->instr); nir_ssa_def *size = load_image_param(b, deref, SIZE); @@ -669,15 +758,14 @@ lower_image_size_instr(nir_builder *b, comps[c] = nir_imm_int(b, 1); nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec)); return true; } bool brw_nir_lower_image_load_store(nir_shader *shader, - const struct gen_device_info *devinfo, - bool *uses_atomic_load_store) + const struct gen_device_info *devinfo) { bool progress = false; @@ -685,7 +773,6 @@ brw_nir_lower_image_load_store(nir_shader *shader, if (function->impl == NULL) continue; - bool impl_progress = false; nir_foreach_block_safe(block, function->impl) { nir_builder b; nir_builder_init(&b, function->impl); @@ -698,33 +785,29 @@ brw_nir_lower_image_load_store(nir_shader *shader, switch (intrin->intrinsic) { case nir_intrinsic_image_deref_load: if (lower_image_load_instr(&b, devinfo, intrin)) - impl_progress = true; + progress = true; break; case nir_intrinsic_image_deref_store: if (lower_image_store_instr(&b, devinfo, intrin)) - impl_progress = true; + progress = true; break; case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: case nir_intrinsic_image_deref_atomic_and: case nir_intrinsic_image_deref_atomic_or: case nir_intrinsic_image_deref_atomic_xor: case nir_intrinsic_image_deref_atomic_exchange: case nir_intrinsic_image_deref_atomic_comp_swap: - if (uses_atomic_load_store) - *uses_atomic_load_store = true; if (lower_image_atomic_instr(&b, devinfo, intrin)) - impl_progress = true; + progress = true; break; case nir_intrinsic_image_deref_size: if (lower_image_size_instr(&b, devinfo, intrin)) - impl_progress = true; + progress = true; break; default: @@ -734,13 +817,50 @@ brw_nir_lower_image_load_store(nir_shader *shader, } } - if (impl_progress) { - progress = true; + if (progress) nir_metadata_preserve(function->impl, nir_metadata_none); - } else { - nir_metadata_preserve(function->impl, nir_metadata_all); - } } return progress; } + +void +brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, + nir_ssa_def *index) +{ + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + switch (intrin->intrinsic) { +#define CASE(op) \ + case nir_intrinsic_image_deref_##op: \ + intrin->intrinsic = nir_intrinsic_image_##op; \ + break; + CASE(load) + CASE(store) + CASE(atomic_add) + CASE(atomic_min) + CASE(atomic_max) + CASE(atomic_and) + CASE(atomic_or) + CASE(atomic_xor) + CASE(atomic_exchange) + CASE(atomic_comp_swap) + CASE(atomic_fadd) + CASE(size) + CASE(samples) + CASE(load_raw_intel) + CASE(store_raw_intel) +#undef CASE + default: + unreachable("Unhanded image intrinsic"); + } + + nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type)); + nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type)); + nir_intrinsic_set_access(intrin, var->data.image.access); + nir_intrinsic_set_format(intrin, var->data.image.format); + + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], + nir_src_for_ssa(index)); +} diff --git a/lib/mesa/src/intel/dev/gen_device_info.h b/lib/mesa/src/intel/dev/gen_device_info.h index fe5ac11a3..291a3cce8 100644 --- a/lib/mesa/src/intel/dev/gen_device_info.h +++ b/lib/mesa/src/intel/dev/gen_device_info.h @@ -36,20 +36,16 @@ extern "C" { struct drm_i915_query_topology_info; -#define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gfx10 */ -#define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gfx11 */ -#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */ -#define GEN_DEVICE_MAX_PIXEL_PIPES (3) /* Maximum on gfx12 */ +#define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gen10 */ +#define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gen11 */ +#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (10) /* Maximum on Haswell */ /** * Intel hardware information and quirks */ struct gen_device_info { - /* Driver internal numbers used to differentiate platforms. */ - int ver; - int verx10; - int revision; + int gen; /**< Generation number: 4, 5, 6, 7, ... */ int gt; bool is_g4x; @@ -63,11 +59,7 @@ struct gen_device_info bool is_kabylake; bool is_geminilake; bool is_coffeelake; - bool is_elkhartlake; - bool is_tigerlake; - bool is_rocketlake; - bool is_dg1; - bool is_alderlake; + bool is_cannonlake; bool has_hiz_and_separate_stencil; bool must_use_separate_stencil; @@ -75,16 +67,12 @@ struct gen_device_info bool has_llc; bool has_pln; - bool has_64bit_float; - bool has_64bit_int; + bool has_64bit_types; bool has_integer_dword_mul; bool has_compr4; bool has_surface_tile_offset; bool supports_simd16_3src; - bool disable_ccs_repack; - bool has_aux_map; - bool has_tiling_uapi; - bool has_ray_tracing; + bool has_resource_streamer; /** * \name Intel hardware quirks @@ -134,11 +122,6 @@ struct gen_device_info */ unsigned num_subslices[GEN_DEVICE_MAX_SUBSLICES]; - /** - * Number of subslices on each pixel pipe (ICL). - */ - unsigned ppipe_subslices[GEN_DEVICE_MAX_PIXEL_PIPES]; - /** * Upper bound of number of EU per subslice (some SKUs might have just 1 EU * fused across all subslices, like 47 EUs, in which case this number won't @@ -194,7 +177,7 @@ struct gen_device_info * automatically scale pixel shader thread count, based on a single value * programmed into 3DSTATE_PS. * - * To calculate the maximum number of threads for Gfx8 beyond (which have + * To calculate the maximum number of threads for Gen8 beyond (which have * multiple Pixel Shader Dispatchers): * * - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD" @@ -213,14 +196,14 @@ struct gen_device_info struct { /** - * Fixed size of the URB. + * Hardware default URB size. * - * On Gfx6 and DG1, this is measured in KB. Gfx4-5 instead measure - * this in 512b blocks, as that's more convenient there. + * The units this is expressed in are somewhat inconsistent: 512b units + * on Gen4-5, KB on Gen6-7, and KB times the slice count on Gen8+. * - * On most Gfx7+ platforms, the URB is a section of the L3 cache, - * and can be resized based on the L3 programming. For those platforms, - * simply leave this field blank (zero) - it isn't used. + * Look up "URB Size" in the "Device Attributes" page, and take the + * maximum. Look up the slice count for each GT SKU on the same page. + * urb.size = URB Size (kbytes) / slice count */ unsigned size; @@ -235,18 +218,12 @@ struct gen_device_info unsigned max_entries[4]; } urb; - /** - * Size of the command streamer prefetch. This is important to know for - * self modifying batches. - */ - unsigned cs_prefetch_size; - /** * For the longest time the timestamp frequency for Gen's timestamp counter * could be assumed to be 12.5MHz, where the least significant bit neatly * corresponded to 80 nanoseconds. * - * Since Gfx9 the numbers aren't so round, with a a frequency of 12MHz for + * Since Gen9 the numbers aren't so round, with a a frequency of 12MHz for * SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for * BXT. * @@ -264,37 +241,17 @@ struct gen_device_info */ uint64_t timestamp_frequency; - uint64_t aperture_bytes; - /** * ID to put into the .aub files. */ int simulator_id; - /** - * holds the pci device id - */ - uint32_t chipset_id; - - /** - * no_hw is true when the chipset_id pci device id has been overridden - */ - bool no_hw; /** @} */ }; -#ifdef GFX_VER - -#define gen_device_info_is_9lp(devinfo) \ - (GFX_VER == 9 && ((devinfo)->is_broxton || (devinfo)->is_geminilake)) - -#else - #define gen_device_info_is_9lp(devinfo) \ ((devinfo)->is_broxton || (devinfo)->is_geminilake) -#endif - static inline bool gen_device_info_subslice_available(const struct gen_device_info *devinfo, int slice, int subslice) @@ -303,58 +260,19 @@ gen_device_info_subslice_available(const struct gen_device_info *devinfo, subslice / 8] & (1U << (subslice % 8))) != 0; } -static inline bool -gen_device_info_eu_available(const struct gen_device_info *devinfo, - int slice, int subslice, int eu) -{ - unsigned subslice_offset = slice * devinfo->eu_slice_stride + - subslice * devinfo->eu_subslice_stride; - - return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0; -} - -static inline uint32_t -gen_device_info_subslice_total(const struct gen_device_info *devinfo) -{ - uint32_t total = 0; - - for (uint32_t i = 0; i < devinfo->num_slices; i++) - total += __builtin_popcount(devinfo->subslice_masks[i]); - - return total; -} - -static inline uint32_t -gen_device_info_eu_total(const struct gen_device_info *devinfo) -{ - uint32_t total = 0; - - for (uint32_t i = 0; i < ARRAY_SIZE(devinfo->eu_masks); i++) - total += __builtin_popcount(devinfo->eu_masks[i]); - - return total; -} - -static inline unsigned -gen_device_info_num_dual_subslices(UNUSED const struct gen_device_info *devinfo) -{ - unreachable("TODO"); -} - +int gen_get_pci_device_id_override(void); int gen_device_name_to_pci_device_id(const char *name); +bool gen_get_device_info(int devid, struct gen_device_info *devinfo); const char *gen_get_device_name(int devid); -static inline uint64_t -gen_device_info_timebase_scale(const struct gen_device_info *devinfo, - uint64_t gpu_timestamp) -{ - return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency; -} - -bool gen_get_device_info_from_fd(int fh, struct gen_device_info *devinfo); -bool gen_get_device_info_from_pci_id(int pci_id, - struct gen_device_info *devinfo); -int gen_get_aperture_size(int fd, uint64_t *size); +/* Used with SLICE_MASK/SUBSLICE_MASK values from DRM_I915_GETPARAM. */ +void gen_device_info_update_from_masks(struct gen_device_info *devinfo, + uint32_t slice_mask, + uint32_t subslice_mask, + uint32_t n_eus); +/* Used with DRM_IOCTL_I915_QUERY & DRM_I915_QUERY_TOPOLOGY_INFO. */ +void gen_device_info_update_from_topology(struct gen_device_info *devinfo, + const struct drm_i915_query_topology_info *topology); #ifdef __cplusplus } diff --git a/lib/mesa/src/intel/tools/gen_context.h b/lib/mesa/src/intel/tools/gen_context.h index 39041408b..3f488c07c 100644 --- a/lib/mesa/src/intel/tools/gen_context.h +++ b/lib/mesa/src/intel/tools/gen_context.h @@ -21,22 +21,22 @@ * IN THE SOFTWARE. */ -#ifndef INTEL_CONTEXT_H -#define INTEL_CONTEXT_H +#ifndef GEN_CONTEXT_H +#define GEN_CONTEXT_H #include #define RING_SIZE (1 * 4096) #define PPHWSP_SIZE (1 * 4096) -#define GFX11_LR_CONTEXT_RENDER_SIZE (14 * 4096) -#define GFX10_LR_CONTEXT_RENDER_SIZE (19 * 4096) -#define GFX9_LR_CONTEXT_RENDER_SIZE (22 * 4096) -#define GFX8_LR_CONTEXT_RENDER_SIZE (20 * 4096) -#define GFX8_LR_CONTEXT_OTHER_SIZE (2 * 4096) +#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * 4096) +#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * 4096) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096) +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * 4096) -#define CONTEXT_RENDER_SIZE GFX9_LR_CONTEXT_RENDER_SIZE /* largest size */ -#define CONTEXT_OTHER_SIZE GFX8_LR_CONTEXT_OTHER_SIZE +#define CONTEXT_RENDER_SIZE GEN9_LR_CONTEXT_RENDER_SIZE /* largest size */ +#define CONTEXT_OTHER_SIZE GEN8_LR_CONTEXT_OTHER_SIZE #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1)) #define MI_LRI_FORCE_POSTED (1<<12) @@ -70,10 +70,26 @@ #define MEMORY_MAP_SIZE (64 /* MiB */ * 1024 * 1024) #define PTE_SIZE 4 -#define GFX8_PTE_SIZE 8 +#define GEN8_PTE_SIZE 8 #define NUM_PT_ENTRIES (ALIGN(MEMORY_MAP_SIZE, 4096) / 4096) -#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GFX8_PTE_SIZE, 4096) +#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GEN8_PTE_SIZE, 4096) + +#define STATIC_GGTT_MAP_START 0 + +#define RENDER_RING_ADDR STATIC_GGTT_MAP_START +#define RENDER_CONTEXT_ADDR (RENDER_RING_ADDR + RING_SIZE) + +#define BLITTER_RING_ADDR (RENDER_CONTEXT_ADDR + PPHWSP_SIZE + GEN9_LR_CONTEXT_RENDER_SIZE) +#define BLITTER_CONTEXT_ADDR (BLITTER_RING_ADDR + RING_SIZE) + +#define VIDEO_RING_ADDR (BLITTER_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE) +#define VIDEO_CONTEXT_ADDR (VIDEO_RING_ADDR + RING_SIZE) + +#define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE) +#define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START) + +#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END)) #define CONTEXT_FLAGS (0x339) /* Normal Priority | L3-LLC Coherency | * PPGTT Enabled | @@ -81,24 +97,11 @@ * Valid */ -#define MI_LOAD_REGISTER_IMM_vals(data, flags, ...) do { \ - uint32_t __regs[] = { __VA_ARGS__ }; \ - assert((ARRAY_SIZE(__regs) % 2) == 0); \ - *(data)++ = MI_LOAD_REGISTER_IMM_n(ARRAY_SIZE(__regs) / 2) | (flags); \ - for (unsigned __e = 0; __e < ARRAY_SIZE(__regs); __e++) \ - *(data)++ = __regs[__e]; \ - } while (0) - - -struct gen_context_parameters { - uint64_t pml4_addr; - uint64_t ring_addr; - uint32_t ring_size; -}; - -typedef void (*gen_context_init_t)(const struct gen_context_parameters *, uint32_t *, uint32_t *); +#define RENDER_CONTEXT_DESCRIPTOR ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR | CONTEXT_FLAGS) +#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS) +#define VIDEO_CONTEXT_DESCRIPTOR ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR | CONTEXT_FLAGS) -#include "gfx8_context.h" -#include "gfx10_context.h" +#include "gen8_context.h" +#include "gen10_context.h" -#endif /* INTEL_CONTEXT_H */ +#endif /* GEN_CONTEXT_H */ -- cgit v1.2.3