Import Mesa 18.3.2

author: Jonathan Gray <jsg@cvs.openbsd.org> 2019-01-29 10:58:00 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2019-01-29 10:58:00 +0000
commit: ee4f0b0cddedf0afced2ceb23523bf373cbd4847 (patch)
tree: 539e540aca51a4f17ba7adcb821538699091a5b9 /lib/mesa/src
parent: 7cb5aacfe509bd505d403a38471c35da6ba95200 (diff)
3 files changed, 226 insertions, 185 deletions
diff --git a/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c b/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c
index f96698b08..1a7671b74 100644
--- a/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c
+++ b/lib/mesa/src/intel/compiler/brw_nir_lower_image_load_store.c
@@ -27,6 +27,60 @@
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_format_convert.h"
 
+/* Translate a GL image-format enum into its ISL equivalent.  Higher compiler
+ * layers keep GL enums even for SPIR-V/Vulkan input, so convert before use.
+ * GL_NONE and (after asserting) unlisted values give ISL_FORMAT_UNSUPPORTED.
+ */
+static enum isl_format
+isl_format_for_gl_format(uint32_t gl_format)
+{
+   switch (gl_format) {
+   case GL_R8:             return ISL_FORMAT_R8_UNORM;
+   case GL_R8_SNORM:       return ISL_FORMAT_R8_SNORM;
+   case GL_R8UI:           return ISL_FORMAT_R8_UINT;
+   case GL_R8I:            return ISL_FORMAT_R8_SINT;
+   case GL_RG8:            return ISL_FORMAT_R8G8_UNORM;
+   case GL_RG8_SNORM:      return ISL_FORMAT_R8G8_SNORM;
+   case GL_RG8UI:          return ISL_FORMAT_R8G8_UINT;
+   case GL_RG8I:           return ISL_FORMAT_R8G8_SINT;
+   case GL_RGBA8:          return ISL_FORMAT_R8G8B8A8_UNORM;
+   case GL_RGBA8_SNORM:    return ISL_FORMAT_R8G8B8A8_SNORM;
+   case GL_RGBA8UI:        return ISL_FORMAT_R8G8B8A8_UINT;
+   case GL_RGBA8I:         return ISL_FORMAT_R8G8B8A8_SINT;
+   case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT;
+   case GL_RGB10_A2:       return ISL_FORMAT_R10G10B10A2_UNORM;
+   case GL_RGB10_A2UI:     return ISL_FORMAT_R10G10B10A2_UINT;
+   case GL_R16:            return ISL_FORMAT_R16_UNORM;
+   case GL_R16_SNORM:      return ISL_FORMAT_R16_SNORM;
+   case GL_R16F:           return ISL_FORMAT_R16_FLOAT;
+   case GL_R16UI:          return ISL_FORMAT_R16_UINT;
+   case GL_R16I:           return ISL_FORMAT_R16_SINT;
+   case GL_RG16:           return ISL_FORMAT_R16G16_UNORM;
+   case GL_RG16_SNORM:     return ISL_FORMAT_R16G16_SNORM;
+   case GL_RG16F:          return ISL_FORMAT_R16G16_FLOAT;
+   case GL_RG16UI:         return ISL_FORMAT_R16G16_UINT;
+   case GL_RG16I:          return ISL_FORMAT_R16G16_SINT;
+   case GL_RGBA16:         return ISL_FORMAT_R16G16B16A16_UNORM;
+   case GL_RGBA16_SNORM:   return ISL_FORMAT_R16G16B16A16_SNORM;
+   case GL_RGBA16F:        return ISL_FORMAT_R16G16B16A16_FLOAT;
+   case GL_RGBA16UI:       return ISL_FORMAT_R16G16B16A16_UINT;
+   case GL_RGBA16I:        return ISL_FORMAT_R16G16B16A16_SINT;
+   case GL_R32F:           return ISL_FORMAT_R32_FLOAT;
+   case GL_R32UI:          return ISL_FORMAT_R32_UINT;
+   case GL_R32I:           return ISL_FORMAT_R32_SINT;
+   case GL_RG32F:          return ISL_FORMAT_R32G32_FLOAT;
+   case GL_RG32UI:         return ISL_FORMAT_R32G32_UINT;
+   case GL_RG32I:          return ISL_FORMAT_R32G32_SINT;
+   case GL_RGBA32F:        return ISL_FORMAT_R32G32B32A32_FLOAT;
+   case GL_RGBA32UI:       return ISL_FORMAT_R32G32B32A32_UINT;
+   case GL_RGBA32I:        return ISL_FORMAT_R32G32B32A32_SINT;
+   case GL_NONE:           return ISL_FORMAT_UNSUPPORTED;
+   default:
+      assert(!"Invalid image format");  /* !"..." is 0: always fires */
+      return ISL_FORMAT_UNSUPPORTED;  /* reached only if assert() is a no-op */
+   }
+}
+
 static nir_ssa_def *
 _load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
 {
@@ -137,7 +191,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo,
     * by passing in the miplevel as tile.z for 3-D textures and 0 in
     * tile.z for 2-D array textures.
     *
-    * See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface
+    * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
     * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
     * of the hardware 3D texture and 2D array layouts.
     */
@@ -200,7 +254,7 @@ image_address(nir_builder *b, const struct gen_device_info *devinfo,
       /* Multiply by the Bpp value. */
       addr = nir_imul(b, idx, nir_channel(b, stride, 0));
 
-      if (devinfo->ver < 8 && !devinfo->is_baytrail) {
+      if (devinfo->gen < 8 && !devinfo->is_baytrail) {
          /* Take into account the two dynamically specified shifts.  Both are
           * used to implement swizzling of X-tiled surfaces.  For Y-tiled
           * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
@@ -259,6 +313,15 @@ get_format_info(enum isl_format fmt)
 }
 
 static nir_ssa_def *
+nir_zero_vec(nir_builder *b, unsigned num_components)
+{  /* Build an immediate with num_components channels, every bit cleared. */
+   nir_const_value v;
+   memset(&v, 0, sizeof(v));  /* zero the entire constant, not just used lanes */
+   /* NOTE(review): 32 is presumably the per-channel bit size -- confirm. */
+   return nir_build_imm(b, num_components, 32, v);
+}
+
+static nir_ssa_def *
 convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
                        nir_ssa_def *color,
                        enum isl_format image_fmt, enum isl_format lower_fmt,
@@ -299,7 +362,7 @@ convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
        * their least significant bits.  However, the data in the high bits is
        * garbage so we have to discard it.
        */
-      if (devinfo->ver == 7 && !devinfo->is_haswell &&
+      if (devinfo->gen == 7 && !devinfo->is_haswell &&
           (lower_fmt == ISL_FORMAT_R16_UINT ||
            lower_fmt == ISL_FORMAT_R8_UINT))
          color = nir_format_mask_uvec(b, color, lower.bits);
@@ -368,7 +431,7 @@ lower_image_load_instr(nir_builder *b,
    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
    nir_variable *var = nir_deref_instr_get_variable(deref);
    const enum isl_format image_fmt =
-      isl_format_for_pipe_format(var->data.image.format);
+      isl_format_for_gl_format(var->data.image.format);
 
    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
       const enum isl_format lower_fmt =
@@ -379,7 +442,7 @@ lower_image_load_instr(nir_builder *b,
        * conversion.
        */
       nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder);
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
 
       intrin->num_components = isl_format_get_num_channels(lower_fmt);
       intrin->dest.ssa.num_components = intrin->num_components;
@@ -391,7 +454,7 @@ lower_image_load_instr(nir_builder *b,
                                                   image_fmt, lower_fmt,
                                                   dest_components);
 
-      nir_ssa_def_rewrite_uses(placeholder, color);
+      nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color));
       nir_instr_remove(placeholder->parent_instr);
    } else {
       const struct isl_format_layout *image_fmtl =
@@ -408,9 +471,9 @@ lower_image_load_instr(nir_builder *b,
       nir_ssa_def *coord = intrin->src[1].ssa;
 
       nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
-      if (devinfo->ver == 7 && !devinfo->is_haswell) {
+      if (devinfo->gen == 7 && !devinfo->is_haswell) {
          /* Check whether the first stride component (i.e. the Bpp value)
-          * is greater than four, what on Gfx7 indicates that a surface of
+          * is greater than four, which on Gen7 indicates that a surface of
           * type RAW has been bound for untyped access.  Reading or writing
           * to a surface of type other than RAW using untyped surface
           * messages causes a hang on IVB and VLV.
@@ -423,23 +486,29 @@ lower_image_load_instr(nir_builder *b,
       nir_push_if(b, do_load);
 
       nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
-      nir_ssa_def *load =
-         nir_image_deref_load_raw_intel(b, image_fmtl->bpb / 32, 32,
-                                        &deref->dest.ssa, addr);
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader,
+                                    nir_intrinsic_image_deref_load_raw_intel);
+      load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+      load->src[1] = nir_src_for_ssa(addr);
+      load->num_components = image_fmtl->bpb / 32;
+      nir_ssa_dest_init(&load->instr, &load->dest,
+                        load->num_components, 32, NULL);
+      nir_builder_instr_insert(b, &load->instr);
 
       nir_push_else(b, NULL);
 
-      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32);
+      nir_ssa_def *zero = nir_zero_vec(b, load->num_components);
 
       nir_pop_if(b, NULL);
 
-      nir_ssa_def *value = nir_if_phi(b, load, zero);
+      nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero);
 
       nir_ssa_def *color = convert_color_for_load(b, devinfo, value,
                                                   image_fmt, raw_fmt,
                                                   dest_components);
 
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, color);
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color));
    }
 
    return true;
@@ -475,16 +544,38 @@ convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo,
       break;
 
    case ISL_SFLOAT:
-      if (image.bits[0] == 16)
-         color = nir_format_float_to_half(b, color);
+      if (image.bits[0] == 16) {
+         nir_ssa_def *f16comps[4];
+         for (unsigned i = 0; i < image.chans; i++) {
+            f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, color, i),
+                                                      nir_imm_float(b, 0));
+         }
+         color = nir_vec(b, f16comps, image.chans);
+      }
       break;
 
    case ISL_UINT:
-      color = nir_format_clamp_uint(b, color, image.bits);
+      if (image.bits[0] < 32) {
+         nir_const_value max;
+         for (unsigned i = 0; i < image.chans; i++) {
+            assert(image.bits[i] < 32);
+            max.u32[i] = (1u << image.bits[i]) - 1;
+         }
+         color = nir_umin(b, color, nir_build_imm(b, image.chans, 32, max));
+      }
       break;
 
    case ISL_SINT:
-      color = nir_format_clamp_sint(b, color, image.bits);
+      if (image.bits[0] < 32) {
+         nir_const_value min, max;
+         for (unsigned i = 0; i < image.chans; i++) {
+            assert(image.bits[i] < 32);
+            max.i32[i] = (1 << (image.bits[i] - 1)) - 1;
+            min.i32[i] = -(1 << (image.bits[i] - 1));
+         }
+         color = nir_imin(b, color, nir_build_imm(b, image.chans, 32, max));
+         color = nir_imax(b, color, nir_build_imm(b, image.chans, 32, min));
+      }
       break;
 
    default:
@@ -523,11 +614,11 @@ lower_image_store_instr(nir_builder *b,
    /* For write-only surfaces, we trust that the hardware can just do the
     * conversion for us.
     */
-   if (var->data.access & ACCESS_NON_READABLE)
+   if (var->data.image.access & ACCESS_NON_READABLE)
       return false;
 
    const enum isl_format image_fmt =
-      isl_format_for_pipe_format(var->data.image.format);
+      isl_format_for_gl_format(var->data.image.format);
 
    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
       const enum isl_format lower_fmt =
@@ -556,9 +647,9 @@ lower_image_store_instr(nir_builder *b,
       nir_ssa_def *coord = intrin->src[1].ssa;
 
       nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
-      if (devinfo->ver == 7 && !devinfo->is_haswell) {
+      if (devinfo->gen == 7 && !devinfo->is_haswell) {
          /* Check whether the first stride component (i.e. the Bpp value)
-          * is greater than four, what on Gfx7 indicates that a surface of
+          * is greater than four, which on Gen7 indicates that a surface of
           * type RAW has been bound for untyped access.  Reading or writing
           * to a surface of type other than RAW using untyped surface
           * messages causes a hang on IVB and VLV.
@@ -595,7 +686,7 @@ lower_image_atomic_instr(nir_builder *b,
                          const struct gen_device_info *devinfo,
                          nir_intrinsic_instr *intrin)
 {
-   if (devinfo->is_haswell || devinfo->ver >= 8)
+   if (devinfo->is_haswell || devinfo->gen >= 8)
       return false;
 
    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
@@ -604,7 +695,7 @@ lower_image_atomic_instr(nir_builder *b,
 
    /* Use an undef to hold the uses of the load conversion. */
    nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder);
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
 
    /* Check the first component of the size field to find out if the
     * image is bound.  Necessary on IVB for typed atomics because
@@ -620,7 +711,7 @@ lower_image_atomic_instr(nir_builder *b,
    nir_pop_if(b, NULL);
 
    nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero);
-   nir_ssa_def_rewrite_uses(placeholder, result);
+   nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result));
 
    return true;
 }
@@ -636,19 +727,17 @@ lower_image_size_instr(nir_builder *b,
    /* For write-only images, we have an actual image surface so we fall back
     * and let the back-end emit a TXS for this.
     */
-   if (var->data.access & ACCESS_NON_READABLE)
+   if (var->data.image.access & ACCESS_NON_READABLE)
       return false;
 
    /* If we have a matching typed format, then we have an actual image surface
     * so we fall back and let the back-end emit a TXS for this.
     */
    const enum isl_format image_fmt =
-      isl_format_for_pipe_format(var->data.image.format);
+      isl_format_for_gl_format(var->data.image.format);
    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
       return false;
 
-   assert(nir_src_as_uint(intrin->src[1]) == 0);
-
    b->cursor = nir_instr_remove(&intrin->instr);
 
    nir_ssa_def *size = load_image_param(b, deref, SIZE);
@@ -669,15 +758,14 @@ lower_image_size_instr(nir_builder *b,
       comps[c] = nir_imm_int(b, 1);
 
    nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec);
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec));
 
    return true;
 }
 
 bool
 brw_nir_lower_image_load_store(nir_shader *shader,
-                               const struct gen_device_info *devinfo,
-                               bool *uses_atomic_load_store)
+                               const struct gen_device_info *devinfo)
 {
    bool progress = false;
 
@@ -685,7 +773,6 @@ brw_nir_lower_image_load_store(nir_shader *shader,
       if (function->impl == NULL)
          continue;
 
-      bool impl_progress = false;
       nir_foreach_block_safe(block, function->impl) {
          nir_builder b;
          nir_builder_init(&b, function->impl);
@@ -698,33 +785,29 @@ brw_nir_lower_image_load_store(nir_shader *shader,
             switch (intrin->intrinsic) {
             case nir_intrinsic_image_deref_load:
                if (lower_image_load_instr(&b, devinfo, intrin))
-                  impl_progress = true;
+                  progress = true;
                break;
 
             case nir_intrinsic_image_deref_store:
                if (lower_image_store_instr(&b, devinfo, intrin))
-                  impl_progress = true;
+                  progress = true;
                break;
 
             case nir_intrinsic_image_deref_atomic_add:
-            case nir_intrinsic_image_deref_atomic_imin:
-            case nir_intrinsic_image_deref_atomic_umin:
-            case nir_intrinsic_image_deref_atomic_imax:
-            case nir_intrinsic_image_deref_atomic_umax:
+            case nir_intrinsic_image_deref_atomic_min:
+            case nir_intrinsic_image_deref_atomic_max:
             case nir_intrinsic_image_deref_atomic_and:
             case nir_intrinsic_image_deref_atomic_or:
             case nir_intrinsic_image_deref_atomic_xor:
             case nir_intrinsic_image_deref_atomic_exchange:
             case nir_intrinsic_image_deref_atomic_comp_swap:
-               if (uses_atomic_load_store)
-                  *uses_atomic_load_store = true;
                if (lower_image_atomic_instr(&b, devinfo, intrin))
-                  impl_progress = true;
+                  progress = true;
                break;
 
             case nir_intrinsic_image_deref_size:
                if (lower_image_size_instr(&b, devinfo, intrin))
-                  impl_progress = true;
+                  progress = true;
                break;
 
             default:
@@ -734,13 +817,50 @@ brw_nir_lower_image_load_store(nir_shader *shader,
          }
       }
 
-      if (impl_progress) {
-         progress = true;
+      if (progress)
          nir_metadata_preserve(function->impl, nir_metadata_none);
-      } else {
-         nir_metadata_preserve(function->impl, nir_metadata_all);
-      }
    }
 
    return progress;
 }
+
+void
+brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+                                nir_ssa_def *index)
+{  /* Rewrite an image_deref_* intrinsic in place to its image_* twin. */
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   /* Swap the opcode; each CASE maps image_deref_<op> to image_<op>. */
+   switch (intrin->intrinsic) {
+#define CASE(op) \
+   case nir_intrinsic_image_deref_##op: \
+      intrin->intrinsic = nir_intrinsic_image_##op; \
+      break;
+   CASE(load)
+   CASE(store)
+   CASE(atomic_add)
+   CASE(atomic_min)
+   CASE(atomic_max)
+   CASE(atomic_and)
+   CASE(atomic_or)
+   CASE(atomic_xor)
+   CASE(atomic_exchange)
+   CASE(atomic_comp_swap)
+   CASE(atomic_fadd)
+   CASE(size)
+   CASE(samples)
+   CASE(load_raw_intel)
+   CASE(store_raw_intel)
+#undef CASE
+   default:
+      unreachable("Unhandled image intrinsic");
+   }
+   /* Carry over what the deref type and variable used to provide. */
+   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
+   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
+   nir_intrinsic_set_access(intrin, var->data.image.access);
+   nir_intrinsic_set_format(intrin, var->data.image.format);
+   /* src[0] now refers to the supplied index instead of the deref. */
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+                         nir_src_for_ssa(index));
+}
diff --git a/lib/mesa/src/intel/dev/gen_device_info.h b/lib/mesa/src/intel/dev/gen_device_info.h
index fe5ac11a3..291a3cce8 100644
--- a/lib/mesa/src/intel/dev/gen_device_info.h
+++ b/lib/mesa/src/intel/dev/gen_device_info.h
@@ -36,20 +36,16 @@ extern "C" {
 
 struct drm_i915_query_topology_info;
 
-#define GEN_DEVICE_MAX_SLICES           (6)  /* Maximum on gfx10 */
-#define GEN_DEVICE_MAX_SUBSLICES        (8)  /* Maximum on gfx11 */
-#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
-#define GEN_DEVICE_MAX_PIXEL_PIPES      (3)  /* Maximum on gfx12 */
+#define GEN_DEVICE_MAX_SLICES           (6)  /* Maximum on gen10 */
+#define GEN_DEVICE_MAX_SUBSLICES        (8)  /* Maximum on gen11 */
+#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (10) /* Maximum on Haswell */
 
 /**
  * Intel hardware information and quirks
  */
 struct gen_device_info
 {
-   /* Driver internal numbers used to differentiate platforms. */
-   int ver;
-   int verx10;
-   int revision;
+   int gen; /**< Generation number: 4, 5, 6, 7, ... */
    int gt;
 
    bool is_g4x;
@@ -63,11 +59,7 @@ struct gen_device_info
    bool is_kabylake;
    bool is_geminilake;
    bool is_coffeelake;
-   bool is_elkhartlake;
-   bool is_tigerlake;
-   bool is_rocketlake;
-   bool is_dg1;
-   bool is_alderlake;
+   bool is_cannonlake;
 
    bool has_hiz_and_separate_stencil;
    bool must_use_separate_stencil;
@@ -75,16 +67,12 @@ struct gen_device_info
    bool has_llc;
 
    bool has_pln;
-   bool has_64bit_float;
-   bool has_64bit_int;
+   bool has_64bit_types;
    bool has_integer_dword_mul;
    bool has_compr4;
    bool has_surface_tile_offset;
    bool supports_simd16_3src;
-   bool disable_ccs_repack;
-   bool has_aux_map;
-   bool has_tiling_uapi;
-   bool has_ray_tracing;
+   bool has_resource_streamer;
 
    /**
     * \name Intel hardware quirks
@@ -135,11 +123,6 @@ struct gen_device_info
    unsigned num_subslices[GEN_DEVICE_MAX_SUBSLICES];
 
    /**
-    * Number of subslices on each pixel pipe (ICL).
-    */
-   unsigned ppipe_subslices[GEN_DEVICE_MAX_PIXEL_PIPES];
-
-   /**
     * Upper bound of number of EU per subslice (some SKUs might have just 1 EU
     * fused across all subslices, like 47 EUs, in which case this number won't
     * be acurate for one subslice).
@@ -194,7 +177,7 @@ struct gen_device_info
     * automatically scale pixel shader thread count, based on a single value
     * programmed into 3DSTATE_PS.
     *
-    * To calculate the maximum number of threads for Gfx8 beyond (which have
+    * To calculate the maximum number of threads for Gen8 and beyond (which have
     * multiple Pixel Shader Dispatchers):
     *
     * - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
@@ -213,14 +196,14 @@ struct gen_device_info
 
    struct {
       /**
-       * Fixed size of the URB.
+       * Hardware default URB size.
        *
-       * On Gfx6 and DG1, this is measured in KB.  Gfx4-5 instead measure
-       * this in 512b blocks, as that's more convenient there.
+       * The units this is expressed in are somewhat inconsistent: 512b units
+       * on Gen4-5, KB on Gen6-7, and KB times the slice count on Gen8+.
        *
-       * On most Gfx7+ platforms, the URB is a section of the L3 cache,
-       * and can be resized based on the L3 programming.  For those platforms,
-       * simply leave this field blank (zero) - it isn't used.
+       * Look up "URB Size" in the "Device Attributes" page, and take the
+       * maximum.  Look up the slice count for each GT SKU on the same page.
+       * urb.size = URB Size (kbytes) / slice count
        */
       unsigned size;
 
@@ -236,17 +219,11 @@ struct gen_device_info
    } urb;
 
    /**
-    * Size of the command streamer prefetch. This is important to know for
-    * self modifying batches.
-    */
-   unsigned cs_prefetch_size;
-
-   /**
     * For the longest time the timestamp frequency for Gen's timestamp counter
     * could be assumed to be 12.5MHz, where the least significant bit neatly
     * corresponded to 80 nanoseconds.
     *
-    * Since Gfx9 the numbers aren't so round, with a a frequency of 12MHz for
+    * Since Gen9 the numbers aren't so round, with a frequency of 12MHz for
     * SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for
     * BXT.
     *
@@ -264,37 +241,17 @@ struct gen_device_info
     */
    uint64_t timestamp_frequency;
 
-   uint64_t aperture_bytes;
-
    /**
     * ID to put into the .aub files.
     */
    int simulator_id;
 
-   /**
-    * holds the pci device id
-    */
-   uint32_t chipset_id;
-
-   /**
-    * no_hw is true when the chipset_id pci device id has been overridden
-    */
-   bool no_hw;
    /** @} */
 };
 
-#ifdef GFX_VER
-
-#define gen_device_info_is_9lp(devinfo) \
-   (GFX_VER == 9 && ((devinfo)->is_broxton || (devinfo)->is_geminilake))
-
-#else
-
 #define gen_device_info_is_9lp(devinfo) \
    ((devinfo)->is_broxton || (devinfo)->is_geminilake)
 
-#endif
-
 static inline bool
 gen_device_info_subslice_available(const struct gen_device_info *devinfo,
                                    int slice, int subslice)
@@ -303,58 +260,19 @@ gen_device_info_subslice_available(const struct gen_device_info *devinfo,
                                    subslice / 8] & (1U << (subslice % 8))) != 0;
 }
 
-static inline bool
-gen_device_info_eu_available(const struct gen_device_info *devinfo,
-                             int slice, int subslice, int eu)
-{
-   unsigned subslice_offset = slice * devinfo->eu_slice_stride +
-      subslice * devinfo->eu_subslice_stride;
-
-   return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
-}
-
-static inline uint32_t
-gen_device_info_subslice_total(const struct gen_device_info *devinfo)
-{
-   uint32_t total = 0;
-
-   for (uint32_t i = 0; i < devinfo->num_slices; i++)
-      total += __builtin_popcount(devinfo->subslice_masks[i]);
-
-   return total;
-}
-
-static inline uint32_t
-gen_device_info_eu_total(const struct gen_device_info *devinfo)
-{
-   uint32_t total = 0;
-
-   for (uint32_t i = 0; i < ARRAY_SIZE(devinfo->eu_masks); i++)
-      total += __builtin_popcount(devinfo->eu_masks[i]);
-
-   return total;
-}
-
-static inline unsigned
-gen_device_info_num_dual_subslices(UNUSED const struct gen_device_info *devinfo)
-{
-   unreachable("TODO");
-}
-
+int gen_get_pci_device_id_override(void);
 int gen_device_name_to_pci_device_id(const char *name);
+bool gen_get_device_info(int devid, struct gen_device_info *devinfo);
 const char *gen_get_device_name(int devid);
 
-static inline uint64_t
-gen_device_info_timebase_scale(const struct gen_device_info *devinfo,
-                               uint64_t gpu_timestamp)
-{
-   return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
-}
-
-bool gen_get_device_info_from_fd(int fh, struct gen_device_info *devinfo);
-bool gen_get_device_info_from_pci_id(int pci_id,
-                                     struct gen_device_info *devinfo);
-int gen_get_aperture_size(int fd, uint64_t *size);
+/* Used with SLICE_MASK/SUBSLICE_MASK values from DRM_I915_GETPARAM. */
+void gen_device_info_update_from_masks(struct gen_device_info *devinfo,
+                                       uint32_t slice_mask,
+                                       uint32_t subslice_mask,
+                                       uint32_t n_eus);
+/* Used with DRM_IOCTL_I915_QUERY & DRM_I915_QUERY_TOPOLOGY_INFO. */
+void gen_device_info_update_from_topology(struct gen_device_info *devinfo,
+                                          const struct drm_i915_query_topology_info *topology);
 
 #ifdef __cplusplus
 }
diff --git a/lib/mesa/src/intel/tools/gen_context.h b/lib/mesa/src/intel/tools/gen_context.h
index 39041408b..3f488c07c 100644
--- a/lib/mesa/src/intel/tools/gen_context.h
+++ b/lib/mesa/src/intel/tools/gen_context.h
@@ -21,22 +21,22 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef INTEL_CONTEXT_H
-#define INTEL_CONTEXT_H
+#ifndef GEN_CONTEXT_H
+#define GEN_CONTEXT_H
 
 #include <stdint.h>
 
 #define RING_SIZE         (1 * 4096)
 #define PPHWSP_SIZE         (1 * 4096)
 
-#define GFX11_LR_CONTEXT_RENDER_SIZE    (14 * 4096)
-#define GFX10_LR_CONTEXT_RENDER_SIZE    (19 * 4096)
-#define GFX9_LR_CONTEXT_RENDER_SIZE     (22 * 4096)
-#define GFX8_LR_CONTEXT_RENDER_SIZE     (20 * 4096)
-#define GFX8_LR_CONTEXT_OTHER_SIZE      (2 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE    (14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE    (19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE     (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE     (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE      (2 * 4096)
 
-#define CONTEXT_RENDER_SIZE GFX9_LR_CONTEXT_RENDER_SIZE /* largest size */
-#define CONTEXT_OTHER_SIZE GFX8_LR_CONTEXT_OTHER_SIZE
+#define CONTEXT_RENDER_SIZE GEN9_LR_CONTEXT_RENDER_SIZE /* largest size */
+#define CONTEXT_OTHER_SIZE GEN8_LR_CONTEXT_OTHER_SIZE
 
 #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
 #define MI_LRI_FORCE_POSTED       (1<<12)
@@ -70,10 +70,26 @@
 #define MEMORY_MAP_SIZE (64 /* MiB */ * 1024 * 1024)
 
 #define PTE_SIZE 4
-#define GFX8_PTE_SIZE 8
+#define GEN8_PTE_SIZE 8
 
 #define NUM_PT_ENTRIES (ALIGN(MEMORY_MAP_SIZE, 4096) / 4096)
-#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GFX8_PTE_SIZE, 4096)
+#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GEN8_PTE_SIZE, 4096)
+
+#define STATIC_GGTT_MAP_START 0
+/* Regions are packed back to back: render, blitter, video; ring then context. */
+#define RENDER_RING_ADDR STATIC_GGTT_MAP_START
+#define RENDER_CONTEXT_ADDR (RENDER_RING_ADDR + RING_SIZE)
+/* Render context spans PPHWSP_SIZE + GEN9_LR_CONTEXT_RENDER_SIZE. */
+#define BLITTER_RING_ADDR (RENDER_CONTEXT_ADDR + PPHWSP_SIZE + GEN9_LR_CONTEXT_RENDER_SIZE)
+#define BLITTER_CONTEXT_ADDR (BLITTER_RING_ADDR + RING_SIZE)
+/* Blitter context spans PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE. */
+#define VIDEO_RING_ADDR (BLITTER_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE)
+#define VIDEO_CONTEXT_ADDR (VIDEO_RING_ADDR + RING_SIZE)
+/* The video context uses the same span; the static map ends right after it. */
+#define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE)
+#define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
+/* PML4_PHYS_ADDR is the first address past the static map, as a uint64_t. */
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
 
 #define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
                                  * PPGTT Enabled |
@@ -81,24 +97,11 @@
                                  * Valid
                                  */
 
-#define MI_LOAD_REGISTER_IMM_vals(data, flags, ...) do {                \
-      uint32_t __regs[] = { __VA_ARGS__ };                              \
-      assert((ARRAY_SIZE(__regs) % 2) == 0);                            \
-      *(data)++ = MI_LOAD_REGISTER_IMM_n(ARRAY_SIZE(__regs) / 2) | (flags); \
-      for (unsigned __e = 0; __e < ARRAY_SIZE(__regs); __e++)           \
-         *(data)++ = __regs[__e];                                       \
-   } while (0)
-
-
-struct gen_context_parameters {
-   uint64_t pml4_addr;
-   uint64_t ring_addr;
-   uint32_t ring_size;
-};
-
-typedef void (*gen_context_init_t)(const struct gen_context_parameters *, uint32_t *, uint32_t *);
+#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR  | CONTEXT_FLAGS)
+#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS)
+#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR   | CONTEXT_FLAGS)
 
-#include "gfx8_context.h"
-#include "gfx10_context.h"
+#include "gen8_context.h"
+#include "gen10_context.h"
 
-#endif /* INTEL_CONTEXT_H */
+#endif /* GEN_CONTEXT_H */
author	Jonathan Gray <jsg@cvs.openbsd.org>	2019-01-29 10:58:00 +0000
committer	Jonathan Gray <jsg@cvs.openbsd.org>	2019-01-29 10:58:00 +0000
commit	ee4f0b0cddedf0afced2ceb23523bf373cbd4847 (patch)
tree	539e540aca51a4f17ba7adcb821538699091a5b9 /lib/mesa/src
parent	7cb5aacfe509bd505d403a38471c35da6ba95200 (diff)