summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/drivers/nouveau/nvc0
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2023-11-02 04:53:47 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2023-11-02 04:53:47 +0000
commitb44518130b33cadb5c1d619e9e936ae0e0dbf7cb (patch)
tree6069eb03c39fbc79808a7d94f857118cce75cbe3 /lib/mesa/src/gallium/drivers/nouveau/nvc0
parent32aeb3c41fedbbd7b11aacfec48e8f699d16bff0 (diff)
Merge Mesa 23.1.9
Diffstat (limited to 'lib/mesa/src/gallium/drivers/nouveau/nvc0')
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme41
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h33
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c8
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h1
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c4
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c308
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h1
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c29
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c53
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c4
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c81
11 files changed, 241 insertions, 322 deletions
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 27a7c324b..5e06eaa65 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -581,6 +581,47 @@ crs_loop:
exit maddr 0x1452 /* CONSERVATIVE_RASTER */
send 0x1
+/* NVC0_3D_MACRO_SET_PRIV_REG
+ *
+ * Requests the GR Falcon to set a MMIO register.
+ *
+ * arg = register
+ * param[0] = value
+ * param[1] = mask
+ */
+.section #mme9097_set_priv_reg
+ maddr 0x1044 /* WAIT_FOR_IDLE */
+ send 0x0
+ parm $r2 /* value */
+ parm $r3 /* mask */
+ maddr 0x1d00 /* SCRATCH[0] */
+ send 0x0
+ send $r2
+ send $r3
+ read $r2 0xd1a /* SCRATCH[26] */
+ maddr 0x18c4 /* FIRMWARE[4] */
+ send $r1
+ mov $r1 (extrinsrt 0x0 $r2 0 8 0)
+ mov $r1 (add $r1 -2)
+ /* Set it to 0 increment for later sends */
+ maddr 0x0040 /* NO_OPERATION */
+ branz annul $r1 #spr_unk_else
+spr_wait_loop:
+ read $r1 0xd00 /* SCRATCH(0) */
+ send 0x0
+ mov $r1 (add $r1 -1)
+ branz annul $r1 #spr_wait_loop
+ bra annul #spr_exit
+spr_unk_else:
+ mov $r1 10
+spr_unk_else_loop:
+ send 0x0
+ mov $r1 (add $r1 -1)
+ branz annul $r1 #spr_unk_else_loop
+spr_exit:
+ exit mov $r1 $r1
+ mov $r1 $r1
+
/* NVC0_3D_MACRO_COMPUTE_COUNTER
*
* This macro takes 6 values, num_groups_* and group_size_*, and adds their
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index 79339b466..6ec1452a6 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -395,6 +395,39 @@ const uint32_t mme9097_conservative_raster_state[] = {
0x00004041,
};
+const uint32_t mme9097_set_priv_reg[] = {
+ 0x04110021,
+ 0x00000041,
+ 0x00000201,
+/* 0x000f: spr_wait_loop */
+ 0x00000301,
+ 0x07400021,
+/* 0x0014: spr_unk_else */
+/* 0x0015: spr_unk_else_loop */
+ 0x00000041,
+/* 0x0018: spr_exit */
+ 0x00001041,
+ 0x00001841,
+ 0x03468215,
+ 0x06310021,
+ 0x00000841,
+ 0x02008112,
+ 0xffff8911,
+ 0x00100021,
+ 0x00018837,
+ 0x03400115,
+ 0x00000041,
+ 0xffffc911,
+ 0xffff4837,
+ 0x00014027,
+ 0x00028111,
+ 0x00000041,
+ 0xffffc911,
+ 0xffff88b7,
+ 0x00000991,
+ 0x00000911,
+};
+
const uint32_t mme9097_compute_counter[] = {
/* 0x0003: iic_loop_start */
0x00000f11,
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index f718d0d8f..9d107a275 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -606,14 +606,6 @@ nvc0_get_sample_locations(unsigned sample_count)
{ 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */
{ 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */
{ 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */
-#if 0
- /* NOTE: there are alternative modes for MS2 and MS8, currently not used */
- static const uint8_t ms8_alt[8][2] = {
- { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */
- { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */
- { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */
- { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */
-#endif
const uint8_t (*ptr)[2];
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 74996fbc8..536841f60 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -60,7 +60,6 @@ struct nvc0_program {
uint32_t tess_mode; /* ~0 if defined by the other stage */
} tp;
struct {
- uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
} cp;
uint8_t num_barriers;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index e4c36a8ee..c727c9d0f 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -421,7 +421,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
* of the following logic more complicated.
*/
if (hq->is64bit)
- nouveau_fence_emit(hq->fence);
+ nouveau_fence_next_if_current(&nvc0->base, hq->fence);
/* We either need to compute a 32- or 64-bit difference between 2 values,
* and then store the result as either a 32- or 64-bit value. As such let's
@@ -643,7 +643,7 @@ nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
/* ensure the query's fence has been emitted */
if (hq->is64bit)
- nouveau_fence_emit(hq->fence);
+ nouveau_fence_next_if_current(&nvc0->base, hq->fence);
PUSH_SPACE(push, 5);
PUSH_REF1 (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 000f24aa1..40f3d6368 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -117,7 +117,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
const struct nouveau_screen *screen = nouveau_screen(pscreen);
struct nouveau_device *dev = screen->device;
- static bool debug_cap_printed[PIPE_CAP_LAST] = {};
switch (param) {
/* non-boolean caps */
@@ -216,7 +215,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
@@ -247,12 +245,10 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
case PIPE_CAP_VS_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_START_INSTANCE:
- case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -275,6 +271,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MEMOBJ:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
@@ -293,28 +290,14 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_CLOCK:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
- case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_QUERY_SO_OVERFLOW:
- case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL:
case PIPE_CAP_TGSI_DIV:
case PIPE_CAP_IMAGE_ATOMIC_INC_WRAP:
case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
- case PIPE_CAP_FLATSHADE:
- case PIPE_CAP_ALPHA_TEST:
- case PIPE_CAP_POINT_SIZE_FIXED:
- case PIPE_CAP_TWO_SIDED_COLOR:
- case PIPE_CAP_CLIP_PLANES:
case PIPE_CAP_TEXTURE_SHADOW_LOD:
- case PIPE_CAP_PACKED_STREAM_OUTPUT:
case PIPE_CAP_CLEAR_SCISSORED:
- case PIPE_CAP_GL_CLAMP:
case PIPE_CAP_IMAGE_STORE_FORMATTED:
- case PIPE_CAP_TEXRECT:
- case PIPE_CAP_ALLOW_DYNAMIC_VAO_FASTPATH:
- case PIPE_CAP_SHAREABLE_SHADERS:
- case PIPE_CAP_PREFER_BACK_BUFFER_REUSE:
case PIPE_CAP_QUERY_MEMORY_INFO:
return 1;
case PIPE_CAP_TEXTURE_TRANSFER_MODES:
@@ -356,83 +339,17 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_NIR_IMAGES_AS_DEREF:
return 0;
- /* unsupported caps */
- case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART:
- case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
- case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
- case PIPE_CAP_SHADER_STENCIL_EXPORT:
- case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
- case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
- case PIPE_CAP_FAKE_SW_MSAA:
- case PIPE_CAP_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
- case PIPE_CAP_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_FS_POINT_IS_SYSVAL:
- case PIPE_CAP_GENERATE_MIPMAP:
- case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
- case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_PCI_GROUP:
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
- case PIPE_CAP_SHADER_CAN_READ_OUTPUTS:
- case PIPE_CAP_NATIVE_FENCE_FD:
- case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
- case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
- case PIPE_CAP_MEMOBJ:
- case PIPE_CAP_LOAD_CONSTBUF:
- case PIPE_CAP_TILE_RASTER_ORDER:
- case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
- case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
- case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
- case PIPE_CAP_CONTEXT_PRIORITY_MASK:
- case PIPE_CAP_FENCE_SIGNAL:
- case PIPE_CAP_CONSTBUF0_FLAGS:
- case PIPE_CAP_PACKED_UNIFORMS:
- case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
- case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS:
- case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS:
- case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS:
- case PIPE_CAP_SURFACE_SAMPLE_COUNT:
- case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
- case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
- case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
- case PIPE_CAP_NIR_COMPACT_ARRAYS:
- case PIPE_CAP_IMAGE_LOAD_FORMATTED:
- case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
- case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
- case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
- case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
- case PIPE_CAP_FBFETCH_COHERENT:
- case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
+ return 0;
+
case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS: /* could be done */
case PIPE_CAP_INTEGER_MULTIPLY_32X16: /* could be done */
- case PIPE_CAP_FRONTEND_NOOP:
- case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
- case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED:
- case PIPE_CAP_PSIZ_CLAMPED:
- case PIPE_CAP_TEXTURE_BUFFER_SAMPLER:
- case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0:
case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE: /* when we fix MT stuff */
case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL: /* TODO */
case PIPE_CAP_SHADER_ATOMIC_INT64: /* TODO */
- case PIPE_CAP_GLSL_ZERO_INIT:
- case PIPE_CAP_BLEND_EQUATION_ADVANCED:
- case PIPE_CAP_NO_CLIP_ON_COPY_TEX:
- case PIPE_CAP_DEVICE_PROTECTED_SURFACE:
- case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB:
- case PIPE_CAP_DRAW_VERTEX_STATE:
- case PIPE_CAP_PREFER_POT_ALIGNED_VARYINGS:
- case PIPE_CAP_MAX_SPARSE_TEXTURE_SIZE:
- case PIPE_CAP_MAX_SPARSE_3D_TEXTURE_SIZE:
- case PIPE_CAP_MAX_SPARSE_ARRAY_TEXTURE_LAYERS:
- case PIPE_CAP_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS:
- case PIPE_CAP_QUERY_SPARSE_TEXTURE_RESIDENCY:
- case PIPE_CAP_CLAMP_SPARSE_TEXTURE_LOD:
case PIPE_CAP_HARDWARE_GL_SELECT:
return 0;
@@ -454,15 +371,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return nouveau_screen(pscreen)->is_uma;
default:
- if (!debug_cap_printed[param]) {
- debug_printf("%s: unhandled cap %d\n", __func__, param);
- debug_cap_printed[param] = true;
- }
- FALLTHROUGH;
- /* caps where we want the default value */
- case PIPE_CAP_DMABUF:
- case PIPE_CAP_ESSL_FEATURE_LEVEL:
- case PIPE_CAP_THROTTLE:
return u_pipe_screen_get_param_defaults(pscreen, param);
}
}
@@ -538,8 +446,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
return 1;
case PIPE_SHADER_CAP_DROUND_SUPPORTED:
return 1;
- case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED:
- case PIPE_SHADER_CAP_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_FP16:
@@ -726,7 +632,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_bo_ref(NULL, &screen->fence.bo);
nouveau_bo_ref(NULL, &screen->poly_cache);
- nouveau_heap_destroy(&screen->lib_code);
+ nouveau_heap_free(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
FREE(screen->tic.entries);
@@ -734,6 +640,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->eng3d);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->copy);
nouveau_object_del(&screen->compute);
nouveau_object_del(&screen->nvsw);
@@ -881,24 +788,43 @@ nvc0_screen_fence_update(struct pipe_screen *pscreen)
static int
nvc0_screen_init_compute(struct nvc0_screen *screen)
{
+ const struct nouveau_mclass computes[] = {
+ { GA102_COMPUTE_CLASS, -1 },
+ { TU102_COMPUTE_CLASS, -1 },
+ { GV100_COMPUTE_CLASS, -1 },
+ { GP104_COMPUTE_CLASS, -1 },
+ { GP100_COMPUTE_CLASS, -1 },
+ { GM200_COMPUTE_CLASS, -1 },
+ { GM107_COMPUTE_CLASS, -1 },
+ { NVF0_COMPUTE_CLASS, -1 },
+ { NVE4_COMPUTE_CLASS, -1 },
+ /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
+ * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
+// { NVC8_COMPUTE_CLASS, -1 },
+ { NVC0_COMPUTE_CLASS, -1 },
+ {}
+ };
+ struct nouveau_object *chan = screen->base.channel;
+ int ret;
+
screen->base.base.get_compute_param = nvc0_screen_get_compute_param;
- switch (screen->base.device->chipset & ~0xf) {
- case 0xc0:
- case 0xd0:
- return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
- case 0xe0:
- case 0xf0:
- case 0x100:
- case 0x110:
- case 0x120:
- case 0x130:
- case 0x140:
- case 0x160:
- return nve4_screen_compute_setup(screen, screen->base.pushbuf);
- default:
- return -1;
+ ret = nouveau_object_mclass(chan, computes);
+ if (ret < 0) {
+ NOUVEAU_ERR("No supported compute class: %d\n", ret);
+ return ret;
}
+
+ ret = nouveau_object_new(chan, 0xbeef00c0, computes[ret].oclass, NULL, 0, &screen->compute);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate compute class: %d\n", ret);
+ return ret;
+ }
+
+ if (screen->compute->oclass < NVE4_COMPUTE_CLASS)
+ return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
+
+ return nve4_screen_compute_setup(screen, screen->base.pushbuf);
}
static int
@@ -957,7 +883,7 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, struct nouveau_pushbuf
nouveau_bo_ref(NULL, &screen->text);
screen->text = bo;
- nouveau_heap_destroy(&screen->lib_code);
+ nouveau_heap_free(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
/* XXX: getting a page fault at the end of the code buffer every few
@@ -1020,7 +946,7 @@ nvc0_screen_get_compiler_options(struct pipe_screen *pscreen,
struct nvc0_screen *screen = nvc0_screen(pscreen);
if (ir == PIPE_SHADER_IR_NIR)
return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset,
- shader);
+ shader, screen->base.prefer_nir);
return NULL;
}
@@ -1036,9 +962,9 @@ nvc0_screen_create(struct nouveau_device *dev)
struct nvc0_screen *screen;
struct pipe_screen *pscreen;
struct nouveau_object *chan;
+
struct nouveau_pushbuf *push;
uint64_t value;
- uint32_t obj_class;
uint32_t flags;
int ret;
unsigned i;
@@ -1054,6 +980,7 @@ nvc0_screen_create(struct nouveau_device *dev)
case 0x130:
case 0x140:
case 0x160:
+ case 0x170:
break;
default:
return NULL;
@@ -1131,33 +1058,48 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->nvsw->handle);
}
- switch (dev->chipset & ~0xf) {
- case 0x160:
- case 0x140:
- case 0x130:
- case 0x120:
- case 0x110:
- case 0x100:
- case 0xf0:
- obj_class = NVF0_P2MF_CLASS;
- break;
- case 0xe0:
- obj_class = NVE4_P2MF_CLASS;
- break;
- default:
- obj_class = NVC0_M2MF_CLASS;
- break;
- }
- ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0,
+ const struct nouveau_mclass m2mfs[] = {
+ { NVF0_P2MF_CLASS, -1 },
+ { NVE4_P2MF_CLASS, -1 },
+ { NVC0_M2MF_CLASS, -1 },
+ {}
+ };
+
+ ret = nouveau_object_mclass(chan, m2mfs);
+ if (ret < 0)
+ FAIL_SCREEN_INIT("No supported m2mf class: %d\n", ret);
+
+ ret = nouveau_object_new(chan, 0xbeef323f, m2mfs[ret].oclass, NULL, 0,
&screen->m2mf);
if (ret)
FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->m2mf->oclass);
- if (screen->m2mf->oclass == NVE4_P2MF_CLASS) {
+
+ if (screen->m2mf->oclass >= NVE4_P2MF_CLASS) {
+ const struct nouveau_mclass copys[] = {
+ { AMPERE_DMA_COPY_B, -1 },
+ { AMPERE_DMA_COPY_A, -1 },
+ { TURING_DMA_COPY_A, -1 },
+ { VOLTA_DMA_COPY_A, -1 },
+ { PASCAL_DMA_COPY_B, -1 },
+ { PASCAL_DMA_COPY_A, -1 },
+ { MAXWELL_DMA_COPY_A, -1 },
+ { KEPLER_DMA_COPY_A, -1 },
+ {}
+ };
+
+ ret = nouveau_object_mclass(chan, copys);
+ if (ret < 0)
+ FAIL_SCREEN_INIT("No supported copy engine class: %d\n", ret);
+
+ ret = nouveau_object_new(chan, 0, copys[ret].oclass, NULL, 0, &screen->copy);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating copy engine class: %d\n", ret);
+
BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1);
- PUSH_DATA (push, NVE4_COPY_CLASS);
+ PUSH_DATA (push, screen->copy->oclass);
}
ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0,
@@ -1186,67 +1128,32 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAh(push, screen->fence.bo->offset + 16);
PUSH_DATA (push, screen->fence.bo->offset + 16);
- switch (dev->chipset & ~0xf) {
- case 0x160:
- obj_class = TU102_3D_CLASS;
- break;
- case 0x140:
- obj_class = GV100_3D_CLASS;
- break;
- case 0x130:
- switch (dev->chipset) {
- case 0x130:
- case 0x13b:
- obj_class = GP100_3D_CLASS;
- break;
- default:
- obj_class = GP102_3D_CLASS;
- break;
- }
- break;
- case 0x120:
- obj_class = GM200_3D_CLASS;
- break;
- case 0x110:
- obj_class = GM107_3D_CLASS;
- break;
- case 0x100:
- case 0xf0:
- obj_class = NVF0_3D_CLASS;
- break;
- case 0xe0:
- switch (dev->chipset) {
- case 0xea:
- obj_class = NVEA_3D_CLASS;
- break;
- default:
- obj_class = NVE4_3D_CLASS;
- break;
- }
- break;
- case 0xd0:
- obj_class = NVC8_3D_CLASS;
- break;
- case 0xc0:
- default:
- switch (dev->chipset) {
- case 0xc8:
- obj_class = NVC8_3D_CLASS;
- break;
- case 0xc1:
- obj_class = NVC1_3D_CLASS;
- break;
- default:
- obj_class = NVC0_3D_CLASS;
- break;
- }
- break;
- }
- ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0,
+ const struct nouveau_mclass threeds[] = {
+ { GA102_3D_CLASS, -1 },
+ { TU102_3D_CLASS, -1 },
+ { GV100_3D_CLASS, -1 },
+ { GP102_3D_CLASS, -1 },
+ { GP100_3D_CLASS, -1 },
+ { GM200_3D_CLASS, -1 },
+ { GM107_3D_CLASS, -1 },
+ { NVF0_3D_CLASS, -1 },
+ { NVEA_3D_CLASS, -1 },
+ { NVE4_3D_CLASS, -1 },
+ { NVC8_3D_CLASS, -1 },
+ { NVC1_3D_CLASS, -1 },
+ { NVC0_3D_CLASS, -1 },
+ {}
+ };
+
+ ret = nouveau_object_mclass(chan, threeds);
+ if (ret < 0)
+ FAIL_SCREEN_INIT("No supported 3d class: %d\n", ret);
+
+ ret = nouveau_object_new(chan, 0xbeef003d, threeds[ret].oclass, NULL, 0,
&screen->eng3d);
if (ret)
FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
- screen->base.class_3d = obj_class;
+ screen->base.class_3d = screen->eng3d->oclass;
BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->eng3d->oclass);
@@ -1289,7 +1196,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0);
- } else {
+ } else if (screen->eng3d->oclass < GA102_3D_CLASS) {
BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
PUSH_DATA (push, 15);
}
@@ -1457,6 +1364,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
+ MK_MACRO(NVC0_3D_MACRO_SET_PRIV_REG, mme9097_set_priv_reg);
MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
@@ -1478,6 +1386,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count);
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write);
MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state);
+ MK_MACRO(NVC0_3D_MACRO_SET_PRIV_REG, mmec597_set_priv_reg);
MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter);
MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query);
}
@@ -1554,6 +1463,15 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
PUSH_DATA (push, 0);
+ /* requires Nvidia provided firmware */
+ if (screen->eng3d->oclass >= GM200_3D_CLASS) {
+ unsigned reg = screen->eng3d->oclass >= GV100_3D_CLASS ? 0x419ba4 : 0x419f78;
+ BEGIN_1IC0(push, NVC0_3D(MACRO_SET_PRIV_REG), 3);
+ PUSH_DATA (push, reg);
+ PUSH_DATA (push, 0x00000000);
+ PUSH_DATA (push, 0x00000008);
+ }
+
PUSH_KICK (push);
screen->tic.entries = CALLOC(
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 8bce90ae3..07e3cde62 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -126,6 +126,7 @@ struct nvc0_screen {
struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
+ struct nouveau_object *copy;
struct nouveau_object *compute;
struct nouveau_object *nvsw;
};
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 2f4a9c117..7141a99ba 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -741,8 +741,7 @@ nvc0_cp_state_create(struct pipe_context *pipe,
prog->type = PIPE_SHADER_COMPUTE;
prog->pipe.type = cso->ir_type;
- prog->cp.smem_size = cso->req_local_mem;
- prog->cp.lmem_size = cso->req_private_mem;
+ prog->cp.smem_size = cso->static_shared_mem;
prog->parm_size = cso->req_input_mem;
switch(cso->ir_type) {
@@ -785,6 +784,31 @@ nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso)
}
static void
+nvc0_get_compute_state_info(struct pipe_context *pipe, void *hwcso,
+ struct pipe_compute_state_object_info *info)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_program *prog = (struct nvc0_program *)hwcso;
+ uint16_t obj_class = nvc0->screen->compute->oclass;
+ uint32_t chipset = nvc0->screen->base.device->chipset;
+ uint32_t smregs;
+
+ // fermi and a handful of tegra devices have less gprs per SM
+ if (obj_class < NVE4_COMPUTE_CLASS || chipset == 0xea || chipset == 0x12b || chipset == 0x13b)
+ smregs = 32768;
+ else
+ smregs = 65536;
+
+ // TODO: not 100% sure about 8 for volta, but earlier reverse engineering indicates it
+ uint32_t gpr_alloc_size = obj_class >= GV100_COMPUTE_CLASS ? 8 : 4;
+ uint32_t threads = smregs / align(prog->num_gprs, gpr_alloc_size);
+
+ info->max_threads = MIN2(ROUND_DOWN_TO(threads, 32), 1024);
+ info->private_memory = prog->hdr[1] & 0xfffff0;
+ info->preferred_simd_size = 32;
+}
+
+static void
nvc0_set_constant_buffer(struct pipe_context *pipe,
enum pipe_shader_type shader, uint index,
bool take_ownership,
@@ -1495,6 +1519,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->create_compute_state = nvc0_cp_state_create;
pipe->bind_compute_state = nvc0_cp_state_bind;
+ pipe->get_compute_state_info = nvc0_get_compute_state_info;
pipe->delete_compute_state = nvc0_sp_state_delete;
pipe->set_blend_color = nvc0_set_blend_color;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 6726b9458..3dd7d0b6c 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -5,57 +5,6 @@
#include "nvc0/nvc0_context.h"
-#if 0
-static void
-nvc0_validate_zcull(struct nvc0_context *nvc0)
-{
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
- struct nv50_surface *sf = nv50_surface(fb->zsbuf);
- struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
- struct nouveau_bo *bo = mt->base.bo;
- uint32_t size;
- uint32_t offset = align(mt->total_size, 1 << 17);
- unsigned width, height;
-
- assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);
-
- size = mt->total_size * 2;
-
- height = align(fb->height, 32);
- width = fb->width % 224;
- if (width)
- width = fb->width + (224 - width);
- else
- width = fb->width;
-
- BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, bo->offset + offset);
- PUSH_DATA (push, bo->offset + offset);
- offset += 1 << 17;
- BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
- PUSH_DATAh(push, bo->offset + offset);
- PUSH_DATA (push, bo->offset + offset);
- BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
- PUSH_DATA (push, size);
- PUSH_DATA (push, size >> 16);
- BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
- PUSH_DATA (push, 2);
- BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
- PUSH_DATA (push, width);
- PUSH_DATA (push, height);
- PUSH_DATA (push, 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
- PUSH_DATA (push, 0);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
- PUSH_DATA (push, 0);
-}
-#endif
-
static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
@@ -811,7 +760,7 @@ nvc0_validate_fbread(struct nvc0_context *nvc0)
nvc0->fragprog->fp.reads_framebuffer &&
nvc0->framebuffer.nr_cbufs &&
nvc0->framebuffer.cbufs[0]) {
- struct pipe_sampler_view tmpl;
+ struct pipe_sampler_view tmpl = {0};
struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];
tmpl.target = PIPE_TEXTURE_2D_ARRAY;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 2e7de2752..ea7d737f2 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -31,7 +31,7 @@
#include "tgsi/tgsi_ureg.h"
-#include "os/os_thread.h"
+#include "util/u_thread.h"
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"
@@ -969,7 +969,7 @@ nvc0_blit_set_src(struct nvc0_blitctx *ctx,
{
struct nvc0_context *nvc0 = ctx->nvc0;
struct pipe_context *pipe = &nvc0->base.pipe;
- struct pipe_sampler_view templ;
+ struct pipe_sampler_view templ = {0};
uint32_t flags;
unsigned s;
enum pipe_texture_target target;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 23d157fd4..9af2601b3 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -44,49 +44,10 @@ int
nve4_screen_compute_setup(struct nvc0_screen *screen,
struct nouveau_pushbuf *push)
{
- struct nouveau_device *dev = screen->base.device;
- struct nouveau_object *chan = screen->base.channel;
int i;
- int ret;
- uint32_t obj_class;
+ uint32_t obj_class = screen->compute->oclass;
uint64_t address;
- switch (dev->chipset & ~0xf) {
- case 0x160:
- obj_class = TU102_COMPUTE_CLASS;
- break;
- case 0x140:
- obj_class = GV100_COMPUTE_CLASS;
- break;
- case 0x100:
- case 0xf0:
- obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
- break;
- case 0xe0:
- obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
- break;
- case 0x110:
- obj_class = GM107_COMPUTE_CLASS;
- break;
- case 0x120:
- obj_class = GM200_COMPUTE_CLASS;
- break;
- case 0x130:
- obj_class = (dev->chipset == 0x130 || dev->chipset == 0x13b) ?
- GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS;
- break;
- default:
- NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
- return -1;
- }
-
- ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
- &screen->compute);
- if (ret) {
- NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
- return ret;
- }
-
BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->oclass);
@@ -627,6 +588,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
{
const struct nvc0_screen *screen = nvc0->screen;
const struct nvc0_program *cp = nvc0->compprog;
+ uint32_t shared_size = cp->cp.smem_size + info->variable_shared_mem;
NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE);
NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE);
@@ -647,19 +609,16 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
- NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE,
- align(cp->cp.smem_size, 0x100));
- NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
- (cp->hdr[1] & 0xfffff0) +
- align(cp->cp.lmem_size, 0x10));
+ NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE, align(shared_size, 0x100));
+ NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, cp->hdr[1] & 0xfffff0);
NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
- if (cp->cp.smem_size > (32 << 10))
+ if (shared_size > (32 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
else
- if (cp->cp.smem_size > (16 << 10))
+ if (shared_size > (16 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
else
@@ -692,6 +651,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
{
const struct nvc0_screen *screen = nvc0->screen;
const struct nvc0_program *cp = nvc0->compprog;
+ uint32_t shared_size = cp->cp.smem_size + info->variable_shared_mem;
NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
@@ -707,11 +667,8 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
- NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE,
- align(cp->cp.smem_size, 0x100));
- NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
- (cp->hdr[1] & 0xfffff0) +
- align(cp->cp.lmem_size, 0x10));
+ NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE, align(shared_size, 0x100));
+ NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, cp->hdr[1] & 0xfffff0);
NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
@@ -753,15 +710,13 @@ gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
struct nvc0_program *cp = nvc0->compprog;
struct nvc0_screen *screen = nvc0->screen;
uint64_t entry = screen->text->offset + cp->code_base;
+ uint32_t shared_size = cp->cp.smem_size + info->variable_shared_mem;
NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY);
- NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
- align(cp->cp.smem_size, 0x100));
- NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
- (cp->hdr[1] & 0xfffff0) +
- align(cp->cp.lmem_size, 0x10));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, align(shared_size, 0x100));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, cp->hdr[1] & 0xfffff0);
NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(8 * 1024));
@@ -770,7 +725,7 @@ gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
- gv100_sm_config_smem_size(cp->cp.smem_size));
+ gv100_sm_config_smem_size(shared_size));
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
@@ -928,8 +883,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
PUSH_REF1(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);
BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
PUSH_DATA (push, desc_gpuaddr >> 8);
- BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
- PUSH_DATA (push, 0x3);
+ if (screen->compute->oclass < GA102_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
+ PUSH_DATA (push, 0x3);
+ } else {
+ BEGIN_NIC0(push, SUBC_CP(0x02c0), 2);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 2);
+ }
BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);