diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-11-02 04:53:47 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-11-02 04:53:47 +0000 |
commit | b44518130b33cadb5c1d619e9e936ae0e0dbf7cb (patch) | |
tree | 6069eb03c39fbc79808a7d94f857118cce75cbe3 /lib/mesa/src/gallium/drivers/nouveau/nvc0 | |
parent | 32aeb3c41fedbbd7b11aacfec48e8f699d16bff0 (diff) |
Merge Mesa 23.1.9
Diffstat (limited to 'lib/mesa/src/gallium/drivers/nouveau/nvc0')
11 files changed, 241 insertions, 322 deletions
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme index 27a7c324b..5e06eaa65 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme @@ -581,6 +581,47 @@ crs_loop: exit maddr 0x1452 /* CONSERVATIVE_RASTER */ send 0x1 +/* NVC0_3D_MACRO_SET_PRIV_REG + * + * Requests the GR Falcon to set a MMIO register. + * + * arg = register + * param[0] = value + * param[1] = mask + */ +.section #mme9097_set_priv_reg + maddr 0x1044 /* WAIT_FOR_IDLE */ + send 0x0 + parm $r2 /* value */ + parm $r3 /* mask */ + maddr 0x1d00 /* SCRATCH[0] */ + send 0x0 + send $r2 + send $r3 + read $r2 0xd1a /* SCRATCH[26] */ + maddr 0x18c4 /* FIRMWARE[4] */ + send $r1 + mov $r1 (extrinsrt 0x0 $r2 0 8 0) + mov $r1 (add $r1 -2) + /* Set it to 0 increment for later sends */ + maddr 0x0040 /* NO_OPERATION */ + branz annul $r1 #spr_unk_else +spr_wait_loop: + read $r1 0xd00 /* SCRATCH(0) */ + send 0x0 + mov $r1 (add $r1 -1) + branz annul $r1 #spr_wait_loop + bra annul #spr_exit +spr_unk_else: + mov $r1 10 +spr_unk_else_loop: + send 0x0 + mov $r1 (add $r1 -1) + branz annul $r1 #spr_unk_else_loop +spr_exit: + exit mov $r1 $r1 + mov $r1 $r1 + /* NVC0_3D_MACRO_COMPUTE_COUNTER * * This macro takes 6 values, num_groups_* and group_size_*, and adds their diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h index 79339b466..6ec1452a6 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h @@ -395,6 +395,39 @@ const uint32_t mme9097_conservative_raster_state[] = { 0x00004041, }; +const uint32_t mme9097_set_priv_reg[] = { + 0x04110021, + 0x00000041, + 0x00000201, +/* 0x000f: spr_wait_loop */ + 0x00000301, + 0x07400021, +/* 0x0014: spr_unk_else */ +/* 0x0015: spr_unk_else_loop */ + 0x00000041, +/* 0x0018: spr_exit */ + 0x00001041, + 0x00001841, + 0x03468215, + 0x06310021, + 0x00000841, + 0x02008112, + 0xffff8911, + 0x00100021, + 0x00018837, + 0x03400115, + 0x00000041, + 0xffffc911, + 0xffff4837, + 0x00014027, + 0x00028111, + 0x00000041, + 0xffffc911, + 0xffff88b7, + 0x00000991, + 0x00000911, +}; + const uint32_t mme9097_compute_counter[] = { /* 0x0003: iic_loop_start */ 0x00000f11, diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index f718d0d8f..9d107a275 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -606,14 +606,6 @@ nvc0_get_sample_locations(unsigned sample_count) { 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */ { 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */ { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */ -#if 0 - /* NOTE: there are alternative modes for MS2 and MS8, currently not used */ - static const uint8_t ms8_alt[8][2] = { - { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */ - { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */ - { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */ - { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */ -#endif const uint8_t (*ptr)[2]; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 74996fbc8..536841f60 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -60,7 +60,6 @@ struct nvc0_program { uint32_t tess_mode; /* ~0 if defined by the other stage */ } tp; struct { - uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */ } cp; uint8_t num_barriers; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index e4c36a8ee..c727c9d0f 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -421,7 +421,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, * of the following logic more complicated. */ if (hq->is64bit) - nouveau_fence_emit(hq->fence); + nouveau_fence_next_if_current(&nvc0->base, hq->fence); /* We either need to compute a 32- or 64-bit difference between 2 values, * and then store the result as either a 32- or 64-bit value. As such let's @@ -643,7 +643,7 @@ nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q) /* ensure the query's fence has been emitted */ if (hq->is64bit) - nouveau_fence_emit(hq->fence); + nouveau_fence_next_if_current(&nvc0->base, hq->fence); PUSH_SPACE(push, 5); PUSH_REF1 (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 000f24aa1..40f3d6368 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -117,7 +117,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; const struct nouveau_screen *screen = nouveau_screen(pscreen); struct nouveau_device *dev = screen->device; - static bool debug_cap_printed[PIPE_CAP_LAST] = {}; switch (param) { /* non-boolean caps */ @@ -216,7 +215,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: @@ -247,12 +245,10 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: case PIPE_CAP_VS_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_START_INSTANCE: - case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_QUERY_LOD: @@ -275,6 +271,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_SHADER_PACK_HALF_FLOAT: case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MEMOBJ: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_QUERY_BUFFER_OBJECT: @@ -293,28 +290,14 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_CLOCK: case PIPE_CAP_COMPUTE: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: - case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: - case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: case PIPE_CAP_TGSI_DIV: case PIPE_CAP_IMAGE_ATOMIC_INC_WRAP: case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF: - case PIPE_CAP_FLATSHADE: - case PIPE_CAP_ALPHA_TEST: - case PIPE_CAP_POINT_SIZE_FIXED: - case PIPE_CAP_TWO_SIDED_COLOR: - case PIPE_CAP_CLIP_PLANES: case PIPE_CAP_TEXTURE_SHADOW_LOD: - case PIPE_CAP_PACKED_STREAM_OUTPUT: case PIPE_CAP_CLEAR_SCISSORED: - case PIPE_CAP_GL_CLAMP: case PIPE_CAP_IMAGE_STORE_FORMATTED: - case PIPE_CAP_TEXRECT: - case PIPE_CAP_ALLOW_DYNAMIC_VAO_FASTPATH: - case PIPE_CAP_SHAREABLE_SHADERS: - case PIPE_CAP_PREFER_BACK_BUFFER_REUSE: case PIPE_CAP_QUERY_MEMORY_INFO: return 1; case PIPE_CAP_TEXTURE_TRANSFER_MODES: @@ -356,83 +339,17 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_NIR_IMAGES_AS_DEREF: return 0; - /* unsupported caps */ - case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART: - case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: - case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: - case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY: - case PIPE_CAP_FAKE_SW_MSAA: - case PIPE_CAP_VS_WINDOW_SPACE_POSITION: - case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: - case PIPE_CAP_FS_POSITION_IS_SYSVAL: - case PIPE_CAP_FS_POINT_IS_SYSVAL: - case PIPE_CAP_GENERATE_MIPMAP: - case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: - case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_PCI_GROUP: case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: - case PIPE_CAP_SHADER_CAN_READ_OUTPUTS: - case PIPE_CAP_NATIVE_FENCE_FD: - case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: - case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: - case PIPE_CAP_MEMOBJ: - case PIPE_CAP_LOAD_CONSTBUF: - case PIPE_CAP_TILE_RASTER_ORDER: - case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: - case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS: - case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: - case PIPE_CAP_CONTEXT_PRIORITY_MASK: - case PIPE_CAP_FENCE_SIGNAL: - case PIPE_CAP_CONSTBUF0_FLAGS: - case PIPE_CAP_PACKED_UNIFORMS: - case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: - case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: - case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: - case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: - case PIPE_CAP_SURFACE_SAMPLE_COUNT: - case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: - case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: - case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: - case PIPE_CAP_NIR_COMPACT_ARRAYS: - case PIPE_CAP_IMAGE_LOAD_FORMATTED: - case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: - case PIPE_CAP_ATOMIC_FLOAT_MINMAX: - case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE: - case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK: - case PIPE_CAP_FBFETCH_COHERENT: - case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE: + return 0; + case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS: /* could be done */ case PIPE_CAP_INTEGER_MULTIPLY_32X16: /* could be done */ - case PIPE_CAP_FRONTEND_NOOP: - case PIPE_CAP_SHADER_SAMPLES_IDENTICAL: - case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED: - case PIPE_CAP_PSIZ_CLAMPED: - case PIPE_CAP_TEXTURE_BUFFER_SAMPLER: - case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0: case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE: /* when we fix MT stuff */ case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL: /* TODO */ case PIPE_CAP_SHADER_ATOMIC_INT64: /* TODO */ - case PIPE_CAP_GLSL_ZERO_INIT: - case PIPE_CAP_BLEND_EQUATION_ADVANCED: - case PIPE_CAP_NO_CLIP_ON_COPY_TEX: - case PIPE_CAP_DEVICE_PROTECTED_SURFACE: - case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB: - case PIPE_CAP_DRAW_VERTEX_STATE: - case PIPE_CAP_PREFER_POT_ALIGNED_VARYINGS: - case PIPE_CAP_MAX_SPARSE_TEXTURE_SIZE: - case PIPE_CAP_MAX_SPARSE_3D_TEXTURE_SIZE: - case PIPE_CAP_MAX_SPARSE_ARRAY_TEXTURE_LAYERS: - case PIPE_CAP_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS: - case PIPE_CAP_QUERY_SPARSE_TEXTURE_RESIDENCY: - case PIPE_CAP_CLAMP_SPARSE_TEXTURE_LOD: case PIPE_CAP_HARDWARE_GL_SELECT: return 0; @@ -454,15 +371,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return nouveau_screen(pscreen)->is_uma; default: - if (!debug_cap_printed[param]) { - debug_printf("%s: unhandled cap %d\n", __func__, param); - debug_cap_printed[param] = true; - } - FALLTHROUGH; - /* caps where we want the default value */ - case PIPE_CAP_DMABUF: - case PIPE_CAP_ESSL_FEATURE_LEVEL: - case PIPE_CAP_THROTTLE: return u_pipe_screen_get_param_defaults(pscreen, param); } } @@ -538,8 +446,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, return 1; case PIPE_SHADER_CAP_DROUND_SUPPORTED: return 1; - case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_LDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_INT64_ATOMICS: case PIPE_SHADER_CAP_FP16: @@ -726,7 +632,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) nouveau_bo_ref(NULL, &screen->fence.bo); nouveau_bo_ref(NULL, &screen->poly_cache); - nouveau_heap_destroy(&screen->lib_code); + nouveau_heap_free(&screen->lib_code); nouveau_heap_destroy(&screen->text_heap); FREE(screen->tic.entries); @@ -734,6 +640,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->eng3d); nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->copy); nouveau_object_del(&screen->compute); nouveau_object_del(&screen->nvsw); @@ -881,24 +788,43 @@ nvc0_screen_fence_update(struct pipe_screen *pscreen) static int nvc0_screen_init_compute(struct nvc0_screen *screen) { + const struct nouveau_mclass computes[] = { + { GA102_COMPUTE_CLASS, -1 }, + { TU102_COMPUTE_CLASS, -1 }, + { GV100_COMPUTE_CLASS, -1 }, + { GP104_COMPUTE_CLASS, -1 }, + { GP100_COMPUTE_CLASS, -1 }, + { GM200_COMPUTE_CLASS, -1 }, + { GM107_COMPUTE_CLASS, -1 }, + { NVF0_COMPUTE_CLASS, -1 }, + { NVE4_COMPUTE_CLASS, -1 }, + /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, + * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ +// { NVC8_COMPUTE_CLASS, -1 }, + { NVC0_COMPUTE_CLASS, -1 }, + {} + }; + struct nouveau_object *chan = screen->base.channel; + int ret; + screen->base.base.get_compute_param = nvc0_screen_get_compute_param; - switch (screen->base.device->chipset & ~0xf) { - case 0xc0: - case 0xd0: - return nvc0_screen_compute_setup(screen, screen->base.pushbuf); - case 0xe0: - case 0xf0: - case 0x100: - case 0x110: - case 0x120: - case 0x130: - case 0x140: - case 0x160: - return nve4_screen_compute_setup(screen, screen->base.pushbuf); - default: - return -1; + ret = nouveau_object_mclass(chan, computes); + if (ret < 0) { + NOUVEAU_ERR("No supported compute class: %d\n", ret); + return ret; } + + ret = nouveau_object_new(chan, 0xbeef00c0, computes[ret].oclass, NULL, 0, &screen->compute); + if (ret) { + NOUVEAU_ERR("Failed to allocate compute class: %d\n", ret); + return ret; + } + + if (screen->compute->oclass < NVE4_COMPUTE_CLASS) + return nvc0_screen_compute_setup(screen, screen->base.pushbuf); + + return nve4_screen_compute_setup(screen, screen->base.pushbuf); } static int @@ -957,7 +883,7 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, struct nouveau_pushbuf nouveau_bo_ref(NULL, &screen->text); screen->text = bo; - nouveau_heap_destroy(&screen->lib_code); + nouveau_heap_free(&screen->lib_code); nouveau_heap_destroy(&screen->text_heap); /* XXX: getting a page fault at the end of the code buffer every few @@ -1020,7 +946,7 @@ nvc0_screen_get_compiler_options(struct pipe_screen *pscreen, struct nvc0_screen *screen = nvc0_screen(pscreen); if (ir == PIPE_SHADER_IR_NIR) return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset, - shader); + shader, screen->base.prefer_nir); return NULL; } @@ -1036,9 +962,9 @@ nvc0_screen_create(struct nouveau_device *dev) struct nvc0_screen *screen; struct pipe_screen *pscreen; struct nouveau_object *chan; + struct nouveau_pushbuf *push; uint64_t value; - uint32_t obj_class; uint32_t flags; int ret; unsigned i; @@ -1054,6 +980,7 @@ nvc0_screen_create(struct nouveau_device *dev) case 0x130: case 0x140: case 0x160: + case 0x170: break; default: return NULL; @@ -1131,33 +1058,48 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->nvsw->handle); } - switch (dev->chipset & ~0xf) { - case 0x160: - case 0x140: - case 0x130: - case 0x120: - case 0x110: - case 0x100: - case 0xf0: - obj_class = NVF0_P2MF_CLASS; - break; - case 0xe0: - obj_class = NVE4_P2MF_CLASS; - break; - default: - obj_class = NVC0_M2MF_CLASS; - break; - } - ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0, + const struct nouveau_mclass m2mfs[] = { + { NVF0_P2MF_CLASS, -1 }, + { NVE4_P2MF_CLASS, -1 }, + { NVC0_M2MF_CLASS, -1 }, + {} + }; + + ret = nouveau_object_mclass(chan, m2mfs); + if (ret < 0) + FAIL_SCREEN_INIT("No supported m2mf class: %d\n", ret); + + ret = nouveau_object_new(chan, 0xbeef323f, m2mfs[ret].oclass, NULL, 0, &screen->m2mf); if (ret) FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->m2mf->oclass); - if (screen->m2mf->oclass == NVE4_P2MF_CLASS) { + + if (screen->m2mf->oclass >= NVE4_P2MF_CLASS) { + const struct nouveau_mclass copys[] = { + { AMPERE_DMA_COPY_B, -1 }, + { AMPERE_DMA_COPY_A, -1 }, + { TURING_DMA_COPY_A, -1 }, + { VOLTA_DMA_COPY_A, -1 }, + { PASCAL_DMA_COPY_B, -1 }, + { PASCAL_DMA_COPY_A, -1 }, + { MAXWELL_DMA_COPY_A, -1 }, + { KEPLER_DMA_COPY_A, -1 }, + {} + }; + + ret = nouveau_object_mclass(chan, copys); + if (ret < 0) + FAIL_SCREEN_INIT("No supported copy engine class: %d\n", ret); + + ret = nouveau_object_new(chan, 0, copys[ret].oclass, NULL, 0, &screen->copy); + if (ret) + FAIL_SCREEN_INIT("Error allocating copy engine class: %d\n", ret); + BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1); - PUSH_DATA (push, NVE4_COPY_CLASS); + PUSH_DATA (push, screen->copy->oclass); } ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0, @@ -1186,67 +1128,32 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAh(push, screen->fence.bo->offset + 16); PUSH_DATA (push, screen->fence.bo->offset + 16); - switch (dev->chipset & ~0xf) { - case 0x160: - obj_class = TU102_3D_CLASS; - break; - case 0x140: - obj_class = GV100_3D_CLASS; - break; - case 0x130: - switch (dev->chipset) { - case 0x130: - case 0x13b: - obj_class = GP100_3D_CLASS; - break; - default: - obj_class = GP102_3D_CLASS; - break; - } - break; - case 0x120: - obj_class = GM200_3D_CLASS; - break; - case 0x110: - obj_class = GM107_3D_CLASS; - break; - case 0x100: - case 0xf0: - obj_class = NVF0_3D_CLASS; - break; - case 0xe0: - switch (dev->chipset) { - case 0xea: - obj_class = NVEA_3D_CLASS; - break; - default: - obj_class = NVE4_3D_CLASS; - break; - } - break; - case 0xd0: - obj_class = NVC8_3D_CLASS; - break; - case 0xc0: - default: - switch (dev->chipset) { - case 0xc8: - obj_class = NVC8_3D_CLASS; - break; - case 0xc1: - obj_class = NVC1_3D_CLASS; - break; - default: - obj_class = NVC0_3D_CLASS; - break; - } - break; - } - ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0, + const struct nouveau_mclass threeds[] = { + { GA102_3D_CLASS, -1 }, + { TU102_3D_CLASS, -1 }, + { GV100_3D_CLASS, -1 }, + { GP102_3D_CLASS, -1 }, + { GP100_3D_CLASS, -1 }, + { GM200_3D_CLASS, -1 }, + { GM107_3D_CLASS, -1 }, + { NVF0_3D_CLASS, -1 }, + { NVEA_3D_CLASS, -1 }, + { NVE4_3D_CLASS, -1 }, + { NVC8_3D_CLASS, -1 }, + { NVC1_3D_CLASS, -1 }, + { NVC0_3D_CLASS, -1 }, + {} + }; + + ret = nouveau_object_mclass(chan, threeds); + if (ret < 0) + FAIL_SCREEN_INIT("No supported 3d class: %d\n", ret); + + ret = nouveau_object_new(chan, 0xbeef003d, threeds[ret].oclass, NULL, 0, &screen->eng3d); if (ret) FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); - screen->base.class_3d = obj_class; + screen->base.class_3d = screen->eng3d->oclass; BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->eng3d->oclass); @@ -1289,7 +1196,7 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH); if (screen->eng3d->oclass < NVE4_3D_CLASS) { IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0); - } else { + } else if (screen->eng3d->oclass < GA102_3D_CLASS) { BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); PUSH_DATA (push, 15); } @@ -1457,6 +1364,7 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); + MK_MACRO(NVC0_3D_MACRO_SET_PRIV_REG, mme9097_set_priv_reg); MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); @@ -1478,6 +1386,7 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count); MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write); MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state); + MK_MACRO(NVC0_3D_MACRO_SET_PRIV_REG, mmec597_set_priv_reg); MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter); MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query); } @@ -1554,6 +1463,15 @@ nvc0_screen_create(struct nouveau_device *dev) BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); PUSH_DATA (push, 0); + /* requires Nvidia provided firmware */ + if (screen->eng3d->oclass >= GM200_3D_CLASS) { + unsigned reg = screen->eng3d->oclass >= GV100_3D_CLASS ? 0x419ba4 : 0x419f78; + BEGIN_1IC0(push, NVC0_3D(MACRO_SET_PRIV_REG), 3); + PUSH_DATA (push, reg); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, 0x00000008); + } + PUSH_KICK (push); screen->tic.entries = CALLOC( diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 8bce90ae3..07e3cde62 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -126,6 +126,7 @@ struct nvc0_screen { struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */ struct nouveau_object *eng2d; struct nouveau_object *m2mf; + struct nouveau_object *copy; struct nouveau_object *compute; struct nouveau_object *nvsw; }; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 2f4a9c117..7141a99ba 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -741,8 +741,7 @@ nvc0_cp_state_create(struct pipe_context *pipe, prog->type = PIPE_SHADER_COMPUTE; prog->pipe.type = cso->ir_type; - prog->cp.smem_size = cso->req_local_mem; - prog->cp.lmem_size = cso->req_private_mem; + prog->cp.smem_size = cso->static_shared_mem; prog->parm_size = cso->req_input_mem; switch(cso->ir_type) { @@ -785,6 +784,31 @@ nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso) } static void +nvc0_get_compute_state_info(struct pipe_context *pipe, void *hwcso, + struct pipe_compute_state_object_info *info) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_program *prog = (struct nvc0_program *)hwcso; + uint16_t obj_class = nvc0->screen->compute->oclass; + uint32_t chipset = nvc0->screen->base.device->chipset; + uint32_t smregs; + + // fermi and a handful of tegra devices have less gprs per SM + if (obj_class < NVE4_COMPUTE_CLASS || chipset == 0xea || chipset == 0x12b || chipset == 0x13b) + smregs = 32768; + else + smregs = 65536; + + // TODO: not 100% sure about 8 for volta, but earlier reverse engineering indicates it + uint32_t gpr_alloc_size = obj_class >= GV100_COMPUTE_CLASS ? 8 : 4; + uint32_t threads = smregs / align(prog->num_gprs, gpr_alloc_size); + + info->max_threads = MIN2(ROUND_DOWN_TO(threads, 32), 1024); + info->private_memory = prog->hdr[1] & 0xfffff0; + info->preferred_simd_size = 32; +} + +static void nvc0_set_constant_buffer(struct pipe_context *pipe, enum pipe_shader_type shader, uint index, bool take_ownership, @@ -1495,6 +1519,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->create_compute_state = nvc0_cp_state_create; pipe->bind_compute_state = nvc0_cp_state_bind; + pipe->get_compute_state_info = nvc0_get_compute_state_info; pipe->delete_compute_state = nvc0_sp_state_delete; pipe->set_blend_color = nvc0_set_blend_color; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 6726b9458..3dd7d0b6c 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -5,57 +5,6 @@ #include "nvc0/nvc0_context.h" -#if 0 -static void -nvc0_validate_zcull(struct nvc0_context *nvc0) -{ - struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct pipe_framebuffer_state *fb = &nvc0->framebuffer; - struct nv50_surface *sf = nv50_surface(fb->zsbuf); - struct nv50_miptree *mt = nv50_miptree(sf->base.texture); - struct nouveau_bo *bo = mt->base.bo; - uint32_t size; - uint32_t offset = align(mt->total_size, 1 << 17); - unsigned width, height; - - assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); - - size = mt->total_size * 2; - - height = align(fb->height, 32); - width = fb->width % 224; - if (width) - width = fb->width + (224 - width); - else - width = fb->width; - - BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2); - PUSH_DATAh(push, bo->offset + offset); - PUSH_DATA (push, bo->offset + offset); - offset += 1 << 17; - BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2); - PUSH_DATAh(push, bo->offset + offset); - PUSH_DATA (push, bo->offset + offset); - BEGIN_NVC0(push, SUBC_3D(0x07e0), 2); - PUSH_DATA (push, size); - PUSH_DATA (push, size >> 16); - BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */ - PUSH_DATA (push, 2); - BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4); - PUSH_DATA (push, width); - PUSH_DATA (push, height); - PUSH_DATA (push, 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2); - PUSH_DATA (push, 0); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1); - PUSH_DATA (push, 0); -} -#endif - static inline void nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers) { @@ -811,7 +760,7 @@ nvc0_validate_fbread(struct nvc0_context *nvc0) nvc0->fragprog->fp.reads_framebuffer && nvc0->framebuffer.nr_cbufs && nvc0->framebuffer.cbufs[0]) { - struct pipe_sampler_view tmpl; + struct pipe_sampler_view tmpl = {0}; struct pipe_surface *sf = nvc0->framebuffer.cbufs[0]; tmpl.target = PIPE_TEXTURE_2D_ARRAY; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 2e7de2752..ea7d737f2 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -31,7 +31,7 @@ #include "tgsi/tgsi_ureg.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_resource.h" @@ -969,7 +969,7 @@ nvc0_blit_set_src(struct nvc0_blitctx *ctx, { struct nvc0_context *nvc0 = ctx->nvc0; struct pipe_context *pipe = &nvc0->base.pipe; - struct pipe_sampler_view templ; + struct pipe_sampler_view templ = {0}; uint32_t flags; unsigned s; enum pipe_texture_target target; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 23d157fd4..9af2601b3 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -44,49 +44,10 @@ int nve4_screen_compute_setup(struct nvc0_screen *screen, struct nouveau_pushbuf *push) { - struct nouveau_device *dev = screen->base.device; - struct nouveau_object *chan = screen->base.channel; int i; - int ret; - uint32_t obj_class; + uint32_t obj_class = screen->compute->oclass; uint64_t address; - switch (dev->chipset & ~0xf) { - case 0x160: - obj_class = TU102_COMPUTE_CLASS; - break; - case 0x140: - obj_class = GV100_COMPUTE_CLASS; - break; - case 0x100: - case 0xf0: - obj_class = NVF0_COMPUTE_CLASS; /* GK110 */ - break; - case 0xe0: - obj_class = NVE4_COMPUTE_CLASS; /* GK104 */ - break; - case 0x110: - obj_class = GM107_COMPUTE_CLASS; - break; - case 0x120: - obj_class = GM200_COMPUTE_CLASS; - break; - case 0x130: - obj_class = (dev->chipset == 0x130 || dev->chipset == 0x13b) ? - GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS; - break; - default: - NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); - return -1; - } - - ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0, - &screen->compute); - if (ret) { - NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); - return ret; - } - BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); @@ -627,6 +588,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, { const struct nvc0_screen *screen = nvc0->screen; const struct nvc0_program *cp = nvc0->compprog; + uint32_t shared_size = cp->cp.smem_size + info->variable_shared_mem; NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE); NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE); @@ -647,19 +609,16 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); - NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE, - align(cp->cp.smem_size, 0x100)); - NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, - (cp->hdr[1] & 0xfffff0) + - align(cp->cp.lmem_size, 0x10)); + NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE, align(shared_size, 0x100)); + NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, cp->hdr[1] & 0xfffff0); NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); - if (cp->cp.smem_size > (32 << 10)) + if (shared_size > (32 << 10)) NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB); else - if (cp->cp.smem_size > (16 << 10)) + if (shared_size > (16 << 10)) NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB); else @@ -692,6 +651,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, { const struct nvc0_screen *screen = nvc0->screen; const struct nvc0_program *cp = nvc0->compprog; + uint32_t shared_size = cp->cp.smem_size + info->variable_shared_mem; NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR); @@ -707,11 +667,8 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); - NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE, - align(cp->cp.smem_size, 0x100)); - NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, - (cp->hdr[1] & 0xfffff0) + - align(cp->cp.lmem_size, 0x10)); + NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE, align(shared_size, 0x100)); + NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, cp->hdr[1] & 0xfffff0); NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); @@ -753,15 +710,13 @@ gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd, struct nvc0_program *cp = nvc0->compprog; struct nvc0_screen *screen = nvc0->screen; uint64_t entry = screen->text->offset + cp->code_base; + uint32_t shared_size = cp->cp.smem_size + info->variable_shared_mem; NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY); - NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, - align(cp->cp.smem_size, 0x100)); - NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, - (cp->hdr[1] & 0xfffff0) + - align(cp->cp.lmem_size, 0x10)); + NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, align(shared_size, 0x100)); + NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, cp->hdr[1] & 0xfffff0); NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, gv100_sm_config_smem_size(8 * 1024)); @@ -770,7 +725,7 @@ gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd, NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2); NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2); NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, - gv100_sm_config_smem_size(cp->cp.smem_size)); + gv100_sm_config_smem_size(shared_size)); NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); @@ -928,8 +883,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) PUSH_REF1(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1); PUSH_DATA (push, desc_gpuaddr >> 8); - BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1); - PUSH_DATA (push, 0x3); + if (screen->compute->oclass < GA102_COMPUTE_CLASS) { + BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1); + PUSH_DATA (push, 0x3); + } else { + BEGIN_NIC0(push, SUBC_CP(0x02c0), 2); + PUSH_DATA (push, 1); + PUSH_DATA (push, 2); + } BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); |