diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000 |
commit | c9223eed3c16cd3e98a8f56dda953d8f299de0e3 (patch) | |
tree | 53e2a1c3f13bcf6b4ed201d7bc135e7213c94ebe /lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | |
parent | 6e8f2d062ab9c198239b9283b2b7ed12f4ea17d8 (diff) |
Import Mesa 11.2.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c')
-rw-r--r-- | lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 258 |
1 files changed, 199 insertions, 59 deletions
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 47bd12362..c07f186af 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -37,12 +37,9 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, switch (dev->chipset & ~0xf) { case 0xc0: - if (dev->chipset == 0xc8) - obj_class = NVC8_COMPUTE_CLASS; - else - obj_class = NVC0_COMPUTE_CLASS; - break; case 0xd0: + /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, + * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ obj_class = NVC0_COMPUTE_CLASS; break; default: @@ -93,13 +90,13 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); - PUSH_DATA (push, 1 << 24); + PUSH_DATA (push, 0xff << 24); /* shared memory setup */ BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); - PUSH_DATA (push, 2 << 24); + PUSH_DATA (push, 0xfe << 24); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); PUSH_DATA (push, 0); @@ -108,15 +105,17 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - /* bind parameters buffer */ - BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); - PUSH_DATA (push, screen->parm->size); - PUSH_DATAh(push, screen->parm->offset); - PUSH_DATA (push, screen->parm->offset); - BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); - PUSH_DATA (push, (0 << 8) | 1); + /* textures */ + BEGIN_NVC0(push, NVC0_COMPUTE(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); - /* TODO: textures & samplers */ + /* samplers */ + BEGIN_NVC0(push, NVC0_COMPUTE(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); return 0; } @@ -131,7 +130,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0) if (!prog->translated) { prog->translated = nvc0_program_translate( - prog, nvc0->screen->base.device->chipset); + prog, nvc0->screen->base.device->chipset, &nvc0->base.debug); if (!prog->translated) return false; } @@ -149,13 +148,149 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0) return false; } +static void +nvc0_compute_validate_samplers(struct nvc0_context *nvc0) +{ + bool need_flush = nvc0_validate_tsc(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_COMPUTE(TSC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static void +nvc0_compute_validate_textures(struct nvc0_context *nvc0) +{ + bool need_flush = nvc0_validate_tic(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_COMPUTE(TIC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static void +nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const int s = 5; + + while (nvc0->constbuf_dirty[s]) { + int i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + if (nvc0->constbuf[s][i].user) { + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + const unsigned base = s << 16; + const unsigned size = nvc0->constbuf[s][0].size; + assert(i == 0); /* we really only want OpenGL uniforms here */ + assert(nvc0->constbuf[s][0].u.data); + + if (nvc0->state.uniform_buffer_bound[s] < size) { + nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + } + nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), + base, nvc0->state.uniform_buffer_bound[s], + 0, (size + 3) / 4, + nvc0->constbuf[s][0].u.data); + } else { + struct nv04_resource *res = + nv04_resource(nvc0->constbuf[s][i].u.buf); + if (res) { + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, nvc0->constbuf[s][i].size); + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (i << 8) | 1); + + BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); + + res->cb_bindings[s] |= 1 << i; + } else { + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (i << 8) | 0); + } + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + } + } + + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); +} + +static void +nvc0_compute_validate_driverconst(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10)); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (15 << 8) | 1); + + nvc0->dirty |= NVC0_NEW_DRIVERCONST; +} + +static void +nvc0_compute_validate_buffers(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const int s = 5; + int i; + + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); + PUSH_DATA (push, 512); + + for (i = 0; i < NVC0_MAX_BUFFERS; i++) { + if (nvc0->buffers[s][i].buffer) { + struct nv04_resource *res = + nv04_resource(nvc0->buffers[s][i].buffer); + PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); + PUSH_DATA (push, 0); + BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + } +} + static bool nvc0_compute_state_validate(struct nvc0_context *nvc0) { if (!nvc0_compute_validate_program(nvc0)) return false; - - /* TODO: textures, samplers, surfaces, global memory buffers */ + if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF) + nvc0_compute_validate_constbufs(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST) + nvc0_compute_validate_driverconst(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS) + nvc0_compute_validate_buffers(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES) + nvc0_compute_validate_textures(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS) + nvc0_compute_validate_samplers(nvc0); + + /* TODO: surfaces, global memory buffers */ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false); @@ -194,25 +329,24 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) } void -nvc0_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t label, - const void *input) +nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; - unsigned s, i; + unsigned s; int ret; ret = !nvc0_compute_state_validate(nvc0); - if (ret) - goto out; + if (ret) { + NOUVEAU_ERR("Failed to launch grid !\n"); + return; + } - nvc0_compute_upload_input(nvc0, input); + nvc0_compute_upload_input(nvc0, info->input); BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); - PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); + PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc)); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); @@ -221,19 +355,11 @@ nvc0_launch_grid(struct pipe_context *pipe, BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); - PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); + PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]); PUSH_DATA (push, cp->num_barriers); BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); - /* grid/block setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); - PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); - PUSH_DATA (push, grid_layout[2]); - BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); - PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); - PUSH_DATA (push, block_layout[2]); - /* launch preliminary setup */ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); PUSH_DATA (push, 0x1); @@ -242,30 +368,44 @@ nvc0_launch_grid(struct pipe_context *pipe, BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); - /* kernel launching */ - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); - PUSH_DATA (push, 0x1000); - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); - PUSH_DATA (push, 0x1); + /* block setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); + PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); + PUSH_DATA (push, info->block[2]); + + if (unlikely(info->indirect)) { + struct nv04_resource *res = nv04_resource(info->indirect); + uint32_t offset = res->offset + info->indirect_offset; + unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT; + + nouveau_pushbuf_space(push, 16, 0, 1); + PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); + PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); + nouveau_pushbuf_data(push, res->bo, offset, + NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); + } else { + /* grid setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); + PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); + PUSH_DATA (push, info->grid[2]); + + /* kernel launching */ + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0x1000); + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); + PUSH_DATA (push, 0x1); + } - /* rebind all the 3D constant buffers - * (looks like binding a CB on COMPUTE clobbers 3D state) */ + /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ nvc0->dirty |= NVC0_NEW_CONSTBUF; - for (s = 0; s < 6; s++) { - for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) - if (nvc0->constbuf[s][i].u.buf) - nvc0->constbuf_dirty[s] |= 1 << i; + for (s = 0; s < 5; s++) { + nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s]; + nvc0->state.uniform_buffer_bound[s] = 0; } - memset(nvc0->state.uniform_buffer_bound, 0, - sizeof(nvc0->state.uniform_buffer_bound)); - -out: - if (ret) - NOUVEAU_ERR("Failed to launch grid !\n"); } |