summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2016-05-29 10:22:51 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2016-05-29 10:22:51 +0000
commitc9223eed3c16cd3e98a8f56dda953d8f299de0e3 (patch)
tree53e2a1c3f13bcf6b4ed201d7bc135e7213c94ebe /lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
parent6e8f2d062ab9c198239b9283b2b7ed12f4ea17d8 (diff)
Import Mesa 11.2.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c')
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c258
1 files changed, 199 insertions, 59 deletions
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 47bd12362..c07f186af 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -37,12 +37,9 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
switch (dev->chipset & ~0xf) {
case 0xc0:
- if (dev->chipset == 0xc8)
- obj_class = NVC8_COMPUTE_CLASS;
- else
- obj_class = NVC0_COMPUTE_CLASS;
- break;
case 0xd0:
+ /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
+ * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
obj_class = NVC0_COMPUTE_CLASS;
break;
default:
@@ -93,13 +90,13 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
- PUSH_DATA (push, 1 << 24);
+ PUSH_DATA (push, 0xff << 24);
/* shared memory setup */
BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
- PUSH_DATA (push, 2 << 24);
+ PUSH_DATA (push, 0xfe << 24);
BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
PUSH_DATA (push, 0);
@@ -108,15 +105,17 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
- /* bind parameters buffer */
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
- PUSH_DATA (push, screen->parm->size);
- PUSH_DATAh(push, screen->parm->offset);
- PUSH_DATA (push, screen->parm->offset);
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
- PUSH_DATA (push, (0 << 8) | 1);
+ /* textures */
+ BEGIN_NVC0(push, NVC0_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
- /* TODO: textures & samplers */
+ /* samplers */
+ BEGIN_NVC0(push, NVC0_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
return 0;
}
@@ -131,7 +130,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
if (!prog->translated) {
prog->translated = nvc0_program_translate(
- prog, nvc0->screen->base.device->chipset);
+ prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
if (!prog->translated)
return false;
}
@@ -149,13 +148,149 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
return false;
}
+static void
+nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
+{
+ bool need_flush = nvc0_validate_tsc(nvc0, 5);
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_COMPUTE(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static void
+nvc0_compute_validate_textures(struct nvc0_context *nvc0)
+{
+ bool need_flush = nvc0_validate_tic(nvc0, 5);
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_COMPUTE(TIC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static void
+nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const int s = 5;
+
+ while (nvc0->constbuf_dirty[s]) {
+ int i = ffs(nvc0->constbuf_dirty[s]) - 1;
+ nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+ if (nvc0->constbuf[s][i].user) {
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+ const unsigned base = s << 16;
+ const unsigned size = nvc0->constbuf[s][0].size;
+ assert(i == 0); /* we really only want OpenGL uniforms here */
+ assert(nvc0->constbuf[s][0].u.data);
+
+ if (nvc0->state.uniform_buffer_bound[s] < size) {
+ nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (0 << 8) | 1);
+ }
+ nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+ base, nvc0->state.uniform_buffer_bound[s],
+ 0, (size + 3) / 4,
+ nvc0->constbuf[s][0].u.data);
+ } else {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->constbuf[s][i].u.buf);
+ if (res) {
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->constbuf[s][i].size);
+ PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
+ PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (i << 8) | 1);
+
+ BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
+
+ res->cb_bindings[s] |= 1 << i;
+ } else {
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (i << 8) | 0);
+ }
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ }
+ }
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
+}
+
+static void
+nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (15 << 8) | 1);
+
+ nvc0->dirty |= NVC0_NEW_DRIVERCONST;
+}
+
+static void
+nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const int s = 5;
+ int i;
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
+ PUSH_DATA (push, 512);
+
+ for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
+ if (nvc0->buffers[s][i].buffer) {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->buffers[s][i].buffer);
+ PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
+ PUSH_DATA (push, 0);
+ BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
+ } else {
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+ }
+}
+
static bool
nvc0_compute_state_validate(struct nvc0_context *nvc0)
{
if (!nvc0_compute_validate_program(nvc0))
return false;
-
- /* TODO: textures, samplers, surfaces, global memory buffers */
+ if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF)
+ nvc0_compute_validate_constbufs(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST)
+ nvc0_compute_validate_driverconst(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS)
+ nvc0_compute_validate_buffers(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
+ nvc0_compute_validate_textures(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
+ nvc0_compute_validate_samplers(nvc0);
+
+ /* TODO: surfaces, global memory buffers */
nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
@@ -194,25 +329,24 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
}
void
-nvc0_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t label,
- const void *input)
+nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *cp = nvc0->compprog;
- unsigned s, i;
+ unsigned s;
int ret;
ret = !nvc0_compute_state_validate(nvc0);
- if (ret)
- goto out;
+ if (ret) {
+ NOUVEAU_ERR("Failed to launch grid !\n");
+ return;
+ }
- nvc0_compute_upload_input(nvc0, input);
+ nvc0_compute_upload_input(nvc0, info->input);
BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
- PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+ PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
@@ -221,19 +355,11 @@ nvc0_launch_grid(struct pipe_context *pipe,
BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
- PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+ PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
PUSH_DATA (push, cp->num_barriers);
BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
PUSH_DATA (push, cp->num_gprs);
- /* grid/block setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
- PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
- PUSH_DATA (push, grid_layout[2]);
- BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
- PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
- PUSH_DATA (push, block_layout[2]);
-
/* launch preliminary setup */
BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
PUSH_DATA (push, 0x1);
@@ -242,30 +368,44 @@ nvc0_launch_grid(struct pipe_context *pipe,
BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
- /* kernel launching */
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
- PUSH_DATA (push, 0x1000);
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
- PUSH_DATA (push, 0x1);
+ /* block setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+ PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
+ PUSH_DATA (push, info->block[2]);
+
+ if (unlikely(info->indirect)) {
+ struct nv04_resource *res = nv04_resource(info->indirect);
+ uint32_t offset = res->offset + info->indirect_offset;
+ unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT;
+
+ nouveau_pushbuf_space(push, 16, 0, 1);
+ PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
+ PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
+ nouveau_pushbuf_data(push, res->bo, offset,
+ NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
+ } else {
+ /* grid setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+ PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
+ PUSH_DATA (push, info->grid[2]);
+
+ /* kernel launching */
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0x1000);
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+ PUSH_DATA (push, 0x1);
+ }
- /* rebind all the 3D constant buffers
- * (looks like binding a CB on COMPUTE clobbers 3D state) */
+ /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
nvc0->dirty |= NVC0_NEW_CONSTBUF;
- for (s = 0; s < 6; s++) {
- for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
- if (nvc0->constbuf[s][i].u.buf)
- nvc0->constbuf_dirty[s] |= 1 << i;
+ for (s = 0; s < 5; s++) {
+ nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
+ nvc0->state.uniform_buffer_bound[s] = 0;
}
- memset(nvc0->state.uniform_buffer_bound, 0,
- sizeof(nvc0->state.uniform_buffer_bound));
-
-out:
- if (ret)
- NOUVEAU_ERR("Failed to launch grid !\n");
}