diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2017-02-26 12:14:54 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2017-02-26 12:14:54 +0000 |
commit | b5fce4e6eb297a6f7fabd0d6c6b4ffdfefa6ad8b (patch) | |
tree | 4c21fc3859e4eae3a2968dcd5f8b5bf23198b8a5 /lib/mesa/src/gallium/drivers | |
parent | 04c9eaba81433c32fe1a68ad44c3e2023eac56b4 (diff) |
Import Mesa 13.0.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers')
45 files changed, 182 insertions, 176 deletions
diff --git a/lib/mesa/src/gallium/drivers/freedreno/Makefile.am b/lib/mesa/src/gallium/drivers/freedreno/Makefile.am index 148dd0eb5..ffb4db182 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/Makefile.am +++ b/lib/mesa/src/gallium/drivers/freedreno/Makefile.am @@ -9,6 +9,7 @@ AM_CFLAGS = \ $(GALLIUM_DRIVER_CFLAGS) \ $(FREEDRENO_CFLAGS) +MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py $(MKDIR_GEN) $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py > $@ || ($(RM) $@; false) diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c index 276f6be93..176a31c77 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c @@ -234,7 +234,6 @@ batch_flush_func(void *job, int id) fd_gmem_render_tiles(batch); batch_reset_resources(batch); - batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem); } static void @@ -275,7 +274,6 @@ batch_flush(struct fd_batch *batch) } else { fd_gmem_render_tiles(batch); batch_reset_resources(batch); - batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem); } debug_assert(batch->reference.count > 0); diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c index df11eab25..f3d5078d1 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c @@ -124,7 +124,7 @@ fd_bc_fini(struct fd_batch_cache *cache) _mesa_hash_table_destroy(cache->ht, NULL); } -uint32_t +void fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx) { struct hash_entry *entry; @@ -150,8 +150,6 @@ fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx) fd_batch_sync(last_batch); fd_batch_reference(&last_batch, NULL); } - - return ctx->last_fence; } void diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h index 1790e5cf4..44c66b58f 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h @@ -62,7 +62,7 @@ struct fd_batch_cache { void fd_bc_init(struct fd_batch_cache *cache); void fd_bc_fini(struct fd_batch_cache *cache); -uint32_t fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx); +void fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx); void fd_bc_invalidate_context(struct fd_context *ctx); void fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy); diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c index 0b12409ba..70220f88d 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c @@ -43,22 +43,15 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, unsigned flags) { struct fd_context *ctx = fd_context(pctx); - uint32_t timestamp; if (!ctx->screen->reorder) { - struct fd_batch *batch = NULL; - fd_batch_reference(&batch, ctx->batch); - fd_batch_flush(batch, true); - timestamp = fd_ringbuffer_timestamp(batch->gmem); - fd_batch_reference(&batch, NULL); + fd_batch_flush(ctx->batch, true); } else { - timestamp = fd_bc_flush(&ctx->screen->batch_cache, ctx); + fd_bc_flush(&ctx->screen->batch_cache, ctx); } - if (fence) { - fd_screen_fence_ref(pctx->screen, fence, NULL); - *fence = fd_fence_create(pctx, timestamp); - } + if (fence) + fd_fence_ref(pctx->screen, fence, ctx->last_fence); } /** @@ -109,6 +102,8 @@ fd_context_destroy(struct pipe_context *pctx) fd_batch_reference(&ctx->batch, NULL); /* unref current batch */ fd_bc_invalidate_context(ctx); + fd_fence_ref(pctx->screen, &ctx->last_fence, NULL); + fd_prog_fini(pctx); fd_hw_query_fini(pctx); diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h index c4c08a682..4a766f5cd 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h @@ -164,7 +164,7 @@ struct fd_context { */ struct fd_batch *batch; - uint32_t last_fence; + struct pipe_fence_handle *last_fence; /* Are we in process of shadowing a resource? Used to detect recursion * in transfer_map, and skip unneeded synchronization. diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c index df4cf4dd5..a5f717169 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c @@ -40,7 +40,7 @@ struct pipe_fence_handle { }; void -fd_screen_fence_ref(struct pipe_screen *pscreen, +fd_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *pfence) { @@ -50,7 +50,7 @@ fd_screen_fence_ref(struct pipe_screen *pscreen, *ptr = pfence; } -boolean fd_screen_fence_finish(struct pipe_screen *screen, +boolean fd_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx, struct pipe_fence_handle *fence, uint64_t timeout) @@ -61,11 +61,10 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen, return true; } -struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx, +struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx, uint32_t timestamp) { struct pipe_fence_handle *fence; - struct fd_context *ctx = fd_context(pctx); fence = CALLOC_STRUCT(pipe_fence_handle); if (!fence) diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h index df7664bf8..32bfacc76 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h @@ -31,14 +31,16 @@ #include "pipe/p_context.h" -void fd_screen_fence_ref(struct pipe_screen *pscreen, +void fd_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *pfence); -boolean fd_screen_fence_finish(struct pipe_screen *screen, +boolean fd_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx, struct pipe_fence_handle *pfence, uint64_t timeout); -struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx, + +struct fd_context; +struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx, uint32_t timestamp); #endif /* FREEDRENO_FENCE_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c index ed625e455..3b2ecbaea 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -34,6 +34,7 @@ #include "freedreno_gmem.h" #include "freedreno_context.h" +#include "freedreno_fence.h" #include "freedreno_resource.h" #include "freedreno_query_hw.h" #include "freedreno_util.h" @@ -394,6 +395,9 @@ fd_gmem_render_tiles(struct fd_batch *batch) } fd_ringbuffer_flush(batch->gmem); + + fd_fence_ref(&ctx->screen->base, &ctx->last_fence, NULL); + ctx->last_fence = fd_fence_create(ctx, fd_ringbuffer_timestamp(batch->gmem)); } /* tile needs restore if it isn't completely contained within the diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c index 1f7c2a5dc..cc75c509a 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c @@ -696,8 +696,8 @@ fd_screen_create(struct fd_device *dev) pscreen->get_timestamp = fd_screen_get_timestamp; - pscreen->fence_reference = fd_screen_fence_ref; - pscreen->fence_finish = fd_screen_fence_finish; + pscreen->fence_reference = fd_fence_ref; + pscreen->fence_finish = fd_fence_finish; slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c b/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c index bb3aad784..95d8866ba 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c @@ -73,7 +73,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) { if (!dec->cmds) return; - nouveau_pushbuf_space(push, 8, 2, 0); + nouveau_pushbuf_space(push, 16, 2, 0); nouveau_bufctx_reset(dec->bufctx, NV31_VIDEO_BIND_CMD); #define BCTX_ARGS dec->bufctx, NV31_VIDEO_BIND_CMD, NOUVEAU_BO_RD diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c index 4217bca6d..4e6df1eff 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c @@ -128,7 +128,7 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps, refn.bo = mt->base.bo; refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; - if (nouveau_pushbuf_space(push, 16, 1, 0) || + if (nouveau_pushbuf_space(push, 32, 1, 0) || nouveau_pushbuf_refn (push, &refn, 1)) return; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c index e4b949725..6f06ee6b1 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c @@ -431,7 +431,7 @@ nv30_transfer_rect_sifm(XFER_ARGS) si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR; } - if (nouveau_pushbuf_space(push, 32, 6, 0) || + if (nouveau_pushbuf_space(push, 64, 6, 0) || nouveau_pushbuf_refn (push, refs, 2)) return; @@ -516,7 +516,7 @@ nv30_transfer_rect_m2mf(XFER_ARGS) while (h) { unsigned lines = (h > 2047) ? 2047 : h; - if (nouveau_pushbuf_space(push, 13, 2, 0) || + if (nouveau_pushbuf_space(push, 32, 2, 0) || nouveau_pushbuf_refn (push, refs, 2)) return; @@ -708,7 +708,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv, lines = (pages > 2047) ? 2047 : pages; pages -= lines; - if (nouveau_pushbuf_space(push, 13, 2, 0) || + if (nouveau_pushbuf_space(push, 32, 2, 0) || nouveau_pushbuf_refn (push, refs, 2)) return; @@ -732,7 +732,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv, } if (size) { - if (nouveau_pushbuf_space(push, 13, 2, 0) || + if (nouveau_pushbuf_space(push, 32, 2, 0) || nouveau_pushbuf_refn (push, refs, 2)) return; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c index a6c0bbc26..f5fa9d6ca 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -295,7 +295,7 @@ nv50_clear_render_target(struct pipe_context *pipe, PUSH_DATAf(push, color->f[2]); PUSH_DATAf(push, color->f[3]); - if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) + if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0)) return; PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR); @@ -394,7 +394,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe, mode |= NV50_3D_CLEAR_BUFFERS_S; } - if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) + if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0)) return; PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR); @@ -752,7 +752,7 @@ nv50_clear_buffer(struct pipe_context *pipe, PUSH_DATAf(push, color.f[2]); PUSH_DATAf(push, color.f[3]); - if (nouveau_pushbuf_space(push, 32, 1, 0)) + if (nouveau_pushbuf_space(push, 64, 1, 0)) return; PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index a11cdf847..9d55c1d46 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten, BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (push, prim); - nouveau_pushbuf_space(push, 8, 0, 1); + nouveau_pushbuf_space(push, 16, 0, 1); PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain); switch (index_size) { diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c index 177a7e027..92526d9f6 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c @@ -273,7 +273,7 @@ nv98_create_decoder(struct pipe_context *context, dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); /* So lets test if the fence is working? */ - nouveau_pushbuf_space(push[0], 6, 1, 0); + nouveau_pushbuf_space(push[0], 16, 1, 0); PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NV04(push[0], SUBC_BSP(0x240), 3); PUSH_DATAh(push[0], dec->fence_bo->offset); @@ -284,7 +284,7 @@ nv98_create_decoder(struct pipe_context *context, PUSH_DATA (push[0], 0); PUSH_KICK (push[0]); - nouveau_pushbuf_space(push[1], 6, 1, 0); + nouveau_pushbuf_space(push[1], 16, 1, 0); PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NV04(push[1], SUBC_VP(0x240), 3); PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); @@ -295,7 +295,7 @@ nv98_create_decoder(struct pipe_context *context, PUSH_DATA (push[1], 0); PUSH_KICK (push[1]); - nouveau_pushbuf_space(push[2], 6, 1, 0); + nouveau_pushbuf_space(push[2], 16, 1, 0); PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NV04(push[2], SUBC_PPP(0x240), 3); PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c index 4fe0e05c9..f77258de8 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c @@ -47,7 +47,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, int ret; struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; - unsigned fence_extra = 0; struct nouveau_pushbuf_refn bo_refs[] = { { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, @@ -61,10 +60,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, if (!dec->bitplane_bo) num_refs--; -#if NOUVEAU_VP3_DEBUG_FENCE - fence_extra = 4; -#endif - bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE; for (i = 0; i < num_buffers; i++) bsp_size += num_bytes[i]; @@ -112,7 +107,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); - nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_space(push, 32, num_refs, 0); nouveau_pushbuf_refn(push, bo_refs, num_refs); bsp_addr = bsp_bo->offset >> 8; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c index 48f16211a..3fce65ba5 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c @@ -93,13 +93,8 @@ nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); struct nouveau_pushbuf *push = dec->pushbuf[2]; unsigned ppp_caps = 0x10; - unsigned fence_extra = 0; -#if NOUVEAU_VP3_DEBUG_FENCE - fence_extra = 4; -#endif - - nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0); + nouveau_pushbuf_space(push, 32, 4, 0); switch (codec) { case PIPE_VIDEO_FORMAT_MPEG12: { diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c index 37d7d4432..f1cdf168e 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c @@ -76,7 +76,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; - u32 fence_extra = 0, codec_extra = 0; + u32 codec_extra = 0; struct nouveau_pushbuf_refn bo_refs[] = { { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, @@ -88,10 +88,6 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, }; int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo; -#if NOUVEAU_VP3_DEBUG_FENCE - fence_extra = 4; -#endif - if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); codec_extra += 2; @@ -115,8 +111,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom)) nv98_decoder_kick_ref(dec, target); - nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) + - 6 + codec_extra + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0); nouveau_pushbuf_refn(push, bo_refs, num_refs); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 4c34593ef..ff20fe635 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -403,7 +403,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, if (wait && hq->state != NVC0_HW_QUERY_STATE_READY) nvc0_hw_query_fifo_wait(nvc0, q); - nouveau_pushbuf_space(push, 16, 2, 0); + nouveau_pushbuf_space(push, 32, 2, 0); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR); BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 69ca091c4..009c606d9 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -799,7 +799,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0, } while (num_instances--) { - nouveau_pushbuf_space(push, 9, 0, 1); + nouveau_pushbuf_space(push, 16, 0, 1); BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (push, mode); BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c index a9fd1d209..b5e7bba5f 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c @@ -297,7 +297,7 @@ nvc0_create_decoder(struct pipe_context *context, dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); /* So lets test if the fence is working? */ - nouveau_pushbuf_space(push[0], 6, 1, 0); + nouveau_pushbuf_space(push[0], 16, 1, 0); PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3); PUSH_DATAh(push[0], dec->fence_bo->offset); @@ -308,7 +308,7 @@ nvc0_create_decoder(struct pipe_context *context, PUSH_DATA (push[0], 0); PUSH_KICK (push[0]); - nouveau_pushbuf_space(push[1], 6, 1, 0); + nouveau_pushbuf_space(push[1], 16, 1, 0); PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[1], SUBC_VP(0x240), 3); PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); @@ -319,7 +319,7 @@ nvc0_create_decoder(struct pipe_context *context, PUSH_DATA (push[1], 0); PUSH_KICK (push[1]); - nouveau_pushbuf_space(push[2], 6, 1, 0); + nouveau_pushbuf_space(push[2], 16, 1, 0); PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3); PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c index af072a8ac..52a031c51 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c @@ -143,7 +143,6 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc, uint32_t caps; struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; - unsigned fence_extra = 0; struct nouveau_pushbuf_refn bo_refs[] = { { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, @@ -157,15 +156,11 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc, if (!dec->bitplane_bo) num_refs--; -#if NOUVEAU_VP3_DEBUG_FENCE - fence_extra = 4; -#endif - caps = nouveau_vp3_bsp_end(dec, desc); nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); - nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_space(push, 32, num_refs, 0); nouveau_pushbuf_refn(push, bo_refs, num_refs); bsp_addr = bsp_bo->offset >> 8; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c index e4504e6c4..4f058628e 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c @@ -93,13 +93,8 @@ nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); struct nouveau_pushbuf *push = dec->pushbuf[2]; unsigned ppp_caps = 0x10; - unsigned fence_extra = 0; -#if NOUVEAU_VP3_DEBUG_FENCE - fence_extra = 4; -#endif - - nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0); + nouveau_pushbuf_space(push, 32, 4, 0); switch (codec) { case PIPE_VIDEO_FORMAT_MPEG12: { diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c index 73d551481..3de4ec148 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c @@ -76,7 +76,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; - u32 fence_extra = 0, codec_extra = 0; + u32 codec_extra = 0; struct nouveau_pushbuf_refn bo_refs[] = { { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, @@ -88,10 +88,6 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, }; int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo; -#if NOUVEAU_VP3_DEBUG_FENCE - fence_extra = 4; -#endif - if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); codec_extra += 2; @@ -115,8 +111,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom)) nvc0_decoder_kick_ref(dec, target); - nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) + - 6 + codec_extra + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0); nouveau_pushbuf_refn(push, bo_refs, num_refs); diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index d661c000b..15b4750d3 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -816,6 +816,7 @@ nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc) debug_printf("barrier count: %u\n", desc->bar_alloc); debug_printf("$r count: %u\n", desc->gpr_alloc); debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split)); + debug_printf("linked tsc: %d\n", desc->linked_tsc); for (i = 0; i < 8; ++i) { uint64_t address; diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h index b98c65d4a..5fe58b967 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -8,7 +8,10 @@ struct nve4_cp_launch_desc { u32 unk0[8]; u32 entry; - u32 unk9[3]; + u32 unk9[2]; + u32 unk11_0 : 30; + u32 linked_tsc : 1; + u32 unk11_31 : 1; u32 griddim_x : 31; u32 unk12 : 1; u16 griddim_y; @@ -48,7 +51,7 @@ nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc) memset(desc, 0, sizeof(*desc)); desc->unk0[7] = 0xbc000000; - desc->unk9[2] = 0x44014000; + desc->unk11_0 = 0x04014000; desc->unk47_20 = 0x300; } diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.c b/lib/mesa/src/gallium/drivers/r600/r600_shader.c index 59a13ec24..6a265c894 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_shader.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.c @@ -2924,7 +2924,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, struct pipe_stream_output_info so = pipeshader->selector->so; struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; - struct r600_bytecode_output output[32]; + struct r600_bytecode_output output[ARRAY_SIZE(shader->output)]; unsigned output_done, noutput; unsigned opcode; int i, j, k, r = 0; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp index 79aef9106..abae2bf69 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp @@ -30,6 +30,18 @@ namespace r600_sb { +int dce_cleanup::run() { + int r; + + // Run cleanup for as long as there are unused nodes. + do { + nodes_changed = false; + r = vpass::run(); + } while (r == 0 && nodes_changed); + + return r; +} + bool dce_cleanup::visit(node& n, bool enter) { if (enter) { } else { @@ -110,7 +122,18 @@ bool dce_cleanup::visit(region_node& n, bool enter) { void dce_cleanup::cleanup_dst(node& n) { if (!cleanup_dst_vec(n.dst) && remove_unused && !n.dst.empty() && !(n.flags & NF_DONT_KILL) && n.parent) + { + // Delete use references to the removed node from the src values. + for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) { + value* v = *I; + if (v && v->def && v->uses.size()) + { + v->remove_use(&n); + } + } n.remove(); + nodes_changed = true; + } } bool dce_cleanup::visit(container_node& n, bool enter) { @@ -130,7 +153,7 @@ bool dce_cleanup::cleanup_dst_vec(vvec& vv) { if (v->gvn_source && v->gvn_source->is_dead()) v->gvn_source = NULL; - if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses)) + if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses.size())) v = NULL; else alive = true; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp index 236b2ea00..9c75389ad 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp @@ -199,10 +199,9 @@ void gcm::td_release_val(value *v) { sblog << "\n"; ); - use_info *u = v->uses; - while (u) { + for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) { + use_info *u = *I; if (u->op->parent != &pending) { - u = u->next; continue; } @@ -212,6 +211,7 @@ void gcm::td_release_val(value *v) { sblog << "\n"; ); + assert(uses[u->op] > 0); if (--uses[u->op] == 0) { GCM_DUMP( sblog << "td released : "; @@ -222,7 +222,6 @@ void gcm::td_release_val(value *v) { pending.remove_node(u->op); ready.push_back(u->op); } - u = u->next; } } diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp index 5226893de..d989dce62 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp @@ -255,7 +255,7 @@ void container_node::expand() { void node::remove() {parent->remove_node(this); } -value_hash node::hash_src() { +value_hash node::hash_src() const { value_hash h = 12345; @@ -269,7 +269,7 @@ value_hash node::hash_src() { } -value_hash node::hash() { +value_hash node::hash() const { if (parent && parent->subtype == NST_LOOP_PHI_CONTAINER) return 47451; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h index 4fc4da2fb..74c0549a8 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h @@ -446,15 +446,16 @@ enum use_kind { }; struct use_info { - use_info *next; node *op; use_kind kind; int arg; - use_info(node *n, use_kind kind, int arg, use_info* next) - : next(next), op(n), kind(kind), arg(arg) {} + use_info(node *n, use_kind kind, int arg) + : op(n), kind(kind), arg(arg) {} }; +typedef std::list< use_info * > uselist; + enum constraint_kind { CK_SAME_REG, CK_PACKED_BS, @@ -498,7 +499,7 @@ public: value_hash ghash; node *def, *adef; - use_info *uses; + uselist uses; ra_constraint *constraint; ra_chunk *chunk; @@ -585,6 +586,7 @@ public: } void add_use(node *n, use_kind kind, int arg); + void remove_use(const node *n); value_hash hash(); value_hash rel_hash(); @@ -790,8 +792,8 @@ public: void replace_with(node *n); void remove(); - virtual value_hash hash(); - value_hash hash_src(); + virtual value_hash hash() const; + value_hash hash_src() const; virtual bool fold_dispatch(expr_handler *ex); diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h index 0346df1b1..e878f8c70 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h @@ -124,7 +124,9 @@ class dce_cleanup : public vpass { public: dce_cleanup(shader &s) : vpass(s), - remove_unused(s.dce_flags & DF_REMOVE_UNUSED) {} + remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {} + + virtual int run(); virtual bool visit(node &n, bool enter); virtual bool visit(alu_group_node &n, bool enter); @@ -140,6 +142,8 @@ private: void cleanup_dst(node &n); bool cleanup_dst_vec(vvec &vv); + // Did we alter/remove nodes during a single pass? + bool nodes_changed; }; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp index eb242b1c2..d31a1b76d 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp @@ -220,17 +220,34 @@ void value::add_use(node* n, use_kind kind, int arg) { dump::dump_op(n); sblog << " kind " << kind << " arg " << arg << "\n"; } - uses = new use_info(n, kind, arg, uses); + uses.push_back(new use_info(n, kind, arg)); } -unsigned value::use_count() { - use_info *u = uses; - unsigned c = 0; - while (u) { - ++c; - u = u->next; +struct use_node_comp { + explicit use_node_comp(const node *n) : n(n) {} + bool operator() (const use_info *u) { + return u->op->hash() == n->hash(); + } + + private: + const node *n; +}; + +void value::remove_use(const node *n) { + uselist::iterator it = + std::find_if(uses.begin(), uses.end(), use_node_comp(n)); + + if (it != uses.end()) + { + // TODO assert((*it)->kind == kind) ? + // TODO assert((*it)->arg == arg) ? + delete *it; + uses.erase(it); } - return c; +} + +unsigned value::use_count() { + return uses.size(); } bool value::is_global() { @@ -274,13 +291,12 @@ bool value::is_prealloc() { } void value::delete_uses() { - use_info *u, *c = uses; - while (c) { - u = c->next; - delete c; - c = u; + for (uselist::iterator it = uses.begin(); it != uses.end(); ++it) + { + delete *it; } - uses = NULL; + + uses.clear(); } void ra_constraint::update_values() { @@ -468,7 +484,7 @@ bool r600_sb::sb_value_set::add_vec(vvec& vv) { bool r600_sb::sb_value_set::contains(value* v) { unsigned b = v->uid - 1; if (b < bs.size()) - return bs.get(v->uid - 1); + return bs.get(b); else return false; } diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c index 74bec2626..bbab58946 100644 --- a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c @@ -377,11 +377,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } } - /* Using a staging buffer in GTT for larger reads is much faster. */ + /* Use a staging buffer in cached GTT for reads. */ else if ((usage & PIPE_TRANSFER_READ) && - !(usage & (PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_PERSISTENT)) && - rbuffer->domains & RADEON_DOMAIN_VRAM && + !(usage & PIPE_TRANSFER_PERSISTENT) && + (rbuffer->domains & RADEON_DOMAIN_VRAM || + rbuffer->flags & RADEON_FLAG_GTT_WC) && r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) { struct r600_resource *staging; @@ -390,11 +390,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT)); if (staging) { /* Copy the VRAM buffer to the staging buffer. */ - ctx->resource_copy_region(ctx, &staging->b.b, 0, - box->x % R600_MAP_BUFFER_ALIGNMENT, - 0, 0, resource, level, box); + rctx->dma_copy(ctx, &staging->b.b, 0, + box->x % R600_MAP_BUFFER_ALIGNMENT, + 0, 0, resource, 0, box); - data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ); + data = r600_buffer_map_sync_with_rings(rctx, staging, + usage & ~PIPE_TRANSFER_UNSYNCHRONIZED); if (!data) { r600_resource_reference(&staging, NULL); return NULL; diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c b/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c index 5ec988157..e89bcfed2 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c @@ -320,14 +320,21 @@ static void si_sampler_view_add_buffer(struct si_context *sctx, if (resource->target == PIPE_BUFFER) return; - /* Now add separate DCC if it's present. */ + /* Now add separate DCC or HTILE. */ rtex = (struct r600_texture*)resource; - if (!rtex->dcc_separate_buffer) - return; + if (rtex->dcc_separate_buffer) { + radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, + rtex->dcc_separate_buffer, usage, + RADEON_PRIO_DCC, check_mem); + } - radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, - rtex->dcc_separate_buffer, usage, - RADEON_PRIO_DCC, check_mem); + if (rtex->htile_buffer && + rtex->tc_compatible_htile && + !is_stencil_sampler) { + radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, + rtex->htile_buffer, usage, + RADEON_PRIO_HTILE, check_mem); + } } static void si_sampler_views_begin_new_cs(struct si_context *sctx, diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c index 60c24014e..e8eec87f9 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c @@ -5396,10 +5396,13 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - /* The real barrier instruction isn’t needed, because an entire patch + /* SI only (thanks to a hw bug workaround): + * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->type == PIPE_SHADER_TESS_CTRL) { + if (HAVE_LLVM >= 0x0309 && + ctx->screen->b.chip_class == SI && + ctx->type == PIPE_SHADER_TESS_CTRL) { emit_waitcnt(ctx, LGKM_CNT & VM_CNT); return; } diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state.c index 9e6e3d2b0..db74ca4d1 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_state.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state.c @@ -698,8 +698,10 @@ static void si_update_poly_offset_state(struct si_context *sctx) { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; - if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) + if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { + si_pm4_bind_state(sctx, poly_offset, NULL); return; + } /* Use the user format, not db_render_format, so that the polygon * offset behaves as expected by applications. diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c index 6bbe36d9a..963d3735f 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c @@ -847,11 +847,12 @@ void si_emit_cache_flush(struct si_context *sctx) if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 || (rctx->chip_class <= CIK && (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { - /* Invalidate L1 & L2. (L1 is always invalidated) + /* Invalidate L1 & L2. (L1 is always invalidated on SI) * WB must be set on VI+ when TC_ACTION is set. */ si_emit_surface_sync(rctx, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | + S_0085F0_TCL1_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI)); cp_coher_cntl = 0; } else { diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h index 43bc5222c..ee1eb3a76 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -262,45 +262,6 @@ public: return _simd_movemask_ps(vClipCullMask); } - // clip a single primitive - int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* pOutAttribs) - { - OSALIGNSIMD(float) inVerts[3 * 4]; - OSALIGNSIMD(float) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4]; - - // transpose primitive position - __m128 verts[3]; - pa.AssembleSingle(VERTEX_POSITION_SLOT, primIndex, verts); - _mm_store_ps(&inVerts[0], verts[0]); - _mm_store_ps(&inVerts[4], verts[1]); - _mm_store_ps(&inVerts[8], verts[2]); - - // transpose attribs - uint32_t numScalarAttribs = this->state.linkageCount * 4; - - int idx = 0; - DWORD slot = 0; - uint32_t mapIdx = 0; - uint32_t tmpLinkage = uint32_t(this->state.linkageMask); - while (_BitScanForward(&slot, tmpLinkage)) - { - tmpLinkage &= ~(1 << slot); - // Compute absolute attrib slot in vertex array - uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + this->state.linkageMap[mapIdx++]; - __m128 attrib[3]; // triangle attribs (always 4 wide) - pa.AssembleSingle(inputSlot, primIndex, attrib); - _mm_store_ps(&inAttribs[idx], attrib[0]); - _mm_store_ps(&inAttribs[idx + numScalarAttribs], attrib[1]); - _mm_store_ps(&inAttribs[idx + numScalarAttribs * 2], attrib[2]); - idx += 4; - } - - int numVerts; - Clip(inVerts, inAttribs, numScalarAttribs, pOutPos, &numVerts, pOutAttribs); - - return numVerts; - } - // clip SIMD primitives void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx) { diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c index 4c105f373..e97cb63ae 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c @@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c) if (uses_small_imm) continue; + /* Don't propagate small immediates into the top-end bounds + * checking for indirect UBO loads. The kernel doesn't parse + * small immediates and rejects the shader in this case. UBO + * loads are much more expensive than the uniform load, and + * indirect UBO regions are usually much larger than a small + * immediate, so it's not worth updating the kernel to allow + * optimizing it. + */ + if (inst->op == QOP_MIN_NOIMM) + continue; + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { struct qreg src = qir_follow_movs(c, inst->src[i]); diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c index 15e8984ef..00e16e3db 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c @@ -102,9 +102,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr) /* Clamp to [0, array size). Note that MIN/MAX are signed. */ indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0)); - indirect_offset = qir_MIN(c, indirect_offset, - qir_uniform_ui(c, (range->dst_offset + - range->size - 4))); + indirect_offset = qir_MIN_NOIMM(c, indirect_offset, + qir_uniform_ui(c, (range->dst_offset + + range->size - 4))); qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0)); c->num_texture_samples++; @@ -322,7 +322,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr) /* Perform the clamping required by kernel validation. */ addr = qir_MAX(c, addr, qir_uniform_ui(c, 0)); - addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4)); + addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4)); qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit)); diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c index 446af66af..4b94fcfb9 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c @@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = { [QOP_ASR] = { "asr", 1, 2 }, [QOP_SHL] = { "shl", 1, 2 }, [QOP_MIN] = { "min", 1, 2 }, + [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 }, [QOP_MAX] = { "max", 1, 2 }, [QOP_AND] = { "and", 1, 2 }, [QOP_OR] = { "or", 1, 2 }, diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h index c76aeb2bf..b3cac6bf2 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h @@ -111,6 +111,7 @@ enum qop { QOP_SHR, QOP_ASR, QOP_MIN, + QOP_MIN_NOIMM, QOP_MAX, QOP_AND, QOP_OR, @@ -709,6 +710,7 @@ QIR_ALU2(SHL) QIR_ALU2(SHR) QIR_ALU2(ASR) QIR_ALU2(MIN) +QIR_ALU2(MIN_NOIMM) QIR_ALU2(MAX) QIR_ALU2(AND) QIR_ALU2(OR) diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c b/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c index eedee55a9..2ee52a497 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c, [QOP_MOV] = { QPU_A_OR }, [QOP_FMOV] = { QPU_A_FMAX }, [QOP_MMOV] = { QPU_M_V8MIN }, + + [QOP_MIN_NOIMM] = { QPU_A_MIN }, }; uint64_t unpack = 0; |