summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/drivers
diff options
context:
space:
mode:
author Jonathan Gray <jsg@cvs.openbsd.org> 2017-02-26 12:14:54 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org> 2017-02-26 12:14:54 +0000
commit b5fce4e6eb297a6f7fabd0d6c6b4ffdfefa6ad8b (patch)
tree 4c21fc3859e4eae3a2968dcd5f8b5bf23198b8a5 /lib/mesa/src/gallium/drivers
parent 04c9eaba81433c32fe1a68ad44c3e2023eac56b4 (diff)
Import Mesa 13.0.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers')
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/Makefile.am | 1
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c | 4
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c | 17
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h | 8
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c | 4
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c | 4
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c | 8
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c | 6
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c | 6
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c | 9
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c | 6
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c | 9
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 1
-rw-r--r-- lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/r600/r600_shader.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp | 25
-rw-r--r-- lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp | 4
-rw-r--r-- lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h | 14
-rw-r--r-- lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h | 6
-rw-r--r-- lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp | 46
-rw-r--r-- lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c | 17
-rw-r--r-- lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c | 19
-rw-r--r-- lib/mesa/src/gallium/drivers/radeonsi/si_shader.c | 7
-rw-r--r-- lib/mesa/src/gallium/drivers/radeonsi/si_state.c | 4
-rw-r--r-- lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c | 3
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h | 39
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c | 11
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_program.c | 8
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_qir.c | 1
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_qir.h | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c | 2
45 files changed, 182 insertions, 176 deletions
diff --git a/lib/mesa/src/gallium/drivers/freedreno/Makefile.am b/lib/mesa/src/gallium/drivers/freedreno/Makefile.am
index 148dd0eb5..ffb4db182 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/Makefile.am
+++ b/lib/mesa/src/gallium/drivers/freedreno/Makefile.am
@@ -9,6 +9,7 @@ AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(FREEDRENO_CFLAGS)
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
$(MKDIR_GEN)
$(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py > $@ || ($(RM) $@; false)
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
index 276f6be93..176a31c77 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -234,7 +234,6 @@ batch_flush_func(void *job, int id)
fd_gmem_render_tiles(batch);
batch_reset_resources(batch);
- batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
}
static void
@@ -275,7 +274,6 @@ batch_flush(struct fd_batch *batch)
} else {
fd_gmem_render_tiles(batch);
batch_reset_resources(batch);
- batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
}
debug_assert(batch->reference.count > 0);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c
index df11eab25..f3d5078d1 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -124,7 +124,7 @@ fd_bc_fini(struct fd_batch_cache *cache)
_mesa_hash_table_destroy(cache->ht, NULL);
}
-uint32_t
+void
fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
{
struct hash_entry *entry;
@@ -150,8 +150,6 @@ fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
fd_batch_sync(last_batch);
fd_batch_reference(&last_batch, NULL);
}
-
- return ctx->last_fence;
}
void
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h
index 1790e5cf4..44c66b58f 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.h
@@ -62,7 +62,7 @@ struct fd_batch_cache {
void fd_bc_init(struct fd_batch_cache *cache);
void fd_bc_fini(struct fd_batch_cache *cache);
-uint32_t fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx);
+void fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx);
void fd_bc_invalidate_context(struct fd_context *ctx);
void fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c
index 0b12409ba..70220f88d 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c
@@ -43,22 +43,15 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
unsigned flags)
{
struct fd_context *ctx = fd_context(pctx);
- uint32_t timestamp;
if (!ctx->screen->reorder) {
- struct fd_batch *batch = NULL;
- fd_batch_reference(&batch, ctx->batch);
- fd_batch_flush(batch, true);
- timestamp = fd_ringbuffer_timestamp(batch->gmem);
- fd_batch_reference(&batch, NULL);
+ fd_batch_flush(ctx->batch, true);
} else {
- timestamp = fd_bc_flush(&ctx->screen->batch_cache, ctx);
+ fd_bc_flush(&ctx->screen->batch_cache, ctx);
}
- if (fence) {
- fd_screen_fence_ref(pctx->screen, fence, NULL);
- *fence = fd_fence_create(pctx, timestamp);
- }
+ if (fence)
+ fd_fence_ref(pctx->screen, fence, ctx->last_fence);
}
/**
@@ -109,6 +102,8 @@ fd_context_destroy(struct pipe_context *pctx)
fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
fd_bc_invalidate_context(ctx);
+ fd_fence_ref(pctx->screen, &ctx->last_fence, NULL);
+
fd_prog_fini(pctx);
fd_hw_query_fini(pctx);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h
index c4c08a682..4a766f5cd 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h
@@ -164,7 +164,7 @@ struct fd_context {
*/
struct fd_batch *batch;
- uint32_t last_fence;
+ struct pipe_fence_handle *last_fence;
/* Are we in process of shadowing a resource? Used to detect recursion
* in transfer_map, and skip unneeded synchronization.
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c
index df4cf4dd5..a5f717169 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -40,7 +40,7 @@ struct pipe_fence_handle {
};
void
-fd_screen_fence_ref(struct pipe_screen *pscreen,
+fd_fence_ref(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence)
{
@@ -50,7 +50,7 @@ fd_screen_fence_ref(struct pipe_screen *pscreen,
*ptr = pfence;
}
-boolean fd_screen_fence_finish(struct pipe_screen *screen,
+boolean fd_fence_finish(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -61,11 +61,10 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen,
return true;
}
-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx,
uint32_t timestamp)
{
struct pipe_fence_handle *fence;
- struct fd_context *ctx = fd_context(pctx);
fence = CALLOC_STRUCT(pipe_fence_handle);
if (!fence)
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h
index df7664bf8..32bfacc76 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h
@@ -31,14 +31,16 @@
#include "pipe/p_context.h"
-void fd_screen_fence_ref(struct pipe_screen *pscreen,
+void fd_fence_ref(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence);
-boolean fd_screen_fence_finish(struct pipe_screen *screen,
+boolean fd_fence_finish(struct pipe_screen *screen,
struct pipe_context *ctx,
struct pipe_fence_handle *pfence,
uint64_t timeout);
-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+
+struct fd_context;
+struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx,
uint32_t timestamp);
#endif /* FREEDRENO_FENCE_H_ */
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c
index ed625e455..3b2ecbaea 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -34,6 +34,7 @@
#include "freedreno_gmem.h"
#include "freedreno_context.h"
+#include "freedreno_fence.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
@@ -394,6 +395,9 @@ fd_gmem_render_tiles(struct fd_batch *batch)
}
fd_ringbuffer_flush(batch->gmem);
+
+ fd_fence_ref(&ctx->screen->base, &ctx->last_fence, NULL);
+ ctx->last_fence = fd_fence_create(ctx, fd_ringbuffer_timestamp(batch->gmem));
}
/* tile needs restore if it isn't completely contained within the
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c
index 1f7c2a5dc..cc75c509a 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -696,8 +696,8 @@ fd_screen_create(struct fd_device *dev)
pscreen->get_timestamp = fd_screen_get_timestamp;
- pscreen->fence_reference = fd_screen_fence_ref;
- pscreen->fence_finish = fd_screen_fence_finish;
+ pscreen->fence_reference = fd_fence_ref;
+ pscreen->fence_finish = fd_fence_finish;
slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c b/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c
index bb3aad784..95d8866ba 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nouveau_video.c
@@ -73,7 +73,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) {
if (!dec->cmds)
return;
- nouveau_pushbuf_space(push, 8, 2, 0);
+ nouveau_pushbuf_space(push, 16, 2, 0);
nouveau_bufctx_reset(dec->bufctx, NV31_VIDEO_BIND_CMD);
#define BCTX_ARGS dec->bufctx, NV31_VIDEO_BIND_CMD, NOUVEAU_BO_RD
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c
index 4217bca6d..4e6df1eff 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -128,7 +128,7 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
refn.bo = mt->base.bo;
refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
- if (nouveau_pushbuf_space(push, 16, 1, 0) ||
+ if (nouveau_pushbuf_space(push, 32, 1, 0) ||
nouveau_pushbuf_refn (push, &refn, 1))
return;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
index e4b949725..6f06ee6b1 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -431,7 +431,7 @@ nv30_transfer_rect_sifm(XFER_ARGS)
si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
}
- if (nouveau_pushbuf_space(push, 32, 6, 0) ||
+ if (nouveau_pushbuf_space(push, 64, 6, 0) ||
nouveau_pushbuf_refn (push, refs, 2))
return;
@@ -516,7 +516,7 @@ nv30_transfer_rect_m2mf(XFER_ARGS)
while (h) {
unsigned lines = (h > 2047) ? 2047 : h;
- if (nouveau_pushbuf_space(push, 13, 2, 0) ||
+ if (nouveau_pushbuf_space(push, 32, 2, 0) ||
nouveau_pushbuf_refn (push, refs, 2))
return;
@@ -708,7 +708,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
lines = (pages > 2047) ? 2047 : pages;
pages -= lines;
- if (nouveau_pushbuf_space(push, 13, 2, 0) ||
+ if (nouveau_pushbuf_space(push, 32, 2, 0) ||
nouveau_pushbuf_refn (push, refs, 2))
return;
@@ -732,7 +732,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
}
if (size) {
- if (nouveau_pushbuf_space(push, 13, 2, 0) ||
+ if (nouveau_pushbuf_space(push, 32, 2, 0) ||
nouveau_pushbuf_refn (push, refs, 2))
return;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index a6c0bbc26..f5fa9d6ca 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -295,7 +295,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
PUSH_DATAf(push, color->f[2]);
PUSH_DATAf(push, color->f[3]);
- if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+ if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
return;
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
@@ -394,7 +394,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
mode |= NV50_3D_CLEAR_BUFFERS_S;
}
- if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+ if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
return;
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
@@ -752,7 +752,7 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATAf(push, color.f[2]);
PUSH_DATAf(push, color.f[3]);
- if (nouveau_pushbuf_space(push, 32, 1, 0))
+ if (nouveau_pushbuf_space(push, 64, 1, 0))
return;
PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index a11cdf847..9d55c1d46 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, prim);
- nouveau_pushbuf_space(push, 8, 0, 1);
+ nouveau_pushbuf_space(push, 16, 0, 1);
PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
switch (index_size) {
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c
index 177a7e027..92526d9f6 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video.c
@@ -273,7 +273,7 @@ nv98_create_decoder(struct pipe_context *context,
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
/* So lets test if the fence is working? */
- nouveau_pushbuf_space(push[0], 6, 1, 0);
+ nouveau_pushbuf_space(push[0], 16, 1, 0);
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
BEGIN_NV04(push[0], SUBC_BSP(0x240), 3);
PUSH_DATAh(push[0], dec->fence_bo->offset);
@@ -284,7 +284,7 @@ nv98_create_decoder(struct pipe_context *context,
PUSH_DATA (push[0], 0);
PUSH_KICK (push[0]);
- nouveau_pushbuf_space(push[1], 6, 1, 0);
+ nouveau_pushbuf_space(push[1], 16, 1, 0);
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
BEGIN_NV04(push[1], SUBC_VP(0x240), 3);
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
@@ -295,7 +295,7 @@ nv98_create_decoder(struct pipe_context *context,
PUSH_DATA (push[1], 0);
PUSH_KICK (push[1]);
- nouveau_pushbuf_space(push[2], 6, 1, 0);
+ nouveau_pushbuf_space(push[2], 16, 1, 0);
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
BEGIN_NV04(push[2], SUBC_PPP(0x240), 3);
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
index 4fe0e05c9..f77258de8 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
@@ -47,7 +47,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
int ret;
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
- unsigned fence_extra = 0;
struct nouveau_pushbuf_refn bo_refs[] = {
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
@@ -61,10 +60,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
if (!dec->bitplane_bo)
num_refs--;
-#if NOUVEAU_VP3_DEBUG_FENCE
- fence_extra = 4;
-#endif
-
bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
for (i = 0; i < num_buffers; i++)
bsp_size += num_bytes[i];
@@ -112,7 +107,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
- nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_space(push, 32, num_refs, 0);
nouveau_pushbuf_refn(push, bo_refs, num_refs);
bsp_addr = bsp_bo->offset >> 8;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
index 48f16211a..3fce65ba5 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
@@ -93,13 +93,8 @@ nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
struct nouveau_pushbuf *push = dec->pushbuf[2];
unsigned ppp_caps = 0x10;
- unsigned fence_extra = 0;
-#if NOUVEAU_VP3_DEBUG_FENCE
- fence_extra = 4;
-#endif
-
- nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
+ nouveau_pushbuf_space(push, 32, 4, 0);
switch (codec) {
case PIPE_VIDEO_FORMAT_MPEG12: {
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
index 37d7d4432..f1cdf168e 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
@@ -76,7 +76,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
- u32 fence_extra = 0, codec_extra = 0;
+ u32 codec_extra = 0;
struct nouveau_pushbuf_refn bo_refs[] = {
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
@@ -88,10 +88,6 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
};
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
-#if NOUVEAU_VP3_DEBUG_FENCE
- fence_extra = 4;
-#endif
-
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
codec_extra += 2;
@@ -115,8 +111,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
nv98_decoder_kick_ref(dec, target);
- nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
- 6 + codec_extra + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
nouveau_pushbuf_refn(push, bo_refs, num_refs);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 4c34593ef..ff20fe635 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -403,7 +403,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
nvc0_hw_query_fifo_wait(nvc0, q);
- nouveau_pushbuf_space(push, 16, 2, 0);
+ nouveau_pushbuf_space(push, 32, 2, 0);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 69ca091c4..009c606d9 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -799,7 +799,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
}
while (num_instances--) {
- nouveau_pushbuf_space(push, 9, 0, 1);
+ nouveau_pushbuf_space(push, 16, 0, 1);
BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, mode);
BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
index a9fd1d209..b5e7bba5f 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
@@ -297,7 +297,7 @@ nvc0_create_decoder(struct pipe_context *context,
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
/* So lets test if the fence is working? */
- nouveau_pushbuf_space(push[0], 6, 1, 0);
+ nouveau_pushbuf_space(push[0], 16, 1, 0);
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
PUSH_DATAh(push[0], dec->fence_bo->offset);
@@ -308,7 +308,7 @@ nvc0_create_decoder(struct pipe_context *context,
PUSH_DATA (push[0], 0);
PUSH_KICK (push[0]);
- nouveau_pushbuf_space(push[1], 6, 1, 0);
+ nouveau_pushbuf_space(push[1], 16, 1, 0);
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
@@ -319,7 +319,7 @@ nvc0_create_decoder(struct pipe_context *context,
PUSH_DATA (push[1], 0);
PUSH_KICK (push[1]);
- nouveau_pushbuf_space(push[2], 6, 1, 0);
+ nouveau_pushbuf_space(push[2], 16, 1, 0);
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
index af072a8ac..52a031c51 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
@@ -143,7 +143,6 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
uint32_t caps;
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
- unsigned fence_extra = 0;
struct nouveau_pushbuf_refn bo_refs[] = {
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
@@ -157,15 +156,11 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
if (!dec->bitplane_bo)
num_refs--;
-#if NOUVEAU_VP3_DEBUG_FENCE
- fence_extra = 4;
-#endif
-
caps = nouveau_vp3_bsp_end(dec, desc);
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
- nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_space(push, 32, num_refs, 0);
nouveau_pushbuf_refn(push, bo_refs, num_refs);
bsp_addr = bsp_bo->offset >> 8;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c
index e4504e6c4..4f058628e 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c
@@ -93,13 +93,8 @@ nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
struct nouveau_pushbuf *push = dec->pushbuf[2];
unsigned ppp_caps = 0x10;
- unsigned fence_extra = 0;
-#if NOUVEAU_VP3_DEBUG_FENCE
- fence_extra = 4;
-#endif
-
- nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
+ nouveau_pushbuf_space(push, 32, 4, 0);
switch (codec) {
case PIPE_VIDEO_FORMAT_MPEG12: {
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
index 73d551481..3de4ec148 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
@@ -76,7 +76,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
- u32 fence_extra = 0, codec_extra = 0;
+ u32 codec_extra = 0;
struct nouveau_pushbuf_refn bo_refs[] = {
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
@@ -88,10 +88,6 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
};
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
-#if NOUVEAU_VP3_DEBUG_FENCE
- fence_extra = 4;
-#endif
-
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
codec_extra += 2;
@@ -115,8 +111,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
nvc0_decoder_kick_ref(dec, target);
- nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
- 6 + codec_extra + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
nouveau_pushbuf_refn(push, bo_refs, num_refs);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index d661c000b..15b4750d3 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -816,6 +816,7 @@ nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
debug_printf("barrier count: %u\n", desc->bar_alloc);
debug_printf("$r count: %u\n", desc->gpr_alloc);
debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
+ debug_printf("linked tsc: %d\n", desc->linked_tsc);
for (i = 0; i < 8; ++i) {
uint64_t address;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
index b98c65d4a..5fe58b967 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -8,7 +8,10 @@ struct nve4_cp_launch_desc
{
u32 unk0[8];
u32 entry;
- u32 unk9[3];
+ u32 unk9[2];
+ u32 unk11_0 : 30;
+ u32 linked_tsc : 1;
+ u32 unk11_31 : 1;
u32 griddim_x : 31;
u32 unk12 : 1;
u16 griddim_y;
@@ -48,7 +51,7 @@ nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
memset(desc, 0, sizeof(*desc));
desc->unk0[7] = 0xbc000000;
- desc->unk9[2] = 0x44014000;
+ desc->unk11_0 = 0x04014000;
desc->unk47_20 = 0x300;
}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.c b/lib/mesa/src/gallium/drivers/r600/r600_shader.c
index 59a13ec24..6a265c894 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_shader.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.c
@@ -2924,7 +2924,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
- struct r600_bytecode_output output[32];
+ struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp
index 79aef9106..abae2bf69 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp
@@ -30,6 +30,18 @@
namespace r600_sb {
+int dce_cleanup::run() {
+ int r;
+
+ // Run cleanup for as long as there are unused nodes.
+ do {
+ nodes_changed = false;
+ r = vpass::run();
+ } while (r == 0 && nodes_changed);
+
+ return r;
+}
+
bool dce_cleanup::visit(node& n, bool enter) {
if (enter) {
} else {
@@ -110,7 +122,18 @@ bool dce_cleanup::visit(region_node& n, bool enter) {
void dce_cleanup::cleanup_dst(node& n) {
if (!cleanup_dst_vec(n.dst) && remove_unused &&
!n.dst.empty() && !(n.flags & NF_DONT_KILL) && n.parent)
+ {
+ // Delete use references to the removed node from the src values.
+ for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) {
+ value* v = *I;
+ if (v && v->def && v->uses.size())
+ {
+ v->remove_use(&n);
+ }
+ }
n.remove();
+ nodes_changed = true;
+ }
}
bool dce_cleanup::visit(container_node& n, bool enter) {
@@ -130,7 +153,7 @@ bool dce_cleanup::cleanup_dst_vec(vvec& vv) {
if (v->gvn_source && v->gvn_source->is_dead())
v->gvn_source = NULL;
- if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses))
+ if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses.size()))
v = NULL;
else
alive = true;
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp
index 236b2ea00..9c75389ad 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_gcm.cpp
@@ -199,10 +199,9 @@ void gcm::td_release_val(value *v) {
sblog << "\n";
);
- use_info *u = v->uses;
- while (u) {
+ for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
+ use_info *u = *I;
if (u->op->parent != &pending) {
- u = u->next;
continue;
}
@@ -212,6 +211,7 @@ void gcm::td_release_val(value *v) {
sblog << "\n";
);
+ assert(uses[u->op] > 0);
if (--uses[u->op] == 0) {
GCM_DUMP(
sblog << "td released : ";
@@ -222,7 +222,6 @@ void gcm::td_release_val(value *v) {
pending.remove_node(u->op);
ready.push_back(u->op);
}
- u = u->next;
}
}
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp
index 5226893de..d989dce62 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.cpp
@@ -255,7 +255,7 @@ void container_node::expand() {
void node::remove() {parent->remove_node(this);
}
-value_hash node::hash_src() {
+value_hash node::hash_src() const {
value_hash h = 12345;
@@ -269,7 +269,7 @@ value_hash node::hash_src() {
}
-value_hash node::hash() {
+value_hash node::hash() const {
if (parent && parent->subtype == NST_LOOP_PHI_CONTAINER)
return 47451;
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h
index 4fc4da2fb..74c0549a8 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h
@@ -446,15 +446,16 @@ enum use_kind {
};
struct use_info {
- use_info *next;
node *op;
use_kind kind;
int arg;
- use_info(node *n, use_kind kind, int arg, use_info* next)
- : next(next), op(n), kind(kind), arg(arg) {}
+ use_info(node *n, use_kind kind, int arg)
+ : op(n), kind(kind), arg(arg) {}
};
+typedef std::list< use_info * > uselist;
+
enum constraint_kind {
CK_SAME_REG,
CK_PACKED_BS,
@@ -498,7 +499,7 @@ public:
value_hash ghash;
node *def, *adef;
- use_info *uses;
+ uselist uses;
ra_constraint *constraint;
ra_chunk *chunk;
@@ -585,6 +586,7 @@ public:
}
void add_use(node *n, use_kind kind, int arg);
+ void remove_use(const node *n);
value_hash hash();
value_hash rel_hash();
@@ -790,8 +792,8 @@ public:
void replace_with(node *n);
void remove();
- virtual value_hash hash();
- value_hash hash_src();
+ virtual value_hash hash() const;
+ value_hash hash_src() const;
virtual bool fold_dispatch(expr_handler *ex);
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h
index 0346df1b1..e878f8c70 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h
@@ -124,7 +124,9 @@ class dce_cleanup : public vpass {
public:
dce_cleanup(shader &s) : vpass(s),
- remove_unused(s.dce_flags & DF_REMOVE_UNUSED) {}
+ remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
+
+ virtual int run();
virtual bool visit(node &n, bool enter);
virtual bool visit(alu_group_node &n, bool enter);
@@ -140,6 +142,8 @@ private:
void cleanup_dst(node &n);
bool cleanup_dst_vec(vvec &vv);
+ // Did we alter/remove nodes during a single pass?
+ bool nodes_changed;
};
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp
index eb242b1c2..d31a1b76d 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_valtable.cpp
@@ -220,17 +220,34 @@ void value::add_use(node* n, use_kind kind, int arg) {
dump::dump_op(n);
sblog << " kind " << kind << " arg " << arg << "\n";
}
- uses = new use_info(n, kind, arg, uses);
+ uses.push_back(new use_info(n, kind, arg));
}
-unsigned value::use_count() {
- use_info *u = uses;
- unsigned c = 0;
- while (u) {
- ++c;
- u = u->next;
+struct use_node_comp {
+ explicit use_node_comp(const node *n) : n(n) {}
+ bool operator() (const use_info *u) {
+ return u->op->hash() == n->hash();
+ }
+
+ private:
+ const node *n;
+};
+
+void value::remove_use(const node *n) {
+ uselist::iterator it =
+ std::find_if(uses.begin(), uses.end(), use_node_comp(n));
+
+ if (it != uses.end())
+ {
+ // TODO assert((*it)->kind == kind) ?
+ // TODO assert((*it)->arg == arg) ?
+ delete *it;
+ uses.erase(it);
}
- return c;
+}
+
+unsigned value::use_count() {
+ return uses.size();
}
bool value::is_global() {
@@ -274,13 +291,12 @@ bool value::is_prealloc() {
}
void value::delete_uses() {
- use_info *u, *c = uses;
- while (c) {
- u = c->next;
- delete c;
- c = u;
+ for (uselist::iterator it = uses.begin(); it != uses.end(); ++it)
+ {
+ delete *it;
}
- uses = NULL;
+
+ uses.clear();
}
void ra_constraint::update_values() {
@@ -468,7 +484,7 @@ bool r600_sb::sb_value_set::add_vec(vvec& vv) {
bool r600_sb::sb_value_set::contains(value* v) {
unsigned b = v->uid - 1;
if (b < bs.size())
- return bs.get(v->uid - 1);
+ return bs.get(b);
else
return false;
}
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
index 74bec2626..bbab58946 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -377,11 +377,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
}
- /* Using a staging buffer in GTT for larger reads is much faster. */
+ /* Use a staging buffer in cached GTT for reads. */
else if ((usage & PIPE_TRANSFER_READ) &&
- !(usage & (PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_PERSISTENT)) &&
- rbuffer->domains & RADEON_DOMAIN_VRAM &&
+ !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ (rbuffer->domains & RADEON_DOMAIN_VRAM ||
+ rbuffer->flags & RADEON_FLAG_GTT_WC) &&
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
struct r600_resource *staging;
@@ -390,11 +390,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
if (staging) {
/* Copy the VRAM buffer to the staging buffer. */
- ctx->resource_copy_region(ctx, &staging->b.b, 0,
- box->x % R600_MAP_BUFFER_ALIGNMENT,
- 0, 0, resource, level, box);
+ rctx->dma_copy(ctx, &staging->b.b, 0,
+ box->x % R600_MAP_BUFFER_ALIGNMENT,
+ 0, 0, resource, 0, box);
- data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+ data = r600_buffer_map_sync_with_rings(rctx, staging,
+ usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
if (!data) {
r600_resource_reference(&staging, NULL);
return NULL;
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c b/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c
index 5ec988157..e89bcfed2 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -320,14 +320,21 @@ static void si_sampler_view_add_buffer(struct si_context *sctx,
if (resource->target == PIPE_BUFFER)
return;
- /* Now add separate DCC if it's present. */
+ /* Now add separate DCC or HTILE. */
rtex = (struct r600_texture*)resource;
- if (!rtex->dcc_separate_buffer)
- return;
+ if (rtex->dcc_separate_buffer) {
+ radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
+ rtex->dcc_separate_buffer, usage,
+ RADEON_PRIO_DCC, check_mem);
+ }
- radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
- rtex->dcc_separate_buffer, usage,
- RADEON_PRIO_DCC, check_mem);
+ if (rtex->htile_buffer &&
+ rtex->tc_compatible_htile &&
+ !is_stencil_sampler) {
+ radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
+ rtex->htile_buffer, usage,
+ RADEON_PRIO_HTILE, check_mem);
+ }
}
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c
index 60c24014e..e8eec87f9 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c
@@ -5396,10 +5396,13 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- /* The real barrier instruction isn’t needed, because an entire patch
+ /* SI only (thanks to a hw bug workaround):
+ * The real barrier instruction isn’t needed, because an entire patch
* always fits into a single wave.
*/
- if (ctx->type == PIPE_SHADER_TESS_CTRL) {
+ if (HAVE_LLVM >= 0x0309 &&
+ ctx->screen->b.chip_class == SI &&
+ ctx->type == PIPE_SHADER_TESS_CTRL) {
emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
return;
}
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state.c
index 9e6e3d2b0..db74ca4d1 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_state.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state.c
@@ -698,8 +698,10 @@ static void si_update_poly_offset_state(struct si_context *sctx)
{
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
+ if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
+ si_pm4_bind_state(sctx, poly_offset, NULL);
return;
+ }
/* Use the user format, not db_render_format, so that the polygon
* offset behaves as expected by applications.
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c
index 6bbe36d9a..963d3735f 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -847,11 +847,12 @@ void si_emit_cache_flush(struct si_context *sctx)
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 ||
(rctx->chip_class <= CIK &&
(rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
- /* Invalidate L1 & L2. (L1 is always invalidated)
+ /* Invalidate L1 & L2. (L1 is always invalidated on SI)
* WB must be set on VI+ when TC_ACTION is set.
*/
si_emit_surface_sync(rctx, cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
+ S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
cp_coher_cntl = 0;
} else {
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h
index 43bc5222c..ee1eb3a76 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -262,45 +262,6 @@ public:
return _simd_movemask_ps(vClipCullMask);
}
- // clip a single primitive
- int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* pOutAttribs)
- {
- OSALIGNSIMD(float) inVerts[3 * 4];
- OSALIGNSIMD(float) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
-
- // transpose primitive position
- __m128 verts[3];
- pa.AssembleSingle(VERTEX_POSITION_SLOT, primIndex, verts);
- _mm_store_ps(&inVerts[0], verts[0]);
- _mm_store_ps(&inVerts[4], verts[1]);
- _mm_store_ps(&inVerts[8], verts[2]);
-
- // transpose attribs
- uint32_t numScalarAttribs = this->state.linkageCount * 4;
-
- int idx = 0;
- DWORD slot = 0;
- uint32_t mapIdx = 0;
- uint32_t tmpLinkage = uint32_t(this->state.linkageMask);
- while (_BitScanForward(&slot, tmpLinkage))
- {
- tmpLinkage &= ~(1 << slot);
- // Compute absolute attrib slot in vertex array
- uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + this->state.linkageMap[mapIdx++];
- __m128 attrib[3]; // triangle attribs (always 4 wide)
- pa.AssembleSingle(inputSlot, primIndex, attrib);
- _mm_store_ps(&inAttribs[idx], attrib[0]);
- _mm_store_ps(&inAttribs[idx + numScalarAttribs], attrib[1]);
- _mm_store_ps(&inAttribs[idx + numScalarAttribs * 2], attrib[2]);
- idx += 4;
- }
-
- int numVerts;
- Clip(inVerts, inAttribs, numScalarAttribs, pOutPos, &numVerts, pOutAttribs);
-
- return numVerts;
- }
-
// clip SIMD primitives
void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
{
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index 4c105f373..e97cb63ae 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c)
if (uses_small_imm)
continue;
+ /* Don't propagate small immediates into the top-end bounds
+ * checking for indirect UBO loads. The kernel doesn't parse
+ * small immediates and rejects the shader in this case. UBO
+ * loads are much more expensive than the uniform load, and
+ * indirect UBO regions are usually much larger than a small
+ * immediate, so it's not worth updating the kernel to allow
+ * optimizing it.
+ */
+ if (inst->op == QOP_MIN_NOIMM)
+ continue;
+
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
struct qreg src = qir_follow_movs(c, inst->src[i]);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
index 15e8984ef..00e16e3db 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
@@ -102,9 +102,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
/* Clamp to [0, array size). Note that MIN/MAX are signed. */
indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
- indirect_offset = qir_MIN(c, indirect_offset,
- qir_uniform_ui(c, (range->dst_offset +
- range->size - 4)));
+ indirect_offset = qir_MIN_NOIMM(c, indirect_offset,
+ qir_uniform_ui(c, (range->dst_offset +
+ range->size - 4)));
qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
c->num_texture_samples++;
@@ -322,7 +322,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
/* Perform the clamping required by kernel validation. */
addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
- addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
+ addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4));
qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c
index 446af66af..4b94fcfb9 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.c
@@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_ASR] = { "asr", 1, 2 },
[QOP_SHL] = { "shl", 1, 2 },
[QOP_MIN] = { "min", 1, 2 },
+ [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 },
[QOP_MAX] = { "max", 1, 2 },
[QOP_AND] = { "and", 1, 2 },
[QOP_OR] = { "or", 1, 2 },
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h
index c76aeb2bf..b3cac6bf2 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qir.h
@@ -111,6 +111,7 @@ enum qop {
QOP_SHR,
QOP_ASR,
QOP_MIN,
+ QOP_MIN_NOIMM,
QOP_MAX,
QOP_AND,
QOP_OR,
@@ -709,6 +710,7 @@ QIR_ALU2(SHL)
QIR_ALU2(SHR)
QIR_ALU2(ASR)
QIR_ALU2(MIN)
+QIR_ALU2(MIN_NOIMM)
QIR_ALU2(MAX)
QIR_ALU2(AND)
QIR_ALU2(OR)
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c b/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c
index eedee55a9..2ee52a497 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c,
[QOP_MOV] = { QPU_A_OR },
[QOP_FMOV] = { QPU_A_FMAX },
[QOP_MMOV] = { QPU_M_V8MIN },
+
+ [QOP_MIN_NOIMM] = { QPU_A_MIN },
};
uint64_t unpack = 0;