summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/drivers/freedreno
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src/gallium/drivers/freedreno')
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c16
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c3
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c4
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c6
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c2
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c10
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c4
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c2
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c2
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c10
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c7
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c13
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c13
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c2
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c10
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c40
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h12
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c10
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c16
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c258
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h89
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c34
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c14
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h93
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c64
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c8
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h4
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c28
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c3
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h6
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c8
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h3
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c16
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c76
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h13
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c170
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h20
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c46
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h7
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c1
40 files changed, 735 insertions, 408 deletions
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index b19899de7..10edf7247 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -141,7 +141,7 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
}
enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
- if (binning || info->mode == PIPE_PRIM_POINTS)
+ if (binning || info->mode == MESA_PRIM_POINTS)
vismode = IGNORE_VISIBILITY;
fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode],
@@ -169,7 +169,7 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
if (!ctx->prog.fs || !ctx->prog.vs)
return false;
- if (pinfo->mode != PIPE_PRIM_MAX && !indirect && !pinfo->primitive_restart &&
+ if (pinfo->mode != MESA_PRIM_COUNT && !indirect && !pinfo->primitive_restart &&
!u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))
return false;
@@ -190,14 +190,14 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
*/
if (pdraw->count > 32766) {
/* clang-format off */
- static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
- [0 ... PIPE_PRIM_MAX - 1] = 32766,
- [PIPE_PRIM_LINE_STRIP] = 32765,
- [PIPE_PRIM_TRIANGLE_STRIP] = 32764,
+ static const uint16_t step_tbl[MESA_PRIM_COUNT] = {
+ [0 ... MESA_PRIM_COUNT - 1] = 32766,
+ [MESA_PRIM_LINE_STRIP] = 32765,
+ [MESA_PRIM_TRIANGLE_STRIP] = 32764,
/* needs more work */
- [PIPE_PRIM_TRIANGLE_FAN] = 0,
- [PIPE_PRIM_LINE_LOOP] = 0,
+ [MESA_PRIM_TRIANGLE_FAN] = 0,
+ [MESA_PRIM_LINE_LOOP] = 0,
};
/* clang-format on */
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c
index 46f0124fd..d60ab8c67 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c
@@ -28,7 +28,6 @@
#include "nir/tgsi_to_nir.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
@@ -176,7 +175,7 @@ patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
instr->num_format_all = fmt.num_format;
instr->format = fmt.format;
instr->exp_adjust_all = fmt.exp_adjust;
- instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride;
+ instr->stride = elem->src_stride;
instr->offset = elem->src_offset;
}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 12e564208..3428cbf83 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -86,7 +86,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
/* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
- fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
+ fd3_emit_get_vp(emit)->writes_psize && (info->mode == MESA_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
fd_draw_emit(ctx->batch, ring, primtype,
@@ -117,7 +117,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
- if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
+ if (info->mode != MESA_PRIM_COUNT && !indirect && !info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
return false;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 7dfcce338..0375c22d5 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -428,7 +428,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(elem->src_stride) |
COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
COND(elem->instance_divisor,
@@ -728,8 +728,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
if (depth == 32) {
- OUT_RING(ring, (uint32_t)(zmin * 0xffffffff));
- OUT_RING(ring, (uint32_t)(zmax * 0xffffffff));
+ OUT_RING(ring, (uint32_t)(zmin * (float)0xffffffff));
+ OUT_RING(ring, (uint32_t)(zmax * (float)0xffffffff));
} else if (depth == 16) {
OUT_RING(ring, (uint32_t)(zmin * 0xffff));
OUT_RING(ring, (uint32_t)(zmax * 0xffff));
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 0c2375f2a..9be96117d 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -36,7 +36,7 @@ struct fd3_format {
enum a3xx_tex_fmt tex;
enum a3xx_color_fmt rb;
enum a3xx_color_swap swap;
- boolean present;
+ bool present;
};
/* vertex + texture */
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 2219d2e7c..23443ee1e 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -450,10 +450,12 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
}
static struct ir3_program_state *
-fd3_program_create(void *data, struct ir3_shader_variant *bs,
- struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
+fd3_program_create(void *data, const struct ir3_shader_variant *bs,
+ const struct ir3_shader_variant *vs,
+ const struct ir3_shader_variant *hs,
+ const struct ir3_shader_variant *ds,
+ const struct ir3_shader_variant *gs,
+ const struct ir3_shader_variant *fs,
const struct ir3_cache_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 6d49340d4..86a23c67e 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -63,7 +63,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
/* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
- fd4_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS))
+ fd4_emit_get_vp(emit)->writes_psize && (info->mode == MESA_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
fd4_draw_emit(ctx->batch, ring, primtype,
@@ -114,7 +114,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
sizeof(emit.key.key.fsampler_swizzles));
}
- if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
+ if (info->mode != MESA_PRIM_COUNT && !indirect && !info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
return false;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 739b1cfcb..87f14f822 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -573,7 +573,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+ A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(elem->src_stride) |
COND(elem->instance_divisor,
A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index c66e9a3d5..b71d899c9 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -38,7 +38,7 @@ struct fd4_format {
enum a4xx_tex_fmt tex;
enum a4xx_color_fmt rb;
enum a3xx_color_swap swap;
- boolean present;
+ bool present;
};
/* vertex + texture */
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index d165cebd9..be1d4a717 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -578,10 +578,12 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr,
}
static struct ir3_program_state *
-fd4_program_create(void *data, struct ir3_shader_variant *bs,
- struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
+fd4_program_create(void *data, const struct ir3_shader_variant *bs,
+ const struct ir3_shader_variant *vs,
+ const struct ir3_shader_variant *hs,
+ const struct ir3_shader_variant *ds,
+ const struct ir3_shader_variant *gs,
+ const struct ir3_shader_variant *fs,
const struct ir3_cache_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
index d98b173e9..0521aa880 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
@@ -34,7 +34,7 @@
/* maybe move to fd5_program? */
static void
-cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
+cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct ir3_shader_variant *v) assert_dt
{
const struct ir3_info *i = &v->info;
enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS;
@@ -83,8 +83,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */
OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */
- OUT_PKT4(ring, REG_A5XX_SP_CS_OBJ_START_LO, 2);
- OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
+ fd5_emit_shader_obj(ctx, ring, v, REG_A5XX_SP_CS_OBJ_START_LO);
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0x1f00000);
@@ -120,7 +119,7 @@ fd5_launch_grid(struct fd_context *ctx,
return;
if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
- cs_program_emit(ring, v);
+ cs_program_emit(ctx, ring, v);
fd5_emit_cs_state(ctx, ring, v);
fd5_emit_cs_consts(v, ring, ctx, info);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
index 3919a742b..ec4747c45 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
@@ -164,19 +164,6 @@ fd5_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
fd5_draw_vbo(ctx, info, drawid_offset, indirect, &draws[i], index_offset);
}
-static bool
-is_z32(enum pipe_format format)
-{
- switch (format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- case PIPE_FORMAT_Z32_UNORM:
- case PIPE_FORMAT_Z32_FLOAT:
- return true;
- default:
- return false;
- }
-}
-
static void
fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
{
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
index 5886645e4..86307c21d 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
@@ -174,10 +174,10 @@ struct PACKED bcolor_entry {
uint16_t
srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
- uint8_t __pad1[24];
+ uint8_t __pad1[56];
};
-#define FD5_BORDER_COLOR_SIZE 0x60
+#define FD5_BORDER_COLOR_SIZE 0x80
#define FD5_BORDER_COLOR_UPLOAD_SIZE \
(2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE)
@@ -310,8 +310,10 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt
STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);
+ const unsigned int alignment =
+ util_next_power_of_two(FD5_BORDER_COLOR_UPLOAD_SIZE);
u_upload_alloc(fd5_ctx->border_color_uploader, 0,
- FD5_BORDER_COLOR_UPLOAD_SIZE, FD5_BORDER_COLOR_UPLOAD_SIZE,
+ FD5_BORDER_COLOR_UPLOAD_SIZE, alignment,
&off, &fd5_ctx->border_color_buf, &ptr);
entries = ptr;
@@ -417,6 +419,9 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
{
unsigned count = util_last_bit(so->enabled_mask);
+ if (count == 0)
+ return;
+
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2 * count);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
@@ -481,7 +486,7 @@ fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4);
OUT_RELOC(ring, rsc->bo, off, 0, 0);
OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */
- OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
+ OUT_RING(ring, elem->src_stride); /* VFD_FETCH[j].STRIDE */
OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2);
OUT_RING(
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c
index d51fbb262..8a35f454b 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c
@@ -38,7 +38,7 @@ struct fd5_format {
enum a5xx_tex_fmt tex;
enum a5xx_color_fmt rb;
enum a3xx_color_swap swap;
- boolean present;
+ bool present;
};
/* vertex + texture */
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
index 95847056b..5b8b7286b 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
@@ -394,6 +394,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
+ struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->gmem;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
@@ -411,10 +412,10 @@ fd5_emit_tile_init(struct fd_batch *batch) assert_dt
OUT_RING(ring, 0x0);
OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
+ OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */
OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
+ OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */
/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
fd_wfi(batch, ring);
@@ -715,6 +716,7 @@ fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
+ struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->gmem;
fd5_emit_restore(batch, ring);
@@ -730,10 +732,10 @@ fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
+ OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */
OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
- OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
+ OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */
/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
fd_wfi(batch, ring);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 245e3e538..32b97f04c 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -81,6 +81,30 @@ fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
}
}
+void
+fd5_emit_shader_obj(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *so,
+ uint32_t shader_obj_reg)
+{
+ ir3_get_private_mem(ctx, so);
+
+ OUT_PKT4(ring, shader_obj_reg, 6);
+ OUT_RELOC(ring, so->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */
+
+ uint32_t per_sp_size = ctx->pvtmem[so->pvtmem_per_wave].per_sp_size;
+ OUT_RING(ring, A5XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(
+ ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) |
+ A5XX_SP_VS_PVT_MEM_PARAM_HWSTACKOFFSET(per_sp_size));
+ if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */
+ OUT_RELOC(ring, ctx->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0);
+ fd_ringbuffer_attach_bo(ring, ctx->pvtmem[so->pvtmem_per_wave].bo);
+ } else {
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ }
+ OUT_RING(ring, A5XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(per_sp_size));
+}
+
/* TODO maybe some of this we could pre-compute once rather than having
* so much draw-time logic?
*/
@@ -487,8 +511,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, reg);
}
- OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
- OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */
+ fd5_emit_shader_obj(ctx, ring, s[VS].v, REG_A5XX_SP_VS_OBJ_START_LO);
if (s[VS].instrlen)
fd5_emit_shader(ring, s[VS].v);
@@ -512,8 +535,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */
} else {
- OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
- OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */
+ fd5_emit_shader_obj(ctx, ring, s[FS].v, REG_A5XX_SP_FS_OBJ_START_LO);
}
OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
@@ -726,10 +748,12 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
}
static struct ir3_program_state *
-fd5_program_create(void *data, struct ir3_shader_variant *bs,
- struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs,
+fd5_program_create(void *data, const struct ir3_shader_variant *bs,
+ const struct ir3_shader_variant *vs,
+ const struct ir3_shader_variant *hs,
+ const struct ir3_shader_variant *ds,
+ const struct ir3_shader_variant *gs,
+ const struct ir3_shader_variant *fs,
const struct ir3_cache_key *key) in_dt
{
struct fd_context *ctx = fd_context(data);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h
index 59c499e6d..2f9906bab 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h
@@ -37,9 +37,9 @@ struct fd5_emit;
struct fd5_program_state {
struct ir3_program_state base;
- struct ir3_shader_variant *bs; /* VS for when emit->binning */
- struct ir3_shader_variant *vs;
- struct ir3_shader_variant *fs; /* FS for when !emit->binning */
+ const struct ir3_shader_variant *bs; /* VS for when emit->binning */
+ const struct ir3_shader_variant *vs;
+ const struct ir3_shader_variant *fs; /* FS for when !emit->binning */
};
static inline struct fd5_program_state *
@@ -51,8 +51,12 @@ fd5_program_state(struct ir3_program_state *state)
void fd5_emit_shader(struct fd_ringbuffer *ring,
const struct ir3_shader_variant *so);
+void fd5_emit_shader_obj(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *so,
+ uint32_t shader_obj_reg) assert_dt;
+
void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd5_emit *emit);
+ struct fd5_emit *emit) assert_dt;
void fd5_prog_init(struct pipe_context *pctx);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c
index 9c685f3ab..945c2cfad 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c
@@ -206,16 +206,6 @@ timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
}
-static uint64_t
-ticks_to_ns(uint32_t ts)
-{
- /* This is based on the 19.2MHz always-on rbbm timer.
- *
- * TODO we should probably query this value from kernel..
- */
- return ts * (1000000000 / 19200000);
-}
-
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c
index 1faf272f3..4907c2851 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c
@@ -122,14 +122,14 @@ fd5_screen_is_format_supported(struct pipe_screen *pscreen,
/* clang-format off */
static const enum pc_di_primtype primtypes[] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST,
- [PIPE_PRIM_LINES] = DI_PT_LINELIST,
- [PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
- [PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
- [PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
- [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
- [PIPE_PRIM_MAX] = DI_PT_RECTLIST, /* internal clear blits */
+ [MESA_PRIM_POINTS] = DI_PT_POINTLIST,
+ [MESA_PRIM_LINES] = DI_PT_LINELIST,
+ [MESA_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
+ [MESA_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
+ [MESA_PRIM_TRIANGLES] = DI_PT_TRILIST,
+ [MESA_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
+ [MESA_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
+ [MESA_PRIM_COUNT] = DI_PT_RECTLIST, /* internal clear blits */
};
/* clang-format on */
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
index df6217733..9c458e33e 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -57,18 +57,61 @@ alloc_ring(struct fd_batch *batch, unsigned sz, enum fd_ringbuffer_flags flags)
return fd_submit_new_ringbuffer(batch->submit, sz, flags);
}
+static struct fd_batch_subpass *
+subpass_create(struct fd_batch *batch)
+{
+ struct fd_batch_subpass *subpass = CALLOC_STRUCT(fd_batch_subpass);
+
+ subpass->draw = alloc_ring(batch, 0x100000, 0);
+
+ /* Replace batch->draw with reference to current subpass, for
+ * backwards compat with code that is not subpass aware.
+ */
+ if (batch->draw)
+ fd_ringbuffer_del(batch->draw);
+ batch->draw = fd_ringbuffer_ref(subpass->draw);
+
+ list_addtail(&subpass->node, &batch->subpasses);
+
+ return subpass;
+}
+
static void
-batch_init(struct fd_batch *batch)
+subpass_destroy(struct fd_batch_subpass *subpass)
{
- struct fd_context *ctx = batch->ctx;
+ fd_ringbuffer_del(subpass->draw);
+ if (subpass->subpass_clears)
+ fd_ringbuffer_del(subpass->subpass_clears);
+ list_del(&subpass->node);
+ if (subpass->lrz)
+ fd_bo_del(subpass->lrz);
+ free(subpass);
+}
+
+struct fd_batch *
+fd_batch_create(struct fd_context *ctx, bool nondraw)
+{
+ struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
+
+ if (!batch)
+ return NULL;
+
+ DBG("%p", batch);
+
+ pipe_reference_init(&batch->reference, 1);
+ batch->ctx = ctx;
+ batch->nondraw = nondraw;
+
+ batch->resources =
+ _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+ list_inithead(&batch->subpasses);
batch->submit = fd_submit_new(ctx->pipe);
if (batch->nondraw) {
batch->gmem = alloc_ring(batch, 0x1000, FD_RINGBUFFER_PRIMARY);
- batch->draw = alloc_ring(batch, 0x100000, 0);
} else {
batch->gmem = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY);
- batch->draw = alloc_ring(batch, 0x100000, 0);
/* a6xx+ re-uses draw rb for both draw and binning pass: */
if (ctx->screen->gen < 6) {
@@ -76,6 +119,12 @@ batch_init(struct fd_batch *batch)
}
}
+ /* Pre-attach private BOs: */
+ for (unsigned i = 0; i < ctx->num_private_bos; i++)
+ fd_ringbuffer_attach_bo(batch->gmem, ctx->private_bos[i]);
+
+ batch->subpass = subpass_create(batch);
+
batch->in_fence_fd = -1;
batch->fence = NULL;
@@ -86,23 +135,10 @@ batch_init(struct fd_batch *batch)
if (ctx->screen->gen < 6)
batch->fence = fd_pipe_fence_create(batch);
- batch->cleared = 0;
- batch->fast_cleared = 0;
- batch->invalidated = 0;
- batch->restore = batch->resolve = 0;
- batch->needs_flush = false;
- batch->flushed = false;
- batch->gmem_reason = 0;
- batch->num_draws = 0;
- batch->num_vertices = 0;
- batch->num_bins_per_pipe = 0;
- batch->prim_strm_bits = 0;
- batch->draw_strm_bits = 0;
-
fd_reset_wfi(batch);
util_dynarray_init(&batch->draw_patches, NULL);
- util_dynarray_init(&(batch->fb_read_patches), NULL);
+ util_dynarray_init(&(batch->fb_read_patches), NULL);
if (is_a2xx(ctx->screen)) {
util_dynarray_init(&batch->shader_patches, NULL);
@@ -112,42 +148,47 @@ batch_init(struct fd_batch *batch)
if (is_a3xx(ctx->screen))
util_dynarray_init(&batch->rbrc_patches, NULL);
- assert(batch->resources->entries == 0);
-
util_dynarray_init(&batch->samples, NULL);
u_trace_init(&batch->trace, &ctx->trace_context);
batch->last_timestamp_cmd = NULL;
+
+ return batch;
}
-struct fd_batch *
-fd_batch_create(struct fd_context *ctx, bool nondraw)
+struct fd_batch_subpass *
+fd_batch_create_subpass(struct fd_batch *batch)
{
- struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
+ assert(!batch->nondraw);
- if (!batch)
- return NULL;
-
- DBG("%p", batch);
-
- pipe_reference_init(&batch->reference, 1);
- batch->ctx = ctx;
- batch->nondraw = nondraw;
+ struct fd_batch_subpass *subpass = subpass_create(batch);
- batch->resources =
- _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ /* This new subpass inherits the current subpass.. this is replaced
+ * if there is a depth clear
+ */
+ if (batch->subpass->lrz)
+ subpass->lrz = fd_bo_ref(batch->subpass->lrz);
- batch_init(batch);
+ batch->subpass = subpass;
- return batch;
+ return subpass;
}
+/**
+ * Cleanup that we normally do when the submit is flushed, like dropping
+ * rb references. But also called when batch is destroyed just in case
+ * it wasn't flushed.
+ */
static void
cleanup_submit(struct fd_batch *batch)
{
if (!batch->submit)
return;
+ foreach_subpass_safe (subpass, batch) {
+ subpass_destroy(subpass);
+ }
+
fd_ringbuffer_del(batch->draw);
fd_ringbuffer_del(batch->gmem);
@@ -171,14 +212,14 @@ cleanup_submit(struct fd_batch *batch)
batch->epilogue = NULL;
}
- if (batch->tile_setup) {
- fd_ringbuffer_del(batch->tile_setup);
- batch->tile_setup = NULL;
+ if (batch->tile_loads) {
+ fd_ringbuffer_del(batch->tile_loads);
+ batch->tile_loads = NULL;
}
- if (batch->tile_fini) {
- fd_ringbuffer_del(batch->tile_fini);
- batch->tile_fini = NULL;
+ if (batch->tile_store) {
+ fd_ringbuffer_del(batch->tile_store);
+ batch->tile_store = NULL;
}
fd_submit_del(batch->submit);
@@ -186,46 +227,6 @@ cleanup_submit(struct fd_batch *batch)
}
static void
-batch_fini(struct fd_batch *batch)
-{
- DBG("%p", batch);
-
- pipe_resource_reference(&batch->query_buf, NULL);
-
- if (batch->in_fence_fd != -1)
- close(batch->in_fence_fd);
-
- /* in case batch wasn't flushed but fence was created: */
- if (batch->fence)
- fd_pipe_fence_set_batch(batch->fence, NULL);
-
- fd_pipe_fence_ref(&batch->fence, NULL);
-
- cleanup_submit(batch);
-
- util_dynarray_fini(&batch->draw_patches);
- for (int i = 0; i < MAX_RENDER_TARGETS; i++)
- util_dynarray_fini(&(batch->fb_read_patches));
-
- if (is_a2xx(batch->ctx->screen)) {
- util_dynarray_fini(&batch->shader_patches);
- util_dynarray_fini(&batch->gmem_patches);
- }
-
- if (is_a3xx(batch->ctx->screen))
- util_dynarray_fini(&batch->rbrc_patches);
-
- while (batch->samples.size > 0) {
- struct fd_hw_sample *samp =
- util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
- fd_hw_sample_reference(batch->ctx, &samp, NULL);
- }
- util_dynarray_fini(&batch->samples);
-
- u_trace_fini(&batch->trace);
-}
-
-static void
batch_flush_dependencies(struct fd_batch *batch) assert_dt
{
struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
@@ -268,28 +269,6 @@ batch_reset_resources(struct fd_batch *batch)
}
}
-static void
-batch_reset(struct fd_batch *batch) assert_dt
-{
- DBG("%p", batch);
-
- batch_reset_dependencies(batch);
-
- fd_screen_lock(batch->ctx->screen);
- batch_reset_resources(batch);
- fd_screen_unlock(batch->ctx->screen);
-
- batch_fini(batch);
- batch_init(batch);
-}
-
-void
-fd_batch_reset(struct fd_batch *batch)
-{
- if (batch->needs_flush)
- batch_reset(batch);
-}
-
void
__fd_batch_destroy_locked(struct fd_batch *batch)
{
@@ -310,7 +289,39 @@ __fd_batch_destroy_locked(struct fd_batch *batch)
assert(batch->dependents_mask == 0);
util_copy_framebuffer_state(&batch->framebuffer, NULL);
- batch_fini(batch);
+
+ pipe_resource_reference(&batch->query_buf, NULL);
+
+ if (batch->in_fence_fd != -1)
+ close(batch->in_fence_fd);
+
+ /* in case batch wasn't flushed but fence was created: */
+ if (batch->fence)
+ fd_pipe_fence_set_batch(batch->fence, NULL);
+
+ fd_pipe_fence_ref(&batch->fence, NULL);
+
+ cleanup_submit(batch);
+
+ util_dynarray_fini(&batch->draw_patches);
+ util_dynarray_fini(&(batch->fb_read_patches));
+
+ if (is_a2xx(batch->ctx->screen)) {
+ util_dynarray_fini(&batch->shader_patches);
+ util_dynarray_fini(&batch->gmem_patches);
+ }
+
+ if (is_a3xx(batch->ctx->screen))
+ util_dynarray_fini(&batch->rbrc_patches);
+
+ while (batch->samples.size > 0) {
+ struct fd_hw_sample *samp =
+ util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
+ fd_hw_sample_reference(batch->ctx, &samp, NULL);
+ }
+ util_dynarray_fini(&batch->samples);
+
+ u_trace_fini(&batch->trace);
free(batch->key);
free(batch);
@@ -389,6 +400,30 @@ batch_flush(struct fd_batch *batch) assert_dt
cleanup_submit(batch);
}
+void
+fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb)
+{
+ assert(!batch->nondraw);
+
+ util_copy_framebuffer_state(&batch->framebuffer, pfb);
+
+ if (!pfb->zsbuf)
+ return;
+
+ struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
+
+ /* Switching back to a batch we'd previously started constructing shouldn't
+ * result in a different lrz. The dependency tracking should avoid another
+ * batch writing/clearing our depth buffer.
+ */
+ if (batch->subpass->lrz) {
+ assert(batch->subpass->lrz == zsbuf->lrz);
+ } else if (zsbuf->lrz) {
+ batch->subpass->lrz = fd_bo_ref(zsbuf->lrz);
+ }
+}
+
+
/* NOTE: could drop the last ref to batch
*/
void
@@ -469,6 +504,12 @@ fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc)
_mesa_set_add_pre_hashed(batch->resources, rsc->hash, rsc);
rsc->track->batch_mask |= (1 << batch->idx);
+
+ fd_ringbuffer_attach_bo(batch->draw, rsc->bo);
+ if (unlikely(rsc->b.b.next)) {
+ struct fd_resource *n = fd_resource(rsc->b.b.next);
+ fd_ringbuffer_attach_bo(batch->draw, n->bo);
+ }
}
void
@@ -488,8 +529,6 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
if (track->write_batch == batch)
return;
- fd_batch_write_prep(batch, rsc);
-
if (rsc->stencil)
fd_batch_resource_write(batch, rsc->stencil);
@@ -510,8 +549,10 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
* ctx dependencies and let the app have the undefined behavior
* it asked for:
*/
- if (track->write_batch->ctx != batch->ctx)
+ if (track->write_batch->ctx != batch->ctx) {
+ fd_ringbuffer_attach_bo(batch->draw, rsc->bo);
return;
+ }
flush_write_batch(rsc);
}
@@ -533,6 +574,8 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
fd_batch_reference_locked(&track->write_batch, batch);
fd_batch_add_resource(batch, rsc);
+
+ fd_batch_write_prep(batch, rsc);
}
void
@@ -558,6 +601,7 @@ fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc)
* by avoiding cross-ctx dependencies and let the app have the
* undefined behavior it asked for:
*/
+ fd_ringbuffer_attach_bo(batch->draw, rsc->bo);
return;
}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h
index 305cc2e2d..b419d8ec6 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -45,7 +45,53 @@ struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;
-/* A batch tracks everything about a cmdstream batch/submit, including the
+/**
+ * A subpass is a fragment of a batch potentially starting with a clear.
+ * If the app does a mid-batch clear, that clear and subsequent draws
+ * can be split out into another sub-pass. At gmem time, the appropriate
+ * sysmem or gmem clears can be interleaved with the CP_INDIRECT_BUFFER
+ * to the subpass's draw cmdstream.
+ *
+ * For depth clears, a replacement LRZ buffer can be allocated (clear
+ * still inserted into the prologue cmdstream since it needs be executed
+ * even in sysmem or if we aren't binning, since later batches could
+ * depend in the LRZ state). The alternative would be to invalidate
+ * LRZ for draws after the start of the new subpass.
+ */
+struct fd_batch_subpass {
+ struct list_head node;
+
+ /** draw pass cmdstream: */
+ struct fd_ringbuffer *draw;
+
+ /** for the gmem code to stash per tile per subpass clears */
+ struct fd_ringbuffer *subpass_clears;
+
+ BITMASK_ENUM(fd_buffer_mask) fast_cleared;
+
+ union pipe_color_union clear_color[MAX_RENDER_TARGETS];
+ double clear_depth;
+ unsigned clear_stencil;
+
+ /**
+ * The number of draws emitted to this subpass. If it is greater than
+ * zero, a clear triggers creating a new subpass (because clears must
+ * always come at the start of a subpass).
+ */
+ unsigned num_draws;
+
+ /**
+ * If a subpass starts with a LRZ clear, it gets a new LRZ buffer.
+ * The fd_resource::lrz always tracks the current lrz buffer, but at
+ * binning/gmem time we need to know what was the current lrz buffer
+ * at the time draws were emitted to the subpass. Which is tracked
+ * here.
+ */
+ struct fd_bo *lrz;
+};
+
+/**
+ * A batch tracks everything about a cmdstream batch/submit, including the
* ringbuffers used for binning, draw, and gmem cmds, list of associated
* fd_resource-s, etc.
*/
@@ -76,7 +122,7 @@ struct fd_batch {
* where the contents are undefined, ie. what we don't need to restore
* to gmem.
*/
- BITMASK_ENUM(fd_buffer_mask) invalidated, cleared, fast_cleared, restore, resolve;
+ BITMASK_ENUM(fd_buffer_mask) invalidated, cleared, restore, resolve;
/* is this a non-draw batch (ie compute/blit which has no pfb state)? */
bool nondraw : 1;
@@ -190,7 +236,24 @@ struct fd_batch {
struct fd_submit *submit;
- /** draw pass cmdstream: */
+ /**
+ * List of fd_batch_subpass.
+ */
+ struct list_head subpasses;
+
+#define foreach_subpass(subpass, batch) \
+ list_for_each_entry (struct fd_batch_subpass, subpass, &batch->subpasses, node)
+#define foreach_subpass_safe(subpass, batch) \
+ list_for_each_entry_safe (struct fd_batch_subpass, subpass, &batch->subpasses, node)
+
+ /**
+ * The current subpass.
+ */
+ struct fd_batch_subpass *subpass;
+
+ /**
+ * just a reference to the current subpass's draw cmds for backwards compat.
+ */
struct fd_ringbuffer *draw;
/** binning pass cmdstream: */
struct fd_ringbuffer *binning;
@@ -206,12 +269,8 @@ struct fd_batch {
/** epilogue cmdstream (executed after all tiles): */
struct fd_ringbuffer *epilogue;
- struct fd_ringbuffer *tile_setup;
- struct fd_ringbuffer *tile_fini;
-
- union pipe_color_union clear_color[MAX_RENDER_TARGETS];
- double clear_depth;
- unsigned clear_stencil;
+ struct fd_ringbuffer *tile_loads;
+ struct fd_ringbuffer *tile_store;
/**
* hw query related state:
@@ -222,6 +281,13 @@ struct fd_batch {
*/
uint32_t next_sample_offset;
+ /* The # of pipeline-stats queries running. In case of nested
+ * queries using {START/STOP}_{PRIMITIVE,FRAGMENT,COMPUTE}_CNTRS,
+ * we need to start only on the first one and stop only on the
+ * last one.
+ */
+ uint8_t pipeline_stats_queries_active[3];
+
/* cached samples (in case multiple queries need to reference
* the same sample snapshot)
*/
@@ -256,7 +322,10 @@ struct fd_batch {
struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);
-void fd_batch_reset(struct fd_batch *batch) assert_dt;
+struct fd_batch_subpass *fd_batch_create_subpass(struct fd_batch *batch) assert_dt;
+
+void fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb) assert_dt;
+
void fd_batch_flush(struct fd_batch *batch) assert_dt;
bool fd_batch_has_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c
index df49d945f..a8ce3101d 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -440,6 +440,34 @@ alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
return batch;
}
+static void
+alloc_query_buf(struct fd_context *ctx, struct fd_batch *batch)
+{
+ if (batch->query_buf)
+ return;
+
+ if ((ctx->screen->gen < 3) || (ctx->screen->gen > 4))
+ return;
+
+ /* For gens that use fd_hw_query, pre-allocate an initially zero-sized
+ * (unbacked) query buffer. This simplifies draw/grid/etc-time resource
+ * tracking.
+ */
+ struct pipe_screen *pscreen = &ctx->screen->base;
+ struct pipe_resource templ = {
+ .target = PIPE_BUFFER,
+ .format = PIPE_FORMAT_R8_UNORM,
+ .bind = PIPE_BIND_QUERY_BUFFER,
+ .width0 = 0, /* create initially zero size buffer */
+ .height0 = 1,
+ .depth0 = 1,
+ .array_size = 1,
+ .last_level = 0,
+ .nr_samples = 1,
+ };
+ batch->query_buf = pscreen->resource_create(pscreen, &templ);
+}
+
struct fd_batch *
fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
{
@@ -457,6 +485,8 @@ fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
batch = alloc_batch_locked(cache, ctx, nondraw);
fd_screen_unlock(ctx->screen);
+ alloc_query_buf(ctx, batch);
+
if (batch && nondraw)
fd_context_switch_to(ctx, batch);
@@ -552,5 +582,9 @@ fd_batch_from_fb(struct fd_context *ctx,
struct fd_batch *batch = batch_from_key(ctx, key);
fd_screen_unlock(ctx->screen);
+ alloc_query_buf(ctx, batch);
+
+ fd_batch_set_fb(batch, pfb);
+
return batch;
}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c
index b566a30dc..e7f098151 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c
@@ -323,6 +323,13 @@ fd_context_switch_to(struct fd_context *ctx, struct fd_batch *batch)
}
}
+void
+fd_context_add_private_bo(struct fd_context *ctx, struct fd_bo *bo)
+{
+ assert(ctx->num_private_bos < ARRAY_SIZE(ctx->private_bos));
+ ctx->private_bos[ctx->num_private_bos++] = bo;
+}
+
/**
* Return a reference to the current batch, caller must unref.
*/
@@ -343,7 +350,6 @@ fd_context_batch(struct fd_context *ctx)
if (unlikely(!batch)) {
batch =
fd_batch_from_fb(ctx, &ctx->framebuffer);
- util_copy_framebuffer_state(&batch->framebuffer, &ctx->framebuffer);
fd_batch_reference(&ctx->batch, batch);
fd_context_all_dirty(ctx);
}
@@ -589,9 +595,9 @@ fd_context_setup_common_vbos(struct fd_context *ctx)
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ .src_stride = 12,
}});
ctx->solid_vbuf_state.vertexbuf.count = 1;
- ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;
/* setup blit_vbuf_state: */
@@ -602,17 +608,17 @@ fd_context_setup_common_vbos(struct fd_context *ctx)
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32_FLOAT,
+ .src_stride = 8,
},
{
.vertex_buffer_index = 1,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ .src_stride = 12,
}});
ctx->blit_vbuf_state.vertexbuf.count = 2;
- ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource =
ctx->blit_texcoord_vbuf;
- ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h
index 6d861cbea..3dedfd3e6 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h
@@ -88,6 +88,7 @@ struct fd_vertexbuf_stateobj {
struct fd_vertex_stateobj {
struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
+ unsigned strides[PIPE_MAX_ATTRIBS];
unsigned num_elements;
};
@@ -165,9 +166,10 @@ enum fd_dirty_3d_state {
FD_DIRTY_IMAGE = BIT(18),
FD_DIRTY_SSBO = BIT(19),
FD_DIRTY_QUERY = BIT(20),
+ FD_DIRTY_SAMPLE_LOCATIONS = BIT(21),
/* only used by a2xx.. possibly can be removed.. */
- FD_DIRTY_TEXSTATE = BIT(21),
+ FD_DIRTY_TEXSTATE = BIT(22),
/* fine grained state changes, for cases where state is not orthogonal
* from hw perspective:
@@ -179,6 +181,69 @@ enum fd_dirty_3d_state {
#define NUM_DIRTY_BITS 28
};
+static inline void
+fd_print_dirty_state(BITMASK_ENUM(fd_dirty_3d_state) dirty)
+{
+#ifdef DEBUG
+ if (!FD_DBG(MSGS))
+ return;
+
+ struct {
+ enum fd_dirty_3d_state state;
+ const char *name;
+ } tbl[] = {
+#define STATE(n) { FD_DIRTY_ ## n, #n }
+ STATE(BLEND),
+ STATE(RASTERIZER),
+ STATE(ZSA),
+ STATE(BLEND_COLOR),
+ STATE(STENCIL_REF),
+ STATE(SAMPLE_MASK),
+ STATE(FRAMEBUFFER),
+ STATE(STIPPLE),
+ STATE(VIEWPORT),
+ STATE(VTXSTATE),
+ STATE(VTXBUF),
+ STATE(MIN_SAMPLES),
+ STATE(SCISSOR),
+ STATE(STREAMOUT),
+ STATE(UCP),
+ STATE(PROG),
+ STATE(CONST),
+ STATE(TEX),
+ STATE(IMAGE),
+ STATE(SSBO),
+ STATE(QUERY),
+ STATE(TEXSTATE),
+ STATE(RASTERIZER_DISCARD),
+ STATE(RASTERIZER_CLIP_PLANE_ENABLE),
+ STATE(BLEND_DUAL),
+ STATE(BLEND_COHERENT),
+#undef STATE
+ };
+
+ struct log_stream *s = mesa_log_streami();
+
+ mesa_log_stream_printf(s, "dirty:");
+
+ if ((uint32_t)dirty == ~0) {
+ mesa_log_stream_printf(s, " ALL");
+ dirty = 0;
+ }
+
+ for (unsigned i = 0; i < ARRAY_SIZE(tbl); i++) {
+ if (dirty & tbl[i].state) {
+ mesa_log_stream_printf(s, " %s", tbl[i].name);
+ dirty &= ~tbl[i].state;
+ }
+ }
+
+ assert(!dirty);
+
+ mesa_log_stream_destroy(s);
+#endif
+}
+
/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
FD_DIRTY_SHADER_PROG = BIT(0),
@@ -195,9 +260,14 @@ enum fd_buffer_mask {
FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
+
+ /* A special internal buffer bit to signify that the LRZ buffer needs
+ * clearing
+ */
+ FD_BUFFER_LRZ = BIT(15),
};
-#define MAX_HW_SAMPLE_PROVIDERS 7
+#define MAX_HW_SAMPLE_PROVIDERS 10
struct fd_hw_sample_provider;
struct fd_hw_sample;
@@ -377,6 +447,14 @@ struct fd_context {
/* Per vsc pipe bo's (a2xx-a5xx): */
struct fd_bo *vsc_pipe_bo[32] dt;
+ /* Table of bo's attached to all batches up-front (because they
+ * are commonly used, and that is easier than attaching on-use).
+ * In particular, these are driver internal buffers which do not
+ * participate in batch resource tracking.
+ */
+ struct fd_bo *private_bos[3];
+ unsigned num_private_bos;
+
/* Maps generic gallium oriented fd_dirty_3d_state bits to generation
* specific bitmask of state "groups".
*/
@@ -418,6 +496,10 @@ struct fd_context {
unsigned sample_mask dt;
unsigned min_samples dt;
+ /* 1x1 grid, max 4x MSAA: */
+ uint8_t sample_locations[4] dt;
+ bool sample_locations_enabled dt;
+
/* local context fb state, for when ctx->batch is null: */
struct pipe_framebuffer_state framebuffer dt;
uint32_t all_mrt_channel_mask dt;
@@ -458,6 +540,7 @@ struct fd_context {
struct {
struct fd_bo *bo;
uint32_t per_fiber_size;
+ uint32_t per_sp_size;
} pvtmem[2] dt;
/* maps per-shader-stage state plus variant key to hw
@@ -496,6 +579,7 @@ struct fd_context {
/* optional, for GMEM bypass: */
void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
+ void (*emit_sysmem)(struct fd_batch *batch) dt;
void (*emit_sysmem_fini)(struct fd_batch *batch) dt;
/* draw: */
@@ -509,6 +593,9 @@ struct fd_context {
const union pipe_color_union *color, double depth,
unsigned stencil) dt;
+ /* called to update draw_vbo func after bound shader stages change, etc: */
+ void (*update_draw)(struct fd_context *ctx);
+
/* compute: */
void (*launch_grid)(struct fd_context *ctx,
const struct pipe_grid_info *info) dt;
@@ -592,6 +679,8 @@ fd_stream_output_target(struct pipe_stream_output_target *target)
return (struct fd_stream_output_target *)target;
}
+void fd_context_add_private_bo(struct fd_context *ctx, struct fd_bo *bo);
+
/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, BITMASK_ENUM(fd_dirty_3d_state) dirty)
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c
index f3f757b2c..56ea718b9 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -139,13 +139,13 @@ batch_draw_tracking_for_dirty_bits(struct fd_batch *batch) assert_dt
/* Mark constbuf as being read: */
if (dirty_shader & FD_DIRTY_SHADER_CONST) {
u_foreach_bit (i, ctx->constbuf[s].enabled_mask)
- resource_read(batch, ctx->constbuf[s].cb[i].buffer);
+ resource_read(batch, ctx->constbuf[s].cb[i].buffer);
}
/* Mark textures as being read */
if (dirty_shader & FD_DIRTY_SHADER_TEX) {
u_foreach_bit (i, ctx->tex[s].valid_textures)
- resource_read(batch, ctx->tex[s].textures[i]->texture);
+ resource_read(batch, ctx->tex[s].textures[i]->texture);
}
/* Mark SSBOs as being read or written: */
@@ -182,9 +182,15 @@ batch_draw_tracking_for_dirty_bits(struct fd_batch *batch) assert_dt
/* Mark streamout buffers as being written.. */
if (dirty & FD_DIRTY_STREAMOUT) {
- for (unsigned i = 0; i < ctx->streamout.num_targets; i++)
- if (ctx->streamout.targets[i])
- resource_written(batch, ctx->streamout.targets[i]->buffer);
+ for (unsigned i = 0; i < ctx->streamout.num_targets; i++) {
+ struct fd_stream_output_target *target =
+ fd_stream_output_target(ctx->streamout.targets[i]);
+
+ if (target) {
+ resource_written(batch, target->base.buffer);
+ resource_written(batch, target->offset_buf);
+ }
+ }
}
if (dirty & FD_DIRTY_QUERY) {
@@ -215,6 +221,9 @@ needs_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
if (indirect) {
if (indirect->buffer && !batch_references_resource(batch, indirect->buffer))
return true;
+ if (indirect->indirect_draw_count &&
+ !batch_references_resource(batch, indirect->indirect_draw_count))
+ return true;
if (indirect->count_from_stream_output)
return true;
}
@@ -228,13 +237,8 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
{
struct fd_context *ctx = batch->ctx;
- /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
- * query_buf may not be created yet.
- */
- fd_batch_update_queries(batch);
-
if (!needs_draw_tracking(batch, info, indirect))
- return;
+ goto out;
/*
* Figure out the buffers/features we need:
@@ -251,8 +255,8 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
/* Mark indirect draw buffer as being read */
if (indirect) {
- if (indirect->buffer)
- resource_read(batch, indirect->buffer);
+ resource_read(batch, indirect->buffer);
+ resource_read(batch, indirect->indirect_draw_count);
if (indirect->count_from_stream_output)
resource_read(
batch, fd_stream_output_target(indirect->count_from_stream_output)
@@ -262,6 +266,9 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
resource_written(batch, batch->query_buf);
fd_screen_unlock(ctx->screen);
+
+out:
+ fd_batch_update_queries(batch);
}
static void
@@ -277,7 +284,7 @@ update_draw_stats(struct fd_context *ctx, const struct pipe_draw_info *info,
* so keep the count accurate for non-patch geometry.
*/
unsigned prims = 0;
- if ((info->mode != PIPE_PRIM_PATCHES) && (info->mode != PIPE_PRIM_MAX)) {
+ if ((info->mode != MESA_PRIM_PATCHES) && (info->mode != MESA_PRIM_COUNT)) {
for (unsigned i = 0; i < num_draws; i++) {
prims += u_reduced_prims_for_vertices(info->mode, draws[i].count);
}
@@ -287,7 +294,7 @@ update_draw_stats(struct fd_context *ctx, const struct pipe_draw_info *info,
if (ctx->streamout.num_targets > 0) {
/* Clip the prims we're writing to the size of the SO buffers. */
- enum pipe_prim_type tf_prim = u_decomposed_prim(info->mode);
+ enum mesa_prim tf_prim = u_decomposed_prim(info->mode);
unsigned verts_written = u_vertices_for_prims(tf_prim, prims);
unsigned remaining_vert_space =
ctx->streamout.max_tf_vtx - ctx->streamout.verts_written;
@@ -369,6 +376,9 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
}
batch->num_draws++;
+ batch->subpass->num_draws++;
+
+ fd_print_dirty_state(ctx->dirty);
/* Marking the batch as needing flush must come after the batch
* dependency tracking (resource_read()/resource_write()), as that
@@ -537,19 +547,27 @@ static void
fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
const union pipe_color_union *color, unsigned x,
unsigned y, unsigned w, unsigned h,
- bool render_condition_enabled)
+ bool render_condition_enabled) in_dt
{
- DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
+ if (render_condition_enabled && !fd_render_condition_check(pctx))
+ return;
+
+ fd_blitter_clear_render_target(pctx, ps, color, x, y, w, h,
+ render_condition_enabled);
}
static void
fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
unsigned buffers, double depth, unsigned stencil,
unsigned x, unsigned y, unsigned w, unsigned h,
- bool render_condition_enabled)
+ bool render_condition_enabled) in_dt
{
- DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
- buffers, depth, stencil, x, y, w, h);
+ if (render_condition_enabled && !fd_render_condition_check(pctx))
+ return;
+
+ fd_blitter_clear_depth_stencil(pctx, ps, buffers,
+ depth, stencil, x, y, w, h,
+ render_condition_enabled);
}
static void
@@ -602,6 +620,10 @@ fd_launch_grid(struct pipe_context *pctx,
if (info->indirect)
resource_read(batch, info->indirect);
+ list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node) {
+ resource_written(batch, aq->prsc);
+ }
+
/* If the saved batch has been flushed during the resource tracking,
* don't re-install it:
*/
@@ -610,6 +632,8 @@ fd_launch_grid(struct pipe_context *pctx,
fd_screen_unlock(ctx->screen);
+ fd_batch_update_queries(batch);
+
DBG("%p: work_dim=%u, block=%ux%ux%u, grid=%ux%ux%u",
batch, info->work_dim,
info->block[0], info->block[1], info->block[2],
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c
index 552f42a3d..e7677009f 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -55,7 +55,7 @@ fence_flush(struct pipe_context *pctx, struct pipe_fence_handle *fence,
if (!timeout)
return false;
- if (timeout == PIPE_TIMEOUT_INFINITE) {
+ if (timeout == OS_TIMEOUT_INFINITE) {
util_queue_fence_wait(&fence->ready);
} else {
int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
@@ -267,7 +267,7 @@ fd_pipe_fence_get_fd(struct pipe_screen *pscreen, struct pipe_fence_handle *fenc
* but if TC is not used, this will be null. Which is fine, we won't call
* threaded_context_flush() in that case
*/
- fence_flush(&fence->ctx->tc->base, fence, PIPE_TIMEOUT_INFINITE);
+ fence_flush(&fence->ctx->tc->base, fence, OS_TIMEOUT_INFINITE);
assert(fence->fence);
return os_dupfd_cloexec(fence->fence->fence_fd);
}
@@ -289,10 +289,10 @@ fd_pipe_fence_set_batch(struct pipe_fence_handle *fence, struct fd_batch *batch)
{
if (batch) {
assert(!fence->batch);
- fence->batch = batch;
+ fd_batch_reference(&fence->batch, batch);
fd_batch_needs_flush(batch);
} else {
- fence->batch = NULL;
+ fd_batch_reference(&fence->batch, NULL);
/* When the batch is dis-associated with the fence, we can signal TC
* that the fence is flushed
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h
index 30d568ac8..2bd4c581c 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h
@@ -44,13 +44,13 @@ struct pipe_fence_handle {
*/
struct pipe_fence_handle *last_fence;
- /* fence holds a weak reference to the batch until the batch is flushed, to
+ /* fence holds a reference to the batch until the batch is flushed, to
* accommodate PIPE_FLUSH_DEFERRED. When the batch is actually flushed, it
* is cleared (before the batch reference is dropped). If we need to wait
* on a fence, and the batch is not NULL, we need to flush it.
*
* Note that with u_threaded_context async flushes, if a fence is requested
- * by the frontend, the fence is initially created without a weak reference
+ * by the frontend, the fence is initially created without a reference
* to the batch, which is filled in later when fd_context_flush() is called
* from the driver thread. In this case tc_token will be non-null, in
* which case threaded_context_flush() should be called in fd_fence_finish()
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c
index 7b579649b..893398e7a 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -25,15 +25,16 @@
*/
#include "pipe/p_state.h"
-#include "util/u_debug.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
+#include "util/macros.h"
+#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
-#include "u_tracepoints.h"
#include "util/u_trace_gallium.h"
+#include "u_tracepoints.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
@@ -315,7 +316,6 @@ gmem_stateobj_init(struct fd_screen *screen, struct gmem_key *key)
* performance.
*/
-#define div_round_up(v, a) (((v) + (a)-1) / (a))
/* figure out number of tiles per pipe: */
if (is_a20x(screen)) {
/* for a20x we want to minimize the number of "pipes"
@@ -326,10 +326,10 @@ gmem_stateobj_init(struct fd_screen *screen, struct gmem_key *key)
tpp_y = 6;
} else {
tpp_x = tpp_y = 1;
- while (div_round_up(gmem->nbins_y, tpp_y) > npipes)
+ while (DIV_ROUND_UP(gmem->nbins_y, tpp_y) > npipes)
tpp_y += 2;
- while ((div_round_up(gmem->nbins_y, tpp_y) *
- div_round_up(gmem->nbins_x, tpp_x)) > npipes)
+ while ((DIV_ROUND_UP(gmem->nbins_y, tpp_y) *
+ DIV_ROUND_UP(gmem->nbins_x, tpp_x)) > npipes)
tpp_x += 1;
}
@@ -399,7 +399,7 @@ gmem_stateobj_init(struct fd_screen *screen, struct gmem_key *key)
uint32_t p;
/* pipe number: */
- p = ((i / tpp_y) * div_round_up(gmem->nbins_x, tpp_x)) + (j / tpp_x);
+ p = ((i / tpp_y) * DIV_ROUND_UP(gmem->nbins_x, tpp_x)) + (j / tpp_x);
assert(p < gmem->num_vsc_pipes);
/* clip bin width: */
@@ -480,9 +480,9 @@ gmem_key_init(struct fd_batch *batch, bool assume_zs, bool no_scis_opt)
if (has_zs || assume_zs) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- key->zsbuf_cpp[0] = rsc->layout.cpp;
+ key->zsbuf_cpp[0] = rsc->layout.cpp * pfb->samples;
if (rsc->stencil)
- key->zsbuf_cpp[1] = rsc->stencil->layout.cpp;
+ key->zsbuf_cpp[1] = rsc->stencil->layout.cpp * pfb->samples;
/* If we clear z or s but not both, and we are using z24s8 (ie.
* !separate_stencil) then we need to restore the other, even if
@@ -493,7 +493,7 @@ gmem_key_init(struct fd_batch *batch, bool assume_zs, bool no_scis_opt)
* u_blitter will show up as a normal draw with depth and/or
* stencil enabled.
*/
- unsigned zsclear = batch->fast_cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
+ unsigned zsclear = batch->cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
if (zsclear) {
const struct util_format_description *desc =
util_format_description(pfb->zsbuf->format);
@@ -672,7 +672,11 @@ render_sysmem(struct fd_batch *batch) assert_dt
trace_start_draw_ib(&batch->trace, batch->gmem);
}
/* emit IB to drawcmds: */
- ctx->screen->emit_ib(batch->gmem, batch->draw);
+ if (ctx->emit_sysmem) {
+ ctx->emit_sysmem(batch);
+ } else {
+ ctx->screen->emit_ib(batch->gmem, batch->draw);
+ }
if (!batch->nondraw) {
trace_end_draw_ib(&batch->trace, batch->gmem);
@@ -719,7 +723,7 @@ fd_gmem_render_tiles(struct fd_batch *batch)
/* Sometimes we need to flush a batch just to get a fence, with no
* clears or draws.. in this case promote to nondraw:
*/
- if (!(batch->fast_cleared || batch->num_draws))
+ if (!(batch->cleared || batch->num_draws))
sysmem = true;
if (!batch->nondraw) {
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c
index feb3e56f5..2a93d197b 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c
@@ -36,11 +36,14 @@ static void
update_bound_stage(struct fd_context *ctx, enum pipe_shader_type shader,
bool bound) assert_dt
{
+ uint32_t bound_shader_stages = ctx->bound_shader_stages;
if (bound) {
ctx->bound_shader_stages |= BIT(shader);
} else {
ctx->bound_shader_stages &= ~BIT(shader);
}
+ if (ctx->update_draw && (bound_shader_stages != ctx->bound_shader_stages))
+ ctx->update_draw(ctx);
}
static void
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h
index 8cb8363b7..b57895713 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h
@@ -131,6 +131,12 @@ pidx(unsigned query_type)
return 5;
case PIPE_QUERY_PRIMITIVES_EMITTED:
return 6;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ return 7;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ return 8;
+ case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
+ return 9;
default:
return -1;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c
index e3f4c69f8..6dee86d70 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c
@@ -84,13 +84,13 @@ fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
const struct fd_acc_sample_provider *p = aq->provider;
- aq->batch = batch;
- fd_batch_needs_flush(aq->batch);
- p->resume(aq, aq->batch);
-
fd_screen_lock(batch->ctx->screen);
fd_batch_resource_write(batch, fd_resource(aq->prsc));
fd_screen_unlock(batch->ctx->screen);
+
+ aq->batch = batch;
+ fd_batch_needs_flush(aq->batch);
+ p->resume(aq, aq->batch);
}
static void
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h
index 8022842f9..eebe8c080 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h
@@ -138,6 +138,9 @@ copy_result(struct fd_ringbuffer *ring, enum pipe_query_value_type result_type,
struct fd_resource *dst, unsigned dst_offset,
struct fd_resource *src, unsigned src_offset)
{
+ fd_ringbuffer_attach_bo(ring, dst->bo);
+ fd_ringbuffer_attach_bo(ring, src->bo);
+
OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
OUT_RING(ring, COND(result_type >= PIPE_QUERY_TYPE_I64, CP_MEM_TO_MEM_0_DOUBLE));
OUT_RELOC(ring, dst->bo, dst_offset, 0, 0);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c
index d709c8f25..6ed0582da 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -299,22 +299,6 @@ fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
samp->tile_stride = 0;
batch->next_sample_offset += size;
- if (!batch->query_buf) {
- struct pipe_screen *pscreen = &batch->ctx->screen->base;
- struct pipe_resource templ = {
- .target = PIPE_BUFFER,
- .format = PIPE_FORMAT_R8_UNORM,
- .bind = PIPE_BIND_QUERY_BUFFER,
- .width0 = 0, /* create initially zero size buffer */
- .height0 = 1,
- .depth0 = 1,
- .array_size = 1,
- .last_level = 0,
- .nr_samples = 1,
- };
- batch->query_buf = pscreen->resource_create(pscreen, &templ);
- }
-
pipe_resource_reference(&samp->prsc, batch->query_buf);
return samp;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c
index f210dfe55..a57807071 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -30,6 +30,7 @@
#include "util/set.h"
#include "util/u_drm.h"
#include "util/u_inlines.h"
+#include "util/u_sample_positions.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/u_transfer.h"
@@ -52,14 +53,6 @@
/* XXX this should go away, needed for 'struct winsys_handle' */
#include "frontend/drm_driver.h"
-/* A private modifier for now, so we have a way to request tiled but not
- * compressed. It would perhaps be good to get real modifiers for the
- * tiled formats, but would probably need to do some work to figure out
- * the layout(s) of the tiled modes, and whether they are the same
- * across generations.
- */
-#define FD_FORMAT_MOD_QCOM_TILED fourcc_mod_code(QCOM, 0xffffffff)
-
/**
* Go through the entire state and see if the resource is bound
* anywhere. If it is, mark the relevant state as dirty. This is
@@ -573,7 +566,7 @@ fd_resource_uncompress(struct fd_context *ctx, struct fd_resource *rsc, bool lin
{
tc_assert_driver_thread(ctx->tc);
- uint64_t modifier = linear ? DRM_FORMAT_MOD_LINEAR : FD_FORMAT_MOD_QCOM_TILED;
+ uint64_t modifier = linear ? DRM_FORMAT_MOD_LINEAR : DRM_FORMAT_MOD_QCOM_TILED3;
ASSERTED bool success = fd_try_shadow_resource(ctx, rsc, 0, NULL, modifier);
@@ -1290,6 +1283,11 @@ get_best_layout(struct fd_screen *screen,
if (tmpl->bind & PIPE_BIND_USE_FRONT_RENDERING)
ubwc_ok = false;
+ /* Disallow UBWC when asked not to use data dependent bandwidth compression:
+ */
+ if (tmpl->bind & PIPE_BIND_CONST_BW)
+ ubwc_ok = false;
+
if (ubwc_ok && !can_implicit &&
!drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count)) {
perf_debug("%" PRSC_FMT
@@ -1301,15 +1299,8 @@ get_best_layout(struct fd_screen *screen,
if (ubwc_ok)
return UBWC;
- /* We can't use tiled with explicit modifiers, as there is no modifier token
- * defined for it. But we might internally force tiled allocation using a
- * private modifier token.
- *
- * TODO we should probably also limit TILED in a similar way to UBWC above,
- * once we have a public modifier token defined.
- */
if (can_implicit ||
- drm_find_modifier(FD_FORMAT_MOD_QCOM_TILED, modifiers, count))
+ drm_find_modifier(DRM_FORMAT_MOD_QCOM_TILED3, modifiers, count))
return TILED;
if (!drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count)) {
@@ -1643,10 +1634,6 @@ static const struct u_transfer_vtbl transfer_vtbl = {
.get_stencil = fd_resource_get_stencil,
};
-static const uint64_t supported_modifiers[] = {
- DRM_FORMAT_MOD_LINEAR,
-};
-
static int
fd_layout_resource_for_modifier(struct fd_resource *rsc, uint64_t modifier)
{
@@ -1760,10 +1747,6 @@ fd_resource_screen_init(struct pipe_screen *pscreen)
if (!screen->layout_resource_for_modifier)
screen->layout_resource_for_modifier = fd_layout_resource_for_modifier;
- if (!screen->supported_modifiers) {
- screen->supported_modifiers = supported_modifiers;
- screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers);
- }
/* GL_EXT_memory_object */
pscreen->memobj_create_from_handle = fd_memobj_create_from_handle;
@@ -1772,47 +1755,6 @@ fd_resource_screen_init(struct pipe_screen *pscreen)
}
static void
-fd_get_sample_position(struct pipe_context *context, unsigned sample_count,
- unsigned sample_index, float *pos_out)
-{
- /* The following is copied from nouveau/nv50 except for position
- * values, which are taken from blob driver */
- static const uint8_t pos1[1][2] = {{0x8, 0x8}};
- static const uint8_t pos2[2][2] = {{0xc, 0xc}, {0x4, 0x4}};
- static const uint8_t pos4[4][2] = {{0x6, 0x2},
- {0xe, 0x6},
- {0x2, 0xa},
- {0xa, 0xe}};
- /* TODO needs to be verified on supported hw */
- static const uint8_t pos8[8][2] = {{0x9, 0x5}, {0x7, 0xb}, {0xd, 0x9},
- {0x5, 0x3}, {0x3, 0xd}, {0x1, 0x7},
- {0xb, 0xf}, {0xf, 0x1}};
-
- const uint8_t(*ptr)[2];
-
- switch (sample_count) {
- case 1:
- ptr = pos1;
- break;
- case 2:
- ptr = pos2;
- break;
- case 4:
- ptr = pos4;
- break;
- case 8:
- ptr = pos8;
- break;
- default:
- assert(0);
- return;
- }
-
- pos_out[0] = ptr[sample_index][0] / 16.0f;
- pos_out[1] = ptr[sample_index][1] / 16.0f;
-}
-
-static void
fd_blit_pipe(struct pipe_context *pctx,
const struct pipe_blit_info *blit_info) in_dt
{
@@ -1836,5 +1778,5 @@ fd_resource_context_init(struct pipe_context *pctx)
pctx->blit = fd_blit_pipe;
pctx->flush_resource = fd_flush_resource;
pctx->invalidate_resource = fd_invalidate_resource;
- pctx->get_sample_position = fd_get_sample_position;
+ pctx->get_sample_position = u_default_get_sample_position;
}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h
index 00f563947..0c4818df2 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -253,6 +253,19 @@ has_depth(enum pipe_format format)
return util_format_has_depth(desc);
}
+static inline bool
+is_z32(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return true;
+ default:
+ return false;
+ }
+}
+
struct fd_transfer {
struct threaded_transfer b;
struct pipe_resource *staging_prsc;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c
index 30c9c80db..93aa04fa3 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -125,6 +125,14 @@ fd_screen_get_device_vendor(struct pipe_screen *pscreen)
return "Qualcomm";
}
+static void
+fd_get_sample_pixel_grid(struct pipe_screen *pscreen, unsigned sample_count,
+ unsigned *out_width, unsigned *out_height)
+{
+ *out_width = 1;
+ *out_height = 1;
+}
+
static uint64_t
fd_screen_get_timestamp(struct pipe_screen *pscreen)
{
@@ -133,8 +141,7 @@ fd_screen_get_timestamp(struct pipe_screen *pscreen)
if (screen->has_timestamp) {
uint64_t n;
fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n);
- assert(screen->max_freq > 0);
- return n * 1000000000 / screen->max_freq;
+ return ticks_to_ns(n);
} else {
int64_t cpu_time = os_time_get_nano();
return cpu_time + screen->cpu_gpu_time_delta;
@@ -181,6 +188,36 @@ fd_screen_destroy(struct pipe_screen *pscreen)
free(screen);
}
+static uint64_t
+get_memory_size(struct fd_screen *screen)
+{
+ uint64_t system_memory;
+
+ if (!os_get_total_physical_memory(&system_memory))
+ return 0;
+ if (fd_device_version(screen->dev) >= FD_VERSION_VA_SIZE) {
+ uint64_t va_size;
+ if (!fd_pipe_get_param(screen->pipe, FD_VA_SIZE, &va_size)) {
+ system_memory = MIN2(system_memory, va_size);
+ }
+ }
+
+ return system_memory;
+}
+
+static void
+fd_query_memory_info(struct pipe_screen *pscreen,
+ struct pipe_memory_info *info)
+{
+ unsigned mem = get_memory_size(fd_screen(pscreen)) >> 10;
+
+ memset(info, 0, sizeof(*info));
+
+ info->total_device_memory = mem;
+ info->avail_device_memory = mem;
+}
+
+
/*
TODO either move caps to a2xx/a3xx specific code, or maybe have some
tables for things that differ if the delta is not too much..
@@ -206,16 +243,19 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_INVALIDATE_BUFFER:
- case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
case PIPE_CAP_NIR_COMPACT_ARRAYS:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
case PIPE_CAP_GL_SPIRV:
case PIPE_CAP_FBFETCH_COHERENT:
+ case PIPE_CAP_HAS_CONST_BW:
return 1;
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
- case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return is_a6xx(screen);
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
@@ -271,6 +311,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_IMAGE_STORE_FORMATTED:
+ case PIPE_CAP_IMAGE_LOAD_FORMATTED:
return is_a5xx(screen) || is_a6xx(screen);
case PIPE_CAP_SURFACE_SAMPLE_COUNT:
@@ -279,9 +320,18 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_CLIP_DISABLE:
return is_a3xx(screen) || is_a4xx(screen) || is_a6xx(screen);
+ case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
+ case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
return is_a6xx(screen);
+ case PIPE_CAP_SAMPLER_REDUCTION_MINMAX:
+ case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB:
+ return is_a6xx(screen) && screen->info->a6xx.has_sampler_minmax;
+
+ case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
+ return is_a6xx(screen) && screen->info->a6xx.has_sample_locations;
+
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);
@@ -331,14 +381,13 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 64;
case PIPE_CAP_INT64:
- case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_DOUBLES:
return is_ir3(screen);
case PIPE_CAP_GLSL_FEATURE_LEVEL:
case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
if (is_a6xx(screen))
- return 450;
+ return 460;
else if (is_ir3(screen))
return 140;
else
@@ -403,6 +452,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 0;
case PIPE_CAP_VS_LAYER_VIEWPORT:
+ case PIPE_CAP_TES_LAYER_VIEWPORT:
return is_a6xx(screen);
case PIPE_CAP_MAX_VIEWPORTS:
@@ -470,6 +520,10 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return !is_a5xx(screen);
/* Stream output. */
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ if (is_a6xx(screen)) /* has SO + GS */
+ return PIPE_MAX_SO_BUFFERS;
+ return 0;
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
if (is_ir3(screen))
return PIPE_MAX_SO_BUFFERS;
@@ -535,7 +589,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* only a4xx, requires new enough kernel so we know max_freq: */
return (screen->max_freq > 0) &&
(is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen));
+ case PIPE_CAP_TIMER_RESOLUTION:
+ return ticks_to_ns(1);
case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
return is_a6xx(screen);
case PIPE_CAP_VENDOR_ID:
@@ -545,22 +603,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_ACCELERATED:
return 1;
- case PIPE_CAP_VIDEO_MEMORY: {
- uint64_t system_memory;
+ case PIPE_CAP_VIDEO_MEMORY:
+ return (int)(get_memory_size(screen) >> 20);
- if (!os_get_total_physical_memory(&system_memory))
- return 0;
-
- if (fd_device_version(screen->dev) >= FD_VERSION_VA_SIZE) {
- uint64_t va_size;
-
- if (!fd_pipe_get_param(screen->pipe, FD_VA_SIZE, &va_size)) {
- system_memory = MIN2(system_memory, va_size);
- }
- }
-
- return (int)(system_memory >> 20);
- }
+ case PIPE_CAP_QUERY_MEMORY_INFO: /* Enables GL_ATI_meminfo */
+ return get_memory_size(screen) != 0;
case PIPE_CAP_UMA:
return 1;
@@ -644,6 +691,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
if (has_compute(screen))
break;
return 0;
+ case PIPE_SHADER_TASK:
+ case PIPE_SHADER_MESH:
+ return 0;
default:
mesa_loge("unknown shader type %d", shader);
return 0;
@@ -661,7 +711,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_INPUTS:
if (shader == PIPE_SHADER_GEOMETRY && is_a6xx(screen))
return 16;
- return is_a6xx(screen) ? 32 : 16;
+ return is_a6xx(screen) ?
+ (screen->info->a6xx.vs_max_inputs_count) : 16;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
return is_a6xx(screen) ? 32 : 16;
case PIPE_SHADER_CAP_MAX_TEMPS:
@@ -687,7 +738,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
/* a2xx compiler doesn't handle indirect: */
return is_ir3(screen) ? 1 : 0;
case PIPE_SHADER_CAP_SUBROUTINES:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
@@ -710,8 +760,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return (1 << PIPE_SHADER_IR_NIR) |
COND(has_compute(screen) && (shader == PIPE_SHADER_COMPUTE),
@@ -815,7 +863,7 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
RET((uint64_t[]){screen->ram_size});
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
- RET((uint64_t[]){32768});
+ RET((uint64_t[]){screen->info->cs_shared_mem_size});
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
@@ -833,9 +881,12 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t[]){1});
- case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t[]){32}); // TODO
+ case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
+ RET((uint32_t[]){0}); // TODO
+
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
RET((uint64_t[]){ compiler->max_variable_workgroup_size });
}
@@ -900,29 +951,42 @@ fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo,
}
}
+static bool
+is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ uint64_t modifier)
+{
+ struct fd_screen *screen = fd_screen(pscreen);
+ if (screen->is_format_supported)
+ return screen->is_format_supported(pscreen, format, modifier);
+ return modifier == DRM_FORMAT_MOD_LINEAR;
+}
+
static void
fd_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen,
enum pipe_format format, int max,
uint64_t *modifiers,
unsigned int *external_only, int *count)
{
- struct fd_screen *screen = fd_screen(pscreen);
- int i, num = 0;
+ const uint64_t all_modifiers[] = {
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_QCOM_COMPRESSED,
+ DRM_FORMAT_MOD_QCOM_TILED3,
+ };
- max = MIN2(max, screen->num_supported_modifiers);
+ int num = 0;
- if (!max) {
- max = screen->num_supported_modifiers;
- external_only = NULL;
- modifiers = NULL;
- }
+ for (int i = 0; i < ARRAY_SIZE(all_modifiers); i++) {
+ if (!is_format_supported(pscreen, format, all_modifiers[i]))
+ continue;
- for (i = 0; i < max; i++) {
- if (modifiers)
- modifiers[num] = screen->supported_modifiers[i];
+ if (num < max) {
+ if (modifiers)
+ modifiers[num] = all_modifiers[i];
- if (external_only)
- external_only[num] = 0;
+ if (external_only)
+ external_only[num] = false;
+ }
num++;
}
@@ -936,19 +1000,7 @@ fd_screen_is_dmabuf_modifier_supported(struct pipe_screen *pscreen,
enum pipe_format format,
bool *external_only)
{
- struct fd_screen *screen = fd_screen(pscreen);
- int i;
-
- for (i = 0; i < screen->num_supported_modifiers; i++) {
- if (modifier == screen->supported_modifiers[i]) {
- if (external_only)
- *external_only = false;
-
- return true;
- }
- }
-
- return false;
+ return is_format_supported(pscreen, format, modifier);
}
struct fd_bo *
@@ -1030,6 +1082,8 @@ fd_screen_create(int fd,
fd_perfetto_init();
#endif
+ util_gpuvis_init();
+
pscreen = &screen->base;
screen->dev = dev;
@@ -1060,10 +1114,11 @@ fd_screen_create(int fd,
screen->max_freq = 0;
} else {
screen->max_freq = val;
- if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0)
- screen->has_timestamp = true;
}
+ if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0)
+ screen->has_timestamp = true;
+
screen->dev_id = fd_pipe_dev_id(screen->pipe);
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
@@ -1174,7 +1229,7 @@ fd_screen_create(int fd,
/* fdN_screen_init() should set this: */
assert(screen->primtypes);
screen->primtypes_mask = 0;
- for (unsigned i = 0; i <= PIPE_PRIM_MAX; i++)
+ for (unsigned i = 0; i <= MESA_PRIM_COUNT; i++)
if (screen->primtypes[i])
screen->primtypes_mask |= (1 << i);
@@ -1200,6 +1255,7 @@ fd_screen_create(int fd,
pscreen->destroy = fd_screen_destroy;
pscreen->get_screen_fd = fd_screen_get_fd;
+ pscreen->query_memory_info = fd_query_memory_info;
pscreen->get_param = fd_screen_get_param;
pscreen->get_paramf = fd_screen_get_paramf;
pscreen->get_shader_param = fd_screen_get_shader_param;
@@ -1215,6 +1271,8 @@ fd_screen_create(int fd,
pscreen->get_vendor = fd_screen_get_vendor;
pscreen->get_device_vendor = fd_screen_get_device_vendor;
+ pscreen->get_sample_pixel_grid = fd_get_sample_pixel_grid;
+
pscreen->get_timestamp = fd_screen_get_timestamp;
pscreen->fence_reference = _fd_fence_ref;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h
index c8a814ecc..48ec88121 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -129,6 +129,8 @@ struct fd_screen {
unsigned (*tile_mode)(const struct pipe_resource *prsc);
int (*layout_resource_for_modifier)(struct fd_resource *rsc,
uint64_t modifier);
+ bool (*is_format_supported)(struct pipe_screen *pscreen,
+ enum pipe_format fmt, uint64_t modifier);
/* indirect-branch emit: */
void (*emit_ib)(struct fd_ringbuffer *ring, struct fd_ringbuffer *target);
@@ -160,7 +162,7 @@ struct fd_screen {
#define FD6_TESS_BO_SIZE (FD6_TESS_FACTOR_SIZE + FD6_TESS_PARAM_SIZE)
struct fd_bo *tess_bo;
- /* table with PIPE_PRIM_MAX+1 entries mapping PIPE_PRIM_x to
+ /* table with MESA_PRIM_COUNT+1 entries mapping MESA_PRIM_x to
* DI_PT_x value to use for draw initiator. There are some
* slight differences between generation.
*
@@ -213,13 +215,13 @@ struct pipe_screen *fd_screen_create(int fd,
const struct pipe_screen_config *config,
struct renderonly *ro);
-static inline boolean
+static inline bool
is_a20x(struct fd_screen *screen)
{
return (screen->gpu_id >= 200) && (screen->gpu_id < 210);
}
-static inline boolean
+static inline bool
is_a2xx(struct fd_screen *screen)
{
return screen->gen == 2;
@@ -227,38 +229,38 @@ is_a2xx(struct fd_screen *screen)
/* is a3xx patch revision 0? */
/* TODO a306.0 probably doesn't need this.. be more clever?? */
-static inline boolean
+static inline bool
is_a3xx_p0(struct fd_screen *screen)
{
return (screen->chip_id & 0xff0000ff) == 0x03000000;
}
-static inline boolean
+static inline bool
is_a3xx(struct fd_screen *screen)
{
return screen->gen == 3;
}
-static inline boolean
+static inline bool
is_a4xx(struct fd_screen *screen)
{
return screen->gen == 4;
}
-static inline boolean
+static inline bool
is_a5xx(struct fd_screen *screen)
{
return screen->gen == 5;
}
-static inline boolean
+static inline bool
is_a6xx(struct fd_screen *screen)
{
return screen->gen == 6;
}
/* is it using the ir3 compiler (shader isa introduced with a3xx)? */
-static inline boolean
+static inline bool
is_ir3(struct fd_screen *screen)
{
return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c
index a5da323c4..d06012249 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c
@@ -99,6 +99,25 @@ fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) in_dt
}
static void
+fd_set_sample_locations(struct pipe_context *pctx, size_t size,
+ const uint8_t *locations)
+ in_dt
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ if (!locations) {
+ ctx->sample_locations_enabled = false;
+ return;
+ }
+
+ size = MIN2(size, sizeof(ctx->sample_locations));
+ memcpy(ctx->sample_locations, locations, size);
+ ctx->sample_locations_enabled = true;
+
+ fd_context_dirty(ctx, FD_DIRTY_SAMPLE_LOCATIONS);
+}
+
+static void
fd_set_min_samples(struct pipe_context *pctx, unsigned min_samples) in_dt
{
struct fd_context *ctx = fd_context(pctx);
@@ -140,8 +159,10 @@ fd_set_constant_buffer(struct pipe_context *pctx, enum pipe_shader_type shader,
return;
}
- if (cb->user_buffer && ctx->screen->gen >= 6)
+ if (cb->user_buffer && ctx->screen->gen >= 6) {
upload_user_buffer(pctx, &so->cb[index]);
+ cb = &so->cb[index];
+ }
so->enabled_mask |= 1 << index;
@@ -451,7 +472,7 @@ fd_set_viewport_states(struct pipe_context *pctx, unsigned start_slot,
}
static void
-fd_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
+fd_set_vertex_buffers(struct pipe_context *pctx,
unsigned count, unsigned unbind_num_trailing_slots,
bool take_ownership,
const struct pipe_vertex_buffer *vb) in_dt
@@ -467,17 +488,15 @@ fd_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
if (ctx->screen->gen < 3) {
for (i = 0; i < count; i++) {
bool new_enabled = vb && vb[i].buffer.resource;
- bool old_enabled = so->vb[start_slot + i].buffer.resource != NULL;
- uint32_t new_stride = vb ? vb[i].stride : 0;
- uint32_t old_stride = so->vb[start_slot + i].stride;
- if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
+ bool old_enabled = so->vb[i].buffer.resource != NULL;
+ if (new_enabled != old_enabled) {
fd_context_dirty(ctx, FD_DIRTY_VTXSTATE);
break;
}
}
}
- util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot,
+ util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb,
count, unbind_num_trailing_slots,
take_ownership);
so->count = util_last_bit(so->enabled_mask);
@@ -497,7 +516,7 @@ fd_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
*/
if (vb[i].buffer.resource &&
unlikely(vb[i].buffer_offset >= vb[i].buffer.resource->width0)) {
- so->vb[start_slot + i].buffer_offset = 0;
+ so->vb[i].buffer_offset = 0;
}
}
}
@@ -595,6 +614,8 @@ fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
so->num_elements = num_elements;
+ for (unsigned i = 0; i < num_elements; i++)
+ so->strides[elements[i].vertex_buffer_index] = elements[i].src_stride;
return so;
}
@@ -675,14 +696,18 @@ fd_set_stream_output_targets(struct pipe_context *pctx, unsigned num_targets,
}
for (i = 0; i < num_targets; i++) {
- boolean changed = targets[i] != so->targets[i];
- boolean reset = (offsets[i] != (unsigned)-1);
+ bool changed = targets[i] != so->targets[i];
+ bool reset = (offsets[i] != (unsigned)-1);
so->reset |= (reset << i);
if (targets[i]) {
fd_resource_set_usage(targets[i]->buffer, FD_DIRTY_STREAMOUT);
fd_dirty_resource(ctx, targets[i]->buffer, FD_DIRTY_STREAMOUT, true);
+
+ struct fd_stream_output_target *target = fd_stream_output_target(targets[i]);
+ fd_resource_set_usage(target->offset_buf, FD_DIRTY_STREAMOUT);
+ fd_dirty_resource(ctx, target->offset_buf, FD_DIRTY_STREAMOUT, true);
}
if (!changed && !reset)
@@ -805,6 +830,7 @@ fd_state_init(struct pipe_context *pctx)
pctx->set_shader_buffers = fd_set_shader_buffers;
pctx->set_shader_images = fd_set_shader_images;
pctx->set_framebuffer_state = fd_set_framebuffer_state;
+ pctx->set_sample_locations = fd_set_sample_locations;
pctx->set_polygon_stipple = fd_set_polygon_stipple;
pctx->set_scissor_states = fd_set_scissor_states;
pctx->set_viewport_states = fd_set_viewport_states;
diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h
index 5e3fdbe4d..8a64622d1 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h
@@ -507,6 +507,13 @@ fd4_size2indextype(unsigned index_size)
return INDEX4_SIZE_32_BIT;
}
+/* Convert 19.2MHz RBBM always-on timer ticks to ns */
+static inline uint64_t
+ticks_to_ns(uint64_t ts)
+{
+ return ts * (1000000000 / 19200000);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 263762efd..497a44b14 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -36,7 +36,6 @@
#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
#include "ir3/instr-a3xx.h"