diff options
Diffstat (limited to 'lib/mesa/src/gallium/drivers/freedreno')
40 files changed, 735 insertions, 408 deletions
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c index b19899de7..10edf7247 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_draw.c @@ -141,7 +141,7 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, } enum pc_di_vis_cull_mode vismode = USE_VISIBILITY; - if (binning || info->mode == PIPE_PRIM_POINTS) + if (binning || info->mode == MESA_PRIM_POINTS) vismode = IGNORE_VISIBILITY; fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode], @@ -169,7 +169,7 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo, if (!ctx->prog.fs || !ctx->prog.vs) return false; - if (pinfo->mode != PIPE_PRIM_MAX && !indirect && !pinfo->primitive_restart && + if (pinfo->mode != MESA_PRIM_COUNT && !indirect && !pinfo->primitive_restart && !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count)) return false; @@ -190,14 +190,14 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo, */ if (pdraw->count > 32766) { /* clang-format off */ - static const uint16_t step_tbl[PIPE_PRIM_MAX] = { - [0 ... PIPE_PRIM_MAX - 1] = 32766, - [PIPE_PRIM_LINE_STRIP] = 32765, - [PIPE_PRIM_TRIANGLE_STRIP] = 32764, + static const uint16_t step_tbl[MESA_PRIM_COUNT] = { + [0 ... MESA_PRIM_COUNT - 1] = 32766, + [MESA_PRIM_LINE_STRIP] = 32765, + [MESA_PRIM_TRIANGLE_STRIP] = 32764, /* needs more work */ - [PIPE_PRIM_TRIANGLE_FAN] = 0, - [PIPE_PRIM_LINE_LOOP] = 0, + [MESA_PRIM_TRIANGLE_FAN] = 0, + [MESA_PRIM_LINE_LOOP] = 0, }; /* clang-format on */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c index 46f0124fd..d60ab8c67 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_program.c @@ -28,7 +28,6 @@ #include "nir/tgsi_to_nir.h" #include "pipe/p_state.h" #include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" #include "util/format/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" @@ -176,7 +175,7 @@ patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem, instr->num_format_all = fmt.num_format; instr->format = fmt.format; instr->exp_adjust_all = fmt.exp_adjust; - instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride; + instr->stride = elem->src_stride; instr->offset = elem->src_offset; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 12e564208..3428cbf83 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -86,7 +86,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, /* points + psize -> spritelist: */ if (ctx->rasterizer->point_size_per_vertex && - fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS)) + fd3_emit_get_vp(emit)->writes_psize && (info->mode == MESA_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; fd_draw_emit(ctx->batch, ring, primtype, @@ -117,7 +117,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; - if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart && + if (info->mode != MESA_PRIM_COUNT && !indirect && !info->primitive_restart && !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count)) return false; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 7dfcce338..0375c22d5 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -428,7 +428,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2); OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) | - A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) | + A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(elem->src_stride) | COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) | COND(elem->instance_divisor, @@ -728,8 +728,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2); if (depth == 32) { - OUT_RING(ring, (uint32_t)(zmin * 0xffffffff)); - OUT_RING(ring, (uint32_t)(zmax * 0xffffffff)); + OUT_RING(ring, (uint32_t)(zmin * (float)0xffffffff)); + OUT_RING(ring, (uint32_t)(zmax * (float)0xffffffff)); } else if (depth == 16) { OUT_RING(ring, (uint32_t)(zmin * 0xffff)); OUT_RING(ring, (uint32_t)(zmax * 0xffff)); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 0c2375f2a..9be96117d 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -36,7 +36,7 @@ struct fd3_format { enum a3xx_tex_fmt tex; enum a3xx_color_fmt rb; enum a3xx_color_swap swap; - boolean present; + bool present; }; /* vertex + texture */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 2219d2e7c..23443ee1e 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -450,10 +450,12 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr, } static struct ir3_program_state * -fd3_program_create(void *data, struct ir3_shader_variant *bs, - struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, - struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, - struct ir3_shader_variant *fs, +fd3_program_create(void *data, const struct ir3_shader_variant *bs, + const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *hs, + const struct ir3_shader_variant *ds, + const struct ir3_shader_variant *gs, + const struct ir3_shader_variant *fs, const struct ir3_cache_key *key) in_dt { struct fd_context *ctx = fd_context(data); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 6d49340d4..86a23c67e 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -63,7 +63,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, /* points + psize -> spritelist: */ if (ctx->rasterizer->point_size_per_vertex && - fd4_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS)) + fd4_emit_get_vp(emit)->writes_psize && (info->mode == MESA_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; fd4_draw_emit(ctx->batch, ring, primtype, @@ -114,7 +114,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, sizeof(emit.key.key.fsampler_swizzles)); } - if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart && + if (info->mode != MESA_PRIM_COUNT && !indirect && !info->primitive_restart && !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count)) return false; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 739b1cfcb..87f14f822 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -573,7 +573,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4); OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) | - A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) | + A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(elem->src_stride) | COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) | COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT)); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c index c66e9a3d5..b71d899c9 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -38,7 +38,7 @@ struct fd4_format { enum a4xx_tex_fmt tex; enum a4xx_color_fmt rb; enum a3xx_color_swap swap; - boolean present; + bool present; }; /* vertex + texture */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c index d165cebd9..be1d4a717 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -578,10 +578,12 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr, } static struct ir3_program_state * -fd4_program_create(void *data, struct ir3_shader_variant *bs, - struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, - struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, - struct ir3_shader_variant *fs, +fd4_program_create(void *data, const struct ir3_shader_variant *bs, + const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *hs, + const struct ir3_shader_variant *ds, + const struct ir3_shader_variant *gs, + const struct ir3_shader_variant *fs, const struct ir3_cache_key *key) in_dt { struct fd_context *ctx = fd_context(data); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c index d98b173e9..0521aa880 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_compute.c @@ -34,7 +34,7 @@ /* maybe move to fd5_program? */ static void -cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) +cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct ir3_shader_variant *v) assert_dt { const struct ir3_info *i = &v->info; enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS; @@ -83,8 +83,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */ OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */ - OUT_PKT4(ring, REG_A5XX_SP_CS_OBJ_START_LO, 2); - OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */ + fd5_emit_shader_obj(ctx, ring, v, REG_A5XX_SP_CS_OBJ_START_LO); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0x1f00000); @@ -120,7 +119,7 @@ fd5_launch_grid(struct fd_context *ctx, return; if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG) - cs_program_emit(ring, v); + cs_program_emit(ctx, ring, v); fd5_emit_cs_state(ctx, ring, v); fd5_emit_cs_consts(v, ring, ctx, info); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c index 3919a742b..ec4747c45 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c @@ -164,19 +164,6 @@ fd5_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, fd5_draw_vbo(ctx, info, drawid_offset, indirect, &draws[i], index_offset); } -static bool -is_z32(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z32_FLOAT: - return true; - default: - return false; - } -} - static void fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) { diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 5886645e4..86307c21d 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -174,10 +174,10 @@ struct PACKED bcolor_entry { uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */ - uint8_t __pad1[24]; + uint8_t __pad1[56]; }; -#define FD5_BORDER_COLOR_SIZE 0x60 +#define FD5_BORDER_COLOR_SIZE 0x80 #define FD5_BORDER_COLOR_UPLOAD_SIZE \ (2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE) @@ -310,8 +310,10 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE); + const unsigned int alignment = + util_next_power_of_two(FD5_BORDER_COLOR_UPLOAD_SIZE); u_upload_alloc(fd5_ctx->border_color_uploader, 0, - FD5_BORDER_COLOR_UPLOAD_SIZE, FD5_BORDER_COLOR_UPLOAD_SIZE, + FD5_BORDER_COLOR_UPLOAD_SIZE, alignment, &off, &fd5_ctx->border_color_buf, &ptr); entries = ptr; @@ -417,6 +419,9 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, { unsigned count = util_last_bit(so->enabled_mask); + if (count == 0) + return; + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2 * count); OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | @@ -481,7 +486,7 @@ fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit) OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4); OUT_RELOC(ring, rsc->bo, off, 0, 0); OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ - OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + OUT_RING(ring, elem->src_stride); /* VFD_FETCH[j].STRIDE */ OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2); OUT_RING( diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c index d51fbb262..8a35f454b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c @@ -38,7 +38,7 @@ struct fd5_format { enum a5xx_tex_fmt tex; enum a5xx_color_fmt rb; enum a3xx_color_swap swap; - boolean present; + bool present; }; /* vertex + texture */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c index 95847056b..5b8b7286b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c @@ -394,6 +394,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt static void fd5_emit_tile_init(struct fd_batch *batch) assert_dt { + struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; @@ -411,10 +412,10 @@ fd5_emit_tile_init(struct fd_batch *batch) assert_dt OUT_RING(ring, 0x0); OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */ OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */ /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ fd_wfi(batch, ring); @@ -715,6 +716,7 @@ fd5_emit_tile_fini(struct fd_batch *batch) assert_dt static void fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt { + struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; fd5_emit_restore(batch, ring); @@ -730,10 +732,10 @@ fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false); OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */ OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */ /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ fd_wfi(batch, ring); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 245e3e538..32b97f04c 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -81,6 +81,30 @@ fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) } } +void +fd5_emit_shader_obj(struct fd_context *ctx, struct fd_ringbuffer *ring, + const struct ir3_shader_variant *so, + uint32_t shader_obj_reg) +{ + ir3_get_private_mem(ctx, so); + + OUT_PKT4(ring, shader_obj_reg, 6); + OUT_RELOC(ring, so->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */ + + uint32_t per_sp_size = ctx->pvtmem[so->pvtmem_per_wave].per_sp_size; + OUT_RING(ring, A5XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM( + ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) | + A5XX_SP_VS_PVT_MEM_PARAM_HWSTACKOFFSET(per_sp_size)); + if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */ + OUT_RELOC(ring, ctx->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0); + fd_ringbuffer_attach_bo(ring, ctx->pvtmem[so->pvtmem_per_wave].bo); + } else { + OUT_RING(ring, 0); + OUT_RING(ring, 0); + } + OUT_RING(ring, A5XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(per_sp_size)); +} + /* TODO maybe some of this we could pre-compute once rather than having * so much draw-time logic? */ @@ -487,8 +511,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, reg); } - OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2); - OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */ + fd5_emit_shader_obj(ctx, ring, s[VS].v, REG_A5XX_SP_VS_OBJ_START_LO); if (s[VS].instrlen) fd5_emit_shader(ring, s[VS].v); @@ -512,8 +535,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */ OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */ } else { - OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2); - OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */ + fd5_emit_shader_obj(ctx, ring, s[FS].v, REG_A5XX_SP_FS_OBJ_START_LO); } OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5); @@ -726,10 +748,12 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, } static struct ir3_program_state * -fd5_program_create(void *data, struct ir3_shader_variant *bs, - struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, - struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, - struct ir3_shader_variant *fs, +fd5_program_create(void *data, const struct ir3_shader_variant *bs, + const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *hs, + const struct ir3_shader_variant *ds, + const struct ir3_shader_variant *gs, + const struct ir3_shader_variant *fs, const struct ir3_cache_key *key) in_dt { struct fd_context *ctx = fd_context(data); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h index 59c499e6d..2f9906bab 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.h @@ -37,9 +37,9 @@ struct fd5_emit; struct fd5_program_state { struct ir3_program_state base; - struct ir3_shader_variant *bs; /* VS for when emit->binning */ - struct ir3_shader_variant *vs; - struct ir3_shader_variant *fs; /* FS for when !emit->binning */ + const struct ir3_shader_variant *bs; /* VS for when emit->binning */ + const struct ir3_shader_variant *vs; + const struct ir3_shader_variant *fs; /* FS for when !emit->binning */ }; static inline struct fd5_program_state * @@ -51,8 +51,12 @@ fd5_program_state(struct ir3_program_state *state) void fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so); +void fd5_emit_shader_obj(struct fd_context *ctx, struct fd_ringbuffer *ring, + const struct ir3_shader_variant *so, + uint32_t shader_obj_reg) assert_dt; + void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct fd5_emit *emit); + struct fd5_emit *emit) assert_dt; void fd5_prog_init(struct pipe_context *pctx); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c index 9c685f3ab..945c2cfad 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c @@ -206,16 +206,6 @@ timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt OUT_RELOC(ring, query_sample(aq, start)); /* srcC */ } -static uint64_t -ticks_to_ns(uint32_t ts) -{ - /* This is based on the 19.2MHz always-on rbbm timer. - * - * TODO we should probably query this value from kernel.. - */ - return ts * (1000000000 / 19200000); -} - static void time_elapsed_accumulate_result(struct fd_acc_query *aq, struct fd_acc_query_sample *s, diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c index 1faf272f3..4907c2851 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c @@ -122,14 +122,14 @@ fd5_screen_is_format_supported(struct pipe_screen *pscreen, /* clang-format off */ static const enum pc_di_primtype primtypes[] = { - [PIPE_PRIM_POINTS] = DI_PT_POINTLIST, - [PIPE_PRIM_LINES] = DI_PT_LINELIST, - [PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP, - [PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP, - [PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST, - [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP, - [PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN, - [PIPE_PRIM_MAX] = DI_PT_RECTLIST, /* internal clear blits */ + [MESA_PRIM_POINTS] = DI_PT_POINTLIST, + [MESA_PRIM_LINES] = DI_PT_LINELIST, + [MESA_PRIM_LINE_STRIP] = DI_PT_LINESTRIP, + [MESA_PRIM_LINE_LOOP] = DI_PT_LINELOOP, + [MESA_PRIM_TRIANGLES] = DI_PT_TRILIST, + [MESA_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP, + [MESA_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN, + [MESA_PRIM_COUNT] = DI_PT_RECTLIST, /* internal clear blits */ }; /* clang-format on */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c index df6217733..9c458e33e 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.c @@ -57,18 +57,61 @@ alloc_ring(struct fd_batch *batch, unsigned sz, enum fd_ringbuffer_flags flags) return fd_submit_new_ringbuffer(batch->submit, sz, flags); } +static struct fd_batch_subpass * +subpass_create(struct fd_batch *batch) +{ + struct fd_batch_subpass *subpass = CALLOC_STRUCT(fd_batch_subpass); + + subpass->draw = alloc_ring(batch, 0x100000, 0); + + /* Replace batch->draw with reference to current subpass, for + * backwards compat with code that is not subpass aware. + */ + if (batch->draw) + fd_ringbuffer_del(batch->draw); + batch->draw = fd_ringbuffer_ref(subpass->draw); + + list_addtail(&subpass->node, &batch->subpasses); + + return subpass; +} + static void -batch_init(struct fd_batch *batch) +subpass_destroy(struct fd_batch_subpass *subpass) { - struct fd_context *ctx = batch->ctx; + fd_ringbuffer_del(subpass->draw); + if (subpass->subpass_clears) + fd_ringbuffer_del(subpass->subpass_clears); + list_del(&subpass->node); + if (subpass->lrz) + fd_bo_del(subpass->lrz); + free(subpass); +} + +struct fd_batch * +fd_batch_create(struct fd_context *ctx, bool nondraw) +{ + struct fd_batch *batch = CALLOC_STRUCT(fd_batch); + + if (!batch) + return NULL; + + DBG("%p", batch); + + pipe_reference_init(&batch->reference, 1); + batch->ctx = ctx; + batch->nondraw = nondraw; + + batch->resources = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + + list_inithead(&batch->subpasses); batch->submit = fd_submit_new(ctx->pipe); if (batch->nondraw) { batch->gmem = alloc_ring(batch, 0x1000, FD_RINGBUFFER_PRIMARY); - batch->draw = alloc_ring(batch, 0x100000, 0); } else { batch->gmem = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY); - batch->draw = alloc_ring(batch, 0x100000, 0); /* a6xx+ re-uses draw rb for both draw and binning pass: */ if (ctx->screen->gen < 6) { @@ -76,6 +119,12 @@ batch_init(struct fd_batch *batch) } } + /* Pre-attach private BOs: */ + for (unsigned i = 0; i < ctx->num_private_bos; i++) + fd_ringbuffer_attach_bo(batch->gmem, ctx->private_bos[i]); + + batch->subpass = subpass_create(batch); + batch->in_fence_fd = -1; batch->fence = NULL; @@ -86,23 +135,10 @@ batch_init(struct fd_batch *batch) if (ctx->screen->gen < 6) batch->fence = fd_pipe_fence_create(batch); - batch->cleared = 0; - batch->fast_cleared = 0; - batch->invalidated = 0; - batch->restore = batch->resolve = 0; - batch->needs_flush = false; - batch->flushed = false; - batch->gmem_reason = 0; - batch->num_draws = 0; - batch->num_vertices = 0; - batch->num_bins_per_pipe = 0; - batch->prim_strm_bits = 0; - batch->draw_strm_bits = 0; - fd_reset_wfi(batch); util_dynarray_init(&batch->draw_patches, NULL); - util_dynarray_init(&(batch->fb_read_patches), NULL); + util_dynarray_init(&(batch->fb_read_patches), NULL); if (is_a2xx(ctx->screen)) { util_dynarray_init(&batch->shader_patches, NULL); @@ -112,42 +148,47 @@ batch_init(struct fd_batch *batch) if (is_a3xx(ctx->screen)) util_dynarray_init(&batch->rbrc_patches, NULL); - assert(batch->resources->entries == 0); - util_dynarray_init(&batch->samples, NULL); u_trace_init(&batch->trace, &ctx->trace_context); batch->last_timestamp_cmd = NULL; + + return batch; } -struct fd_batch * -fd_batch_create(struct fd_context *ctx, bool nondraw) +struct fd_batch_subpass * +fd_batch_create_subpass(struct fd_batch *batch) { - struct fd_batch *batch = CALLOC_STRUCT(fd_batch); + assert(!batch->nondraw); - if (!batch) - return NULL; - - DBG("%p", batch); - - pipe_reference_init(&batch->reference, 1); - batch->ctx = ctx; - batch->nondraw = nondraw; + struct fd_batch_subpass *subpass = subpass_create(batch); - batch->resources = - _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + /* This new subpass inherits the current subpass.. this is replaced + * if there is a depth clear + */ + if (batch->subpass->lrz) + subpass->lrz = fd_bo_ref(batch->subpass->lrz); - batch_init(batch); + batch->subpass = subpass; - return batch; + return subpass; } +/** + * Cleanup that we normally do when the submit is flushed, like dropping + * rb references. But also called when batch is destroyed just in case + * it wasn't flushed. + */ static void cleanup_submit(struct fd_batch *batch) { if (!batch->submit) return; + foreach_subpass_safe (subpass, batch) { + subpass_destroy(subpass); + } + fd_ringbuffer_del(batch->draw); fd_ringbuffer_del(batch->gmem); @@ -171,14 +212,14 @@ cleanup_submit(struct fd_batch *batch) batch->epilogue = NULL; } - if (batch->tile_setup) { - fd_ringbuffer_del(batch->tile_setup); - batch->tile_setup = NULL; + if (batch->tile_loads) { + fd_ringbuffer_del(batch->tile_loads); + batch->tile_loads = NULL; } - if (batch->tile_fini) { - fd_ringbuffer_del(batch->tile_fini); - batch->tile_fini = NULL; + if (batch->tile_store) { + fd_ringbuffer_del(batch->tile_store); + batch->tile_store = NULL; } fd_submit_del(batch->submit); @@ -186,46 +227,6 @@ cleanup_submit(struct fd_batch *batch) } static void -batch_fini(struct fd_batch *batch) -{ - DBG("%p", batch); - - pipe_resource_reference(&batch->query_buf, NULL); - - if (batch->in_fence_fd != -1) - close(batch->in_fence_fd); - - /* in case batch wasn't flushed but fence was created: */ - if (batch->fence) - fd_pipe_fence_set_batch(batch->fence, NULL); - - fd_pipe_fence_ref(&batch->fence, NULL); - - cleanup_submit(batch); - - util_dynarray_fini(&batch->draw_patches); - for (int i = 0; i < MAX_RENDER_TARGETS; i++) - util_dynarray_fini(&(batch->fb_read_patches)); - - if (is_a2xx(batch->ctx->screen)) { - util_dynarray_fini(&batch->shader_patches); - util_dynarray_fini(&batch->gmem_patches); - } - - if (is_a3xx(batch->ctx->screen)) - util_dynarray_fini(&batch->rbrc_patches); - - while (batch->samples.size > 0) { - struct fd_hw_sample *samp = - util_dynarray_pop(&batch->samples, struct fd_hw_sample *); - fd_hw_sample_reference(batch->ctx, &samp, NULL); - } - util_dynarray_fini(&batch->samples); - - u_trace_fini(&batch->trace); -} - -static void batch_flush_dependencies(struct fd_batch *batch) assert_dt { struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache; @@ -268,28 +269,6 @@ batch_reset_resources(struct fd_batch *batch) } } -static void -batch_reset(struct fd_batch *batch) assert_dt -{ - DBG("%p", batch); - - batch_reset_dependencies(batch); - - fd_screen_lock(batch->ctx->screen); - batch_reset_resources(batch); - fd_screen_unlock(batch->ctx->screen); - - batch_fini(batch); - batch_init(batch); -} - -void -fd_batch_reset(struct fd_batch *batch) -{ - if (batch->needs_flush) - batch_reset(batch); -} - void __fd_batch_destroy_locked(struct fd_batch *batch) { @@ -310,7 +289,39 @@ __fd_batch_destroy_locked(struct fd_batch *batch) assert(batch->dependents_mask == 0); util_copy_framebuffer_state(&batch->framebuffer, NULL); - batch_fini(batch); + + pipe_resource_reference(&batch->query_buf, NULL); + + if (batch->in_fence_fd != -1) + close(batch->in_fence_fd); + + /* in case batch wasn't flushed but fence was created: */ + if (batch->fence) + fd_pipe_fence_set_batch(batch->fence, NULL); + + fd_pipe_fence_ref(&batch->fence, NULL); + + cleanup_submit(batch); + + util_dynarray_fini(&batch->draw_patches); + util_dynarray_fini(&(batch->fb_read_patches)); + + if (is_a2xx(batch->ctx->screen)) { + util_dynarray_fini(&batch->shader_patches); + util_dynarray_fini(&batch->gmem_patches); + } + + if (is_a3xx(batch->ctx->screen)) + util_dynarray_fini(&batch->rbrc_patches); + + while (batch->samples.size > 0) { + struct fd_hw_sample *samp = + util_dynarray_pop(&batch->samples, struct fd_hw_sample *); + fd_hw_sample_reference(batch->ctx, &samp, NULL); + } + util_dynarray_fini(&batch->samples); + + u_trace_fini(&batch->trace); free(batch->key); free(batch); @@ -389,6 +400,30 @@ batch_flush(struct fd_batch *batch) assert_dt cleanup_submit(batch); } +void +fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb) +{ + assert(!batch->nondraw); + + util_copy_framebuffer_state(&batch->framebuffer, pfb); + + if (!pfb->zsbuf) + return; + + struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); + + /* Switching back to a batch we'd previously started constructing shouldn't + * result in a different lrz. The dependency tracking should avoid another + * batch writing/clearing our depth buffer. + */ + if (batch->subpass->lrz) { + assert(batch->subpass->lrz == zsbuf->lrz); + } else if (zsbuf->lrz) { + batch->subpass->lrz = fd_bo_ref(zsbuf->lrz); + } +} + + /* NOTE: could drop the last ref to batch */ void @@ -469,6 +504,12 @@ fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc) _mesa_set_add_pre_hashed(batch->resources, rsc->hash, rsc); rsc->track->batch_mask |= (1 << batch->idx); + + fd_ringbuffer_attach_bo(batch->draw, rsc->bo); + if (unlikely(rsc->b.b.next)) { + struct fd_resource *n = fd_resource(rsc->b.b.next); + fd_ringbuffer_attach_bo(batch->draw, n->bo); + } } void @@ -488,8 +529,6 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) if (track->write_batch == batch) return; - fd_batch_write_prep(batch, rsc); - if (rsc->stencil) fd_batch_resource_write(batch, rsc->stencil); @@ -510,8 +549,10 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) * ctx dependencies and let the app have the undefined behavior * it asked for: */ - if (track->write_batch->ctx != batch->ctx) + if (track->write_batch->ctx != batch->ctx) { + fd_ringbuffer_attach_bo(batch->draw, rsc->bo); return; + } flush_write_batch(rsc); } @@ -533,6 +574,8 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) fd_batch_reference_locked(&track->write_batch, batch); fd_batch_add_resource(batch, rsc); + + fd_batch_write_prep(batch, rsc); } void @@ -558,6 +601,7 @@ fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc) * by avoiding cross-ctx dependencies and let the app have the * undefined behavior it asked for: */ + fd_ringbuffer_attach_bo(batch->draw, rsc->bo); return; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h index 305cc2e2d..b419d8ec6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch.h @@ -45,7 +45,53 @@ struct fd_resource; struct fd_batch_key; struct fd_batch_result; -/* A batch tracks everything about a cmdstream batch/submit, including the +/** + * A subpass is a fragment of a batch potentially starting with a clear. + * If the app does a mid-batch clear, that clear and subsequent draws + * can be split out into another sub-pass. At gmem time, the appropriate + * sysmem or gmem clears can be interleaved with the CP_INDIRECT_BUFFER + * to the subpass's draw cmdstream. + * + * For depth clears, a replacement LRZ buffer can be allocated (clear + * still inserted into the prologue cmdstream since it needs be executed + * even in sysmem or if we aren't binning, since later batches could + * depend in the LRZ state). The alternative would be to invalidate + * LRZ for draws after the start of the new subpass. + */ +struct fd_batch_subpass { + struct list_head node; + + /** draw pass cmdstream: */ + struct fd_ringbuffer *draw; + + /** for the gmem code to stash per tile per subpass clears */ + struct fd_ringbuffer *subpass_clears; + + BITMASK_ENUM(fd_buffer_mask) fast_cleared; + + union pipe_color_union clear_color[MAX_RENDER_TARGETS]; + double clear_depth; + unsigned clear_stencil; + + /** + * The number of draws emitted to this subpass. If it is greater than + * zero, a clear triggers creating a new subpass (because clears must + * always come at the start of a subpass). + */ + unsigned num_draws; + + /** + * If a subpass starts with a LRZ clear, it gets a new LRZ buffer. + * The fd_resource::lrz always tracks the current lrz buffer, but at + * binning/gmem time we need to know what was the current lrz buffer + * at the time draws were emitted to the subpass. Which is tracked + * here. + */ + struct fd_bo *lrz; +}; + +/** + * A batch tracks everything about a cmdstream batch/submit, including the * ringbuffers used for binning, draw, and gmem cmds, list of associated * fd_resource-s, etc. */ @@ -76,7 +122,7 @@ struct fd_batch { * where the contents are undefined, ie. what we don't need to restore * to gmem. */ - BITMASK_ENUM(fd_buffer_mask) invalidated, cleared, fast_cleared, restore, resolve; + BITMASK_ENUM(fd_buffer_mask) invalidated, cleared, restore, resolve; /* is this a non-draw batch (ie compute/blit which has no pfb state)? */ bool nondraw : 1; @@ -190,7 +236,24 @@ struct fd_batch { struct fd_submit *submit; - /** draw pass cmdstream: */ + /** + * List of fd_batch_subpass. + */ + struct list_head subpasses; + +#define foreach_subpass(subpass, batch) \ + list_for_each_entry (struct fd_batch_subpass, subpass, &batch->subpasses, node) +#define foreach_subpass_safe(subpass, batch) \ + list_for_each_entry_safe (struct fd_batch_subpass, subpass, &batch->subpasses, node) + + /** + * The current subpass. + */ + struct fd_batch_subpass *subpass; + + /** + * just a reference to the current subpass's draw cmds for backwards compat. + */ struct fd_ringbuffer *draw; /** binning pass cmdstream: */ struct fd_ringbuffer *binning; @@ -206,12 +269,8 @@ struct fd_batch { /** epilogue cmdstream (executed after all tiles): */ struct fd_ringbuffer *epilogue; - struct fd_ringbuffer *tile_setup; - struct fd_ringbuffer *tile_fini; - - union pipe_color_union clear_color[MAX_RENDER_TARGETS]; - double clear_depth; - unsigned clear_stencil; + struct fd_ringbuffer *tile_loads; + struct fd_ringbuffer *tile_store; /** * hw query related state: @@ -222,6 +281,13 @@ struct fd_batch { */ uint32_t next_sample_offset; + /* The # of pipeline-stats queries running. In case of nested + * queries using {START/STOP}_{PRIMITIVE,FRAGMENT,COMPUTE}_CNTRS, + * we need to start only on the first one and stop only on the + * last one. + */ + uint8_t pipeline_stats_queries_active[3]; + /* cached samples (in case multiple queries need to reference * the same sample snapshot) */ @@ -256,7 +322,10 @@ struct fd_batch { struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw); -void fd_batch_reset(struct fd_batch *batch) assert_dt; +struct fd_batch_subpass *fd_batch_create_subpass(struct fd_batch *batch) assert_dt; + +void fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb) assert_dt; + void fd_batch_flush(struct fd_batch *batch) assert_dt; bool fd_batch_has_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt; void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c index df49d945f..a8ce3101d 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_batch_cache.c @@ -440,6 +440,34 @@ alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx, return batch; } +static void +alloc_query_buf(struct fd_context *ctx, struct fd_batch *batch) +{ + if (batch->query_buf) + return; + + if ((ctx->screen->gen < 3) || (ctx->screen->gen > 4)) + return; + + /* For gens that use fd_hw_query, pre-allocate an initially zero-sized + * (unbacked) query buffer. This simplifies draw/grid/etc-time resource + * tracking. + */ + struct pipe_screen *pscreen = &ctx->screen->base; + struct pipe_resource templ = { + .target = PIPE_BUFFER, + .format = PIPE_FORMAT_R8_UNORM, + .bind = PIPE_BIND_QUERY_BUFFER, + .width0 = 0, /* create initially zero size buffer */ + .height0 = 1, + .depth0 = 1, + .array_size = 1, + .last_level = 0, + .nr_samples = 1, + }; + batch->query_buf = pscreen->resource_create(pscreen, &templ); +} + struct fd_batch * fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) { @@ -457,6 +485,8 @@ fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) batch = alloc_batch_locked(cache, ctx, nondraw); fd_screen_unlock(ctx->screen); + alloc_query_buf(ctx, batch); + if (batch && nondraw) fd_context_switch_to(ctx, batch); @@ -552,5 +582,9 @@ fd_batch_from_fb(struct fd_context *ctx, struct fd_batch *batch = batch_from_key(ctx, key); fd_screen_unlock(ctx->screen); + alloc_query_buf(ctx, batch); + + fd_batch_set_fb(batch, pfb); + return batch; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c index b566a30dc..e7f098151 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.c @@ -323,6 +323,13 @@ fd_context_switch_to(struct fd_context *ctx, struct fd_batch *batch) } } +void +fd_context_add_private_bo(struct fd_context *ctx, struct fd_bo *bo) +{ + assert(ctx->num_private_bos < ARRAY_SIZE(ctx->private_bos)); + ctx->private_bos[ctx->num_private_bos++] = bo; +} + /** * Return a reference to the current batch, caller must unref. */ @@ -343,7 +350,6 @@ fd_context_batch(struct fd_context *ctx) if (unlikely(!batch)) { batch = fd_batch_from_fb(ctx, &ctx->framebuffer); - util_copy_framebuffer_state(&batch->framebuffer, &ctx->framebuffer); fd_batch_reference(&ctx->batch, batch); fd_context_all_dirty(ctx); } @@ -589,9 +595,9 @@ fd_context_setup_common_vbos(struct fd_context *ctx) .vertex_buffer_index = 0, .src_offset = 0, .src_format = PIPE_FORMAT_R32G32B32_FLOAT, + .src_stride = 12, }}); ctx->solid_vbuf_state.vertexbuf.count = 1; - ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12; ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf; /* setup blit_vbuf_state: */ @@ -602,17 +608,17 @@ fd_context_setup_common_vbos(struct fd_context *ctx) .vertex_buffer_index = 0, .src_offset = 0, .src_format = PIPE_FORMAT_R32G32_FLOAT, + .src_stride = 8, }, { .vertex_buffer_index = 1, .src_offset = 0, .src_format = PIPE_FORMAT_R32G32B32_FLOAT, + .src_stride = 12, }}); ctx->blit_vbuf_state.vertexbuf.count = 2; - ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8; ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->blit_texcoord_vbuf; - ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12; ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h index 6d861cbea..3dedfd3e6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_context.h @@ -88,6 +88,7 @@ struct fd_vertexbuf_stateobj { struct fd_vertex_stateobj { struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + unsigned strides[PIPE_MAX_ATTRIBS]; unsigned num_elements; }; @@ -165,9 +166,10 @@ enum fd_dirty_3d_state { FD_DIRTY_IMAGE = BIT(18), FD_DIRTY_SSBO = BIT(19), FD_DIRTY_QUERY = BIT(20), + FD_DIRTY_SAMPLE_LOCATIONS = BIT(21), /* only used by a2xx.. possibly can be removed.. */ - FD_DIRTY_TEXSTATE = BIT(21), + FD_DIRTY_TEXSTATE = BIT(22), /* fine grained state changes, for cases where state is not orthogonal * from hw perspective: @@ -179,6 +181,69 @@ enum fd_dirty_3d_state { #define NUM_DIRTY_BITS 28 }; +static inline void +fd_print_dirty_state(BITMASK_ENUM(fd_dirty_3d_state) dirty) +{ +#ifdef DEBUG + if (!FD_DBG(MSGS)) + return; + + struct { + enum fd_dirty_3d_state state; + const char *name; + } tbl[] = { +#define STATE(n) { FD_DIRTY_ ## n, #n } + STATE(BLEND), + STATE(RASTERIZER), + STATE(ZSA), + STATE(BLEND_COLOR), + STATE(STENCIL_REF), + STATE(SAMPLE_MASK), + STATE(FRAMEBUFFER), + STATE(STIPPLE), + STATE(VIEWPORT), + STATE(VTXSTATE), + STATE(VTXBUF), + STATE(MIN_SAMPLES), + STATE(SCISSOR), + STATE(STREAMOUT), + STATE(UCP), + STATE(PROG), + STATE(CONST), + STATE(TEX), + STATE(IMAGE), + STATE(SSBO), + STATE(QUERY), + STATE(TEXSTATE), + STATE(RASTERIZER_DISCARD), + STATE(RASTERIZER_CLIP_PLANE_ENABLE), + STATE(BLEND_DUAL), + STATE(BLEND_COHERENT), +#undef STATE + }; + + struct log_stream *s = mesa_log_streami(); + + mesa_log_stream_printf(s, "dirty:"); + + if ((uint32_t)dirty == ~0) { + mesa_log_stream_printf(s, " ALL"); + dirty = 0; + } + + for (unsigned i = 0; i < ARRAY_SIZE(tbl); i++) { + if (dirty & tbl[i].state) { + mesa_log_stream_printf(s, " %s", tbl[i].name); + dirty &= ~tbl[i].state; + } + } + + assert(!dirty); + + mesa_log_stream_destroy(s); +#endif +} + /* per shader-stage dirty state: */ enum fd_dirty_shader_state { FD_DIRTY_SHADER_PROG = BIT(0), @@ -195,9 +260,14 @@ enum fd_buffer_mask { FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, + + /* A special internal buffer bit to signify that the LRZ buffer needs + * clearing + */ + FD_BUFFER_LRZ = BIT(15), }; -#define MAX_HW_SAMPLE_PROVIDERS 7 +#define MAX_HW_SAMPLE_PROVIDERS 10 struct fd_hw_sample_provider; struct fd_hw_sample; @@ -377,6 +447,14 @@ struct fd_context { /* Per vsc pipe bo's (a2xx-a5xx): */ struct fd_bo *vsc_pipe_bo[32] dt; + /* Table of bo's attached to all batches up-front (because they + * are commonly used, and that is easier than attaching on-use). + * In particular, these are driver internal buffers which do not + * participate in batch resource tracking. + */ + struct fd_bo *private_bos[3]; + unsigned num_private_bos; + /* Maps generic gallium oriented fd_dirty_3d_state bits to generation * specific bitmask of state "groups". */ @@ -418,6 +496,10 @@ struct fd_context { unsigned sample_mask dt; unsigned min_samples dt; + /* 1x1 grid, max 4x MSAA: */ + uint8_t sample_locations[4] dt; + bool sample_locations_enabled dt; + /* local context fb state, for when ctx->batch is null: */ struct pipe_framebuffer_state framebuffer dt; uint32_t all_mrt_channel_mask dt; @@ -458,6 +540,7 @@ struct fd_context { struct { struct fd_bo *bo; uint32_t per_fiber_size; + uint32_t per_sp_size; } pvtmem[2] dt; /* maps per-shader-stage state plus variant key to hw @@ -496,6 +579,7 @@ struct fd_context { /* optional, for GMEM bypass: */ void (*emit_sysmem_prep)(struct fd_batch *batch) dt; + void (*emit_sysmem)(struct fd_batch *batch) dt; void (*emit_sysmem_fini)(struct fd_batch *batch) dt; /* draw: */ @@ -509,6 +593,9 @@ struct fd_context { const union pipe_color_union *color, double depth, unsigned stencil) dt; + /* called to update draw_vbo func after bound shader stages change, etc: */ + void (*update_draw)(struct fd_context *ctx); + /* compute: */ void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info) dt; @@ -592,6 +679,8 @@ fd_stream_output_target(struct pipe_stream_output_target *target) return (struct fd_stream_output_target *)target; } +void fd_context_add_private_bo(struct fd_context *ctx, struct fd_bo *bo); + /* Mark specified non-shader-stage related state as dirty: */ static inline void fd_context_dirty(struct fd_context *ctx, BITMASK_ENUM(fd_dirty_3d_state) dirty) diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c index f3f757b2c..56ea718b9 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_draw.c @@ -139,13 +139,13 @@ batch_draw_tracking_for_dirty_bits(struct fd_batch *batch) assert_dt /* Mark constbuf as being read: */ if (dirty_shader & FD_DIRTY_SHADER_CONST) { u_foreach_bit (i, ctx->constbuf[s].enabled_mask) - resource_read(batch, ctx->constbuf[s].cb[i].buffer); + resource_read(batch, ctx->constbuf[s].cb[i].buffer); } /* Mark textures as being read */ if (dirty_shader & FD_DIRTY_SHADER_TEX) { u_foreach_bit (i, ctx->tex[s].valid_textures) - resource_read(batch, ctx->tex[s].textures[i]->texture); + resource_read(batch, ctx->tex[s].textures[i]->texture); } /* Mark SSBOs as being read or written: */ @@ -182,9 +182,15 @@ batch_draw_tracking_for_dirty_bits(struct fd_batch *batch) assert_dt /* Mark streamout buffers as being written.. */ if (dirty & FD_DIRTY_STREAMOUT) { - for (unsigned i = 0; i < ctx->streamout.num_targets; i++) - if (ctx->streamout.targets[i]) - resource_written(batch, ctx->streamout.targets[i]->buffer); + for (unsigned i = 0; i < ctx->streamout.num_targets; i++) { + struct fd_stream_output_target *target = + fd_stream_output_target(ctx->streamout.targets[i]); + + if (target) { + resource_written(batch, target->base.buffer); + resource_written(batch, target->offset_buf); + } + } } if (dirty & FD_DIRTY_QUERY) { @@ -215,6 +221,9 @@ needs_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info, if (indirect) { if (indirect->buffer && !batch_references_resource(batch, indirect->buffer)) return true; + if (indirect->indirect_draw_count && + !batch_references_resource(batch, indirect->indirect_draw_count)) + return true; if (indirect->count_from_stream_output) return true; } @@ -228,13 +237,8 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info, { struct fd_context *ctx = batch->ctx; - /* NOTE: needs to be before resource_written(batch->query_buf), otherwise - * query_buf may not be created yet. - */ - fd_batch_update_queries(batch); - if (!needs_draw_tracking(batch, info, indirect)) - return; + goto out; /* * Figure out the buffers/features we need: @@ -251,8 +255,8 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info, /* Mark indirect draw buffer as being read */ if (indirect) { - if (indirect->buffer) - resource_read(batch, indirect->buffer); + resource_read(batch, indirect->buffer); + resource_read(batch, indirect->indirect_draw_count); if (indirect->count_from_stream_output) resource_read( batch, fd_stream_output_target(indirect->count_from_stream_output) @@ -262,6 +266,9 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info, resource_written(batch, batch->query_buf); fd_screen_unlock(ctx->screen); + +out: + fd_batch_update_queries(batch); } static void @@ -277,7 +284,7 @@ update_draw_stats(struct fd_context *ctx, const struct pipe_draw_info *info, * so keep the count accurate for non-patch geometry. */ unsigned prims = 0; - if ((info->mode != PIPE_PRIM_PATCHES) && (info->mode != PIPE_PRIM_MAX)) { + if ((info->mode != MESA_PRIM_PATCHES) && (info->mode != MESA_PRIM_COUNT)) { for (unsigned i = 0; i < num_draws; i++) { prims += u_reduced_prims_for_vertices(info->mode, draws[i].count); } @@ -287,7 +294,7 @@ update_draw_stats(struct fd_context *ctx, const struct pipe_draw_info *info, if (ctx->streamout.num_targets > 0) { /* Clip the prims we're writing to the size of the SO buffers. */ - enum pipe_prim_type tf_prim = u_decomposed_prim(info->mode); + enum mesa_prim tf_prim = u_decomposed_prim(info->mode); unsigned verts_written = u_vertices_for_prims(tf_prim, prims); unsigned remaining_vert_space = ctx->streamout.max_tf_vtx - ctx->streamout.verts_written; @@ -369,6 +376,9 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } batch->num_draws++; + batch->subpass->num_draws++; + + fd_print_dirty_state(ctx->dirty); /* Marking the batch as needing flush must come after the batch * dependency tracking (resource_read()/resource_write()), as that @@ -537,19 +547,27 @@ static void fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, const union pipe_color_union *color, unsigned x, unsigned y, unsigned w, unsigned h, - bool render_condition_enabled) + bool render_condition_enabled) in_dt { - DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h); + if (render_condition_enabled && !fd_render_condition_check(pctx)) + return; + + fd_blitter_clear_render_target(pctx, ps, color, x, y, w, h, + render_condition_enabled); } static void fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, unsigned buffers, double depth, unsigned stencil, unsigned x, unsigned y, unsigned w, unsigned h, - bool render_condition_enabled) + bool render_condition_enabled) in_dt { - DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u", - buffers, depth, stencil, x, y, w, h); + if (render_condition_enabled && !fd_render_condition_check(pctx)) + return; + + fd_blitter_clear_depth_stencil(pctx, ps, buffers, + depth, stencil, x, y, w, h, + render_condition_enabled); } static void @@ -602,6 +620,10 @@ fd_launch_grid(struct pipe_context *pctx, if (info->indirect) resource_read(batch, info->indirect); + list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node) { + resource_written(batch, aq->prsc); + } + /* If the saved batch has been flushed during the resource tracking, * don't re-install it: */ @@ -610,6 +632,8 @@ fd_launch_grid(struct pipe_context *pctx, fd_screen_unlock(ctx->screen); + fd_batch_update_queries(batch); + DBG("%p: work_dim=%u, block=%ux%ux%u, grid=%ux%ux%u", batch, info->work_dim, info->block[0], info->block[1], info->block[2], diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c index 552f42a3d..e7677009f 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.c @@ -55,7 +55,7 @@ fence_flush(struct pipe_context *pctx, struct pipe_fence_handle *fence, if (!timeout) return false; - if (timeout == PIPE_TIMEOUT_INFINITE) { + if (timeout == OS_TIMEOUT_INFINITE) { util_queue_fence_wait(&fence->ready); } else { int64_t abs_timeout = os_time_get_absolute_timeout(timeout); @@ -267,7 +267,7 @@ fd_pipe_fence_get_fd(struct pipe_screen *pscreen, struct pipe_fence_handle *fenc * but if TC is not used, this will be null. Which is fine, we won't call * threaded_context_flush() in that case */ - fence_flush(&fence->ctx->tc->base, fence, PIPE_TIMEOUT_INFINITE); + fence_flush(&fence->ctx->tc->base, fence, OS_TIMEOUT_INFINITE); assert(fence->fence); return os_dupfd_cloexec(fence->fence->fence_fd); } @@ -289,10 +289,10 @@ fd_pipe_fence_set_batch(struct pipe_fence_handle *fence, struct fd_batch *batch) { if (batch) { assert(!fence->batch); - fence->batch = batch; + fd_batch_reference(&fence->batch, batch); fd_batch_needs_flush(batch); } else { - fence->batch = NULL; + fd_batch_reference(&fence->batch, NULL); /* When the batch is dis-associated with the fence, we can signal TC * that the fence is flushed diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h index 30d568ac8..2bd4c581c 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_fence.h @@ -44,13 +44,13 @@ struct pipe_fence_handle { */ struct pipe_fence_handle *last_fence; - /* fence holds a weak reference to the batch until the batch is flushed, to + /* fence holds a reference to the batch until the batch is flushed, to * accommodate PIPE_FLUSH_DEFERRED. When the batch is actually flushed, it * is cleared (before the batch reference is dropped). If we need to wait * on a fence, and the batch is not NULL, we need to flush it. * * Note that with u_threaded_context async flushes, if a fence is requested - * by the frontend, the fence is initially created without a weak reference + * by the frontend, the fence is initially created without a reference * to the batch, which is filled in later when fd_context_flush() is called * from the driver thread. In this case tc_token will be non-null, in * which case threaded_context_flush() should be called in fd_fence_finish() diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c index 7b579649b..893398e7a 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -25,15 +25,16 @@ */ #include "pipe/p_state.h" -#include "util/u_debug.h" #include "util/format/u_format.h" #include "util/hash_table.h" +#include "util/macros.h" +#include "util/u_debug.h" #include "util/u_dump.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_string.h" -#include "u_tracepoints.h" #include "util/u_trace_gallium.h" +#include "u_tracepoints.h" #include "freedreno_context.h" #include "freedreno_fence.h" @@ -315,7 +316,6 @@ gmem_stateobj_init(struct fd_screen *screen, struct gmem_key *key) * performance. */ -#define div_round_up(v, a) (((v) + (a)-1) / (a)) /* figure out number of tiles per pipe: */ if (is_a20x(screen)) { /* for a20x we want to minimize the number of "pipes" @@ -326,10 +326,10 @@ gmem_stateobj_init(struct fd_screen *screen, struct gmem_key *key) tpp_y = 6; } else { tpp_x = tpp_y = 1; - while (div_round_up(gmem->nbins_y, tpp_y) > npipes) + while (DIV_ROUND_UP(gmem->nbins_y, tpp_y) > npipes) tpp_y += 2; - while ((div_round_up(gmem->nbins_y, tpp_y) * - div_round_up(gmem->nbins_x, tpp_x)) > npipes) + while ((DIV_ROUND_UP(gmem->nbins_y, tpp_y) * + DIV_ROUND_UP(gmem->nbins_x, tpp_x)) > npipes) tpp_x += 1; } @@ -399,7 +399,7 @@ gmem_stateobj_init(struct fd_screen *screen, struct gmem_key *key) uint32_t p; /* pipe number: */ - p = ((i / tpp_y) * div_round_up(gmem->nbins_x, tpp_x)) + (j / tpp_x); + p = ((i / tpp_y) * DIV_ROUND_UP(gmem->nbins_x, tpp_x)) + (j / tpp_x); assert(p < gmem->num_vsc_pipes); /* clip bin width: */ @@ -480,9 +480,9 @@ gmem_key_init(struct fd_batch *batch, bool assume_zs, bool no_scis_opt) if (has_zs || assume_zs) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); - key->zsbuf_cpp[0] = rsc->layout.cpp; + key->zsbuf_cpp[0] = rsc->layout.cpp * pfb->samples; if (rsc->stencil) - key->zsbuf_cpp[1] = rsc->stencil->layout.cpp; + key->zsbuf_cpp[1] = rsc->stencil->layout.cpp * pfb->samples; /* If we clear z or s but not both, and we are using z24s8 (ie. * !separate_stencil) then we need to restore the other, even if @@ -493,7 +493,7 @@ gmem_key_init(struct fd_batch *batch, bool assume_zs, bool no_scis_opt) * u_blitter will show up as a normal draw with depth and/or * stencil enabled. */ - unsigned zsclear = batch->fast_cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); + unsigned zsclear = batch->cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); if (zsclear) { const struct util_format_description *desc = util_format_description(pfb->zsbuf->format); @@ -672,7 +672,11 @@ render_sysmem(struct fd_batch *batch) assert_dt trace_start_draw_ib(&batch->trace, batch->gmem); } /* emit IB to drawcmds: */ - ctx->screen->emit_ib(batch->gmem, batch->draw); + if (ctx->emit_sysmem) { + ctx->emit_sysmem(batch); + } else { + ctx->screen->emit_ib(batch->gmem, batch->draw); + } if (!batch->nondraw) { trace_end_draw_ib(&batch->trace, batch->gmem); @@ -719,7 +723,7 @@ fd_gmem_render_tiles(struct fd_batch *batch) /* Sometimes we need to flush a batch just to get a fence, with no * clears or draws.. in this case promote to nondraw: */ - if (!(batch->fast_cleared || batch->num_draws)) + if (!(batch->cleared || batch->num_draws)) sysmem = true; if (!batch->nondraw) { diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c index feb3e56f5..2a93d197b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_program.c @@ -36,11 +36,14 @@ static void update_bound_stage(struct fd_context *ctx, enum pipe_shader_type shader, bool bound) assert_dt { + uint32_t bound_shader_stages = ctx->bound_shader_stages; if (bound) { ctx->bound_shader_stages |= BIT(shader); } else { ctx->bound_shader_stages &= ~BIT(shader); } + if (ctx->update_draw && (bound_shader_stages != ctx->bound_shader_stages)) + ctx->update_draw(ctx); } static void diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h index 8cb8363b7..b57895713 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query.h @@ -131,6 +131,12 @@ pidx(unsigned query_type) return 5; case PIPE_QUERY_PRIMITIVES_EMITTED: return 6; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + return 7; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + return 8; + case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: + return 9; default: return -1; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c index e3f4c69f8..6dee86d70 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.c @@ -84,13 +84,13 @@ fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt { const struct fd_acc_sample_provider *p = aq->provider; - aq->batch = batch; - fd_batch_needs_flush(aq->batch); - p->resume(aq, aq->batch); - fd_screen_lock(batch->ctx->screen); fd_batch_resource_write(batch, fd_resource(aq->prsc)); fd_screen_unlock(batch->ctx->screen); + + aq->batch = batch; + fd_batch_needs_flush(aq->batch); + p->resume(aq, aq->batch); } static void diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h index 8022842f9..eebe8c080 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_acc.h @@ -138,6 +138,9 @@ copy_result(struct fd_ringbuffer *ring, enum pipe_query_value_type result_type, struct fd_resource *dst, unsigned dst_offset, struct fd_resource *src, unsigned src_offset) { + fd_ringbuffer_attach_bo(ring, dst->bo); + fd_ringbuffer_attach_bo(ring, src->bo); + OUT_PKT7(ring, CP_MEM_TO_MEM, 5); OUT_RING(ring, COND(result_type >= PIPE_QUERY_TYPE_I64, CP_MEM_TO_MEM_0_DOUBLE)); OUT_RELOC(ring, dst->bo, dst_offset, 0, 0); diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c index d709c8f25..6ed0582da 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -299,22 +299,6 @@ fd_hw_sample_init(struct fd_batch *batch, uint32_t size) samp->tile_stride = 0; batch->next_sample_offset += size; - if (!batch->query_buf) { - struct pipe_screen *pscreen = &batch->ctx->screen->base; - struct pipe_resource templ = { - .target = PIPE_BUFFER, - .format = PIPE_FORMAT_R8_UNORM, - .bind = PIPE_BIND_QUERY_BUFFER, - .width0 = 0, /* create initially zero size buffer */ - .height0 = 1, - .depth0 = 1, - .array_size = 1, - .last_level = 0, - .nr_samples = 1, - }; - batch->query_buf = pscreen->resource_create(pscreen, &templ); - } - pipe_resource_reference(&samp->prsc, batch->query_buf); return samp; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c index f210dfe55..a57807071 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.c @@ -30,6 +30,7 @@ #include "util/set.h" #include "util/u_drm.h" #include "util/u_inlines.h" +#include "util/u_sample_positions.h" #include "util/u_string.h" #include "util/u_surface.h" #include "util/u_transfer.h" @@ -52,14 +53,6 @@ /* XXX this should go away, needed for 'struct winsys_handle' */ #include "frontend/drm_driver.h" -/* A private modifier for now, so we have a way to request tiled but not - * compressed. It would perhaps be good to get real modifiers for the - * tiled formats, but would probably need to do some work to figure out - * the layout(s) of the tiled modes, and whether they are the same - * across generations. - */ -#define FD_FORMAT_MOD_QCOM_TILED fourcc_mod_code(QCOM, 0xffffffff) - /** * Go through the entire state and see if the resource is bound * anywhere. If it is, mark the relevant state as dirty. This is @@ -573,7 +566,7 @@ fd_resource_uncompress(struct fd_context *ctx, struct fd_resource *rsc, bool lin { tc_assert_driver_thread(ctx->tc); - uint64_t modifier = linear ? DRM_FORMAT_MOD_LINEAR : FD_FORMAT_MOD_QCOM_TILED; + uint64_t modifier = linear ? DRM_FORMAT_MOD_LINEAR : DRM_FORMAT_MOD_QCOM_TILED3; ASSERTED bool success = fd_try_shadow_resource(ctx, rsc, 0, NULL, modifier); @@ -1290,6 +1283,11 @@ get_best_layout(struct fd_screen *screen, if (tmpl->bind & PIPE_BIND_USE_FRONT_RENDERING) ubwc_ok = false; + /* Disallow UBWC when asked not to use data dependent bandwidth compression: + */ + if (tmpl->bind & PIPE_BIND_CONST_BW) + ubwc_ok = false; + if (ubwc_ok && !can_implicit && !drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count)) { perf_debug("%" PRSC_FMT @@ -1301,15 +1299,8 @@ get_best_layout(struct fd_screen *screen, if (ubwc_ok) return UBWC; - /* We can't use tiled with explicit modifiers, as there is no modifier token - * defined for it. But we might internally force tiled allocation using a - * private modifier token. - * - * TODO we should probably also limit TILED in a similar way to UBWC above, - * once we have a public modifier token defined. - */ if (can_implicit || - drm_find_modifier(FD_FORMAT_MOD_QCOM_TILED, modifiers, count)) + drm_find_modifier(DRM_FORMAT_MOD_QCOM_TILED3, modifiers, count)) return TILED; if (!drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count)) { @@ -1643,10 +1634,6 @@ static const struct u_transfer_vtbl transfer_vtbl = { .get_stencil = fd_resource_get_stencil, }; -static const uint64_t supported_modifiers[] = { - DRM_FORMAT_MOD_LINEAR, -}; - static int fd_layout_resource_for_modifier(struct fd_resource *rsc, uint64_t modifier) { @@ -1760,10 +1747,6 @@ fd_resource_screen_init(struct pipe_screen *pscreen) if (!screen->layout_resource_for_modifier) screen->layout_resource_for_modifier = fd_layout_resource_for_modifier; - if (!screen->supported_modifiers) { - screen->supported_modifiers = supported_modifiers; - screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers); - } /* GL_EXT_memory_object */ pscreen->memobj_create_from_handle = fd_memobj_create_from_handle; @@ -1772,47 +1755,6 @@ fd_resource_screen_init(struct pipe_screen *pscreen) } static void -fd_get_sample_position(struct pipe_context *context, unsigned sample_count, - unsigned sample_index, float *pos_out) -{ - /* The following is copied from nouveau/nv50 except for position - * values, which are taken from blob driver */ - static const uint8_t pos1[1][2] = {{0x8, 0x8}}; - static const uint8_t pos2[2][2] = {{0xc, 0xc}, {0x4, 0x4}}; - static const uint8_t pos4[4][2] = {{0x6, 0x2}, - {0xe, 0x6}, - {0x2, 0xa}, - {0xa, 0xe}}; - /* TODO needs to be verified on supported hw */ - static const uint8_t pos8[8][2] = {{0x9, 0x5}, {0x7, 0xb}, {0xd, 0x9}, - {0x5, 0x3}, {0x3, 0xd}, {0x1, 0x7}, - {0xb, 0xf}, {0xf, 0x1}}; - - const uint8_t(*ptr)[2]; - - switch (sample_count) { - case 1: - ptr = pos1; - break; - case 2: - ptr = pos2; - break; - case 4: - ptr = pos4; - break; - case 8: - ptr = pos8; - break; - default: - assert(0); - return; - } - - pos_out[0] = ptr[sample_index][0] / 16.0f; - pos_out[1] = ptr[sample_index][1] / 16.0f; -} - -static void fd_blit_pipe(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) in_dt { @@ -1836,5 +1778,5 @@ fd_resource_context_init(struct pipe_context *pctx) pctx->blit = fd_blit_pipe; pctx->flush_resource = fd_flush_resource; pctx->invalidate_resource = fd_invalidate_resource; - pctx->get_sample_position = fd_get_sample_position; + pctx->get_sample_position = u_default_get_sample_position; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h index 00f563947..0c4818df2 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_resource.h @@ -253,6 +253,19 @@ has_depth(enum pipe_format format) return util_format_has_depth(desc); } +static inline bool +is_z32(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return true; + default: + return false; + } +} + struct fd_transfer { struct threaded_transfer b; struct pipe_resource *staging_prsc; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c index 30c9c80db..93aa04fa3 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.c @@ -125,6 +125,14 @@ fd_screen_get_device_vendor(struct pipe_screen *pscreen) return "Qualcomm"; } +static void +fd_get_sample_pixel_grid(struct pipe_screen *pscreen, unsigned sample_count, + unsigned *out_width, unsigned *out_height) +{ + *out_width = 1; + *out_height = 1; +} + static uint64_t fd_screen_get_timestamp(struct pipe_screen *pscreen) { @@ -133,8 +141,7 @@ fd_screen_get_timestamp(struct pipe_screen *pscreen) if (screen->has_timestamp) { uint64_t n; fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n); - assert(screen->max_freq > 0); - return n * 1000000000 / screen->max_freq; + return ticks_to_ns(n); } else { int64_t cpu_time = os_time_get_nano(); return cpu_time + screen->cpu_gpu_time_delta; @@ -181,6 +188,36 @@ fd_screen_destroy(struct pipe_screen *pscreen) free(screen); } +static uint64_t +get_memory_size(struct fd_screen *screen) +{ + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + if (fd_device_version(screen->dev) >= FD_VERSION_VA_SIZE) { + uint64_t va_size; + if (!fd_pipe_get_param(screen->pipe, FD_VA_SIZE, &va_size)) { + system_memory = MIN2(system_memory, va_size); + } + } + + return system_memory; +} + +static void +fd_query_memory_info(struct pipe_screen *pscreen, + struct pipe_memory_info *info) +{ + unsigned mem = get_memory_size(fd_screen(pscreen)) >> 10; + + memset(info, 0, sizeof(*info)); + + info->total_device_memory = mem; + info->avail_device_memory = mem; +} + + /* TODO either move caps to a2xx/a3xx specific code, or maybe have some tables for things that differ if the delta is not too much.. @@ -206,16 +243,19 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_INVALIDATE_BUFFER: - case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: case PIPE_CAP_NIR_COMPACT_ARRAYS: case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: case PIPE_CAP_GL_SPIRV: case PIPE_CAP_FBFETCH_COHERENT: + case PIPE_CAP_HAS_CONST_BW: return 1; case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: - case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_DEPTH_BOUNDS_TEST: return is_a6xx(screen); case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: @@ -271,6 +311,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_IMAGE_STORE_FORMATTED: + case PIPE_CAP_IMAGE_LOAD_FORMATTED: return is_a5xx(screen) || is_a6xx(screen); case PIPE_CAP_SURFACE_SAMPLE_COUNT: @@ -279,9 +320,18 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_CLIP_DISABLE: return is_a3xx(screen) || is_a4xx(screen) || is_a6xx(screen); + case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: + case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION: return is_a6xx(screen); + case PIPE_CAP_SAMPLER_REDUCTION_MINMAX: + case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB: + return is_a6xx(screen) && screen->info->a6xx.has_sampler_minmax; + + case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: + return is_a6xx(screen) && screen->info->a6xx.has_sample_locations; + case PIPE_CAP_POLYGON_OFFSET_CLAMP: return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); @@ -331,14 +381,13 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 64; case PIPE_CAP_INT64: - case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_DOUBLES: return is_ir3(screen); case PIPE_CAP_GLSL_FEATURE_LEVEL: case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: if (is_a6xx(screen)) - return 450; + return 460; else if (is_ir3(screen)) return 140; else @@ -403,6 +452,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; case PIPE_CAP_VS_LAYER_VIEWPORT: + case PIPE_CAP_TES_LAYER_VIEWPORT: return is_a6xx(screen); case PIPE_CAP_MAX_VIEWPORTS: @@ -470,6 +520,10 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return !is_a5xx(screen); /* Stream output. */ + case PIPE_CAP_MAX_VERTEX_STREAMS: + if (is_a6xx(screen)) /* has SO + GS */ + return PIPE_MAX_SO_BUFFERS; + return 0; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: if (is_ir3(screen)) return PIPE_MAX_SO_BUFFERS; @@ -535,7 +589,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* only a4xx, requires new enough kernel so we know max_freq: */ return (screen->max_freq > 0) && (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)); + case PIPE_CAP_TIMER_RESOLUTION: + return ticks_to_ns(1); case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: return is_a6xx(screen); case PIPE_CAP_VENDOR_ID: @@ -545,22 +603,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ACCELERATED: return 1; - case PIPE_CAP_VIDEO_MEMORY: { - uint64_t system_memory; + case PIPE_CAP_VIDEO_MEMORY: + return (int)(get_memory_size(screen) >> 20); - if (!os_get_total_physical_memory(&system_memory)) - return 0; - - if (fd_device_version(screen->dev) >= FD_VERSION_VA_SIZE) { - uint64_t va_size; - - if (!fd_pipe_get_param(screen->pipe, FD_VA_SIZE, &va_size)) { - system_memory = MIN2(system_memory, va_size); - } - } - - return (int)(system_memory >> 20); - } + case PIPE_CAP_QUERY_MEMORY_INFO: /* Enables GL_ATI_meminfo */ + return get_memory_size(screen) != 0; case PIPE_CAP_UMA: return 1; @@ -644,6 +691,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, if (has_compute(screen)) break; return 0; + case PIPE_SHADER_TASK: + case PIPE_SHADER_MESH: + return 0; default: mesa_loge("unknown shader type %d", shader); return 0; @@ -661,7 +711,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_INPUTS: if (shader == PIPE_SHADER_GEOMETRY && is_a6xx(screen)) return 16; - return is_a6xx(screen) ? 32 : 16; + return is_a6xx(screen) ? + (screen->info->a6xx.vs_max_inputs_count) : 16; case PIPE_SHADER_CAP_MAX_OUTPUTS: return is_a6xx(screen) ? 32 : 16; case PIPE_SHADER_CAP_MAX_TEMPS: @@ -687,7 +738,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, /* a2xx compiler doesn't handle indirect: */ return is_ir3(screen) ? 1 : 0; case PIPE_SHADER_CAP_SUBROUTINES: - case PIPE_SHADER_CAP_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: @@ -710,8 +760,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return 16; - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_SUPPORTED_IRS: return (1 << PIPE_SHADER_IR_NIR) | COND(has_compute(screen) && (shader == PIPE_SHADER_COMPUTE), @@ -815,7 +863,7 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, RET((uint64_t[]){screen->ram_size}); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: - RET((uint64_t[]){32768}); + RET((uint64_t[]){screen->info->cs_shared_mem_size}); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: @@ -833,9 +881,12 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t[]){1}); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t[]){32}); // TODO + case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: + RET((uint32_t[]){0}); // TODO + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: RET((uint64_t[]){ compiler->max_variable_workgroup_size }); } @@ -900,29 +951,42 @@ fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo, } } +static bool +is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + uint64_t modifier) +{ + struct fd_screen *screen = fd_screen(pscreen); + if (screen->is_format_supported) + return screen->is_format_supported(pscreen, format, modifier); + return modifier == DRM_FORMAT_MOD_LINEAR; +} + static void fd_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen, enum pipe_format format, int max, uint64_t *modifiers, unsigned int *external_only, int *count) { - struct fd_screen *screen = fd_screen(pscreen); - int i, num = 0; + const uint64_t all_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_QCOM_COMPRESSED, + DRM_FORMAT_MOD_QCOM_TILED3, + }; - max = MIN2(max, screen->num_supported_modifiers); + int num = 0; - if (!max) { - max = screen->num_supported_modifiers; - external_only = NULL; - modifiers = NULL; - } + for (int i = 0; i < ARRAY_SIZE(all_modifiers); i++) { + if (!is_format_supported(pscreen, format, all_modifiers[i])) + continue; - for (i = 0; i < max; i++) { - if (modifiers) - modifiers[num] = screen->supported_modifiers[i]; + if (num < max) { + if (modifiers) + modifiers[num] = all_modifiers[i]; - if (external_only) - external_only[num] = 0; + if (external_only) + external_only[num] = false; + } num++; } @@ -936,19 +1000,7 @@ fd_screen_is_dmabuf_modifier_supported(struct pipe_screen *pscreen, enum pipe_format format, bool *external_only) { - struct fd_screen *screen = fd_screen(pscreen); - int i; - - for (i = 0; i < screen->num_supported_modifiers; i++) { - if (modifier == screen->supported_modifiers[i]) { - if (external_only) - *external_only = false; - - return true; - } - } - - return false; + return is_format_supported(pscreen, format, modifier); } struct fd_bo * @@ -1030,6 +1082,8 @@ fd_screen_create(int fd, fd_perfetto_init(); #endif + util_gpuvis_init(); + pscreen = &screen->base; screen->dev = dev; @@ -1060,10 +1114,11 @@ fd_screen_create(int fd, screen->max_freq = 0; } else { screen->max_freq = val; - if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0) - screen->has_timestamp = true; } + if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0) + screen->has_timestamp = true; + screen->dev_id = fd_pipe_dev_id(screen->pipe); if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) { @@ -1174,7 +1229,7 @@ fd_screen_create(int fd, /* fdN_screen_init() should set this: */ assert(screen->primtypes); screen->primtypes_mask = 0; - for (unsigned i = 0; i <= PIPE_PRIM_MAX; i++) + for (unsigned i = 0; i <= MESA_PRIM_COUNT; i++) if (screen->primtypes[i]) screen->primtypes_mask |= (1 << i); @@ -1200,6 +1255,7 @@ fd_screen_create(int fd, pscreen->destroy = fd_screen_destroy; pscreen->get_screen_fd = fd_screen_get_fd; + pscreen->query_memory_info = fd_query_memory_info; pscreen->get_param = fd_screen_get_param; pscreen->get_paramf = fd_screen_get_paramf; pscreen->get_shader_param = fd_screen_get_shader_param; @@ -1215,6 +1271,8 @@ fd_screen_create(int fd, pscreen->get_vendor = fd_screen_get_vendor; pscreen->get_device_vendor = fd_screen_get_device_vendor; + pscreen->get_sample_pixel_grid = fd_get_sample_pixel_grid; + pscreen->get_timestamp = fd_screen_get_timestamp; pscreen->fence_reference = _fd_fence_ref; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h index c8a814ecc..48ec88121 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_screen.h @@ -129,6 +129,8 @@ struct fd_screen { unsigned (*tile_mode)(const struct pipe_resource *prsc); int (*layout_resource_for_modifier)(struct fd_resource *rsc, uint64_t modifier); + bool (*is_format_supported)(struct pipe_screen *pscreen, + enum pipe_format fmt, uint64_t modifier); /* indirect-branch emit: */ void (*emit_ib)(struct fd_ringbuffer *ring, struct fd_ringbuffer *target); @@ -160,7 +162,7 @@ struct fd_screen { #define FD6_TESS_BO_SIZE (FD6_TESS_FACTOR_SIZE + FD6_TESS_PARAM_SIZE) struct fd_bo *tess_bo; - /* table with PIPE_PRIM_MAX+1 entries mapping PIPE_PRIM_x to + /* table with MESA_PRIM_COUNT+1 entries mapping MESA_PRIM_x to * DI_PT_x value to use for draw initiator. There are some * slight differences between generation. * @@ -213,13 +215,13 @@ struct pipe_screen *fd_screen_create(int fd, const struct pipe_screen_config *config, struct renderonly *ro); -static inline boolean +static inline bool is_a20x(struct fd_screen *screen) { return (screen->gpu_id >= 200) && (screen->gpu_id < 210); } -static inline boolean +static inline bool is_a2xx(struct fd_screen *screen) { return screen->gen == 2; @@ -227,38 +229,38 @@ is_a2xx(struct fd_screen *screen) /* is a3xx patch revision 0? */ /* TODO a306.0 probably doesn't need this.. be more clever?? */ -static inline boolean +static inline bool is_a3xx_p0(struct fd_screen *screen) { return (screen->chip_id & 0xff0000ff) == 0x03000000; } -static inline boolean +static inline bool is_a3xx(struct fd_screen *screen) { return screen->gen == 3; } -static inline boolean +static inline bool is_a4xx(struct fd_screen *screen) { return screen->gen == 4; } -static inline boolean +static inline bool is_a5xx(struct fd_screen *screen) { return screen->gen == 5; } -static inline boolean +static inline bool is_a6xx(struct fd_screen *screen) { return screen->gen == 6; } /* is it using the ir3 compiler (shader isa introduced with a3xx)? */ -static inline boolean +static inline bool is_ir3(struct fd_screen *screen) { return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c b/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c index a5da323c4..d06012249 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_state.c @@ -99,6 +99,25 @@ fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) in_dt } static void +fd_set_sample_locations(struct pipe_context *pctx, size_t size, + const uint8_t *locations) + in_dt +{ + struct fd_context *ctx = fd_context(pctx); + + if (!locations) { + ctx->sample_locations_enabled = false; + return; + } + + size = MIN2(size, sizeof(ctx->sample_locations)); + memcpy(ctx->sample_locations, locations, size); + ctx->sample_locations_enabled = true; + + fd_context_dirty(ctx, FD_DIRTY_SAMPLE_LOCATIONS); +} + +static void fd_set_min_samples(struct pipe_context *pctx, unsigned min_samples) in_dt { struct fd_context *ctx = fd_context(pctx); @@ -140,8 +159,10 @@ fd_set_constant_buffer(struct pipe_context *pctx, enum pipe_shader_type shader, return; } - if (cb->user_buffer && ctx->screen->gen >= 6) + if (cb->user_buffer && ctx->screen->gen >= 6) { upload_user_buffer(pctx, &so->cb[index]); + cb = &so->cb[index]; + } so->enabled_mask |= 1 << index; @@ -451,7 +472,7 @@ fd_set_viewport_states(struct pipe_context *pctx, unsigned start_slot, } static void -fd_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot, +fd_set_vertex_buffers(struct pipe_context *pctx, unsigned count, unsigned unbind_num_trailing_slots, bool take_ownership, const struct pipe_vertex_buffer *vb) in_dt @@ -467,17 +488,15 @@ fd_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot, if (ctx->screen->gen < 3) { for (i = 0; i < count; i++) { bool new_enabled = vb && vb[i].buffer.resource; - bool old_enabled = so->vb[start_slot + i].buffer.resource != NULL; - uint32_t new_stride = vb ? vb[i].stride : 0; - uint32_t old_stride = so->vb[start_slot + i].stride; - if ((new_enabled != old_enabled) || (new_stride != old_stride)) { + bool old_enabled = so->vb[i].buffer.resource != NULL; + if (new_enabled != old_enabled) { fd_context_dirty(ctx, FD_DIRTY_VTXSTATE); break; } } } - util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, + util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, count, unbind_num_trailing_slots, take_ownership); so->count = util_last_bit(so->enabled_mask); @@ -497,7 +516,7 @@ fd_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot, */ if (vb[i].buffer.resource && unlikely(vb[i].buffer_offset >= vb[i].buffer.resource->width0)) { - so->vb[start_slot + i].buffer_offset = 0; + so->vb[i].buffer_offset = 0; } } } @@ -595,6 +614,8 @@ fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, memcpy(so->pipe, elements, sizeof(*elements) * num_elements); so->num_elements = num_elements; + for (unsigned i = 0; i < num_elements; i++) + so->strides[elements[i].vertex_buffer_index] = elements[i].src_stride; return so; } @@ -675,14 +696,18 @@ fd_set_stream_output_targets(struct pipe_context *pctx, unsigned num_targets, } for (i = 0; i < num_targets; i++) { - boolean changed = targets[i] != so->targets[i]; - boolean reset = (offsets[i] != (unsigned)-1); + bool changed = targets[i] != so->targets[i]; + bool reset = (offsets[i] != (unsigned)-1); so->reset |= (reset << i); if (targets[i]) { fd_resource_set_usage(targets[i]->buffer, FD_DIRTY_STREAMOUT); fd_dirty_resource(ctx, targets[i]->buffer, FD_DIRTY_STREAMOUT, true); + + struct fd_stream_output_target *target = fd_stream_output_target(targets[i]); + fd_resource_set_usage(target->offset_buf, FD_DIRTY_STREAMOUT); + fd_dirty_resource(ctx, target->offset_buf, FD_DIRTY_STREAMOUT, true); } if (!changed && !reset) @@ -805,6 +830,7 @@ fd_state_init(struct pipe_context *pctx) pctx->set_shader_buffers = fd_set_shader_buffers; pctx->set_shader_images = fd_set_shader_images; pctx->set_framebuffer_state = fd_set_framebuffer_state; + pctx->set_sample_locations = fd_set_sample_locations; pctx->set_polygon_stipple = fd_set_polygon_stipple; pctx->set_scissor_states = fd_set_scissor_states; pctx->set_viewport_states = fd_set_viewport_states; diff --git a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h index 5e3fdbe4d..8a64622d1 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h +++ b/lib/mesa/src/gallium/drivers/freedreno/freedreno_util.h @@ -507,6 +507,13 @@ fd4_size2indextype(unsigned index_size) return INDEX4_SIZE_32_BIT; } +/* Convert 19.2MHz RBBM always-on timer ticks to ns */ +static inline uint64_t +ticks_to_ns(uint64_t ts) +{ + return ts * (1000000000 / 19200000); +} + #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 263762efd..497a44b14 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -36,7 +36,6 @@ #include "nir/tgsi_to_nir.h" #include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" #include "ir3/instr-a3xx.h" |