diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2019-01-29 11:08:07 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2019-01-29 11:08:07 +0000 |
commit | 6b139c2063623e9310025247cd966490b9aa57ea (patch) | |
tree | 375acfd898ca3d721250aa17291bbb90a8d7250a /lib/mesa/src/gallium/drivers/vc4 | |
parent | cce99579dcfb1d54c54cff65573be3430e77f2c5 (diff) |
Import Mesa 18.3.2
Diffstat (limited to 'lib/mesa/src/gallium/drivers/vc4')
-rw-r--r-- | lib/mesa/src/gallium/drivers/vc4/Automake.inc | 9 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c | 23 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/vc4/meson.build | 120 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/vc4/vc4_fence.c | 75 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c | 1 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/vc4/vc4_query.c | 228 |
6 files changed, 427 insertions, 29 deletions
diff --git a/lib/mesa/src/gallium/drivers/vc4/Automake.inc b/lib/mesa/src/gallium/drivers/vc4/Automake.inc index b1aa9726b..650466e49 100644 --- a/lib/mesa/src/gallium/drivers/vc4/Automake.inc +++ b/lib/mesa/src/gallium/drivers/vc4/Automake.inc @@ -2,9 +2,18 @@ if HAVE_GALLIUM_VC4 TARGET_DRIVERS += vc4 TARGET_CPPFLAGS += -DGALLIUM_VC4 + +if !HAVE_GALLIUM_V3D +TARGET_LIB_DEPS += \ + $(top_builddir)/src/broadcom/libbroadcom.la \ + $(top_builddir)/src/broadcom/libbroadcom_v33.la \ + $(top_builddir)/src/broadcom/libbroadcom_v41.la +endif + TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \ $(top_builddir)/src/gallium/drivers/vc4/libvc4.la \ $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la + endif diff --git a/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c index 2da797899..f03d0bf0d 100644 --- a/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c +++ b/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c @@ -141,6 +141,12 @@ static void emit_tile(struct vc4_exec_info *exec, } if (setup->zs_read) { + if (setup->color_read) { + /* Exec previous load. */ + vc4_tile_coordinates(setup, x, y); + vc4_store_before_load(setup); + } + if (args->zs_read.flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); @@ -149,12 +155,6 @@ static void emit_tile(struct vc4_exec_info *exec, &args->zs_read, x, y) | VC4_LOADSTORE_FULL_RES_DISABLE_COLOR); } else { - if (setup->color_read) { - /* Exec previous load. */ - vc4_tile_coordinates(setup, x, y); - vc4_store_before_load(setup); - } - rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); rcl_u16(setup, args->zs_read.bits); rcl_u32(setup, setup->zs_read->paddr + @@ -285,16 +285,15 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, } } if (setup->zs_read) { + if (setup->color_read) { + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + if (args->zs_read.flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; } else { - if (setup->color_read && - !(args->color_read.flags & - VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) { - loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; - loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; - } loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; } } diff --git a/lib/mesa/src/gallium/drivers/vc4/meson.build b/lib/mesa/src/gallium/drivers/vc4/meson.build new file mode 100644 index 000000000..50adcc25f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc4/meson.build @@ -0,0 +1,120 @@ +# Copyright © 2017 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +files_libvc4 = files( + 'kernel/vc4_drv.h', + 'kernel/vc4_gem.c', + 'kernel/vc4_packet.h', + 'kernel/vc4_render_cl.c', + 'kernel/vc4_validate.c', + 'kernel/vc4_validate_shaders.c', + 'vc4_blit.c', + 'vc4_bufmgr.c', + 'vc4_bufmgr.h', + 'vc4_cl.c', + 'vc4_cl_dump.c', + 'vc4_cl_dump.h', + 'vc4_cl.h', + 'vc4_context.c', + 'vc4_context.h', + 'vc4_draw.c', + 'vc4_emit.c', + 'vc4_fence.c', + 'vc4_formats.c', + 'vc4_job.c', + 'vc4_nir_lower_blend.c', + 'vc4_nir_lower_io.c', + 'vc4_nir_lower_txf_ms.c', + 'vc4_opt_algebraic.c', + 'vc4_opt_constant_folding.c', + 'vc4_opt_copy_propagation.c', + 'vc4_opt_dead_code.c', + 'vc4_opt_peephole_sf.c', + 'vc4_opt_small_immediates.c', + 'vc4_opt_vpm.c', + 'vc4_opt_coalesce_ff_writes.c', + 'vc4_program.c', + 'vc4_qir.c', + 'vc4_qir_emit_uniform_stream_resets.c', + 'vc4_qir_live_variables.c', + 'vc4_qir_lower_uniforms.c', + 'vc4_qir_schedule.c', + 'vc4_qir_validate.c', + 'vc4_qir.h', + 'vc4_qpu.c', + 'vc4_qpu_defines.h', + 'vc4_qpu_disasm.c', + 'vc4_qpu_emit.c', + 'vc4_qpu.h', + 'vc4_qpu_schedule.c', + 'vc4_qpu_validate.c', + 'vc4_query.c', + 'vc4_register_allocate.c', + 'vc4_reorder_uniforms.c', + 'vc4_resource.c', + 'vc4_resource.h', + 'vc4_screen.c', + 'vc4_screen.h', + 'vc4_simulator.c', + 'vc4_simulator_validate.h', + 'vc4_state.c', + 'vc4_tiling.c', + 'vc4_tiling_lt.c', + 'vc4_tiling.h', + 'vc4_uniforms.c', +) + +libvc4_neon = [] +if with_asm_arch == 'arm' + libvc4_neon = static_library( + 'vc4_neon', + 'vc4_tiling_lt_neon.c', + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom + ], + c_args : '-mfpu=neon', + ) +endif + +simpenrose_c_args = [] +dep_simpenrose = dependency('simpenrose', required : false) +if dep_simpenrose.found() + simpenrose_c_args = '-DUSE_VC4_SIMULATOR' +endif + +libvc4 = static_library( + 'vc4', + [files_libvc4, v3d_xml_pack], + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, + inc_gallium_drivers, inc_drm_uapi, + ], + link_with: libvc4_neon, + c_args : [c_vis_args, simpenrose_c_args], + cpp_args : [cpp_vis_args], + dependencies : [dep_simpenrose, dep_libdrm, dep_valgrind, idep_nir_headers], + build_by_default : false, +) + +driver_vc4 = declare_dependency( + compile_args : '-DGALLIUM_VC4', + link_with : [libvc4, libvc4winsys, libbroadcom_cle, libbroadcom_v3d], + dependencies : idep_nir, +) diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_fence.c b/lib/mesa/src/gallium/drivers/vc4/vc4_fence.c index f61e7c6a5..0dbfbe966 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_fence.c +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_fence.c @@ -34,26 +34,39 @@ * fired off as our fence marker. */ +#include <libsync.h> +#include <fcntl.h> + #include "util/u_inlines.h" #include "vc4_screen.h" +#include "vc4_context.h" #include "vc4_bufmgr.h" struct vc4_fence { struct pipe_reference reference; uint64_t seqno; + int fd; }; +static inline struct vc4_fence * +vc4_fence(struct pipe_fence_handle *pfence) +{ + return (struct vc4_fence *)pfence; +} + static void vc4_fence_reference(struct pipe_screen *pscreen, struct pipe_fence_handle **pp, struct pipe_fence_handle *pf) { struct vc4_fence **p = (struct vc4_fence **)pp; - struct vc4_fence *f = (struct vc4_fence *)pf; + struct vc4_fence *f = vc4_fence(pf); struct vc4_fence *old = *p; if (pipe_reference(&(*p)->reference, &f->reference)) { + if (old->fd >= 0) + close(old->fd); free(old); } *p = f; @@ -66,13 +79,16 @@ vc4_fence_finish(struct pipe_screen *pscreen, uint64_t timeout_ns) { struct vc4_screen *screen = vc4_screen(pscreen); - struct vc4_fence *f = (struct vc4_fence *)pf; + struct vc4_fence *f = vc4_fence(pf); + + if (f->fd >= 0) + return sync_wait(f->fd, timeout_ns / 1000000) == 0; return vc4_wait_seqno(screen, f->seqno, timeout_ns, "fence wait"); } struct vc4_fence * -vc4_fence_create(struct vc4_screen *screen, uint64_t seqno) +vc4_fence_create(struct vc4_screen *screen, uint64_t seqno, int fd) { struct vc4_fence *f = calloc(1, sizeof(*f)); @@ -81,13 +97,64 @@ vc4_fence_create(struct vc4_screen *screen, uint64_t seqno) pipe_reference_init(&f->reference, 1); f->seqno = seqno; + f->fd = fd; return f; } +static void +vc4_fence_create_fd(struct pipe_context *pctx, struct pipe_fence_handle **pf, + int fd, enum pipe_fd_type type) +{ + struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_fence **fence = (struct vc4_fence **)pf; + + assert(type == PIPE_FD_TYPE_NATIVE_SYNC); + *fence = vc4_fence_create(vc4->screen, vc4->last_emit_seqno, + fcntl(fd, F_DUPFD_CLOEXEC, 3)); +} + +static void +vc4_fence_server_sync(struct pipe_context *pctx, + struct pipe_fence_handle *pfence) +{ + struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_fence *fence = vc4_fence(pfence); + + if (fence->fd >= 0) + sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd); +} + +static int +vc4_fence_get_fd(struct pipe_screen *screen, struct pipe_fence_handle *pfence) +{ + struct vc4_fence *fence = vc4_fence(pfence); + + return fcntl(fence->fd, F_DUPFD_CLOEXEC, 3); +} + +int +vc4_fence_context_init(struct vc4_context *vc4) +{ + vc4->base.create_fence_fd = vc4_fence_create_fd; + vc4->base.fence_server_sync = vc4_fence_server_sync; + vc4->in_fence_fd = -1; + + /* Since we initialize the in_fence_fd to -1 (no wait necessary), + * we also need to initialize our in_syncobj as signaled. + */ + if (vc4->screen->has_syncobj) { + return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &vc4->in_syncobj); + } else { + return 0; + } +} + void -vc4_fence_init(struct vc4_screen *screen) +vc4_fence_screen_init(struct vc4_screen *screen) { screen->base.fence_reference = vc4_fence_reference; screen->base.fence_finish = vc4_fence_finish; + screen->base.fence_get_fd = vc4_fence_get_fd; } diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c index 108524377..92b9e8918 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c @@ -46,7 +46,6 @@ vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b, nir_tex_instr *txf = nir_tex_instr_create(c->s, 1); txf->op = nir_texop_txf; - txf->texture = txf_ms->texture; txf->texture_index = txf_ms->texture_index; txf->coord_components = txf_ms->coord_components; txf->is_shadow = txf_ms->is_shadow; diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_query.c b/lib/mesa/src/gallium/drivers/vc4/vc4_query.c index ddf8f8fb0..6e4681e93 100644 --- a/lib/mesa/src/gallium/drivers/vc4/vc4_query.c +++ b/lib/mesa/src/gallium/drivers/vc4/vc4_query.c @@ -22,8 +22,9 @@ */ /** - * Stub support for occlusion queries. + * Expose V3D HW perf counters. * + * We also have code to fake support for occlusion queries. * Since we expose support for GL 2.0, we have to expose occlusion queries, * but the spec allows you to expose 0 query counter bits, so we just return 0 * as the result of all our queries. @@ -32,49 +33,252 @@ struct vc4_query { - uint8_t pad; + unsigned num_queries; + struct vc4_hwperfmon *hwperfmon; }; +static const char *v3d_counter_names[] = { + "FEP-valid-primitives-no-rendered-pixels", + "FEP-valid-primitives-rendered-pixels", + "FEP-clipped-quads", + "FEP-valid-quads", + "TLB-quads-not-passing-stencil-test", + "TLB-quads-not-passing-z-and-stencil-test", + "TLB-quads-passing-z-and-stencil-test", + "TLB-quads-with-zero-coverage", + "TLB-quads-with-non-zero-coverage", + "TLB-quads-written-to-color-buffer", + "PTB-primitives-discarded-outside-viewport", + "PTB-primitives-need-clipping", + "PTB-primitives-discared-reversed", + "QPU-total-idle-clk-cycles", + "QPU-total-clk-cycles-vertex-coord-shading", + "QPU-total-clk-cycles-fragment-shading", + "QPU-total-clk-cycles-executing-valid-instr", + "QPU-total-clk-cycles-waiting-TMU", + "QPU-total-clk-cycles-waiting-scoreboard", + "QPU-total-clk-cycles-waiting-varyings", + "QPU-total-instr-cache-hit", + "QPU-total-instr-cache-miss", + "QPU-total-uniform-cache-hit", + "QPU-total-uniform-cache-miss", + "TMU-total-text-quads-processed", + "TMU-total-text-cache-miss", + "VPM-total-clk-cycles-VDW-stalled", + "VPM-total-clk-cycles-VCD-stalled", + "L2C-total-cache-hit", + "L2C-total-cache-miss", +}; + +int vc4_get_driver_query_group_info(struct pipe_screen *pscreen, + unsigned index, + struct pipe_driver_query_group_info *info) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + + if (!screen->has_perfmon_ioctl) + return 0; + + if (!info) + return 1; + + if (index > 0) + return 0; + + info->name = "V3D counters"; + info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS; + info->num_queries = ARRAY_SIZE(v3d_counter_names); + return 1; +} + +int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + + if (!screen->has_perfmon_ioctl) + return 0; + + if (!info) + return ARRAY_SIZE(v3d_counter_names); + + if (index >= ARRAY_SIZE(v3d_counter_names)) + return 0; + + info->group_id = 0; + info->name = v3d_counter_names[index]; + info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; + info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; + return 1; +} + static struct pipe_query * -vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries, + unsigned *query_types) { struct vc4_query *query = calloc(1, sizeof(*query)); + struct vc4_hwperfmon *hwperfmon; + unsigned i, nhwqueries = 0; + + if (!query) + return NULL; + + for (i = 0; i < num_queries; i++) { + if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC) + nhwqueries++; + } + + /* We can't mix HW and non-HW queries. */ + if (nhwqueries && nhwqueries != num_queries) + return NULL; + + if (!nhwqueries) + return (struct pipe_query *)query; + + hwperfmon = calloc(1, sizeof(*hwperfmon)); + if (!hwperfmon) + goto err_free_query; + + for (i = 0; i < num_queries; i++) + hwperfmon->events[i] = query_types[i] - + PIPE_QUERY_DRIVER_SPECIFIC; + + query->hwperfmon = hwperfmon; + query->num_queries = num_queries; /* Note that struct pipe_query isn't actually defined anywhere. */ return (struct pipe_query *)query; + +err_free_query: + free(query); + + return NULL; +} + +static struct pipe_query * +vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + return vc4_create_batch_query(ctx, 1, &query_type); } static void -vc4_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_context *ctx = vc4_context(pctx); + struct vc4_query *query = (struct vc4_query *)pquery; + + if (query->hwperfmon && query->hwperfmon->id) { + if (query->hwperfmon->id) { + struct drm_vc4_perfmon_destroy req = { }; + + req.id = query->hwperfmon->id; + vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, + &req); + } + + free(query->hwperfmon); + } + free(query); } static boolean -vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_query *query = (struct vc4_query *)pquery; + struct vc4_context *ctx = vc4_context(pctx); + struct drm_vc4_perfmon_create req = { }; + unsigned i; + int ret; + + if (!query->hwperfmon) + return true; + + /* Only one perfmon can be activated per context. */ + if (ctx->perfmon) + return false; + + /* Reset the counters by destroying the previously allocated perfmon */ + if (query->hwperfmon->id) { + struct drm_vc4_perfmon_destroy destroyreq = { }; + + destroyreq.id = query->hwperfmon->id; + vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq); + } + + for (i = 0; i < query->num_queries; i++) + req.events[i] = query->hwperfmon->events[i]; + + req.ncounters = query->num_queries; + ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req); + if (ret) + return false; + + query->hwperfmon->id = req.id; + + /* Make sure all pendings jobs are flushed before activating the + * perfmon. + */ + vc4_flush(pctx); + ctx->perfmon = query->hwperfmon; return true; } static bool -vc4_end_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_query *query = (struct vc4_query *)pquery; + struct vc4_context *ctx = vc4_context(pctx); + + if (!query->hwperfmon) + return true; + + if (ctx->perfmon != query->hwperfmon) + return false; + + /* Make sure all pendings jobs are flushed before deactivating the + * perfmon. + */ + vc4_flush(pctx); + ctx->perfmon = NULL; return true; } static boolean -vc4_get_query_result(struct pipe_context *ctx, struct pipe_query *query, +vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery, boolean wait, union pipe_query_result *vresult) { - uint64_t *result = &vresult->u64; + struct vc4_context *ctx = vc4_context(pctx); + struct vc4_query *query = (struct vc4_query *)pquery; + struct drm_vc4_perfmon_get_values req; + unsigned i; + int ret; + + if (!query->hwperfmon) { + vresult->u64 = 0; + return true; + } - *result = 0; + if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno, + wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon")) + return false; + + req.id = query->hwperfmon->id; + req.values_ptr = (uintptr_t)query->hwperfmon->counters; + ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req); + if (ret) + return false; + + for (i = 0; i < query->num_queries; i++) + vresult->batch[i].u64 = query->hwperfmon->counters[i]; return true; } static void -vc4_set_active_query_state(struct pipe_context *pipe, boolean enable) +vc4_set_active_query_state(struct pipe_context *pctx, boolean enable) { } @@ -82,10 +286,10 @@ void vc4_query_init(struct pipe_context *pctx) { pctx->create_query = vc4_create_query; + pctx->create_batch_query = vc4_create_batch_query; pctx->destroy_query = vc4_destroy_query; pctx->begin_query = vc4_begin_query; pctx->end_query = vc4_end_query; pctx->get_query_result = vc4_get_query_result; - pctx->set_active_query_state = vc4_set_active_query_state; + pctx->set_active_query_state = vc4_set_active_query_state; } - |