Diffstat (limited to 'lib/libdrm/intel/intel_bufmgr_gem.c')
-rw-r--r-- | lib/libdrm/intel/intel_bufmgr_gem.c | 123
1 file changed, 110 insertions, 13 deletions
diff --git a/lib/libdrm/intel/intel_bufmgr_gem.c b/lib/libdrm/intel/intel_bufmgr_gem.c
index f5dd76303..f6f879686 100644
--- a/lib/libdrm/intel/intel_bufmgr_gem.c
+++ b/lib/libdrm/intel/intel_bufmgr_gem.c
@@ -151,6 +151,7 @@ typedef struct _drm_intel_bufmgr_gem {
 	unsigned int bo_reuse : 1;
 	unsigned int no_exec : 1;
 	unsigned int has_vebox : 1;
+	unsigned int has_exec_async : 1;
 	bool fenced_relocs;
 
 	struct {
@@ -197,6 +198,8 @@ struct _drm_intel_bo_gem {
 	uint32_t swizzle_mode;
 	unsigned long stride;
 
+	unsigned long kflags;
+
 	time_t free_time;
 
 	/** Array passed to the DRM containing relocation information. */
@@ -258,7 +261,7 @@ struct _drm_intel_bo_gem {
 	 * Boolean of whether the GPU is definitely not accessing the buffer.
 	 *
 	 * This is only valid when reusable, since non-reusable
-	 * buffers are those that have been shared wth other
+	 * buffers are those that have been shared with other
	 * processes, so we don't know their state.
	 */
	bool idle;
@@ -296,7 +299,7 @@ struct _drm_intel_bo_gem {
	 */
	int reloc_tree_fences;
 
-	/** Flags that we may need to do the SW_FINSIH ioctl on unmap. */
+	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
	bool mapped_cpu_write;
 };
 
@@ -579,12 +582,11 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = bo->align;
-	bufmgr_gem->exec2_objects[index].offset = bo_gem->is_softpin ?
-		bo->offset64 : 0;
-	bufmgr_gem->exec_bos[index] = bo;
-	bufmgr_gem->exec2_objects[index].flags = flags;
+	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
+	bufmgr_gem->exec2_objects[index].flags = flags | bo_gem->kflags;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
+	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
 }
 
@@ -1372,6 +1374,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
	for (i = 0; i < bo_gem->softpin_target_count; i++)
		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
								  time);
+	bo_gem->kflags = 0;
	bo_gem->reloc_count = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->softpin_target_count = 0;
@@ -1412,6 +1415,8 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
		bo_gem->name = NULL;
		bo_gem->validate_index = -1;
 
+		bo_gem->kflags = 0;
+
		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
	} else {
		drm_intel_gem_bo_free(bo);
@@ -1723,7 +1728,7 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
	}
 
	/* We need to unmap after every innovation as we cannot track
-	 * an open vma for every bo as that will exhaasut the system
+	 * an open vma for every bo as that will exhaust the system
	 * limits and cause later failures.
	 */
	if (--bo_gem->map_count == 0) {
@@ -2385,6 +2390,7 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
 static int
 do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
+	 int in_fence, int *out_fence,
	 unsigned int flags)
 {
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
@@ -2441,12 +2447,20 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
	else
		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
	execbuf.rsvd2 = 0;
+	if (in_fence != -1) {
+		execbuf.rsvd2 = in_fence;
+		execbuf.flags |= I915_EXEC_FENCE_IN;
+	}
+	if (out_fence != NULL) {
+		*out_fence = -1;
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+	}
 
	if (bufmgr_gem->no_exec)
		goto skip_execution;
 
	ret = drmIoctl(bufmgr_gem->fd,
-		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
+		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
		       &execbuf);
	if (ret != 0) {
		ret = -errno;
@@ -2462,6 +2476,9 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
	}
	drm_intel_update_buffer_offsets2(bufmgr_gem);
 
+	if (ret == 0 && out_fence != NULL)
+		*out_fence = execbuf.rsvd2 >> 32;
+
 skip_execution:
	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);
@@ -2487,7 +2504,7 @@ drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
	       int DR4)
 {
	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
-			I915_EXEC_RENDER);
+			-1, NULL, I915_EXEC_RENDER);
 }
 
 static int
@@ -2496,14 +2513,25 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
			unsigned int flags)
 {
	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
-			flags);
+			-1, NULL, flags);
 }
 
 int
 drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
			      int used, unsigned int flags)
 {
-	return do_exec2(bo, used, ctx, NULL, 0, 0, flags);
+	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
+}
+
+int
+drm_intel_gem_bo_fence_exec(drm_intel_bo *bo,
+			    drm_intel_context *ctx,
+			    int used,
+			    int in_fence,
+			    int *out_fence,
+			    unsigned int flags)
+{
+	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
 }
 
 static int
@@ -2750,11 +2778,12 @@ drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
 
		pthread_mutex_lock(&bufmgr_gem->lock);
		if (!bo_gem->global_name) {
+			bo_gem->global_name = flink.name;
+			bo_gem->reusable = false;
+
			HASH_ADD(name_hh, bufmgr_gem->name_table,
				 global_name, sizeof(bo_gem->global_name),
				 bo_gem);
-			bo_gem->global_name = flink.name;
-			bo_gem->reusable = false;
		}
		pthread_mutex_unlock(&bufmgr_gem->lock);
	}
@@ -2779,6 +2808,59 @@ drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
 }
 
 /**
+ * Disables implicit synchronisation before executing the bo
+ *
+ * This will cause rendering corruption unless you correctly manage explicit
+ * fences for all rendering involving this buffer - including use by others.
+ * Disabling the implicit serialisation is only required if that serialisation
+ * is too coarse (for example, you have split the buffer into many
+ * non-overlapping regions and are sharing the whole buffer between concurrent
+ * independent command streams).
+ *
+ * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
+ * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync,
+ * or subsequent execbufs involving the bo will generate EINVAL.
+ */
+void
+drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo)
+{
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
+}
+
+/**
+ * Enables implicit synchronisation before executing the bo
+ *
+ * This is the default behaviour of the kernel, to wait upon prior writes
+ * completing on the object before rendering with it, or to wait for prior
+ * reads to complete before writing into the object.
+ * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling
+ * the kernel never to insert a stall before using the object. Then this
+ * function can be used to restore the implicit sync before subsequent
+ * rendering.
+ */
+void
+drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo)
+{
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
+}
+
+/**
+ * Query whether the kernel supports disabling of its implicit synchronisation
+ * before execbuf. See drm_intel_gem_bo_disable_implicit_sync()
+ */
+int
+drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+
+	return bufmgr_gem->has_exec_async;
+}
+
+/**
  * Enable use of fenced reloc type.
  *
  * New code should enable this to avoid unnecessary fence register
@@ -3198,6 +3280,17 @@ drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
	return context;
 }
 
+int
+drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id)
+{
+	if (ctx == NULL)
+		return -EINVAL;
+
+	*ctx_id = ctx->ctx_id;
+
+	return 0;
+}
+
 void
 drm_intel_gem_context_destroy(drm_intel_context *ctx)
 {
@@ -3641,6 +3734,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_relaxed_fencing = ret == 0;
 
+	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
+	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+	bufmgr_gem->has_exec_async = ret == 0;
+
	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
 
	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
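
Usage notes (not part of the patch). The intended call pattern for the new implicit-sync controls looks roughly like the sketch below. It is illustrative only: "bufmgr" and "bo" are assumed to be created elsewhere, and only the three entry points added above are used.

#include <intel_bufmgr.h>

/* Sketch: opt a buffer out of the kernel's implicit serialisation for a
 * span of explicitly-fenced rendering, then restore the default.
 * "bufmgr" and "bo" are assumed to come from the caller. */
static void
render_with_explicit_fences(drm_intel_bufmgr *bufmgr, drm_intel_bo *bo)
{
	/* Without I915_PARAM_HAS_EXEC_ASYNC, EXEC_OBJECT_ASYNC would make
	 * execbuf fail with EINVAL, so check support first. */
	if (!drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr))
		return;

	drm_intel_gem_bo_disable_implicit_sync(bo);

	/* ... queue rendering ordered purely by explicit fences ... */

	/* Hand the buffer back to clients that rely on implicit sync. */
	drm_intel_gem_bo_enable_implicit_sync(bo);
}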
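The fence plumbing in do_exec2() maps onto the kernel's I915_EXEC_FENCE_IN/OUT interface: execbuf.rsvd2 carries the incoming sync_file fd, and after DRM_IOCTL_I915_GEM_EXECBUFFER2_WR the output fence comes back in its upper 32 bits, as the hunks above show. A hedged sketch of driving drm_intel_gem_bo_fence_exec(); batch construction is elided and "batch1", "batch2", "ctx" and the used byte counts are assumed to be set up by the caller.

#include <unistd.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>

/* Sketch: chain two batch submissions through a sync_file fd rather
 * than relying on implicit sync. */
static int
submit_chained(drm_intel_bo *batch1, int used1,
	       drm_intel_bo *batch2, int used2,
	       drm_intel_context *ctx)
{
	int fence = -1;
	int ret;

	/* No input fence (-1); request an output fence. */
	ret = drm_intel_gem_bo_fence_exec(batch1, ctx, used1,
					  -1, &fence, I915_EXEC_RENDER);
	if (ret)
		return ret;

	/* The second batch waits on the first via the returned fd. */
	ret = drm_intel_gem_bo_fence_exec(batch2, ctx, used2,
					  fence, NULL, I915_EXEC_RENDER);

	/* The sync_file fd is owned by the caller and must be closed. */
	if (fence >= 0)
		close(fence);
	return ret;
}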
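drm_intel_gem_context_get_id() simply exposes the ctx_id hidden behind the opaque drm_intel_context, for callers that need the raw id (for example, for ioctls or tracing keyed on a context id). A small sketch; the wrapper name and the fallback to id 0 (the kernel's default context) are illustrative, not part of the patch.

#include <stdint.h>
#include <intel_bufmgr.h>

/* Sketch: fetch the raw ctx_id, treating a NULL context (-EINVAL)
 * as the default context, id 0. */
static uint32_t
context_id_or_default(drm_intel_context *ctx)
{
	uint32_t ctx_id;

	if (drm_intel_gem_context_get_id(ctx, &ctx_id) != 0)
		ctx_id = 0;

	return ctx_id;
}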