author     Jonathan Gray <jsg@cvs.openbsd.org>    2019-05-23 04:49:26 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>    2019-05-23 04:49:26 +0000
commit     ce2b5bb66fb2f0ac7b00c4ad7ab90a8f970d2c41 (patch)
tree       92d8666094480db500295a64fa10faf6d8fdd5d9
parent     d24db92d1a7025e7ec2794df971fdef9e4f75fdf (diff)
Import Mesa 19.0.5
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_bo.c                     250
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_device.c                  77
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_pipe.c                   358
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_priv.h                   168
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_ringbuffer.c             978
-rw-r--r--  lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c         1080
-rw-r--r--  lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c   155
7 files changed, 1363 insertions, 1703 deletions
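For orientation before the per-file diffs: the changed files implement the msm (drm/msm kernel driver) backend of freedreno's fd_* winsys API. The sketch below shows how a consumer typically drives that API (device -> pipe -> submit -> ringbuffer -> flush), assuming the public wrappers declared in freedreno_drmif.h / freedreno_ringbuffer.h at this Mesa version; the device node path and the emitted dword are illustrative placeholders, not part of this import.

/* Illustrative sketch -- not part of the imported files.  Each fd_*
 * wrapper dispatches into the corresponding msm_* backend function
 * changed in the diffs below.
 */
#include <fcntl.h>
#include "freedreno_drmif.h"
#include "freedreno_ringbuffer.h"

static void submit_example(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR);        /* assumed node */
	struct fd_device *dev = fd_device_new(fd);           /* -> msm_device_new() */
	struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D); /* -> msm_pipe_new() */

	/* one submit becomes one DRM_MSM_GEM_SUBMIT ioctl, built from rb's: */
	struct fd_submit *submit = fd_submit_new(pipe);      /* -> msm_submit_new() */
	struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000,
			FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);

	*(ring->cur++) = 0x00000000;  /* real users emit PM4 packets here */

	uint32_t fence;
	/* no in-fence fd, no out-fence fd; just the kernel fence seqno: */
	fd_submit_flush(submit, -1, NULL, &fence);
	fd_pipe_wait(pipe, fence);    /* DRM_MSM_WAIT_FENCE on this queue */

	fd_ringbuffer_del(ring);
	fd_submit_del(submit);
	fd_pipe_del(pipe);
	fd_device_del(dev);
}

With that flow in mind, the verbatim per-file diffs follow.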
diff --git a/lib/mesa/src/freedreno/drm/msm_bo.c b/lib/mesa/src/freedreno/drm/msm_bo.c index 5963cac86..da2609903 100644 --- a/lib/mesa/src/freedreno/drm/msm_bo.c +++ b/lib/mesa/src/freedreno/drm/msm_bo.c @@ -26,186 +26,172 @@ #include "msm_priv.h" -static int -bo_allocate(struct msm_bo *msm_bo) +static int bo_allocate(struct msm_bo *msm_bo) { - struct fd_bo *bo = &msm_bo->base; - if (!msm_bo->offset) { - struct drm_msm_gem_info req = { - .handle = bo->handle, - .info = MSM_INFO_GET_OFFSET, - }; - int ret; - - /* if the buffer is already backed by pages then this - * doesn't actually do anything (other than giving us - * the offset) - */ - ret = - drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); - if (ret) { - ERROR_MSG("alloc failed: %s", strerror(errno)); - return ret; - } - - msm_bo->offset = req.value; - } - - return 0; + struct fd_bo *bo = &msm_bo->base; + if (!msm_bo->offset) { + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_GET_OFFSET, + }; + int ret; + + /* if the buffer is already backed by pages then this + * doesn't actually do anything (other than giving us + * the offset) + */ + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("alloc failed: %s", strerror(errno)); + return ret; + } + + msm_bo->offset = req.value; + } + + return 0; } -static int -msm_bo_offset(struct fd_bo *bo, uint64_t *offset) +static int msm_bo_offset(struct fd_bo *bo, uint64_t *offset) { - struct msm_bo *msm_bo = to_msm_bo(bo); - int ret = bo_allocate(msm_bo); - if (ret) - return ret; - *offset = msm_bo->offset; - return 0; + struct msm_bo *msm_bo = to_msm_bo(bo); + int ret = bo_allocate(msm_bo); + if (ret) + return ret; + *offset = msm_bo->offset; + return 0; } -static int -msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) +static int msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) { - struct drm_msm_gem_cpu_prep req = { - .handle = bo->handle, - .op = op, - }; + struct drm_msm_gem_cpu_prep req = { + .handle = bo->handle, + .op = op, + }; - get_abs_timeout(&req.timeout, 5000000000); + get_abs_timeout(&req.timeout, 5000000000); - return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req)); + return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req)); } -static void -msm_bo_cpu_fini(struct fd_bo *bo) +static void msm_bo_cpu_fini(struct fd_bo *bo) { - struct drm_msm_gem_cpu_fini req = { - .handle = bo->handle, - }; + struct drm_msm_gem_cpu_fini req = { + .handle = bo->handle, + }; - drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req)); + drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req)); } -static int -msm_bo_madvise(struct fd_bo *bo, int willneed) +static int msm_bo_madvise(struct fd_bo *bo, int willneed) { - struct drm_msm_gem_madvise req = { - .handle = bo->handle, - .madv = willneed ? MSM_MADV_WILLNEED : MSM_MADV_DONTNEED, - }; - int ret; - - /* older kernels do not support this: */ - if (bo->dev->version < FD_VERSION_MADVISE) - return willneed; - - ret = - drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req)); - if (ret) - return ret; - - return req.retained; + struct drm_msm_gem_madvise req = { + .handle = bo->handle, + .madv = willneed ? 
MSM_MADV_WILLNEED : MSM_MADV_DONTNEED, + }; + int ret; + + /* older kernels do not support this: */ + if (bo->dev->version < FD_VERSION_MADVISE) + return willneed; + + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req)); + if (ret) + return ret; + + return req.retained; } -static uint64_t -msm_bo_iova(struct fd_bo *bo) +static uint64_t msm_bo_iova(struct fd_bo *bo) { - struct drm_msm_gem_info req = { - .handle = bo->handle, - .info = MSM_INFO_GET_IOVA, - }; - int ret; + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_GET_IOVA, + }; + int ret; - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); - if (ret) - return 0; + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); + debug_assert(ret == 0); - return req.value; + return req.value; } -static void -msm_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) +static void msm_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) { - struct drm_msm_gem_info req = { - .handle = bo->handle, - .info = MSM_INFO_SET_NAME, - }; - char buf[32]; - int sz; + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_SET_NAME, + }; + char buf[32]; + int sz; - if (bo->dev->version < FD_VERSION_SOFTPIN) - return; + if (bo->dev->version < FD_VERSION_SOFTPIN) + return; - sz = vsnprintf(buf, sizeof(buf), fmt, ap); + sz = vsnprintf(buf, sizeof(buf), fmt, ap); - req.value = VOID2U64(buf); - req.len = MIN2(sz, sizeof(buf)); + req.value = VOID2U64(buf); + req.len = MIN2(sz, sizeof(buf)); - drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); + drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); } -static void -msm_bo_destroy(struct fd_bo *bo) +static void msm_bo_destroy(struct fd_bo *bo) { - struct msm_bo *msm_bo = to_msm_bo(bo); - free(msm_bo); + struct msm_bo *msm_bo = to_msm_bo(bo); + free(msm_bo); } static const struct fd_bo_funcs funcs = { - .offset = msm_bo_offset, - .cpu_prep = msm_bo_cpu_prep, - .cpu_fini = msm_bo_cpu_fini, - .madvise = msm_bo_madvise, - .iova = msm_bo_iova, - .set_name = msm_bo_set_name, - .destroy = msm_bo_destroy, + .offset = msm_bo_offset, + .cpu_prep = msm_bo_cpu_prep, + .cpu_fini = msm_bo_cpu_fini, + .madvise = msm_bo_madvise, + .iova = msm_bo_iova, + .set_name = msm_bo_set_name, + .destroy = msm_bo_destroy, }; /* allocate a buffer handle: */ -int -msm_bo_new_handle(struct fd_device *dev, uint32_t size, uint32_t flags, - uint32_t *handle) +int msm_bo_new_handle(struct fd_device *dev, + uint32_t size, uint32_t flags, uint32_t *handle) { - struct drm_msm_gem_new req = { - .size = size, - }; - int ret; - - if (flags & FD_BO_SCANOUT) - req.flags |= MSM_BO_SCANOUT; + struct drm_msm_gem_new req = { + .size = size, + .flags = MSM_BO_WC, // TODO figure out proper flags.. 
+ }; + int ret; - if (flags & FD_BO_GPUREADONLY) - req.flags |= MSM_BO_GPU_READONLY; + if (flags & DRM_FREEDRENO_GEM_SCANOUT) + req.flags |= MSM_BO_SCANOUT; - if (flags & FD_BO_CACHED_COHERENT) - req.flags |= MSM_BO_CACHED_COHERENT; - else - req.flags |= MSM_BO_WC; + if (flags & DRM_FREEDRENO_GEM_GPUREADONLY) + req.flags |= MSM_BO_GPU_READONLY; - ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req)); - if (ret) - return ret; + ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, + &req, sizeof(req)); + if (ret) + return ret; - *handle = req.handle; + *handle = req.handle; - return 0; + return 0; } /* allocate a new buffer object */ -struct fd_bo * -msm_bo_from_handle(struct fd_device *dev, uint32_t size, uint32_t handle) +struct fd_bo * msm_bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle) { - struct msm_bo *msm_bo; - struct fd_bo *bo; + struct msm_bo *msm_bo; + struct fd_bo *bo; - msm_bo = calloc(1, sizeof(*msm_bo)); - if (!msm_bo) - return NULL; + msm_bo = calloc(1, sizeof(*msm_bo)); + if (!msm_bo) + return NULL; - bo = &msm_bo->base; - bo->funcs = &funcs; + bo = &msm_bo->base; + bo->funcs = &funcs; - return bo; + return bo; } diff --git a/lib/mesa/src/freedreno/drm/msm_device.c b/lib/mesa/src/freedreno/drm/msm_device.c index a221fdc4c..d391ef013 100644 --- a/lib/mesa/src/freedreno/drm/msm_device.c +++ b/lib/mesa/src/freedreno/drm/msm_device.c @@ -24,77 +24,38 @@ * Rob Clark <robclark@freedesktop.org> */ -#include <unistd.h> -#include <sys/stat.h> #include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> #include "msm_priv.h" -static void -msm_device_destroy(struct fd_device *dev) +static void msm_device_destroy(struct fd_device *dev) { - struct msm_device *msm_dev = to_msm_device(dev); - if (util_queue_is_initialized(&msm_dev->submit_queue)) { - util_queue_destroy(&msm_dev->submit_queue); - } - free(msm_dev); + struct msm_device *msm_dev = to_msm_device(dev); + free(msm_dev); } static const struct fd_device_funcs funcs = { - .bo_new_handle = msm_bo_new_handle, - .bo_from_handle = msm_bo_from_handle, - .pipe_new = msm_pipe_new, - .destroy = msm_device_destroy, + .bo_new_handle = msm_bo_new_handle, + .bo_from_handle = msm_bo_from_handle, + .pipe_new = msm_pipe_new, + .destroy = msm_device_destroy, }; -struct fd_device * -msm_device_new(int fd, drmVersionPtr version) +struct fd_device * msm_device_new(int fd) { - struct msm_device *msm_dev; - struct fd_device *dev; - - STATIC_ASSERT(FD_BO_PREP_READ == MSM_PREP_READ); - STATIC_ASSERT(FD_BO_PREP_WRITE == MSM_PREP_WRITE); - STATIC_ASSERT(FD_BO_PREP_NOSYNC == MSM_PREP_NOSYNC); - - msm_dev = calloc(1, sizeof(*msm_dev)); - if (!msm_dev) - return NULL; - - dev = &msm_dev->base; - dev->funcs = &funcs; - - /* async submit_queue currently only used for msm_submit_sp: */ - if (version->version_minor >= FD_VERSION_SOFTPIN) { - /* Note the name is intentionally short to avoid the queue - * thread's comm truncating the interesting part of the - * process name. - */ - util_queue_init(&msm_dev->submit_queue, "sq", 8, 1, 0, NULL); - } - - if (version->version_minor >= FD_VERSION_CACHED_COHERENT) { - struct drm_msm_gem_new new_req = { - .size = 0x1000, - .flags = MSM_BO_CACHED_COHERENT, - }; + struct msm_device *msm_dev; + struct fd_device *dev; - /* The kernel is new enough to support MSM_BO_CACHED_COHERENT, - * but that is not a guarantee that the device we are running - * on supports it. So do a test allocation to find out. 
- */ - if (!drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, - &new_req, sizeof(new_req))) { - struct drm_gem_close close_req = { - .handle = new_req.handle, - }; - drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_req); + msm_dev = calloc(1, sizeof(*msm_dev)); + if (!msm_dev) + return NULL; - dev->has_cached_coherent = true; - } - } + dev = &msm_dev->base; + dev->funcs = &funcs; - dev->bo_size = sizeof(struct msm_bo); + dev->bo_size = sizeof(struct msm_bo); - return dev; + return dev; } diff --git a/lib/mesa/src/freedreno/drm/msm_pipe.c b/lib/mesa/src/freedreno/drm/msm_pipe.c index 0c35063c3..7d5b9fcd7 100644 --- a/lib/mesa/src/freedreno/drm/msm_pipe.c +++ b/lib/mesa/src/freedreno/drm/msm_pipe.c @@ -28,239 +28,193 @@ #include "msm_priv.h" -static int -query_param(struct fd_pipe *pipe, uint32_t param, uint64_t *value) +static int query_param(struct fd_pipe *pipe, uint32_t param, + uint64_t *value) { - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - struct drm_msm_param req = { - .pipe = msm_pipe->pipe, - .param = param, - }; - int ret; + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + struct drm_msm_param req = { + .pipe = msm_pipe->pipe, + .param = param, + }; + int ret; - ret = - drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req)); - if (ret) - return ret; + ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM, + &req, sizeof(req)); + if (ret) + return ret; - *value = req.value; + *value = req.value; - return 0; + return 0; } -static int -query_queue_param(struct fd_pipe *pipe, uint32_t param, uint64_t *value) +static int msm_pipe_get_param(struct fd_pipe *pipe, + enum fd_param_id param, uint64_t *value) { - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - struct drm_msm_submitqueue_query req = { - .data = VOID2U64(value), - .id = msm_pipe->queue_id, - .param = param, - .len = sizeof(*value), - }; - int ret; - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_QUERY, &req, - sizeof(req)); - if (ret) - return ret; - - return 0; + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + switch(param) { + case FD_DEVICE_ID: // XXX probably get rid of this.. + case FD_GPU_ID: + *value = msm_pipe->gpu_id; + return 0; + case FD_GMEM_SIZE: + *value = msm_pipe->gmem; + return 0; + case FD_CHIP_ID: + *value = msm_pipe->chip_id; + return 0; + case FD_MAX_FREQ: + return query_param(pipe, MSM_PARAM_MAX_FREQ, value); + case FD_TIMESTAMP: + return query_param(pipe, MSM_PARAM_TIMESTAMP, value); + case FD_NR_RINGS: + return query_param(pipe, MSM_PARAM_NR_RINGS, value); + default: + ERROR_MSG("invalid param id: %d", param); + return -1; + } } -static int -msm_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, - uint64_t *value) +static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp, + uint64_t timeout) { - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - switch (param) { - case FD_DEVICE_ID: // XXX probably get rid of this.. 
- case FD_GPU_ID: - *value = msm_pipe->gpu_id; - return 0; - case FD_GMEM_SIZE: - *value = msm_pipe->gmem; - return 0; - case FD_GMEM_BASE: - *value = msm_pipe->gmem_base; - return 0; - case FD_CHIP_ID: - *value = msm_pipe->chip_id; - return 0; - case FD_MAX_FREQ: - return query_param(pipe, MSM_PARAM_MAX_FREQ, value); - case FD_TIMESTAMP: - return query_param(pipe, MSM_PARAM_TIMESTAMP, value); - case FD_NR_RINGS: - return query_param(pipe, MSM_PARAM_NR_RINGS, value); - case FD_PP_PGTABLE: - return query_param(pipe, MSM_PARAM_PP_PGTABLE, value); - case FD_CTX_FAULTS: - return query_queue_param(pipe, MSM_SUBMITQUEUE_PARAM_FAULTS, value); - case FD_GLOBAL_FAULTS: - return query_param(pipe, MSM_PARAM_FAULTS, value); - case FD_SUSPEND_COUNT: - return query_param(pipe, MSM_PARAM_SUSPENDS, value); - default: - ERROR_MSG("invalid param id: %d", param); - return -1; - } + struct fd_device *dev = pipe->dev; + struct drm_msm_wait_fence req = { + .fence = timestamp, + .queueid = to_msm_pipe(pipe)->queue_id, + }; + int ret; + + get_abs_timeout(&req.timeout, timeout); + + ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); + if (ret) { + ERROR_MSG("wait-fence failed! %d (%s)", ret, strerror(errno)); + return ret; + } + + return 0; } -static int -msm_pipe_wait(struct fd_pipe *pipe, const struct fd_fence *fence, uint64_t timeout) +static int open_submitqueue(struct fd_pipe *pipe, uint32_t prio) { - struct fd_device *dev = pipe->dev; - struct drm_msm_wait_fence req = { - .fence = fence->kfence, - .queueid = to_msm_pipe(pipe)->queue_id, - }; - int ret; - - get_abs_timeout(&req.timeout, timeout); - - ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); - if (ret && (ret != -ETIMEDOUT)) { - ERROR_MSG("wait-fence failed! %d (%s)", ret, strerror(errno)); - } - - return ret; -} - -static int -open_submitqueue(struct fd_pipe *pipe, uint32_t prio) -{ - struct drm_msm_submitqueue req = { - .flags = 0, - .prio = prio, - }; - uint64_t nr_rings = 1; - int ret; - - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) { - to_msm_pipe(pipe)->queue_id = 0; - return 0; - } - - msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings); - - req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1); - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW, &req, - sizeof(req)); - if (ret) { - ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno)); - return ret; - } - - to_msm_pipe(pipe)->queue_id = req.id; - return 0; + struct drm_msm_submitqueue req = { + .flags = 0, + .prio = prio, + }; + uint64_t nr_rings = 1; + int ret; + + if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) { + to_msm_pipe(pipe)->queue_id = 0; + return 0; + } + + msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings); + + req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1); + + ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("could not create submitqueue! 
%d (%s)", ret, strerror(errno)); + return ret; + } + + to_msm_pipe(pipe)->queue_id = req.id; + return 0; } -static void -close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id) +static void close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id) { - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) - return; + if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) + return; - drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, &queue_id, - sizeof(queue_id)); + drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, + &queue_id, sizeof(queue_id)); } -static void -msm_pipe_destroy(struct fd_pipe *pipe) +static void msm_pipe_destroy(struct fd_pipe *pipe) { - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - - if (msm_pipe->suballoc_bo) - fd_bo_del_locked(msm_pipe->suballoc_bo); - - close_submitqueue(pipe, msm_pipe->queue_id); - msm_pipe_sp_ringpool_init(msm_pipe); - free(msm_pipe); + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + close_submitqueue(pipe, msm_pipe->queue_id); + free(msm_pipe); } static const struct fd_pipe_funcs sp_funcs = { - .ringbuffer_new_object = msm_ringbuffer_sp_new_object, - .submit_new = msm_submit_sp_new, - .flush = msm_pipe_sp_flush, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, + .ringbuffer_new_object = msm_ringbuffer_sp_new_object, + .submit_new = msm_submit_sp_new, + .get_param = msm_pipe_get_param, + .wait = msm_pipe_wait, + .destroy = msm_pipe_destroy, }; static const struct fd_pipe_funcs legacy_funcs = { - .ringbuffer_new_object = msm_ringbuffer_new_object, - .submit_new = msm_submit_new, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, + .ringbuffer_new_object = msm_ringbuffer_new_object, + .submit_new = msm_submit_new, + .get_param = msm_pipe_get_param, + .wait = msm_pipe_wait, + .destroy = msm_pipe_destroy, }; -static uint64_t -get_param(struct fd_pipe *pipe, uint32_t param) +static uint64_t get_param(struct fd_pipe *pipe, uint32_t param) { - uint64_t value; - int ret = query_param(pipe, param, &value); - if (ret) { - ERROR_MSG("get-param failed! %d (%s)", ret, strerror(errno)); - return 0; - } - return value; + uint64_t value; + int ret = query_param(pipe, param, &value); + if (ret) { + ERROR_MSG("get-param failed! 
%d (%s)", ret, strerror(errno)); + return 0; + } + return value; } -struct fd_pipe * -msm_pipe_new(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio) +struct fd_pipe * msm_pipe_new(struct fd_device *dev, + enum fd_pipe_id id, uint32_t prio) { - static const uint32_t pipe_id[] = { - [FD_PIPE_3D] = MSM_PIPE_3D0, - [FD_PIPE_2D] = MSM_PIPE_2D0, - }; - struct msm_pipe *msm_pipe = NULL; - struct fd_pipe *pipe = NULL; - - msm_pipe = calloc(1, sizeof(*msm_pipe)); - if (!msm_pipe) { - ERROR_MSG("allocation failed"); - goto fail; - } - - pipe = &msm_pipe->base; - - if (fd_device_version(dev) >= FD_VERSION_SOFTPIN) { - pipe->funcs = &sp_funcs; - } else { - pipe->funcs = &legacy_funcs; - } - - /* initialize before get_param(): */ - pipe->dev = dev; - msm_pipe->pipe = pipe_id[id]; - - /* these params should be supported since the first version of drm/msm: */ - msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID); - msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE); - msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID); - - if (fd_device_version(pipe->dev) >= FD_VERSION_GMEM_BASE) - msm_pipe->gmem_base = get_param(pipe, MSM_PARAM_GMEM_BASE); - - if (!(msm_pipe->gpu_id || msm_pipe->chip_id)) - goto fail; - - INFO_MSG("Pipe Info:"); - INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id); - INFO_MSG(" Chip-id: 0x%016"PRIx64, msm_pipe->chip_id); - INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem); - - if (open_submitqueue(pipe, prio)) - goto fail; - - msm_pipe_sp_ringpool_init(msm_pipe); - - return pipe; + static const uint32_t pipe_id[] = { + [FD_PIPE_3D] = MSM_PIPE_3D0, + [FD_PIPE_2D] = MSM_PIPE_2D0, + }; + struct msm_pipe *msm_pipe = NULL; + struct fd_pipe *pipe = NULL; + + msm_pipe = calloc(1, sizeof(*msm_pipe)); + if (!msm_pipe) { + ERROR_MSG("allocation failed"); + goto fail; + } + + pipe = &msm_pipe->base; + + if (fd_device_version(dev) >= FD_VERSION_SOFTPIN) { + pipe->funcs = &sp_funcs; + } else { + pipe->funcs = &legacy_funcs; + } + + /* initialize before get_param(): */ + pipe->dev = dev; + msm_pipe->pipe = pipe_id[id]; + + /* these params should be supported since the first version of drm/msm: */ + msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID); + msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE); + msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID); + + if (! 
msm_pipe->gpu_id) + goto fail; + + INFO_MSG("Pipe Info:"); + INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id); + INFO_MSG(" Chip-id: 0x%08x", msm_pipe->chip_id); + INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem); + + if (open_submitqueue(pipe, prio)) + goto fail; + + return pipe; fail: - if (pipe) - fd_pipe_del(pipe); - return NULL; + if (pipe) + fd_pipe_del(pipe); + return NULL; } diff --git a/lib/mesa/src/freedreno/drm/msm_priv.h b/lib/mesa/src/freedreno/drm/msm_priv.h index 5299b114f..9cb60bc1d 100644 --- a/lib/mesa/src/freedreno/drm/msm_priv.h +++ b/lib/mesa/src/freedreno/drm/msm_priv.h @@ -29,122 +29,112 @@ #include "freedreno_priv.h" -#include "util/slab.h" -#include "util/timespec.h" - -#include "pipe/p_defines.h" - #ifndef __user -#define __user +# define __user #endif -#include "drm-uapi/msm_drm.h" +#include "msm_drm.h" struct msm_device { - struct fd_device base; - struct util_queue submit_queue; + struct fd_device base; + struct fd_bo_cache ring_cache; }; FD_DEFINE_CAST(fd_device, msm_device); -struct fd_device *msm_device_new(int fd, drmVersionPtr version); +struct fd_device * msm_device_new(int fd); struct msm_pipe { - struct fd_pipe base; - uint32_t pipe; - uint32_t gpu_id; - uint64_t chip_id; - uint64_t gmem_base; - uint32_t gmem; - uint32_t queue_id; - struct slab_parent_pool ring_pool; - - /* BO for suballocating long-lived objects on the pipe. */ - struct fd_bo *suballoc_bo; - uint32_t suballoc_offset; - - /** - * The last fence seqno that was flushed to kernel (doesn't mean that it - * is complete, just that the kernel knows about it) - */ - uint32_t last_submit_fence; - - uint32_t last_enqueue_fence; /* just for debugging */ - - /** - * If we *ever* see an in-fence-fd, assume that userspace is - * not relying on implicit fences. - */ - bool no_implicit_sync; + struct fd_pipe base; + uint32_t pipe; + uint32_t gpu_id; + uint32_t gmem; + uint32_t chip_id; + uint32_t queue_id; }; FD_DEFINE_CAST(fd_pipe, msm_pipe); -struct fd_pipe *msm_pipe_new(struct fd_device *dev, enum fd_pipe_id id, - uint32_t prio); - -struct fd_ringbuffer *msm_ringbuffer_new_object(struct fd_pipe *pipe, - uint32_t size); -struct fd_ringbuffer *msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, - uint32_t size); +struct fd_pipe * msm_pipe_new(struct fd_device *dev, + enum fd_pipe_id id, uint32_t prio); -struct fd_submit *msm_submit_new(struct fd_pipe *pipe); -struct fd_submit *msm_submit_sp_new(struct fd_pipe *pipe); -void msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence); +struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size); +struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size); -void msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe); -void msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe); +struct fd_submit * msm_submit_new(struct fd_pipe *pipe); +struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe); struct msm_bo { - struct fd_bo base; - uint64_t offset; - uint32_t idx; + struct fd_bo base; + uint64_t offset; + /* to avoid excess hashtable lookups, cache the ring this bo was + * last emitted on (since that will probably also be the next ring + * it is emitted on) + */ + unsigned current_submit_seqno; + uint32_t idx; }; FD_DEFINE_CAST(fd_bo, msm_bo); -int msm_bo_new_handle(struct fd_device *dev, uint32_t size, uint32_t flags, - uint32_t *handle); -struct fd_bo *msm_bo_from_handle(struct fd_device *dev, uint32_t size, - uint32_t handle); +int msm_bo_new_handle(struct fd_device *dev, + uint32_t size, uint32_t flags, uint32_t 
*handle); +struct fd_bo * msm_bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle); static inline void msm_dump_submit(struct drm_msm_gem_submit *req) { - for (unsigned i = 0; i < req->nr_bos; i++) { - struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos); - struct drm_msm_gem_submit_bo *bo = &bos[i]; - ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags); - } - for (unsigned i = 0; i < req->nr_cmds; i++) { - struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds); - struct drm_msm_gem_submit_cmd *cmd = &cmds[i]; - struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs); - ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, submit_offset=%u, size=%u", - i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size); - for (unsigned j = 0; j < cmd->nr_relocs; j++) { - struct drm_msm_gem_submit_reloc *r = &relocs[j]; - ERROR_MSG( - " reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u" - ", reloc_offset=%" PRIu64, - j, r->submit_offset, r->or, r->shift, r->reloc_idx, - (uint64_t)r->reloc_offset); - } - } + for (unsigned i = 0; i < req->nr_bos; i++) { + struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos); + struct drm_msm_gem_submit_bo *bo = &bos[i]; + ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags); + } + for (unsigned i = 0; i < req->nr_cmds; i++) { + struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds); + struct drm_msm_gem_submit_cmd *cmd = &cmds[i]; + struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs); + ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, submit_offset=%u, size=%u", + i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size); + for (unsigned j = 0; j < cmd->nr_relocs; j++) { + struct drm_msm_gem_submit_reloc *r = &relocs[j]; + ERROR_MSG(" reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u" + ", reloc_offset=%"PRIu64, j, r->submit_offset, r->or, r->shift, + r->reloc_idx, r->reloc_offset); + } + } } -static inline void -get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) +static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) +{ + struct timespec t; + uint32_t s = ns / 1000000000; + clock_gettime(CLOCK_MONOTONIC, &t); + tv->tv_sec = t.tv_sec + s; + tv->tv_nsec = t.tv_nsec + ns - (s * 1000000000); +} + +/* + * Stupid/simple growable array implementation: + */ + +static inline void * +grow(void *ptr, uint16_t nr, uint16_t *max, uint16_t sz) { - struct timespec t; - - if (ns == PIPE_TIMEOUT_INFINITE) - ns = 3600ULL * NSEC_PER_SEC; /* 1 hour timeout is almost infinite */ - - clock_gettime(CLOCK_MONOTONIC, &t); - tv->tv_sec = t.tv_sec + ns / NSEC_PER_SEC; - tv->tv_nsec = t.tv_nsec + ns % NSEC_PER_SEC; - if (tv->tv_nsec >= NSEC_PER_SEC) { /* handle nsec overflow */ - tv->tv_nsec -= NSEC_PER_SEC; - tv->tv_sec++; - } + if ((nr + 1) > *max) { + if ((*max * 2) < (nr + 1)) + *max = nr + 5; + else + *max = *max * 2; + ptr = realloc(ptr, *max * sz); + } + return ptr; } +#define DECLARE_ARRAY(type, name) \ + unsigned short nr_ ## name, max_ ## name; \ + type * name; + +#define APPEND(x, name) ({ \ + (x)->name = grow((x)->name, (x)->nr_ ## name, &(x)->max_ ## name, sizeof((x)->name[0])); \ + (x)->nr_ ## name ++; \ +}) + #endif /* MSM_PRIV_H_ */ diff --git a/lib/mesa/src/freedreno/drm/msm_ringbuffer.c b/lib/mesa/src/freedreno/drm/msm_ringbuffer.c index 43bd83937..369f26f98 100644 --- a/lib/mesa/src/freedreno/drm/msm_ringbuffer.c +++ b/lib/mesa/src/freedreno/drm/msm_ringbuffer.c @@ -38,29 +38,37 @@ * traditional reloc and cmd tracking */ + #define INIT_SIZE 0x1000 
+static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; + + struct msm_submit { - struct fd_submit base; + struct fd_submit base; + + DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); + DECLARE_ARRAY(struct fd_bo *, bos); + + unsigned seqno; - DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); - DECLARE_ARRAY(struct fd_bo *, bos); + /* maps fd_bo to idx in bos table: */ + struct hash_table *bo_table; - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; + struct slab_mempool ring_pool; - struct slab_mempool ring_pool; + /* hash-set of associated rings: */ + struct set *ring_set; - /* hash-set of associated rings: */ - struct set *ring_set; + struct fd_ringbuffer *primary; - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. - */ - struct fd_ringbuffer *suballoc_ring; + /* Allow for sub-allocation of stateobj ring buffers (ie. sharing + * the same underlying bo).. + * + * We also rely on previous stateobj having been fully constructed + * so we can reclaim extra space at it's end. + */ + struct fd_ringbuffer *suballoc_ring; }; FD_DEFINE_CAST(fd_submit, msm_submit); @@ -69,634 +77,646 @@ FD_DEFINE_CAST(fd_submit, msm_submit); * it. */ struct msm_cmd { - struct fd_bo *ring_bo; - unsigned size; - DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs); + struct fd_bo *ring_bo; + unsigned size; + DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs); }; static struct msm_cmd * cmd_new(struct fd_bo *ring_bo) { - struct msm_cmd *cmd = malloc(sizeof(*cmd)); - cmd->ring_bo = fd_bo_ref(ring_bo); - cmd->size = 0; - cmd->nr_relocs = cmd->max_relocs = 0; - cmd->relocs = NULL; - return cmd; + struct msm_cmd *cmd = malloc(sizeof(*cmd)); + cmd->ring_bo = fd_bo_ref(ring_bo); + cmd->size = 0; + cmd->nr_relocs = cmd->max_relocs = 0; + cmd->relocs = NULL; + return cmd; } static void cmd_free(struct msm_cmd *cmd) { - fd_bo_del(cmd->ring_bo); - free(cmd->relocs); - free(cmd); + fd_bo_del(cmd->ring_bo); + free(cmd->relocs); + free(cmd); } +/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to + * later copy into the submit when the stateobj rb is later referenced by + * a regular rb: + */ +struct msm_reloc_bo { + struct fd_bo *bo; + unsigned flags; +}; + struct msm_ringbuffer { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - - union { - /* for _FD_RINGBUFFER_OBJECT case: */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct fd_bo *, reloc_bos); - struct set *ring_set; - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd *, cmds); - }; - } u; - - struct msm_cmd *cmd; /* current cmd */ - struct fd_bo *ring_bo; + struct fd_ringbuffer base; + + /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ + unsigned offset; + + union { + /* for _FD_RINGBUFFER_OBJECT case: */ + struct { + struct fd_pipe *pipe; + DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos); + struct set *ring_set; + }; + /* for other cases: */ + struct { + struct fd_submit *submit; + DECLARE_ARRAY(struct msm_cmd *, cmds); + }; + } u; + + struct msm_cmd *cmd; /* current cmd */ + struct fd_bo *ring_bo; }; FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer); static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * -msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size, - 
enum fd_ringbuffer_flags flags); +static struct fd_ringbuffer * msm_ringbuffer_init( + struct msm_ringbuffer *msm_ring, + uint32_t size, enum fd_ringbuffer_flags flags); /* add (if needed) bo to submit and return index: */ static uint32_t -append_bo(struct msm_submit *submit, struct fd_bo *bo) +append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) { - struct msm_bo *msm_bo = to_msm_bo(bo); - uint32_t idx; - - /* NOTE: it is legal to use the same bo on different threads for - * different submits. But it is not legal to use the same submit - * from given threads. - */ - idx = READ_ONCE(msm_bo->idx); - - if (unlikely((idx >= submit->nr_submit_bos) || - (submit->submit_bos[idx].handle != bo->handle))) { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - idx = APPEND( - submit, submit_bos, - (struct drm_msm_gem_submit_bo){ - .flags = bo->reloc_flags & (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE), - .handle = bo->handle, - .presumed = 0, - }); - APPEND(submit, bos, fd_bo_ref(bo)); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->idx = idx; - } - - return idx; + struct msm_bo *msm_bo = to_msm_bo(bo); + uint32_t idx; + pthread_mutex_lock(&idx_lock); + if (likely(msm_bo->current_submit_seqno == submit->seqno)) { + idx = msm_bo->idx; + } else { + uint32_t hash = _mesa_hash_pointer(bo); + struct hash_entry *entry; + + entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); + if (entry) { + /* found */ + idx = (uint32_t)(uintptr_t)entry->data; + } else { + idx = APPEND(submit, submit_bos); + idx = APPEND(submit, bos); + + submit->submit_bos[idx].flags = 0; + submit->submit_bos[idx].handle = bo->handle; + submit->submit_bos[idx].presumed = 0; + + submit->bos[idx] = fd_bo_ref(bo); + + _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, + (void *)(uintptr_t)idx); + } + msm_bo->current_submit_seqno = submit->seqno; + msm_bo->idx = idx; + } + pthread_mutex_unlock(&idx_lock); + if (flags & FD_RELOC_READ) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; + if (flags & FD_RELOC_WRITE) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; + return idx; } static void append_ring(struct set *set, struct fd_ringbuffer *ring) { - uint32_t hash = _mesa_hash_pointer(ring); + uint32_t hash = _mesa_hash_pointer(ring); - if (!_mesa_set_search_pre_hashed(set, hash, ring)) { - fd_ringbuffer_ref(ring); - _mesa_set_add_pre_hashed(set, hash, ring); - } + if (!_mesa_set_search_pre_hashed(set, hash, ring)) { + fd_ringbuffer_ref(ring); + _mesa_set_add_pre_hashed(set, hash, ring); + } } static void msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer *msm_ring, uint32_t size) + struct msm_ringbuffer *msm_ring, uint32_t size) { - struct msm_submit *msm_submit = to_msm_submit(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; + struct msm_submit *msm_submit = to_msm_submit(submit); + unsigned suballoc_offset = 0; + struct fd_bo *suballoc_bo = NULL; - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer *suballoc_ring = - to_msm_ringbuffer(msm_submit->suballoc_ring); + if (msm_submit->suballoc_ring) { + struct msm_ringbuffer *suballoc_ring = + to_msm_ringbuffer(msm_submit->suballoc_ring); - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = - fd_ringbuffer_size(msm_submit->suballoc_ring) 
+ suballoc_ring->offset; + suballoc_bo = suballoc_ring->ring_bo; + suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + + suballoc_ring->offset; - suballoc_offset = align(suballoc_offset, 0x10); + suballoc_offset = align(suballoc_offset, 0x10); - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } + if ((size + suballoc_offset) > suballoc_bo->size) { + suballoc_bo = NULL; + } + } - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, 0x8000); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } + if (!suballoc_bo) { + // TODO possibly larger size for streaming bo? + msm_ring->ring_bo = fd_bo_new_ring( + submit->pipe->dev, 0x8000, 0); + msm_ring->offset = 0; + } else { + msm_ring->ring_bo = fd_bo_ref(suballoc_bo); + msm_ring->offset = suballoc_offset; + } - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; + struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); + msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); + if (old_suballoc_ring) + fd_ringbuffer_del(old_suballoc_ring); } static struct fd_ringbuffer * msm_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) + enum fd_ringbuffer_flags flags) { - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_ringbuffer *msm_ring; + struct msm_submit *msm_submit = to_msm_submit(submit); + struct msm_ringbuffer *msm_ring; - msm_ring = slab_alloc_st(&msm_submit->ring_pool); + msm_ring = slab_alloc_st(&msm_submit->ring_pool); - msm_ring->u.submit = submit; + msm_ring->u.submit = submit; - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; + /* NOTE: needs to be before _suballoc_ring_bo() since it could + * increment the refcnt of the current ring + */ + msm_ring->base.refcnt = 1; - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; + if (flags & FD_RINGBUFFER_STREAMING) { + msm_submit_suballoc_ring_bo(submit, msm_ring, size); + } else { + if (flags & FD_RINGBUFFER_GROWABLE) + size = INIT_SIZE; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size); - } + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); + } - if (!msm_ringbuffer_init(msm_ring, size, flags)) - return NULL; + if (!msm_ringbuffer_init(msm_ring, size, flags)) + return NULL; - return &msm_ring->base; + if (flags & FD_RINGBUFFER_PRIMARY) { + debug_assert(!msm_submit->primary); + msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); + } + + return &msm_ring->base; } static struct drm_msm_gem_submit_reloc * handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring) { - struct msm_cmd *cmd = ring->cmd; - struct drm_msm_gem_submit_reloc *relocs; + struct msm_cmd *cmd = ring->cmd; + struct drm_msm_gem_submit_reloc *relocs; + + relocs = malloc(cmd->nr_relocs * sizeof(*relocs)); - relocs = malloc(cmd->nr_relocs * sizeof(*relocs)); + for (unsigned i = 0; i < cmd->nr_relocs; i++) { + unsigned idx = cmd->relocs[i].reloc_idx; + struct fd_bo *bo = ring->u.reloc_bos[idx].bo; + unsigned flags = 0; 
- for (unsigned i = 0; i < cmd->nr_relocs; i++) { - unsigned idx = cmd->relocs[i].reloc_idx; - struct fd_bo *bo = ring->u.reloc_bos[idx]; + if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ) + flags |= FD_RELOC_READ; + if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE) + flags |= FD_RELOC_WRITE; - relocs[i] = cmd->relocs[i]; - relocs[i].reloc_idx = append_bo(submit, bo); - } + relocs[i] = cmd->relocs[i]; + relocs[i].reloc_idx = append_bo(submit, bo, flags); + } - return relocs; + return relocs; } static int msm_submit_flush(struct fd_submit *submit, int in_fence_fd, - struct fd_submit_fence *out_fence) + int *out_fence_fd, uint32_t *out_fence) { - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - struct drm_msm_gem_submit req = { - .flags = msm_pipe->pipe, - .queueid = msm_pipe->queue_id, - }; - int ret; - - finalize_current_cmd(submit->primary); - append_ring(msm_submit->ring_set, submit->primary); - - unsigned nr_cmds = 0; - unsigned nr_objs = 0; - - set_foreach (msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - nr_cmds += 1; - nr_objs += 1; - } else { - if (ring != submit->primary) - finalize_current_cmd(ring); - nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds; - } - } - - void *obj_relocs[nr_objs]; - struct drm_msm_gem_submit_cmd cmds[nr_cmds]; - unsigned i = 0, o = 0; - - set_foreach (msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - debug_assert(i < nr_cmds); - - // TODO handle relocs: - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - - debug_assert(o < nr_objs); - - void *relocs = handle_stateobj_relocs(msm_submit, msm_ring); - obj_relocs[o++] = relocs; - - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - cmds[i].submit_idx = append_bo(msm_submit, msm_ring->ring_bo); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = offset_bytes(ring->cur, ring->start); - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->cmd->nr_relocs; - cmds[i].relocs = VOID2U64(relocs); - - i++; - } else { - for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) { - if (ring->flags & FD_RINGBUFFER_PRIMARY) { - cmds[i].type = MSM_SUBMIT_CMD_BUF; - } else { - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - } - cmds[i].submit_idx = - append_bo(msm_submit, msm_ring->u.cmds[j]->ring_bo); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = msm_ring->u.cmds[j]->size; - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs; - cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs); - - i++; - } - } - } - - simple_mtx_lock(&table_lock); - for (unsigned j = 0; j < msm_submit->nr_bos; j++) { - fd_bo_add_fence(msm_submit->bos[j], submit->pipe, submit->fence); - } - simple_mtx_unlock(&table_lock); - - if (in_fence_fd != -1) { - req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; - req.fence_fd = in_fence_fd; - } - - if (out_fence && out_fence->use_fence_fd) { - req.flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - /* needs to be after get_cmd() as that could create bos/cmds table: */ - req.bos = VOID2U64(msm_submit->submit_bos), - req.nr_bos = msm_submit->nr_submit_bos; - req.cmds = VOID2U64(cmds), req.nr_cmds = nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, &req, - sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - 
msm_dump_submit(&req); - } else if (!ret && out_fence) { - out_fence->fence.kfence = req.fence; - out_fence->fence.ufence = submit->fence; - out_fence->fence_fd = req.fence_fd; - } - - for (unsigned o = 0; o < nr_objs; o++) - free(obj_relocs[o]); - - return ret; + struct msm_submit *msm_submit = to_msm_submit(submit); + struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); + struct drm_msm_gem_submit req = { + .flags = msm_pipe->pipe, + .queueid = msm_pipe->queue_id, + }; + int ret; + + debug_assert(msm_submit->primary); + + finalize_current_cmd(msm_submit->primary); + append_ring(msm_submit->ring_set, msm_submit->primary); + + unsigned nr_cmds = 0; + unsigned nr_objs = 0; + + set_foreach(msm_submit->ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + nr_cmds += 1; + nr_objs += 1; + } else { + if (ring != msm_submit->primary) + finalize_current_cmd(ring); + nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds; + } + } + + void *obj_relocs[nr_objs]; + struct drm_msm_gem_submit_cmd cmds[nr_cmds]; + unsigned i = 0, o = 0; + + set_foreach(msm_submit->ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + debug_assert(i < nr_cmds); + + // TODO handle relocs: + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + + debug_assert(o < nr_objs); + + void *relocs = handle_stateobj_relocs(msm_submit, msm_ring); + obj_relocs[o++] = relocs; + + cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; + cmds[i].submit_idx = + append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ); + cmds[i].submit_offset = msm_ring->offset; + cmds[i].size = offset_bytes(ring->cur, ring->start); + cmds[i].pad = 0; + cmds[i].nr_relocs = msm_ring->cmd->nr_relocs; + cmds[i].relocs = VOID2U64(relocs); + + i++; + } else { + for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) { + if (ring->flags & FD_RINGBUFFER_PRIMARY) { + cmds[i].type = MSM_SUBMIT_CMD_BUF; + } else { + cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; + } + cmds[i].submit_idx = append_bo(msm_submit, + msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ); + cmds[i].submit_offset = msm_ring->offset; + cmds[i].size = msm_ring->u.cmds[j]->size; + cmds[i].pad = 0; + cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs; + cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs); + + i++; + } + } + } + + if (in_fence_fd != -1) { + req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; + req.fence_fd = in_fence_fd; + } + + if (out_fence_fd) { + req.flags |= MSM_SUBMIT_FENCE_FD_OUT; + } + + /* needs to be after get_cmd() as that could create bos/cmds table: */ + req.bos = VOID2U64(msm_submit->submit_bos), + req.nr_bos = msm_submit->nr_submit_bos; + req.cmds = VOID2U64(cmds), + req.nr_cmds = nr_cmds; + + DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); + + ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); + msm_dump_submit(&req); + } else if (!ret) { + if (out_fence) + *out_fence = req.fence; + + if (out_fence_fd) + *out_fence_fd = req.fence_fd; + } + + for (unsigned o = 0; o < nr_objs; o++) + free(obj_relocs[o]); + + return ret; } static void unref_rings(struct set_entry *entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - fd_ringbuffer_del(ring); + struct fd_ringbuffer *ring = (void *)entry->key; + fd_ringbuffer_del(ring); } static void msm_submit_destroy(struct fd_submit *submit) { - struct msm_submit *msm_submit = 
to_msm_submit(submit); + struct msm_submit *msm_submit = to_msm_submit(submit); - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); + if (msm_submit->primary) + fd_ringbuffer_del(msm_submit->primary); + if (msm_submit->suballoc_ring) + fd_ringbuffer_del(msm_submit->suballoc_ring); - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - _mesa_set_destroy(msm_submit->ring_set, unref_rings); + _mesa_hash_table_destroy(msm_submit->bo_table, NULL); + _mesa_set_destroy(msm_submit->ring_set, unref_rings); - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy(&msm_submit->ring_pool); + // TODO it would be nice to have a way to debug_assert() if all + // rb's haven't been free'd back to the slab, because that is + // an indication that we are leaking bo's + slab_destroy(&msm_submit->ring_pool); - for (unsigned i = 0; i < msm_submit->nr_bos; i++) - fd_bo_del(msm_submit->bos[i]); + for (unsigned i = 0; i < msm_submit->nr_bos; i++) + fd_bo_del(msm_submit->bos[i]); - free(msm_submit->submit_bos); - free(msm_submit->bos); - free(msm_submit); + free(msm_submit->submit_bos); + free(msm_submit->bos); + free(msm_submit); } static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_new_ringbuffer, - .flush = msm_submit_flush, - .destroy = msm_submit_destroy, + .new_ringbuffer = msm_submit_new_ringbuffer, + .flush = msm_submit_flush, + .destroy = msm_submit_destroy, }; struct fd_submit * msm_submit_new(struct fd_pipe *pipe) { - struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - - msm_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - msm_submit->ring_set = - _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - // TODO tune size: - slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16); - - submit = &msm_submit->base; - submit->funcs = &submit_funcs; - - return submit; + struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit)); + struct fd_submit *submit; + static unsigned submit_cnt = 0; + + msm_submit->seqno = ++submit_cnt; + msm_submit->bo_table = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + msm_submit->ring_set = _mesa_set_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + // TODO tune size: + slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16); + + submit = &msm_submit->base; + submit->pipe = pipe; + submit->funcs = &submit_funcs; + + return submit; } + static void finalize_current_cmd(struct fd_ringbuffer *ring) { - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); + if (!msm_ring->cmd) + return; - if (!msm_ring->cmd) - return; + debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo); - debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo); + unsigned idx = APPEND(&msm_ring->u, cmds); - msm_ring->cmd->size = offset_bytes(ring->cur, ring->start); - APPEND(&msm_ring->u, cmds, msm_ring->cmd); - msm_ring->cmd = NULL; + msm_ring->u.cmds[idx] = msm_ring->cmd; + msm_ring->cmd = NULL; + + msm_ring->u.cmds[idx]->size = offset_bytes(ring->cur, ring->start); } static void msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size) { - struct 
msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_pipe *pipe = msm_ring->u.submit->pipe; - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); + debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - finalize_current_cmd(ring); + finalize_current_cmd(ring); - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size); - msm_ring->cmd = cmd_new(msm_ring->ring_bo); + fd_bo_del(msm_ring->ring_bo); + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); + msm_ring->cmd = cmd_new(msm_ring->ring_bo); - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size / 4]); - ring->cur = ring->start; - ring->size = size; + ring->start = fd_bo_map(msm_ring->ring_bo); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + ring->size = size; } static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) + const struct fd_reloc *reloc) { - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe; - unsigned reloc_idx; - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo)); - - /* this gets fixed up at submit->flush() time, since this state- - * object rb can be used with many different submits - */ - reloc_idx = idx; - - pipe = msm_ring->u.pipe; - } else { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - - reloc_idx = append_bo(msm_submit, reloc->bo); - - pipe = msm_ring->u.submit->pipe; - } - - APPEND(msm_ring->cmd, relocs, - (struct drm_msm_gem_submit_reloc){ - .reloc_idx = reloc_idx, - .reloc_offset = reloc->offset, - .or = reloc->orlo, - .shift = reloc->shift, - .submit_offset = - offset_bytes(ring->cur, ring->start) + msm_ring->offset, - }); - - ring->cur++; - - if (fd_dev_64b(&pipe->dev_id)) { - APPEND(msm_ring->cmd, relocs, - (struct drm_msm_gem_submit_reloc){ - .reloc_idx = reloc_idx, - .reloc_offset = reloc->offset, - .or = reloc->orhi, - .shift = reloc->shift - 32, - .submit_offset = - offset_bytes(ring->cur, ring->start) + msm_ring->offset, - }); - - ring->cur++; - } + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_pipe *pipe; + unsigned reloc_idx; + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); + msm_ring->u.reloc_bos[idx].flags = reloc->flags; + + /* this gets fixed up at submit->flush() time, since this state- + * object rb can be used with many different submits + */ + reloc_idx = idx; + + pipe = msm_ring->u.pipe; + } else { + struct msm_submit *msm_submit = + to_msm_submit(msm_ring->u.submit); + + reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags); + + pipe = msm_ring->u.submit->pipe; + } + + struct drm_msm_gem_submit_reloc *r; + unsigned idx = APPEND(msm_ring->cmd, relocs); + + r = &msm_ring->cmd->relocs[idx]; + + r->reloc_idx = reloc_idx; + r->reloc_offset = reloc->offset; + r->or = reloc->or; + r->shift = reloc->shift; + r->submit_offset = offset_bytes(ring->cur, ring->start) + + msm_ring->offset; + + ring->cur++; + + if (pipe->gpu_id >= 500) { + idx = APPEND(msm_ring->cmd, relocs); + r = &msm_ring->cmd->relocs[idx]; + + r->reloc_idx = reloc_idx; + r->reloc_offset = reloc->offset; + r->or = reloc->orhi; + r->shift = reloc->shift - 32; + r->submit_offset = offset_bytes(ring->cur, ring->start) + + msm_ring->offset; + + 
ring->cur++; + } } static void append_stateobj_rings(struct msm_submit *submit, struct fd_ringbuffer *target) { - struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); + struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - debug_assert(target->flags & _FD_RINGBUFFER_OBJECT); + debug_assert(target->flags & _FD_RINGBUFFER_OBJECT); - set_foreach (msm_target->u.ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; + set_foreach(msm_target->u.ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; - append_ring(submit->ring_set, ring); + append_ring(submit->ring_set, ring); - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_stateobj_rings(submit, ring); - } - } + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + append_stateobj_rings(submit, ring); + } + } } static uint32_t msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) + struct fd_ringbuffer *target, uint32_t cmd_idx) { - struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx]->ring_bo; - size = msm_target->u.cmds[cmd_idx]->size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .iova = bo->iova + msm_target->offset, - .offset = msm_target->offset, - }); - - if (!size) - return 0; - - if ((target->flags & _FD_RINGBUFFER_OBJECT) && - !(ring->flags & _FD_RINGBUFFER_OBJECT)) { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - - append_stateobj_rings(msm_submit, target); - } - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_ring(msm_ring->u.ring_set, target); - } else { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - append_ring(msm_submit->ring_set, target); - } - - return size; + struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_bo *bo; + uint32_t size; + + if ((target->flags & FD_RINGBUFFER_GROWABLE) && + (cmd_idx < msm_target->u.nr_cmds)) { + bo = msm_target->u.cmds[cmd_idx]->ring_bo; + size = msm_target->u.cmds[cmd_idx]->size; + } else { + bo = msm_target->ring_bo; + size = offset_bytes(target->cur, target->start); + } + + msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ, + .offset = msm_target->offset, + }); + + if ((target->flags & _FD_RINGBUFFER_OBJECT) && + !(ring->flags & _FD_RINGBUFFER_OBJECT)) { + struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); + + append_stateobj_rings(msm_submit, target); + } + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + append_ring(msm_ring->u.ring_set, target); + } else { + struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); + append_ring(msm_submit->ring_set, target); + } + + return size; } static uint32_t msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) { - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer(ring)->u.nr_cmds + 1; - return 1; -} - -static bool -msm_ringbuffer_check_size(struct fd_ringbuffer *ring) -{ - assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_submit *submit = msm_ring->u.submit; - struct fd_pipe *pipe = submit->pipe; - - if 
((fd_device_version(pipe->dev) < FD_VERSION_UNLIMITED_CMDS) && - ((ring->cur - ring->start) > (ring->size / 4 - 0x1000))) { - return false; - } - - if (to_msm_submit(submit)->nr_bos > MAX_ARRAY_SIZE/2) { - return false; - } - - return true; + if (ring->flags & FD_RINGBUFFER_GROWABLE) + return to_msm_ringbuffer(ring)->u.nr_cmds + 1; + return 1; } static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring) { - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - fd_bo_del(msm_ring->ring_bo); - if (msm_ring->cmd) - cmd_free(msm_ring->cmd); + fd_bo_del(msm_ring->ring_bo); + if (msm_ring->cmd) + cmd_free(msm_ring->cmd); - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i]); - } + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { + fd_bo_del(msm_ring->u.reloc_bos[i].bo); + } - _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); + _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); - free(msm_ring->u.reloc_bos); - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; + free(msm_ring->u.reloc_bos); + free(msm_ring); + } else { + struct fd_submit *submit = msm_ring->u.submit; - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - cmd_free(msm_ring->u.cmds[i]); - } + for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { + cmd_free(msm_ring->u.cmds[i]); + } - free(msm_ring->u.cmds); - slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); - } + free(msm_ring->u.cmds); + slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); + } } static const struct fd_ringbuffer_funcs ring_funcs = { - .grow = msm_ringbuffer_grow, - .emit_reloc = msm_ringbuffer_emit_reloc, - .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, - .cmd_count = msm_ringbuffer_cmd_count, - .check_size = msm_ringbuffer_check_size, - .destroy = msm_ringbuffer_destroy, + .grow = msm_ringbuffer_grow, + .emit_reloc = msm_ringbuffer_emit_reloc, + .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, + .cmd_count = msm_ringbuffer_cmd_count, + .destroy = msm_ringbuffer_destroy, }; static inline struct fd_ringbuffer * msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) + enum fd_ringbuffer_flags flags) { - struct fd_ringbuffer *ring = &msm_ring->base; + struct fd_ringbuffer *ring = &msm_ring->base; - debug_assert(msm_ring->ring_bo); + debug_assert(msm_ring->ring_bo); - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size / 4]); - ring->cur = ring->start; + uint8_t *base = fd_bo_map(msm_ring->ring_bo); + ring->start = (void *)(base + msm_ring->offset); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; - ring->size = size; - ring->flags = flags; + ring->size = size; + ring->flags = flags; - ring->funcs = &ring_funcs; + ring->funcs = &ring_funcs; - msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; + msm_ring->u.cmds = NULL; + msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - msm_ring->cmd = cmd_new(msm_ring->ring_bo); + msm_ring->cmd = cmd_new(msm_ring->ring_bo); - return ring; + return ring; } struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) { - struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring)); + struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring)); - msm_ring->u.pipe = pipe; - 
msm_ring->offset = 0;
-	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);
-	msm_ring->base.refcnt = 1;
+	msm_ring->u.pipe = pipe;
+	msm_ring->offset = 0;
+	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
+	msm_ring->base.refcnt = 1;

-	msm_ring->u.reloc_bos = NULL;
-	msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;
+	msm_ring->u.reloc_bos = NULL;
+	msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

-	msm_ring->u.ring_set =
-		_mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+	msm_ring->u.ring_set = _mesa_set_create(NULL,
+			_mesa_hash_pointer, _mesa_key_pointer_equal);

-	return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
+	return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}
diff --git a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c b/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c
index 1d7e7b949..2b8f53172 100644
--- a/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c
+++ b/lib/mesa/src/freedreno/drm/msm_ringbuffer_sp.c
@@ -26,10 +26,8 @@
 #include <assert.h>
 #include <inttypes.h>
-#include <pthread.h>
 
 #include "util/hash_table.h"
-#include "util/os_file.h"
 #include "util/slab.h"
 
 #include "drm/freedreno_ringbuffer.h"
@@ -40,48 +38,34 @@
  * (but still builds a bos table)
  */
 
-#define INIT_SIZE 0x1000
-#define SUBALLOC_SIZE (32 * 1024)
+#define INIT_SIZE 0x1000
 
-/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
- * instead use a condition-variable. Note that pipe->flush() is not expected
- * to be a common/hot path.
- */
-static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
 
 struct msm_submit_sp {
-	struct fd_submit base;
-
-	DECLARE_ARRAY(struct fd_bo *, bos);
+	struct fd_submit base;
 
-	/* maps fd_bo to idx in bos table: */
-	struct hash_table *bo_table;
+	DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
+	DECLARE_ARRAY(struct fd_bo *, bos);
 
-	struct slab_child_pool ring_pool;
+	unsigned seqno;
 
-	/* Allow for sub-allocation of stateobj ring buffers (ie. sharing
-	 * the same underlying bo)..
-	 *
-	 * We also rely on previous stateobj having been fully constructed
-	 * so we can reclaim extra space at its end.
-	 */
-	struct fd_ringbuffer *suballoc_ring;
+	/* maps fd_bo to idx in bos table: */
+	struct hash_table *bo_table;
 
-	/* Flush args, potentially attached to the last submit in the list
-	 * of submits to merge:
-	 */
-	int in_fence_fd;
-	struct fd_submit_fence *out_fence;
+	struct slab_mempool ring_pool;
 
-	/* State for enqueued submits:
-	 */
-	struct list_head submit_list;   /* includes this submit as last element */
+	struct fd_ringbuffer *primary;
 
-	/* Used in case out_fence==NULL: */
-	struct util_queue_fence fence;
+	/* Allow for sub-allocation of stateobj ring buffers (ie. sharing
+	 * the same underlying bo)..
+	 *
+	 * We also rely on previous stateobj having been fully constructed
+	 * so we can reclaim extra space at its end.
+	 */
+	struct fd_ringbuffer *suballoc_ring;
 };
 FD_DEFINE_CAST(fd_submit, msm_submit_sp);
@@ -90,769 +74,495 @@ FD_DEFINE_CAST(fd_submit, msm_submit_sp);
 * it.
*/
 
 struct msm_cmd_sp {
-	struct fd_bo *ring_bo;
-	unsigned size;
+	struct fd_bo *ring_bo;
+	unsigned size;
+};
+
+/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
+ * later copy into the submit when the stateobj rb is later referenced by
+ * a regular rb:
+ */
+struct msm_reloc_bo_sp {
+	struct fd_bo *bo;
+	unsigned flags;
 };
 
 struct msm_ringbuffer_sp {
-	struct fd_ringbuffer base;
-
-	/* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
-	unsigned offset;
-
-	union {
-		/* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
-		 * this one
-		 */
-		struct {
-			struct fd_pipe *pipe;
-			DECLARE_ARRAY(struct fd_bo *, reloc_bos);
-		};
-		/* for other cases: */
-		struct {
-			struct fd_submit *submit;
-			DECLARE_ARRAY(struct msm_cmd_sp, cmds);
-		};
-	} u;
-
-	struct fd_bo *ring_bo;
+	struct fd_ringbuffer base;
+
+	/* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
+	unsigned offset;
+
+// TODO check disasm.. hopefully the compiler's CSE can realize that
+// reloc_bos and cmds are at the same offsets and optimize some
+// divergent cases into a single case
+	union {
+		/* for _FD_RINGBUFFER_OBJECT case: */
+		struct {
+			struct fd_pipe *pipe;
+			DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
+		};
+		/* for other cases: */
+		struct {
+			struct fd_submit *submit;
+			DECLARE_ARRAY(struct msm_cmd_sp, cmds);
+		};
+	} u;
+
+	struct fd_bo *ring_bo;
 };
 FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);
 
 static void finalize_current_cmd(struct fd_ringbuffer *ring);
-static struct fd_ringbuffer *
-msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
-                       enum fd_ringbuffer_flags flags);
+static struct fd_ringbuffer * msm_ringbuffer_sp_init(
+		struct msm_ringbuffer_sp *msm_ring,
+		uint32_t size, enum fd_ringbuffer_flags flags);
 
 /* add (if needed) bo to submit and return index: */
 static uint32_t
-msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
+append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
 {
-	struct msm_bo *msm_bo = to_msm_bo(bo);
-	uint32_t idx;
-
-	/* NOTE: it is legal to use the same bo on different threads for
-	 * different submits. But it is not legal to use the same submit
-	 * from different threads.
- */ - idx = READ_ONCE(msm_bo->idx); - - if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - idx = APPEND(submit, bos, fd_bo_ref(bo)); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->idx = idx; - } - - return idx; + struct msm_bo *msm_bo = to_msm_bo(bo); + uint32_t idx; + pthread_mutex_lock(&idx_lock); + if (likely(msm_bo->current_submit_seqno == submit->seqno)) { + idx = msm_bo->idx; + } else { + uint32_t hash = _mesa_hash_pointer(bo); + struct hash_entry *entry; + + entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); + if (entry) { + /* found */ + idx = (uint32_t)(uintptr_t)entry->data; + } else { + idx = APPEND(submit, submit_bos); + idx = APPEND(submit, bos); + + submit->submit_bos[idx].flags = 0; + submit->submit_bos[idx].handle = bo->handle; + submit->submit_bos[idx].presumed = 0; + + submit->bos[idx] = fd_bo_ref(bo); + + _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, + (void *)(uintptr_t)idx); + } + msm_bo->current_submit_seqno = submit->seqno; + msm_bo->idx = idx; + } + pthread_mutex_unlock(&idx_lock); + if (flags & FD_RELOC_READ) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; + if (flags & FD_RELOC_WRITE) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; + if (flags & FD_RELOC_DUMP) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP; + return idx; } static void msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer_sp *msm_ring, uint32_t size) + struct msm_ringbuffer_sp *msm_ring, uint32_t size) { - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + unsigned suballoc_offset = 0; + struct fd_bo *suballoc_bo = NULL; - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer_sp *suballoc_ring = - to_msm_ringbuffer_sp(msm_submit->suballoc_ring); + if (msm_submit->suballoc_ring) { + struct msm_ringbuffer_sp *suballoc_ring = + to_msm_ringbuffer_sp(msm_submit->suballoc_ring); - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = - fd_ringbuffer_size(msm_submit->suballoc_ring) + suballoc_ring->offset; + suballoc_bo = suballoc_ring->ring_bo; + suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + + suballoc_ring->offset; - suballoc_offset = align(suballoc_offset, 0x10); + suballoc_offset = align(suballoc_offset, 0x10); - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } + if ((size + suballoc_offset) > suballoc_bo->size) { + suballoc_bo = NULL; + } + } - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } + if (!suballoc_bo) { + // TODO possibly larger size for streaming bo? 
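+		/* no suballoc bo yet, or not enough space left at its tail:
+		 * start a fresh backing bo and suballocate from its beginning
+		 */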
+ msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, + 0x8000, DRM_FREEDRENO_GEM_GPUREADONLY); + msm_ring->offset = 0; + } else { + msm_ring->ring_bo = fd_bo_ref(suballoc_bo); + msm_ring->offset = suballoc_offset; + } - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; + struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); + msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); + if (old_suballoc_ring) + fd_ringbuffer_del(old_suballoc_ring); } static struct fd_ringbuffer * msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) + enum fd_ringbuffer_flags flags) { - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_ringbuffer_sp *msm_ring; + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + struct msm_ringbuffer_sp *msm_ring; - msm_ring = slab_alloc(&msm_submit->ring_pool); + msm_ring = slab_alloc_st(&msm_submit->ring_pool); - msm_ring->u.submit = submit; + msm_ring->u.submit = submit; - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; + /* NOTE: needs to be before _suballoc_ring_bo() since it could + * increment the refcnt of the current ring + */ + msm_ring->base.refcnt = 1; - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; + if (flags & FD_RINGBUFFER_STREAMING) { + msm_submit_suballoc_ring_bo(submit, msm_ring, size); + } else { + if (flags & FD_RINGBUFFER_GROWABLE) + size = INIT_SIZE; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size); - } + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + } - if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) - return NULL; - - return &msm_ring->base; -} + if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) + return NULL; -/** - * Prepare submit for flush, always done synchronously. - * - * 1) Finalize primary ringbuffer, at this point no more cmdstream may - * be written into it, since from the PoV of the upper level driver - * the submit is flushed, even if deferred - * 2) Add cmdstream bos to bos table - * 3) Update bo fences - */ -static bool -msm_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd, - struct fd_submit_fence *out_fence) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - bool has_shared = false; + if (flags & FD_RINGBUFFER_PRIMARY) { + debug_assert(!msm_submit->primary); + msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); + } - finalize_current_cmd(submit->primary); - - struct msm_ringbuffer_sp *primary = - to_msm_ringbuffer_sp(submit->primary); - - for (unsigned i = 0; i < primary->u.nr_cmds; i++) - msm_submit_append_bo(msm_submit, primary->u.cmds[i].ring_bo); - - simple_mtx_lock(&table_lock); - for (unsigned i = 0; i < msm_submit->nr_bos; i++) { - fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence); - has_shared |= msm_submit->bos[i]->shared; - } - simple_mtx_unlock(&table_lock); - - msm_submit->out_fence = out_fence; - msm_submit->in_fence_fd = (in_fence_fd == -1) ? 
-		-1 : os_dupfd_cloexec(in_fence_fd);
-
-	return has_shared;
+	return &msm_ring->base;
 }
 
 static int
-flush_submit_list(struct list_head *submit_list)
-{
-	struct msm_submit_sp *msm_submit = to_msm_submit_sp(last_submit(submit_list));
-	struct msm_pipe *msm_pipe = to_msm_pipe(msm_submit->base.pipe);
-	struct drm_msm_gem_submit req = {
-		.flags = msm_pipe->pipe,
-		.queueid = msm_pipe->queue_id,
-	};
-	int ret;
-
-	unsigned nr_cmds = 0;
-
-	/* Determine the number of extra cmds's from deferred submits that
-	 * we will be merging in:
-	 */
-	foreach_submit (submit, submit_list) {
-		assert(submit->pipe == &msm_pipe->base);
-		nr_cmds += to_msm_ringbuffer_sp(submit->primary)->u.nr_cmds;
-	}
-
-	struct drm_msm_gem_submit_cmd cmds[nr_cmds];
-
-	unsigned cmd_idx = 0;
-
-	/* Build up the table of cmds, and for all but the last submit in the
-	 * list, merge their bo tables into the last submit.
-	 */
-	foreach_submit_safe (submit, submit_list) {
-		struct msm_ringbuffer_sp *deferred_primary =
-			to_msm_ringbuffer_sp(submit->primary);
-
-		for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
-			cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
-			cmds[cmd_idx].submit_idx =
-				msm_submit_append_bo(msm_submit, deferred_primary->u.cmds[i].ring_bo);
-			cmds[cmd_idx].submit_offset = deferred_primary->offset;
-			cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
-			cmds[cmd_idx].pad = 0;
-			cmds[cmd_idx].nr_relocs = 0;
-
-			cmd_idx++;
-		}
-
-		/* We are merging all the submits in the list into the last submit,
-		 * so the remainder of the loop body doesn't apply to the last submit
-		 */
-		if (submit == last_submit(submit_list)) {
-			DEBUG_MSG("merged %u submits", cmd_idx);
-			break;
-		}
-
-		struct msm_submit_sp *msm_deferred_submit = to_msm_submit_sp(submit);
-		for (unsigned i = 0; i < msm_deferred_submit->nr_bos; i++) {
-			/* Note: if bo is used in both the current submit and the deferred
-			 * submit being merged, we expect to hit the fast-path as we add it
-			 * to the current submit:
-			 */
-			msm_submit_append_bo(msm_submit, msm_deferred_submit->bos[i]);
-		}
-
-		/* Now that the cmds/bos have been transferred over to the current submit,
-		 * we can remove the deferred submit from the list and drop its reference
-		 */
-		list_del(&submit->node);
-		fd_submit_del(submit);
-	}
-
-	if (msm_submit->in_fence_fd != -1) {
-		req.flags |= MSM_SUBMIT_FENCE_FD_IN;
-		req.fence_fd = msm_submit->in_fence_fd;
-		msm_pipe->no_implicit_sync = true;
-	}
-
-	if (msm_pipe->no_implicit_sync) {
-		req.flags |= MSM_SUBMIT_NO_IMPLICIT;
-	}
-
-	if (msm_submit->out_fence && msm_submit->out_fence->use_fence_fd) {
-		req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
-	}
-
-	/* Needs to be after get_cmd() as that could create bos/cmds table:
-	 *
-	 * NOTE allocate on-stack in the common case, but with an upper-
-	 * bound to limit on-stack allocation to 4k:
-	 */
-	const unsigned bo_limit = sizeof(struct drm_msm_gem_submit_bo) / 4096;
-	bool bos_on_stack = msm_submit->nr_bos < bo_limit;
-	struct drm_msm_gem_submit_bo
-		_submit_bos[bos_on_stack ? 
msm_submit->nr_bos : 0]; - struct drm_msm_gem_submit_bo *submit_bos; - if (bos_on_stack) { - submit_bos = _submit_bos; - } else { - submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0])); - } - - for (unsigned i = 0; i < msm_submit->nr_bos; i++) { - submit_bos[i].flags = msm_submit->bos[i]->reloc_flags; - submit_bos[i].handle = msm_submit->bos[i]->handle; - submit_bos[i].presumed = 0; - } - - req.bos = VOID2U64(submit_bos); - req.nr_bos = msm_submit->nr_bos; - req.cmds = VOID2U64(cmds); - req.nr_cmds = nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(msm_pipe->base.dev->fd, DRM_MSM_GEM_SUBMIT, &req, - sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - msm_dump_submit(&req); - } else if (!ret && msm_submit->out_fence) { - msm_submit->out_fence->fence.kfence = req.fence; - msm_submit->out_fence->fence.ufence = msm_submit->base.fence; - msm_submit->out_fence->fence_fd = req.fence_fd; - } - - if (!bos_on_stack) - free(submit_bos); - - pthread_mutex_lock(&flush_mtx); - assert(fd_fence_before(msm_pipe->last_submit_fence, msm_submit->base.fence)); - msm_pipe->last_submit_fence = msm_submit->base.fence; - pthread_cond_broadcast(&flush_cnd); - pthread_mutex_unlock(&flush_mtx); - - if (msm_submit->in_fence_fd != -1) - close(msm_submit->in_fence_fd); - - return ret; -} - -static void -msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index) +msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, + int *out_fence_fd, uint32_t *out_fence) { - struct fd_submit *submit = job; - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - flush_submit_list(&msm_submit->submit_list); - - DEBUG_MSG("finish: %u", submit->fence); + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); + struct drm_msm_gem_submit req = { + .flags = msm_pipe->pipe, + .queueid = msm_pipe->queue_id, + }; + int ret; + + debug_assert(msm_submit->primary); + finalize_current_cmd(msm_submit->primary); + + struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary); + struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds]; + + for (unsigned i = 0; i < primary->u.nr_cmds; i++) { + cmds[i].type = MSM_SUBMIT_CMD_BUF; + cmds[i].submit_idx = append_bo(msm_submit, + primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP); + cmds[i].submit_offset = primary->offset; + cmds[i].size = primary->u.cmds[i].size; + cmds[i].pad = 0; + cmds[i].nr_relocs = 0; + } + + if (in_fence_fd != -1) { + req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; + req.fence_fd = in_fence_fd; + } + + if (out_fence_fd) { + req.flags |= MSM_SUBMIT_FENCE_FD_OUT; + } + + /* needs to be after get_cmd() as that could create bos/cmds table: */ + req.bos = VOID2U64(msm_submit->submit_bos), + req.nr_bos = msm_submit->nr_submit_bos; + req.cmds = VOID2U64(cmds), + req.nr_cmds = primary->u.nr_cmds; + + DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); + + ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); + msm_dump_submit(&req); + } else if (!ret) { + if (out_fence) + *out_fence = req.fence; + + if (out_fence_fd) + *out_fence_fd = req.fence_fd; + } + + return ret; } static void -msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index) +msm_submit_sp_destroy(struct fd_submit *submit) { - struct fd_submit *submit = job; - 
fd_submit_del(submit); -} + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); -static int -enqueue_submit_list(struct list_head *submit_list) -{ - struct fd_submit *submit = last_submit(submit_list); - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_device *msm_dev = to_msm_device(submit->pipe->dev); - - list_replace(submit_list, &msm_submit->submit_list); - list_inithead(submit_list); - - struct util_queue_fence *fence; - if (msm_submit->out_fence) { - fence = &msm_submit->out_fence->ready; - } else { - util_queue_fence_init(&msm_submit->fence); - fence = &msm_submit->fence; - } - - DEBUG_MSG("enqueue: %u", submit->fence); - - util_queue_add_job(&msm_dev->submit_queue, - submit, fence, - msm_submit_sp_flush_execute, - msm_submit_sp_flush_cleanup, - 0); - - return 0; -} + if (msm_submit->primary) + fd_ringbuffer_del(msm_submit->primary); + if (msm_submit->suballoc_ring) + fd_ringbuffer_del(msm_submit->suballoc_ring); -static bool -should_defer(struct fd_submit *submit) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - /* if too many bo's, it may not be worth the CPU cost of submit merging: */ - if (msm_submit->nr_bos > 30) - return false; + // TODO it would be nice to have a way to debug_assert() if all + // rb's haven't been free'd back to the slab, because that is + // an indication that we are leaking bo's + slab_destroy(&msm_submit->ring_pool); - /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k - * cmds before we exceed the size of the ringbuffer, which results in - * deadlock writing into the RB (ie. kernel doesn't finish writing into - * the RB so it doesn't kick the GPU to start consuming from the RB) - */ - if (submit->pipe->dev->deferred_cmds > 128) - return false; + for (unsigned i = 0; i < msm_submit->nr_bos; i++) + fd_bo_del(msm_submit->bos[i]); - return true; + free(msm_submit->submit_bos); + free(msm_submit->bos); + free(msm_submit); } -static int -msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, - struct fd_submit_fence *out_fence) -{ - struct fd_device *dev = submit->pipe->dev; - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - - /* Acquire lock before flush_prep() because it is possible to race between - * this and pipe->flush(): - */ - simple_mtx_lock(&dev->submit_lock); - - /* If there are deferred submits from another fd_pipe, flush them now, - * since we can't merge submits from different submitqueue's (ie. they - * could have different priority, etc) - */ - if (!list_is_empty(&dev->deferred_submits) && - (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) { - struct list_head submit_list; - - list_replace(&dev->deferred_submits, &submit_list); - list_inithead(&dev->deferred_submits); - dev->deferred_cmds = 0; - - enqueue_submit_list(&submit_list); - } - - list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits); - - bool has_shared = msm_submit_sp_flush_prep(submit, in_fence_fd, out_fence); - - assert(fd_fence_before(msm_pipe->last_enqueue_fence, submit->fence)); - msm_pipe->last_enqueue_fence = submit->fence; - - /* If we don't need an out-fence, we can defer the submit. - * - * TODO we could defer submits with in-fence as well.. 
if we took our own - * reference to the fd, and merged all the in-fence-fd's when we flush the - * deferred submits - */ - if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) { - DEBUG_MSG("defer: %u", submit->fence); - dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary); - assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); - simple_mtx_unlock(&dev->submit_lock); - - return 0; - } - - struct list_head submit_list; - - list_replace(&dev->deferred_submits, &submit_list); - list_inithead(&dev->deferred_submits); - dev->deferred_cmds = 0; - - simple_mtx_unlock(&dev->submit_lock); - - return enqueue_submit_list(&submit_list); -} +static const struct fd_submit_funcs submit_funcs = { + .new_ringbuffer = msm_submit_sp_new_ringbuffer, + .flush = msm_submit_sp_flush, + .destroy = msm_submit_sp_destroy, +}; -void -msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence) +struct fd_submit * +msm_submit_sp_new(struct fd_pipe *pipe) { - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - struct fd_device *dev = pipe->dev; - struct list_head submit_list; - - DEBUG_MSG("flush: %u", fence); + struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); + struct fd_submit *submit; + static unsigned submit_cnt = 0; - list_inithead(&submit_list); + msm_submit->seqno = ++submit_cnt; + msm_submit->bo_table = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + // TODO tune size: + slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); - simple_mtx_lock(&dev->submit_lock); + submit = &msm_submit->base; + submit->pipe = pipe; + submit->funcs = &submit_funcs; - assert(!fd_fence_after(fence, msm_pipe->last_enqueue_fence)); - - foreach_submit_safe (deferred_submit, &dev->deferred_submits) { - /* We should never have submits from multiple pipes in the deferred - * list. If we did, we couldn't compare their fence to our fence, - * since each fd_pipe is an independent timeline. 
- */ - if (deferred_submit->pipe != pipe) - break; - - if (fd_fence_after(deferred_submit->fence, fence)) - break; - - list_del(&deferred_submit->node); - list_addtail(&deferred_submit->node, &submit_list); - dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary); - } - - assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); + return submit; +} - simple_mtx_unlock(&dev->submit_lock); - if (list_is_empty(&submit_list)) - goto flush_sync; +static void +finalize_current_cmd(struct fd_ringbuffer *ring) +{ + debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - enqueue_submit_list(&submit_list); + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + unsigned idx = APPEND(&msm_ring->u, cmds); -flush_sync: - /* Once we are sure that we've enqueued at least up to the requested - * submit, we need to be sure that submitq has caught up and flushed - * them to the kernel - */ - pthread_mutex_lock(&flush_mtx); - while (fd_fence_before(msm_pipe->last_submit_fence, fence)) { - pthread_cond_wait(&flush_cnd, &flush_mtx); - } - pthread_mutex_unlock(&flush_mtx); + msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo); + msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start); } static void -msm_submit_sp_destroy(struct fd_submit *submit) +msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) { - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_pipe *pipe = msm_ring->u.submit->pipe; - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); + debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy_child(&msm_submit->ring_pool); + finalize_current_cmd(ring); - for (unsigned i = 0; i < msm_submit->nr_bos; i++) - fd_bo_del(msm_submit->bos[i]); + fd_bo_del(msm_ring->ring_bo); + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); - free(msm_submit->bos); - free(msm_submit); + ring->start = fd_bo_map(msm_ring->ring_bo); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + ring->size = size; } -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_sp_new_ringbuffer, - .flush = msm_submit_sp_flush, - .destroy = msm_submit_sp_destroy, -}; - -struct fd_submit * -msm_submit_sp_new(struct fd_pipe *pipe) +static void +msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, + const struct fd_reloc *reloc) { - struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_pipe *pipe; - msm_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); - slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool); + msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); + msm_ring->u.reloc_bos[idx].flags = reloc->flags; - submit = &msm_submit->base; - submit->funcs = &submit_funcs; + pipe = msm_ring->u.pipe; + } else { + struct msm_submit_sp *msm_submit = + to_msm_submit_sp(msm_ring->u.submit); - return submit; -} + append_bo(msm_submit, reloc->bo, reloc->flags); -void 
-msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe) -{ - // TODO tune size: - slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), - 16); -} + pipe = msm_ring->u.submit->pipe; + } -void -msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe) -{ - if (msm_pipe->ring_pool.num_elements) - slab_destroy_parent(&msm_pipe->ring_pool); -} + uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset; + uint32_t dword = iova; + int shift = reloc->shift; -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - APPEND(&msm_ring->u, cmds, - (struct msm_cmd_sp){ - .ring_bo = fd_bo_ref(msm_ring->ring_bo), - .size = offset_bytes(ring->cur, ring->start), - }); -} + if (shift < 0) + dword >>= -shift; + else + dword <<= shift; -static void -msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; + (*ring->cur++) = dword | reloc->or; - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); + if (pipe->gpu_id >= 500) { + dword = iova >> 32; + shift -= 32; - finalize_current_cmd(ring); + if (shift < 0) + dword >>= -shift; + else + dword <<= shift; - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size / 4]); - ring->cur = ring->start; - ring->size = size; + (*ring->cur++) = dword | reloc->orhi; + } } -static inline bool -msm_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo) +static uint32_t +msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx) { - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - if (msm_ring->u.reloc_bos[i] == bo) - return true; - } - return false; + struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); + struct fd_bo *bo; + uint32_t size; + + if ((target->flags & FD_RINGBUFFER_GROWABLE) && + (cmd_idx < msm_target->u.nr_cmds)) { + bo = msm_target->u.cmds[cmd_idx].ring_bo; + size = msm_target->u.cmds[cmd_idx].size; + } else { + bo = msm_target->ring_bo; + size = offset_bytes(target->cur, target->start); + } + + msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ | FD_RELOC_DUMP, + .offset = msm_target->offset, + }); + + if (!(target->flags & _FD_RINGBUFFER_OBJECT)) + return size; + + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = + fd_bo_ref(msm_target->u.reloc_bos[i].bo); + msm_ring->u.reloc_bos[idx].flags = + msm_target->u.reloc_bos[i].flags; + } + } else { + // TODO it would be nice to know whether we have already + // seen this target before. 
But hopefully we hit the + // append_bo() fast path enough for this to not matter: + struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); + + for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { + append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, + msm_target->u.reloc_bos[i].flags); + } + } + + return size; } -#define PTRSZ 64 -#include "msm_ringbuffer_sp.h" -#undef PTRSZ -#define PTRSZ 32 -#include "msm_ringbuffer_sp.h" -#undef PTRSZ - static uint32_t msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) { - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; - return 1; -} - -static bool -msm_ringbuffer_sp_check_size(struct fd_ringbuffer *ring) -{ - assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_submit *submit = msm_ring->u.submit; - - if (to_msm_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) { - return false; - } - - return true; + if (ring->flags & FD_RINGBUFFER_GROWABLE) + return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; + return 1; } static void msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) { - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - fd_bo_del(msm_ring->ring_bo); + fd_bo_del(msm_ring->ring_bo); - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i]); - } - free(msm_ring->u.reloc_bos); + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { + fd_bo_del(msm_ring->u.reloc_bos[i].bo); + } - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; + free(msm_ring); + } else { + struct fd_submit *submit = msm_ring->u.submit; - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - fd_bo_del(msm_ring->u.cmds[i].ring_bo); - } - free(msm_ring->u.cmds); + for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { + fd_bo_del(msm_ring->u.cmds[i].ring_bo); + } - slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring); - } + slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring); + } } -static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .check_size = msm_ringbuffer_sp_check_size, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .check_size = msm_ringbuffer_sp_check_size, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .destroy = msm_ringbuffer_sp_destroy, +static const struct 
fd_ringbuffer_funcs ring_funcs = {
+	.grow = msm_ringbuffer_sp_grow,
+	.emit_reloc = msm_ringbuffer_sp_emit_reloc,
+	.emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
+	.cmd_count = msm_ringbuffer_sp_cmd_count,
+	.destroy = msm_ringbuffer_sp_destroy,
 };
 
 static inline struct fd_ringbuffer *
 msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
-                       enum fd_ringbuffer_flags flags)
+		enum fd_ringbuffer_flags flags)
 {
-	struct fd_ringbuffer *ring = &msm_ring->base;
-
-	/* We don't do any translation from internal FD_RELOC flags to MSM flags. */
-	STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
-	STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
-	STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);
-
-	debug_assert(msm_ring->ring_bo);
-
-	uint8_t *base = fd_bo_map(msm_ring->ring_bo);
-	ring->start = (void *)(base + msm_ring->offset);
-	ring->end = &(ring->start[size / 4]);
-	ring->cur = ring->start;
-
-	ring->size = size;
-	ring->flags = flags;
-
-	if (flags & _FD_RINGBUFFER_OBJECT) {
-		if (fd_dev_64b(&msm_ring->u.pipe->dev_id)) {
-			ring->funcs = &ring_funcs_obj_64;
-		} else {
-			ring->funcs = &ring_funcs_obj_32;
-		}
-	} else {
-		if (fd_dev_64b(&msm_ring->u.submit->pipe->dev_id)) {
-			ring->funcs = &ring_funcs_nonobj_64;
-		} else {
-			ring->funcs = &ring_funcs_nonobj_32;
-		}
-	}
-
-	// TODO initializing these could probably be conditional on flags
-	// since unneeded for FD_RINGBUFFER_STAGING case..
-	msm_ring->u.cmds = NULL;
-	msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;
-
-	msm_ring->u.reloc_bos = NULL;
-	msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;
-
-	return ring;
+	struct fd_ringbuffer *ring = &msm_ring->base;
+
+	debug_assert(msm_ring->ring_bo);
+
+	uint8_t *base = fd_bo_map(msm_ring->ring_bo);
+	ring->start = (void *)(base + msm_ring->offset);
+	ring->end = &(ring->start[size/4]);
+	ring->cur = ring->start;
+
+	ring->size = size;
+	ring->flags = flags;
+
+	ring->funcs = &ring_funcs;
+
+	// TODO initializing these could probably be conditional on flags
+	// since unneeded for FD_RINGBUFFER_STAGING case..
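+	/* both arms of the union start out empty; stateobj
+	 * (_FD_RINGBUFFER_OBJECT) rb's use reloc_bos, rb's owned by a
+	 * submit use cmds
+	 */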
+ msm_ring->u.cmds = NULL; + msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; + + msm_ring->u.reloc_bos = NULL; + msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; + + return ring; } struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) { - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); - - /* Lock access to the msm_pipe->suballoc_* since ringbuffer object allocation - * can happen both on the frontend (most CSOs) and the driver thread (a6xx - * cached tex state, for example) - */ - static simple_mtx_t suballoc_lock = _SIMPLE_MTX_INITIALIZER_NP; - simple_mtx_lock(&suballoc_lock); - - /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */ - msm_ring->offset = align(msm_pipe->suballoc_offset, 64); - if (!msm_pipe->suballoc_bo || - msm_ring->offset + size > fd_bo_size(msm_pipe->suballoc_bo)) { - if (msm_pipe->suballoc_bo) - fd_bo_del(msm_pipe->suballoc_bo); - msm_pipe->suballoc_bo = - fd_bo_new_ring(pipe->dev, MAX2(SUBALLOC_SIZE, align(size, 4096))); - msm_ring->offset = 0; - } - - msm_ring->u.pipe = pipe; - msm_ring->ring_bo = fd_bo_ref(msm_pipe->suballoc_bo); - msm_ring->base.refcnt = 1; - - msm_pipe->suballoc_offset = msm_ring->offset + size; - - simple_mtx_unlock(&suballoc_lock); - - return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); + struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); + + msm_ring->u.pipe = pipe; + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + msm_ring->base.refcnt = 1; + + return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); } diff --git a/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c b/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c index e3f317329..37a3dcb26 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c +++ b/lib/mesa/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c @@ -21,79 +21,118 @@ * IN THE SOFTWARE. */ -#include "compiler/nir/nir_builder.h" #include "ir3_nir.h" +#include "compiler/nir/nir_builder.h" /* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the * gather results, rather than before. As a result, it must be emulated with * direct texture calls. 
*/ -static nir_ssa_def * -ir3_nir_lower_tg4_to_tex_instr(nir_builder *b, nir_instr *instr, void *data) +static bool +lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx) { - nir_tex_instr *tg4 = nir_instr_as_tex(instr); - static const int offsets[3][2] = {{0, 1}, {1, 1}, {1, 0}}; - - nir_ssa_def *results[4]; - int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); - for (int i = 0; i < 4; i++) { - int num_srcs = tg4->num_srcs + 1 /* lod */; - if (offset_index < 0 && i < 3) - num_srcs++; - - nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); - tex->op = nir_texop_txl; - tex->sampler_dim = tg4->sampler_dim; - tex->coord_components = tg4->coord_components; - tex->is_array = tg4->is_array; - tex->is_shadow = tg4->is_shadow; - tex->is_new_style_shadow = tg4->is_new_style_shadow; - tex->texture_index = tg4->texture_index; - tex->sampler_index = tg4->sampler_index; - tex->dest_type = tg4->dest_type; - - for (int j = 0; j < tg4->num_srcs; j++) { - nir_src_copy(&tex->src[j].src, &tg4->src[j].src); - tex->src[j].src_type = tg4->src[j].src_type; - } - if (i != 3) { - nir_ssa_def *offset = nir_vec2(b, nir_imm_int(b, offsets[i][0]), - nir_imm_int(b, offsets[i][1])); - if (offset_index < 0) { - tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); - tex->src[tg4->num_srcs].src_type = nir_tex_src_offset; - } else { - assert(nir_tex_instr_src_size(tex, offset_index) == 2); - nir_ssa_def *orig = - nir_ssa_for_src(b, tex->src[offset_index].src, 2); - tex->src[offset_index].src = - nir_src_for_ssa(nir_iadd(b, orig, offset)); - } - } - tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0)); - tex->src[num_srcs - 1].src_type = nir_tex_src_lod; - - nir_ssa_dest_init(&tex->instr, &tex->dest, nir_tex_instr_dest_size(tex), - 32, NULL); - nir_builder_instr_insert(b, &tex->instr); - - results[i] = nir_channel(b, &tex->dest.ssa, tg4->component); - } - - return nir_vec(b, results, 4); + bool progress = false; + + static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} }; + + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tg4 = (nir_tex_instr *)instr; + + if (tg4->op != nir_texop_tg4) + continue; + + b->cursor = nir_before_instr(&tg4->instr); + + nir_ssa_def *results[4]; + int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); + for (int i = 0; i < 4; i++) { + int num_srcs = tg4->num_srcs + 1 /* lod */; + if (offset_index < 0 && i < 3) + num_srcs++; + + nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); + tex->op = nir_texop_txl; + tex->sampler_dim = tg4->sampler_dim; + tex->coord_components = tg4->coord_components; + tex->is_array = tg4->is_array; + tex->is_shadow = tg4->is_shadow; + tex->is_new_style_shadow = tg4->is_new_style_shadow; + tex->texture_index = tg4->texture_index; + tex->sampler_index = tg4->sampler_index; + tex->dest_type = tg4->dest_type; + + for (int j = 0; j < tg4->num_srcs; j++) { + nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex); + tex->src[j].src_type = tg4->src[j].src_type; + } + if (i != 3) { + nir_ssa_def *offset = + nir_vec2(b, nir_imm_int(b, offsets[i][0]), + nir_imm_int(b, offsets[i][1])); + if (offset_index < 0) { + tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); + tex->src[tg4->num_srcs].src_type = nir_tex_src_offset; + } else { + assert(nir_tex_instr_src_size(tex, offset_index) == 2); + nir_ssa_def *orig = nir_ssa_for_src( + b, tex->src[offset_index].src, 2); + tex->src[offset_index].src = + nir_src_for_ssa(nir_iadd(b, orig, offset)); + } + 
}
+			tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0));
+			tex->src[num_srcs - 1].src_type = nir_tex_src_lod;
+
+			nir_ssa_dest_init(&tex->instr, &tex->dest,
+					nir_tex_instr_dest_size(tex), 32, NULL);
+			nir_builder_instr_insert(b, &tex->instr);
+
+			results[i] = nir_channel(b, &tex->dest.ssa, tg4->component);
+		}
+
+		nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]);
+		nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result));
+
+		nir_instr_remove(&tg4->instr);
+
+		progress = true;
+	}
+
+	return progress;
 }
 
 static bool
-ir3_nir_lower_tg4_to_tex_filter(const nir_instr *instr, const void *data)
+lower_tg4_func(nir_function_impl *impl)
 {
-   return (instr->type == nir_instr_type_tex &&
-           nir_instr_as_tex(instr)->op == nir_texop_tg4);
+	void *mem_ctx = ralloc_parent(impl);
+	nir_builder b;
+	nir_builder_init(&b, impl);
+
+	bool progress = false;
+	nir_foreach_block_safe(block, impl) {
+		progress |= lower_tg4(block, &b, mem_ctx);
+	}
+
+	if (progress)
+		nir_metadata_preserve(impl, nir_metadata_block_index |
+				nir_metadata_dominance);
+
+	return progress;
 }
 
 bool
 ir3_nir_lower_tg4_to_tex(nir_shader *shader)
 {
-   return nir_shader_lower_instructions(shader, ir3_nir_lower_tg4_to_tex_filter,
-                                        ir3_nir_lower_tg4_to_tex_instr, NULL);
+	bool progress = false;
+
+	nir_foreach_function(function, shader) {
+		if (function->impl)
+			progress |= lower_tg4_func(function->impl);
+	}
+
+	return progress;
 }
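
For reference, the lowering above turns each gather4 into four LOD-zero fetches: three at the (0,1), (1,1) and (1,0) texel offsets plus the unoffset base texel, taking one component of each and packing the results into a vec4. A minimal standalone sketch of that access pattern, where the vec4 type and the texel_fetch() callback are hypothetical stand-ins rather than anything from the Mesa sources:

struct vec4 { float v[4]; };

/* emulate a 2D gather4 of 'component' at integer texel coord (x, y),
 * mirroring lower_tg4() above; texel_fetch() is an assumed LOD-0
 * fetch callback:
 */
static struct vec4
emulate_gather4(struct vec4 (*texel_fetch)(int x, int y),
                int x, int y, int component)
{
	/* same offset table as lower_tg4(); the fourth tap is the
	 * unoffset base coordinate:
	 */
	static const int offsets[4][2] = { {0, 1}, {1, 1}, {1, 0}, {0, 0} };
	struct vec4 result;

	for (int i = 0; i < 4; i++) {
		struct vec4 t = texel_fetch(x + offsets[i][0], y + offsets[i][1]);
		result.v[i] = t.v[component];
	}

	return result;
}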