author    | Mark Kettenis <kettenis@cvs.openbsd.org> | 2013-12-15 11:42:11 +0000
committer | Mark Kettenis <kettenis@cvs.openbsd.org> | 2013-12-15 11:42:11 +0000
commit    | 8c70ecbdfd54a068c4679801134340a61a67f792
tree      | 2de8530be81af6001aa97b98d7258f192cb0a8a9
parent    | 0bcc6a43ae8f7a3aed5dd41e156c202c02fdd006
Overhaul the pread and pwrite code to match what Linux does. Should fix a few
more cache coherency issues, hopefully reducing the number of artifacts
showing up on the screen.
-rw-r--r-- | sys/dev/pci/drm/i915/i915_gem.c   | 329
-rw-r--r-- | sys/dev/pci/drm/i915/i915_trace.h |  14
2 files changed, 207 insertions(+), 136 deletions(-)
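One change worth noting before the diff itself: both ioctls drop the pinned-GTT copy in favour of the Linux-style per-page shmem path, and the bounds checks are rewritten in the subtraction form used upstream, args->size > obj->base.size - args->offset, which cannot wrap around once args->offset > obj->base.size has been ruled out. A minimal userland sketch of that idiom follows; the names pread_args_ok and obj_size are invented for illustration and are not part of the driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the overflow-safe bounds check used by the new pread/pwrite
 * ioctls.  With 64-bit unsigned arguments, "offset + size" can wrap and
 * slip past a naive "offset + size > obj_size" test; doing the
 * subtraction after "offset > obj_size" has been rejected cannot wrap.
 */
static bool
pread_args_ok(uint64_t offset, uint64_t size, uint64_t obj_size)
{
    if (offset > obj_size)
        return false;
    /* offset <= obj_size here, so the subtraction cannot underflow */
    return size <= obj_size - offset;
}

int
main(void)
{
    /* offset + size wraps to 0 and would sneak past the naive check */
    printf("%d\n", pread_args_ok(1, UINT64_MAX, 4096));    /* 0 */
    printf("%d\n", pread_args_ok(4095, 1, 4096));          /* 1 */
    return 0;
}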
diff --git a/sys/dev/pci/drm/i915/i915_gem.c b/sys/dev/pci/drm/i915/i915_gem.c
index 74a31677e98..627988e9415 100644
--- a/sys/dev/pci/drm/i915/i915_gem.c
+++ b/sys/dev/pci/drm/i915/i915_gem.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_gem.c,v 1.59 2013/12/11 20:31:43 kettenis Exp $ */
+/* $OpenBSD: i915_gem.c,v 1.60 2013/12/15 11:42:10 kettenis Exp $ */
 /*
  * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
  *
@@ -295,7 +295,84 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
         obj->tiling_mode != I915_TILING_NONE;
 }
 
-#ifdef notyet
+#define offset_in_page(off) ((off) & PAGE_MASK)
+
+static void *
+kmap(struct vm_page *pg)
+{
+    vaddr_t va;
+
+#if defined (__HAVE_PMAP_DIRECT)
+    va = pmap_map_direct(pg);
+#else
+    va = uvm_km_valloc_wait(phys_map, PAGE_SIZE);
+    pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
+    pmap_update(pmap_kernel());
+#endif
+    return (void *)va;
+}
+
+static void
+kunmap(void *addr)
+{
+    vaddr_t va = (vaddr_t)addr;
+
+#if defined (__HAVE_PMAP_DIRECT)
+    pmap_unmap_direct(va);
+#else
+    pmap_kremove(va, PAGE_SIZE);
+    pmap_update(pmap_kernel());
+    uvm_km_free_wakeup(phys_map, va, PAGE_SIZE);
+#endif
+}
+
+static inline void *
+kmap_atomic(struct vm_page *pg)
+{
+    vaddr_t va;
+
+#if defined (__HAVE_PMAP_DIRECT)
+    va = pmap_map_direct(pg);
+#else
+    extern vaddr_t pmap_tmpmap_pa(paddr_t);
+    va = pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pg));
+#endif
+    return (void *)va;
+}
+
+static inline void
+kunmap_atomic(void *addr)
+{
+#if defined (__HAVE_PMAP_DIRECT)
+    pmap_unmap_direct((vaddr_t)addr);
+#else
+    extern void pmap_tmpunmap_pa(void);
+    pmap_tmpunmap_pa();
+#endif
+}
+
+static inline void
+drm_clflush_virt_range(void *addr, size_t len)
+{
+    pmap_flush_cache((vaddr_t)addr, len);
+}
+
+static inline unsigned long
+__copy_to_user(void *to, const void *from, unsigned len)
+{
+    if (copyout(from, to, len))
+        return len;
+    return 0;
+}
+
+static inline unsigned long
+__copy_to_user_inatomic(void *to, const void *from, unsigned len)
+{
+    if (copyout(from, to, len))
+        return len;
+    return 0;
+}
+
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
                         const char *gpu_vaddr, int gpu_offset,
@@ -304,7 +381,7 @@ __copy_to_user_swizzled(char __user *cpu_vaddr,
     int ret, cpu_offset = 0;
 
     while (length > 0) {
-        int cacheline_end = ALIGN(gpu_offset + 1, 64);
+        int cacheline_end = roundup2(gpu_offset + 1, 64);
         int this_length = min(cacheline_end - gpu_offset, length);
         int swizzled_gpu_offset = gpu_offset ^ 64;
 
@@ -322,6 +399,22 @@ __copy_to_user_swizzled(char __user *cpu_vaddr,
     return 0;
 }
 
+static inline unsigned long
+__copy_from_user(void *to, const void *from, unsigned len)
+{
+    if (copyin(from, to, len))
+        return len;
+    return 0;
+}
+
+static inline unsigned long
+__copy_from_user_inatomic_nocache(void *to, const void *from, unsigned len)
+{
+    if (copyin(from, to, len))
+        return len;
+    return 0;
+}
+
 static inline int
 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
                           const char __user *cpu_vaddr,
@@ -330,7 +423,7 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
     int ret, cpu_offset = 0;
 
     while (length > 0) {
-        int cacheline_end = ALIGN(gpu_offset + 1, 64);
+        int cacheline_end = roundup2(gpu_offset + 1, 64);
         int this_length = min(cacheline_end - gpu_offset, length);
         int swizzled_gpu_offset = gpu_offset ^ 64;
 
@@ -352,7 +445,7 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
  * Flushes invalid cachelines before reading the target if
  * needs_clflush is set. */
 static int
-shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
                  char __user *user_data,
                  bool page_do_bit17_swizzling, bool needs_clflush)
 {
@@ -374,6 +467,9 @@ shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
     return ret ? -EFAULT : 0;
 }
 
+#define round_up(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
+#define round_down(x, y) (((x) / (y)) * (y))
+
 static void
 shmem_clflush_swizzled_range(char *addr, unsigned long length,
                              bool swizzled)
@@ -399,7 +495,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
                  char __user *user_data,
                  bool page_do_bit17_swizzling, bool needs_clflush)
 {
@@ -420,7 +516,7 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
     ret = __copy_to_user(user_data,
                          vaddr + shmem_page_offset,
                          page_length);
-    kunmap(page);
+    kunmap(vaddr);
 
     return ret ? -EFAULT : 0;
 }
@@ -433,13 +529,11 @@ i915_gem_shmem_pread(struct drm_device *dev,
 {
     char __user *user_data;
     ssize_t remain;
-    loff_t offset;
+    off_t offset;
     int shmem_page_offset, page_length, ret = 0;
     int obj_do_bit17_swizzling, page_do_bit17_swizzling;
     int hit_slowpath = 0;
-    int prefaulted = 0;
     int needs_clflush = 0;
-    struct scatterlist *sg;
     int i;
 
     user_data = (char __user *) (uintptr_t) args->data_ptr;
@@ -469,8 +563,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
     offset = args->offset;
 
-    for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-        struct page *page;
+    for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
+        struct vm_page *page;
 
         if (i < offset >> PAGE_SHIFT)
             continue;
@@ -488,9 +582,15 @@ i915_gem_shmem_pread(struct drm_device *dev,
         if ((shmem_page_offset + page_length) > PAGE_SIZE)
             page_length = PAGE_SIZE - shmem_page_offset;
 
+#ifdef __linux__
         page = sg_page(sg);
         page_do_bit17_swizzling = obj_do_bit17_swizzling &&
             (page_to_phys(page) & (1 << 17)) != 0;
+#else
+        page = obj->pages[i];
+        page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+            (VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
+#endif
 
         ret = shmem_pread_fast(page, shmem_page_offset, page_length,
                                user_data, page_do_bit17_swizzling,
@@ -499,8 +599,9 @@ i915_gem_shmem_pread(struct drm_device *dev,
             goto next_page;
 
         hit_slowpath = 1;
-        mutex_unlock(&dev->struct_mutex);
+        DRM_READUNLOCK();
 
+#ifdef __linux__
         if (!prefaulted) {
             ret = fault_in_multipages_writeable(user_data, remain);
             /* Userspace is tricking us, but we've already clobbered
@@ -510,15 +611,18 @@ i915_gem_shmem_pread(struct drm_device *dev,
             (void)ret;
             prefaulted = 1;
         }
+#endif
 
         ret = shmem_pread_slow(page, shmem_page_offset, page_length,
                                user_data, page_do_bit17_swizzling,
                                needs_clflush);
 
-        mutex_lock(&dev->struct_mutex);
+        DRM_READLOCK();
 
 next_page:
+#ifdef __linux__
         mark_page_accessed(page);
+#endif
 
         if (ret)
             goto out;
@@ -539,7 +643,6 @@ out:
     return ret;
 }
 
-#endif /* notyet */
 
 /**
  * Reads data from the object referenced by handle.
@@ -550,14 +653,12 @@ int
 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
 {
-    struct inteldrm_softc *dev_priv = dev->dev_private;
-    struct drm_i915_gem_pread *args = data;
-    struct drm_i915_gem_object *obj;
-    char *vaddr;
-    bus_space_handle_t bsh;
-    bus_size_t bsize;
-    voff_t offset;
-    int ret;
+    struct drm_i915_gem_pread *args = data;
+    struct drm_i915_gem_object *obj;
+    int ret = 0;
+
+    if (args->size == 0)
+        return 0;
 
     obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
     if (obj == NULL)
@@ -565,42 +666,17 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
     DRM_READLOCK();
     drm_hold_object(&obj->base);
 
-    /*
-     * Bounds check source.
-     */
-    if (args->offset > obj->base.size || args->size > obj->base.size ||
-        args->offset + args->size > obj->base.size) {
+    /* Bounds check source. */
+    if (args->offset > obj->base.size ||
+        args->size > obj->base.size - args->offset) {
         ret = -EINVAL;
         goto out;
     }
 
-    ret = i915_gem_object_pin(obj, 0, true, true);
-    if (ret)
-        goto out;
-
-    ret = i915_gem_object_set_to_gtt_domain(obj, false);
-    if (ret)
-        goto unpin;
+    trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-    offset = obj->gtt_offset + args->offset;
-    bsize = round_page(offset + args->size) - trunc_page(offset);
+    ret = i915_gem_shmem_pread(dev, obj, args, file);
 
-    if ((ret = agp_map_subregion(dev_priv->agph,
-        trunc_page(offset), bsize, &bsh)) != 0)
-        goto unpin;
-    vaddr = bus_space_vaddr(dev->bst, bsh);
-    if (vaddr == NULL) {
-        ret = -EFAULT;
-        goto unmap;
-    }
-
-    ret = -copyout(vaddr + (offset & PAGE_MASK),
-        (char *)(uintptr_t)args->data_ptr, args->size);
-
-unmap:
-    agp_unmap_subregion(dev_priv->agph, bsh, bsize);
-unpin:
-    i915_gem_object_unpin(obj);
 out:
     drm_unhold_and_unref(&obj->base);
     DRM_READUNLOCK();
@@ -608,7 +684,7 @@ out:
     return ret;
 }
 
-#ifdef notyet
+#ifdef __linux__
 /* This is the fast write path which cannot handle
  * page faults in the source data
  */
@@ -631,6 +707,7 @@ fast_user_write(struct io_mapping *mapping,
     io_mapping_unmap_atomic(vaddr_atomic);
     return unwritten;
 }
+#endif
 
 /**
  * This is the fast pwrite path, where we copy the data directly from the
@@ -643,10 +720,11 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
                          struct drm_file *file)
 {
     drm_i915_private_t *dev_priv = dev->dev_private;
-    ssize_t remain;
-    loff_t offset, page_base;
-    char __user *user_data;
-    int page_offset, page_length, ret;
+    bus_space_handle_t bsh;
+    bus_addr_t offset;
+    bus_size_t size;
+    char *vaddr;
+    int ret;
 
     ret = i915_gem_object_pin(obj, 0, true, true);
     if (ret)
@@ -660,38 +738,23 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
     if (ret)
         goto out_unpin;
 
-    user_data = (char __user *) (uintptr_t) args->data_ptr;
-    remain = args->size;
-
     offset = obj->gtt_offset + args->offset;
+    size = round_page(offset + args->size) - trunc_page(offset);
 
-    while (remain > 0) {
-        /* Operation in this page
-         *
-         * page_base = page offset within aperture
-         * page_offset = offset within page
-         * page_length = bytes to copy for this page
-         */
-        page_base = offset & PAGE_MASK;
-        page_offset = offset_in_page(offset);
-        page_length = remain;
-        if ((page_offset + remain) > PAGE_SIZE)
-            page_length = PAGE_SIZE - page_offset;
+    if ((ret = agp_map_subregion(dev_priv->agph,
+        trunc_page(offset), size, &bsh)) != 0)
+        goto out_unpin;
+    vaddr = bus_space_vaddr(dev_priv->bst, bsh);
+    if (vaddr == NULL) {
+        ret = -EFAULT;
+        goto out_unmap;
+    }
 
-        /* If we get a fault while copying data, then (presumably) our
-         * source page isn't available.  Return the error and we'll
-         * retry in the slow path.
-         */
-        if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
-                            page_offset, user_data, page_length)) {
-            ret = -EFAULT;
-            goto out_unpin;
-        }
+    ret = -copyin((char *)(uintptr_t)args->data_ptr,
+        vaddr + (offset & PAGE_MASK), args->size);
 
-        remain -= page_length;
-        user_data += page_length;
-        offset += page_length;
-    }
+out_unmap:
+    agp_unmap_subregion(dev_priv->agph, bsh, size);
 
 out_unpin:
     i915_gem_object_unpin(obj);
@@ -704,7 +767,7 @@ out:
  * needs_clflush_before is set and flushes out any written cachelines after
  * writing if needs_clflush is set. */
 static int
-shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
                   char __user *user_data,
                   bool page_do_bit17_swizzling,
                   bool needs_clflush_before,
@@ -734,7 +797,7 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
                   char __user *user_data,
                   bool page_do_bit17_swizzling,
                   bool needs_clflush_before,
@@ -760,7 +823,7 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
         shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
                                      page_length,
                                      page_do_bit17_swizzling);
-    kunmap(page);
+    kunmap(vaddr);
 
     return ret ? -EFAULT : 0;
 }
@@ -772,7 +835,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                       struct drm_file *file)
 {
     ssize_t remain;
-    loff_t offset;
+    off_t offset;
     char __user *user_data;
     int shmem_page_offset, page_length, ret = 0;
     int obj_do_bit17_swizzling, page_do_bit17_swizzling;
@@ -780,7 +843,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
     int needs_clflush_after = 0;
     int needs_clflush_before = 0;
     int i;
-    struct scatterlist *sg;
 
     user_data = (char __user *) (uintptr_t) args->data_ptr;
     remain = args->size;
@@ -815,8 +877,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
     offset = args->offset;
     obj->dirty = 1;
 
-    for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-        struct page *page;
+    for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
+        struct vm_page *page;
         int partial_cacheline_write;
 
         if (i < offset >> PAGE_SHIFT)
@@ -841,11 +903,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
          * overcomplicate things and flush the entire patch. */
         partial_cacheline_write = needs_clflush_before &&
             ((shmem_page_offset | page_length)
-                & (boot_cpu_data.x86_clflush_size - 1));
+                & (curcpu()->ci_cflushsz - 1));
 
+#ifdef __linux__
         page = sg_page(sg);
         page_do_bit17_swizzling = obj_do_bit17_swizzling &&
             (page_to_phys(page) & (1 << 17)) != 0;
+#else
+        page = obj->pages[i];
+        page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+            (VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
+#endif
 
         ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
                                 user_data, page_do_bit17_swizzling,
@@ -855,17 +923,21 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
             goto next_page;
 
         hit_slowpath = 1;
-        mutex_unlock(&dev->struct_mutex);
+        DRM_READUNLOCK();
         ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
                                 user_data, page_do_bit17_swizzling,
                                 partial_cacheline_write,
                                 needs_clflush_after);
 
-        mutex_lock(&dev->struct_mutex);
+        DRM_READLOCK();
 
 next_page:
+#ifdef __linux__
         set_page_dirty(page);
         mark_page_accessed(page);
+#else
+        atomic_clearbits_int(&page->pg_flags, PG_CLEAN);
+#endif
 
         if (ret)
             goto out;
@@ -895,7 +967,6 @@ out:
     return ret;
 }
 
-#endif /* notyet */
 
 /**
  * Writes data to the object referenced by handle.
@@ -906,14 +977,12 @@ int
 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file)
 {
-    struct inteldrm_softc *dev_priv = dev->dev_private;
-    struct drm_i915_gem_pwrite *args = data;
-    struct drm_i915_gem_object *obj;
-    char *vaddr;
-    bus_space_handle_t bsh;
-    bus_size_t bsize;
-    off_t offset;
-    int ret = 0;
+    struct drm_i915_gem_pwrite *args = data;
+    struct drm_i915_gem_object *obj;
+    int ret;
+
+    if (args->size == 0)
+        return 0;
 
     obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
     if (obj == NULL)
@@ -922,48 +991,38 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
     drm_hold_object(&obj->base);
 
     /* Bounds check destination. */
-    if (args->offset > obj->base.size || args->size > obj->base.size ||
-        args->offset + args->size > obj->base.size) {
+    if (args->offset > obj->base.size ||
+        args->size > obj->base.size - args->offset) {
         ret = -EINVAL;
         goto out;
     }
 
+    trace_i915_gem_object_pwrite(obj, args->offset, args->size);
+
+    ret = -EFAULT;
+    /* We can only do the GTT pwrite on untiled buffers, as otherwise
+     * it would end up going through the fenced access, and we'll get
+     * different detiling behavior between reading and writing.
+     * pread/pwrite currently are reading and writing from the CPU
+     * perspective, requiring manual detiling by the client.
+     */
     if (obj->phys_obj) {
         ret = i915_gem_phys_pwrite(dev, obj, args, file);
         goto out;
     }
 
-    ret = i915_gem_object_pin(obj, 0, true, true);
-    if (ret)
-        goto out;
-
-    ret = i915_gem_object_set_to_gtt_domain(obj, true);
-    if (ret)
-        goto unpin;
-
-    ret = i915_gem_object_put_fence(obj);
-    if (ret)
-        goto unpin;
-
-    offset = obj->gtt_offset + args->offset;
-    bsize = round_page(offset + args->size) - trunc_page(offset);
-
-    if ((ret = agp_map_subregion(dev_priv->agph,
-        trunc_page(offset), bsize, &bsh)) != 0)
-        goto unpin;
-    vaddr = bus_space_vaddr(dev_priv->bst, bsh);
-    if (vaddr == NULL) {
-        ret = -EFAULT;
-        goto unmap;
+    if (obj->cache_level == I915_CACHE_NONE &&
+        obj->tiling_mode == I915_TILING_NONE &&
+        obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+        ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
+        /* Note that the gtt paths might fail with non-page-backed user
+         * pointers (e.g. gtt mappings when moving data between
+         * textures). Fallback to the shmem path in that case. */
     }
 
-    ret = -copyin((char *)(uintptr_t)args->data_ptr,
-        vaddr + (offset & PAGE_MASK), args->size);
+    if (ret == -EFAULT || ret == -ENOSPC)
+        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 
-unmap:
-    agp_unmap_subregion(dev_priv->agph, bsh, bsize);
-unpin:
-    i915_gem_object_unpin(obj);
 out:
     drm_unhold_and_unref(&obj->base);
     DRM_READUNLOCK();
diff --git a/sys/dev/pci/drm/i915/i915_trace.h b/sys/dev/pci/drm/i915/i915_trace.h
index 15503b4e632..67cb00404f9 100644
--- a/sys/dev/pci/drm/i915/i915_trace.h
+++ b/sys/dev/pci/drm/i915/i915_trace.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_trace.h,v 1.9 2013/12/01 11:47:13 kettenis Exp $ */
+/* $OpenBSD: i915_trace.h,v 1.10 2013/12/15 11:42:10 kettenis Exp $ */
 /*
  * Copyright (c) 2013 Mark Kettenis <kettenis@openbsd.org>
  *
@@ -47,6 +47,18 @@ trace_i915_gem_object_change_domain(struct drm_i915_gem_object *obj,
 }
 
 static inline void
+trace_i915_gem_object_pwrite(struct drm_i915_gem_object *obj,
+    u32 offset, u32 len)
+{
+}
+
+static inline void
+trace_i915_gem_object_pread(struct drm_i915_gem_object *obj,
+    u32 offset, u32 len)
+{
+}
+
+static inline void
 trace_i915_gem_object_bind(struct drm_i915_gem_object *obj, bool mappable)
 {
 }
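The per-page copy helpers in the i915_gem.c hunks above walk each page in 64-byte cachelines and, on bit-17-swizzled objects, fetch every cacheline from the other half of its 128-byte pair for pages whose physical address has bit 17 set, by flipping bit 6 of the offset (gpu_offset ^ 64). The sketch below shows that walk in plain userland C under a few stated assumptions: memcpy stands in for copyout/__copy_to_user, and ROUNDUP2, MIN and copy_swizzled_out are illustrative stand-ins for the kernel's roundup2(), min() and the __copy_*_swizzled helpers.

#include <stdio.h>
#include <string.h>

/* Power-of-two round-up, the same shape as the roundup2() used in the diff. */
#define ROUNDUP2(x, y)  (((x) + ((y) - 1)) & ~((y) - 1))
#define MIN(a, b)       ((a) < (b) ? (a) : (b))

/*
 * Copy "length" bytes starting at gpu_offset out of a bit-17 swizzled page.
 * Each step stops at the next 64-byte cacheline boundary, and the source
 * offset has bit 6 flipped so the data is read from the other half of its
 * 128-byte pair.  memcpy stands in for the kernel copy-out primitive.
 */
static void
copy_swizzled_out(char *cpu_dst, const char *gpu_page, int gpu_offset,
    int length)
{
    int cpu_offset = 0;

    while (length > 0) {
        int cacheline_end = ROUNDUP2(gpu_offset + 1, 64);
        int this_length = MIN(cacheline_end - gpu_offset, length);
        int swizzled_gpu_offset = gpu_offset ^ 64;

        memcpy(cpu_dst + cpu_offset,
            gpu_page + swizzled_gpu_offset, this_length);

        cpu_offset += this_length;
        gpu_offset += this_length;
        length -= this_length;
    }
}

int
main(void)
{
    char page[4096], out[256];
    int i;

    for (i = 0; i < 4096; i++)
        page[i] = i & 0xff;

    /* Read 100 bytes starting 30 bytes into the (swizzled) page. */
    copy_swizzled_out(out, page, 30, 100);
    printf("first byte came from offset %d\n", (unsigned char)out[0]);
    return 0;
}

Capping each step at the next 64-byte boundary is what lets a single XOR of the offset select the correct half of the pair for the whole chunk.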