From 0b6361ab565dfaf98b3a52bb33bc52b3912cd495 Mon Sep 17 00:00:00 2001
From: Owain Ainsworth
Date: Thu, 8 Apr 2010 11:45:55 +0000
Subject: DRM memory management for inteldrm(4) using the Graphics Execution
 Manager API.

This is currently disabled by default under ifdef INTELDRM_GEM (option
INTELDRM_GEM in a kernel config, or a patch to i915_drv.h, is needed to
enable it), mostly because the intel X driver currently in tree does not
always play well with GEM and needs to be switched to the UXA accelmethod
(which is the only option on later drivers).

While based on the intel drm code in the linux kernel, this has some cleanups
and some obvious behaviour changes:

1) mmap through the GTT is entirely coherent with the gpu cache: mappings are
   removed whenever the cache dirties, so userland cannot race with the gpu to
   write to the same memory at the same time.
2) pread/pwrite access is tiling-correct, so userland does not need to tile
   manually (mesa was already fixed for this a while ago). The straw that
   broke the camel's back here was the bit17 swizzling on some mobile gpus,
   meaning that while userland did the swizzle itself, the kernel still had to
   do *extra* swizzling; that was frankly ridiculous, so the option was
   dropped.
3) there is no option to map objects via the cpu from userland, again due to
   coherency issues.
4) additional integer overflow checking in some areas.
5) we only support the newer EXECBUFFER2 command submission ioctl; the old one
   is not supported at all (libdrm was fixed WRT this over a week ago).

Now the TODOs:

1) the locking is funky and is only correct due to biglock; it does however
   work because of that. This will be fixed in tree; the design is formulating
   in my head as I type.
2) there are currently no memory limits on drm memory; this needs to be
   changed.
3) we really need PAT support for the machines where MTRRs are useless, else
   drm uses quite a lot of cpu (this is a bug carried over from the older
   code, nothing new).
4) gpu resetting support on chips other than 965 is not written yet.
5) currently a lot of the code lives in inteldrm; when memory management comes
   to other chipsets the common code will be factored out into the drm
   midlayer.

Tested on: 855 (x40), GM965 and 915 by me; 945 by armani@ and jkmeuser@; gm45
by armani@ and marco@. More testing is needed before I enable this by default
though. Others also provided testing, but what they tested escapes me right
now.
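(For reference, a minimal sketch of the kernel side of enabling this, based on
the "option INTELDRM_GEM" mentioned above; the amd64 paths and the config file
name are only illustrative, adjust for your architecture and local setup:

	# /usr/src/sys/arch/amd64/conf/GENERIC.GEM -- example custom config
	include "arch/amd64/conf/GENERIC"	# start from the stock GENERIC config
	option	INTELDRM_GEM			# enable GEM memory management in inteldrm(4)

Build and boot that kernel, then set the xorg.conf option described next.)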
In order to test this enable INTELDRM_GEM in a kernel and add the following line to the driver section in xorg.conf (I am working on a patch to autodetect the X stuff): Option "AccelMethod" "UXA" --- sys/dev/pci/drm/drmP.h | 83 +- sys/dev/pci/drm/drm_drv.c | 465 ++++- sys/dev/pci/drm/i915_dma.c | 213 +- sys/dev/pci/drm/i915_drv.c | 4663 +++++++++++++++++++++++++++++++++++++++++++- sys/dev/pci/drm/i915_drv.h | 272 ++- sys/dev/pci/drm/i915_irq.c | 115 +- 6 files changed, 5520 insertions(+), 291 deletions(-) diff --git a/sys/dev/pci/drm/drmP.h b/sys/dev/pci/drm/drmP.h index cedc82d2e0e..5d8f6046216 100644 --- a/sys/dev/pci/drm/drmP.h +++ b/sys/dev/pci/drm/drmP.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -99,7 +100,7 @@ #define DRM_SUSER(p) (suser(p, p->p_acflag) == 0) #define DRM_MTRR_WC MDF_WRITECOMBINE -#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) +#define PAGE_ALIGN(addr) (((addr) + PAGE_MASK) & ~PAGE_MASK) extern struct cfdriver drm_cd; @@ -215,14 +216,17 @@ struct drm_buf_entry { }; struct drm_file { - SPLAY_ENTRY(drm_file) link; - int authenticated; - unsigned long ioctl_count; - dev_t kdev; - drm_magic_t magic; - int flags; - int master; - int minor; + SPLAY_HEAD(drm_obj_tree, drm_handle) obj_tree; + struct mutex table_lock; + SPLAY_ENTRY(drm_file) link; + int authenticated; + unsigned long ioctl_count; + dev_t kdev; + drm_magic_t magic; + int flags; + int master; + int minor; + u_int obj_id; /*next gem id*/ }; struct drm_lock_data { @@ -349,6 +353,28 @@ struct drm_ati_pcigart_info { int gart_reg_if; }; +struct drm_obj { + struct uvm_object uobj; + SPLAY_ENTRY(drm_obj) entry; + struct drm_device *dev; + struct uvm_object *uao; + + size_t size; + int name; + int handlecount; + uint32_t read_domains; + uint32_t write_domain; + + uint32_t pending_read_domains; + uint32_t pending_write_domain; +}; + +struct drm_handle { + SPLAY_ENTRY(drm_handle) entry; + struct drm_obj *obj; + uint32_t handle; +}; + struct drm_driver_info { int (*firstopen)(struct drm_device *); int (*open)(struct drm_device *, struct drm_file *); @@ -367,7 +393,16 @@ struct drm_driver_info { u_int32_t (*get_vblank_counter)(struct drm_device *, int); int (*enable_vblank)(struct drm_device *, int); void (*disable_vblank)(struct drm_device *, int); - + /* + * driver-specific constructor for gem objects to set up private data. + * returns 0 on success. 
+ */ + int (*gem_init_object)(struct drm_obj *); + void (*gem_free_object)(struct drm_obj *); + int (*gem_fault)(struct drm_obj *, struct uvm_faultinfo *, off_t, + vaddr_t, vm_page_t *, int, int, vm_prot_t, int); + + size_t gem_size; size_t buf_priv_size; size_t file_priv_size; @@ -385,6 +420,7 @@ struct drm_driver_info { #define DRIVER_PCI_DMA 0x10 #define DRIVER_SG 0x20 #define DRIVER_IRQ 0x40 +#define DRIVER_GEM 0x80 u_int flags; }; @@ -438,6 +474,21 @@ struct drm_device { atomic_t *ctx_bitmap; void *dev_private; struct drm_local_map *agp_buffer_map; + + /* GEM info */ + struct mutex obj_name_lock; + atomic_t obj_count; + u_int obj_name; + atomic_t obj_memory; + atomic_t pin_count; + atomic_t pin_memory; + atomic_t gtt_count; + atomic_t gtt_memory; + uint32_t gtt_total; + uint32_t invalidate_domains; + uint32_t flush_domains; + SPLAY_HEAD(drm_name_tree, drm_obj) name_tree; + struct pool objpl; }; struct drm_attach_args { @@ -588,5 +639,17 @@ int drm_agp_bind_ioctl(struct drm_device *, void *, struct drm_file *); int drm_sg_alloc_ioctl(struct drm_device *, void *, struct drm_file *); int drm_sg_free(struct drm_device *, void *, struct drm_file *); +struct drm_obj *drm_gem_object_alloc(struct drm_device *, size_t); +void drm_gem_object_reference(struct drm_obj *); +void drm_gem_object_unreference(struct drm_obj *); +int drm_handle_create(struct drm_file *, struct drm_obj *, int *); +struct drm_obj *drm_gem_object_lookup(struct drm_device *, + struct drm_file *, int ); +int drm_gem_close_ioctl(struct drm_device *, void *, struct drm_file *); +int drm_gem_flink_ioctl(struct drm_device *, void *, struct drm_file *); +int drm_gem_open_ioctl(struct drm_device *, void *, struct drm_file *); +int drm_gem_load_uao(bus_dma_tag_t, bus_dmamap_t, struct uvm_object *, + bus_size_t, int, bus_dma_segment_t **); + #endif /* __KERNEL__ */ #endif /* _DRM_P_H_ */ diff --git a/sys/dev/pci/drm/drm_drv.c b/sys/dev/pci/drm/drm_drv.c index 88a5e1daadf..4dfd6895ec6 100644 --- a/sys/dev/pci/drm/drm_drv.c +++ b/sys/dev/pci/drm/drm_drv.c @@ -1,4 +1,6 @@ /*- + * Copyright 2007-2009 Owain G. Ainsworth + * Copyright © 2008 Intel Corporation * Copyright 2003 Eric Anholt * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. @@ -27,6 +29,8 @@ * Rickard E. (Rik) Faith * Daryll Strauss * Gareth Hughes + * Eric Anholt + * Owain Ainsworth * */ @@ -35,7 +39,11 @@ * open/close, and ioctl dispatch. */ +#include #include +#include +#include + #include /* for TIOCSGRP */ #include "drmP.h" @@ -64,6 +72,21 @@ int drm_authmagic(struct drm_device *, void *, struct drm_file *); int drm_file_cmp(struct drm_file *, struct drm_file *); SPLAY_PROTOTYPE(drm_file_tree, drm_file, link, drm_file_cmp); +/* functions used by the per-open handle code to grab references to object */ +void drm_handle_ref(struct drm_obj *); +void drm_handle_unref(struct drm_obj *); + +int drm_handle_cmp(struct drm_handle *, struct drm_handle *); +int drm_name_cmp(struct drm_obj *, struct drm_obj *); +void drm_unref(struct uvm_object *); +void drm_ref(struct uvm_object *); +int drm_fault(struct uvm_faultinfo *, vaddr_t, vm_page_t *, int, int, + vm_fault_t, vm_prot_t, int); +boolean_t drm_flush(struct uvm_object *, voff_t, voff_t, int); + +SPLAY_PROTOTYPE(drm_obj_tree, drm_handle, entry, drm_handle_cmp); +SPLAY_PROTOTYPE(drm_name_tree, drm_obj, entry, drm_name_cmp); + /* * attach drm to a pci-based driver. 
* @@ -180,6 +203,16 @@ drm_attach(struct device *parent, struct device *self, void *aux) printf(": couldn't allocate memory for context bitmap.\n"); goto error; } + + if (dev->driver->flags & DRIVER_GEM) { + mtx_init(&dev->obj_name_lock, IPL_NONE); + SPLAY_INIT(&dev->name_tree); + KASSERT(dev->driver->gem_size >= sizeof(struct drm_obj)); + /* XXX unique name */ + pool_init(&dev->objpl, dev->driver->gem_size, 0, 0, 0, + "drmobjpl", &pool_allocator_nointr); + } + printf("\n"); return; @@ -386,6 +419,11 @@ drmopen(dev_t kdev, int flags, int fmt, struct proc *p) /* for compatibility root is always authenticated */ file_priv->authenticated = DRM_SUSER(p); + if (dev->driver->flags & DRIVER_GEM) { + SPLAY_INIT(&file_priv->obj_tree); + mtx_init(&file_priv->table_lock, IPL_NONE); + } + if (dev->driver->open) { ret = dev->driver->open(dev, file_priv); if (ret != 0) { @@ -485,9 +523,20 @@ drmclose(dev_t kdev, int flags, int fmt, struct proc *p) !dev->driver->reclaim_buffers_locked) drm_reclaim_buffers(dev, file_priv); + DRM_LOCK(); + if (dev->driver->flags & DRIVER_GEM) { + struct drm_handle *han; + mtx_enter(&file_priv->table_lock); + while ((han = SPLAY_ROOT(&file_priv->obj_tree)) != NULL) { + SPLAY_REMOVE(drm_obj_tree, &file_priv->obj_tree, han); + drm_handle_unref(han->obj); + drm_free(han); + } + mtx_leave(&file_priv->table_lock); + } + dev->buf_pgid = 0; - DRM_LOCK(); SPLAY_REMOVE(drm_file_tree, &dev->files, file_priv); drm_free(file_priv); @@ -550,6 +599,8 @@ drmioctl(dev_t kdev, u_long cmd, caddr_t data, int flags, return (drm_wait_vblank(dev, data, file_priv)); case DRM_IOCTL_MODESET_CTL: return (drm_modeset_ctl(dev, data, file_priv)); + case DRM_IOCTL_GEM_CLOSE: + return (drm_gem_close_ioctl(dev, data, file_priv)); /* removed */ case DRM_IOCTL_GET_MAP: @@ -596,6 +647,11 @@ drmioctl(dev_t kdev, u_long cmd, caddr_t data, int flags, return (drm_dma(dev, data, file_priv)); case DRM_IOCTL_AGP_INFO: return (drm_agp_info_ioctl(dev, data, file_priv)); + case DRM_IOCTL_GEM_FLINK: + return (drm_gem_flink_ioctl(dev, data, file_priv)); + case DRM_IOCTL_GEM_OPEN: + return (drm_gem_open_ioctl(dev, data, file_priv)); + } } @@ -936,7 +992,6 @@ drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) } DRM_DEBUG("%u\n", auth->magic); - return (0); } @@ -968,3 +1023,409 @@ drm_authmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) return (ret); } + +struct uvm_pagerops drm_pgops = { + NULL, + drm_ref, + drm_unref, + drm_fault, + drm_flush, +}; + +void +drm_ref(struct uvm_object *uobj) +{ + simple_lock(&uobj->vmobjlock); + uobj->uo_refs++; + simple_unlock(&uobj->vmobjlock); +} + +void +drm_unref(struct uvm_object *uobj) +{ + struct drm_obj *obj = (struct drm_obj *)uobj; + struct drm_device *dev = obj->dev; + + simple_lock(&uobj->vmobjlock); + if (--uobj->uo_refs > 0) { + simple_unlock(&uobj->vmobjlock); + return; + } + + if (dev->driver->gem_free_object != NULL) + dev->driver->gem_free_object(obj); + + uao_detach(obj->uao); + + atomic_dec(&dev->obj_count); + atomic_sub(obj->size, &dev->obj_memory); + simple_unlock(&uobj->vmobjlock); + pool_put(&dev->objpl, obj); +} + + +boolean_t +drm_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) +{ + return (TRUE); +} + + +int +drm_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, vm_page_t *pps, + int npages, int centeridx, vm_fault_t fault_type, + vm_prot_t access_type, int flags) +{ + struct vm_map_entry *entry = ufi->entry; + struct uvm_object *uobj = entry->object.uvm_obj; + struct drm_obj *obj = (struct 
drm_obj *)uobj; + struct drm_device *dev = obj->dev; + int ret; + UVMHIST_FUNC("udv_fault"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist," flags=%ld", flags,0,0,0); + + /* + * we do not allow device mappings to be mapped copy-on-write + * so we kill any attempt to do so here. + */ + + if (UVM_ET_ISCOPYONWRITE(entry)) { + UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%lx)", + entry->etype, 0,0,0); + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); + return(VM_PAGER_ERROR); + } + + /* Call down into driver to do the magic */ + ret = dev->driver->gem_fault(obj, ufi, entry->offset + (vaddr - + entry->start), vaddr, pps, npages, centeridx, + access_type, flags); + + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); + pmap_update(ufi->orig_map->pmap); + if (ret != VM_PAGER_OK) + uvm_wait("drm_fault"); + return (ret); +} + +/* + * Code to support memory managers based on the GEM (Graphics + * Execution Manager) api. + */ +struct drm_obj * +drm_gem_object_alloc(struct drm_device *dev, size_t size) +{ + struct drm_obj *obj; + + KASSERT((size & (PAGE_SIZE -1)) == 0); + + if ((obj = pool_get(&dev->objpl, PR_WAITOK | PR_ZERO)) == NULL) + return (NULL); + + obj->dev = dev; + + /* uao create can't fail in the 0 case, it just sleeps */ + obj->uao = uao_create(size, 0); + obj->size = size; + simple_lock_init(&obj->uobj.vmobjlock); + obj->uobj.pgops = &drm_pgops; + RB_INIT(&obj->uobj.memt); + obj->uobj.uo_npages = 0; + obj->uobj.uo_refs = 1; + + if (dev->driver->gem_init_object != NULL && + dev->driver->gem_init_object(obj) != 0) { + uao_detach(obj->uao); + pool_put(&dev->objpl, obj); + return (NULL); + } + atomic_inc(&dev->obj_count); + atomic_add(obj->size, &dev->obj_memory); + return (obj); +} + +int +drm_handle_create(struct drm_file *file_priv, struct drm_obj *obj, + int *handlep) +{ + struct drm_handle *han; + + if ((han = drm_calloc(1, sizeof(*han))) == NULL) + return (ENOMEM); + + han->obj = obj; + mtx_enter(&file_priv->table_lock); +again: + *handlep = han->handle = ++file_priv->obj_id; + /* + * Make sure we have no duplicates. this'll hurt once we wrap, 0 is + * reserved. 
+ */ + if (han->handle == 0 || SPLAY_INSERT(drm_obj_tree, + &file_priv->obj_tree, han)) + goto again; + mtx_leave(&file_priv->table_lock); + + drm_handle_ref(obj); + return (0); +} + +struct drm_obj * +drm_gem_object_lookup(struct drm_device *dev, struct drm_file *file_priv, + int handle) +{ + struct drm_obj *obj; + struct drm_handle *han, search; + + search.handle = handle; + + mtx_enter(&file_priv->table_lock); + han = SPLAY_FIND(drm_obj_tree, &file_priv->obj_tree, &search); + if (han == NULL) { + mtx_leave(&file_priv->table_lock); + return (NULL); + } + + obj = han->obj; + drm_gem_object_reference(obj); + mtx_leave(&file_priv->table_lock); + + return (obj); +} + +int +drm_gem_close_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_gem_close *args = data; + struct drm_handle *han, find; + struct drm_obj *obj; + + if ((dev->driver->flags & DRIVER_GEM) == 0) + return (ENODEV); + + find.handle = args->handle; + mtx_enter(&file_priv->table_lock); + han = SPLAY_FIND(drm_obj_tree, &file_priv->obj_tree, &find); + if (han == NULL) { + mtx_leave(&file_priv->table_lock); + return (EINVAL); + } + + obj = han->obj; + SPLAY_REMOVE(drm_obj_tree, &file_priv->obj_tree, han); + mtx_leave(&file_priv->table_lock); + + drm_free(han); + + DRM_LOCK(); + drm_handle_unref(obj); + DRM_UNLOCK(); + + return (0); +} + +int +drm_gem_flink_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_gem_flink *args = data; + struct drm_obj *obj; + + if (!dev->driver->flags & DRIVER_GEM) + return (ENODEV); + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + + mtx_enter(&dev->obj_name_lock); + if (!obj->name) { +again: + obj->name = ++dev->obj_name; + /* 0 is reserved, make sure we don't clash. */ + if (obj->name == 0 || SPLAY_INSERT(drm_name_tree, + &dev->name_tree, obj)) + goto again; + /* name holds a reference to the object */ + drm_gem_object_reference(obj); + } + mtx_leave(&dev->obj_name_lock); + + args->name = (uint64_t)obj->name; + + DRM_LOCK(); + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + return (0); +} + +int +drm_gem_open_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_gem_open *args = data; + struct drm_obj *obj, search; + int ret, handle; + + if (!dev->driver->flags & DRIVER_GEM) + return (ENODEV); + + search.name = args->name; + mtx_enter(&dev->obj_name_lock); + obj = SPLAY_FIND(drm_name_tree, &dev->name_tree, &search); + if (obj != NULL) + drm_gem_object_reference(obj); + mtx_leave(&dev->obj_name_lock); + if (obj == NULL) + return (ENOENT); + + ret = drm_handle_create(file_priv, obj, &handle); + /* handle has a reference, drop ours. */ + DRM_LOCK(); + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + if (ret) { + return (ret); + } + + args->handle = handle; + args->size = obj->size; + + return (0); +} + +void +drm_gem_object_reference(struct drm_obj *obj) +{ + drm_ref(&obj->uobj); +} + +void +drm_gem_object_unreference(struct drm_obj *obj) +{ + drm_unref(&obj->uobj); +} + + +/* + * grab a reference for a per-open handle. + * The object contains a handlecount too because if all handles disappear we + * need to also remove the global name (names initially are per open unless the + * flink ioctl is called. + */ +void +drm_handle_ref(struct drm_obj *obj) +{ + obj->handlecount++; + drm_gem_object_reference(obj); +} + +/* + * Remove the reference owned by a per-open handle. If we're the last one, + * remove the reference from flink, too. 
+ */ +void +drm_handle_unref(struct drm_obj *obj) +{ + /* do this first in case this is the last reference */ + if (--obj->handlecount == 0) { + struct drm_device *dev = obj->dev; + + mtx_enter(&dev->obj_name_lock); + if (obj->name) { + SPLAY_REMOVE(drm_name_tree, &dev->name_tree, obj); + obj->name = 0; + mtx_leave(&dev->obj_name_lock); + /* name held a reference to object */ + drm_gem_object_unreference(obj); + } else { + mtx_leave(&dev->obj_name_lock); + } + } + drm_gem_object_unreference(obj); +} + +/* + * Helper function to load a uvm anonymous object into a dmamap, to be used + * for binding to a translation-table style sg mechanism (e.g. agp, or intel + * gtt). + * + * For now we ignore maxsegsz. + */ +int +drm_gem_load_uao(bus_dma_tag_t dmat, bus_dmamap_t map, struct uvm_object *uao, + bus_size_t size, int flags, bus_dma_segment_t **segp) +{ + bus_dma_segment_t *segs; + struct vm_page *pg; + struct pglist plist; + u_long npages = size >> PAGE_SHIFT, i = 0; + int ret; + + TAILQ_INIT(&plist); + + /* + * This is really quite ugly, but nothing else would need + * bus_dmamap_load_uao() yet. + */ + segs = malloc(npages * sizeof(*segs), M_DRM, M_WAITOK | M_ZERO); + if (segs == NULL) + return (ENOMEM); + + /* This may sleep, no choice in the matter */ + if (uvm_objwire(uao, 0, size, &plist) != 0) { + ret = ENOMEM; + goto free; + } + + TAILQ_FOREACH(pg, &plist, pageq) { + paddr_t pa = VM_PAGE_TO_PHYS(pg); + + if (i > 0 && pa == (segs[i - 1].ds_addr + + segs[i - 1].ds_len)) { + /* contiguous, yay */ + segs[i - 1].ds_len += PAGE_SIZE; + continue; + } + segs[i].ds_addr = pa; + segs[i].ds_len = PAGE_SIZE; + if (i++ > npages) + break; + } + /* this should be impossible */ + if (pg != TAILQ_END(&pageq)) { + ret = EINVAL; + goto unwire; + } + + if ((ret = bus_dmamap_load_raw(dmat, map, segs, i, size, flags)) != 0) + goto unwire; + + *segp = segs; + + return (0); + +unwire: + uvm_objunwire(uao, 0, size); +free: + free(segs, M_DRM); + return (ret); +} + +int +drm_handle_cmp(struct drm_handle *a, struct drm_handle *b) +{ + return (a->handle < b->handle ? -1 : a->handle > b->handle); +} + +int +drm_name_cmp(struct drm_obj *a, struct drm_obj *b) +{ + return (a->name < b->name ? -1 : a->name > b->name); +} + +SPLAY_GENERATE(drm_obj_tree, drm_handle, entry, drm_handle_cmp); + +SPLAY_GENERATE(drm_name_tree, drm_obj, entry, drm_name_cmp); diff --git a/sys/dev/pci/drm/i915_dma.c b/sys/dev/pci/drm/i915_dma.c index 13c881dc7dc..a221dfb4768 100644 --- a/sys/dev/pci/drm/i915_dma.c +++ b/sys/dev/pci/drm/i915_dma.c @@ -31,6 +31,11 @@ #include "i915_drm.h" #include "i915_drv.h" +int i915_dispatch_batchbuffer(struct drm_device *, + drm_i915_batchbuffer_t *, struct drm_clip_rect *); +int i915_dispatch_cmdbuffer(struct drm_device *, + drm_i915_cmdbuffer_t *, struct drm_clip_rect *); + /** * Sets up the hardware status page for devices that need a physical address * in the register. 
@@ -83,15 +88,14 @@ i915_dma_cleanup(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - if (dev_priv->ring.bsh != NULL) { + if (dev_priv->ring.ring_obj == NULL && dev_priv->ring.bsh != NULL) { bus_space_unmap(dev_priv->bst, dev_priv->ring.bsh, dev_priv->ring.size); - dev_priv->ring.bsh = NULL; - dev_priv->ring.size = 0; + memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); } /* Clear the HWS virtual address at teardown */ - if (I915_NEED_GFX_HWS(dev_priv)) + if (dev_priv->hws_obj == NULL && I915_NEED_GFX_HWS(dev_priv)) i915_free_hws(dev_priv, dev->dmat); return 0; @@ -111,20 +115,27 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) if (init->sarea_priv_offset) dev_priv->sarea_priv = (drm_i915_sarea_t *) - ((u8 *) dev_priv->sarea->handle + - init->sarea_priv_offset); + ((u8 *) dev_priv->sarea->handle + init->sarea_priv_offset); else { /* No sarea_priv for you! */ dev_priv->sarea_priv = NULL; } - dev_priv->ring.size = init->ring_size; + if (init->ring_size != 0) { + if (dev_priv->ring.ring_obj != NULL) { + i915_dma_cleanup(dev); + DRM_ERROR("Client tried to initialize ringbuffer in " + "GEM mode\n"); + return (-EINVAL); + } + dev_priv->ring.size = init->ring_size; - if ((ret = bus_space_map(dev_priv->bst, init->ring_start, - init->ring_size, 0, &dev_priv->ring.bsh)) != 0) { - DRM_INFO("can't map ringbuffer\n"); - i915_dma_cleanup(dev); - return (ret); + if ((ret = bus_space_map(dev_priv->bst, init->ring_start, + init->ring_size, 0, &dev_priv->ring.bsh)) != 0) { + DRM_INFO("can't map ringbuffer\n"); + i915_dma_cleanup(dev); + return (ret); + } } /* Allow hardware batchbuffers unless told otherwise. @@ -299,43 +310,35 @@ static int i915_emit_cmds(struct drm_device *dev, int __user *buffer, return 0; } -static int -i915_emit_box(struct drm_device * dev, struct drm_clip_rect *boxes, - int i, int DR1, int DR4) + +/* + * Emit a box for a cliprect. cliprect must already have been copied in and + * sanity checked (the reason for this is so that everything can be checked + * before any gpu state is modified. 
+ */ +void +i915_emit_box(struct drm_device * dev, struct drm_clip_rect *box, + int DR1, int DR4) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_clip_rect box; - - if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box))) { - return EFAULT; - } - - if (box.y2 <= box.y1 || box.x2 <= box.x1 || box.y2 <= 0 || - box.x2 <= 0) { - DRM_ERROR("Bad box %d,%d..%d,%d\n", - box.x1, box.y1, box.x2, box.y2); - return EINVAL; - } if (IS_I965G(dev_priv)) { BEGIN_LP_RING(4); OUT_RING(GFX_OP_DRAWRECT_INFO_I965); - OUT_RING((box.x1 & 0xffff) | (box.y1 << 16)); - OUT_RING(((box.x2 - 1) & 0xffff) | ((box.y2 - 1) << 16)); + OUT_RING((box->x1 & 0xffff) | (box->y1 << 16)); + OUT_RING(((box->x2 - 1) & 0xffff) | ((box->y2 - 1) << 16)); OUT_RING(DR4); ADVANCE_LP_RING(); } else { BEGIN_LP_RING(6); OUT_RING(GFX_OP_DRAWRECT_INFO); OUT_RING(DR1); - OUT_RING((box.x1 & 0xffff) | (box.y1 << 16)); - OUT_RING(((box.x2 - 1) & 0xffff) | ((box.y2 - 1) << 16)); + OUT_RING((box->x1 & 0xffff) | (box->y1 << 16)); + OUT_RING(((box->x2 - 1) & 0xffff) | ((box->y2 - 1) << 16)); OUT_RING(DR4); OUT_RING(0); ADVANCE_LP_RING(); } - - return 0; } /* XXX: Emitting the counter should really be moved to part of the IRQ @@ -362,8 +365,9 @@ void i915_emit_breadcrumb(struct drm_device *dev) ADVANCE_LP_RING(); } -static int i915_dispatch_cmdbuffer(struct drm_device * dev, - drm_i915_cmdbuffer_t * cmd) +int +i915_dispatch_cmdbuffer(struct drm_device * dev, + drm_i915_cmdbuffer_t *cmd, struct drm_clip_rect *cliprects) { struct drm_i915_private *dev_priv = dev->dev_private; int nbox = cmd->num_cliprects; @@ -379,12 +383,8 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev, count = nbox ? nbox : 1; for (i = 0; i < count; i++) { - if (i < nbox) { - ret = i915_emit_box(dev, cmd->cliprects, i, - cmd->DR1, cmd->DR4); - if (ret) - return ret; - } + if (i < nbox) + i915_emit_box(dev, &cliprects[i], cmd->DR1, cmd->DR4); ret = i915_emit_cmds(dev, (int __user *)cmd->buf, cmd->sz / 4); if (ret) @@ -395,13 +395,12 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev, return 0; } -int i915_dispatch_batchbuffer(struct drm_device * dev, - drm_i915_batchbuffer_t * batch) +int +i915_dispatch_batchbuffer(struct drm_device *dev, + drm_i915_batchbuffer_t *batch, struct drm_clip_rect *cliprects) { - drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_clip_rect __user *boxes = batch->cliprects; - int nbox = batch->num_cliprects; - int i = 0, count; + drm_i915_private_t *dev_priv = dev->dev_private; + int nbox = batch->num_cliprects, i = 0, count; if ((batch->start | batch->used) & 0x7) { DRM_ERROR("alignment\n"); @@ -410,15 +409,13 @@ int i915_dispatch_batchbuffer(struct drm_device * dev, inteldrm_update_ring(dev_priv); + /* XXX use gem code */ count = nbox ? 
nbox : 1; for (i = 0; i < count; i++) { - if (i < nbox) { - int ret = i915_emit_box(dev, boxes, i, - batch->DR1, batch->DR4); - if (ret) - return ret; - } + if (i < nbox) + i915_emit_box(dev, &cliprects[i], + batch->DR1, batch->DR4); if (!IS_I830(dev_priv) && !IS_845G(dev_priv)) { BEGIN_LP_RING(2); @@ -451,7 +448,7 @@ int i915_flush_ioctl(struct drm_device *dev, void *data, drm_i915_private_t *dev_priv = dev->dev_private; int ret; - LOCK_TEST_WITH_RETURN(dev, file_priv); + RING_LOCK_TEST_WITH_RETURN(dev, file_priv); DRM_LOCK(); inteldrm_update_ring(dev_priv); @@ -464,9 +461,11 @@ int i915_flush_ioctl(struct drm_device *dev, void *data, int i915_batchbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv) { - drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - drm_i915_batchbuffer_t *batch = data; - int ret; + struct drm_i915_private *dev_priv = + (drm_i915_private_t *)dev->dev_private; + drm_i915_batchbuffer_t *batch = data; + struct drm_clip_rect *cliprects = NULL; + int i, ret; if (!dev_priv->allow_batchbuffer) { DRM_ERROR("Batchbuffer ioctl disabled\n"); @@ -476,46 +475,94 @@ int i915_batchbuffer(struct drm_device *dev, void *data, DRM_DEBUG("i915 batchbuffer, start %x used %d cliprects %d\n", batch->start, batch->used, batch->num_cliprects); + RING_LOCK_TEST_WITH_RETURN(dev, file_priv); + if (batch->num_cliprects < 0) - return EINVAL; + return (EINVAL); - LOCK_TEST_WITH_RETURN(dev, file_priv); + if (batch->num_cliprects) { + if (SIZE_MAX / batch->num_cliprects < sizeof(*cliprects)) + return (EINVAL); + cliprects = drm_alloc(batch->num_cliprects * + sizeof(*cliprects)); + if (cliprects == NULL) + return (ENOMEM); + + ret = copyin((void *)(uintptr_t)batch->cliprects, cliprects, + sizeof(*cliprects) * batch->num_cliprects); + if (ret != 0) + goto free_cliprects; + + for (i = 0; i < batch->num_cliprects; i++) { + if (cliprects[i].y2 <= cliprects[i].y1 || + cliprects[i].x2 <= cliprects[i].x1 || + cliprects[i].y2 <= 0 || cliprects[i].x2 <= 0) { + ret = EINVAL; + goto free_cliprects; + } + } + } DRM_LOCK(); - ret = i915_dispatch_batchbuffer(dev, batch); + ret = i915_dispatch_batchbuffer(dev, batch, cliprects); DRM_UNLOCK(); if (dev_priv->sarea_priv != NULL) dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); - return ret; +free_cliprects: + drm_free(cliprects); + return (ret); } int i915_cmdbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv) { - drm_i915_private_t *dev_priv = (drm_i915_private_t *)dev->dev_private; - drm_i915_cmdbuffer_t *cmdbuf = data; - int ret; + struct drm_i915_private *dev_priv = + (drm_i915_private_t *)dev->dev_private; + drm_i915_cmdbuffer_t *cmdbuf = data; + struct drm_clip_rect *cliprects = NULL; + int i, ret; DRM_DEBUG("i915 cmdbuffer, buf %p sz %d cliprects %d\n", cmdbuf->buf, cmdbuf->sz, cmdbuf->num_cliprects); + RING_LOCK_TEST_WITH_RETURN(dev, file_priv); + if (cmdbuf->num_cliprects < 0) return EINVAL; - LOCK_TEST_WITH_RETURN(dev, file_priv); + if (cmdbuf->num_cliprects) { + if (SIZE_MAX / cmdbuf->num_cliprects < sizeof(*cliprects)) + return (EINVAL); + cliprects = drm_alloc(cmdbuf->num_cliprects * + sizeof(*cliprects)); + if (cliprects == NULL) + return (ENOMEM); + + ret = copyin((void *)(uintptr_t)cmdbuf->cliprects, cliprects, + sizeof(*cliprects) * cmdbuf->num_cliprects); + if (ret != 0) + goto free_cliprects; + + for (i = 0; i < cmdbuf->num_cliprects; i++) { + if (cliprects[i].y2 <= cliprects[i].y1 || + cliprects[i].x2 <= cliprects[i].x1 || + cliprects[i].y2 <= 0 || cliprects[i].x2 <= 0) { + 
ret = EINVAL; + goto free_cliprects; + } + } + } DRM_LOCK(); - ret = i915_dispatch_cmdbuffer(dev, cmdbuf); + ret = i915_dispatch_cmdbuffer(dev, cmdbuf, cliprects); DRM_UNLOCK(); - if (ret) { - DRM_ERROR("i915_dispatch_cmdbuffer failed\n"); - return ret; - } - - if (dev_priv->sarea_priv != NULL) + if (ret == 0 && dev_priv->sarea_priv != NULL) dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); - return 0; + +free_cliprects: + drm_free(cliprects); + return (ret); } int i915_getparam(struct drm_device *dev, void *data, @@ -544,13 +591,20 @@ int i915_getparam(struct drm_device *dev, void *data, value = dev_priv->pci_device; break; case I915_PARAM_HAS_GEM: +#ifdef INTELDRM_GEM + value = 1; +#else value = 0; +#endif /* INTELDRM_GEM */ break; case I915_PARAM_NUM_FENCES_AVAIL: - value = 0; + value = dev_priv->num_fence_regs - dev_priv->fence_reg_start; + break; + case I915_PARAM_HAS_EXECBUF2: + value = 1; break; default: - DRM_ERROR("Unknown parameter %d\n", param->param); + DRM_DEBUG("Unknown parameter %d\n", param->param); return EINVAL; } @@ -582,8 +636,15 @@ int i915_setparam(struct drm_device *dev, void *data, case I915_SETPARAM_ALLOW_BATCHBUFFER: dev_priv->allow_batchbuffer = param->value; break; + case I915_SETPARAM_NUM_USED_FENCES: + if (param->value > dev_priv->num_fence_regs || + param->value < 0) + return EINVAL; + /* Userspace can use first N regs */ + dev_priv->fence_reg_start = param->value; + break; default: - DRM_ERROR("unknown parameter %d\n", param->param); + DRM_DEBUG("unknown parameter %d\n", param->param); return EINVAL; } diff --git a/sys/dev/pci/drm/i915_drv.c b/sys/dev/pci/drm/i915_drv.c index 651147033ed..8ebab115ff6 100644 --- a/sys/dev/pci/drm/i915_drv.c +++ b/sys/dev/pci/drm/i915_drv.c @@ -1,9 +1,22 @@ -/* i915_drv.c -- Intel i915 driver -*- linux-c -*- - * Created: Wed Feb 14 17:10:04 2001 by gareth@valinux.com +/* + * Copyright (c) 2008-2009 Owain G. Ainsworth + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- + * Copyright © 2008 Intel Corporation * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -27,6 +40,7 @@ * * Authors: * Gareth Hughes + * Eric Anholt * */ @@ -35,14 +49,36 @@ #include "i915_drm.h" #include "i915_drv.h" +#include + +#include +#include +#if 0 +# define INTELDRM_WATCH_COHERENCY +# define WATCH_INACTIVE +#endif + +#define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) + int inteldrm_probe(struct device *, void *, void *); void inteldrm_attach(struct device *, struct device *, void *); int inteldrm_detach(struct device *, int); -int inteldrm_ioctl(struct drm_device *, u_long, caddr_t, struct drm_file *); int inteldrm_activate(struct device *, int); +int inteldrm_ioctl(struct drm_device *, u_long, caddr_t, struct drm_file *); +int inteldrm_intr(void *); void inteldrm_lastclose(struct drm_device *); void inteldrm_wrap_ring(struct drm_i915_private *); +int inteldrm_gmch_match(struct pci_attach_args *); +void inteldrm_chipset_flush(struct drm_i915_private *); +void inteldrm_timeout(void *); +void inteldrm_hangcheck(void *); +void inteldrm_hung(void *, void *); +void inteldrm_965_reset(struct drm_i915_private *, u_int8_t); +int inteldrm_fault(struct drm_obj *, struct uvm_faultinfo *, off_t, + vaddr_t, vm_page_t *, int, int, vm_prot_t, int ); +void inteldrm_wipe_mappings(struct drm_obj *); +void inteldrm_purge_obj(struct drm_obj *); /* For reset and suspend */ int inteldrm_save_state(struct drm_i915_private *); @@ -64,6 +100,112 @@ u_int8_t i915_read_ar(struct drm_i915_private *, u_int16_t, void i915_save_palette(struct drm_i915_private *, enum pipe); void i915_restore_palette(struct drm_i915_private *, enum pipe); +void i915_alloc_ifp(struct drm_i915_private *, struct pci_attach_args *); +void i965_alloc_ifp(struct drm_i915_private *, struct pci_attach_args *); + +void inteldrm_detect_bit_6_swizzle(drm_i915_private_t *, + struct pci_attach_args *); + +int inteldrm_setup_mchbar(struct drm_i915_private *, + struct pci_attach_args *); +void inteldrm_teardown_mchbar(struct drm_i915_private *, + struct pci_attach_args *, int); + +/* Ioctls */ +int i915_gem_init_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_create_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_pread_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_pwrite_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_set_domain_ioctl(struct drm_device *, void *, + struct drm_file *); +int i915_gem_execbuffer2(struct drm_device *, void *, struct drm_file *); +int i915_gem_pin_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_unpin_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_busy_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_entervt_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_leavevt_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_get_aperture_ioctl(struct drm_device *, void *, + struct drm_file *); +int i915_gem_set_tiling(struct drm_device *, void *, struct drm_file *); +int i915_gem_get_tiling(struct drm_device *, void *, struct drm_file *); +int i915_gem_gtt_map_ioctl(struct drm_device *, void *, struct drm_file *); +int i915_gem_madvise_ioctl(struct drm_device *, void *, struct drm_file *); + +/* GEM memory manager functions */ +int i915_gem_init_object(struct drm_obj *); +void i915_gem_free_object(struct drm_obj *); +int i915_gem_object_pin(struct drm_obj *, uint32_t, int); +void i915_gem_object_unpin(struct drm_obj *); +void 
i915_gem_retire_requests(struct drm_i915_private *); +void i915_gem_retire_request(struct drm_i915_private *, + struct inteldrm_request *); +void i915_gem_retire_work_handler(void *, void*); +int i915_gem_idle(struct drm_i915_private *); +void i915_gem_object_move_to_active(struct drm_obj *); +void i915_gem_object_move_off_active(struct drm_obj *); +void i915_gem_object_move_to_inactive(struct drm_obj *); +uint32_t i915_add_request(struct drm_i915_private *); +void inteldrm_process_flushing(struct drm_i915_private *, u_int32_t); +void i915_move_to_tail(struct inteldrm_obj *, struct i915_gem_list *); +void i915_list_remove(struct inteldrm_obj *); +int i915_gem_init_hws(struct drm_i915_private *); +void i915_gem_cleanup_hws(struct drm_i915_private *); +int i915_gem_init_ringbuffer(struct drm_i915_private *); +int inteldrm_start_ring(struct drm_i915_private *); +void i915_gem_cleanup_ringbuffer(struct drm_i915_private *); +int i915_gem_ring_throttle(struct drm_device *, struct drm_file *); +int i915_gem_evict_inactive(struct drm_i915_private *); +int i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *, + u_int32_t, struct drm_i915_gem_relocation_entry **); +int i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *, + u_int32_t, struct drm_i915_gem_relocation_entry *); +void i915_dispatch_gem_execbuffer(struct drm_device *, + struct drm_i915_gem_execbuffer2 *, uint64_t); +void i915_gem_object_set_to_gpu_domain(struct drm_obj *); +int i915_gem_object_pin_and_relocate(struct drm_obj *, + struct drm_file *, struct drm_i915_gem_exec_object2 *, + struct drm_i915_gem_relocation_entry *); +int i915_gem_object_bind_to_gtt(struct drm_obj *, bus_size_t, int); +int i915_wait_request(struct drm_i915_private *, uint32_t, int); +u_int32_t i915_gem_flush(struct drm_i915_private *, uint32_t, uint32_t); +int i915_gem_object_unbind(struct drm_obj *, int); + +struct drm_obj *i915_gem_find_inactive_object(struct drm_i915_private *, + size_t); + +int i915_gem_evict_everything(struct drm_i915_private *, int); +int i915_gem_evict_something(struct drm_i915_private *, size_t, int); +int i915_gem_object_set_to_gtt_domain(struct drm_obj *, int, int); +int i915_gem_object_set_to_cpu_domain(struct drm_obj *, int, int); +int i915_gem_object_flush_gpu_write_domain(struct drm_obj *, int, int); +int i915_gem_get_fence_reg(struct drm_obj *, int); +int i915_gem_object_put_fence_reg(struct drm_obj *, int); +bus_size_t i915_gem_get_gtt_alignment(struct drm_obj *); + +bus_size_t i915_get_fence_size(struct drm_i915_private *, bus_size_t); +int i915_tiling_ok(struct drm_device *, int, int, int); +int i915_gem_object_fence_offset_ok(struct drm_obj *, int); +void i965_write_fence_reg(struct inteldrm_fence *); +void i915_write_fence_reg(struct inteldrm_fence *); +void i830_write_fence_reg(struct inteldrm_fence *); +void i915_gem_bit_17_swizzle(struct drm_obj *); +void i915_gem_save_bit_17_swizzle(struct drm_obj *); +int inteldrm_swizzle_page(struct vm_page *page); + +/* Debug functions, mostly called from ddb */ +void i915_gem_seqno_info(int); +void i915_interrupt_info(int); +void i915_gem_fence_regs_info(int); +void i915_hws_info(int); +void i915_batchbuffer_info(int); +void i915_ringbuffer_data(int); +void i915_ringbuffer_info(int); +#ifdef WATCH_INACTIVE +void inteldrm_verify_inactive(struct drm_i915_private *, char *, int); +#else +#define inteldrm_verify_inactive(dev,file,line) +#endif + const static struct drm_pcidev inteldrm_pciidlist[] = { {PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82830M_IGD, CHIP_I830|CHIP_M}, 
@@ -104,7 +246,7 @@ const static struct drm_pcidev inteldrm_pciidlist[] = { {PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82Q33_IGD_1, CHIP_G33|CHIP_I9XX|CHIP_HWS}, {PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82GM45_IGD_1, - CHIP_GM45|CHIP_I965|CHIP_I9XX|CHIP_M|CHIP_HWS}, + CHIP_G4X|CHIP_GM45|CHIP_I965|CHIP_I9XX|CHIP_M|CHIP_HWS}, {PCI_VENDOR_INTEL, 0x2E02, CHIP_G4X|CHIP_I965|CHIP_I9XX|CHIP_HWS}, {PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82Q45_IGD_1, @@ -118,6 +260,7 @@ const static struct drm_pcidev inteldrm_pciidlist[] = { static const struct drm_driver_info inteldrm_driver = { .buf_priv_size = 1, /* No dev_priv */ + .file_priv_size = sizeof(struct inteldrm_file), .ioctl = inteldrm_ioctl, .lastclose = inteldrm_lastclose, .vblank_pipes = 2, @@ -127,6 +270,13 @@ static const struct drm_driver_info inteldrm_driver = { .irq_install = i915_driver_irq_install, .irq_uninstall = i915_driver_irq_uninstall, +#ifdef INTELDRM_GEM + .gem_init_object = i915_gem_init_object, + .gem_free_object = i915_gem_free_object, + .gem_fault = inteldrm_fault, + .gem_size = sizeof(struct inteldrm_obj), +#endif /* INTELDRM_GEM */ + .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, @@ -135,7 +285,10 @@ static const struct drm_driver_info inteldrm_driver = { .patchlevel = DRIVER_PATCHLEVEL, .flags = DRIVER_AGP | DRIVER_AGP_REQUIRE | - DRIVER_MTRR | DRIVER_IRQ, + DRIVER_MTRR | DRIVER_IRQ +#ifdef INTELDRM_GEM + | DRIVER_GEM, +#endif /* INTELDRM_GEM */ }; int @@ -145,13 +298,30 @@ inteldrm_probe(struct device *parent, void *match, void *aux) inteldrm_pciidlist)); } +/* + * We're intel IGD, bus 0 function 0 dev 0 should be the GMCH, so it should + * be Intel + */ +int +inteldrm_gmch_match(struct pci_attach_args *pa) +{ + if (pa->pa_bus == 0 && pa->pa_device == 0 && pa->pa_function == 0 && + PCI_VENDOR(pa->pa_id) == PCI_VENDOR_INTEL && + PCI_CLASS(pa->pa_class) == PCI_CLASS_BRIDGE && + PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_BRIDGE_HOST) + return (1); + return (0); +} + void inteldrm_attach(struct device *parent, struct device *self, void *aux) { struct drm_i915_private *dev_priv = (struct drm_i915_private *)self; - struct pci_attach_args *pa = aux; + struct pci_attach_args *pa = aux, bpa; struct vga_pci_bar *bar; + struct drm_device *dev; const struct drm_pcidev *id_entry; + int i; id_entry = drm_find_description(PCI_VENDOR(pa->pa_id), PCI_PRODUCT(pa->pa_id), inteldrm_pciidlist); @@ -163,7 +333,7 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux) dev_priv->dmat = pa->pa_dmat; dev_priv->bst = pa->pa_memt; - /* Add register map (needed for suspend/resume) */ + /* we need to use this api for now due to sharing with intagp */ bar = vga_pci_bar_info((struct vga_pci_softc *)parent, (IS_I9XX(dev_priv) ? 0 : 1)); if (bar == NULL) { @@ -183,10 +353,96 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux) return; } + /* + * set up interrupt handler, note that we don't switch the interrupt + * on until the X server talks to us, kms will change this. + */ + dev_priv->irqh = pci_intr_establish(dev_priv->pc, dev_priv->ih, IPL_BIO, + inteldrm_intr, dev_priv, dev_priv->dev.dv_xname); + if (dev_priv->irqh == NULL) { + printf(": couldn't establish interrupt\n"); + return; + } + + /* Unmask the interrupts that we always want on. 
*/ + dev_priv->irq_mask_reg = ~I915_INTERRUPT_ENABLE_FIX; + +#ifdef INTELDRM_GEM + dev_priv->workq = workq_create("intelrel", 1, IPL_BIO); + if (dev_priv->workq == NULL) { + printf("couldn't create workq\n"); + return; + } + + /* GEM init */ + TAILQ_INIT(&dev_priv->mm.active_list); + TAILQ_INIT(&dev_priv->mm.flushing_list); + TAILQ_INIT(&dev_priv->mm.inactive_list); + TAILQ_INIT(&dev_priv->mm.gpu_write_list); + TAILQ_INIT(&dev_priv->mm.request_list); + TAILQ_INIT(&dev_priv->mm.fence_list); + timeout_set(&dev_priv->mm.retire_timer, inteldrm_timeout, dev_priv); + timeout_set(&dev_priv->mm.hang_timer, inteldrm_hangcheck, dev_priv); + dev_priv->mm.next_gem_seqno = 1; + dev_priv->mm.suspended = 1; +#endif /* INTELDRM_GEM */ + + /* For the X server, in kms mode this will not be needed */ + dev_priv->fence_reg_start = 3; + + if (IS_I965G(dev_priv) || IS_I945G(dev_priv) || IS_I945GM(dev_priv) || + IS_G33(dev_priv)) + dev_priv->num_fence_regs = 16; + else + dev_priv->num_fence_regs = 8; + /* Initialise fences to zero, else on some macs we'll get corruption */ + if (IS_I965G(dev_priv)) { + for (i = 0; i < 16; i++) + I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0); + } else { + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_830_0 + (i * 4), 0); + if (IS_I945G(dev_priv) || IS_I945GM(dev_priv) || + IS_G33(dev_priv)) + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_945_8 + (i * 4), 0); + } + + if (pci_find_device(&bpa, inteldrm_gmch_match) == 0) { + printf(": can't find GMCH\n"); + return; + } + + /* Set up the IFP for chipset flushing */ + if (dev_priv->flags & (CHIP_I915G|CHIP_I915GM|CHIP_I945G|CHIP_I945GM)) { + i915_alloc_ifp(dev_priv, &bpa); + } else if (IS_I965G(dev_priv) || IS_G33(dev_priv)) { + i965_alloc_ifp(dev_priv, &bpa); + } else { + int nsegs; + /* + * I8XX has no flush page mechanism, we fake it by writing until + * the cache is empty. allocate a page to scribble on + */ + dev_priv->ifp.i8xx.kva = NULL; + if (bus_dmamem_alloc(pa->pa_dmat, PAGE_SIZE, 0, 0, + &dev_priv->ifp.i8xx.seg, 1, &nsegs, BUS_DMA_WAITOK) == 0) { + if (bus_dmamem_map(pa->pa_dmat, &dev_priv->ifp.i8xx.seg, + 1, PAGE_SIZE, &dev_priv->ifp.i8xx.kva, 0) != 0) { + bus_dmamem_free(pa->pa_dmat, + &dev_priv->ifp.i8xx.seg, nsegs); + dev_priv->ifp.i8xx.kva = NULL; + } + } + } + +#ifdef INTELDRM_GEM + inteldrm_detect_bit_6_swizzle(dev_priv, &bpa); +#endif /* INTELDRM_GEM */ /* Init HWS */ if (!I915_NEED_GFX_HWS(dev_priv)) { if (i915_init_phys_hws(dev_priv, pa->pa_dmat) != 0) { - printf(": couldn't initialize hardware status page\n"); + printf(": couldn't alloc HWS page\n"); return; } } @@ -197,6 +453,24 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux) /* All intel chipsets need to be treated as agp, so just pass one */ dev_priv->drmdev = drm_attach_pci(&inteldrm_driver, pa, 1, self); + + dev = (struct drm_device *)dev_priv->drmdev; + +#ifdef INTELDRM_GEM + /* XXX would be a lot nicer to get agp info before now */ + uvm_page_physload_flags(atop(dev->agp->base), atop(dev->agp->base + + dev->agp->info.ai_aperture_size), atop(dev->agp->base), + atop(dev->agp->base + dev->agp->info.ai_aperture_size), 0, + PHYSLOAD_DEVICE); + /* array of vm pages that physload introduced. 
*/ + dev_priv->pgs = PHYS_TO_VM_PAGE(dev->agp->base); + KASSERT(dev_priv->pgs != NULL); + /* XXX wc and do earlier */ + if (bus_space_map(dev_priv->bst, dev->agp->base, + dev->agp->info.ai_aperture_size, BUS_SPACE_MAP_LINEAR, + &dev_priv->aperture_bsh) != 0) + panic("can't map aperture"); +#endif /* INTELDRM_GEM */ } int @@ -204,6 +478,7 @@ inteldrm_detach(struct device *self, int flags) { struct drm_i915_private *dev_priv = (struct drm_i915_private *)self; + /* this will quiesce any dma that's going on and kill the timeouts. */ if (dev_priv->drmdev != NULL) { config_detach(dev_priv->drmdev, flags); dev_priv->drmdev = NULL; @@ -211,6 +486,18 @@ inteldrm_detach(struct device *self, int flags) i915_free_hws(dev_priv, dev_priv->dmat); + if (IS_I9XX(dev_priv) && dev_priv->ifp.i9xx.bsh != NULL) { + bus_space_unmap(dev_priv->ifp.i9xx.bst, dev_priv->ifp.i9xx.bsh, + PAGE_SIZE); + } else if (dev_priv->flags & (CHIP_I830 | CHIP_I845G | CHIP_I85X | + CHIP_I865G) && dev_priv->ifp.i8xx.kva != NULL) { + bus_dmamem_unmap(dev_priv->dmat, dev_priv->ifp.i8xx.kva, + PAGE_SIZE); + bus_dmamem_free(dev_priv->dmat, &dev_priv->ifp.i8xx.seg, 1); + } + + pci_intr_disestablish(dev_priv->pc, dev_priv->irqh); + if (dev_priv->regs != NULL) vga_pci_bar_unmap(dev_priv->regs); @@ -263,6 +550,36 @@ inteldrm_ioctl(struct drm_device *dev, u_long cmd, caddr_t data, return (i915_cmdbuffer(dev, data, file_priv)); case DRM_IOCTL_I915_GET_VBLANK_PIPE: return (i915_vblank_pipe_get(dev, data, file_priv)); +#ifdef INTELDRM_GEM + case DRM_IOCTL_I915_GEM_EXECBUFFER2: + return (i915_gem_execbuffer2(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_BUSY: + return (i915_gem_busy_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_THROTTLE: + return (i915_gem_ring_throttle(dev, file_priv)); + case DRM_IOCTL_I915_GEM_MMAP: + return (i915_gem_gtt_map_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_CREATE: + return (i915_gem_create_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_PREAD: + return (i915_gem_pread_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_PWRITE: + return (i915_gem_pwrite_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_SET_DOMAIN: + return (i915_gem_set_domain_ioctl(dev, data, + file_priv)); + case DRM_IOCTL_I915_GEM_SET_TILING: + return (i915_gem_set_tiling(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_GET_TILING: + return (i915_gem_get_tiling(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_GET_APERTURE: + return (i915_gem_get_aperture_ioctl(dev, data, + file_priv)); + case DRM_IOCTL_I915_GEM_MADVISE: + return (i915_gem_madvise_ioctl(dev, data, file_priv)); +#endif /* INTELDRM_GEM */ + default: + break; } } @@ -279,152 +596,3989 @@ inteldrm_ioctl(struct drm_device *dev, u_long cmd, caddr_t data, case DRM_IOCTL_I915_DESTROY_HEAP: case DRM_IOCTL_I915_SET_VBLANK_PIPE: return (0); +#ifdef INTELDRM_GEM + case DRM_IOCTL_I915_GEM_INIT: + return (i915_gem_init_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_ENTERVT: + return (i915_gem_entervt_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_LEAVEVT: + return (i915_gem_leavevt_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_PIN: + return (i915_gem_pin_ioctl(dev, data, file_priv)); + case DRM_IOCTL_I915_GEM_UNPIN: + return (i915_gem_unpin_ioctl(dev, data, file_priv)); +#endif /* INTELDRM_GEM */ } } return (EINVAL); } +int +inteldrm_intr(void *arg) +{ + drm_i915_private_t *dev_priv = arg; + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + u_int32_t iir, pipea_stats = 0, pipeb_stats = 0; + + /* we're 
not set up, don't poke the hw */ + if (dev_priv->hw_status_page == NULL) + return (0); + /* + * lock is to protect from writes to PIPESTAT and IMR from other cores. + */ + mtx_enter(&dev_priv->user_irq_lock); + iir = I915_READ(IIR); + if (iir == 0) { + mtx_leave(&dev_priv->user_irq_lock); + return (0); + } + + /* + * Clear the PIPE(A|B)STAT regs before the IIR + */ + if (iir & I915_DISPLAY_PIPE_A_EVENT_INTERRUPT) { + pipea_stats = I915_READ(PIPEASTAT); + I915_WRITE(PIPEASTAT, pipea_stats); + } + if (iir & I915_DISPLAY_PIPE_B_EVENT_INTERRUPT) { + pipeb_stats = I915_READ(PIPEBSTAT); + I915_WRITE(PIPEBSTAT, pipeb_stats); + } +#ifdef INTELDRM_GEM + if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) + inteldrm_error(dev_priv); +#endif /* INTELDRM_GEM */ + + I915_WRITE(IIR, iir); + (void)I915_READ(IIR); /* Flush posted writes */ + + if (dev_priv->sarea_priv != NULL) + dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); + + if (iir & I915_USER_INTERRUPT) { + wakeup(dev_priv); +#ifdef INTELDRM_GEM + dev_priv->mm.hang_cnt = 0; + timeout_add_msec(&dev_priv->mm.hang_timer, 750); +#endif /* INTELDRM_GEM */ + } + + mtx_leave(&dev_priv->user_irq_lock); + + if (pipea_stats & I915_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(dev, 0); + + if (pipeb_stats & I915_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(dev, 1); + + return (1); +} + u_int32_t inteldrm_read_hws(struct drm_i915_private *dev_priv, int reg) { - struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; - u_int32_t val; + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + u_int32_t val; + + /* + * When we eventually go GEM only we'll always have a dmamap, so this + * madness won't be for long. + */ + if (dev_priv->hws_dmamem) + bus_dmamap_sync(dev->dmat, dev_priv->hws_dmamem->map, 0, + PAGE_SIZE, BUS_DMASYNC_POSTREAD); + + val = ((volatile u_int32_t *)(dev_priv->hw_status_page))[reg]; + + if (dev_priv->hws_dmamem) + bus_dmamap_sync(dev->dmat, dev_priv->hws_dmamem->map, 0, + PAGE_SIZE, BUS_DMASYNC_PREREAD); + return (val); +} + +/* + * These five ring manipulation functions are protected by dev->dev_lock. + */ +int +inteldrm_wait_ring(struct drm_i915_private *dev_priv, int n) +{ + struct inteldrm_ring *ring = &dev_priv->ring; + u_int32_t acthd_reg, acthd, last_acthd, last_head; + int i; + + acthd_reg = IS_I965G(dev_priv) ? ACTHD_I965 : ACTHD; + last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + last_acthd = I915_READ(acthd_reg); + + /* ugh. Could really do with a proper, resettable timer here. 
*/ + for (i = 0; i < 100000; i++) { + ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + acthd = I915_READ(acthd_reg); + ring->space = ring->head - (ring->tail + 8); + + INTELDRM_VPRINTF("%s: head: %x tail: %x space: %x\n", __func__, + ring->head, ring->tail, ring->space); + if (ring->space < 0) + ring->space += ring->size; + if (ring->space >= n) + return (0); + + /* Only timeout if the ring isn't chewing away on something */ + if (ring->head != last_head || acthd != last_acthd) + i = 0; + + last_head = ring->head; + last_acthd = acthd; + delay(10); + } + + return (EBUSY); +} + +void +inteldrm_wrap_ring(struct drm_i915_private *dev_priv) +{ + u_int32_t rem;; + + rem = dev_priv->ring.size - dev_priv->ring.tail; + if (dev_priv->ring.space < rem && + inteldrm_wait_ring(dev_priv, rem) != 0) + return; /* XXX */ + + dev_priv->ring.space -= rem; + + bus_space_set_region_4(dev_priv->bst, dev_priv->ring.bsh, + dev_priv->ring.woffset, MI_NOOP, rem / 4); + + dev_priv->ring.tail = 0; +} + +void +inteldrm_begin_ring(struct drm_i915_private *dev_priv, int ncmd) +{ + int bytes = 4 * ncmd; + + INTELDRM_VPRINTF("%s: %d\n", __func__, ncmd); + if (dev_priv->ring.tail + bytes > dev_priv->ring.size) + inteldrm_wrap_ring(dev_priv); + if (dev_priv->ring.space < bytes) + inteldrm_wait_ring(dev_priv, bytes); + dev_priv->ring.woffset = dev_priv->ring.tail; + dev_priv->ring.tail += bytes; + dev_priv->ring.tail &= dev_priv->ring.size - 1; + dev_priv->ring.space -= bytes; +} + +void +inteldrm_out_ring(struct drm_i915_private *dev_priv, u_int32_t cmd) +{ + INTELDRM_VPRINTF("%s: %x\n", __func__, cmd); + bus_space_write_4(dev_priv->bst, dev_priv->ring.bsh, + dev_priv->ring.woffset, cmd); + /* + * don't need to deal with wrap here because we padded + * the ring out if we would wrap + */ + dev_priv->ring.woffset += 4; +} + +void +inteldrm_advance_ring(struct drm_i915_private *dev_priv) +{ + INTELDRM_VPRINTF("%s: %x, %x\n", __func__, dev_priv->ring.wspace, + dev_priv->ring.woffset); + I915_WRITE(PRB0_TAIL, dev_priv->ring.tail); +} + +void +inteldrm_update_ring(struct drm_i915_private *dev_priv) +{ + struct inteldrm_ring *ring = &dev_priv->ring; + + ring->head = (I915_READ(PRB0_HEAD) & HEAD_ADDR); + ring->tail = (I915_READ(PRB0_TAIL) & TAIL_ADDR); + ring->space = ring->head - (ring->tail + 8); + if (ring->space < 0) + ring->space += ring->size; + INTELDRM_VPRINTF("%s: head: %x tail: %x space: %x\n", __func__, + ring->head, ring->tail, ring->space); +} + +void +i915_alloc_ifp(struct drm_i915_private *dev_priv, struct pci_attach_args *bpa) +{ + bus_addr_t addr; + u_int32_t reg; + + dev_priv->ifp.i9xx.bst = bpa->pa_memt; + + reg = pci_conf_read(bpa->pa_pc, bpa->pa_tag, I915_IFPADDR); + if (reg & 0x1) { + addr = (bus_addr_t)reg; + addr &= ~0x1; + /* XXX extents ... need data on whether bioses alloc or not. 
*/ + if (bus_space_map(bpa->pa_memt, addr, PAGE_SIZE, 0, + &dev_priv->ifp.i9xx.bsh) != 0) + goto nope; + return; + } else if (bpa->pa_memex == NULL || extent_alloc(bpa->pa_memex, + PAGE_SIZE, PAGE_SIZE, 0, 0, 0, &addr) || bus_space_map(bpa->pa_memt, + addr, PAGE_SIZE, 0, &dev_priv->ifp.i9xx.bsh)) + goto nope; + + pci_conf_write(bpa->pa_pc, bpa->pa_tag, I915_IFPADDR, addr | 0x1); + + return; + +nope: + dev_priv->ifp.i9xx.bsh = NULL; + printf(": no ifp "); +} + +void +i965_alloc_ifp(struct drm_i915_private *dev_priv, struct pci_attach_args *bpa) +{ + bus_addr_t addr; + u_int32_t lo, hi; + + dev_priv->ifp.i9xx.bst = bpa->pa_memt; + + hi = pci_conf_read(bpa->pa_pc, bpa->pa_tag, I965_IFPADDR + 4); + lo = pci_conf_read(bpa->pa_pc, bpa->pa_tag, I965_IFPADDR); + if (lo & 0x1) { + addr = (((u_int64_t)hi << 32) | lo); + addr &= ~0x1; + /* XXX extents ... need data on whether bioses alloc or not. */ + if (bus_space_map(bpa->pa_memt, addr, PAGE_SIZE, 0, + &dev_priv->ifp.i9xx.bsh) != 0) + goto nope; + return; + } else if (bpa->pa_memex == NULL || extent_alloc(bpa->pa_memex, + PAGE_SIZE, PAGE_SIZE, 0, 0, 0, &addr) || bus_space_map(bpa->pa_memt, + addr, PAGE_SIZE, 0, &dev_priv->ifp.i9xx.bsh)) + goto nope; + + pci_conf_write(bpa->pa_pc, bpa->pa_tag, I965_IFPADDR + 4, + upper_32_bits(addr)); + pci_conf_write(bpa->pa_pc, bpa->pa_tag, I965_IFPADDR, + (addr & 0xffffffff) | 0x1); + + return; + +nope: + dev_priv->ifp.i9xx.bsh = NULL; + printf(": no ifp "); +} + +void +inteldrm_chipset_flush(struct drm_i915_private *dev_priv) +{ + /* + * Write to this flush page flushes the chipset write cache. + * The write will return when it is done. + */ + if (IS_I9XX(dev_priv)) { + if (dev_priv->ifp.i9xx.bsh != NULL) + bus_space_write_4(dev_priv->ifp.i9xx.bst, + dev_priv->ifp.i9xx.bsh, 0, 1); + } else { + /* + * I8XX don't have a flush page mechanism, but do have the + * cache. Do it the bruteforce way. we write 1024 byes into + * the cache, then clflush them out so they'll kick the stuff + * we care about out of the chipset cache. + */ + if (dev_priv->ifp.i8xx.kva != NULL) { + memset(dev_priv->ifp.i8xx.kva, 0, 1024); + agp_flush_cache_range((vaddr_t)dev_priv->ifp.i8xx.kva, + 1024); + } + } +} + +void +inteldrm_lastclose(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; +#ifdef INTELDRM_GEM + struct vm_page *p; + int ret; + + ret = i915_gem_idle(dev_priv); + if (ret) + DRM_ERROR("failed to idle hardware: %d\n", ret); + + if (dev_priv->agpdmat != NULL) { + /* + * make sure we nuke everything, we may have mappings that we've + * unrefed, but uvm has a reference to them for maps. Make sure + * they get unbound and any accesses will segfault. + * XXX only do ones in GEM. 
+ */ + for (p = dev_priv->pgs; p < dev_priv->pgs + + (dev->agp->info.ai_aperture_size / PAGE_SIZE); p++) + pmap_page_protect(p, VM_PROT_NONE); + agp_bus_dma_destroy((struct agp_softc *)dev->agp->agpdev, + dev_priv->agpdmat); + } +#endif /* INTELDRM_GEM */ + dev_priv->agpdmat = NULL; + + + dev_priv->sarea_priv = NULL; + + i915_dma_cleanup(dev); +} + +#ifdef INTELDRM_GEM + +int +i915_gem_init_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_init *args = data; + + DRM_LOCK(); + + if (args->gtt_start >= args->gtt_end || + args->gtt_end > dev->agp->info.ai_aperture_size || + (args->gtt_start & PAGE_MASK) != 0 || + (args->gtt_end & PAGE_MASK) != 0) { + DRM_UNLOCK(); + return (EINVAL); + } + /* + * putting stuff in the last page of the aperture can cause nasty + * problems with prefetch going into unassigned memory. Since we put + * a scratch page on all unused aperture pages, just leave the last + * page as a spill to prevent gpu hangs. + */ + if (args->gtt_end == dev->agp->info.ai_aperture_size) + args->gtt_end -= 4096; + + if (agp_bus_dma_init((struct agp_softc *)dev->agp->agpdev, + dev->agp->base + args->gtt_start, dev->agp->base + args->gtt_end, + &dev_priv->agpdmat) != 0) { + DRM_UNLOCK(); + return (ENOMEM); + } + + dev->gtt_total = (uint32_t)(args->gtt_end - args->gtt_start); + + DRM_UNLOCK(); + + return 0; +} + +int +i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_get_aperture *args = data; + + args->aper_size = dev->gtt_total; + args->aper_available_size = (args->aper_size - + atomic_read(&dev->pin_memory)); + + return (0); +} + +/** + * Creates a new mm object and returns a handle to it. + */ +int +i915_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_create *args = data; + struct drm_obj *obj; + int handle, ret; + + args->size = round_page(args->size); + + /* Allocate the new object */ + obj = drm_gem_object_alloc(dev, args->size); + if (obj == NULL) + return (ENOMEM); + + ret = drm_handle_create(file_priv, obj, &handle); + /* handle has a reference now, drop ours. */ + DRM_LOCK(); + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + if (ret == 0) + args->handle = handle; + + return (ret); +} + +/** + * Reads data from the object referenced by handle. + * + * On error, the contents of *data are undefined. + */ +int +i915_gem_pread_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_pread *args = data; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + char *vaddr; + bus_space_handle_t bsh; + bus_size_t bsize; + voff_t offset; + int ret; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + obj_priv = (struct inteldrm_obj *)obj; + + /* + * Bounds check source. 
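The range validation in i915_gem_init_ioctl, including the trick of holding the last aperture page back as a prefetch spill page, can be read in isolation. A hedged userland sketch of just those checks; the page size and aperture numbers here are made up.

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE        4096u
#define SKETCH_PAGE_MASK        (SKETCH_PAGE_SIZE - 1)

/*
 * Returns 0 (and possibly clamps *gtt_end) when the requested managed
 * range is sane: non-empty, page aligned, inside the aperture, and not
 * covering the very last aperture page, which stays back as a spill
 * page so prefetch cannot run into unassigned memory.
 */
static int
check_gtt_range(uint64_t gtt_start, uint64_t *gtt_end, uint64_t aperture)
{
        if (gtt_start >= *gtt_end || *gtt_end > aperture ||
            (gtt_start & SKETCH_PAGE_MASK) != 0 ||
            (*gtt_end & SKETCH_PAGE_MASK) != 0)
                return (-1);
        if (*gtt_end == aperture)
                *gtt_end -= SKETCH_PAGE_SIZE;
        return (0);
}

int
main(void)
{
        uint64_t end = 256ull * 1024 * 1024;

        if (check_gtt_range(0, &end, 256ull * 1024 * 1024) == 0)
                printf("managing [0, %llu)\n", (unsigned long long)end);
        return (0);
}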
+ */ + DRM_LOCK(); + if (args->offset > obj->size || args->size > obj->size || + args->offset + args->size > obj->size) { + ret = EINVAL; + goto out; + } + + ret = i915_gem_object_pin(obj, 0, 1); + if (ret) + goto out; + ret = i915_gem_object_set_to_gtt_domain(obj, 0, 1); + if (ret) + goto unpin; + + obj_priv = (struct inteldrm_obj *)obj; + offset = obj_priv->gtt_offset + args->offset; + + bsize = round_page(offset + args->size) - trunc_page(offset); + + if ((ret = bus_space_subregion(dev_priv->bst, dev_priv->aperture_bsh, + trunc_page(offset), bsize, &bsh)) != 0) + goto unpin; + vaddr = bus_space_vaddr(dev->bst, bsh); + if (vaddr == NULL) { + ret = EFAULT; + goto unpin; + } + + ret = copyout(vaddr + (offset & PAGE_MASK), + (char *)(uintptr_t)args->data_ptr, args->size); + + if (ret) + goto unpin; + +unpin: + i915_gem_object_unpin(obj); +out: + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + return (ret); +} + + +/** + * Writes data to the object referenced by handle. + * + * On error, the contents of the buffer that were to be modified are undefined. + */ +int +i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_pwrite *args = data; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + char *vaddr; + bus_space_handle_t bsh; + bus_size_t bsize; + off_t offset; + int ret = 0; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + obj_priv = (struct inteldrm_obj *)obj; + + DRM_LOCK(); + /* Bounds check destination. */ + if (args->offset > obj->size || args->size > obj->size || + args->offset + args->size > obj->size) { + ret = EINVAL; + goto out; + } + + ret = i915_gem_object_pin(obj, 0, 1); + if (ret) + goto out; + ret = i915_gem_object_set_to_gtt_domain(obj, 1, 1); + if (ret) + goto done; + + obj_priv = (struct inteldrm_obj *)obj; + offset = obj_priv->gtt_offset + args->offset; + bsize = round_page(offset + args->size) - trunc_page(offset); + + if ((ret = bus_space_subregion(dev_priv->bst, dev_priv->aperture_bsh, + trunc_page(offset), bsize, &bsh)) != 0) + goto done; + vaddr = bus_space_vaddr(dev_priv->bst, bsh); + if (vaddr == NULL) { + ret = EFAULT; + goto done; + } + + ret = copyin((char *)(uintptr_t)args->data_ptr, + vaddr + (offset & PAGE_MASK), args->size); + + +done: + i915_gem_object_unpin(obj); +out: + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + return (ret); +} + +/** + * Called when user space prepares to use an object with the CPU, either through + * the mmap ioctl's mapping or a GTT mapping. + */ +int +i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_set_domain *args = data; + struct drm_obj *obj; + u_int32_t read_domains = args->read_domains; + u_int32_t write_domain = args->write_domain; + int ret; + + /* + * Only handle setting domains to types we allow the cpu to see. + * while linux allows the CPU domain here, we only allow GTT since that + * is all that we let userland near. + * Also sanity check that having something in the write domain implies + * it's in the read domain, and only that read domain. 
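The pread/pwrite bounds check compares each operand against the object size before checking the sum, which also catches wraparound, and the GTT window is widened to page boundaries before the copy is done at the sub-page offset. A standalone sketch of just that arithmetic, with invented names:

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE        4096ull
#define SKETCH_PAGE_MASK        (SKETCH_PAGE_SIZE - 1)
#define trunc_pg(x)             ((x) & ~SKETCH_PAGE_MASK)
#define round_pg(x)             (((x) + SKETCH_PAGE_MASK) & ~SKETCH_PAGE_MASK)

/* offset/size are untrusted; obj_size is trusted.  Returns 1 if in bounds. */
static int
range_ok(uint64_t offset, uint64_t size, uint64_t obj_size)
{
        /*
         * Because offset and size are each bounded by obj_size first,
         * their sum can only wrap if obj_size itself exceeded half the
         * type's range, which object sizes never do.
         */
        return (!(offset > obj_size || size > obj_size ||
            offset + size > obj_size));
}

int
main(void)
{
        uint64_t gtt_offset = 0x123000, user_off = 0x10, len = 100;
        uint64_t off = gtt_offset + user_off;

        printf("ok=%d window=[%llu,%llu) copy at +%llu\n",
            range_ok(user_off, len, 4096),
            (unsigned long long)trunc_pg(off),
            (unsigned long long)round_pg(off + len),
            (unsigned long long)(off & SKETCH_PAGE_MASK));
        return (0);
}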
+ */ + if ((write_domain | read_domains) & ~I915_GEM_DOMAIN_GTT || + (write_domain != 0 && read_domains != write_domain)) + return (EINVAL); + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + + DRM_LOCK(); + ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0, 1); + + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + /* + * Silently promote `you're not bound, there was nothing to do' + * to success, since the client was just asking us to make sure + * everything was done. + */ + return ((ret == EINVAL) ? 0 : ret); +} + +int +i915_gem_gtt_map_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_mmap *args = data; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + vaddr_t addr; + voff_t offset; + vsize_t end, nsize; + int ret; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + + DRM_LOCK(); + obj_priv = (struct inteldrm_obj *)obj; + + /* Check size. Also ensure that the object is not purgeable */ + if (args->size == 0 || args->offset > obj->size || args->size > + obj->size || (args->offset + args->size) > obj->size || + i915_obj_purgeable(obj_priv)) { + ret = EINVAL; + goto done; + } + + /* bind to the gtt to speed faulting */ + if (obj_priv->dmamap == NULL) { + ret = i915_gem_object_bind_to_gtt(obj, 0, 1); + if (ret) + goto done; + i915_gem_object_move_to_inactive(obj); + } + + + end = round_page(args->offset + args->size); + offset = trunc_page(args->offset); + nsize = end - offset; + + /* + * We give our reference from object_lookup to the mmap, so only + * must free it in the case that the map fails. + */ + addr = uvm_map_hint(curproc, VM_PROT_READ | VM_PROT_WRITE); + ret = uvm_map_p(&curproc->p_vmspace->vm_map, &addr, nsize, &obj->uobj, + offset, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, + UVM_INH_SHARE, UVM_ADV_RANDOM, 0), curproc); + +done: + if (ret != 0) + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + if (ret == 0) + args->addr_ptr = (uint64_t) addr + (args->offset & PAGE_MASK); + + return (ret); +} + +/* called locked */ +void +i915_gem_object_move_to_active(struct drm_obj *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + struct inteldrm_fence *reg; + u_int32_t seqno = dev_priv->mm.next_gem_seqno; + + /* Add a reference if we're newly entering the active list. */ + if (!inteldrm_is_active(obj_priv)) { + drm_gem_object_reference(obj); + atomic_setbits_int(&obj_priv->io_flags, I915_ACTIVE); + } + + if (inteldrm_needs_fence(obj_priv)) { + reg = &dev_priv->fence_regs[obj_priv->fence_reg]; + reg->last_rendering_seqno = seqno; + } + + /* Move from whatever list we were on to the tail of execution. 
*/ + i915_move_to_tail(obj_priv, &dev_priv->mm.active_list); + obj_priv->last_rendering_seqno = seqno; +} + +void +i915_gem_object_move_off_active(struct drm_obj *obj) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + struct inteldrm_fence *reg; + + obj_priv->last_rendering_seqno = 0; + if (inteldrm_needs_fence(obj_priv)) { + reg = &dev_priv->fence_regs[obj_priv->fence_reg]; + reg->last_rendering_seqno = 0; + } +} + +/* called locked */ +void +i915_gem_object_move_to_inactive(struct drm_obj *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + if (obj_priv->pin_count != 0) + i915_list_remove(obj_priv); + else + i915_move_to_tail(obj_priv, &dev_priv->mm.inactive_list); + + i915_gem_object_move_off_active(obj); + atomic_clearbits_int(&obj_priv->io_flags, I915_FENCED_EXEC); + + KASSERT((obj_priv->io_flags & I915_GPU_WRITE) == 0); + if (inteldrm_is_active(obj_priv)) { + atomic_clearbits_int(&obj_priv->io_flags, + I915_ACTIVE); + drm_gem_object_unreference(obj); + } + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); +} + +void +inteldrm_purge_obj(struct drm_obj *obj) +{ + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + + /* + * may sleep. We free here instead of deactivate (which + * the madvise() syscall would do) because in this case + * (userland bo cache and GL_APPLE_object_purgeable objects in + * OpenGL) the pages are defined to be freed if they were cleared + * so kill them and free up the memory + */ + simple_lock(&obj->uao->vmobjlock); + obj->uao->pgops->pgo_flush(obj->uao, 0, obj->size, + PGO_ALLPAGES | PGO_FREE); + simple_unlock(&obj->uao->vmobjlock); + + /* + * If flush failed, it may have halfway through, so just + * always mark as purged + */ + atomic_setbits_int(&obj_priv->io_flags, I915_PURGED); +} + +void +inteldrm_process_flushing(struct drm_i915_private *dev_priv, + u_int32_t flush_domains) +{ + struct inteldrm_obj *obj_priv, *next; + + for (obj_priv = TAILQ_FIRST(&dev_priv->mm.gpu_write_list); + obj_priv != TAILQ_END(&dev_priv->mm.gpu_write_list); + obj_priv = next) { + struct drm_obj *obj = &(obj_priv->obj); + + next = TAILQ_NEXT(obj_priv, write_list); + + if ((obj->write_domain & flush_domains) == obj->write_domain) { + + obj->write_domain = 0; + TAILQ_REMOVE(&dev_priv->mm.gpu_write_list, + obj_priv, write_list); + atomic_clearbits_int(&obj_priv->io_flags, + I915_GPU_WRITE); + i915_gem_object_move_to_active(obj); + /* if we still need the fence, update LRU */ + if (inteldrm_needs_fence(obj_priv)) { + KASSERT(obj_priv->fence_reg != + I915_FENCE_REG_NONE); + /* we have a fence, won't sleep, can't fail */ + i915_gem_get_fence_reg(obj, 1); + } + + } + } +} + +/** + * Creates a new sequence number, emitting a write of it to the status page + * plus an interrupt, which will trigger and interrupt if they are currently + * enabled. + * + * Must be called with struct_lock held. + * + * Returned sequence numbers are nonzero on success. 
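The rule encoded by the move_to_active/move_to_inactive pair above is simple: the active list itself holds one reference on the object, and the list tail is the most recently used entry. A toy illustration of that rule using <sys/queue.h>; the names are invented, and the real code moves an object from whichever list it happened to be on.

#include <sys/queue.h>
#include <stdio.h>

struct toy_obj {
        TAILQ_ENTRY(toy_obj)    entry;
        int                     refcnt;
        int                     active;
};
TAILQ_HEAD(toy_list, toy_obj);

static void
toy_move_to_active(struct toy_list *from, struct toy_list *active,
    struct toy_obj *obj)
{
        if (!obj->active) {
                obj->refcnt++;          /* active list now holds a ref */
                obj->active = 1;
        }
        TAILQ_REMOVE(from, obj, entry);
        TAILQ_INSERT_TAIL(active, obj, entry);  /* tail == most recent */
}

static void
toy_move_to_inactive(struct toy_list *active, struct toy_list *inactive,
    struct toy_obj *obj)
{
        TAILQ_REMOVE(active, obj, entry);
        TAILQ_INSERT_TAIL(inactive, obj, entry);
        if (obj->active) {
                obj->active = 0;
                obj->refcnt--;          /* drop the list's reference */
        }
}

int
main(void)
{
        struct toy_list active, inactive;
        struct toy_obj o = { .refcnt = 1 };

        TAILQ_INIT(&active);
        TAILQ_INIT(&inactive);
        TAILQ_INSERT_TAIL(&inactive, &o, entry);
        toy_move_to_active(&inactive, &active, &o);
        toy_move_to_inactive(&active, &inactive, &o);
        printf("refcnt %d active %d\n", o.refcnt, o.active);
        return (0);
}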
+ */ +uint32_t +i915_add_request(struct drm_i915_private *dev_priv) +{ + struct inteldrm_request *request; + uint32_t seqno; + int was_empty; + + request = drm_calloc(1, sizeof(*request)); + if (request == NULL) { + printf("%s: failed to allocate request\n", __func__); + return 0; + } + + /* Grab the seqno we're going to make this request be, and bump the + * next (skipping 0 so it can be the reserved no-seqno value). + */ + seqno = dev_priv->mm.next_gem_seqno; + dev_priv->mm.next_gem_seqno++; + if (dev_priv->mm.next_gem_seqno == 0) + dev_priv->mm.next_gem_seqno++; + + BEGIN_LP_RING(4); + OUT_RING(MI_STORE_DWORD_INDEX); + OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); + OUT_RING(seqno); + + OUT_RING(MI_USER_INTERRUPT); + ADVANCE_LP_RING(); + + DRM_DEBUG("%d\n", seqno); + + /* XXX request timing for throttle */ + request->seqno = seqno; + was_empty = TAILQ_EMPTY(&dev_priv->mm.request_list); + TAILQ_INSERT_TAIL(&dev_priv->mm.request_list, request, list); + + if (dev_priv->mm.suspended == 0) { + if (was_empty) + timeout_add_sec(&dev_priv->mm.retire_timer, 1); + /* XXX was_empty? */ + timeout_add_msec(&dev_priv->mm.hang_timer, 750); + } + return seqno; +} + +/** + * Moves buffers associated only with the given active seqno from the active + * to inactive list, potentially freeing them. + * + * called with and sleeps with the drm_lock. + */ +void +i915_gem_retire_request(struct drm_i915_private *dev_priv, + struct inteldrm_request *request) +{ + struct inteldrm_obj *obj_priv; + + /* Move any buffers on the active list that are no longer referenced + * by the ringbuffer to the flushing/inactive lists as appropriate. + */ + while ((obj_priv = TAILQ_FIRST(&dev_priv->mm.active_list)) != NULL) { + struct drm_obj *obj = &obj_priv->obj; + + /* If the seqno being retired doesn't match the oldest in the + * list, then the oldest in the list must still be newer than + * this seqno. + */ + if (obj_priv->last_rendering_seqno != request->seqno) + return; + + /* + * If we're now clean and can be read from, move inactive, + * else put on the flushing list to signify that we're not + * available quite yet. + */ + if (obj->write_domain != 0) { + KASSERT(inteldrm_is_active(obj_priv)); + i915_move_to_tail(obj_priv, + &dev_priv->mm.flushing_list); + i915_gem_object_move_off_active(obj); + } else { + i915_gem_object_move_to_inactive(obj); + } + } +} + +/** + * This function clears the request list as sequence numbers are passed. + */ +void +i915_gem_retire_requests(struct drm_i915_private *dev_priv) +{ + struct inteldrm_request *request; + uint32_t seqno; + + if (dev_priv->hw_status_page == NULL) + return; + + seqno = i915_get_gem_seqno(dev_priv); + + while ((request = TAILQ_FIRST(&dev_priv->mm.request_list)) != NULL) { + if (i915_seqno_passed(seqno, request->seqno) || + dev_priv->mm.wedged) { + i915_gem_retire_request(dev_priv, request); + + TAILQ_REMOVE(&dev_priv->mm.request_list, request, list); + drm_free(request); + } else + break; + } +} + +void +i915_gem_retire_work_handler(void *arg1, void *unused) +{ + drm_i915_private_t *dev_priv = arg1; + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + + DRM_LOCK(); + i915_gem_retire_requests(dev_priv); + if (!TAILQ_EMPTY(&dev_priv->mm.request_list)) + timeout_add_sec(&dev_priv->mm.retire_timer, 1); + DRM_UNLOCK(); +} + +/** + * Waits for a sequence number to be signaled, and cleans up the + * request and object lists appropriately for that event. + * + * Called locked, sleeps with it. 
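Sequence numbers above are handed out skipping zero (the reserved "no seqno" value), and retirement depends on a wraparound-safe comparison. i915_seqno_passed is not shown in this hunk, so the sketch below uses the common signed-difference idiom as a stand-in; treat it as an assumption about the helper rather than the driver's exact code.

#include <stdint.h>
#include <stdio.h>

static uint32_t next_seqno = 1;

/* hand out the next seqno, skipping 0 on wrap */
static uint32_t
seqno_alloc(void)
{
        uint32_t seqno = next_seqno++;

        if (next_seqno == 0)
                next_seqno = 1;
        return (seqno);
}

/*
 * Wraparound-safe "has a passed b": the difference is interpreted as
 * signed, which stays correct as long as outstanding requests span less
 * than half the 32-bit seqno space.
 */
static int
seqno_passed(uint32_t a, uint32_t b)
{
        return ((int32_t)(a - b) >= 0);
}

int
main(void)
{
        uint32_t s = seqno_alloc();

        printf("passed: %d, wrap case: %d\n",
            seqno_passed(s + 5, s), seqno_passed(3, 0xfffffff0u));
        return (0);
}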
+ */ +int +i915_wait_request(struct drm_i915_private *dev_priv, uint32_t seqno, + int interruptible) +{ + int ret = 0; + + KASSERT(seqno != dev_priv->mm.next_gem_seqno); + + /* Check first because poking a wedged chip is bad. */ + if (dev_priv->mm.wedged) + return (EIO); + + if (seqno == dev_priv->mm.next_gem_seqno) { + seqno = i915_add_request(dev_priv); + if (seqno == 0) + return (ENOMEM); + } + + if (!i915_seqno_passed(i915_get_gem_seqno(dev_priv), seqno)) { + mtx_enter(&dev_priv->user_irq_lock); + i915_user_irq_get(dev_priv); + while (ret == 0) { + if (i915_seqno_passed(i915_get_gem_seqno(dev_priv), + seqno) || dev_priv->mm.wedged) + break; + ret = msleep(dev_priv, &dev_priv->user_irq_lock, + PZERO | (interruptible ? PCATCH : 0), "gemwt", 0); + } + i915_user_irq_put(dev_priv); + mtx_leave(&dev_priv->user_irq_lock); + } + if (dev_priv->mm.wedged) + ret = EIO; + + /* Directly dispatch request retiring. While we have the work queue + * to handle this, the waiter on a request often wants an associated + * buffer to have made it to the inactive list, and we would need + * a separate wait queue to handle that. + */ + if (ret == 0) + i915_gem_retire_requests(dev_priv); + + return (ret); +} + +/* + * flush and invalidate the provided domains + * if we have successfully queued a gpu flush, then we return a seqno from + * the request. else (failed or just cpu flushed) we return 0. + */ +u_int32_t +i915_gem_flush(struct drm_i915_private *dev_priv, uint32_t invalidate_domains, + uint32_t flush_domains) +{ + uint32_t cmd; + + if (flush_domains & I915_GEM_DOMAIN_CPU) + inteldrm_chipset_flush(dev_priv); + if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) { + /* + * read/write caches: + * + * I915_GEM_DOMAIN_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is + * also flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * + * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if + * MI_READ_FLUSH is set, and is always flushed on 965. + * + * I915_GEM_DOMAIN_COMMAND may not exist? + * + * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is + * invalidated when MI_EXE_FLUSH is set. + * + * I915_GEM_DOMAIN_VERTEX, which exists on 965, is + * invalidated with every MI_FLUSH. + * + * TLBs: + * + * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND + * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and + * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER + * are flushed at any MI_FLUSH. + */ + + cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; + if ((invalidate_domains | flush_domains) & + I915_GEM_DOMAIN_RENDER) + cmd &= ~MI_NO_WRITE_FLUSH; + /* + * On the 965, the sampler cache always gets flushed + * and this bit is reserved. + */ + if (!IS_I965G(dev_priv) && + invalidate_domains & I915_GEM_DOMAIN_SAMPLER) + cmd |= MI_READ_FLUSH; + if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) + cmd |= MI_EXE_FLUSH; + + BEGIN_LP_RING(2); + OUT_RING(cmd); + OUT_RING(MI_NOOP); + ADVANCE_LP_RING(); + } + + /* if this is a gpu flush, process the results */ + if (flush_domains & I915_GEM_GPU_DOMAINS) { + inteldrm_process_flushing(dev_priv, flush_domains); + return (i915_add_request(dev_priv)); + } + + return (0); +} + +/** + * Unbinds an object from the GTT aperture. + * + * XXX track dirty and pass down to uvm (note, DONTNEED buffers are clean). 
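The flush command assembled in i915_gem_flush is driven purely by the domain masks. The sketch below mirrors that decision logic; the domain and MI_* bit values are placeholders, not the hardware encodings.

#include <stdint.h>
#include <stdio.h>

/* placeholder bits standing in for the real domain and MI_* constants */
#define DOM_RENDER              0x02u
#define DOM_SAMPLER             0x04u
#define DOM_INSTRUCTION         0x10u

#define XMI_FLUSH               (0x04u << 23)
#define XMI_NO_WRITE_FLUSH      (1u << 2)
#define XMI_READ_FLUSH          (1u << 1)
#define XMI_EXE_FLUSH           (1u << 0)

static uint32_t
build_flush_cmd(uint32_t invalidate, uint32_t flush, int is_965)
{
        uint32_t cmd = XMI_FLUSH | XMI_NO_WRITE_FLUSH;

        /* the render cache is written back only if NO_WRITE_FLUSH is clear */
        if ((invalidate | flush) & DOM_RENDER)
                cmd &= ~XMI_NO_WRITE_FLUSH;
        /* pre-965 parts must ask for the sampler cache explicitly */
        if (!is_965 && (invalidate & DOM_SAMPLER))
                cmd |= XMI_READ_FLUSH;
        if (invalidate & DOM_INSTRUCTION)
                cmd |= XMI_EXE_FLUSH;
        return (cmd);
}

int
main(void)
{
        printf("cmd %#x\n",
            (unsigned)build_flush_cmd(DOM_SAMPLER, DOM_RENDER, 0));
        return (0);
}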
+ */ +int +i915_gem_object_unbind(struct drm_obj *obj, int interruptible) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int ret = 0; + + /* + * if it's already unbound, or we've already done lastclose, just + * let it happen. XXX does this fail to unwire? + */ + if (obj_priv->dmamap == NULL || dev_priv->agpdmat == NULL) + return 0; + + if (obj_priv->pin_count != 0) { + DRM_ERROR("Attempting to unbind pinned buffer\n"); + return (EINVAL); + } + + KASSERT(!i915_obj_purged(obj_priv)); + + /* Move the object to the CPU domain to ensure that + * any possible CPU writes while it's not in the GTT + * are flushed when we go to remap it. This will + * also ensure that all pending GPU writes are finished + * before we unbind. + */ + ret = i915_gem_object_set_to_cpu_domain(obj, 1, interruptible); + if (ret) + return ret; + + KASSERT(!inteldrm_is_active(obj_priv)); + + /* if it's purgeable don't bother dirtying the pages */ + if (i915_obj_purgeable(obj_priv)) + atomic_clearbits_int(&obj_priv->io_flags, I915_DIRTY); + /* + * unload the map, then unwire the backing object. + */ + i915_gem_save_bit_17_swizzle(obj); + bus_dmamap_unload(dev_priv->agpdmat, obj_priv->dmamap); + uvm_objunwire(obj->uao, 0, obj->size); + /* XXX persistent dmamap worth the memory? */ + bus_dmamap_destroy(dev_priv->agpdmat, obj_priv->dmamap); + obj_priv->dmamap = NULL; + free(obj_priv->dma_segs, M_DRM); + obj_priv->dma_segs = NULL; + /* XXX this should change whether we tell uvm the page is dirty */ + atomic_clearbits_int(&obj_priv->io_flags, I915_DIRTY); + + obj_priv->gtt_offset = 0; + atomic_dec(&dev->gtt_count); + atomic_sub(obj->size, &dev->gtt_memory); + + /* Remove ourselves from any LRU list if present. */ + i915_list_remove((struct inteldrm_obj *)obj); + + if (i915_obj_purgeable(obj_priv)) + inteldrm_purge_obj(obj); + + return (0); +} + +int +i915_gem_evict_something(struct drm_i915_private *dev_priv, size_t min_size, + int interruptible) +{ + struct drm_obj *obj; + struct inteldrm_request *request; + struct inteldrm_obj *obj_priv; + int ret = 0; + + for (;;) { + i915_gem_retire_requests(dev_priv); + + /* If there's an inactive buffer available now, grab it + * and be done. + */ + obj = i915_gem_find_inactive_object(dev_priv, min_size); + if (obj != NULL) { + obj_priv = (struct inteldrm_obj *)obj; + + KASSERT(obj_priv->pin_count == 0); + KASSERT(!inteldrm_is_active(obj_priv)); + + /* Wait on the rendering and unbind the buffer. */ + return (i915_gem_object_unbind(obj, interruptible)); + } + + /* If we didn't get anything, but the ring is still processing + * things, wait for one of those things to finish and hopefully + * leave us a buffer to evict. + */ + if ((request = TAILQ_FIRST(&dev_priv->mm.request_list)) + != NULL) { + ret = i915_wait_request(dev_priv, request->seqno, + interruptible); + if (ret) + return (ret); + + continue; + } + + /* If we didn't have anything on the request list but there + * are buffers awaiting a flush, emit one and try again. + * When we wait on it, those buffers waiting for that flush + * will get moved to inactive. 
+ */ + TAILQ_FOREACH(obj_priv, &dev_priv->mm.flushing_list, list) { + obj = &obj_priv->obj; + if (obj->size >= min_size) + break; + obj = NULL; + } + + if (obj != NULL) { + if (i915_gem_flush(dev_priv, obj->write_domain, + obj->write_domain) == 0) + return (ENOMEM); + continue; + } + + /* + * If we didn't do any of the above, there's no single buffer + * large enough to swap out for the new one, so just evict + * everything and start again. (This should be rare.) + */ + if (!TAILQ_EMPTY(&dev_priv->mm.inactive_list)) + return (i915_gem_evict_inactive(dev_priv)); + else + return (i915_gem_evict_everything(dev_priv, + interruptible)); + } + /* NOTREACHED */ +} + +struct drm_obj * +i915_gem_find_inactive_object(struct drm_i915_private *dev_priv, + size_t min_size) +{ + struct drm_obj *obj, *best = NULL, *first = NULL; + struct inteldrm_obj *obj_priv; + + TAILQ_FOREACH(obj_priv, &dev_priv->mm.inactive_list, list) { + obj = &obj_priv->obj; + if (obj->size >= min_size) { + if ((!inteldrm_is_dirty(obj_priv) || + i915_obj_purgeable(obj_priv)) && + (best == NULL || obj->size < best->size)) { + best = obj; + if (best->size == min_size) + return (best); + } + } + if (first == NULL) + first = obj; + } + + return ((best != NULL) ? best : first); +} + +int +i915_gem_evict_everything(struct drm_i915_private *dev_priv, int interruptible) +{ + u_int32_t seqno; + int ret; + + if (TAILQ_EMPTY(&dev_priv->mm.inactive_list) && + TAILQ_EMPTY(&dev_priv->mm.flushing_list) && + TAILQ_EMPTY(&dev_priv->mm.active_list)) + return (ENOSPC); + + seqno = i915_gem_flush(dev_priv, I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); + if (seqno == 0) + return (ENOMEM); + + if ((ret = i915_wait_request(dev_priv, seqno, interruptible)) != 0 || + (ret = i915_gem_evict_inactive(dev_priv)) != 0) + return (ret); + + /* + * All lists should be empty because we flushed the whole queue, then + * we evicted the whole shebang, only pinned objects are still bound. + */ + KASSERT(TAILQ_EMPTY(&dev_priv->mm.inactive_list)); + KASSERT(TAILQ_EMPTY(&dev_priv->mm.flushing_list)); + KASSERT(TAILQ_EMPTY(&dev_priv->mm.active_list)); + + return (0); +} +/* + * return required GTT alignment for an object, taking into account potential + * fence register needs + */ +bus_size_t +i915_gem_get_gtt_alignment(struct drm_obj *obj) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + bus_size_t start, i; + + /* + * Minimum alignment is 4k (GTT page size), but fence registers may + * modify this + */ + if (IS_I965G(dev_priv) || obj_priv->tiling_mode == I915_TILING_NONE) + return (4096); + + /* + * Older chips need to be aligned to the size of the smallest fence + * register that can contain the object. 
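i915_gem_find_inactive_object above is a best-fit scan: pick the smallest clean (or purgeable) object that still satisfies min_size, and fall back to the first object seen so eviction can always make some progress. A standalone version of the same scan over a plain array, with invented names:

#include <stddef.h>
#include <stdio.h>

struct toy_buf {
        size_t  size;
        int     dirty;
};

static struct toy_buf *
find_victim(struct toy_buf *bufs, int n, size_t min_size)
{
        struct toy_buf *best = NULL, *first = NULL;
        int i;

        for (i = 0; i < n; i++) {
                struct toy_buf *b = &bufs[i];

                if (b->size >= min_size && !b->dirty &&
                    (best == NULL || b->size < best->size)) {
                        best = b;
                        if (best->size == min_size)
                                break;          /* exact fit, stop early */
                }
                if (first == NULL)
                        first = b;
        }
        return (best != NULL ? best : first);
}

int
main(void)
{
        struct toy_buf bufs[] = {
                { 4096, 1 }, { 65536, 0 }, { 16384, 0 }, { 8192, 0 },
        };
        struct toy_buf *v = find_victim(bufs, 4, 8192);

        printf("victim size %zu\n", v->size);   /* expect 8192 */
        return (0);
}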
+ */ + if (IS_I9XX(dev_priv)) + start = 1024 * 1024; + else + start = 512 * 1024; + + for (i = start; i < obj->size; i <<= 1) + ; + + return (i); +} + +void +i965_write_fence_reg(struct inteldrm_fence *reg) +{ + struct drm_obj *obj = reg->obj; + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int regnum = obj_priv->fence_reg; + u_int64_t val; + + val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & + 0xfffff000) << 32; + val |= obj_priv->gtt_offset & 0xfffff000; + val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; + if (obj_priv->tiling_mode == I915_TILING_Y) + val |= 1 << I965_FENCE_TILING_Y_SHIFT; + val |= I965_FENCE_REG_VALID; + + I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); +} + +void +i915_write_fence_reg(struct inteldrm_fence *reg) +{ + struct drm_obj *obj = reg->obj; + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + bus_size_t fence_reg; + u_int32_t val; + u_int32_t pitch_val; + int regnum = obj_priv->fence_reg; + int tile_width; + + if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || + (obj_priv->gtt_offset & (obj->size - 1))) { + DRM_ERROR("%s: object 0x%08x not 1M or size (0x%zx) aligned\n", + __func__, obj_priv->gtt_offset, obj->size); + return; + } + + if (obj_priv->tiling_mode == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(dev_priv)) + tile_width = 128; + else + tile_width = 512; + + /* Note: pitch better be a power of two tile widths */ + pitch_val = obj_priv->stride / tile_width; + pitch_val = ffs(pitch_val) - 1; + + val = obj_priv->gtt_offset; + if (obj_priv->tiling_mode == I915_TILING_Y) + val |= 1 << I830_FENCE_TILING_Y_SHIFT; + val |= I915_FENCE_SIZE_BITS(obj->size); + val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; + + if (regnum < 8) + fence_reg = FENCE_REG_830_0 + (regnum * 4); + else + fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); + I915_WRITE(fence_reg, val); +} + +void +i830_write_fence_reg(struct inteldrm_fence *reg) +{ + struct drm_obj *obj = reg->obj; + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int regnum = obj_priv->fence_reg; + u_int32_t pitch_val, val; + + if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || + (obj_priv->gtt_offset & (obj->size - 1))) { + DRM_ERROR("object 0x%08x not 512K or size aligned\n", + obj_priv->gtt_offset); + return; + } + + pitch_val = ffs(obj_priv->stride / 128) - 1; + + val = obj_priv->gtt_offset; + if (obj_priv->tiling_mode == I915_TILING_Y) + val |= 1 << I830_FENCE_TILING_Y_SHIFT; + val |= I830_FENCE_SIZE_BITS(obj->size); + val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; + + I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); + +} + +/* + * i915_gem_get_fence_reg - set up a fence reg for an object + * + * When mapping objects through the GTT, userspace wants to be able to write + * to them without having to worry about swizzling if the object is tiled. + * + * This function walks the fence regs looking for a free one, stealing one + * if it can't find any. + * + * It then sets up the reg based on the object's properties: address, pitch + * and tiling format. 
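Two bits of arithmetic recur in the fence code above: pre-965 fence regions are power-of-two sized and aligned (starting at 512KB or 1MB), and the pitch field stores log2 of the stride in tile widths via ffs(). A sketch of both; the tile widths follow the code above, everything else is invented.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>

/* round the object size up to the next fence-sized power of two */
static uint32_t
fence_alignment(uint32_t size, int is_9xx)
{
        uint32_t align = is_9xx ? 1024 * 1024 : 512 * 1024;

        while (align < size)
                align <<= 1;
        return (align);
}

/*
 * The fence pitch field is log2(stride / tile_width); the tile width is
 * 128 bytes for Y tiling on parts with 128-byte Y tiles, 512 otherwise.
 */
static uint32_t
fence_pitch_val(uint32_t stride, uint32_t tile_width)
{
        return (ffs(stride / tile_width) - 1);
}

int
main(void)
{
        printf("align %u pitch_val %u\n",
            (unsigned)fence_alignment(300 * 1024, 1),
            (unsigned)fence_pitch_val(2048, 512));
        return (0);
}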
+ */ +int +i915_gem_get_fence_reg(struct drm_obj *obj, int interruptible) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + struct inteldrm_obj *old_obj_priv = NULL; + struct drm_obj *old_obj = NULL; + struct inteldrm_fence *reg = NULL; + int i, ret, avail; + + /* If our fence is getting used, just update our place in the LRU */ + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { + reg = &dev_priv->fence_regs[obj_priv->fence_reg]; + + TAILQ_REMOVE(&dev_priv->mm.fence_list, reg, list); + TAILQ_INSERT_TAIL(&dev_priv->mm.fence_list, reg, list); + return (0); + } + + switch (obj_priv->tiling_mode) { + case I915_TILING_NONE: + DRM_ERROR("allocating a fence for non-tiled object?\n"); + break; + case I915_TILING_X: + if (obj_priv->stride == 0) + return (EINVAL); + if (obj_priv->stride & (512 - 1)) + DRM_ERROR("object 0x%08x is X tiled but has non-512B" + " pitch\n", obj_priv->gtt_offset); + break; + case I915_TILING_Y: + if (obj_priv->stride == 0) + return (EINVAL); + if (obj_priv->stride & (128 - 1)) + DRM_ERROR("object 0x%08x is Y tiled but has non-128B" + " pitch\n", obj_priv->gtt_offset); + break; + } + + /* First try to find a free reg */ + avail = 0; + for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { + reg = &dev_priv->fence_regs[i]; + if (reg->obj == NULL) + break; + + old_obj_priv = (struct inteldrm_obj *)reg->obj; + if (old_obj_priv->pin_count == 0) + avail++; + } + + /* None available, try to steal one or wait for a user to finish */ + if (i == dev_priv->num_fence_regs) { + if (avail == 0) + return (ENOMEM); + + TAILQ_FOREACH(reg, &dev_priv->mm.fence_list, + list) { + old_obj = reg->obj; + old_obj_priv = (struct inteldrm_obj *)old_obj; + + if (old_obj_priv->pin_count) + continue; + + /* Ref it so that wait_rendering doesn't free it under + * us. + */ + drm_gem_object_reference(old_obj); + + break; + } + + i = old_obj_priv->fence_reg; + reg = &dev_priv->fence_regs[i]; + + ret = i915_gem_object_put_fence_reg(old_obj, interruptible); + drm_gem_object_unreference(old_obj); + if (ret != 0) + return (ret); + } + + obj_priv->fence_reg = i; + reg->obj = obj; + TAILQ_INSERT_TAIL(&dev_priv->mm.fence_list, reg, list); + + if (IS_I965G(dev_priv)) + i965_write_fence_reg(reg); + else if (IS_I9XX(dev_priv)) + i915_write_fence_reg(reg); + else + i830_write_fence_reg(reg); + + return 0; +} + +int +i915_gem_object_put_fence_reg(struct drm_obj *obj, int interruptible) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + struct inteldrm_fence *reg; + int ret; + + if (obj_priv->fence_reg == I915_FENCE_REG_NONE) + return (0); + + /* + * If the last execbuffer we did on the object needed a fence then + * we must emit a flush. 
+ */ + if (inteldrm_needs_fence(obj_priv)) { + ret = i915_gem_object_flush_gpu_write_domain(obj, 1, + interruptible); + if (ret != 0) + return (ret); + } + + /* if rendering is queued up that depends on the fence, wait for it */ + reg = &dev_priv->fence_regs[obj_priv->fence_reg]; + if (reg->last_rendering_seqno != 0) { + ret = i915_wait_request(dev_priv, reg->last_rendering_seqno, + interruptible); + if (ret != 0) + return (ret); + } + + /* tiling changed, must wipe userspace mappings */ + if ((obj->write_domain | obj->read_domains) & I915_GEM_DOMAIN_GTT) { + inteldrm_wipe_mappings(obj); + if (obj->write_domain == I915_GEM_DOMAIN_GTT) + obj->write_domain = 0; + } + + if (IS_I965G(dev_priv)) { + I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); + } else { + u_int32_t fence_reg; + + if (obj_priv->fence_reg < 8) + fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; + else + fence_reg = FENCE_REG_945_8 + + (obj_priv->fence_reg - 8) * 4; + I915_WRITE(fence_reg , 0); + } + + reg->obj = NULL; + TAILQ_REMOVE(&dev_priv->mm.fence_list, reg, list); + obj_priv->fence_reg = I915_FENCE_REG_NONE; + + return (0); +} + +int +inteldrm_fault(struct drm_obj *obj, struct uvm_faultinfo *ufi, off_t offset, + vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx, + vm_prot_t access_type, int flags) +{ + struct drm_device *dev = obj->dev; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + paddr_t paddr; + int lcv, ret; + int write = !!(access_type & VM_PROT_WRITE); + vm_prot_t mapprot; + + DRM_LOCK(); + /* + * XXX is it ok to sleep in fault handlers? If not, we may have some + * problems... (i can has race plz? -> judicious use of + * uvmfault_unlockall ahoy) + */ + if (obj_priv->dmamap == NULL) { + ret = i915_gem_object_bind_to_gtt(obj, 0, 0); + if (ret) { + printf("%s: failed to bind\n", __func__); + goto error; + } + i915_gem_object_move_to_inactive(obj); + } + + /* + * We could only do this on bind so allow for map_buffer_range + * unsynchronised objects (where buffer suballocation + * is done by the GL application, however it gives coherency problems + * normally. + */ + ret = i915_gem_object_set_to_gtt_domain(obj, write, 0); + if (ret) { + printf("%s: failed to set to gtt (%d)\n", + __func__, ret); + goto error; + } + + mapprot = ufi->entry->protection; + /* + * if it's only a read fault, we only put ourselves into the gtt + * read domain, so make sure we fault again and set ourselves to write. + * this prevents us needing userland to do domain management and get + * it wrong, and makes us fully coherent with the gpu re mmap. + */ + if (write == 0) + mapprot &= ~VM_PROT_WRITE; + /* XXX try and be more efficient when we do this */ + for (lcv = 0 ; lcv < npages ; lcv++, offset += PAGE_SIZE, + vaddr += PAGE_SIZE) { + if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) + continue; + + if (pps[lcv] == PGO_DONTCARE) + continue; + + paddr = dev->agp->base + obj_priv->gtt_offset + offset; + + UVMHIST_LOG(maphist, + " MAPPING: device: pm=%p, va=0x%lx, pa=0x%lx, at=%ld", + ufi->orig_map->pmap, vaddr, (u_long)paddr, mapprot); + /* XXX writecombining */ + if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr | PMAP_NOCACHE, + mapprot, PMAP_CANFAIL | mapprot) != 0) { + DRM_UNLOCK(); + printf("%s: enter failed\n", __func__); + return (VM_PAGER_REFAULT); + } + } + DRM_UNLOCK(); + return (VM_PAGER_OK); + +error: + /* + * EIO means we're wedged so when we reset the gpu this will + * work, so don't segfault. + */ + DRM_UNLOCK(); + return ((ret == EIO) ? 
VM_PAGER_REFAULT : VM_PAGER_ERROR); + +} + +void +inteldrm_wipe_mappings(struct drm_obj *obj) +{ + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct vm_page *pg; + + /* nuke all our mappings. XXX optimise. */ + for (pg = &dev_priv->pgs[atop(obj_priv->gtt_offset)]; pg != + &dev_priv->pgs[atop(obj_priv->gtt_offset + obj->size)]; pg++) + pmap_page_protect(pg, VM_PROT_NONE); +} + +/** + * Finds free space in the GTT aperture and binds the object there. + */ +int +i915_gem_object_bind_to_gtt(struct drm_obj *obj, bus_size_t alignment, + int interruptible) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int ret; + + if (dev_priv->agpdmat == NULL) + return (EINVAL); + if (alignment == 0) { + alignment = i915_gem_get_gtt_alignment(obj); + } else if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { + DRM_ERROR("Invalid object alignment requested %u\n", alignment); + return (EINVAL); + } + + /* Can't bind a purgeable buffer */ + if (i915_obj_purgeable(obj_priv)) { + printf("tried to bind purgeable buffer"); + return (EINVAL); + } + + if ((ret = bus_dmamap_create(dev_priv->agpdmat, obj->size, 1, + obj->size, 0, BUS_DMA_WAITOK, &obj_priv->dmamap)) != 0) { + DRM_ERROR("Failed to create dmamap\n"); + return (ret); + } + agp_bus_dma_set_alignment(dev_priv->agpdmat, obj_priv->dmamap, + alignment); + + search_free: + /* + * the helper function wires the uao then binds it to the aperture for + * us, so all we have to do is set up the dmamap then load it. + */ + ret = drm_gem_load_uao(dev_priv->agpdmat, obj_priv->dmamap, obj->uao, + obj->size, BUS_DMA_WAITOK | obj_priv->dma_flags, + &obj_priv->dma_segs); + /* XXX NOWAIT? */ + if (ret != 0) { + /* If the gtt is empty and we're still having trouble + * fitting our object in, we're out of memory. + */ + if (TAILQ_EMPTY(&dev_priv->mm.inactive_list) && + TAILQ_EMPTY(&dev_priv->mm.flushing_list) && + TAILQ_EMPTY(&dev_priv->mm.active_list)) { + DRM_ERROR("GTT full, but LRU list empty\n"); + goto error; + } + + ret = i915_gem_evict_something(dev_priv, obj->size, + interruptible); + if (ret != 0) + goto error; + goto search_free; + } + i915_gem_bit_17_swizzle(obj); + + obj_priv->gtt_offset = obj_priv->dmamap->dm_segs[0].ds_addr - + dev->agp->base; + + atomic_inc(&dev->gtt_count); + atomic_add(obj->size, &dev->gtt_memory); + + /* Assert that the object is not currently in any GPU domain. As it + * wasn't in the GTT, there shouldn't be any way it could have been in + * a GPU cache + */ + KASSERT((obj->read_domains & I915_GEM_GPU_DOMAINS) == 0); + KASSERT((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0); + + return (0); + +error: + bus_dmamap_destroy(dev_priv->agpdmat, obj_priv->dmamap); + obj_priv->dmamap = NULL; + obj_priv->gtt_offset = 0; + return (ret); +} + +/* + * Flush the GPU write domain for the object if dirty, then wait for the + * rendering to complete. When this returns it is safe to unbind from the + * GTT or access from the CPU. + */ +int +i915_gem_object_flush_gpu_write_domain(struct drm_obj *obj, int pipelined, + int interruptible) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int ret = 0; + + if ((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0) { + /* + * Queue the GPU write cache flushing we need. 
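The search_free loop in i915_gem_object_bind_to_gtt is a classic try/evict/retry pattern: attempt the placement, and on an out-of-space error evict at least that much from the aperture and try again, giving up only when eviction itself cannot make progress. A toy model of that loop; the single free-space counter is obviously a stand-in for the real GTT.

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

static size_t gtt_free = 64 * 1024;     /* free aperture space */
static size_t evictable = 256 * 1024;   /* space we could reclaim */

static int
try_place(size_t size)
{
        if (size > gtt_free)
                return (ENOSPC);
        gtt_free -= size;
        return (0);
}

static int
evict_at_least(size_t size)
{
        if (evictable < size)
                return (ENOMEM);        /* nothing left to evict */
        evictable -= size;
        gtt_free += size;
        return (0);
}

static int
bind_with_eviction(size_t size)
{
        int ret;

        for (;;) {
                if ((ret = try_place(size)) != ENOSPC)
                        return (ret);   /* success, or a hard error */
                if ((ret = evict_at_least(size)) != 0)
                        return (ret);
        }
}

int
main(void)
{
        printf("bind 128k: %d\n", bind_with_eviction(128 * 1024));
        return (0);
}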
+ * This call will move stuff form the flushing list to the + * active list so all we need to is wait for it. + */ + (void)i915_gem_flush(dev_priv, 0, obj->write_domain); + KASSERT(obj->write_domain == 0); + } + + /* wait for queued rendering so we know it's flushed and bo is idle */ + if (pipelined == 0 && inteldrm_is_active(obj_priv)) { + ret = i915_wait_request(dev_priv, + obj_priv->last_rendering_seqno, interruptible); + } + return (ret); +} + +/* + * Moves a single object to the GTT and possibly write domain. + * + * This function returns when the move is complete, including waiting on + * flushes to occur. + */ +int +i915_gem_object_set_to_gtt_domain(struct drm_obj *obj, int write, + int interruptible) +{ + struct drm_device *dev = (struct drm_device *)obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int ret; + + /* Not valid to be called on unbound objects. */ + if (obj_priv->dmamap == NULL) + return (EINVAL); + /* Wait on any GPU rendering and flushing to occur. */ + if ((ret = i915_gem_object_flush_gpu_write_domain(obj, 0, + interruptible)) != 0) + return (ret); + + if (obj->write_domain == I915_GEM_DOMAIN_CPU) { + /* clflush the pages, and flush chipset cache */ + bus_dmamap_sync(dev_priv->agpdmat, obj_priv->dmamap, 0, + obj->size, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); + inteldrm_chipset_flush(dev_priv); + obj->write_domain = 0; + } + + /* We're accessing through the gpu, so grab a new fence register or + * update the LRU. + */ + if (obj_priv->tiling_mode != I915_TILING_NONE) + ret = i915_gem_get_fence_reg(obj, interruptible); + + /* + * If we're writing through the GTT domain then the CPU and GPU caches + * will need to be invalidated at next use. + * It should now be out of any other write domains and we can update + * to the correct ones + */ + KASSERT((obj->write_domain & ~I915_GEM_DOMAIN_GTT) == 0); + if (write) { + obj->read_domains = obj->write_domain = I915_GEM_DOMAIN_GTT; + atomic_setbits_int(&obj_priv->io_flags, I915_DIRTY); + } else { + obj->read_domains |= I915_GEM_DOMAIN_GTT; + } + + return (ret); +} + +/* + * Moves a single object to the CPU read and possibly write domain. + * + * This function returns when the move is complete, including waiting on + * flushes to return. + */ +int +i915_gem_object_set_to_cpu_domain(struct drm_obj *obj, int write, + int interruptible) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int ret; + + /* Wait on any GPU rendering and flushing to occur. */ + if ((ret = i915_gem_object_flush_gpu_write_domain(obj, 0, + interruptible)) != 0) + return (ret); + + if (obj->write_domain == I915_GEM_DOMAIN_GTT || + (write && obj->read_domains & I915_GEM_DOMAIN_GTT)) { + /* + * No actual flushing is required for the GTT write domain. + * Writes to it immeditately go to main memory as far as we + * know, so there's no chipset flush. It also doesn't land + * in render cache. + */ + inteldrm_wipe_mappings(obj); + if (obj->write_domain == I915_GEM_DOMAIN_GTT) + obj->write_domain = 0; + } + + /* remove the fence register since we're not using it anymore */ + if ((ret = i915_gem_object_put_fence_reg(obj, interruptible)) != 0) + return (ret); + + /* Flush the CPU cache if it's still invalid. 
*/ + if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { + bus_dmamap_sync(dev_priv->agpdmat, obj_priv->dmamap, 0, + obj->size, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + + obj->read_domains |= I915_GEM_DOMAIN_CPU; + } + + /* + * It should now be out of any other write domain, and we can update + * the domain value for our changes. + */ + KASSERT((obj->write_domain & ~I915_GEM_DOMAIN_CPU) == 0); + + /* + * If we're writing through the CPU, then the GPU read domains will + * need to be invalidated at next use. + */ + if (write) + obj->read_domains = obj->write_domain = I915_GEM_DOMAIN_CPU; + + return (0); +} + +/* + * Set the next domain for the specified object. This + * may not actually perform the necessary flushing/invaliding though, + * as that may want to be batched with other set_domain operations + * + * This is (we hope) the only really tricky part of gem. The goal + * is fairly simple -- track which caches hold bits of the object + * and make sure they remain coherent. A few concrete examples may + * help to explain how it works. For shorthand, we use the notation + * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the + * a pair of read and write domain masks. + * + * Case 1: the batch buffer + * + * 1. Allocated + * 2. Written by CPU + * 3. Mapped to GTT + * 4. Read by GPU + * 5. Unmapped from GTT + * 6. Freed + * + * Let's take these a step at a time + * + * 1. Allocated + * Pages allocated from the kernel may still have + * cache contents, so we set them to (CPU, CPU) always. + * 2. Written by CPU (using pwrite) + * The pwrite function calls set_domain (CPU, CPU) and + * this function does nothing (as nothing changes) + * 3. Mapped by GTT + * This function asserts that the object is not + * currently in any GPU-based read or write domains + * 4. Read by GPU + * i915_gem_execbuffer calls set_domain (COMMAND, 0). + * As write_domain is zero, this function adds in the + * current read domains (CPU+COMMAND, 0). + * flush_domains is set to CPU. + * invalidate_domains is set to COMMAND + * clflush is run to get data out of the CPU caches + * then i915_dev_set_domain calls i915_gem_flush to + * emit an MI_FLUSH and drm_agp_chipset_flush + * 5. Unmapped from GTT + * i915_gem_object_unbind calls set_domain (CPU, CPU) + * flush_domains and invalidate_domains end up both zero + * so no flushing/invalidating happens + * 6. Freed + * yay, done + * + * Case 2: The shared render buffer + * + * 1. Allocated + * 2. Mapped to GTT + * 3. Read/written by GPU + * 4. set_domain to (CPU,CPU) + * 5. Read/written by CPU + * 6. Read/written by GPU + * + * 1. Allocated + * Same as last example, (CPU, CPU) + * 2. Mapped to GTT + * Nothing changes (assertions find that it is not in the GPU) + * 3. Read/written by GPU + * execbuffer calls set_domain (RENDER, RENDER) + * flush_domains gets CPU + * invalidate_domains gets GPU + * clflush (obj) + * MI_FLUSH and drm_agp_chipset_flush + * 4. set_domain (CPU, CPU) + * flush_domains gets GPU + * invalidate_domains gets CPU + * flush_gpu_write (obj) to make sure all drawing is complete. + * This will include an MI_FLUSH to get the data from GPU + * to memory + * clflush (obj) to invalidate the CPU cache + * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) + * 5. Read/written by CPU + * cache lines are loaded and dirtied + * 6. Read written by GPU + * Same as last GPU access + * + * Case 3: The constant buffer + * + * 1. Allocated + * 2. Written by CPU + * 3. Read by GPU + * 4. Updated (written) by CPU again + * 5. 
Read by GPU + * + * 1. Allocated + * (CPU, CPU) + * 2. Written by CPU + * (CPU, CPU) + * 3. Read by GPU + * (CPU+RENDER, 0) + * flush_domains = CPU + * invalidate_domains = RENDER + * clflush (obj) + * MI_FLUSH + * drm_agp_chipset_flush + * 4. Updated (written) by CPU again + * (CPU, CPU) + * flush_domains = 0 (no previous write domain) + * invalidate_domains = 0 (no new read domains) + * 5. Read by GPU + * (CPU+RENDER, 0) + * flush_domains = CPU + * invalidate_domains = RENDER + * clflush (obj) + * MI_FLUSH + * drm_agp_chipset_flush + */ +void +i915_gem_object_set_to_gpu_domain(struct drm_obj *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + u_int32_t invalidate_domains = 0; + u_int32_t flush_domains = 0; + + KASSERT((obj->pending_read_domains & I915_GEM_DOMAIN_CPU) == 0); + KASSERT(obj->pending_write_domain != I915_GEM_DOMAIN_CPU); + /* + * If the object isn't moving to a new write domain, + * let the object stay in multiple read domains + */ + if (obj->pending_write_domain == 0) + obj->pending_read_domains |= obj->read_domains; + else + atomic_setbits_int(&obj_priv->io_flags, I915_DIRTY); + + /* + * Flush the current write domain if + * the new read domains don't match. Invalidate + * any read domains which differ from the old + * write domain + */ + if (obj->write_domain && + obj->write_domain != obj->pending_read_domains) { + flush_domains |= obj->write_domain; + invalidate_domains |= obj->pending_read_domains & + ~obj->write_domain; + } + /* + * Invalidate any read caches which may have + * stale data. That is, any new read domains. + */ + invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; + /* clflush the cpu now, gpu caches get queued. */ + if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { + bus_dmamap_sync(dev_priv->agpdmat, obj_priv->dmamap, 0, + obj->size, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + } + if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT) { + inteldrm_wipe_mappings(obj); + } + + /* The actual obj->write_domain will be updated with + * pending_write_domain after we emit the accumulated flush for all of + * the domain changes in execuffer (which clears object's write + * domains). So if we have a current write domain that we aren't + * changing, set pending_write_domain to it. + */ + if (flush_domains == 0 && obj->pending_write_domain == 0 && + (obj->pending_read_domains == obj->write_domain || + obj->write_domain == 0)) + obj->pending_write_domain = obj->write_domain; + obj->read_domains = obj->pending_read_domains; + obj->pending_read_domains = 0; + + dev->invalidate_domains |= invalidate_domains; + dev->flush_domains |= flush_domains; +} + +/** + * Pin an object to the GTT and evaluate the relocations landing in it. + */ +int +i915_gem_object_pin_and_relocate(struct drm_obj *obj, + struct drm_file *file_priv, struct drm_i915_gem_exec_object2 *entry, + struct drm_i915_gem_relocation_entry *relocs) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_obj *target_obj; + struct inteldrm_obj *obj_priv = + (struct inteldrm_obj *)obj; + bus_space_handle_t bsh; + int i, ret, needs_fence; + + needs_fence = (entry->flags & EXEC_OBJECT_NEEDS_FENCE) && + obj_priv->tiling_mode != I915_TILING_NONE; + if (needs_fence) + atomic_setbits_int(&obj_priv->io_flags, I915_EXEC_NEEDS_FENCE); + + /* Choose the GTT offset for our buffer and put it there. 
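The invalidate/flush computation in i915_gem_object_set_to_gpu_domain can be expressed as a pure function of the current and pending domain masks. The sketch below does exactly that with placeholder domain bits; it illustrates the bookkeeping rather than reproducing the driver routine.

#include <stdint.h>
#include <stdio.h>

/*
 * Flush the old write domain when the new readers differ from it, and
 * invalidate any read cache the object is newly entering.
 */
static void
compute_domains(uint32_t read_domains, uint32_t write_domain,
    uint32_t pending_read, uint32_t *invalidate, uint32_t *flush)
{
        *invalidate = 0;
        *flush = 0;

        if (write_domain && write_domain != pending_read) {
                *flush |= write_domain;
                *invalidate |= pending_read & ~write_domain;
        }
        *invalidate |= pending_read & ~read_domains;
}

int
main(void)
{
        /* e.g. a CPU-written buffer about to be read by the render engine */
        uint32_t dom_cpu = 0x1, dom_render = 0x2, inval, flush;

        compute_domains(dom_cpu, dom_cpu, dom_render, &inval, &flush);
        printf("invalidate %#x flush %#x\n", (unsigned)inval,
            (unsigned)flush);
        return (0);
}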
*/ + ret = i915_gem_object_pin(obj, (u_int32_t)entry->alignment, + needs_fence); + /* XXX what if already bound at a different alignment? */ + if (ret) + return ret; + + entry->offset = obj_priv->gtt_offset; + + /* Apply the relocations, using the GTT aperture to avoid cache + * flushing requirements. + */ + for (i = 0; i < entry->relocation_count; i++) { + struct drm_i915_gem_relocation_entry *reloc = &relocs[i]; + struct inteldrm_obj *target_obj_priv; + uint32_t reloc_val, reloc_offset; + + target_obj = drm_gem_object_lookup(obj->dev, file_priv, + reloc->target_handle); + if (target_obj == NULL) { + i915_gem_object_unpin(obj); + return (EBADF); + } + target_obj_priv = (struct inteldrm_obj *)target_obj; + + /* The target buffer should have appeared before us in the + * exec_object list, so it should have a GTT space bound by now. + */ + if (target_obj_priv->dmamap == 0) { + DRM_ERROR("No GTT space found for object %d\n", + reloc->target_handle); + ret = EINVAL; + goto err; + } + + /* must be in one write domain and one only */ + if (reloc->write_domain & (reloc->write_domain - 1)) { + ret = EINVAL; + goto err; + } + if (reloc->read_domains & I915_GEM_DOMAIN_CPU || + reloc->write_domain & I915_GEM_DOMAIN_CPU) { + DRM_ERROR("relocation with read/write CPU domains: " + "obj %p target %d offset %d " + "read %08x write %08x", obj, + reloc->target_handle, (int)reloc->offset, + reloc->read_domains, reloc->write_domain); + ret = EINVAL; + goto err; + } + + if (reloc->write_domain && target_obj->pending_write_domain && + reloc->write_domain != target_obj->pending_write_domain) { + DRM_ERROR("Write domain conflict: " + "obj %p target %d offset %d " + "new %08x old %08x\n", + obj, reloc->target_handle, + (int) reloc->offset, + reloc->write_domain, + target_obj->pending_write_domain); + ret = EINVAL; + goto err; + } + + target_obj->pending_read_domains |= reloc->read_domains; + target_obj->pending_write_domain |= reloc->write_domain; + + + if (reloc->offset > obj->size - 4) { + DRM_ERROR("Relocation beyond object bounds: " + "obj %p target %d offset %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->offset, (int) obj->size); + ret = EINVAL; + goto err; + } + if (reloc->offset & 3) { + DRM_ERROR("Relocation not 4-byte aligned: " + "obj %p target %d offset %d.\n", + obj, reloc->target_handle, + (int) reloc->offset); + ret = EINVAL; + goto err; + } + + if (reloc->delta > target_obj->size) { + DRM_ERROR("reloc larger than target\n"); + ret = EINVAL; + goto err; + } + + /* Map the page containing the relocation we're going to + * perform. + */ + reloc_offset = obj_priv->gtt_offset + reloc->offset; + reloc_val = target_obj_priv->gtt_offset + reloc->delta; + + if ((ret = bus_space_subregion(dev_priv->bst, + dev_priv->aperture_bsh, trunc_page(reloc_offset), + PAGE_SIZE, &bsh)) != 0) { + DRM_ERROR("map failed...\n"); + goto err; + } + /* + * we do this differently to linux, in the case where the + * presumed offset matches we actually read to check it's + * correct, but at least it won't involve idling the gpu if + * it was reading from it before, only if writing (which would + * be bad anyway since we're now using it as a command buffer). 
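The per-relocation checks above reduce to three questions: at most one write-domain bit, 4-byte alignment, and the patched dword fully inside the object. A standalone version of those checks; the extra obj_size < 4 guard is mine, added only so the subtraction in the sketch cannot underflow.

#include <stdint.h>
#include <stdio.h>

/* returns 0 if the relocation may be applied */
static int
reloc_ok(uint32_t write_domain, uint64_t offset, uint64_t obj_size)
{
        /* "x & (x - 1)" clears the lowest set bit: nonzero => >1 bit set */
        if (write_domain & (write_domain - 1))
                return (-1);
        if (offset & 3)
                return (-1);
        if (obj_size < 4 || offset > obj_size - 4)
                return (-1);
        return (0);
}

int
main(void)
{
        printf("%d %d\n", reloc_ok(0x2, 64, 4096),      /* fine */
            reloc_ok(0x6, 64, 4096));           /* two write domains */
        return (0);
}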
+ */ + if (target_obj_priv->gtt_offset == reloc->presumed_offset) { + ret = i915_gem_object_set_to_gtt_domain(obj, 0, 1); + if (ret != 0) + goto err; + if (bus_space_read_4(dev_priv->bst, bsh, + reloc_offset & PAGE_MASK) == reloc_val) { + drm_gem_object_unreference(target_obj); + continue; + } + DRM_DEBUG("reloc tested and found incorrect\n"); + } + + ret = i915_gem_object_set_to_gtt_domain(obj, 1, 1); + if (ret != 0) + goto err; + + bus_space_write_4(dev_priv->bst, bsh, reloc_offset & PAGE_MASK, + reloc_val); + + reloc->presumed_offset = target_obj_priv->gtt_offset; + + drm_gem_object_unreference(target_obj); + } + + return 0; + +err: + /* we always jump to here mid-loop */ + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return (ret); + +} + +/** Dispatch a batchbuffer to the ring + */ +void +i915_dispatch_gem_execbuffer(struct drm_device *dev, + struct drm_i915_gem_execbuffer2 *exec, uint64_t exec_offset) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t exec_start, exec_len; + + exec_start = (uint32_t)exec_offset + exec->batch_start_offset; + exec_len = (uint32_t)exec->batch_len; + + if (IS_I830(dev_priv) || IS_845G(dev_priv)) { + BEGIN_LP_RING(6); + OUT_RING(MI_BATCH_BUFFER); + OUT_RING(exec_start | MI_BATCH_NON_SECURE); + OUT_RING(exec_start + exec_len - 4); + OUT_RING(MI_NOOP); + } else { + BEGIN_LP_RING(4); + if (IS_I965G(dev_priv)) { + OUT_RING(MI_BATCH_BUFFER_START | (2 << 6) | + MI_BATCH_NON_SECURE_I965); + OUT_RING(exec_start); + } else { + OUT_RING(MI_BATCH_BUFFER_START | (2 << 6)); + OUT_RING(exec_start | MI_BATCH_NON_SECURE); + } + } + + /* + * Ensure that the commands in the batch buffer are + * finished before the interrupt fires (from a subsequent request + * added). We get back a seqno representing the execution of the + * current buffer, which we can wait on. We would like to mitigate + * these interrupts, likely by only creating seqnos occasionally + * (so that we have *some* interrupts representing completion of + * buffers that we can wait on when trying to clear up gtt space). + */ + OUT_RING(MI_FLUSH | MI_NO_WRITE_FLUSH); + OUT_RING(MI_NOOP); + ADVANCE_LP_RING(); + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + /* The sampler always gets flushed on i965 (sigh) */ + if (IS_I965G(dev_priv)) + inteldrm_process_flushing(dev_priv, I915_GEM_DOMAIN_SAMPLER); +} + +/* Throttle our rendering by waiting until the ring has completed our requests + * emitted over 20 msec ago. + * + * This should get us reasonable parallelism between CPU and GPU but also + * relatively low latency when blocking on a particular request to finish. 
+ */ +int +i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) +{ +#if 0 + struct inteldrm_file *intel_file = (struct inteldrm_file *)file_priv; + u_int32_t seqno; +#endif + int ret = 0; + + return ret; +} + +int +i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, + u_int32_t buffer_count, struct drm_i915_gem_relocation_entry **relocs) +{ + u_int32_t reloc_count = 0, reloc_index = 0, i; + int ret; + + *relocs = NULL; + for (i = 0; i < buffer_count; i++) { + if (reloc_count + exec_list[i].relocation_count < reloc_count) + return (EINVAL); + reloc_count += exec_list[i].relocation_count; + } + + if (reloc_count == 0) + return (0); + + if (SIZE_MAX / reloc_count < sizeof(**relocs)) + return (EINVAL); + *relocs = drm_alloc(reloc_count * sizeof(**relocs)); + for (i = 0; i < buffer_count; i++) { + if ((ret = copyin((void *)(uintptr_t)exec_list[i].relocs_ptr, + &(*relocs)[reloc_index], exec_list[i].relocation_count * + sizeof(**relocs))) != 0) { + drm_free(*relocs); + *relocs = NULL; + return (ret); + } + reloc_index += exec_list[i].relocation_count; + } + + return (0); +} + +int +i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, + u_int32_t buffer_count, struct drm_i915_gem_relocation_entry *relocs) +{ + u_int32_t reloc_count = 0, i; + int ret = 0; + + if (relocs == NULL) + return (0); + + for (i = 0; i < buffer_count; i++) { + if ((ret = copyout(&relocs[reloc_count], + (void *)(uintptr_t)exec_list[i].relocs_ptr, + exec_list[i].relocation_count * sizeof(*relocs))) != 0) + break; + reloc_count += exec_list[i].relocation_count; + } + + drm_free(relocs); + + return (ret); +} + +int +i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec_list = NULL; + struct drm_i915_gem_relocation_entry *relocs; + struct inteldrm_obj *obj_priv, *batch_obj_priv; + struct drm_obj **object_list = NULL; + struct drm_obj *batch_obj, *obj; + size_t oflow; + int ret, ret2, i; + int pinned = 0, pin_tries; + uint32_t reloc_index; + + /* + * Check for valid execbuffer offset. We can do this early because + * bound object are always page aligned, so only the start offset + * matters. Also check for integer overflow in the batch offset and size + */ + if ((args->batch_start_offset | args->batch_len) & 0x7 || + args->batch_start_offset + args->batch_len < args->batch_len || + args->batch_start_offset + args->batch_len < + args->batch_start_offset) + return (EINVAL); + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); + return (EINVAL); + } + /* Copy in the exec list from userland, check for overflow */ + oflow = SIZE_MAX / args->buffer_count; + if (oflow < sizeof(*exec_list) || oflow < sizeof(*object_list)) + return (EINVAL); + exec_list = drm_alloc(sizeof(*exec_list) * args->buffer_count); + object_list = drm_alloc(sizeof(*object_list) * args->buffer_count); + if (exec_list == NULL || object_list == NULL) { + ret = ENOMEM; + goto pre_mutex_err; + } + ret = copyin((void *)(uintptr_t)args->buffers_ptr, exec_list, + sizeof(*exec_list) * args->buffer_count); + if (ret != 0) + goto pre_mutex_err; + + ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, + &relocs); + if (ret != 0) + goto pre_mutex_err; + + DRM_LOCK(); + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + + /* XXX check these before we copyin... 
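i915_gem_get_relocs_from_user guards against two different overflows before allocating: the 32-bit running total of relocation counts, and the multiplication of that total by the entry size. The same two guards in isolation, with invented names:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Sum per-buffer entry counts and allocate the combined array, refusing
 * to proceed if the total wraps or the final allocation size would not
 * fit in size_t.
 */
static void *
alloc_entries(const uint32_t *counts, int nbufs, size_t entry_size,
    uint32_t *total_out)
{
        uint32_t total = 0;
        int i;

        for (i = 0; i < nbufs; i++) {
                if (total + counts[i] < total)  /* 32-bit wrap */
                        return (NULL);
                total += counts[i];
        }
        if (total == 0 || SIZE_MAX / total < entry_size)
                return (NULL);
        *total_out = total;
        return (calloc(total, entry_size));
}

int
main(void)
{
        uint32_t counts[] = { 3, 5, 2 }, total = 0;
        void *p = alloc_entries(counts, 3, 32, &total);

        printf("total %u, ok %d\n", (unsigned)total, p != NULL);
        free(p);
        return (0);
}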
but we do need the lock */ + if (dev_priv->mm.wedged) { + ret = EIO; + goto unlock; + } + + if (dev_priv->mm.suspended) { + ret = EBUSY; + goto unlock; + } + + /* Look up object handles */ + for (i = 0; i < args->buffer_count; i++) { + object_list[i] = drm_gem_object_lookup(dev, file_priv, + exec_list[i].handle); + if (object_list[i] == NULL) { + DRM_ERROR("Invalid object handle %d at index %d\n", + exec_list[i].handle, i); + ret = EBADF; + goto err; + } + obj_priv = (struct inteldrm_obj *)object_list[i]; + if (obj_priv->io_flags & I915_IN_EXEC) { + DRM_ERROR("Object %p appears more than once in object_list\n", + object_list[i]); + ret = EBADF; + goto err; + } + atomic_setbits_int(&obj_priv->io_flags, I915_IN_EXEC); + } + + /* Pin and relocate */ + for (pin_tries = 0; ; pin_tries++) { + ret = pinned = 0; + reloc_index = 0; + + for (i = 0; i < args->buffer_count; i++) { + object_list[i]->pending_read_domains = 0; + object_list[i]->pending_write_domain = 0; + ret = i915_gem_object_pin_and_relocate(object_list[i], + file_priv, &exec_list[i], &relocs[reloc_index]); + if (ret) + break; + pinned++; + reloc_index += exec_list[i].relocation_count; + } + /* success */ + if (ret == 0) + break; + + /* error other than GTT full, or we've already tried again */ + if (ret != ENOSPC || pin_tries >= 1) + goto err; + + /* unpin all of our buffers */ + for (i = 0; i < pinned; i++) + i915_gem_object_unpin(object_list[i]); + /* evict everyone we can from the aperture */ + ret = i915_gem_evict_everything(dev_priv, 1); + if (ret) + goto err; + } + + /* Set the pending read domains for the batch buffer to COMMAND */ + batch_obj = object_list[args->buffer_count - 1]; + batch_obj_priv = (struct inteldrm_obj *)batch_obj; + if (args->batch_start_offset + args->batch_len > batch_obj->size || + batch_obj->pending_write_domain) { + ret = EINVAL; + goto err; + } + batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + + /* + * Zero the global flush/invalidate flags. These will be modified as + * new domains are computed for each object + */ + dev->invalidate_domains = 0; + dev->flush_domains = 0; + + /* Compute new gpu domains and update invalidate/flush */ + for (i = 0; i < args->buffer_count; i++) + i915_gem_object_set_to_gpu_domain(object_list[i]); + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + + /* flush and invalidate any domains that need them. */ + (void)i915_gem_flush(dev_priv, dev->invalidate_domains, + dev->flush_domains); + + /* + * update the write domains, and fence/gpu write accounting information. + * Also do the move to active list here. The lazy seqno accounting will + * make sure that they have the correct seqno. If the add_request + * fails, then we will wait for a later batch (or one added on the + * wait), which will waste some time, but if we're that low on memory + * then we could fail in much worse ways. + */ + for (i = 0; i < args->buffer_count; i++) { + obj = object_list[i]; + obj_priv = (struct inteldrm_obj *)obj; + + obj->write_domain = obj->pending_write_domain; + /* + * if we have a write domain, add us to the gpu write list + * else we can remove the bit because it has been flushed. 
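The overflow checks in the execbuffer path (summing untrusted relocation counts, and the SIZE_MAX division before allocating the array) reduce to two patterns. A userland sketch of both, with made-up names; nothing here is driver API:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Sum untrusted 32-bit counts, failing on wrap-around, then verify that
 * the byte size of the resulting allocation cannot overflow either.
 */
int
sketch_sum_counts(const uint32_t *counts, uint32_t nbufs, size_t entsize,
    size_t *bytes)
{
	uint32_t i, total = 0;

	for (i = 0; i < nbufs; i++) {
		if (total + counts[i] < total)	/* wrapped */
			return (EINVAL);
		total += counts[i];
	}

	if (total == 0) {
		*bytes = 0;
		return (0);
	}
	if (SIZE_MAX / total < entsize)		/* total * entsize overflows */
		return (EINVAL);

	*bytes = (size_t)total * entsize;
	return (0);
}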
+ */ + if (obj_priv->io_flags & I915_GPU_WRITE) + TAILQ_REMOVE(&dev_priv->mm.gpu_write_list, obj_priv, + write_list); + if (obj->write_domain) { + TAILQ_INSERT_TAIL(&dev_priv->mm.gpu_write_list, + obj_priv, write_list); + atomic_setbits_int(&obj_priv->io_flags, I915_GPU_WRITE); + } else { + atomic_clearbits_int(&obj_priv->io_flags, + I915_GPU_WRITE); + } + /* if this batchbuffer needs a fence, then the object is + * counted as fenced exec. else any outstanding fence waits + * will just wait on the fence last_seqno. + */ + if (inteldrm_exec_needs_fence(obj_priv)) { + atomic_setbits_int(&obj_priv->io_flags, + I915_FENCED_EXEC); + } else { + atomic_clearbits_int(&obj_priv->io_flags, + I915_FENCED_EXEC); + } + + i915_gem_object_move_to_active(object_list[i]); + } + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + + /* Exec the batchbuffer */ + /* + * XXX make sure that this may never fail by preallocating the request. + */ + i915_dispatch_gem_execbuffer(dev, args, batch_obj_priv->gtt_offset); + + /* + * move to active associated all previous buffers with the seqno + * that this call will emit. so we don't need the return. + */ + (void)i915_add_request(dev_priv); + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + + ret = copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr, + sizeof(*exec_list) * args->buffer_count); + +err: + for (i = 0; i < args->buffer_count; i++) { + if (object_list[i] == NULL) + break; + + obj_priv = (struct inteldrm_obj *)object_list[i]; + if (i < pinned) + i915_gem_object_unpin(object_list[i]); + + atomic_clearbits_int(&obj_priv->io_flags, I915_IN_EXEC | + I915_EXEC_NEEDS_FENCE); + drm_gem_object_unreference(object_list[i]); + } + +unlock: + DRM_UNLOCK(); + +pre_mutex_err: + /* update userlands reloc state. */ + ret2 = i915_gem_put_relocs_to_user(exec_list, + args->buffer_count, relocs); + if (ret2 != 0 && ret == 0) + ret = ret2; + + drm_free(object_list); + drm_free(exec_list); + + return ret; +} + +int +i915_gem_object_pin(struct drm_obj *obj, uint32_t alignment, int needs_fence) +{ + struct drm_device *dev = obj->dev; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + int ret; + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + /* + * if already bound, but alignment is unsuitable, unbind so we can + * fix it. Similarly if we have constraints due to fence registers, + * adjust if needed. Note that if we are already pinned we may as well + * fail because whatever depends on this alignment will render poorly + * otherwise, so just fail the pin (with a printf so we can fix a + * wrong userland). + */ + if ((alignment && obj_priv->dmamap != NULL && + obj_priv->gtt_offset & (alignment - 1)) || (needs_fence && + !i915_gem_object_fence_offset_ok(obj, obj_priv->tiling_mode))) { + if (obj_priv->pin_count == 0) { + ret = i915_gem_object_unbind(obj, 1); + if (ret) + return (ret); + } else { + DRM_ERROR("repinning an object with bad alignment\n"); + } + return (EINVAL); + } + + if (obj_priv->dmamap == NULL) { + ret = i915_gem_object_bind_to_gtt(obj, alignment, 1); + if (ret != 0) + return (ret); + } + + /* + * Pre-965 chips may need a fence register set up in order to + * handle tiling properly. GTT mapping may have blown it away so + * restore. + * With execbuf2 support we don't always need it, but if we do grab + * it. 
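The write-domain bookkeeping just above keeps the gpu_write_list in sync with a per-object flag bit: drop the old entry if the flag was set, then re-add the object only if this submission leaves it with a GPU write domain. A cut-down sketch of that invariant using sys/queue.h, with toy names and no locking:

#include <sys/queue.h>

#define TOY_GPU_WRITE	0x1

struct toy_obj {
	TAILQ_ENTRY(toy_obj)	write_list;
	int			io_flags;
	unsigned int		write_domain;
	unsigned int		pending_write_domain;
};
TAILQ_HEAD(toy_write_head, toy_obj);

/*
 * Invariant: an object is on the write list iff TOY_GPU_WRITE is set,
 * which is iff it still has an unflushed GPU write domain.
 */
void
toy_update_write_list(struct toy_write_head *head, struct toy_obj *obj)
{
	obj->write_domain = obj->pending_write_domain;

	if (obj->io_flags & TOY_GPU_WRITE)
		TAILQ_REMOVE(head, obj, write_list);
	if (obj->write_domain != 0) {
		TAILQ_INSERT_TAIL(head, obj, write_list);
		obj->io_flags |= TOY_GPU_WRITE;
	} else {
		obj->io_flags &= ~TOY_GPU_WRITE;
	}
}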
+ */ + if (needs_fence && obj_priv->tiling_mode != I915_TILING_NONE && + (ret = i915_gem_get_fence_reg(obj, 1)) != 0) + return (ret); + + /* If the object is not active and not pending a flush, + * remove it from the inactive list + */ + if (++obj_priv->pin_count == 1) { + atomic_inc(&dev->pin_count); + atomic_add(obj->size, &dev->pin_memory); + if (!inteldrm_is_active(obj_priv)) + i915_list_remove(obj_priv); + } + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + + return (0); +} + +void +i915_gem_object_unpin(struct drm_obj *obj) +{ + struct drm_device *dev = obj->dev; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); + KASSERT(obj_priv->pin_count >= 1); + KASSERT(obj_priv->dmamap != NULL); + + /* If the object is no longer pinned, and is + * neither active nor being flushed, then stick it on + * the inactive list + */ + if (--obj_priv->pin_count == 0) { + if (!inteldrm_is_active(obj_priv)) + i915_gem_object_move_to_inactive(obj); + atomic_dec(&dev->pin_count); + atomic_sub(obj->size, &dev->pin_memory); + } + inteldrm_verify_inactive(dev_priv, __FILE__, __LINE__); +} + +int +i915_gem_pin_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_pin *args = data; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + int ret = 0; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + + obj_priv = (struct inteldrm_obj *)obj; + DRM_LOCK(); + if (i915_obj_purgeable(obj_priv)) { + printf("pinning purgeable object\n"); + ret = EINVAL; + goto out; + } + + if (++obj_priv->user_pin_count == 1) { + ret = i915_gem_object_pin(obj, args->alignment, 1); + if (ret != 0) + goto out; + } + + /* XXX - flush the CPU caches for pinned objects + * as the X server doesn't manage domains yet + */ + i915_gem_object_set_to_gtt_domain(obj, 1, 1); + args->offset = obj_priv->gtt_offset; + +out: + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + return (ret); +} + +int +i915_gem_unpin_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_pin *args = data; + struct inteldrm_obj *obj_priv; + struct drm_obj *obj; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + + DRM_LOCK(); + obj_priv = (struct inteldrm_obj *)obj; + if (obj_priv->user_pin_count == 0) { + DRM_UNLOCK(); + return (EINVAL); + } + + if (--obj_priv->user_pin_count == 0) + i915_gem_object_unpin(obj); + + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + return (0); +} + +int +i915_gem_busy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_busy *args = data; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) { + DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", + args->handle); + return (EBADF); + } + + DRM_LOCK(); + /* + * Update the active list for the hardware's current position. + * otherwise this will only update on a delayed timer or when + * the irq is unmasked. This keeps our working set smaller. + */ + i915_gem_retire_requests(dev_priv); + + obj_priv = (struct inteldrm_obj *)obj; + /* + * Don't count being on the flushing list being done. 
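pin_count and user_pin_count above are two nested refcounts: the pin/unpin ioctls only take or drop one real pin on their own 0<->1 transitions, and the real pin only changes the object's eviction status on its own 0<->1 transitions. A toy model of just the counting; the actual GTT bind, list moves and pin accounting are elided:

struct toy_bo {
	int	pin_count;	/* kernel pins: execbuf, ring, hws, user */
	int	user_pin_count;	/* pins taken through the pin ioctl */
	int	evictable;	/* eligible for the inactive list */
};

/* First pin takes the object off the eviction path... */
void
toy_pin(struct toy_bo *bo)
{
	if (++bo->pin_count == 1)
		bo->evictable = 0;
}

/* ...and the last unpin makes it evictable again (once it goes idle). */
void
toy_unpin(struct toy_bo *bo)
{
	if (--bo->pin_count == 0)
		bo->evictable = 1;
}

/* The ioctls just wrap the above with their own refcount. */
void
toy_pin_ioctl(struct toy_bo *bo)
{
	if (++bo->user_pin_count == 1)
		toy_pin(bo);
}

void
toy_unpin_ioctl(struct toy_bo *bo)
{
	if (--bo->user_pin_count == 0)
		toy_unpin(bo);
}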
Otherwise, a + * buffer left on the flushing list but not getting flushed (because + * no one is flushing that domain) won't ever return unbusy and get + * reused by libdrm's bo cache. The other expected consumer of this + * interface, OpenGL's occlusion queries, also specs that the object + * get unbusy "eventually" without any interference. + */ + args->busy = inteldrm_is_active(obj_priv) && + obj_priv->last_rendering_seqno != 0; + + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + return 0; +} + +int +i915_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_madvise *args = data; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + int need, ret = 0; + + switch (args->madv) { + case I915_MADV_DONTNEED: + need = 0; + break; + case I915_MADV_WILLNEED: + need = 1; + break; + default: + return (EINVAL); + } + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + + obj_priv = (struct inteldrm_obj *)obj; + DRM_LOCK(); + + /* invalid to madvise on a pinned BO */ + if (obj_priv->pin_count) { + ret = EINVAL; + goto out; + } + + if (!i915_obj_purged(obj_priv)) { + if (need) { + atomic_clearbits_int(&obj_priv->io_flags, + I915_DONTNEED); + } else { + atomic_setbits_int(&obj_priv->io_flags, I915_DONTNEED); + } + } + + + /* if the object is no longer bound, discard its backing storage */ + if (i915_obj_purgeable(obj_priv) && obj_priv->dmamap == NULL) + inteldrm_purge_obj(obj); + + args->retained = !i915_obj_purged(obj_priv); + +out: + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + return (ret); +} + +int +i915_gem_init_object(struct drm_obj *obj) +{ + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + + /* + * We've just allocated pages from the kernel, + * so they've just been written by the CPU with + * zeros. They'll need to be flushed before we + * use them with the GPU. + */ + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + + /* normal objects don't need special treatment */ + obj_priv->dma_flags = 0; + obj_priv->fence_reg = I915_FENCE_REG_NONE; + + return 0; +} + +/* + * NOTE all object unreferences in this driver need to hold the DRM_LOCK(), + * because if they free they poke around in driver structures. + */ +void +i915_gem_free_object(struct drm_obj *obj) +{ + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + + while (obj_priv->pin_count > 0) + i915_gem_object_unpin(obj); + + i915_gem_object_unbind(obj, 0); + /* XXX dmatag went away? */ +} + +/** Unbinds all objects that are on the given buffer list. */ +int +i915_gem_evict_inactive(struct drm_i915_private *dev_priv) +{ + struct inteldrm_obj *obj_priv; + int ret; + + while ((obj_priv = TAILQ_FIRST(&dev_priv->mm.inactive_list)) != NULL) { + if (obj_priv->pin_count != 0) { + DRM_ERROR("Pinned object in unbind list\n"); + return (EINVAL); + } + + if ((ret = i915_gem_object_unbind(&obj_priv->obj, 1)) != 0) + break; + } + + return (ret); +} + +int +i915_gem_idle(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + int ret; + + DRM_LOCK(); + if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { + DRM_UNLOCK(); + return (0); + } + + /* + * If we're wedged, the workq will clear everything, else this will + * empty out the lists for us. + */ + if ((ret = i915_gem_evict_everything(dev_priv, 1)) != 0 && ret != ENOSPC) { + DRM_UNLOCK(); + return (ret); + } + + /* Hack! 
Don't let anybody do execbuf while we don't control the chip. + * We need to replace this with a semaphore, or something. + */ + dev_priv->mm.suspended = 1; + /* if we hung then the timer alredy fired. */ + timeout_del(&dev_priv->mm.hang_timer); + + inteldrm_update_ring(dev_priv); + i915_gem_cleanup_ringbuffer(dev_priv); + DRM_UNLOCK(); + + /* this should be idle now */ + timeout_del(&dev_priv->mm.retire_timer); + + return 0; +} + +int +i915_gem_init_hws(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + int ret; + + /* If we need a physical address for the status page, it's already + * initialized at driver load time. + */ + if (!I915_NEED_GFX_HWS(dev_priv)) + return 0; + + obj = drm_gem_object_alloc(dev, 4096); + if (obj == NULL) { + DRM_ERROR("Failed to allocate status page\n"); + return (ENOMEM); + } + obj_priv = (struct inteldrm_obj *)obj; + /* + * snooped gtt mapping please . + * Normally this flag is only to dmamem_map, but it's been overloaded + * for the agp mapping + */ + obj_priv->dma_flags = BUS_DMA_COHERENT | BUS_DMA_READ; + + ret = i915_gem_object_pin(obj, 4096, 0); + if (ret != 0) { + drm_gem_object_unreference(obj); + return ret; + } + + dev_priv->hw_status_page = (void *)vm_map_min(kernel_map); + obj->uao->pgops->pgo_reference(obj->uao); + if ((ret = uvm_map(kernel_map, (vaddr_t *)&dev_priv->hw_status_page, + PAGE_SIZE, obj->uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, + UVM_INH_SHARE, UVM_ADV_RANDOM, 0))) != 0) + if (ret != 0) { + DRM_ERROR("Failed to map status page.\n"); + obj->uao->pgops->pgo_detach(obj->uao); + memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); + i915_gem_object_unpin(obj); + drm_gem_object_unreference(obj); + return (EINVAL); + } + dev_priv->hws_obj = obj; + memset(dev_priv->hw_status_page, 0, PAGE_SIZE); + I915_WRITE(HWS_PGA, obj_priv->gtt_offset); + I915_READ(HWS_PGA); /* posting read */ + DRM_DEBUG("hws offset: 0x%08x\n", obj_priv->gtt_offset); + + return 0; +} + +void +i915_gem_cleanup_hws(struct drm_i915_private *dev_priv) +{ + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + + if (dev_priv->hws_obj == NULL) + return; + + obj = dev_priv->hws_obj; + obj_priv = (struct inteldrm_obj *)obj; + + uvm_unmap(kernel_map, (vaddr_t)dev_priv->hw_status_page, + (vaddr_t)dev_priv->hw_status_page + PAGE_SIZE); + i915_gem_object_unpin(obj); + drm_gem_object_unreference(obj); + dev_priv->hws_obj = NULL; + + memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); + dev_priv->hw_status_page = NULL; + + /* Write high address into HWS_PGA when disabling. */ + I915_WRITE(HWS_PGA, 0x1ffff000); +} + +int +i915_gem_init_ringbuffer(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + int ret; + + ret = i915_gem_init_hws(dev_priv); + if (ret != 0) + return ret; + + obj = drm_gem_object_alloc(dev, 128 * 1024); + if (obj == NULL) { + DRM_ERROR("Failed to allocate ringbuffer\n"); + ret = ENOMEM; + goto delhws; + } + obj_priv = (struct inteldrm_obj *)obj; + + ret = i915_gem_object_pin(obj, 4096, 0); + if (ret != 0) + goto unref; + + /* Set up the kernel mapping for the ring. 
*/ + dev_priv->ring.size = obj->size; + + /* XXX WC */ + if ((ret = bus_space_subregion(dev_priv->bst, dev_priv->aperture_bsh, + obj_priv->gtt_offset, obj->size, &dev_priv->ring.bsh)) != 0) { + DRM_INFO("can't map ringbuffer\n"); + goto unpin; + } + dev_priv->ring.ring_obj = obj; + + if ((ret = inteldrm_start_ring(dev_priv)) != 0) + goto unmap; + + return (0); + +unmap: +unpin: + memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); + i915_gem_object_unpin(obj); +unref: + drm_gem_object_unreference(obj); +delhws: + i915_gem_cleanup_hws(dev_priv); + return (ret); +} + +int +inteldrm_start_ring(struct drm_i915_private *dev_priv) +{ + struct drm_obj *obj = dev_priv->ring.ring_obj; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + u_int32_t head; + + /* Stop the ring if it's running. */ + I915_WRITE(PRB0_CTL, 0); + I915_WRITE(PRB0_TAIL, 0); + I915_WRITE(PRB0_HEAD, 0); + + /* Initialize the ring. */ + I915_WRITE(PRB0_START, obj_priv->gtt_offset); + head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + + /* G45 ring initialisation fails to reset head to zero */ + if (head != 0) { + I915_WRITE(PRB0_HEAD, 0); + DRM_DEBUG("Forced ring head to zero ctl %08x head %08x" + "tail %08x start %08x\n", I915_READ(PRB0_CTL), + I915_READ(PRB0_HEAD), I915_READ(PRB0_TAIL), + I915_READ(PRB0_START)); + } + + I915_WRITE(PRB0_CTL, ((obj->size - 4096) & RING_NR_PAGES) | + RING_NO_REPORT | RING_VALID); + + head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + /* If ring head still != 0, the ring is dead */ + if (head != 0) { + DRM_ERROR("Ring initialisation failed: ctl %08x head %08x" + "tail %08x start %08x\n", I915_READ(PRB0_CTL), + I915_READ(PRB0_HEAD), I915_READ(PRB0_TAIL), + I915_READ(PRB0_START)); + return (EIO); + } + + /* Update our cache of the ring state */ + inteldrm_update_ring(dev_priv); + + return (0); +} + +void +i915_gem_cleanup_ringbuffer(struct drm_i915_private *dev_priv) +{ + if (dev_priv->ring.ring_obj == NULL) + return; + + i915_gem_object_unpin(dev_priv->ring.ring_obj); + drm_gem_object_unreference(dev_priv->ring.ring_obj); + dev_priv->ring.ring_obj = NULL; + memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); + + i915_gem_cleanup_hws(dev_priv); +} + +int +i915_gem_entervt_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int ret; + + if (dev_priv->mm.wedged) { + DRM_ERROR("Reenabling wedged hardware, good luck\n"); + dev_priv->mm.wedged = 0; + } + + + DRM_LOCK(); + dev_priv->mm.suspended = 0; + + ret = i915_gem_init_ringbuffer(dev_priv); + if (ret != 0) { + DRM_UNLOCK(); + return (ret); + } + + /* gtt mapping means that the inactive list may not be empty */ + KASSERT(TAILQ_EMPTY(&dev_priv->mm.active_list)); + KASSERT(TAILQ_EMPTY(&dev_priv->mm.flushing_list)); + KASSERT(TAILQ_EMPTY(&dev_priv->mm.request_list)); + DRM_UNLOCK(); + + drm_irq_install(dev); + + return (0); +} + +int +i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = i915_gem_idle(dev_priv); + drm_irq_uninstall(dev); + return (ret); +} + +void +inteldrm_timeout(void *arg) +{ + drm_i915_private_t *dev_priv = arg; + + if (workq_add_task(dev_priv->workq, 0, i915_gem_retire_work_handler, + dev_priv, NULL) == ENOMEM) + DRM_ERROR("failed to run retire handler\n"); +} + +/* + * handle hung hardware, or error interrupts. for now print debug info. 
+ */ +void +inteldrm_error(struct drm_i915_private *dev_priv) +{ + u_int32_t eir, ipeir, pgtbl_err, pipea_stats, pipeb_stats; + u_int8_t reset = GDRST_RENDER; + + eir = I915_READ(EIR); + pipea_stats = I915_READ(PIPEASTAT); + pipeb_stats = I915_READ(PIPEBSTAT); + + /* + * only actually check the error bits if we register one. + * else we just hung, stay silent. + */ + if (eir != 0) { + printf("render error detected, EIR: 0x%08x\n", eir); + if (IS_G4X(dev_priv)) { + if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) { + ipeir = I915_READ(IPEIR_I965); + + printf(" IPEIR: 0x%08x\n", + I915_READ(IPEIR_I965)); + printf(" IPEHR: 0x%08x\n", + I915_READ(IPEHR_I965)); + printf(" INSTDONE: 0x%08x\n", + I915_READ(INSTDONE_I965)); + printf(" INSTPS: 0x%08x\n", + I915_READ(INSTPS)); + printf(" INSTDONE1: 0x%08x\n", + I915_READ(INSTDONE1)); + printf(" ACTHD: 0x%08x\n", + I915_READ(ACTHD_I965)); + I915_WRITE(IPEIR_I965, ipeir); + (void)I915_READ(IPEIR_I965); + } + if (eir & GM45_ERROR_PAGE_TABLE) { + pgtbl_err = I915_READ(PGTBL_ER); + printf("page table error\n"); + printf(" PGTBL_ER: 0x%08x\n", pgtbl_err); + I915_WRITE(PGTBL_ER, pgtbl_err); + (void)I915_READ(PGTBL_ER); + dev_priv->mm.wedged = 1; + reset = GDRST_FULL; + + } + } else if (IS_I9XX(dev_priv) && eir & I915_ERROR_PAGE_TABLE) { + pgtbl_err = I915_READ(PGTBL_ER); + printf("page table error\n"); + printf(" PGTBL_ER: 0x%08x\n", pgtbl_err); + I915_WRITE(PGTBL_ER, pgtbl_err); + (void)I915_READ(PGTBL_ER); + dev_priv->mm.wedged = 1; + reset = GDRST_FULL; + } + if (eir & I915_ERROR_MEMORY_REFRESH) { + printf("memory refresh error\n"); + printf("PIPEASTAT: 0x%08x\n", + pipea_stats); + printf("PIPEBSTAT: 0x%08x\n", + pipeb_stats); + /* pipestat has already been acked */ + } + if (eir & I915_ERROR_INSTRUCTION) { + printf("instruction error\n"); + printf(" INSTPM: 0x%08x\n", + I915_READ(INSTPM)); + if (!IS_I965G(dev_priv)) { + ipeir = I915_READ(IPEIR); + + printf(" IPEIR: 0x%08x\n", + I915_READ(IPEIR)); + printf(" IPEHR: 0x%08x\n", + I915_READ(IPEHR)); + printf(" INSTDONE: 0x%08x\n", + I915_READ(INSTDONE)); + printf(" ACTHD: 0x%08x\n", + I915_READ(ACTHD)); + I915_WRITE(IPEIR, ipeir); + (void)I915_READ(IPEIR); + } else { + ipeir = I915_READ(IPEIR_I965); + + printf(" IPEIR: 0x%08x\n", + I915_READ(IPEIR_I965)); + printf(" IPEHR: 0x%08x\n", + I915_READ(IPEHR_I965)); + printf(" INSTDONE: 0x%08x\n", + I915_READ(INSTDONE_I965)); + printf(" INSTPS: 0x%08x\n", + I915_READ(INSTPS)); + printf(" INSTDONE1: 0x%08x\n", + I915_READ(INSTDONE1)); + printf(" ACTHD: 0x%08x\n", + I915_READ(ACTHD_I965)); + I915_WRITE(IPEIR_I965, ipeir); + (void)I915_READ(IPEIR_I965); + } + } + + I915_WRITE(EIR, eir); + eir = I915_READ(EIR); + } + /* + * nasty errors don't clear and need a reset, mask them until we reset + * else we'll get infinite interrupt storms. + */ + if (eir) { + /* print so we know that we may want to reset here too */ + if (dev_priv->mm.wedged == 0) + DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); + I915_WRITE(EMR, I915_READ(EMR) | eir); + I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); + } + /* + * if it was a pagetable error, or we were called from hangcheck, then + * reset the gpu. 
+ */ + if (dev_priv->mm.wedged && workq_add_task(dev_priv->workq, 0, + inteldrm_hung, dev_priv, (void *)(uintptr_t)reset) == ENOMEM) + DRM_INFO("failed to schedule reset task\n"); + +} + +void +inteldrm_hung(void *arg, void *reset_type) +{ + struct drm_i915_private *dev_priv = arg; + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + struct inteldrm_obj *obj_priv; + u_int8_t reset = (u_int8_t)(uintptr_t)reset_type; + + DRM_LOCK(); + if (HAS_RESET(dev_priv)) { + DRM_INFO("resetting gpu: "); + inteldrm_965_reset(dev_priv, reset); + printf("done!\n"); + } else + printf("no reset function for chipset.\n"); + + /* + * Clear out all of the requests and make everything inactive. + */ + i915_gem_retire_requests(dev_priv); + + /* + * Clear the active and flushing lists to inactive. Since + * we've reset the hardware then they're not going to get + * flushed or completed otherwise. nuke the domains since + * they're now irrelavent. + */ + while ((obj_priv = TAILQ_FIRST(&dev_priv->mm.active_list)) != NULL) { + if (obj_priv->obj.write_domain & I915_GEM_GPU_DOMAINS) { + TAILQ_REMOVE(&dev_priv->mm.gpu_write_list, + obj_priv, write_list); + atomic_clearbits_int(&obj_priv->io_flags, + I915_GPU_WRITE); + obj_priv->obj.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + i915_gem_object_move_to_inactive(&obj_priv->obj);; + } + + while ((obj_priv = TAILQ_FIRST(&dev_priv->mm.flushing_list)) != NULL) { + if (obj_priv->obj.write_domain & I915_GEM_GPU_DOMAINS) { + TAILQ_REMOVE(&dev_priv->mm.gpu_write_list, + obj_priv, write_list); + atomic_clearbits_int(&obj_priv->io_flags, + I915_GPU_WRITE); + obj_priv->obj.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + i915_gem_object_move_to_inactive(&obj_priv->obj); + } + + /* unbind everything */ + (void)i915_gem_evict_inactive(dev_priv); + + if (HAS_RESET(dev_priv)) + dev_priv->mm.wedged = 0; + DRM_UNLOCK(); +} + +void +inteldrm_hangcheck(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + u_int32_t acthd; + + /* are we idle? */ + if (TAILQ_EMPTY(&dev_priv->mm.request_list) || + i915_seqno_passed(i915_get_gem_seqno(dev_priv), + TAILQ_LAST(&dev_priv->mm.request_list, i915_request)->seqno)) { + dev_priv->mm.hang_cnt = 0; + return; + } + + if (IS_I965G(dev_priv)) + acthd = I915_READ(ACTHD_I965); + else + acthd = I915_READ(ACTHD); + + /* if we've hit ourselves before and the hardware hasn't moved, hung. */ + if (dev_priv->mm.last_acthd == acthd) { + /* if that's twice we didn't hit it, then we're hung */ + if (++dev_priv->mm.hang_cnt >= 2) { + dev_priv->mm.hang_cnt = 0; + /* XXX atomic */ + dev_priv->mm.wedged = 1; + DRM_INFO("gpu hung!\n"); + /* XXX locking */ + wakeup(dev_priv); + inteldrm_error(dev_priv); + return; + } + } else { + dev_priv->mm.hang_cnt = 0; + } + + dev_priv->mm.last_acthd = acthd; + /* Set ourselves up again, in case we haven't added another batch */ + timeout_add_msec(&dev_priv->mm.hang_timer, 750); +} + +void +i915_move_to_tail(struct inteldrm_obj *obj_priv, struct i915_gem_list *head) +{ + i915_list_remove(obj_priv); + TAILQ_INSERT_TAIL(head, obj_priv, list); + obj_priv->current_list = head; +} + +void +i915_list_remove(struct inteldrm_obj *obj_priv) +{ + if (obj_priv->current_list != NULL) + TAILQ_REMOVE(obj_priv->current_list, obj_priv, list); + obj_priv->current_list = NULL; +} + +/* + * + * Support for managing tiling state of buffer objects. + * + * The idea behind tiling is to increase cache hit rates by rearranging + * pixel data so that a group of pixel accesses are in the same cacheline. 
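The hangcheck timer above is a two-strike scheme on the GPU's active head pointer: if ACTHD has not moved across two consecutive ticks while requests are outstanding, the chip is declared hung. The decision logic pulled out into a pure function, with toy names:

#include <stdint.h>

struct toy_hangcheck {
	uint32_t	last_acthd;
	int		hang_cnt;
};

/*
 * Call once per tick.  Returns 1 when the caller should wedge the chip
 * and kick off error handling / reset.
 */
int
toy_hangcheck_tick(struct toy_hangcheck *hc, uint32_t acthd, int ring_idle)
{
	if (ring_idle) {
		hc->hang_cnt = 0;
		return (0);
	}

	if (hc->last_acthd == acthd) {
		if (++hc->hang_cnt >= 2) {
			hc->hang_cnt = 0;
			return (1);		/* no progress twice: hung */
		}
	} else {
		hc->hang_cnt = 0;
	}

	hc->last_acthd = acthd;
	return (0);
}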
+ * Performance improvement from doing this on the back/depth buffer are on + * the order of 30%. + * + * Intel architectures make this somewhat more complicated, though, by + * adjustments made to addressing of data when the memory is in interleaved + * mode (matched pairs of DIMMS) to improve memory bandwidth. + * For interleaved memory, the CPU sends every sequential 64 bytes + * to an alternate memory channel so it can get the bandwidth from both. + * + * The GPU also rearranges its accesses for increased bandwidth to interleaved + * memory, and it matches what the CPU does for non-tiled. However, when tiled + * it does it a little differently, since one walks addresses not just in the + * X direction but also Y. So, along with alternating channels when bit + * 6 of the address flips, it also alternates when other bits flip -- Bits 9 + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) + * are common to both the 915 and 965-class hardware. + * + * The CPU also sometimes XORs in higher bits as well, to improve + * bandwidth doing strided access like we do so frequently in graphics. This + * is called "Channel XOR Randomization" in the MCH documentation. The result + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address + * decode. + * + * All of this bit 6 XORing has an effect on our memory management, + * as we need to make sure that the 3d driver can correctly address object + * contents. + * + * If we don't have interleaved memory, all tiling is safe and no swizzling is + * required. + * + * When bit 17 is XORed in, we simply refuse to tile at all. Bit + * 17 is not just a page offset, so as we page an object out and back in, + * individual pages in it will have different bit 17 addresses, resulting in + * each 64 bytes being swapped with its neighbor! + * + * Otherwise, if interleaved, we have to tell the 3d driver what the address + * swizzling it needs to do is, since it's writing with the CPU to the pages + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order + * to match what the GPU expects. + */ + +#define MCHBAR_I915 0x44 +#define MCHBAR_I965 0x48 +#define MCHBAR_SIZE (4*4096) + +#define DEVEN_REG 0x54 +#define DEVEN_MCHBAR_EN (1 << 28) + + +/* + * Check the MCHBAR on the host bridge is enabled, and if not allocate it. + * we do not need to actually map it because we access the bar through it's + * mirror on the IGD, however, if it is disabled or not allocated then + * the mirror does not work. *sigh*. + * + * we return a trinary state: + * 0 = already enabled, or can not enable + * 1 = enabled, needs disable + * 2 = enabled, needs disable and free. + */ +int +inteldrm_setup_mchbar(struct drm_i915_private *dev_priv, + struct pci_attach_args *bpa) +{ + u_int64_t mchbar_addr; + pcireg_t tmp, low, high = 0; + u_long addr; + int reg = IS_I965G(dev_priv) ? 
MCHBAR_I965 : MCHBAR_I915; + int ret = 1, enabled = 0; + + if (IS_I915G(dev_priv) || IS_I915GM(dev_priv)) { + tmp = pci_conf_read(bpa->pa_pc, bpa->pa_tag, DEVEN_REG); + enabled = !!(tmp & DEVEN_MCHBAR_EN); + } else { + tmp = pci_conf_read(bpa->pa_pc, bpa->pa_tag, reg); + enabled = tmp & 1; + } + + if (enabled) { + return (0); + } + + if (IS_I965G(dev_priv)) + high = pci_conf_read(bpa->pa_pc, bpa->pa_tag, reg + 4); + low = pci_conf_read(bpa->pa_pc, bpa->pa_tag, reg); + mchbar_addr = ((u_int64_t)high << 32) | low; + + /* + * XXX need to check to see if it's allocated in the pci resources, + * right now we just check to see if there's any address there + * + * if there's no address, then we allocate one. + * note that we can't just use pci_mapreg_map here since some intel + * BARs are special in that they set bit 0 to show they're enabled, + * this is not handled by generic pci code. + */ + if (mchbar_addr == 0) { + addr = (u_long)mchbar_addr; + if (bpa->pa_memex == NULL || extent_alloc(bpa->pa_memex, + MCHBAR_SIZE, MCHBAR_SIZE, 0, 0, 0, &addr)) { + return (0); /* just say we don't need to disable */ + } else { + mchbar_addr = addr; + ret = 2; + /* We've allocated it, now fill in the BAR again */ + if (IS_I965G(dev_priv)) + pci_conf_write(bpa->pa_pc, bpa->pa_tag, + reg + 4, upper_32_bits(mchbar_addr)); + pci_conf_write(bpa->pa_pc, bpa->pa_tag, + reg, mchbar_addr & 0xffffffff); + } + } + /* set the enable bit */ + if (IS_I915G(dev_priv) || IS_I915GM(dev_priv)) { + pci_conf_write(bpa->pa_pc, bpa->pa_tag, DEVEN_REG, + tmp | DEVEN_MCHBAR_EN); + } else { + tmp = pci_conf_read(bpa->pa_pc, bpa->pa_tag, reg); + pci_conf_write(bpa->pa_pc, bpa->pa_tag, reg, tmp | 1); + } + + return (ret); +} + +/* + * we take the trinary returned from inteldrm_setup_mchbar and clean up after + * it. + */ +void +inteldrm_teardown_mchbar(struct drm_i915_private *dev_priv, + struct pci_attach_args *bpa, int disable) +{ + u_int64_t mchbar_addr; + pcireg_t tmp, low, high = 0; + int reg = IS_I965G(dev_priv) ? MCHBAR_I965 : MCHBAR_I915; + + switch(disable) { + case 2: + if (IS_I965G(dev_priv)) + high = pci_conf_read(bpa->pa_pc, bpa->pa_tag, reg + 4); + low = pci_conf_read(bpa->pa_pc, bpa->pa_tag, reg); + mchbar_addr = ((u_int64_t)high << 32) | low; + extent_free(bpa->pa_memex, mchbar_addr, MCHBAR_SIZE, 0); + /* FALLTHROUGH */ + case 1: + if (IS_I915G(dev_priv) || IS_I915GM(dev_priv)) { + tmp = pci_conf_read(bpa->pa_pc, bpa->pa_tag, DEVEN_REG); + tmp &= ~DEVEN_MCHBAR_EN; + pci_conf_write(bpa->pa_pc, bpa->pa_tag, DEVEN_REG, tmp); + } else { + tmp = pci_conf_read(bpa->pa_pc, bpa->pa_tag, reg); + tmp &= ~1; + pci_conf_write(bpa->pa_pc, bpa->pa_tag, reg, tmp); + } + break; + case 0: + default: + break; + }; +} + +/** + * Detects bit 6 swizzling of address lookup between IGD access and CPU + * access through main memory. + */ +void +inteldrm_detect_bit_6_swizzle(drm_i915_private_t *dev_priv, + struct pci_attach_args *bpa) +{ + uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + int need_disable; + + if (!IS_I9XX(dev_priv)) { + /* As far as we know, the 865 doesn't have these bit 6 + * swizzling issues. + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else if (IS_MOBILE(dev_priv)) { + uint32_t dcc; + + /* try to enable MCHBAR, a lot of biosen disable it */ + need_disable = inteldrm_setup_mchbar(dev_priv, bpa); + + /* On 915-945 and GM965, channel interleave by the CPU is + * determined by DCC. 
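The DCC decode just below reduces to a small decision table. Here it is as a pure function; the enum values are placeholders for illustration, not the real swizzle constants from the headers:

enum toy_swizzle {
	TOY_SWIZZLE_NONE,
	TOY_SWIZZLE_9,
	TOY_SWIZZLE_9_10,
	TOY_SWIZZLE_9_11,
	TOY_SWIZZLE_9_10_11,
	TOY_SWIZZLE_9_17,
	TOY_SWIZZLE_9_10_17,
};

/*
 * Single-channel memory never swizzles.  Interleaved memory swizzles on
 * bits 9/10 (X tiling) and 9 (Y tiling); channel XOR randomization adds
 * bit 11 or bit 17 on top, depending on how it is configured.
 */
void
toy_pick_swizzle(int interleaved, int xor_disabled, int xor_bit_17,
    enum toy_swizzle *x, enum toy_swizzle *y)
{
	if (!interleaved) {
		*x = *y = TOY_SWIZZLE_NONE;
	} else if (xor_disabled) {
		*x = TOY_SWIZZLE_9_10;
		*y = TOY_SWIZZLE_9;
	} else if (!xor_bit_17) {
		*x = TOY_SWIZZLE_9_10_11;
		*y = TOY_SWIZZLE_9_11;
	} else {
		*x = TOY_SWIZZLE_9_10_17;
		*y = TOY_SWIZZLE_9_17;
	}
}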
The CPU will alternate based on bit 6 + * in interleaved mode, and the GPU will then also alternate + * on bit 6, 9, and 10 for X, but the CPU may also optionally + * alternate based on bit 17 (XOR not disabled and XOR + * bit == 17). + */ + dcc = I915_READ(DCC); + switch (dcc & DCC_ADDRESSING_MODE_MASK) { + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + break; + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: + if (dcc & DCC_CHANNEL_XOR_DISABLE) { + /* This is the base swizzling by the GPU for + * tiled buffers. + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { + /* Bit 11 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; + swizzle_y = I915_BIT_6_SWIZZLE_9_11; + } else { + /* Bit 17 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; + } + break; + } + if (dcc == 0xffffffff) { + DRM_ERROR("Couldn't read from MCHBAR. " + "Disabling tiling.\n"); + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } - /* - * When we eventually go GEM only we'll always have a dmamap, so this - * madness won't be for long. - */ - if (dev_priv->hws_dmamem) - bus_dmamap_sync(dev->dmat, dev_priv->hws_dmamem->map, 0, - PAGE_SIZE, BUS_DMASYNC_POSTREAD); - - val = ((volatile u_int32_t *)(dev_priv->hw_status_page))[reg]; + inteldrm_teardown_mchbar(dev_priv, bpa, need_disable); + } else { + /* The 965, G33, and newer, have a very flexible memory + * configuration. It will enable dual-channel mode + * (interleaving) on as much memory as it can, and the GPU + * will additionally sometimes enable different bit 6 + * swizzling for tiled objects from the CPU. + * + * Here's what I found on G965: + * + * slot fill memory size swizzling + * 0A 0B 1A 1B 1-ch 2-ch + * 512 0 0 0 512 0 O + * 512 0 512 0 16 1008 X + * 512 0 0 512 16 1008 X + * 0 512 0 512 16 1008 X + * 1024 1024 1024 0 2048 1024 O + * + * We could probably detect this based on either the DRB + * matching, which was the case for the swizzling required in + * the table above, or from the 1-ch value being less than + * the minimum size of a rank. + */ + if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } + } - if (dev_priv->hws_dmamem) - bus_dmamap_sync(dev->dmat, dev_priv->hws_dmamem->map, 0, - PAGE_SIZE, BUS_DMASYNC_PREREAD); - return (val); + dev_priv->mm.bit_6_swizzle_x = swizzle_x; + dev_priv->mm.bit_6_swizzle_y = swizzle_y; } -/* - * These five ring manipulation functions are protected by dev->dev_lock. - */ int -inteldrm_wait_ring(struct drm_i915_private *dev_priv, int n) +inteldrm_swizzle_page(struct vm_page *pg) { - struct inteldrm_ring *ring = &dev_priv->ring; - u_int32_t acthd_reg, acthd, last_acthd, last_head; - int i; - - acthd_reg = IS_I965G(dev_priv) ? 
ACTHD_I965 : ACTHD; - last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; - last_acthd = I915_READ(acthd_reg); + vaddr_t va; + int i; + u_int8_t temp[64], *vaddr; + +#if defined (__HAVE_PMAP_DIRECT) + va = pmap_map_direct(pg); +#else + va = uvm_km_valloc(kernel_map, PAGE_SIZE); + if (va == 0) + return (ENOMEM); + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW); + pmap_update(pmap_kernel()); +#endif + vaddr = (u_int8_t *)va; + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } - /* ugh. Could really do with a proper, resettable timer here. */ - for (i = 0; i < 100000; i++) { - ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; - acthd = I915_READ(acthd_reg); - ring->space = ring->head - (ring->tail + 8); +#if defined (__HAVE_PMAP_DIRECT) + pmap_unmap_direct(va); +#else + pmap_kremove(va, va + PAGE_SIZE); + pmap_update(pmap_kernel()); + uvm_km_free(kernel_map, va, PAGE_SIZE); +#endif + return (0); +} - INTELDRM_VPRINTF("%s: head: %x tail: %x space: %x\n", __func__, - ring->head, ring->tail, ring->space); - if (ring->space < 0) - ring->space += ring->size; - if (ring->space >= n) - return (0); +void +i915_gem_bit_17_swizzle(struct drm_obj *obj) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + struct vm_page *pg; + bus_dma_segment_t *segp; + int page_count = obj->size >> PAGE_SHIFT; + int i, n, ret; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17 || + obj_priv->bit_17 == NULL) + return; - /* Only timeout if the ring isn't chewing away on something */ - if (ring->head != last_head || acthd != last_acthd) - i = 0; + while (i < page_count) { + /* compare bit 17 with previous one (in case we swapped). + * if they don't match we'll have to swizzle the page + */ + if ((((segp->ds_addr + n) >> 17) & 0x1) != + test_bit(i, obj_priv->bit_17)) { + /* XXX move this to somewhere where we already have pg */ + pg = PHYS_TO_VM_PAGE(segp->ds_addr + n * PAGE_SIZE); + KASSERT(pg != NULL); + ret = inteldrm_swizzle_page(pg); + if (ret) + return; + atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); + } - last_head = ring->head; - last_acthd = acthd; - tsleep(dev_priv, PZERO | PCATCH, "i915wt", - hz / 100); + if (++n * PAGE_SIZE > segp->ds_len) { + n = 0; + segp++; + } } - return (EBUSY); } -void -inteldrm_wrap_ring(struct drm_i915_private *dev_priv) +void +i915_gem_save_bit_17_swizzle(struct drm_obj *obj) { - u_int32_t rem;; + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; + bus_dma_segment_t *segp; + int page_count = obj->size >> PAGE_SHIFT, i, n; - rem = dev_priv->ring.size - dev_priv->ring.tail; - if (dev_priv->ring.space < rem && - inteldrm_wait_ring(dev_priv, rem) != 0) - return; /* XXX */ + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; - dev_priv->ring.space -= rem; + if (obj_priv->bit_17 == NULL) { + /* round up number of pages to a multiple of 32 so we know what + * size to make the bitmask. 
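The bit_17 array is a one-bit-per-page bitmap stored in 32-bit words, hence the round-up to a multiple of 32 mentioned here. A minimal sketch of that bookkeeping with toy helpers (not the set_bit/test_bit the driver uses); (page_count + 31) / 32 is the same rounding written a different way:

#include <stdint.h>
#include <stdlib.h>

/* One bit per page: round the page count up to whole 32-bit words. */
uint32_t *
toy_alloc_page_bitmap(size_t page_count)
{
	size_t nwords = (page_count + 31) / 32;

	return (calloc(nwords, sizeof(uint32_t)));
}

static inline void
toy_bit_set(uint32_t *bm, size_t i)
{
	bm[i / 32] |= 1U << (i % 32);
}

static inline void
toy_bit_clear(uint32_t *bm, size_t i)
{
	bm[i / 32] &= ~(1U << (i % 32));
}

static inline int
toy_bit_test(const uint32_t *bm, size_t i)
{
	return ((bm[i / 32] >> (i % 32)) & 1);
}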
XXX this is wasteful with malloc + * and a better way should be done + */ + size_t nb17 = ((page_count + 31) & ~31)/32; + obj_priv->bit_17 = drm_alloc(nb17 * sizeof(u_int32_t)); + if (obj_priv-> bit_17 == NULL) { + return; + } - bus_space_set_region_4(dev_priv->bst, dev_priv->ring.bsh, - dev_priv->ring.woffset, MI_NOOP, rem / 4); + } - dev_priv->ring.tail = 0; + segp = &obj_priv->dma_segs[0]; + n = 0; + while (i < page_count) { + if ((segp->ds_addr + (n * PAGE_SIZE)) & (1 << 17)) + set_bit(i, obj_priv->bit_17); + else + clear_bit(i, obj_priv->bit_17); + + if (++n * PAGE_SIZE > segp->ds_len) { + n = 0; + segp++; + } + } } -void -inteldrm_begin_ring(struct drm_i915_private *dev_priv, int ncmd) +bus_size_t +i915_get_fence_size(struct drm_i915_private *dev_priv, bus_size_t size) { - int bytes = 4 * ncmd; + bus_size_t i, start; - INTELDRM_VPRINTF("%s: %d\n", __func__, ncmd); - if (dev_priv->ring.tail + bytes > dev_priv->ring.size) - inteldrm_wrap_ring(dev_priv); - if (dev_priv->ring.space < bytes) - inteldrm_wait_ring(dev_priv, bytes); - dev_priv->ring.woffset = dev_priv->ring.tail; - dev_priv->ring.tail += bytes; - dev_priv->ring.tail &= dev_priv->ring.size - 1; - dev_priv->ring.space -= bytes; + if (IS_I965G(dev_priv)) { + /* 965 can have fences anywhere, so align to gpu-page size */ + return ((size + (4096 - 1)) & ~(4096 - 1)); + } else { + /* + * Align the size to a power of two greater than the smallest + * fence size. + */ + if (IS_I9XX(dev_priv)) + start = 1024 * 1024; + else + start = 512 * 1024; + + for (i = start; i < size; i <<= 1) + ; + + return (i); + } } -void -inteldrm_out_ring(struct drm_i915_private *dev_priv, u_int32_t cmd) +int +i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) { - INTELDRM_VPRINTF("%s: %x\n", __func__, cmd); - bus_space_write_4(dev_priv->bst, dev_priv->ring.bsh, - dev_priv->ring.woffset, cmd); - /* - * don't need to deal with wrap here because we padded - * the ring out if we would wrap - */ - dev_priv->ring.woffset += 4; + struct drm_i915_private *dev_priv = dev->dev_private; + int tile_width; + + /* Linear is always ok */ + if (tiling_mode == I915_TILING_NONE) + return (1); + + if (!IS_I9XX(dev_priv) || (tiling_mode == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(dev_priv))) + tile_width = 128; + else + tile_width = 512; + + /* Check stride and size constraints */ + if (IS_I965G(dev_priv)) { + /* fence reg has end address, so size is ok */ + if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) + return (0); + } else if (IS_I9XX(dev_priv)) { + u_int32_t pitch_val = ffs(stride / tile_width) - 1; + /* + * XXX: for Y tiling, max pitch is actually 6 (8k) instead of 4 + * (2k) on the 945. 
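i915_get_fence_size above captures the pre-965 constraint that a fenceable region is a power of two no smaller than the minimum fence size (1MB on 9xx, 512KB on the older parts), while 965 only needs GPU-page alignment. The same arithmetic as a standalone helper with toy names:

#include <stddef.h>

size_t
toy_fence_size(size_t size, int is_965, int is_9xx)
{
	size_t fence;

	if (is_965)		/* 965: 4KB granularity, just page-align */
		return ((size + 4095) & ~(size_t)4095);

	/* pre-965: next power of two >= size, floored at the minimum */
	fence = is_9xx ? 1024 * 1024 : 512 * 1024;
	while (fence < size)
		fence <<= 1;
	return (fence);
}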
+ */ + if (pitch_val > I915_FENCE_MAX_PITCH_VAL || + size > (I830_FENCE_MAX_SIZE_VAL << 20)) + return (0); + } else { + u_int32_t pitch_val = ffs(stride / tile_width) - 1; + + if (pitch_val > I830_FENCE_MAX_PITCH_VAL || + size > (I830_FENCE_MAX_SIZE_VAL << 19)) + return (0); + } + + /* 965+ just needs multiples of the tile width */ + if (IS_I965G(dev_priv)) + return ((stride & (tile_width - 1)) == 0); + + + /* Pre-965 needs power-of-two */ + if (stride < tile_width || stride & (stride - 1) || + i915_get_fence_size(dev_priv, size) != size) + return (0); + return (1); } -void -inteldrm_advance_ring(struct drm_i915_private *dev_priv) +int +i915_gem_object_fence_offset_ok(struct drm_obj *obj, int tiling_mode) { - INTELDRM_VPRINTF("%s: %x, %x\n", __func__, dev_priv->ring.wspace, - dev_priv->ring.woffset); - I915_WRITE(PRB0_TAIL, dev_priv->ring.tail); -} + struct drm_device *dev = obj->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct inteldrm_obj *obj_priv = (struct inteldrm_obj *)obj; -void -inteldrm_update_ring(struct drm_i915_private *dev_priv) + if (obj_priv->dmamap == NULL || tiling_mode == I915_TILING_NONE) + return (1); + + if (!IS_I965G(dev_priv)) { + if (obj_priv->gtt_offset & (obj->size -1)) + return (0); + if (IS_I9XX(dev_priv)) { + if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK) + return (0); + } else { + if (obj_priv->gtt_offset & ~I830_FENCE_START_MASK) + return (0); + } + } + return (1); +} +/** + * Sets the tiling mode of an object, returning the required swizzling of + * bit 6 of addresses in the object. + */ +int +i915_gem_set_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv) { - struct inteldrm_ring *ring = &dev_priv->ring; + struct drm_i915_gem_set_tiling *args = data; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + int ret = 0; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + obj_priv = (struct inteldrm_obj *)obj; + + if (i915_tiling_ok(dev, args->stride, obj->size, + args->tiling_mode) == 0) { + ret = EINVAL; + DRM_LOCK(); /* for unref */ + goto out; + } - ring->head = (I915_READ(PRB0_HEAD) & HEAD_ADDR); - ring->tail = (I915_READ(PRB0_TAIL) & TAIL_ADDR); - ring->space = ring->head - (ring->tail + 8); - if (ring->space < 0) - ring->space += ring->size; - INTELDRM_VPRINTF("%s: head: %x tail: %x space: %x\n", __func__, - ring->head, ring->tail, ring->space); + DRM_LOCK(); + if (args->tiling_mode == I915_TILING_NONE) { + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + args->stride = 0; + } else { + if (args->tiling_mode == I915_TILING_X) + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + else + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + /* If we can't handle the swizzling, make it untiled. */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { + args->tiling_mode = I915_TILING_NONE; + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + args->stride = 0; + } + } + + if (args->tiling_mode != obj_priv->tiling_mode || + args->stride != obj_priv->stride) { + /* + * We need to rebind the object if its current allocation no + * longer meets the alignment restrictions for its new tiling + * mode. Otherwise we can leave it alone, but must clear any + * fence register. 
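i915_gem_object_fence_offset_ok above boils down to: on pre-965 a tiled object must sit at a multiple of its own (power-of-two) fence size, plus a chipset-dependent limit on where in the aperture a fenced object may start. A sketch of the alignment half only; the start-mask half needs the real register constants, so it is left out:

/*
 * Pre-965: a fence covers a naturally aligned, power-of-two sized
 * region, so the object's GTT offset must be a multiple of its size.
 * 965 fences carry explicit start/end addresses and don't care.
 */
int
toy_fence_offset_ok(unsigned long gtt_offset, unsigned long size,
    int is_965, int tiled)
{
	if (!tiled || is_965)
		return (1);

	return ((gtt_offset & (size - 1)) == 0);
}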
+ */ + if (i915_gem_object_fence_offset_ok(obj, args->tiling_mode)) { + if (obj_priv->pin_count) + ret = EINVAL; + else + ret = i915_gem_object_unbind(obj, 1); + } else if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { + ret = i915_gem_object_put_fence_reg(obj, 1); + } else { + inteldrm_wipe_mappings(obj); + } + if (ret != 0) { + args->tiling_mode = obj_priv->tiling_mode; + args->stride = obj_priv->stride; + goto out; + } + obj_priv->tiling_mode = args->tiling_mode; + obj_priv->stride = args->stride; + } + +out: + drm_gem_object_unreference(obj); + DRM_UNLOCK(); + + return (ret); } -void -inteldrm_lastclose(struct drm_device *dev) +/** + * Returns the current tiling mode and required bit 6 swizzling for the object. + */ +int +i915_gem_get_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_get_tiling *args = data; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return (EBADF); + obj_priv = (struct inteldrm_obj *)obj; + + DRM_LOCK(); + + args->tiling_mode = obj_priv->tiling_mode; + switch (obj_priv->tiling_mode) { + case I915_TILING_X: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + break; + case I915_TILING_NONE: + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + break; + default: + DRM_ERROR("unknown tiling mode\n"); + } - dev_priv->sarea_priv = NULL; + drm_gem_object_unreference(obj); + DRM_UNLOCK(); - i915_dma_cleanup(dev); + return 0; } +#endif /* INTELDRM_GEM */ + /** * inteldrm_pipe_enabled - check if a pipe is enabled * @dev: DRM device @@ -1093,3 +5247,296 @@ inteldrm_restore_state(struct drm_i915_private *dev_priv) return 0; } + +#ifdef INTELDRM_GEM +/* + * Reset the chip after a hang (965 only) + * + * The procedure that should be followed is relatively simple: + * - reset the chip using the reset reg + * - re-init context state + * - re-init Hardware status page + * - re-init ringbuffer + * - re-init interrupt state + * - re-init display + */ +void +inteldrm_965_reset(struct drm_i915_private *dev_priv, u_int8_t flags) +{ + pcireg_t reg; + int i = 0; + + if (flags == GDRST_FULL) + inteldrm_save_display(dev_priv); + + reg = pci_conf_read(dev_priv->pc, dev_priv->tag, GDRST); + /* + * Set the domains we want to reset, then bit 0 (reset itself). + * then we wait for the hardware to clear it. + */ + pci_conf_write(dev_priv->pc, dev_priv->tag, GDRST, + reg | (u_int32_t)flags | ((flags == GDRST_FULL) ? 0x1 : 0x0)); + delay(50); + /* don't clobber the rest of the register */ + pci_conf_write(dev_priv->pc, dev_priv->tag, GDRST, reg & 0xfe); + + /* if this fails we're pretty much fucked, but don't loop forever */ + do { + delay(100); + reg = pci_conf_read(dev_priv->pc, dev_priv->tag, GDRST); + } while ((reg & 0x1) && ++i < 10); + + if (reg & 0x1) + printf("bit 0 not cleared .. "); + + /* put everything back together again */ + + /* + * GTT is already up (we didn't do a pci-level reset, thank god. + * + * We don't have to restore the contexts (we don't use them yet). + * So, if X is running we need to put the ringbuffer back first. 
+ */ + if (dev_priv->mm.suspended == 0) { + struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; + if (inteldrm_start_ring(dev_priv) != 0) + panic("can't restart ring, we're fucked"); + + /* put the hardware status page back */ + if (I915_NEED_GFX_HWS(dev_priv)) + I915_WRITE(HWS_PGA, ((struct inteldrm_obj *) + dev_priv->hws_obj)->gtt_offset); + else + I915_WRITE(HWS_PGA, + dev_priv->hws_dmamem->map->dm_segs[0].ds_addr); + I915_READ(HWS_PGA); /* posting read */ + + /* so we remove the handler and can put it back in */ + DRM_UNLOCK(); + drm_irq_uninstall(dev); + drm_irq_install(dev); + DRM_LOCK(); + } else + printf("not restarting ring...\n"); + + + if (flags == GDRST_FULL) + inteldrm_restore_display(dev_priv); +} +#endif /* INTELDRM_GEM */ + +/* + * Debug code from here. + */ +#ifdef WATCH_INACTIVE +void +inteldrm_verify_inactive(struct drm_i915_private *dev_priv, char *file, + int line) +{ + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + + TAILQ_FOREACH(obj_priv, &dev_priv->mm.inactive_list, list) { + obj = (struct drm_obj *)obj_priv; + if (obj_priv->pin_count || inteldrm_is_active(obj_priv) || + obj->write_domain & I915_GEM_GPU_DOMAINS) + DRM_ERROR("inactive %p (p $d a $d w $x) %s:%d\n", + obj, obj_priv->pin_count, + inteldrm_is_active(obj_priv), + obj->write_domain, file, line); + } +} +#endif /* WATCH_INACTIVE */ + +#if (INTELDRM_DEBUG > 1) + +static const char *get_pin_flag(struct inteldrm_obj *obj_priv) +{ + if (obj_priv->pin_count > 0) + return "p"; + else + return " "; +} + +static const char *get_tiling_flag(struct inteldrm_obj *obj_priv) +{ + switch (obj_priv->tiling_mode) { + default: + case I915_TILING_NONE: return " "; + case I915_TILING_X: return "X"; + case I915_TILING_Y: return "Y"; + } +} + +void +i915_gem_seqno_info(int kdev) +{ + struct drm_device *dev = drm_get_device_from_kdev(kdev); + drm_i915_private_t *dev_priv = dev->dev_private; + + if (dev_priv->hw_status_page != NULL) { + printf("Current sequence: %d\n", i915_get_gem_seqno(dev_priv)); + } else { + printf("Current sequence: hws uninitialized\n"); + } +} + + +void +i915_interrupt_info(int kdev) +{ + struct drm_device *dev = drm_get_device_from_kdev(kdev); + drm_i915_private_t *dev_priv = dev->dev_private; + + printf("Interrupt enable: %08x\n", + I915_READ(IER)); + printf("Interrupt identity: %08x\n", + I915_READ(IIR)); + printf("Interrupt mask: %08x\n", + I915_READ(IMR)); + printf("Pipe A stat: %08x\n", + I915_READ(PIPEASTAT)); + printf("Pipe B stat: %08x\n", + I915_READ(PIPEBSTAT)); + printf("Interrupts received: 0\n"); + if (dev_priv->hw_status_page != NULL) { + printf("Current sequence: %d\n", + i915_get_gem_seqno(dev_priv)); + } else { + printf("Current sequence: hws uninitialized\n"); + } +} + +void +i915_gem_fence_regs_info(int kdev) +{ + struct drm_device *dev = drm_get_device_from_kdev(kdev); + drm_i915_private_t *dev_priv = dev->dev_private; + int i; + + printf("Reserved fences = %d\n", dev_priv->fence_reg_start); + printf("Total fences = %d\n", dev_priv->num_fence_regs); + for (i = 0; i < dev_priv->num_fence_regs; i++) { + struct drm_obj *obj = dev_priv->fence_regs[i].obj; + + if (obj == NULL) { + printf("Fenced object[%2d] = unused\n", i); + } else { + struct inteldrm_obj *obj_priv; + + obj_priv = (struct inteldrm_obj *)obj; + printf("Fenced object[%2d] = %p: %s " + "%08x %08zx %08x %s %08x %08x %d", + i, obj, get_pin_flag(obj_priv), + obj_priv->gtt_offset, + obj->size, obj_priv->stride, + get_tiling_flag(obj_priv), + obj->read_domains, obj->write_domain, + 
obj_priv->last_rendering_seqno); + if (obj->name) + printf(" (name: %d)", obj->name); + printf("\n"); + } + } +} + +void +i915_hws_info(int kdev) +{ + struct drm_device *dev = drm_get_device_from_kdev(kdev); + drm_i915_private_t *dev_priv = dev->dev_private; + int i; + volatile u32 *hws; + + hws = (volatile u32 *)dev_priv->hw_status_page; + if (hws == NULL) + return; + + for (i = 0; i < 4096 / sizeof(u32) / 4; i += 4) { + printf("0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i * 4, + hws[i], hws[i + 1], hws[i + 2], hws[i + 3]); + } +} + +static void +i915_dump_pages(bus_space_tag_t bst, bus_space_handle_t bsh, + bus_size_t size) +{ + bus_addr_t offset = 0; + int i = 0; + + /* + * this is a bit odd so i don't have to play with the intel + * tools too much. + */ + for (offset = 0; offset < size; offset += 4, i += 4) { + if (i == PAGE_SIZE) + i = 0; + printf("%08x : %08x\n", i, bus_space_read_4(bst, bsh, + offset)); + } +} + +void +i915_batchbuffer_info(int kdev) +{ + struct drm_device *dev = drm_get_device_from_kdev(kdev); + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_obj *obj; + struct inteldrm_obj *obj_priv; + bus_space_handle_t bsh; + int ret; + + TAILQ_FOREACH(obj_priv, &dev_priv->mm.active_list, list) { + obj = &obj_priv->obj; + if (obj->read_domains & I915_GEM_DOMAIN_COMMAND) { + if ((ret = bus_space_subregion(dev_priv->bst, + dev_priv->aperture_bsh, obj_priv->gtt_offset, + obj->size, &bsh)) != 0) { + DRM_ERROR("Failed to map pages: %d\n", ret); + return; + } + printf("--- gtt_offset = 0x%08x\n", + obj_priv->gtt_offset); + i915_dump_pages(dev_priv->bst, bsh, obj->size); + } + } +} + +void +i915_ringbuffer_data(int kdev) +{ + struct drm_device *dev = drm_get_device_from_kdev(kdev); + drm_i915_private_t *dev_priv = dev->dev_private; + bus_size_t off; + + if (!dev_priv->ring.ring_obj) { + printf("No ringbuffer setup\n"); + return; + } + + for (off = 0; off < dev_priv->ring.size; off += 4) + printf("%08x : %08x\n", off, bus_space_read_4(dev_priv->bst, + dev_priv->ring.bsh, off)); +} + +void +i915_ringbuffer_info(int kdev) +{ + struct drm_device *dev = drm_get_device_from_kdev(kdev); + drm_i915_private_t *dev_priv = dev->dev_private; + u_int32_t head, tail; + + head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; + + printf("RingHead : %08x\n", head); + printf("RingTail : %08x\n", tail); + printf("RingMask : %08x\n", dev_priv->ring.size - 1); + printf("RingSize : %08lx\n", dev_priv->ring.size); + printf("Acthd : %08x\n", I915_READ(IS_I965G(dev_priv) ? 
+ ACTHD_I965 : ACTHD)); +} + +#endif diff --git a/sys/dev/pci/drm/i915_drv.h b/sys/dev/pci/drm/i915_drv.h index 3f52e270d5f..dece67d1c78 100644 --- a/sys/dev/pci/drm/i915_drv.h +++ b/sys/dev/pci/drm/i915_drv.h @@ -37,7 +37,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20080312" +#define DRIVER_DATE "20080730" enum pipe { PIPE_A = 0, @@ -59,6 +59,7 @@ enum pipe { #define DRIVER_PATCHLEVEL 0 struct inteldrm_ring { + struct drm_obj *ring_obj; bus_space_handle_t bsh; bus_size_t size; u_int32_t head; @@ -67,11 +68,21 @@ struct inteldrm_ring { u_int32_t woffset; }; +#define I915_FENCE_REG_NONE -1 + +struct inteldrm_fence { + TAILQ_ENTRY(inteldrm_fence) list; + struct drm_obj *obj; + u_int32_t last_rendering_seqno; +}; + typedef struct drm_i915_private { struct device dev; struct device *drmdev; + bus_dma_tag_t agpdmat; /* tag from intagp for GEM */ bus_dma_tag_t dmat; bus_space_tag_t bst; + bus_space_handle_t aperture_bsh; u_long flags; u_int16_t pci_device; @@ -86,8 +97,21 @@ typedef struct drm_i915_private { struct drm_local_map *sarea; drm_i915_sarea_t *sarea_priv; + union flush { + struct { + bus_space_tag_t bst; + bus_space_handle_t bsh; + } i9xx; + struct { + bus_dma_segment_t seg; + caddr_t kva; + } i8xx; + } ifp; struct inteldrm_ring ring; + struct workq *workq; + struct vm_page *pgs; struct drm_local_map hws_map; + struct drm_obj *hws_obj; struct drm_dmamem *hws_dmamem; void *hw_status_page; unsigned int status_gfx_addr; @@ -103,6 +127,10 @@ typedef struct drm_i915_private { int allow_batchbuffer; + struct inteldrm_fence fence_regs[16]; /* 965 */ + int fence_reg_start; /* 4 by default */ + int num_fence_regs; /* 8 pre-965, 16 post */ + /* Register state */ u8 saveLBB; u32 saveDSPACNTR; @@ -202,8 +230,107 @@ typedef struct drm_i915_private { u32 saveCURBPOS; u32 saveCURBBASE; u32 saveCURSIZE; + + struct { + /** + * List of objects currently involved in rendering from the + * ringbuffer. + * + * Includes buffers having the contents of their GPU caches + * flushed, not necessarily primitives. last_rendering_seqno + * represents when the rendering involved will be completed. + * + * A reference is held on the buffer while on this list. + */ + TAILQ_HEAD(i915_gem_list, inteldrm_obj) active_list; + + /** + * List of objects which are not in the ringbuffer but which + * still have a write_domain which needs to be flushed before + * unbinding. + * + * last_rendering_seqno is 0 while an object is in this list + * + * A reference is held on the buffer while on this list. + */ + struct i915_gem_list flushing_list; + + /* + * list of objects currently pending a GPU write flush. + * + * All elements on this list will either be on the active + * or flushing list, last rendiering_seqno differentiates the + * two. + */ + struct i915_gem_list gpu_write_list; + /** + * LRU list of objects which are not in the ringbuffer and + * are ready to unbind, but are still in the GTT. + * + * last_rendering_seqno is 0 while an object is in this list + * + * A reference is not held on the buffer while on this list, + * as merely being GTT-bound shouldn't prevent its being + * freed, and we'll pull it off the list in the free path. + */ + struct i915_gem_list inactive_list; + + /* Fence LRU */ + TAILQ_HEAD(i915_fence, inteldrm_fence) fence_list; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. 
+ */ + TAILQ_HEAD(i915_request , inteldrm_request) request_list; + + /** + * We leave the user IRQ off as much as possible, + * but this means that requests will finish and never + * be retired once the system goes idle. Set a timer to + * fire periodically while the ring is running. When it + * fires, go retire requests in a workq. + */ + struct timeout retire_timer; + struct timeout hang_timer; + /* for hangcheck */ + int hang_cnt; + u_int32_t last_acthd; + + uint32_t next_gem_seqno; + + /** + * Flag if the X Server, and thus DRM, is not currently in + * control of the device. + * + * This is set between LeaveVT and EnterVT. It needs to be + * replaced with a semaphore. It also needs to be + * transitioned away from for kernel modesetting. + */ + int suspended; + + /** + * Flag if the hardware appears to be wedged. + * + * This is set when attempts to idle the device timeout. + * It prevents command submission from occuring and makes + * every pending request fail + */ + int wedged; + + /** Bit 6 swizzling required for X tiling */ + uint32_t bit_6_swizzle_x; + /** Bit 6 swizzling required for Y tiling */ + uint32_t bit_6_swizzle_y; + } mm; } drm_i915_private_t; +struct inteldrm_file { + struct drm_file file_priv; + struct { + } mm; +}; + /* chip type flags */ #define CHIP_I830 0x0001 #define CHIP_I845G 0x0002 @@ -222,18 +349,86 @@ typedef struct drm_i915_private { #define CHIP_M 0x4000 #define CHIP_HWS 0x8000 +/** driver private structure attached to each drm_gem_object */ +struct inteldrm_obj { + struct drm_obj obj; + + /** This object's place on the active/flushing/inactive lists */ + TAILQ_ENTRY(inteldrm_obj) list; + TAILQ_ENTRY(inteldrm_obj) write_list; + struct i915_gem_list *current_list; + /* GTT binding. */ + bus_dmamap_t dmamap; + bus_dma_segment_t *dma_segs; + /* Current offset of the object in GTT space. */ + bus_addr_t gtt_offset; + u_int32_t *bit_17; + /* + * This is set if the object is on the active or flushing lists + * (has pending rendering), and is not set if it's on inactive (ready + * to be unbound). + */ +#define I915_ACTIVE 0x0001 /* being used by the gpu. */ +#define I915_IN_EXEC 0x0002 /* being processed in execbuffer */ +#define I915_USER_PINNED 0x0004 /* BO has been pinned from userland */ +#define I915_GPU_WRITE 0x0008 /* BO has been not flushed */ +#define I915_DONTNEED 0x0010 /* BO backing pages purgable */ +#define I915_PURGED 0x0020 /* BO backing pages purged */ +#define I915_DIRTY 0x0040 /* BO written to since last bound */ +#define I915_EXEC_NEEDS_FENCE 0x0080 /* being processed but will need fence*/ +#define I915_FENCED_EXEC 0x0100 /* Most recent exec needs fence */ + int io_flags; + /* extra flags to bus_dma */ + int dma_flags; + /* Fence register for this object. needed for tiling. */ + int fence_reg; + /** refcount for times pinned this object in GTT space */ + int pin_count; + /* number of times pinned by pin ioctl. */ + u_int user_pin_count; + + /** Breadcrumb of last rendering to the buffer. */ + u_int32_t last_rendering_seqno; + /** Current tiling mode for the object. */ + u_int32_t tiling_mode; + u_int32_t stride; +}; + +#define inteldrm_is_active(obj_priv) (obj_priv->io_flags & I915_ACTIVE) +#define inteldrm_is_dirty(obj_priv) (obj_priv->io_flags & I915_DIRTY) +#define inteldrm_exec_needs_fence(obj_priv) \ + (obj_priv->io_flags & I915_EXEC_NEEDS_FENCE) +#define inteldrm_needs_fence(obj_priv) (obj_priv->io_flags & I915_FENCED_EXEC) + +/** + * Request queue structure. 
+ * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. + * + * By keeping this list, we can avoid having to do questionable + * sequence-number comparisons on buffer last_rendering_seqnos, and associate + * an emission time with seqnos for tracking how far ahead of the GPU we are. + */ +struct inteldrm_request { + TAILQ_ENTRY(inteldrm_request) list; + /** GEM sequence number associated with this request. */ + uint32_t seqno; +}; + u_int32_t inteldrm_read_hws(struct drm_i915_private *, int); int inteldrm_wait_ring(struct drm_i915_private *dev, int n); void inteldrm_begin_ring(struct drm_i915_private *, int); void inteldrm_out_ring(struct drm_i915_private *, u_int32_t); void inteldrm_advance_ring(struct drm_i915_private *); void inteldrm_update_ring(struct drm_i915_private *); +void inteldrm_error(struct drm_i915_private *); int inteldrm_pipe_enabled(struct drm_i915_private *, int); /* i915_dma.c */ extern void i915_emit_breadcrumb(struct drm_device *dev); -extern int i915_dispatch_batchbuffer(struct drm_device * dev, - drm_i915_batchbuffer_t * batch); +void i915_emit_box(struct drm_device * dev, struct drm_clip_rect * boxes, + int DR1, int DR4); int i915_dma_cleanup(struct drm_device *); int i915_init_phys_hws(drm_i915_private_t *, bus_dma_tag_t); @@ -250,6 +445,7 @@ extern void i915_driver_irq_uninstall(struct drm_device * dev); extern int i915_vblank_pipe_get(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int i915_emit_irq(struct drm_device * dev); +extern int i915_wait_irq(struct drm_device * dev, int irq_nr); extern int i915_enable_vblank(struct drm_device *dev, int crtc); extern void i915_disable_vblank(struct drm_device *dev, int crtc); extern u32 i915_get_vblank_counter(struct drm_device *dev, int crtc); @@ -305,6 +501,11 @@ read64(struct drm_i915_private *dev_priv, bus_size_t off) #define I915_WRITE8(reg,val) bus_space_write_1(dev_priv->regs->bst, \ dev_priv->regs->bsh, (reg), (val)) +#define RING_LOCK_TEST_WITH_RETURN(dev, file_priv) do { \ + if (((drm_i915_private_t *)dev->dev_private)->ring.ring_obj == NULL) \ + LOCK_TEST_WITH_RETURN(dev, file_priv); \ +} while (0) + #define INTELDRM_VERBOSE 0 #if INTELDRM_VERBOSE > 0 #define INTELDRM_VPRINTF(fmt, args...) DRM_INFO(fmt, ##args) @@ -2121,6 +2322,71 @@ read64(struct drm_i915_private *dev_priv, bus_size_t off) #define I915_NEED_GFX_HWS(dev_priv) (dev_priv->flags & CHIP_HWS) +#define HAS_RESET(dev_priv) IS_I965G(dev_priv) + +/* + * Interrupts that are always left unmasked. + * + * Since pipe events are edge-triggered from the PIPESTAT register to IIRC, + * we leave them always unmasked in IMR and then control enabling them through + * PIPESTAT alone. + */ +#define I915_INTERRUPT_ENABLE_FIX \ + (I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | \ + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | \ + I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) + +/* Interrupts that we mask and unmask at runtime */ +#define I915_INTERRUPT_ENABLE_VAR (I915_USER_INTERRUPT) + +/* These are all of the interrupts used by the driver */ +#define I915_INTERRUPT_ENABLE_MASK \ + (I915_INTERRUPT_ENABLE_FIX | \ + I915_INTERRUPT_ENABLE_VAR) + +#define printeir(val) printf("%s: error reg: %b\n", __func__, val, \ + "\20\x10PTEERR\x2REFRESHERR\x1INSTERR") + + +/* + * With the i45 and later, Y tiling got adjusted so that it was 32 128-byte + * rows, which changes the alignment requirements and fence programming. 
+ */ +#define HAS_128_BYTE_Y_TILING(dev_priv) (IS_I9XX(dev_priv) && \ + !(IS_I915G(dev_priv) || IS_I915GM(dev_priv))) + #define PRIMARY_RINGBUFFER_SIZE (128*1024) +/* Inlines */ + +/** + * Returns true if seq1 is later than seq2. + */ +static __inline int +i915_seqno_passed(uint32_t seq1, uint32_t seq2) +{ + return ((int32_t)(seq1 - seq2) >= 0); +} + +/* + * Read seqence number from the Hardware status page. + */ +static __inline u_int32_t +i915_get_gem_seqno(struct drm_i915_private *dev_priv) +{ + return (READ_HWSP(dev_priv, I915_GEM_HWS_INDEX)); +} + +static __inline int +i915_obj_purgeable(struct inteldrm_obj *obj_priv) +{ + return (obj_priv->io_flags & I915_DONTNEED); +} + +static __inline int +i915_obj_purged(struct inteldrm_obj *obj_priv) +{ + return (obj_priv->io_flags & I915_PURGED); +} + #endif diff --git a/sys/dev/pci/drm/i915_irq.c b/sys/dev/pci/drm/i915_irq.c index 2fd852de163..f7a0af2921c 100644 --- a/sys/dev/pci/drm/i915_irq.c +++ b/sys/dev/pci/drm/i915_irq.c @@ -38,25 +38,6 @@ void i915_enable_pipestat(drm_i915_private_t *, int, u_int32_t); void i915_disable_pipestat(drm_i915_private_t *, int, u_int32_t); int i915_wait_irq(struct drm_device *, int); -/* - * Interrupts that are always left unmasked. - * - * Since pipe events are edge-triggered from the PIPESTAT register to IIRC, - * we leave them always unmasked in IMR and then control enabling them through - * PIPESTAT alone. - */ -#define I915_INTERRUPT_ENABLE_FIX \ - (I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | \ - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT) - -/* Interrupts that we mask and unmask at runtime */ -#define I915_INTERRUPT_ENABLE_VAR (I915_USER_INTERRUPT) - -/* These are all of the interrupts used by the driver */ -#define I915_INTERRUPT_ENABLE_MASK \ - (I915_INTERRUPT_ENABLE_FIX | \ - I915_INTERRUPT_ENABLE_VAR) - inline void i915_enable_irq(drm_i915_private_t *dev_priv, u_int32_t mask) { @@ -141,55 +122,6 @@ i915_get_vblank_counter(struct drm_device *dev, int pipe) return ((high1 << 8) | low); } -int -inteldrm_intr(void *arg) -{ - struct drm_device *dev = arg; - drm_i915_private_t *dev_priv = dev->dev_private; - u_int32_t iir, pipea_stats = 0, pipeb_stats = 0; - - /* - * lock is to protect from writes to PIPESTAT and IMR from other cores. 
- */ - mtx_enter(&dev_priv->user_irq_lock); - iir = I915_READ(IIR); - if (iir == 0) { - mtx_leave(&dev_priv->user_irq_lock); - return (0); - } - - /* - * Clear the PIPE(A|B)STAT regs before the IIR - */ - if (iir & I915_DISPLAY_PIPE_A_EVENT_INTERRUPT) { - pipea_stats = I915_READ(PIPEASTAT); - I915_WRITE(PIPEASTAT, pipea_stats); - } - if (iir & I915_DISPLAY_PIPE_B_EVENT_INTERRUPT) { - pipeb_stats = I915_READ(PIPEBSTAT); - I915_WRITE(PIPEBSTAT, pipeb_stats); - } - - I915_WRITE(IIR, iir); - (void)I915_READ(IIR); /* Flush posted writes */ - - if (dev_priv->sarea_priv != NULL) - dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); - - if (iir & I915_USER_INTERRUPT) - wakeup(dev_priv); - - mtx_leave(&dev_priv->user_irq_lock); - - if (pipea_stats & I915_VBLANK_INTERRUPT_STATUS) - drm_handle_vblank(dev, 0); - - if (pipeb_stats & I915_VBLANK_INTERRUPT_STATUS) - drm_handle_vblank(dev, 1); - - return (1); -} - int i915_emit_irq(struct drm_device *dev) { @@ -212,18 +144,14 @@ i915_emit_irq(struct drm_device *dev) void i915_user_irq_get(struct drm_i915_private *dev_priv) { - struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; - - if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1)) + if (++dev_priv->user_irq_refcount == 1) i915_enable_irq(dev_priv, I915_USER_INTERRUPT); } void i915_user_irq_put(struct drm_i915_private *dev_priv) { - struct drm_device *dev = (struct drm_device *)dev_priv->drmdev; - - if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0)) + if (--dev_priv->user_irq_refcount == 0) i915_disable_irq(dev_priv, I915_USER_INTERRUPT); } @@ -262,7 +190,7 @@ i915_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv) drm_i915_irq_emit_t *emit = data; int result; - LOCK_TEST_WITH_RETURN(dev, file_priv); + RING_LOCK_TEST_WITH_RETURN(dev, file_priv); if (!dev_priv) { DRM_ERROR("called with no initialization\n"); @@ -350,23 +278,31 @@ i915_driver_irq_install(struct drm_device *dev) I915_WRITE(IER, 0x0); (void)I915_READ(IER); - dev_priv->irqh = pci_intr_establish(dev_priv->pc, dev_priv->ih, IPL_BIO, - inteldrm_intr, dev, dev_priv->dev.dv_xname); - if (dev_priv->irqh == NULL) - return (ENOENT); - dev->vblank->vb_max = 0xffffff; /* only 24 bits of frame count */ if (IS_G4X(dev_priv)) dev->vblank->vb_max = 0xffffffff; - /* Unmask the interrupts that we always want on. */ - dev_priv->irq_mask_reg = ~I915_INTERRUPT_ENABLE_FIX; - - dev_priv->pipestat[0] = dev_priv->pipestat[1] = 0; +#ifdef INTELDRM_GEM + /* + * Enable some error detection, note the instruction error mask + * bit is reserved, so we leave it masked. + */ + I915_WRITE(EMR, IS_G4X(dev_priv) ? 
+ ~(GM45_ERROR_PAGE_TABLE | GM45_ERROR_MEM_PRIV | + GM45_ERROR_CP_PRIV | I915_ERROR_MEMORY_REFRESH) : + ~(I915_ERROR_PAGE_TABLE | I915_ERROR_MEMORY_REFRESH)); +#endif /* INTELDRM_GEM */ - /* Disable pipe interrupt enables, clear pending pipe status */ - I915_WRITE(PIPEASTAT, I915_READ(PIPEASTAT) & 0x8000ffff); - I915_WRITE(PIPEBSTAT, I915_READ(PIPEBSTAT) & 0x8000ffff); + /* + * Disable pipe interrupt enables, clear pending pipe status + * add back in the enabled interrupts from previous iterations + * (say in the reset case where we want vblank interrupts etc to be + * switched back on if they were running + */ + I915_WRITE(PIPEASTAT, (I915_READ(PIPEASTAT) & 0x8000ffff) | + dev_priv->pipestat[0]); + I915_WRITE(PIPEBSTAT, (I915_READ(PIPEBSTAT) & 0x8000ffff) | + dev_priv->pipestat[1]); /* Clear pending interrupt status */ I915_WRITE(IIR, I915_READ(IIR)); @@ -382,9 +318,6 @@ i915_driver_irq_uninstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - if (!dev_priv) - return; - I915_WRITE(HWSTAM, 0xffffffff); I915_WRITE(PIPEASTAT, 0); I915_WRITE(PIPEBSTAT, 0); @@ -394,6 +327,4 @@ i915_driver_irq_uninstall(struct drm_device *dev) I915_WRITE(PIPEASTAT, I915_READ(PIPEASTAT) & 0x8000ffff); I915_WRITE(PIPEBSTAT, I915_READ(PIPEBSTAT) & 0x8000ffff); I915_WRITE(IIR, I915_READ(IIR)); - - pci_intr_disestablish(dev_priv->pc, dev_priv->irqh); }
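
A few illustrative sketches of the machinery this diff adds follow; they are simplified, userland stand-ins written for this description, not code from the tree.

The i915_seqno_passed() inline added to i915_drv.h relies on unsigned wraparound plus a signed cast, so sequence numbers keep comparing correctly after the 32-bit counter rolls over, as long as the two values are less than 2^31 apart. A standalone demonstration with made-up values:

#include <stdint.h>
#include <stdio.h>

static int
seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return ((int32_t)(seq1 - seq2) >= 0);
}

int
main(void)
{
        /* 3 follows 0xfffffffd across the wrap, despite being smaller */
        printf("%d\n", seqno_passed(3, 0xfffffffdU));   /* prints 1 */
        printf("%d\n", seqno_passed(0xfffffffdU, 3));   /* prints 0 */
        return (0);
}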
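
The head and tail values printed by i915_ringbuffer_info() are byte offsets into a power-of-two sized ring, and the space that command emission (and inteldrm_wait_ring()) cares about falls out of their difference. The small reserve below is an assumption used for illustration, not the driver's exact value:

#include <stdint.h>

/*
 * Free space between the GPU's read pointer (head) and the CPU's write
 * pointer (tail).  A small reserve keeps head == tail meaning "empty"
 * rather than "full".
 */
static uint32_t
ring_free_space(uint32_t head, uint32_t tail, uint32_t size)
{
        uint32_t space = head - tail;

        if ((int32_t)space <= 0)
                space += size;
        return (space > 8 ? space - 8 : 0);
}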
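
The active/flushing/inactive list comments in the new mm structure describe a small state machine: an object stays on the active list while the GPU may still touch it, moves to the flushing list if only an unflushed write domain keeps it busy, and lands on the inactive list once its last_rendering_seqno has been retired. A minimal sketch of that retirement walk, using stand-in types rather than struct inteldrm_obj:

#include <sys/queue.h>
#include <stdint.h>

struct gem_obj {
        TAILQ_ENTRY(gem_obj) entry;
        uint32_t last_rendering_seqno;
        uint32_t write_domain;  /* non-zero: flush still needed */
};
TAILQ_HEAD(gem_list, gem_obj);

/* wraparound-safe "a is at or after b" */
static int
seqno_passed(uint32_t a, uint32_t b)
{
        return ((int32_t)(a - b) >= 0);
}

/*
 * The active list is kept in submission order, so retirement can stop at
 * the first object the GPU has not reached yet.  last_rendering_seqno is
 * cleared on the way out, matching the list comments above.
 */
static void
retire_objects(struct gem_list *active, struct gem_list *flushing,
    struct gem_list *inactive, uint32_t completed)
{
        struct gem_obj *obj;

        while ((obj = TAILQ_FIRST(active)) != NULL &&
            seqno_passed(completed, obj->last_rendering_seqno)) {
                TAILQ_REMOVE(active, obj, entry);
                obj->last_rendering_seqno = 0;
                if (obj->write_domain != 0)
                        TAILQ_INSERT_TAIL(flushing, obj, entry);
                else
                        TAILQ_INSERT_TAIL(inactive, obj, entry);
        }
}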
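
The hang_timer, last_acthd, hang_cnt and wedged fields suggest the hang detection that accompanies the retire timer: if requests are still outstanding but ACTHD (the GPU's current execution address) has not advanced between timer fires, count a strike and eventually declare the chip wedged so pending requests fail rather than block forever. A sketch of one tick; the three-strike threshold is an assumption:

#include <stdint.h>

struct hangcheck {
        uint32_t last_acthd;
        int hang_cnt;
        int wedged;
};

/* Called from a periodic timer; "busy" means requests are outstanding. */
static void
hangcheck_tick(struct hangcheck *hc, uint32_t acthd, int busy)
{
        if (!busy || acthd != hc->last_acthd)
                hc->hang_cnt = 0;
        else if (++hc->hang_cnt >= 3)
                hc->wedged = 1; /* fail pending requests, try a reset */
        hc->last_acthd = acthd;
}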
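
Fence registers on pre-965 parts cover a naturally aligned, power-of-two sized range of the GTT, which is why tiled objects carry a fence_reg and why fencing them costs more aperture than their nominal size. A sketch of the round-up; the 512KB floor here is an assumption, the real minimum depends on the chipset generation:

#include <stdint.h>

static uint64_t
fence_region_size(uint64_t obj_size)
{
        uint64_t sz = 512 * 1024;       /* assumed floor, varies by chip */

        while (sz < obj_size)
                sz <<= 1;
        return (sz);
}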
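
Finally, the reworked i915_user_irq_get()/i915_user_irq_put() pair is a plain reference count on the user interrupt: the source is unmasked only on the 0 to 1 transition and masked again on 1 to 0, so nested waiters do not thrash IMR. A stubbed sketch of the pattern, with prints standing in for the register writes:

#include <stdio.h>

static int user_irq_refcount;

static void
user_irq_get(void)
{
        if (++user_irq_refcount == 1)
                printf("unmask user interrupt\n");
}

static void
user_irq_put(void)
{
        if (--user_irq_refcount == 0)
                printf("mask user interrupt\n");
}

int
main(void)
{
        user_irq_get();         /* unmasks */
        user_irq_get();         /* refcount only */
        user_irq_put();         /* refcount only */
        user_irq_put();         /* masks again */
        return (0);
}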