Diffstat (limited to 'lib/libdrm/intel')
-rw-r--r--   lib/libdrm/intel/intel_aub.h         |  123
-rw-r--r--   lib/libdrm/intel/intel_bufmgr.h      |   43
-rw-r--r--   lib/libdrm/intel/intel_bufmgr_gem.c  |  856
-rw-r--r--   lib/libdrm/intel/intel_bufmgr_priv.h |    5
-rw-r--r--   lib/libdrm/intel/intel_chipset.h     |  103
-rw-r--r--   lib/libdrm/intel/shlib_version       |    2
6 files changed, 1072 insertions(+), 60 deletions(-)
diff --git a/lib/libdrm/intel/intel_aub.h b/lib/libdrm/intel/intel_aub.h new file mode 100644 index 000000000..a36fd53a5 --- /dev/null +++ b/lib/libdrm/intel/intel_aub.h @@ -0,0 +1,123 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. + * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. 
*/ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_GENERAL_STATE +#define AUB_TRACE_GENERAL_STATE_MASK 0x000000ff + +#define AUB_TRACE_VS_STATE 0x00000001 +#define AUB_TRACE_GS_STATE 0x00000002 +#define AUB_TRACE_CL_STATE 0x00000003 +#define AUB_TRACE_SF_STATE 0x00000004 +#define AUB_TRACE_WM_STATE 0x00000005 +#define AUB_TRACE_CC_STATE 0x00000006 +#define AUB_TRACE_CL_VP 0x00000007 +#define AUB_TRACE_SF_VP 0x00000008 +#define AUB_TRACE_CC_VP 0x00000009 +#define AUB_TRACE_SAMPLER_STATE 0x0000000a +#define AUB_TRACE_KERNEL 0x0000000b +#define AUB_TRACE_SCRATCH 0x0000000c +#define AUB_TRACE_SDC 0x0000000d +#define AUB_TRACE_BLEND_STATE 0x00000016 +#define AUB_TRACE_DEPTH_STENCIL_STATE 0x00000017 + +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_SURFACE_STATE +#define AUB_TRACE_SURFACE_STATE_MASK 0x00000ff00 +#define AUB_TRACE_BINDING_TABLE 0x000000100 +#define AUB_TRACE_SURFACE_STATE 0x000000200 + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/lib/libdrm/intel/intel_bufmgr.h b/lib/libdrm/intel/intel_bufmgr.h index 85da8b9aa..8d7f2390d 100644 --- a/lib/libdrm/intel/intel_bufmgr.h +++ b/lib/libdrm/intel/intel_bufmgr.h @@ -1,5 +1,5 @@ /* - * Copyright © 2008 Intel Corporation + * Copyright © 2008-2012 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,10 +36,12 @@ #include <stdio.h> #include <stdint.h> +#include <stdio.h> struct drm_clip_rect; typedef struct _drm_intel_bufmgr drm_intel_bufmgr; +typedef struct _drm_intel_context drm_intel_context; typedef struct _drm_intel_bo drm_intel_bo; struct _drm_intel_bo { @@ -84,6 +86,19 @@ struct _drm_intel_bo { int handle; }; +enum aub_dump_bmp_format { + AUB_DUMP_BMP_FORMAT_8BIT = 1, + AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4, + 
AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6, + AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7, +}; + +typedef struct _drm_intel_aub_annotation { + uint32_t type; + uint32_t subtype; + uint32_t ending_offset; +} drm_intel_aub_annotation; + #define BO_ALLOC_FOR_RENDER (1<<0) drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, @@ -148,15 +163,38 @@ void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr); void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr); void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit); +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo); int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo); int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); + int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable); +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable); +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset); +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count); + int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id); int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total); +int drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr); +int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns); + +drm_intel_context *drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr); +void drm_intel_gem_context_destroy(drm_intel_context *ctx); +int drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags); + +int drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd); +drm_intel_bo *drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, + int prime_fd, int size); /* drm_intel_bufmgr_fake.c */ drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd, @@ -203,6 +241,9 @@ void drm_intel_decode_set_head_tail(struct drm_intel_decode *ctx, void drm_intel_decode_set_output_file(struct drm_intel_decode *ctx, FILE *out); void drm_intel_decode(struct drm_intel_decode *ctx); +int drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result); /** @{ Compatibility defines to keep old code building despite the symbol rename * from dri_* to drm_intel_* diff --git a/lib/libdrm/intel/intel_bufmgr_gem.c b/lib/libdrm/intel/intel_bufmgr_gem.c index 7cc34e7e8..e2a33cd27 100644 --- a/lib/libdrm/intel/intel_bufmgr_gem.c +++ b/lib/libdrm/intel/intel_bufmgr_gem.c @@ -1,7 +1,7 @@ /************************************************************************** * * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation + * Copyright © 2007-2012 Intel Corporation * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA * All Rights Reserved. * @@ -54,14 +54,28 @@ #include <stdbool.h> #include "errno.h" +#ifndef ETIME +#define ETIME ETIMEDOUT +#endif #include "libdrm_lists.h" #include "intel_bufmgr.h" #include "intel_bufmgr_priv.h" #include "intel_chipset.h" +#include "intel_aub.h" #include "string.h" #include "i915_drm.h" +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + #define DBG(...) 
do { \ if (bufmgr_gem->bufmgr.debug) \ fprintf(stderr, __VA_ARGS__); \ @@ -110,8 +124,13 @@ typedef struct _drm_intel_bufmgr_gem { unsigned int has_blt : 1; unsigned int has_relaxed_fencing : 1; unsigned int has_llc : 1; + unsigned int has_wait_timeout : 1; unsigned int bo_reuse : 1; + unsigned int no_exec : 1; bool fenced_relocs; + + FILE *aub_file; + uint32_t aub_offset; } drm_intel_bufmgr_gem; #define DRM_INTEL_RELOC_FENCE (1<<0) @@ -206,6 +225,11 @@ struct _drm_intel_bo_gem { /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ bool mapped_cpu_write; + + uint32_t aub_offset; + + drm_intel_aub_annotation *aub_annotations; + unsigned aub_annotation_count; }; static unsigned int @@ -543,7 +567,7 @@ drm_intel_gem_bo_busy(drm_intel_bo *bo) struct drm_i915_gem_busy busy; int ret; - memset(&busy, 0, sizeof(busy)); + VG_CLEAR(busy); busy.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); @@ -557,6 +581,7 @@ drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, { struct drm_i915_gem_madvise madv; + VG_CLEAR(madv); madv.handle = bo_gem->gem_handle; madv.madv = state; madv.retained = 1; @@ -684,7 +709,8 @@ retry: return NULL; bo_gem->bo.size = bo_size; - memset(&create, 0, sizeof(create)); + + VG_CLEAR(create); create.size = bo_size; ret = drmIoctl(bufmgr_gem->fd, @@ -720,6 +746,8 @@ retry: bo_gem->used_as_reloc_target = false; bo_gem->has_error = false; bo_gem->reusable = true; + bo_gem->aub_annotations = NULL; + bo_gem->aub_annotation_count = 0; drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); @@ -840,7 +868,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, if (!bo_gem) return NULL; - memset(&open_arg, 0, sizeof(open_arg)); + VG_CLEAR(open_arg); open_arg.name = handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, @@ -863,7 +891,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, bo_gem->global_name = handle; bo_gem->reusable = false; - memset(&get_tiling, 0, sizeof(get_tiling)); + VG_CLEAR(get_tiling); get_tiling.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, @@ -894,6 +922,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) DRMLISTDEL(&bo_gem->vma_list); if (bo_gem->mem_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); munmap(bo_gem->mem_virtual, bo_gem->bo.size); bufmgr_gem->vma_count--; } @@ -903,16 +932,31 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) } /* Close this object */ - memset(&close, 0, sizeof(close)); + VG_CLEAR(close); close.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); if (ret != 0) { DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", bo_gem->gem_handle, bo_gem->name, strerror(errno)); } + free(bo_gem->aub_annotations); free(bo); } +static void +drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) +{ +#if HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); + + if (bo_gem->gtt_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); +#endif +} + /** Frees all cached buffers significantly older than @time. 
*/ static void drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) @@ -1041,6 +1085,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); bo_gem->map_count = 0; drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); } DRMLISTDEL(&bo_gem->name_list); @@ -1115,7 +1160,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) DBG("bo_map: %d (%s), map_count=%d\n", bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - memset(&mmap_arg, 0, sizeof(mmap_arg)); + VG_CLEAR(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; mmap_arg.offset = 0; mmap_arg.size = bo->size; @@ -1132,12 +1177,14 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; } DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->mem_virtual); bo->virtual = bo_gem->mem_virtual; + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT /* XXX _CPU */; if (write_enable) @@ -1156,22 +1203,20 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) if (write_enable) bo_gem->mapped_cpu_write = true; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); pthread_mutex_unlock(&bufmgr_gem->lock); return 0; } -int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +static int +map_gtt(drm_intel_bo *bo) { - return drm_intel_gem_bo_map(bo, 1); -#ifndef __OpenBSD__ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_set_domain set_domain; int ret; - pthread_mutex_lock(&bufmgr_gem->lock); - if (bo_gem->map_count++ == 0) drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); @@ -1182,7 +1227,7 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - memset(&mmap_arg, 0, sizeof(mmap_arg)); + VG_CLEAR(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; /* Get the fake offset back... 
*/ @@ -1197,7 +1242,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } @@ -1214,7 +1258,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } } @@ -1224,7 +1267,36 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->gtt_virtual); - /* Now move it to the GTT domain so that the CPU caches are flushed */ + return 0; +} + +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +{ + return drm_intel_gem_bo_map(bo, 1); +#ifndef __OpenBSD__ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret) { + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + + /* Now move it to the GTT domain so that the GPU and CPU + * caches are flushed and the GPU isn't actively using the + * buffer. + * + * The pagefault handler does this domain change for us when + * it has unbound the BO from the GTT, but it's up to us to + * tell it when we're about to use things if we had done + * rendering and it still happens to be bound to the GTT. + */ + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; set_domain.write_domain = I915_GEM_DOMAIN_GTT; @@ -1237,17 +1309,56 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); } + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); pthread_mutex_unlock(&bufmgr_gem->lock); return 0; #endif } +/** + * Performs a mapping of the buffer object like the normal GTT + * mapping, but avoids waiting for the GPU to be done reading from or + * rendering to the buffer. + * + * This is used in the implementation of GL_ARB_map_buffer_range: The + * user asks to create a buffer, then does a mapping, fills some + * space, runs a drawing command, then asks to map it again without + * synchronizing because it guarantees that it won't write over the + * data that the GPU is busy using (or, more specifically, that if it + * does write over the data, it acknowledges that rendering is + * undefined). + */ + +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + int ret; + + /* If the CPU cache isn't coherent with the GTT, then use a + * regular synchronized mapping. The problem is that we don't + * track where the buffer was last used on the CPU side in + * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so + * we would potentially corrupt the buffer even when the user + * does reasonable things. 
+ */ +#ifndef __OpenBSD__ + if (!bufmgr_gem->has_llc) +#endif + return drm_intel_gem_bo_map_gtt(bo); + + pthread_mutex_lock(&bufmgr_gem->lock); + ret = map_gtt(bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_sw_finish sw_finish; int ret = 0; if (bo == NULL) @@ -1265,18 +1376,19 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) } if (bo_gem->mapped_cpu_write) { -#ifndef __OpenBSD__ + struct drm_i915_gem_sw_finish sw_finish; + /* Cause a flush to happen if the buffer's pinned for * scanout, so the results show up in a timely manner. * Unlike GTT set domains, this only does work if the * buffer should be scanout-related. */ + VG_CLEAR(sw_finish); sw_finish.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SW_FINISH, &sw_finish); ret = ret == -1 ? -errno : 0; -#endif bo_gem->mapped_cpu_write = false; } @@ -1286,6 +1398,7 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) */ if (--bo_gem->map_count == 0) { drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); bo->virtual = NULL; } pthread_mutex_unlock(&bufmgr_gem->lock); @@ -1307,7 +1420,7 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pwrite pwrite; int ret; - memset(&pwrite, 0, sizeof(pwrite)); + VG_CLEAR(pwrite); pwrite.handle = bo_gem->gem_handle; pwrite.offset = offset; pwrite.size = size; @@ -1332,6 +1445,7 @@ drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; int ret; + VG_CLEAR(get_pipe_from_crtc_id); get_pipe_from_crtc_id.crtc_id = crtc_id; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, @@ -1358,7 +1472,7 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pread pread; int ret; - memset(&pread, 0, sizeof(pread)); + VG_CLEAR(pread); pread.handle = bo_gem->gem_handle; pread.offset = offset; pread.size = size; @@ -1384,6 +1498,58 @@ drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) } /** + * Waits on a BO for the given amount of time. + * + * @bo: buffer object to wait for + * @timeout_ns: amount of time to wait in nanoseconds. + * If value is less than 0, an infinite wait will occur. + * + * Returns 0 if the wait was successful ie. the last batch referencing the + * object has completed within the allotted time. Otherwise some negative return + * value describes the error. Of particular interest is -ETIME when the wait has + * failed to yield the desired result. + * + * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows + * the operation to give up after a certain amount of time. Another subtle + * difference is the internal locking semantics are different (this variant does + * not hold the lock for the duration of the wait). This makes the wait subject + * to a larger userspace race window. + * + * The implementation shall wait until the object is no longer actively + * referenced within a batch buffer at the time of the call. The wait will + * not guarantee that the buffer is re-issued via another thread, or an flinked + * handle. Userspace must make sure this race does not occur if such precision + * is important. 
+ */ +int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_wait wait; + int ret; + + if (!bufmgr_gem->has_wait_timeout) { + DBG("%s:%d: Timed wait is not supported. Falling back to " + "infinite wait\n", __FILE__, __LINE__); + if (timeout_ns) { + drm_intel_gem_bo_wait_rendering(bo); + return 0; + } else { + return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; + } + } + + wait.bo_handle = bo_gem->gem_handle; + wait.timeout_ns = timeout_ns; + wait.flags = 0; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + if (ret == -1) + return -errno; + + return ret; +} + +/** * Sets the object to the GTT read and possibly write domain, used by the X * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). * @@ -1398,6 +1564,7 @@ drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) struct drm_i915_gem_set_domain set_domain; int ret; + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; @@ -1583,9 +1750,10 @@ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) assert(bo_gem->reloc_count >= start); /* Unreference the cleared target buffers */ for (i = start; i < bo_gem->reloc_count; i++) { - if (bo_gem->reloc_target_info[i].bo != bo) { - drm_intel_gem_bo_unreference_locked_timed(bo_gem-> - reloc_target_info[i].bo, + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; + if (&target_bo_gem->bo != bo) { + bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); } } @@ -1613,6 +1781,8 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) if (target_bo == bo) continue; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + /* Continue walking the tree depth-first. */ drm_intel_gem_bo_process_reloc(target_bo); @@ -1638,6 +1808,8 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) if (target_bo == bo) continue; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + /* Continue walking the tree depth-first. */ drm_intel_gem_bo_process_reloc2(target_bo); @@ -1691,6 +1863,287 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) } } +static void +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) +{ + fwrite(&data, 1, 4, bufmgr_gem->aub_file); +} + +static void +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) +{ + fwrite(data, 1, size, bufmgr_gem->aub_file); +} + +static void +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint32_t *data; + unsigned int i; + + data = malloc(bo->size); + drm_intel_bo_get_subdata(bo, offset, size, data); + + /* Easy mode: write out bo with no relocations */ + if (!bo_gem->reloc_count) { + aub_out_data(bufmgr_gem, data, size); + free(data); + return; + } + + /* Otherwise, handle the relocations while writing. 
*/ + for (i = 0; i < size / 4; i++) { + int r; + for (r = 0; r < bo_gem->reloc_count; r++) { + struct drm_i915_gem_relocation_entry *reloc; + drm_intel_reloc_target *info; + + reloc = &bo_gem->relocs[r]; + info = &bo_gem->reloc_target_info[r]; + + if (reloc->offset == offset + i * 4) { + drm_intel_bo_gem *target_gem; + uint32_t val; + + target_gem = (drm_intel_bo_gem *)info->bo; + + val = reloc->delta; + val += target_gem->aub_offset; + + aub_out(bufmgr_gem, val); + data[i] = val; + break; + } + } + if (r == bo_gem->reloc_count) { + /* no relocation, just the data */ + aub_out(bufmgr_gem, data[i]); + } + } + + free(data); +} + +static void +aub_bo_get_address(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + /* Give the object a graphics address in the AUB file. We + * don't just use the GEM object address because we do AUB + * dumping before execution -- we want to successfully log + * when the hardware might hang, and we might even want to aub + * capture for a driver trying to execute on a different + * generation of hardware by disabling the actual kernel exec + * call. + */ + bo_gem->aub_offset = bufmgr_gem->aub_offset; + bufmgr_gem->aub_offset += bo->size; + /* XXX: Handle aperture overflow. */ + assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); +} + +static void +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, + uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + (5 - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, subtype); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, size); + aub_write_bo_data(bo, offset, size); +} + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, + uint32_t offset, uint32_t size) +{ + uint32_t block_size; + uint32_t sub_offset; + + for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { + block_size = size - sub_offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_write_trace_block(bo, type, subtype, offset + sub_offset, + block_size); + } +} + +static void +aub_write_bo(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint32_t offset = 0; + unsigned i; + + aub_bo_get_address(bo); + + /* Write out each annotated section separately. 
*/ + for (i = 0; i < bo_gem->aub_annotation_count; ++i) { + drm_intel_aub_annotation *annotation = + &bo_gem->aub_annotations[i]; + uint32_t ending_offset = annotation->ending_offset; + if (ending_offset > bo->size) + ending_offset = bo->size; + if (ending_offset > offset) { + aub_write_large_trace_block(bo, annotation->type, + annotation->subtype, + offset, + ending_offset - offset); + offset = ending_offset; + } + } + + /* Write out any remaining unannotated data */ + if (offset < bo->size) { + aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, + offset, bo->size - offset); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, + uint32_t batch_buffer, int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_buffer; + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + (5 - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(bufmgr_gem, 0); /* general/surface subtype */ + aub_out(bufmgr_gem, bufmgr_gem->aub_offset); + aub_out(bufmgr_gem, ring_count * 4); + + /* FIXME: Need some flush operations here? */ + aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); + + /* Update offset pointer */ + bufmgr_gem->aub_offset += 4096; +} + +void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + uint32_t cpp; + + switch (format) { + case AUB_DUMP_BMP_FORMAT_8BIT: + cpp = 1; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_4444: + cpp = 2; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_0888: + case AUB_DUMP_BMP_FORMAT_ARGB_8888: + cpp = 4; + break; + default: + printf("Unknown AUB dump format %d\n", format); + return; + } + + if (!bufmgr_gem->aub_file) + return; + + aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); + aub_out(bufmgr_gem, (y1 << 16) | x1); + aub_out(bufmgr_gem, + (format << 24) | + (cpp << 19) | + pitch / 4); + aub_out(bufmgr_gem, (height << 16) | width); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, + ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | + ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); +} + +static void +aub_exec(drm_intel_bo *bo, int ring_flag, int used) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + bool batch_buffer_needs_annotations; + + if (!bufmgr_gem->aub_file) + return; + + /* If batch buffer is not annotated, annotate it the best we + * can. 
+ */ + batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; + if (batch_buffer_needs_annotations) { + drm_intel_aub_annotation annotations[2] = { + { AUB_TRACE_TYPE_BATCH, 0, used }, + { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } + }; + drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); + } + + /* Write out all buffers to AUB memory */ + for (i = 0; i < bufmgr_gem->exec_count; i++) { + aub_write_bo(bufmgr_gem->exec_bos[i]); + } + + /* Remove any annotations we added */ + if (batch_buffer_needs_annotations) + drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); + + fflush(bufmgr_gem->aub_file); + + /* + * One frame has been dumped. So reset the aub_offset for the next frame. + * + * FIXME: Can we do this? + */ + bufmgr_gem->aub_offset = 0x10000; +} + #ifndef __OpenBSD__ static int drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, @@ -1713,6 +2166,7 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, */ drm_intel_add_validate_buffer(bo); + VG_CLEAR(execbuf); execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; execbuf.buffer_count = bufmgr_gem->exec_count; execbuf.batch_start_offset = 0; @@ -1760,13 +2214,14 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, #endif static int -drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, - drm_clip_rect_t *cliprects, int num_cliprects, int DR4, - unsigned int flags) +do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; struct drm_i915_gem_execbuffer2 execbuf; - int ret, i; + int ret = 0; + int i; switch (flags & 0x7) { default: @@ -1793,6 +2248,7 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, */ drm_intel_add_validate_buffer2(bo, 0); + VG_CLEAR(execbuf); execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; execbuf.buffer_count = bufmgr_gem->exec_count; execbuf.batch_start_offset = 0; @@ -1804,9 +2260,17 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, execbuf.DR4 = DR4; #endif execbuf.flags = flags; - execbuf.rsvd1 = 0; + if (ctx == NULL) + i915_execbuffer2_set_context_id(execbuf, 0); + else + i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); execbuf.rsvd2 = 0; + aub_exec(bo, flags, used); + + if (bufmgr_gem->no_exec) + goto skip_execution; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); @@ -1824,6 +2288,7 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, } drm_intel_update_buffer_offsets2(bufmgr_gem); +skip_execution: if (bufmgr_gem->bufmgr.debug) drm_intel_gem_dump_validation_list(bufmgr_gem); @@ -1846,9 +2311,24 @@ drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, drm_clip_rect_t *cliprects, int num_cliprects, int DR4) { - return drm_intel_gem_bo_mrb_exec2(bo, used, - cliprects, num_cliprects, DR4, - I915_EXEC_RENDER); + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + I915_EXEC_RENDER); +} + +static int +drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + flags); +} + +int +drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, flags); } static int @@ -1859,7 +2339,7 @@ drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 
struct drm_i915_gem_pin pin; int ret; - memset(&pin, 0, sizeof(pin)); + VG_CLEAR(pin); pin.handle = bo_gem->gem_handle; pin.alignment = alignment; @@ -1881,7 +2361,7 @@ drm_intel_gem_bo_unpin(drm_intel_bo *bo) struct drm_i915_gem_unpin unpin; int ret; - memset(&unpin, 0, sizeof(unpin)); + VG_CLEAR(unpin); unpin.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); @@ -1962,21 +2442,92 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, return 0; } +drm_intel_bo * +drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + int ret; + uint32_t handle; + drm_intel_bo_gem *bo_gem; + struct drm_i915_gem_get_tiling get_tiling; + + ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); + if (ret) { + fprintf(stderr,"ret is %d %d\n", ret, errno); + return NULL; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + bo_gem->bo.size = size; + bo_gem->bo.handle = handle; + bo_gem->bo.bufmgr = bufmgr; + + bo_gem->gem_handle = handle; + + atomic_set(&bo_gem->refcount, 1); + + bo_gem->name = "prime"; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + DRMINITLISTHEAD(&bo_gem->name_list); + DRMINITLISTHEAD(&bo_gem->vma_list); + + VG_CLEAR(get_tiling); + get_tiling.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling); + if (ret != 0) { + drm_intel_gem_bo_unreference(&bo_gem->bo); + return NULL; + } + bo_gem->tiling_mode = get_tiling.tiling_mode; + bo_gem->swizzle_mode = get_tiling.swizzle_mode; + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + + return &bo_gem->bo; +} + +int +drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, + DRM_CLOEXEC, prime_fd) != 0) + return -errno; + + bo_gem->reusable = false; + + return 0; +} + static int drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_gem_flink flink; int ret; if (!bo_gem->global_name) { - memset(&flink, 0, sizeof(flink)); + struct drm_gem_flink flink; + + VG_CLEAR(flink); flink.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); if (ret != 0) return -errno; + bo_gem->global_name = flink.name; bo_gem->reusable = false; @@ -2292,6 +2843,208 @@ drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) } /** + * Get the PCI ID for the device. This can be overridden by setting the + * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 
+ */ +static int +get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) +{ + char *devid_override; + int devid; + int ret; + drm_i915_getparam_t gp; + + if (geteuid() == getuid()) { + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + bufmgr_gem->no_exec = true; + return strtod(devid_override, NULL); + } + } + + VG_CLEAR(devid); + VG_CLEAR(gp); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = &devid; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); + fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); + } + return devid; +} + +int +drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + return bufmgr_gem->pci_device; +} + +/** + * Sets up AUB dumping. + * + * This is a trace file format that can be used with the simulator. + * Packets are emitted in a format somewhat like GPU command packets. + * You can set up a GTT and upload your objects into the referenced + * space, then send off batchbuffers and get BMPs out the other end. + */ +void +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + + if (!enable) { + if (bufmgr_gem->aub_file) { + fclose(bufmgr_gem->aub_file); + bufmgr_gem->aub_file = NULL; + } + } + + if (geteuid() != getuid()) + return; + + bufmgr_gem->aub_file = fopen("intel.aub", "w+"); + if (!bufmgr_gem->aub_file) + return; + + /* Start allocating objects from just after the GTT. */ + bufmgr_gem->aub_offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); + aub_out(bufmgr_gem, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(bufmgr_gem, 0); /* app name */ + } + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* comment len */ + + /* Set up the GTT. 
The max we can handle is 256M */ + aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2)); + aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, 0); /* subtype */ + aub_out(bufmgr_gem, 0); /* offset */ + aub_out(bufmgr_gem, gtt_size); /* size */ + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(bufmgr_gem, entry); + } +} + +drm_intel_context * +drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_gem_context_create create; + drm_intel_context *context = NULL; + int ret; + + VG_CLEAR(create); + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret != 0) { + DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", + strerror(errno)); + return NULL; + } + + context = calloc(1, sizeof(*context)); + context->ctx_id = create.ctx_id; + context->bufmgr = bufmgr; + + return context; +} + +void +drm_intel_gem_context_destroy(drm_intel_context *ctx) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_context_destroy destroy; + int ret; + + if (ctx == NULL) + return; + + VG_CLEAR(destroy); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + destroy.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, + &destroy); + if (ret != 0) + fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", + strerror(errno)); + + free(ctx); +} + +int +drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_reg_read reg_read; + int ret; + + VG_CLEAR(reg_read); + reg_read.offset = offset; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); + + *result = reg_read.val; + return ret; +} + + +/** + * Annotate the given bo for use in aub dumping. + * + * \param annotations is an array of drm_intel_aub_annotation objects + * describing the type of data in various sections of the bo. Each + * element of the array specifies the type and subtype of a section of + * the bo, and the past-the-end offset of that section. The elements + * of \c annotations must be sorted so that ending_offset is + * increasing. + * + * \param count is the number of elements in the \c annotations array. + * If \c count is zero, then \c annotations will not be dereferenced. + * + * Annotations are copied into a private data structure, so caller may + * re-use the memory pointed to by \c annotations after the call + * returns. + * + * Annotations are stored for the lifetime of the bo; to reset to the + * default state (no annotations), call this function with a \c count + * of zero. + */ +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + unsigned size = sizeof(*annotations) * count; + drm_intel_aub_annotation *new_annotations = + count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL; + if (new_annotations == NULL) { + free(bo_gem->aub_annotations); + bo_gem->aub_annotations = NULL; + bo_gem->aub_annotation_count = 0; + return; + } + memcpy(new_annotations, annotations, size); + bo_gem->aub_annotations = new_annotations; + bo_gem->aub_annotation_count = count; +} + +/** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. 
* @@ -2335,13 +3088,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) (int)bufmgr_gem->gtt_size / 1024); } - gp.param = I915_PARAM_CHIPSET_ID; - gp.value = &bufmgr_gem->pci_device; - ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret) { - fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); - fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); - } + bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); if (IS_GEN2(bufmgr_gem->pci_device)) bufmgr_gem->gen = 2; @@ -2354,9 +3101,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) else if (IS_GEN6(bufmgr_gem->pci_device)) bufmgr_gem->gen = 6; else if (IS_GEN7(bufmgr_gem->pci_device)) - bufmgr_gem->gen = 7; - else - assert(0); + bufmgr_gem->gen = 7; + else { + free(bufmgr_gem); + return NULL; + } if (IS_GEN3(bufmgr_gem->pci_device) && bufmgr_gem->gtt_size > 256*1024*1024) { @@ -2367,6 +3116,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->gtt_size -= 256*1024*1024; } + VG_CLEAR(gp); gp.value = &tmp; #ifndef __OpenBSD__ @@ -2388,20 +3138,20 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); bufmgr_gem->has_relaxed_fencing = ret == 0; -#ifndef __OpenBSD__ + gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_wait_timeout = ret == 0; + gp.param = I915_PARAM_HAS_LLC; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret == -EINVAL) { -#endif + if (ret != 0) { /* Kernel does not supports HAS_LLC query, fallback to GPU * generation detection and assume that we have LLC on GEN6/7 */ bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | IS_GEN7(bufmgr_gem->pci_device)); -#ifndef __OpenBSD__ } else - bufmgr_gem->has_llc = ret == 0; -#endif + bufmgr_gem->has_llc = *gp.value; if (bufmgr_gem->gen < 4) { gp.param = I915_PARAM_NUM_FENCES_AVAIL; diff --git a/lib/libdrm/intel/intel_bufmgr_priv.h b/lib/libdrm/intel/intel_bufmgr_priv.h index 0b625200a..2592d42d5 100644 --- a/lib/libdrm/intel/intel_bufmgr_priv.h +++ b/lib/libdrm/intel/intel_bufmgr_priv.h @@ -280,6 +280,11 @@ struct _drm_intel_bufmgr { int debug; }; +struct _drm_intel_context { + unsigned int ctx_id; + struct _drm_intel_bufmgr *bufmgr; +}; + #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define ROUND_UP_TO(x, y) (((x) + (y) - 1) / (y) * (y)) #define ROUND_UP_TO_MB(x) ROUND_UP_TO((x), 1024*1024) diff --git a/lib/libdrm/intel/intel_chipset.h b/lib/libdrm/intel/intel_chipset.h index e3a30fc7d..ebec2f8ad 100644 --- a/lib/libdrm/intel/intel_chipset.h +++ b/lib/libdrm/intel/intel_chipset.h @@ -44,6 +44,49 @@ #define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* mobile */ #define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 #define PCI_CHIP_IVYBRIDGE_S 0x015a /* server */ +#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a /* server */ + +#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ +#define PCI_CHIP_HASWELL_GT2 0x0412 +#define PCI_CHIP_HASWELL_GT2_PLUS 0x0422 +#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ +#define PCI_CHIP_HASWELL_M_GT2 0x0416 +#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426 +#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */ +#define PCI_CHIP_HASWELL_S_GT2 0x041A +#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A +#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */ +#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12 +#define PCI_CHIP_HASWELL_SDV_GT2_PLUS 0x0C22 +#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */ +#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 +#define 
PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26 +#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */ +#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A +#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A +#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ +#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 +#define PCI_CHIP_HASWELL_ULT_GT2_PLUS 0x0A22 +#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ +#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 +#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26 +#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ +#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A +#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A +#define PCI_CHIP_HASWELL_CRW_GT1 0x0D12 /* Desktop */ +#define PCI_CHIP_HASWELL_CRW_GT2 0x0D22 +#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D32 +#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 /* Mobile */ +#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26 +#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36 +#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A /* Server */ +#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A +#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A + +#define PCI_CHIP_VALLEYVIEW_PO 0x0f30 /* power on board */ +#define PCI_CHIP_VALLEYVIEW_1 0x0f31 +#define PCI_CHIP_VALLEYVIEW_2 0x0f32 +#define PCI_CHIP_VALLEYVIEW_3 0x0f33 #define IS_830(dev) (dev == 0x3577) #define IS_845(dev) (dev == 0x2562) @@ -84,6 +127,11 @@ #define IS_I965GM(dev) (dev == 0x2A02) +#define IS_VALLEYVIEW(dev) (((dev) == PCI_CHIP_VALLEYVIEW_PO) || \ + ((dev) == PCI_CHIP_VALLEYVIEW_1) || \ + ((dev) == PCI_CHIP_VALLEYVIEW_2) || \ + ((dev) == PCI_CHIP_VALLEYVIEW_3)) + #define IS_GEN4(dev) (dev == 0x2972 || \ dev == 0x2982 || \ dev == 0x2992 || \ @@ -115,11 +163,56 @@ dev == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ dev == PCI_CHIP_SANDYBRIDGE_S) -#define IS_GEN7(dev) (dev == PCI_CHIP_IVYBRIDGE_GT1 || \ - dev == PCI_CHIP_IVYBRIDGE_GT2 || \ - dev == PCI_CHIP_IVYBRIDGE_M_GT1 || \ - dev == PCI_CHIP_IVYBRIDGE_M_GT2 || \ - dev == PCI_CHIP_IVYBRIDGE_S) +#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ + IS_HASWELL(devid) || \ + IS_VALLEYVIEW(devid)) + +#define IS_IVYBRIDGE(dev) (dev == PCI_CHIP_IVYBRIDGE_GT1 || \ + dev == PCI_CHIP_IVYBRIDGE_GT2 || \ + dev == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + dev == PCI_CHIP_IVYBRIDGE_M_GT2 || \ + dev == PCI_CHIP_IVYBRIDGE_S || \ + dev == PCI_CHIP_IVYBRIDGE_S_GT2) + +#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \ + devid == PCI_CHIP_HASWELL_M_GT1 || \ + devid == PCI_CHIP_HASWELL_S_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT1) +#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2 || \ + devid == PCI_CHIP_HASWELL_M_GT2 || \ + devid == PCI_CHIP_HASWELL_S_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT2 || \ + devid == PCI_CHIP_HASWELL_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_M_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_S_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_SDV_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT2_PLUS || \ + devid 
== PCI_CHIP_HASWELL_SDV_S_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_ULT_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_CRW_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT2_PLUS) + +#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \ + IS_HSW_GT2(devid)) #define IS_G4X(dev) (dev == 0x2E02 || \ dev == 0x2E12 || \ diff --git a/lib/libdrm/intel/shlib_version b/lib/libdrm/intel/shlib_version index b52599a16..c6e3f4d3f 100644 --- a/lib/libdrm/intel/shlib_version +++ b/lib/libdrm/intel/shlib_version @@ -1,2 +1,2 @@ major=2 -minor=0 +minor=1 |
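
Taken together, the AUB hooks this patch adds -- drm_intel_bufmgr_gem_set_aub_dump(), drm_intel_bufmgr_gem_set_aub_annotations() and drm_intel_gem_bo_aub_dump_bmp() -- wrap a frame of rendering for replay in the simulator; combined with INTEL_DEVID_OVERRIDE (which also sets no_exec, so batches are dumped but never submitted to the kernel), a trace can even be captured for a different hardware generation. A minimal sketch, assuming intel_aub.h is visible to the caller; every name other than the library entry points is a placeholder:

#include "intel_bufmgr.h"
#include "intel_aub.h"

static void capture_frame(drm_intel_bufmgr *bufmgr, drm_intel_bo *batch_bo,
                          int batch_used, drm_intel_bo *fb_bo,
                          int width, int height, int stride_bytes)
{
        /* Opens "intel.aub" in the cwd and writes the header + GTT. */
        drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);

        /* Label the batch bo so the decoder knows which bytes are
         * commands; ending offsets must be increasing, and the writer
         * clamps them to bo->size. */
        drm_intel_aub_annotation notes[2] = {
                { AUB_TRACE_TYPE_BATCH,  0, (uint32_t)batch_used },
                { AUB_TRACE_TYPE_NOTYPE, 0, (uint32_t)batch_bo->size },
        };
        drm_intel_bufmgr_gem_set_aub_annotations(batch_bo, notes, 2);

        /* ... build and exec the batch here; each exec dumps a frame ... */

        /* Ask the simulator to emit a BMP of the framebuffer. */
        drm_intel_gem_bo_aub_dump_bmp(fb_bo, 0, 0, width, height,
                                      AUB_DUMP_BMP_FORMAT_ARGB_8888,
                                      stride_bytes, 0);

        drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0); /* close intel.aub */
}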
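
drm_intel_gem_bo_map_unsynchronized() skips the set-domain wait on LLC hardware and degrades to the synchronized GTT map otherwise; note that in this OpenBSD port drm_intel_gem_bo_map_gtt() itself still short-circuits to the regular CPU map, so both paths end up synchronized here. A sketch of the intended GL_ARB_map_buffer_range-style streaming pattern, assuming the caller guarantees the GPU is not reading the written range (vbo, offset, data and len are placeholders):

#include <string.h>
#include "intel_bufmgr.h"

static int stream_subrange(drm_intel_bo *vbo, unsigned long offset,
                           const void *data, size_t len)
{
        int ret = drm_intel_gem_bo_map_unsynchronized(vbo);
        if (ret != 0)
                return ret;

        memcpy((char *)vbo->virtual + offset, data, len);
        return drm_intel_gem_bo_unmap_gtt(vbo);
}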
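
A usage sketch for the new timed wait; the 100 ms budget and the function name are illustrative. Per the doc comment above, 0 means the last batch referencing the object completed in time and -ETIME means it did not; on kernels without I915_PARAM_HAS_WAIT_TIMEOUT the library degrades to a blocking wait (timeout != 0) or a busy poll (timeout == 0):

#include <errno.h>
#include <stdio.h>
#include "intel_bufmgr.h"

static int wait_up_to_100ms(drm_intel_bo *batch_bo)
{
        int ret = drm_intel_gem_bo_wait(batch_bo, 100 * 1000000LL);

        if (ret == -ETIME)
                fprintf(stderr, "GPU still busy after 100 ms\n");
        return ret;
}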
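
The hardware-context entry points pair naturally as create/exec/destroy; the kernel isolates ring state between contexts. A sketch with error handling trimmed (batch_bo and batch_used are placeholders):

#include "intel_bufmgr.h"
#include "i915_drm.h"

static int exec_in_own_context(drm_intel_bufmgr *bufmgr,
                               drm_intel_bo *batch_bo, int batch_used)
{
        drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
        int ret;

        if (ctx == NULL)
                return -1; /* kernel lacks CONTEXT_CREATE */

        ret = drm_intel_gem_bo_context_exec(batch_bo, ctx, batch_used,
                                            I915_EXEC_RENDER);
        drm_intel_gem_context_destroy(ctx);
        return ret;
}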
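
The PRIME pair exports a bo as a dma-buf fd on one bufmgr and re-imports it on another (typically another process or device fd); both sides mark the bo non-reusable so the cache never hands the shared pages to an unrelated allocation. A sketch -- closing the fd after import is an assumption about the caller's lifetime management, since the imported handle keeps its own reference:

#include <unistd.h>
#include "intel_bufmgr.h"

static drm_intel_bo *share_bo(drm_intel_bo *src,
                              drm_intel_bufmgr *dst_bufmgr)
{
        drm_intel_bo *dst = NULL;
        int prime_fd;

        if (drm_intel_bo_gem_export_to_prime(src, &prime_fd) == 0) {
                dst = drm_intel_bo_gem_create_from_prime(dst_bufmgr,
                                                         prime_fd,
                                                         src->size);
                close(prime_fd); /* import holds its own reference */
        }
        return dst;
}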
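
drm_intel_reg_read() forwards to DRM_IOCTL_I915_REG_READ, which only accepts offsets the kernel whitelists. The render-ring timestamp register (0x2358 on Gen6+) is the usual candidate, but treat that offset as an assumption of this sketch, not something the patch guarantees:

#include <inttypes.h>
#include <stdio.h>
#include "intel_bufmgr.h"

static void print_gpu_timestamp(drm_intel_bufmgr *bufmgr)
{
        uint64_t ts;

        if (drm_intel_reg_read(bufmgr, 0x2358, &ts) == 0)
                printf("GPU timestamp: %" PRIu64 "\n", ts);
}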
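
One consequence of the expanded chipset table: the new IS_GEN7() covers Ivybridge, Haswell and ValleyView alike, so any Haswell-specific path has to test IS_HASWELL() before falling back to the generic gen7 check. A sketch -- intel_chipset.h is a private header, so this assumes code building inside the library tree:

#include "intel_bufmgr.h"
#include "intel_chipset.h"

static const char *describe_gen(drm_intel_bufmgr *bufmgr)
{
        int devid = drm_intel_bufmgr_gem_get_devid(bufmgr);

        if (IS_HASWELL(devid))
                return "gen7 (Haswell)";
        if (IS_GEN7(devid))
                return "gen7 (Ivybridge/ValleyView)";
        return "pre-gen7";
}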