Diffstat (limited to 'lib/libdrm/intel')
-rw-r--r--   lib/libdrm/intel/intel_aub.h         |  123
-rw-r--r--   lib/libdrm/intel/intel_bufmgr.h      |   43
-rw-r--r--   lib/libdrm/intel/intel_bufmgr_gem.c  |  856
-rw-r--r--   lib/libdrm/intel/intel_bufmgr_priv.h |    5
-rw-r--r--   lib/libdrm/intel/intel_chipset.h     |  103
-rw-r--r--   lib/libdrm/intel/shlib_version       |    2
6 files changed, 1072 insertions(+), 60 deletions(-)
diff --git a/lib/libdrm/intel/intel_aub.h b/lib/libdrm/intel/intel_aub.h new file mode 100644 index 000000000..a36fd53a5 --- /dev/null +++ b/lib/libdrm/intel/intel_aub.h @@ -0,0 +1,123 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. + * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. 
*/ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_GENERAL_STATE +#define AUB_TRACE_GENERAL_STATE_MASK 0x000000ff + +#define AUB_TRACE_VS_STATE 0x00000001 +#define AUB_TRACE_GS_STATE 0x00000002 +#define AUB_TRACE_CL_STATE 0x00000003 +#define AUB_TRACE_SF_STATE 0x00000004 +#define AUB_TRACE_WM_STATE 0x00000005 +#define AUB_TRACE_CC_STATE 0x00000006 +#define AUB_TRACE_CL_VP 0x00000007 +#define AUB_TRACE_SF_VP 0x00000008 +#define AUB_TRACE_CC_VP 0x00000009 +#define AUB_TRACE_SAMPLER_STATE 0x0000000a +#define AUB_TRACE_KERNEL 0x0000000b +#define AUB_TRACE_SCRATCH 0x0000000c +#define AUB_TRACE_SDC 0x0000000d +#define AUB_TRACE_BLEND_STATE 0x00000016 +#define AUB_TRACE_DEPTH_STENCIL_STATE 0x00000017 + +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_SURFACE_STATE +#define AUB_TRACE_SURFACE_STATE_MASK 0x00000ff00 +#define AUB_TRACE_BINDING_TABLE 0x000000100 +#define AUB_TRACE_SURFACE_STATE 0x000000200 + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/lib/libdrm/intel/intel_bufmgr.h b/lib/libdrm/intel/intel_bufmgr.h index 85da8b9aa..8d7f2390d 100644 --- a/lib/libdrm/intel/intel_bufmgr.h +++ b/lib/libdrm/intel/intel_bufmgr.h @@ -1,5 +1,5 @@ /* - * Copyright © 2008 Intel Corporation + * Copyright © 2008-2012 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,10 +36,12 @@ #include <stdio.h> #include <stdint.h> +#include <stdio.h> struct drm_clip_rect; typedef struct _drm_intel_bufmgr drm_intel_bufmgr; +typedef struct _drm_intel_context drm_intel_context; typedef struct _drm_intel_bo drm_intel_bo; struct _drm_intel_bo { @@ -84,6 +86,19 @@ struct _drm_intel_bo { int handle; }; +enum aub_dump_bmp_format { + AUB_DUMP_BMP_FORMAT_8BIT = 1, + AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4, + 
AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6, + AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7, +}; + +typedef struct _drm_intel_aub_annotation { + uint32_t type; + uint32_t subtype; + uint32_t ending_offset; +} drm_intel_aub_annotation; + #define BO_ALLOC_FOR_RENDER (1<<0) drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, @@ -148,15 +163,38 @@ void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr); void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr); void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit); +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo); int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo); int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); + int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable); +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable); +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset); +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count); + int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id); int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total); +int drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr); +int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns); + +drm_intel_context *drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr); +void drm_intel_gem_context_destroy(drm_intel_context *ctx); +int drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags); + +int drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd); +drm_intel_bo *drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, + int prime_fd, int size); /* drm_intel_bufmgr_fake.c */ drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd, @@ -203,6 +241,9 @@ void drm_intel_decode_set_head_tail(struct drm_intel_decode *ctx, void drm_intel_decode_set_output_file(struct drm_intel_decode *ctx, FILE *out); void drm_intel_decode(struct drm_intel_decode *ctx); +int drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result); /** @{ Compatibility defines to keep old code building despite the symbol rename * from dri_* to drm_intel_* diff --git a/lib/libdrm/intel/intel_bufmgr_gem.c b/lib/libdrm/intel/intel_bufmgr_gem.c index 7cc34e7e8..e2a33cd27 100644 --- a/lib/libdrm/intel/intel_bufmgr_gem.c +++ b/lib/libdrm/intel/intel_bufmgr_gem.c @@ -1,7 +1,7 @@ /************************************************************************** * * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation + * Copyright © 2007-2012 Intel Corporation * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA * All Rights Reserved. * @@ -54,14 +54,28 @@ #include <stdbool.h> #include "errno.h" +#ifndef ETIME +#define ETIME ETIMEDOUT +#endif #include "libdrm_lists.h" #include "intel_bufmgr.h" #include "intel_bufmgr_priv.h" #include "intel_chipset.h" +#include "intel_aub.h" #include "string.h" #include "i915_drm.h" +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + #define DBG(...) 
do { \ if (bufmgr_gem->bufmgr.debug) \ fprintf(stderr, __VA_ARGS__); \ @@ -110,8 +124,13 @@ typedef struct _drm_intel_bufmgr_gem { unsigned int has_blt : 1; unsigned int has_relaxed_fencing : 1; unsigned int has_llc : 1; + unsigned int has_wait_timeout : 1; unsigned int bo_reuse : 1; + unsigned int no_exec : 1; bool fenced_relocs; + + FILE *aub_file; + uint32_t aub_offset; } drm_intel_bufmgr_gem; #define DRM_INTEL_RELOC_FENCE (1<<0) @@ -206,6 +225,11 @@ struct _drm_intel_bo_gem { /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ bool mapped_cpu_write; + + uint32_t aub_offset; + + drm_intel_aub_annotation *aub_annotations; + unsigned aub_annotation_count; }; static unsigned int @@ -543,7 +567,7 @@ drm_intel_gem_bo_busy(drm_intel_bo *bo) struct drm_i915_gem_busy busy; int ret; - memset(&busy, 0, sizeof(busy)); + VG_CLEAR(busy); busy.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); @@ -557,6 +581,7 @@ drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, { struct drm_i915_gem_madvise madv; + VG_CLEAR(madv); madv.handle = bo_gem->gem_handle; madv.madv = state; madv.retained = 1; @@ -684,7 +709,8 @@ retry: return NULL; bo_gem->bo.size = bo_size; - memset(&create, 0, sizeof(create)); + + VG_CLEAR(create); create.size = bo_size; ret = drmIoctl(bufmgr_gem->fd, @@ -720,6 +746,8 @@ retry: bo_gem->used_as_reloc_target = false; bo_gem->has_error = false; bo_gem->reusable = true; + bo_gem->aub_annotations = NULL; + bo_gem->aub_annotation_count = 0; drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); @@ -840,7 +868,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, if (!bo_gem) return NULL; - memset(&open_arg, 0, sizeof(open_arg)); + VG_CLEAR(open_arg); open_arg.name = handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, @@ -863,7 +891,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, bo_gem->global_name = handle; bo_gem->reusable = false; - memset(&get_tiling, 0, sizeof(get_tiling)); + VG_CLEAR(get_tiling); get_tiling.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, @@ -894,6 +922,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) DRMLISTDEL(&bo_gem->vma_list); if (bo_gem->mem_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); munmap(bo_gem->mem_virtual, bo_gem->bo.size); bufmgr_gem->vma_count--; } @@ -903,16 +932,31 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) } /* Close this object */ - memset(&close, 0, sizeof(close)); + VG_CLEAR(close); close.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); if (ret != 0) { DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", bo_gem->gem_handle, bo_gem->name, strerror(errno)); } + free(bo_gem->aub_annotations); free(bo); } +static void +drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) +{ +#if HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); + + if (bo_gem->gtt_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); +#endif +} + /** Frees all cached buffers significantly older than @time. 
*/ static void drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) @@ -1041,6 +1085,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); bo_gem->map_count = 0; drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); } DRMLISTDEL(&bo_gem->name_list); @@ -1115,7 +1160,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) DBG("bo_map: %d (%s), map_count=%d\n", bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - memset(&mmap_arg, 0, sizeof(mmap_arg)); + VG_CLEAR(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; mmap_arg.offset = 0; mmap_arg.size = bo->size; @@ -1132,12 +1177,14 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; } DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->mem_virtual); bo->virtual = bo_gem->mem_virtual; + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT /* XXX _CPU */; if (write_enable) @@ -1156,22 +1203,20 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) if (write_enable) bo_gem->mapped_cpu_write = true; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); pthread_mutex_unlock(&bufmgr_gem->lock); return 0; } -int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +static int +map_gtt(drm_intel_bo *bo) { - return drm_intel_gem_bo_map(bo, 1); -#ifndef __OpenBSD__ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_set_domain set_domain; int ret; - pthread_mutex_lock(&bufmgr_gem->lock); - if (bo_gem->map_count++ == 0) drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); @@ -1182,7 +1227,7 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - memset(&mmap_arg, 0, sizeof(mmap_arg)); + VG_CLEAR(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; /* Get the fake offset back... 
*/ @@ -1197,7 +1242,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } @@ -1214,7 +1258,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } } @@ -1224,7 +1267,36 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->gtt_virtual); - /* Now move it to the GTT domain so that the CPU caches are flushed */ + return 0; +} + +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +{ + return drm_intel_gem_bo_map(bo, 1); +#ifndef __OpenBSD__ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret) { + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + + /* Now move it to the GTT domain so that the GPU and CPU + * caches are flushed and the GPU isn't actively using the + * buffer. + * + * The pagefault handler does this domain change for us when + * it has unbound the BO from the GTT, but it's up to us to + * tell it when we're about to use things if we had done + * rendering and it still happens to be bound to the GTT. + */ + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; set_domain.write_domain = I915_GEM_DOMAIN_GTT; @@ -1237,17 +1309,56 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); } + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); pthread_mutex_unlock(&bufmgr_gem->lock); return 0; #endif } +/** + * Performs a mapping of the buffer object like the normal GTT + * mapping, but avoids waiting for the GPU to be done reading from or + * rendering to the buffer. + * + * This is used in the implementation of GL_ARB_map_buffer_range: The + * user asks to create a buffer, then does a mapping, fills some + * space, runs a drawing command, then asks to map it again without + * synchronizing because it guarantees that it won't write over the + * data that the GPU is busy using (or, more specifically, that if it + * does write over the data, it acknowledges that rendering is + * undefined). + */ + +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + int ret; + + /* If the CPU cache isn't coherent with the GTT, then use a + * regular synchronized mapping. The problem is that we don't + * track where the buffer was last used on the CPU side in + * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so + * we would potentially corrupt the buffer even when the user + * does reasonable things. 
+ */ +#ifndef __OpenBSD__ + if (!bufmgr_gem->has_llc) +#endif + return drm_intel_gem_bo_map_gtt(bo); + + pthread_mutex_lock(&bufmgr_gem->lock); + ret = map_gtt(bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_sw_finish sw_finish; int ret = 0; if (bo == NULL) @@ -1265,18 +1376,19 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) } if (bo_gem->mapped_cpu_write) { -#ifndef __OpenBSD__ + struct drm_i915_gem_sw_finish sw_finish; + /* Cause a flush to happen if the buffer's pinned for * scanout, so the results show up in a timely manner. * Unlike GTT set domains, this only does work if the * buffer should be scanout-related. */ + VG_CLEAR(sw_finish); sw_finish.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SW_FINISH, &sw_finish); ret = ret == -1 ? -errno : 0; -#endif bo_gem->mapped_cpu_write = false; } @@ -1286,6 +1398,7 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) */ if (--bo_gem->map_count == 0) { drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); bo->virtual = NULL; } pthread_mutex_unlock(&bufmgr_gem->lock); @@ -1307,7 +1420,7 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pwrite pwrite; int ret; - memset(&pwrite, 0, sizeof(pwrite)); + VG_CLEAR(pwrite); pwrite.handle = bo_gem->gem_handle; pwrite.offset = offset; pwrite.size = size; @@ -1332,6 +1445,7 @@ drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; int ret; + VG_CLEAR(get_pipe_from_crtc_id); get_pipe_from_crtc_id.crtc_id = crtc_id; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, @@ -1358,7 +1472,7 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pread pread; int ret; - memset(&pread, 0, sizeof(pread)); + VG_CLEAR(pread); pread.handle = bo_gem->gem_handle; pread.offset = offset; pread.size = size; @@ -1384,6 +1498,58 @@ drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) } /** + * Waits on a BO for the given amount of time. + * + * @bo: buffer object to wait for + * @timeout_ns: amount of time to wait in nanoseconds. + * If value is less than 0, an infinite wait will occur. + * + * Returns 0 if the wait was successful ie. the last batch referencing the + * object has completed within the allotted time. Otherwise some negative return + * value describes the error. Of particular interest is -ETIME when the wait has + * failed to yield the desired result. + * + * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows + * the operation to give up after a certain amount of time. Another subtle + * difference is the internal locking semantics are different (this variant does + * not hold the lock for the duration of the wait). This makes the wait subject + * to a larger userspace race window. + * + * The implementation shall wait until the object is no longer actively + * referenced within a batch buffer at the time of the call. The wait will + * not guarantee that the buffer is re-issued via another thread, or an flinked + * handle. Userspace must make sure this race does not occur if such precision + * is important. 
+ */ +int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_wait wait; + int ret; + + if (!bufmgr_gem->has_wait_timeout) { + DBG("%s:%d: Timed wait is not supported. Falling back to " + "infinite wait\n", __FILE__, __LINE__); + if (timeout_ns) { + drm_intel_gem_bo_wait_rendering(bo); + return 0; + } else { + return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; + } + } + + wait.bo_handle = bo_gem->gem_handle; + wait.timeout_ns = timeout_ns; + wait.flags = 0; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + if (ret == -1) + return -errno; + + return ret; +} + +/** * Sets the object to the GTT read and possibly write domain, used by the X * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). * @@ -1398,6 +1564,7 @@ drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) struct drm_i915_gem_set_domain set_domain; int ret; + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; @@ -1583,9 +1750,10 @@ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) assert(bo_gem->reloc_count >= start); /* Unreference the cleared target buffers */ for (i = start; i < bo_gem->reloc_count; i++) { - if (bo_gem->reloc_target_info[i].bo != bo) { - drm_intel_gem_bo_unreference_locked_timed(bo_gem-> - reloc_target_info[i].bo, + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; + if (&target_bo_gem->bo != bo) { + bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); } } @@ -1613,6 +1781,8 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) if (target_bo == bo) continue; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + /* Continue walking the tree depth-first. */ drm_intel_gem_bo_process_reloc(target_bo); @@ -1638,6 +1808,8 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) if (target_bo == bo) continue; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + /* Continue walking the tree depth-first. */ drm_intel_gem_bo_process_reloc2(target_bo); @@ -1691,6 +1863,287 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) } } +static void +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) +{ + fwrite(&data, 1, 4, bufmgr_gem->aub_file); +} + +static void +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) +{ + fwrite(data, 1, size, bufmgr_gem->aub_file); +} + +static void +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint32_t *data; + unsigned int i; + + data = malloc(bo->size); + drm_intel_bo_get_subdata(bo, offset, size, data); + + /* Easy mode: write out bo with no relocations */ + if (!bo_gem->reloc_count) { + aub_out_data(bufmgr_gem, data, size); + free(data); + return; + } + + /* Otherwise, handle the relocations while writing. 
*/ + for (i = 0; i < size / 4; i++) { + int r; + for (r = 0; r < bo_gem->reloc_count; r++) { + struct drm_i915_gem_relocation_entry *reloc; + drm_intel_reloc_target *info; + + reloc = &bo_gem->relocs[r]; + info = &bo_gem->reloc_target_info[r]; + + if (reloc->offset == offset + i * 4) { + drm_intel_bo_gem *target_gem; + uint32_t val; + + target_gem = (drm_intel_bo_gem *)info->bo; + + val = reloc->delta; + val += target_gem->aub_offset; + + aub_out(bufmgr_gem, val); + data[i] = val; + break; + } + } + if (r == bo_gem->reloc_count) { + /* no relocation, just the data */ + aub_out(bufmgr_gem, data[i]); + } + } + + free(data); +} + +static void +aub_bo_get_address(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + /* Give the object a graphics address in the AUB file. We + * don't just use the GEM object address because we do AUB + * dumping before execution -- we want to successfully log + * when the hardware might hang, and we might even want to aub + * capture for a driver trying to execute on a different + * generation of hardware by disabling the actual kernel exec + * call. + */ + bo_gem->aub_offset = bufmgr_gem->aub_offset; + bufmgr_gem->aub_offset += bo->size; + /* XXX: Handle aperture overflow. */ + assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); +} + +static void +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, + uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + (5 - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, subtype); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, size); + aub_write_bo_data(bo, offset, size); +} + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, + uint32_t offset, uint32_t size) +{ + uint32_t block_size; + uint32_t sub_offset; + + for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { + block_size = size - sub_offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_write_trace_block(bo, type, subtype, offset + sub_offset, + block_size); + } +} + +static void +aub_write_bo(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint32_t offset = 0; + unsigned i; + + aub_bo_get_address(bo); + + /* Write out each annotated section separately. 
*/ + for (i = 0; i < bo_gem->aub_annotation_count; ++i) { + drm_intel_aub_annotation *annotation = + &bo_gem->aub_annotations[i]; + uint32_t ending_offset = annotation->ending_offset; + if (ending_offset > bo->size) + ending_offset = bo->size; + if (ending_offset > offset) { + aub_write_large_trace_block(bo, annotation->type, + annotation->subtype, + offset, + ending_offset - offset); + offset = ending_offset; + } + } + + /* Write out any remaining unannotated data */ + if (offset < bo->size) { + aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, + offset, bo->size - offset); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, + uint32_t batch_buffer, int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_buffer; + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + (5 - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(bufmgr_gem, 0); /* general/surface subtype */ + aub_out(bufmgr_gem, bufmgr_gem->aub_offset); + aub_out(bufmgr_gem, ring_count * 4); + + /* FIXME: Need some flush operations here? */ + aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); + + /* Update offset pointer */ + bufmgr_gem->aub_offset += 4096; +} + +void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + uint32_t cpp; + + switch (format) { + case AUB_DUMP_BMP_FORMAT_8BIT: + cpp = 1; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_4444: + cpp = 2; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_0888: + case AUB_DUMP_BMP_FORMAT_ARGB_8888: + cpp = 4; + break; + default: + printf("Unknown AUB dump format %d\n", format); + return; + } + + if (!bufmgr_gem->aub_file) + return; + + aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); + aub_out(bufmgr_gem, (y1 << 16) | x1); + aub_out(bufmgr_gem, + (format << 24) | + (cpp << 19) | + pitch / 4); + aub_out(bufmgr_gem, (height << 16) | width); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, + ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | + ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); +} + +static void +aub_exec(drm_intel_bo *bo, int ring_flag, int used) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + bool batch_buffer_needs_annotations; + + if (!bufmgr_gem->aub_file) + return; + + /* If batch buffer is not annotated, annotate it the best we + * can. 
+ */ + batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; + if (batch_buffer_needs_annotations) { + drm_intel_aub_annotation annotations[2] = { + { AUB_TRACE_TYPE_BATCH, 0, used }, + { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } + }; + drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); + } + + /* Write out all buffers to AUB memory */ + for (i = 0; i < bufmgr_gem->exec_count; i++) { + aub_write_bo(bufmgr_gem->exec_bos[i]); + } + + /* Remove any annotations we added */ + if (batch_buffer_needs_annotations) + drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); + + fflush(bufmgr_gem->aub_file); + + /* + * One frame has been dumped. So reset the aub_offset for the next frame. + * + * FIXME: Can we do this? + */ + bufmgr_gem->aub_offset = 0x10000; +} + #ifndef __OpenBSD__ static int drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, @@ -1713,6 +2166,7 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, */ drm_intel_add_validate_buffer(bo); + VG_CLEAR(execbuf); execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; execbuf.buffer_count = bufmgr_gem->exec_count; execbuf.batch_start_offset = 0; @@ -1760,13 +2214,14 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, #endif static int -drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, - drm_clip_rect_t *cliprects, int num_cliprects, int DR4, - unsigned int flags) +do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; struct drm_i915_gem_execbuffer2 execbuf; - int ret, i; + int ret = 0; + int i; switch (flags & 0x7) { default: @@ -1793,6 +2248,7 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, */ drm_intel_add_validate_buffer2(bo, 0); + VG_CLEAR(execbuf); execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; execbuf.buffer_count = bufmgr_gem->exec_count; execbuf.batch_start_offset = 0; @@ -1804,9 +2260,17 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, execbuf.DR4 = DR4; #endif execbuf.flags = flags; - execbuf.rsvd1 = 0; + if (ctx == NULL) + i915_execbuffer2_set_context_id(execbuf, 0); + else + i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); execbuf.rsvd2 = 0; + aub_exec(bo, flags, used); + + if (bufmgr_gem->no_exec) + goto skip_execution; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); @@ -1824,6 +2288,7 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, } drm_intel_update_buffer_offsets2(bufmgr_gem); +skip_execution: if (bufmgr_gem->bufmgr.debug) drm_intel_gem_dump_validation_list(bufmgr_gem); @@ -1846,9 +2311,24 @@ drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, drm_clip_rect_t *cliprects, int num_cliprects, int DR4) { - return drm_intel_gem_bo_mrb_exec2(bo, used, - cliprects, num_cliprects, DR4, - I915_EXEC_RENDER); + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + I915_EXEC_RENDER); +} + +static int +drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + flags); +} + +int +drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, flags); } static int @@ -1859,7 +2339,7 @@ drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 
struct drm_i915_gem_pin pin; int ret; - memset(&pin, 0, sizeof(pin)); + VG_CLEAR(pin); pin.handle = bo_gem->gem_handle; pin.alignment = alignment; @@ -1881,7 +2361,7 @@ drm_intel_gem_bo_unpin(drm_intel_bo *bo) struct drm_i915_gem_unpin unpin; int ret; - memset(&unpin, 0, sizeof(unpin)); + VG_CLEAR(unpin); unpin.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); @@ -1962,21 +2442,92 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, return 0; } +drm_intel_bo * +drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + int ret; + uint32_t handle; + drm_intel_bo_gem *bo_gem; + struct drm_i915_gem_get_tiling get_tiling; + + ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); + if (ret) { + fprintf(stderr,"ret is %d %d\n", ret, errno); + return NULL; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + bo_gem->bo.size = size; + bo_gem->bo.handle = handle; + bo_gem->bo.bufmgr = bufmgr; + + bo_gem->gem_handle = handle; + + atomic_set(&bo_gem->refcount, 1); + + bo_gem->name = "prime"; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + DRMINITLISTHEAD(&bo_gem->name_list); + DRMINITLISTHEAD(&bo_gem->vma_list); + + VG_CLEAR(get_tiling); + get_tiling.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling); + if (ret != 0) { + drm_intel_gem_bo_unreference(&bo_gem->bo); + return NULL; + } + bo_gem->tiling_mode = get_tiling.tiling_mode; + bo_gem->swizzle_mode = get_tiling.swizzle_mode; + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + + return &bo_gem->bo; +} + +int +drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, + DRM_CLOEXEC, prime_fd) != 0) + return -errno; + + bo_gem->reusable = false; + + return 0; +} + static int drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_gem_flink flink; int ret; if (!bo_gem->global_name) { - memset(&flink, 0, sizeof(flink)); + struct drm_gem_flink flink; + + VG_CLEAR(flink); flink.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); if (ret != 0) return -errno; + bo_gem->global_name = flink.name; bo_gem->reusable = false; @@ -2292,6 +2843,208 @@ drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) } /** + * Get the PCI ID for the device. This can be overridden by setting the + * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 
+ */ +static int +get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) +{ + char *devid_override; + int devid; + int ret; + drm_i915_getparam_t gp; + + if (geteuid() == getuid()) { + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + bufmgr_gem->no_exec = true; + return strtod(devid_override, NULL); + } + } + + VG_CLEAR(devid); + VG_CLEAR(gp); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = &devid; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); + fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); + } + return devid; +} + +int +drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + return bufmgr_gem->pci_device; +} + +/** + * Sets up AUB dumping. + * + * This is a trace file format that can be used with the simulator. + * Packets are emitted in a format somewhat like GPU command packets. + * You can set up a GTT and upload your objects into the referenced + * space, then send off batchbuffers and get BMPs out the other end. + */ +void +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + + if (!enable) { + if (bufmgr_gem->aub_file) { + fclose(bufmgr_gem->aub_file); + bufmgr_gem->aub_file = NULL; + } + } + + if (geteuid() != getuid()) + return; + + bufmgr_gem->aub_file = fopen("intel.aub", "w+"); + if (!bufmgr_gem->aub_file) + return; + + /* Start allocating objects from just after the GTT. */ + bufmgr_gem->aub_offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); + aub_out(bufmgr_gem, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(bufmgr_gem, 0); /* app name */ + } + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* comment len */ + + /* Set up the GTT. 
The max we can handle is 256M */ + aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2)); + aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, 0); /* subtype */ + aub_out(bufmgr_gem, 0); /* offset */ + aub_out(bufmgr_gem, gtt_size); /* size */ + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(bufmgr_gem, entry); + } +} + +drm_intel_context * +drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_gem_context_create create; + drm_intel_context *context = NULL; + int ret; + + VG_CLEAR(create); + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret != 0) { + DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", + strerror(errno)); + return NULL; + } + + context = calloc(1, sizeof(*context)); + context->ctx_id = create.ctx_id; + context->bufmgr = bufmgr; + + return context; +} + +void +drm_intel_gem_context_destroy(drm_intel_context *ctx) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_context_destroy destroy; + int ret; + + if (ctx == NULL) + return; + + VG_CLEAR(destroy); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + destroy.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, + &destroy); + if (ret != 0) + fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", + strerror(errno)); + + free(ctx); +} + +int +drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_reg_read reg_read; + int ret; + + VG_CLEAR(reg_read); + reg_read.offset = offset; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); + + *result = reg_read.val; + return ret; +} + + +/** + * Annotate the given bo for use in aub dumping. + * + * \param annotations is an array of drm_intel_aub_annotation objects + * describing the type of data in various sections of the bo. Each + * element of the array specifies the type and subtype of a section of + * the bo, and the past-the-end offset of that section. The elements + * of \c annotations must be sorted so that ending_offset is + * increasing. + * + * \param count is the number of elements in the \c annotations array. + * If \c count is zero, then \c annotations will not be dereferenced. + * + * Annotations are copied into a private data structure, so caller may + * re-use the memory pointed to by \c annotations after the call + * returns. + * + * Annotations are stored for the lifetime of the bo; to reset to the + * default state (no annotations), call this function with a \c count + * of zero. + */ +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + unsigned size = sizeof(*annotations) * count; + drm_intel_aub_annotation *new_annotations = + count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL; + if (new_annotations == NULL) { + free(bo_gem->aub_annotations); + bo_gem->aub_annotations = NULL; + bo_gem->aub_annotation_count = 0; + return; + } + memcpy(new_annotations, annotations, size); + bo_gem->aub_annotations = new_annotations; + bo_gem->aub_annotation_count = count; +} + +/** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. 
* @@ -2335,13 +3088,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) (int)bufmgr_gem->gtt_size / 1024); } - gp.param = I915_PARAM_CHIPSET_ID; - gp.value = &bufmgr_gem->pci_device; - ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret) { - fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); - fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); - } + bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); if (IS_GEN2(bufmgr_gem->pci_device)) bufmgr_gem->gen = 2; @@ -2354,9 +3101,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) else if (IS_GEN6(bufmgr_gem->pci_device)) bufmgr_gem->gen = 6; else if (IS_GEN7(bufmgr_gem->pci_device)) - bufmgr_gem->gen = 7; - else - assert(0); + bufmgr_gem->gen = 7; + else { + free(bufmgr_gem); + return NULL; + } if (IS_GEN3(bufmgr_gem->pci_device) && bufmgr_gem->gtt_size > 256*1024*1024) { @@ -2367,6 +3116,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->gtt_size -= 256*1024*1024; } + VG_CLEAR(gp); gp.value = &tmp; #ifndef __OpenBSD__ @@ -2388,20 +3138,20 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); bufmgr_gem->has_relaxed_fencing = ret == 0; -#ifndef __OpenBSD__ + gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_wait_timeout = ret == 0; + gp.param = I915_PARAM_HAS_LLC; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret == -EINVAL) { -#endif + if (ret != 0) { /* Kernel does not supports HAS_LLC query, fallback to GPU * generation detection and assume that we have LLC on GEN6/7 */ bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | IS_GEN7(bufmgr_gem->pci_device)); -#ifndef __OpenBSD__ } else - bufmgr_gem->has_llc = ret == 0; -#endif + bufmgr_gem->has_llc = *gp.value; if (bufmgr_gem->gen < 4) { gp.param = I915_PARAM_NUM_FENCES_AVAIL; diff --git a/lib/libdrm/intel/intel_bufmgr_priv.h b/lib/libdrm/intel/intel_bufmgr_priv.h index 0b625200a..2592d42d5 100644 --- a/lib/libdrm/intel/intel_bufmgr_priv.h +++ b/lib/libdrm/intel/intel_bufmgr_priv.h @@ -280,6 +280,11 @@ struct _drm_intel_bufmgr { int debug; }; +struct _drm_intel_context { + unsigned int ctx_id; + struct _drm_intel_bufmgr *bufmgr; +}; + #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define ROUND_UP_TO(x, y) (((x) + (y) - 1) / (y) * (y)) #define ROUND_UP_TO_MB(x) ROUND_UP_TO((x), 1024*1024) diff --git a/lib/libdrm/intel/intel_chipset.h b/lib/libdrm/intel/intel_chipset.h index e3a30fc7d..ebec2f8ad 100644 --- a/lib/libdrm/intel/intel_chipset.h +++ b/lib/libdrm/intel/intel_chipset.h @@ -44,6 +44,49 @@ #define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* mobile */ #define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 #define PCI_CHIP_IVYBRIDGE_S 0x015a /* server */ +#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a /* server */ + +#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ +#define PCI_CHIP_HASWELL_GT2 0x0412 +#define PCI_CHIP_HASWELL_GT2_PLUS 0x0422 +#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ +#define PCI_CHIP_HASWELL_M_GT2 0x0416 +#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426 +#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */ +#define PCI_CHIP_HASWELL_S_GT2 0x041A +#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A +#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */ +#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12 +#define PCI_CHIP_HASWELL_SDV_GT2_PLUS 0x0C22 +#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */ +#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 +#define 
PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26 +#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */ +#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A +#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A +#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ +#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 +#define PCI_CHIP_HASWELL_ULT_GT2_PLUS 0x0A22 +#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ +#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 +#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26 +#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ +#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A +#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A +#define PCI_CHIP_HASWELL_CRW_GT1 0x0D12 /* Desktop */ +#define PCI_CHIP_HASWELL_CRW_GT2 0x0D22 +#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D32 +#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 /* Mobile */ +#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26 +#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36 +#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A /* Server */ +#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A +#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A + +#define PCI_CHIP_VALLEYVIEW_PO 0x0f30 /* power on board */ +#define PCI_CHIP_VALLEYVIEW_1 0x0f31 +#define PCI_CHIP_VALLEYVIEW_2 0x0f32 +#define PCI_CHIP_VALLEYVIEW_3 0x0f33 #define IS_830(dev) (dev == 0x3577) #define IS_845(dev) (dev == 0x2562) @@ -84,6 +127,11 @@ #define IS_I965GM(dev) (dev == 0x2A02) +#define IS_VALLEYVIEW(dev) (((dev) == PCI_CHIP_VALLEYVIEW_PO) || \ + ((dev) == PCI_CHIP_VALLEYVIEW_1) || \ + ((dev) == PCI_CHIP_VALLEYVIEW_2) || \ + ((dev) == PCI_CHIP_VALLEYVIEW_3)) + #define IS_GEN4(dev) (dev == 0x2972 || \ dev == 0x2982 || \ dev == 0x2992 || \ @@ -115,11 +163,56 @@ dev == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ dev == PCI_CHIP_SANDYBRIDGE_S) -#define IS_GEN7(dev) (dev == PCI_CHIP_IVYBRIDGE_GT1 || \ - dev == PCI_CHIP_IVYBRIDGE_GT2 || \ - dev == PCI_CHIP_IVYBRIDGE_M_GT1 || \ - dev == PCI_CHIP_IVYBRIDGE_M_GT2 || \ - dev == PCI_CHIP_IVYBRIDGE_S) +#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ + IS_HASWELL(devid) || \ + IS_VALLEYVIEW(devid)) + +#define IS_IVYBRIDGE(dev) (dev == PCI_CHIP_IVYBRIDGE_GT1 || \ + dev == PCI_CHIP_IVYBRIDGE_GT2 || \ + dev == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + dev == PCI_CHIP_IVYBRIDGE_M_GT2 || \ + dev == PCI_CHIP_IVYBRIDGE_S || \ + dev == PCI_CHIP_IVYBRIDGE_S_GT2) + +#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \ + devid == PCI_CHIP_HASWELL_M_GT1 || \ + devid == PCI_CHIP_HASWELL_S_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT1) +#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2 || \ + devid == PCI_CHIP_HASWELL_M_GT2 || \ + devid == PCI_CHIP_HASWELL_S_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT2 || \ + devid == PCI_CHIP_HASWELL_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_M_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_S_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_SDV_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT2_PLUS || \ + devid 
== PCI_CHIP_HASWELL_SDV_S_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_ULT_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_CRW_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT2_PLUS || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT2_PLUS) + +#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \ + IS_HSW_GT2(devid)) #define IS_G4X(dev) (dev == 0x2E02 || \ dev == 0x2E12 || \ diff --git a/lib/libdrm/intel/shlib_version b/lib/libdrm/intel/shlib_version index b52599a16..c6e3f4d3f 100644 --- a/lib/libdrm/intel/shlib_version +++ b/lib/libdrm/intel/shlib_version @@ -1,2 +1,2 @@ major=2 -minor=0 +minor=1 |
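
Taken together, the AUB hooks this patch adds -- drm_intel_bufmgr_gem_set_aub_dump(), drm_intel_bufmgr_gem_set_aub_annotations() and drm_intel_gem_bo_aub_dump_bmp() -- wrap a frame of rendering for replay in the simulator; combined with INTEL_DEVID_OVERRIDE (which also sets no_exec, so batches are dumped but never submitted to the kernel), a trace can even be captured for a different hardware generation. A minimal sketch, assuming intel_aub.h is visible to the caller; every name other than the library entry points is a placeholder:

#include "intel_bufmgr.h"
#include "intel_aub.h"

static void capture_frame(drm_intel_bufmgr *bufmgr, drm_intel_bo *batch_bo,
                          int batch_used, drm_intel_bo *fb_bo,
                          int width, int height, int stride_bytes)
{
        /* Opens "intel.aub" in the cwd and writes the header + GTT. */
        drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);

        /* Label the batch bo so the decoder knows which bytes are
         * commands; ending offsets must be increasing, and the writer
         * clamps them to bo->size. */
        drm_intel_aub_annotation notes[2] = {
                { AUB_TRACE_TYPE_BATCH,  0, (uint32_t)batch_used },
                { AUB_TRACE_TYPE_NOTYPE, 0, (uint32_t)batch_bo->size },
        };
        drm_intel_bufmgr_gem_set_aub_annotations(batch_bo, notes, 2);

        /* ... build and exec the batch here; each exec dumps a frame ... */

        /* Ask the simulator to emit a BMP of the framebuffer. */
        drm_intel_gem_bo_aub_dump_bmp(fb_bo, 0, 0, width, height,
                                      AUB_DUMP_BMP_FORMAT_ARGB_8888,
                                      stride_bytes, 0);

        drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0); /* close intel.aub */
}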
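
drm_intel_gem_bo_map_unsynchronized() skips the set-domain wait on LLC hardware and degrades to the synchronized GTT map otherwise; note that in this OpenBSD port drm_intel_gem_bo_map_gtt() itself still short-circuits to the regular CPU map, so both paths end up synchronized here. A sketch of the intended GL_ARB_map_buffer_range-style streaming pattern, assuming the caller guarantees the GPU is not reading the written range (vbo, offset, data and len are placeholders):

#include <string.h>
#include "intel_bufmgr.h"

static int stream_subrange(drm_intel_bo *vbo, unsigned long offset,
                           const void *data, size_t len)
{
        int ret = drm_intel_gem_bo_map_unsynchronized(vbo);
        if (ret != 0)
                return ret;

        memcpy((char *)vbo->virtual + offset, data, len);
        return drm_intel_gem_bo_unmap_gtt(vbo);
}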
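
A usage sketch for the new timed wait; the 100 ms budget and the function name are illustrative. Per the doc comment above, 0 means the last batch referencing the object completed in time and -ETIME means it did not; on kernels without I915_PARAM_HAS_WAIT_TIMEOUT the library degrades to a blocking wait (timeout != 0) or a busy poll (timeout == 0):

#include <errno.h>
#include <stdio.h>
#include "intel_bufmgr.h"

static int wait_up_to_100ms(drm_intel_bo *batch_bo)
{
        int ret = drm_intel_gem_bo_wait(batch_bo, 100 * 1000000LL);

        if (ret == -ETIME)
                fprintf(stderr, "GPU still busy after 100 ms\n");
        return ret;
}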
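
The hardware-context entry points pair naturally as create/exec/destroy; the kernel isolates ring state between contexts. A sketch with error handling trimmed (batch_bo and batch_used are placeholders):

#include "intel_bufmgr.h"
#include "i915_drm.h"

static int exec_in_own_context(drm_intel_bufmgr *bufmgr,
                               drm_intel_bo *batch_bo, int batch_used)
{
        drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
        int ret;

        if (ctx == NULL)
                return -1; /* kernel lacks CONTEXT_CREATE */

        ret = drm_intel_gem_bo_context_exec(batch_bo, ctx, batch_used,
                                            I915_EXEC_RENDER);
        drm_intel_gem_context_destroy(ctx);
        return ret;
}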
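
The PRIME pair exports a bo as a dma-buf fd on one bufmgr and re-imports it on another (typically another process or device fd); both sides mark the bo non-reusable so the cache never hands the shared pages to an unrelated allocation. A sketch -- closing the fd after import is an assumption about the caller's lifetime management, since the imported handle keeps its own reference:

#include <unistd.h>
#include "intel_bufmgr.h"

static drm_intel_bo *share_bo(drm_intel_bo *src,
                              drm_intel_bufmgr *dst_bufmgr)
{
        drm_intel_bo *dst = NULL;
        int prime_fd;

        if (drm_intel_bo_gem_export_to_prime(src, &prime_fd) == 0) {
                dst = drm_intel_bo_gem_create_from_prime(dst_bufmgr,
                                                         prime_fd,
                                                         src->size);
                close(prime_fd); /* import holds its own reference */
        }
        return dst;
}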
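
drm_intel_reg_read() forwards to DRM_IOCTL_I915_REG_READ, which only accepts offsets the kernel whitelists. The render-ring timestamp register (0x2358 on Gen6+) is the usual candidate, but treat that offset as an assumption of this sketch, not something the patch guarantees:

#include <inttypes.h>
#include <stdio.h>
#include "intel_bufmgr.h"

static void print_gpu_timestamp(drm_intel_bufmgr *bufmgr)
{
        uint64_t ts;

        if (drm_intel_reg_read(bufmgr, 0x2358, &ts) == 0)
                printf("GPU timestamp: %" PRIu64 "\n", ts);
}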
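
One consequence of the expanded chipset table: the new IS_GEN7() covers Ivybridge, Haswell and ValleyView alike, so any Haswell-specific path has to test IS_HASWELL() before falling back to the generic gen7 check. A sketch -- intel_chipset.h is a private header, so this assumes code building inside the library tree:

#include "intel_bufmgr.h"
#include "intel_chipset.h"

static const char *describe_gen(drm_intel_bufmgr *bufmgr)
{
        int devid = drm_intel_bufmgr_gem_get_devid(bufmgr);

        if (IS_HASWELL(devid))
                return "gen7 (Haswell)";
        if (IS_GEN7(devid))
                return "gen7 (Ivybridge/ValleyView)";
        return "pre-gen7";
}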