68 files changed, 3947 insertions, 665 deletions
diff --git a/lib/libdrm/amdgpu/Makefile.sources b/lib/libdrm/amdgpu/Makefile.sources
index bc3abaa61..498b64cc3 100644
--- a/lib/libdrm/amdgpu/Makefile.sources
+++ b/lib/libdrm/amdgpu/Makefile.sources
@@ -6,6 +6,7 @@ LIBDRM_AMDGPU_FILES := \
 	amdgpu_gpu_info.c \
 	amdgpu_internal.h \
 	amdgpu_vamgr.c \
+	amdgpu_vm.c \
 	util_hash.c \
 	util_hash.h \
 	util_hash_table.c \
diff --git a/lib/libdrm/amdgpu/amdgpu-symbol-check b/lib/libdrm/amdgpu/amdgpu-symbol-check
index c5b85b52b..3f298d13c 100755
--- a/lib/libdrm/amdgpu/amdgpu-symbol-check
+++ b/lib/libdrm/amdgpu/amdgpu-symbol-check
@@ -29,17 +29,25 @@ amdgpu_cs_chunk_fence_info_to_data
 amdgpu_cs_chunk_fence_to_dep
 amdgpu_cs_create_semaphore
 amdgpu_cs_create_syncobj
+amdgpu_cs_create_syncobj2
 amdgpu_cs_ctx_create
+amdgpu_cs_ctx_create2
 amdgpu_cs_ctx_free
 amdgpu_cs_destroy_semaphore
 amdgpu_cs_destroy_syncobj
 amdgpu_cs_export_syncobj
+amdgpu_cs_fence_to_handle
 amdgpu_cs_import_syncobj
 amdgpu_cs_query_fence_status
 amdgpu_cs_query_reset_state
 amdgpu_cs_signal_semaphore
 amdgpu_cs_submit
 amdgpu_cs_submit_raw
+amdgpu_cs_syncobj_export_sync_file
+amdgpu_cs_syncobj_import_sync_file
+amdgpu_cs_syncobj_reset
+amdgpu_cs_syncobj_signal
+amdgpu_cs_syncobj_wait
 amdgpu_cs_wait_fences
 amdgpu_cs_wait_semaphore
 amdgpu_device_deinitialize
@@ -59,6 +67,8 @@ amdgpu_read_mm_registers
 amdgpu_va_range_alloc
 amdgpu_va_range_free
 amdgpu_va_range_query
+amdgpu_vm_reserve_vmid
+amdgpu_vm_unreserve_vmid
 EOF
 done)
 
diff --git a/lib/libdrm/amdgpu/amdgpu.h b/lib/libdrm/amdgpu/amdgpu.h
index 238b1aaa9..2eb03bf15 100644
--- a/lib/libdrm/amdgpu/amdgpu.h
+++ b/lib/libdrm/amdgpu/amdgpu.h
@@ -798,8 +798,9 @@ int amdgpu_bo_list_update(amdgpu_bo_list_handle handle,
  * context will always be executed in order (first come, first serve).
  *
  *
- * \param   dev	    - \c [in] Device handle. See #amdgpu_device_initialize()
- * \param   context - \c [out] GPU Context handle
+ * \param   dev      - \c [in] Device handle. See #amdgpu_device_initialize()
+ * \param   priority - \c [in] Context priority. See AMDGPU_CTX_PRIORITY_*
+ * \param   context  - \c [out] GPU Context handle
  *
  * \return   0 on success\n
  *          <0 - Negative POSIX Error code
@@ -807,6 +808,18 @@ int amdgpu_bo_list_update(amdgpu_bo_list_handle handle,
  * \sa amdgpu_cs_ctx_free()
  *
 */
+int amdgpu_cs_ctx_create2(amdgpu_device_handle dev,
+			 uint32_t priority,
+			 amdgpu_context_handle *context);
+/**
+ * Create GPU execution Context
+ *
+ * Refer to amdgpu_cs_ctx_create2 for full documentation. This call
+ * omits the priority parameter and uses AMDGPU_CTX_PRIORITY_NORMAL.
+ *
+ * \sa amdgpu_cs_ctx_create2()
+ *
+*/
 int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
 			 amdgpu_context_handle *context);
 
@@ -1331,6 +1344,21 @@ const char *amdgpu_get_marketing_name(amdgpu_device_handle dev);
 /**
  *  Create kernel sync object
  *
+ * \param   dev         - \c [in]  device handle
+ * \param   flags       - \c [in]  flags that affect creation
+ * \param   syncobj     - \c [out] sync object handle
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ *
+*/
+int amdgpu_cs_create_syncobj2(amdgpu_device_handle dev,
+			      uint32_t  flags,
+			      uint32_t *syncobj);
+
+/**
+ *  Create kernel sync object
+ *
  * \param   dev	      - \c [in]  device handle
  * \param   syncobj   - \c [out] sync object handle
  *
@@ -1354,6 +1382,54 @@ int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
 			      uint32_t syncobj);
 
 /**
+ * Reset kernel sync objects to unsignalled state.
+ *
+ * \param dev           - \c [in] device handle
+ * \param syncobjs      - \c [in] array of sync object handles
+ * \param syncobj_count - \c [in] number of handles in syncobjs
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ *
+*/
+int amdgpu_cs_syncobj_reset(amdgpu_device_handle dev,
+			    const uint32_t *syncobjs, uint32_t syncobj_count);
+
+/**
+ * Signal kernel sync objects.
+ *
+ * \param dev           - \c [in] device handle
+ * \param syncobjs      - \c [in] array of sync object handles
+ * \param syncobj_count - \c [in] number of handles in syncobjs
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ *
+*/
+int amdgpu_cs_syncobj_signal(amdgpu_device_handle dev,
+			     const uint32_t *syncobjs, uint32_t syncobj_count);
+
+/**
+ *  Wait for one or all sync objects to signal.
+ *
+ * \param   dev	    - \c [in] self-explanatory
+ * \param   handles - \c [in] array of sync object handles
+ * \param   num_handles - \c [in] self-explanatory
+ * \param   timeout_nsec - \c [in] self-explanatory
+ * \param   flags   - \c [in] a bitmask of DRM_SYNCOBJ_WAIT_FLAGS_*
+ * \param   first_signaled - \c [out] index of the first signaled handle
+ *
+ * \return   0 on success\n
+ *          -ETIME - Timeout\n
+ *          <0 - Negative POSIX Error code
+ *
+ */
+int amdgpu_cs_syncobj_wait(amdgpu_device_handle dev,
+			   uint32_t *handles, unsigned num_handles,
+			   int64_t timeout_nsec, unsigned flags,
+			   uint32_t *first_signaled);
+
+/**
  *  Export kernel sync object to shareable fd.
  *
  * \param   dev	       - \c [in] device handle
@@ -1383,6 +1459,50 @@ int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
 			     uint32_t *syncobj);
 
 /**
+ *  Export kernel sync object to a sync_file.
+ *
+ * \param   dev	       - \c [in] device handle
+ * \param   syncobj    - \c [in] sync object handle
+ * \param   sync_file_fd - \c [out] sync_file file descriptor.
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ *
+ */
+int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
+				       uint32_t syncobj,
+				       int *sync_file_fd);
+
+/**
+ *  Import kernel sync object from a sync_file.
+ *
+ * \param   dev	       - \c [in] device handle
+ * \param   syncobj    - \c [in] sync object handle
+ * \param   sync_file_fd - \c [in] sync_file file descriptor.
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ *
+ */
+int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
+				       uint32_t syncobj,
+				       int sync_file_fd);
+
+/**
+ * Export an amdgpu fence as a handle (syncobj or fd).
+ *
+ * \param what		AMDGPU_FENCE_TO_HANDLE_GET_{SYNCOBJ, FD}
+ * \param out_handle	returned handle
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ */
+int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
+			      struct amdgpu_cs_fence *fence,
+			      uint32_t what,
+			      uint32_t *out_handle);
+
+/**
  *  Submit raw command submission to kernel
  *
  * \param   dev	       - \c [in] device handle
@@ -1412,6 +1532,24 @@ void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
 void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
 					struct drm_amdgpu_cs_chunk_data *data);
 
+/**
+ * Reserve VMID
+ * \param   dev - \c [in]  Device handle. See #amdgpu_device_initialize()
+ * \param   flags - \c [in]  TBD
+ *
+ * \return  0 on success otherwise POSIX Error code
+*/
+int amdgpu_vm_reserve_vmid(amdgpu_device_handle dev, uint32_t flags);
+
+/**
+ * Free reserved VMID
+ * \param   dev - \c [in]  Device handle. See #amdgpu_device_initialize()
+ * \param   flags - \c [in]  TBD
+ *
+ * \return  0 on success otherwise POSIX Error code
+*/
+int amdgpu_vm_unreserve_vmid(amdgpu_device_handle dev, uint32_t flags);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/libdrm/amdgpu/amdgpu_asic_id.c b/lib/libdrm/amdgpu/amdgpu_asic_id.c
index 3a88896b5..0c8925e5d 100644
--- a/lib/libdrm/amdgpu/amdgpu_asic_id.c
+++ b/lib/libdrm/amdgpu/amdgpu_asic_id.c
@@ -38,70 +38,69 @@
 #include "amdgpu_drm.h"
 #include "amdgpu_internal.h"
 
-static int parse_one_line(const char *line, struct amdgpu_asic_id *id)
+static int parse_one_line(struct amdgpu_device *dev, const char *line)
 {
 	char *buf, *saveptr;
 	char *s_did;
+	uint32_t did;
 	char *s_rid;
+	uint32_t rid;
 	char *s_name;
 	char *endptr;
-	int r = 0;
+	int r = -EINVAL;
+
+	/* ignore empty line and commented line */
+	if (strlen(line) == 0 || line[0] == '#')
+		return -EAGAIN;
 
 	buf = strdup(line);
 	if (!buf)
 		return -ENOMEM;
 
-	/* ignore empty line and commented line */
-	if (strlen(line) == 0 || line[0] == '#') {
-		r = -EAGAIN;
-		goto out;
-	}
-
 	/* device id */
 	s_did = strtok_r(buf, ",", &saveptr);
-	if (!s_did) {
-		r = -EINVAL;
+	if (!s_did)
 		goto out;
-	}
 
-	id->did = strtol(s_did, &endptr, 16);
-	if (*endptr) {
-		r = -EINVAL;
+	did = strtol(s_did, &endptr, 16);
+	if (*endptr)
+		goto out;
+
+	if (did != dev->info.asic_id) {
+		r = -EAGAIN;
 		goto out;
 	}
 
 	/* revision id */
 	s_rid = strtok_r(NULL, ",", &saveptr);
-	if (!s_rid) {
-		r = -EINVAL;
+	if (!s_rid)
 		goto out;
-	}
 
-	id->rid = strtol(s_rid, &endptr, 16);
-	if (*endptr) {
-		r = -EINVAL;
+	rid = strtol(s_rid, &endptr, 16);
+	if (*endptr)
+		goto out;
+
+	if (rid != dev->info.pci_rev_id) {
+		r = -EAGAIN;
 		goto out;
 	}
 
 	/* marketing name */
 	s_name = strtok_r(NULL, ",", &saveptr);
-	if (!s_name) {
-		r = -EINVAL;
+	if (!s_name)
 		goto out;
-	}
+
 	/* trim leading whitespaces or tabs */
 	while (isblank(*s_name))
 		s_name++;
-	if (strlen(s_name) == 0) {
-		r = -EINVAL;
+	if (strlen(s_name) == 0)
 		goto out;
-	}
 
-	id->marketing_name = strdup(s_name);
-	if (id->marketing_name == NULL) {
-		r = -EINVAL;
-		goto out;
-	}
+	dev->marketing_name = strdup(s_name);
+	if (dev->marketing_name)
+		r = 0;
+	else
+		r = -ENOMEM;
 
 out:
 	free(buf);
@@ -109,31 +108,20 @@ out:
 	return r;
 }
 
-int amdgpu_parse_asic_ids(struct amdgpu_asic_id **p_asic_id_table)
+void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
 {
-	struct amdgpu_asic_id *asic_id_table;
-	struct amdgpu_asic_id *id;
 	FILE *fp;
 	char *line = NULL;
 	size_t len = 0;
 	ssize_t n;
 	int line_num = 1;
-	size_t table_size = 0;
-	size_t table_max_size = AMDGPU_ASIC_ID_TABLE_NUM_ENTRIES;
 	int r = 0;
 
 	fp = fopen(AMDGPU_ASIC_ID_TABLE, "r");
 	if (!fp) {
 		fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
 			strerror(errno));
-		return -EINVAL;
-	}
-
-	asic_id_table = calloc(table_max_size + 1,
-			       sizeof(struct amdgpu_asic_id));
-	if (!asic_id_table) {
-		r = -ENOMEM;
-		goto close;
+		return;
 	}
 
 	/* 1st valid line is file version */
@@ -153,67 +141,25 @@ int amdgpu_parse_asic_ids(struct amdgpu_asic_id **p_asic_id_table)
 	}
 
 	while ((n = getline(&line, &len, fp)) != -1) {
-		if (table_size > table_max_size) {
-			/* double table size */
-			table_max_size *= 2;
-			id = realloc(asic_id_table, (table_max_size + 1) *
-				     sizeof(struct amdgpu_asic_id));
-			if (!id) {
-				r = -ENOMEM;
-				goto free;
-			}
-                        asic_id_table = id;
-		}
-
-		id = asic_id_table + table_size;
-
 		/* trim trailing newline */
 		if (line[n - 1] == '\n')
 			line[n - 1] = '\0';
 
-		r = parse_one_line(line, id);
-		if (r) {
-			if (r == -EAGAIN) {
-				line_num++;
-				continue;
-			}
-			fprintf(stderr, "Invalid format: %s: line %d: %s\n",
-				AMDGPU_ASIC_ID_TABLE, line_num, line);
-			goto free;
-		}
+		r = parse_one_line(dev, line);
+		if (r != -EAGAIN)
+			break;
 
 		line_num++;
-		table_size++;
 	}
 
-	/* end of table */
-	id = asic_id_table + table_size;
-	memset(id, 0, sizeof(struct amdgpu_asic_id));
-
-	if (table_size != table_max_size) {
-		id = realloc(asic_id_table, (table_size + 1) *
-			     sizeof(struct amdgpu_asic_id));
-		if (!id)
-			r = -ENOMEM;
-		else
-			asic_id_table = id;
-        }
+	if (r == -EINVAL) {
+		fprintf(stderr, "Invalid format: %s: line %d: %s\n",
+			AMDGPU_ASIC_ID_TABLE, line_num, line);
+	} else if (r) {
+		fprintf(stderr, "%s: Cannot parse ASIC IDs: %s\n",
+			__func__, strerror(-r));
+	}
 
-free:
 	free(line);
-
-	if (r && asic_id_table) {
-		while (table_size--) {
-			id = asic_id_table + table_size;
-			free(id->marketing_name);
-		}
-		free(asic_id_table);
-		asic_id_table = NULL;
-	}
-close:
 	fclose(fp);
-
-	*p_asic_id_table = asic_id_table;
-
-	return r;
 }
diff --git a/lib/libdrm/amdgpu/amdgpu_cs.c b/lib/libdrm/amdgpu/amdgpu_cs.c
index dfba8754f..987daa403 100644
--- a/lib/libdrm/amdgpu/amdgpu_cs.c
+++ b/lib/libdrm/amdgpu/amdgpu_cs.c
@@ -46,13 +46,14 @@ static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
 /**
  * Create command submission context
  *
- * \param   dev - \c [in] amdgpu device handle
- * \param   context - \c [out] amdgpu context handle
+ * \param   dev      - \c [in] Device handle. See #amdgpu_device_initialize()
+ * \param   priority - \c [in] Context priority. See AMDGPU_CTX_PRIORITY_*
+ * \param   context  - \c [out] GPU Context handle
  *
  * \return  0 on success otherwise POSIX Error code
 */
-int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
-			 amdgpu_context_handle *context)
+int amdgpu_cs_ctx_create2(amdgpu_device_handle dev, uint32_t priority,
+							amdgpu_context_handle *context)
 {
 	struct amdgpu_context *gpu_context;
 	union drm_amdgpu_ctx args;
@@ -75,6 +76,8 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
 	/* Create the context */
 	memset(&args, 0, sizeof(args));
 	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
+	args.in.priority = priority;
+
 	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
 	if (r)
 		goto error;
@@ -94,6 +97,12 @@ error:
 	return r;
 }
 
+int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
+			 amdgpu_context_handle *context)
+{
+	return amdgpu_cs_ctx_create2(dev, AMDGPU_CTX_PRIORITY_NORMAL, context);
+}
+
 /**
  * Release command submission context
  *
@@ -597,6 +606,16 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
 	return amdgpu_cs_unreference_sem(sem);
 }
 
+int amdgpu_cs_create_syncobj2(amdgpu_device_handle dev,
+			      uint32_t  flags,
+			      uint32_t *handle)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjCreate(dev->fd, flags, handle);
+}
+
 int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
 			     uint32_t *handle)
 {
@@ -615,6 +634,36 @@ int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
 	return drmSyncobjDestroy(dev->fd, handle);
 }
 
+int amdgpu_cs_syncobj_reset(amdgpu_device_handle dev,
+			    const uint32_t *syncobjs, uint32_t syncobj_count)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjReset(dev->fd, syncobjs, syncobj_count);
+}
+
+int amdgpu_cs_syncobj_signal(amdgpu_device_handle dev,
+			     const uint32_t *syncobjs, uint32_t syncobj_count)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjSignal(dev->fd, syncobjs, syncobj_count);
+}
+
+int amdgpu_cs_syncobj_wait(amdgpu_device_handle dev,
+			   uint32_t *handles, unsigned num_handles,
+			   int64_t timeout_nsec, unsigned flags,
+			   uint32_t *first_signaled)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjWait(dev->fd, handles, num_handles, timeout_nsec,
+			      flags, first_signaled);
+}
+
 int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
 			     uint32_t handle,
 			     int *shared_fd)
@@ -635,6 +684,26 @@ int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
 	return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
 }
 
+int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
+				       uint32_t syncobj,
+				       int *sync_file_fd)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjExportSyncFile(dev->fd, syncobj, sync_file_fd);
+}
+
+int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
+				       uint32_t syncobj,
+				       int sync_file_fd)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjImportSyncFile(dev->fd, syncobj, sync_file_fd);
+}
+
 int amdgpu_cs_submit_raw(amdgpu_device_handle dev,
 			 amdgpu_context_handle context,
 			 amdgpu_bo_list_handle bo_list_handle,
@@ -681,3 +750,25 @@ void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
 	dep->ctx_id = fence->context->id;
 	dep->handle = fence->fence;
 }
+
+int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
+			      struct amdgpu_cs_fence *fence,
+			      uint32_t what,
+			      uint32_t *out_handle)
+{
+	union drm_amdgpu_fence_to_handle fth = {0};
+	int r;
+
+	fth.in.fence.ctx_id = fence->context->id;
+	fth.in.fence.ip_type = fence->ip_type;
+	fth.in.fence.ip_instance = fence->ip_instance;
+	fth.in.fence.ring = fence->ring;
+	fth.in.fence.seq_no = fence->fence;
+	fth.in.what = what;
+
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_FENCE_TO_HANDLE,
+				&fth, sizeof(fth));
+	if (r == 0)
+		*out_handle = fth.out.handle;
+	return r;
+}
diff --git a/lib/libdrm/amdgpu/amdgpu_device.c b/lib/libdrm/amdgpu/amdgpu_device.c
index 9a238d970..eb4b2745f 100644
--- a/lib/libdrm/amdgpu/amdgpu_device.c
+++ b/lib/libdrm/amdgpu/amdgpu_device.c
@@ -130,7 +130,6 @@ static int amdgpu_get_auth(int fd, int *auth)
 
 static void amdgpu_device_free_internal(amdgpu_device_handle dev)
 {
-	const struct amdgpu_asic_id *id;
 	amdgpu_vamgr_deinit(&dev->vamgr_32);
 	amdgpu_vamgr_deinit(&dev->vamgr);
 	util_hash_table_destroy(dev->bo_flink_names);
@@ -140,12 +139,7 @@ static void amdgpu_device_free_internal(amdgpu_device_handle dev)
 	close(dev->fd);
 	if ((dev->flink_fd >= 0) && (dev->fd != dev->flink_fd))
 		close(dev->flink_fd);
-	if (dev->asic_ids) {
-		for (id = dev->asic_ids; id->did; id++)
-			free(id->marketing_name);
-
-		free(dev->asic_ids);
-	}
+	free(dev->marketing_name);
 	free(dev);
 }
 
@@ -191,6 +185,8 @@ int amdgpu_device_initialize(int fd,
 		fd_tab = util_hash_table_create(fd_hash, fd_compare);
 	r = amdgpu_get_auth(fd, &flag_auth);
 	if (r) {
+		fprintf(stderr, "%s: amdgpu_get_auth (1) failed (%i)\n",
+			__func__, r);
 		pthread_mutex_unlock(&fd_mutex);
 		return r;
 	}
@@ -198,6 +194,8 @@ int amdgpu_device_initialize(int fd,
 	if (dev) {
 		r = amdgpu_get_auth(dev->fd, &flag_authexist);
 		if (r) {
+			fprintf(stderr, "%s: amdgpu_get_auth (2) failed (%i)\n",
+				__func__, r);
 			pthread_mutex_unlock(&fd_mutex);
 			return r;
 		}
@@ -213,6 +211,7 @@ int amdgpu_device_initialize(int fd,
 
 	dev = calloc(1, sizeof(struct amdgpu_device));
 	if (!dev) {
+		fprintf(stderr, "%s: calloc failed\n", __func__);
 		pthread_mutex_unlock(&fd_mutex);
 		return -ENOMEM;
 	}
@@ -248,36 +247,34 @@ int amdgpu_device_initialize(int fd,
 
 	/* Check if acceleration is working. */
 	r = amdgpu_query_info(dev, AMDGPU_INFO_ACCEL_WORKING, 4, &accel_working);
-	if (r)
+	if (r) {
+		fprintf(stderr, "%s: amdgpu_query_info(ACCEL_WORKING) failed (%i)\n",
+			__func__, r);
 		goto cleanup;
+	}
 	if (!accel_working) {
+		fprintf(stderr, "%s: AMDGPU_INFO_ACCEL_WORKING = 0\n", __func__);
 		r = -EBADF;
 		goto cleanup;
 	}
 
 	r = amdgpu_query_gpu_info_init(dev);
-	if (r)
+	if (r) {
+		fprintf(stderr, "%s: amdgpu_query_gpu_info_init failed\n", __func__);
 		goto cleanup;
+	}
 
-	amdgpu_vamgr_init(&dev->vamgr, dev->dev_info.virtual_address_offset,
-			  dev->dev_info.virtual_address_max,
-			  dev->dev_info.virtual_address_alignment);
-
+	start = dev->dev_info.virtual_address_offset;
 	max = MIN2(dev->dev_info.virtual_address_max, 0xffffffff);
-	start = amdgpu_vamgr_find_va(&dev->vamgr,
-				     max - dev->dev_info.virtual_address_offset,
-				     dev->dev_info.virtual_address_alignment, 0);
-	if (start > 0xffffffff)
-		goto free_va; /* shouldn't get here */
-
 	amdgpu_vamgr_init(&dev->vamgr_32, start, max,
 			  dev->dev_info.virtual_address_alignment);
 
-	r = amdgpu_parse_asic_ids(&dev->asic_ids);
-	if (r) {
-		fprintf(stderr, "%s: Cannot parse ASIC IDs, 0x%x.",
-			__func__, r);
-	}
+	start = MAX2(dev->dev_info.virtual_address_offset, 0x100000000ULL);
+	max = MAX2(dev->dev_info.virtual_address_max, 0x100000000ULL);
+	amdgpu_vamgr_init(&dev->vamgr, start, max,
+			  dev->dev_info.virtual_address_alignment);
+
+	amdgpu_parse_asic_ids(dev);
 
 	*major_version = dev->major_version;
 	*minor_version = dev->minor_version;
@@ -287,12 +284,6 @@ int amdgpu_device_initialize(int fd,
 
 	return 0;
 
-free_va:
-	r = -ENOMEM;
-	amdgpu_vamgr_free_va(&dev->vamgr, start,
-			     max - dev->dev_info.virtual_address_offset);
-	amdgpu_vamgr_deinit(&dev->vamgr);
-
 cleanup:
 	if (dev->fd >= 0)
 		close(dev->fd);
@@ -309,16 +300,5 @@ int amdgpu_device_deinitialize(amdgpu_device_handle dev)
 
 const char *amdgpu_get_marketing_name(amdgpu_device_handle dev)
 {
-	const struct amdgpu_asic_id *id;
-
-	if (!dev->asic_ids)
-		return NULL;
-
-	for (id = dev->asic_ids; id->did; id++) {
-		if ((id->did == dev->info.asic_id) &&
-		    (id->rid == dev->info.pci_rev_id))
-			return id->marketing_name;
-	}
-
-	return NULL;
+	return dev->marketing_name;
 }
diff --git a/lib/libdrm/amdgpu/amdgpu_internal.h b/lib/libdrm/amdgpu/amdgpu_internal.h
index e68246bf9..3e044f11d 100644
--- a/lib/libdrm/amdgpu/amdgpu_internal.h
+++ b/lib/libdrm/amdgpu/amdgpu_internal.h
@@ -69,12 +69,6 @@ struct amdgpu_va {
 	struct amdgpu_bo_va_mgr *vamgr;
 };
 
-struct amdgpu_asic_id {
-	uint32_t did;
-	uint32_t rid;
-	char *marketing_name;
-};
-
 struct amdgpu_device {
 	atomic_t refcount;
 	int fd;
@@ -82,8 +76,7 @@ struct amdgpu_device {
 	unsigned major_version;
 	unsigned minor_version;
 
-	/** Lookup table of asic device id, revision id and marketing name */
-	struct amdgpu_asic_id *asic_ids;
+	char *marketing_name;
 	/** List of buffer handles. Protected by bo_table_mutex. */
 	struct util_hash_table *bo_handles;
 	/** List of buffer GEM flink names. Protected by bo_table_mutex. */
@@ -143,21 +136,12 @@ struct amdgpu_semaphore {
  * Functions.
  */
 
-drm_private void amdgpu_bo_free_internal(amdgpu_bo_handle bo);
-
 drm_private void amdgpu_vamgr_init(struct amdgpu_bo_va_mgr *mgr, uint64_t start,
 		       uint64_t max, uint64_t alignment);
 
 drm_private void amdgpu_vamgr_deinit(struct amdgpu_bo_va_mgr *mgr);
 
-drm_private uint64_t
-amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, uint64_t size,
-		     uint64_t alignment, uint64_t base_required);
-
-drm_private void
-amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va, uint64_t size);
-
-drm_private int amdgpu_parse_asic_ids(struct amdgpu_asic_id **asic_ids);
+drm_private void amdgpu_parse_asic_ids(struct amdgpu_device *dev);
 
 drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev);
 
@@ -189,26 +173,4 @@ static inline bool update_references(atomic_t *dst, atomic_t *src)
 	return false;
 }
 
-/**
- * Assignment between two amdgpu_bo pointers with reference counting.
- *
- * Usage:
- *    struct amdgpu_bo *dst = ... , *src = ...;
- *
- *    dst = src;
- *    // No reference counting. Only use this when you need to move
- *    // a reference from one pointer to another.
- *
- *    amdgpu_bo_reference(&dst, src);
- *    // Reference counters are updated. dst is decremented and src is
- *    // incremented. dst is freed if its reference counter is 0.
- */
-static inline void amdgpu_bo_reference(struct amdgpu_bo **dst,
-					struct amdgpu_bo *src)
-{
-	if (update_references(&(*dst)->refcount, &src->refcount))
-		amdgpu_bo_free_internal(*dst);
-	*dst = src;
-}
-
 #endif
diff --git a/lib/libdrm/amdgpu/amdgpu_vamgr.c b/lib/libdrm/amdgpu/amdgpu_vamgr.c
index 2b1388edc..ab425ef7c 100644
--- a/lib/libdrm/amdgpu/amdgpu_vamgr.c
+++ b/lib/libdrm/amdgpu/amdgpu_vamgr.c
@@ -34,18 +34,19 @@
 #include "util_math.h"
 
 int amdgpu_va_range_query(amdgpu_device_handle dev,
-			  enum amdgpu_gpu_va_range type, uint64_t *start, uint64_t *end)
+			  enum amdgpu_gpu_va_range type,
+			  uint64_t *start, uint64_t *end)
 {
-	if (type == amdgpu_gpu_va_range_general) {
-		*start = dev->dev_info.virtual_address_offset;
-		*end = dev->dev_info.virtual_address_max;
-		return 0;
-	}
-	return -EINVAL;
+	if (type != amdgpu_gpu_va_range_general)
+		return -EINVAL;
+
+	*start = dev->dev_info.virtual_address_offset;
+	*end = dev->dev_info.virtual_address_max;
+	return 0;
 }
 
 drm_private void amdgpu_vamgr_init(struct amdgpu_bo_va_mgr *mgr, uint64_t start,
-			      uint64_t max, uint64_t alignment)
+				   uint64_t max, uint64_t alignment)
 {
 	mgr->va_offset = start;
 	mgr->va_max = max;
@@ -65,7 +66,7 @@ drm_private void amdgpu_vamgr_deinit(struct amdgpu_bo_va_mgr *mgr)
 	pthread_mutex_destroy(&mgr->bo_va_mutex);
 }
 
-drm_private uint64_t
+static drm_private uint64_t
 amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, uint64_t size,
 		     uint64_t alignment, uint64_t base_required)
 {
@@ -83,8 +84,8 @@ amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, uint64_t size,
 	/* first look for a hole */
 	LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
 		if (base_required) {
-			if(hole->offset > base_required ||
-				(hole->offset + hole->size) < (base_required + size))
+			if (hole->offset > base_required ||
+			    (hole->offset + hole->size) < (base_required + size))
 				continue;
 			waste = base_required - hole->offset;
 			offset = base_required;
@@ -154,7 +155,7 @@ amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, uint64_t size,
 	return offset;
 }
 
-drm_private void
+static drm_private void
 amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va, uint64_t size)
 {
 	struct amdgpu_bo_va_hole *hole;
@@ -192,9 +193,9 @@ amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va, uint64_t size)
 				hole->offset = va;
 				hole->size += size;
 				/* Merge lower hole if it's adjacent */
-				if (next != hole
-						&& &next->list != &mgr->va_holes
-						&& (next->offset + next->size) == va) {
+				if (next != hole &&
+				    &next->list != &mgr->va_holes &&
+				    (next->offset + next->size) == va) {
 					next->size += hole->size;
 					list_del(&hole->list);
 					free(hole);
diff --git a/lib/libdrm/amdgpu/amdgpu_vm.c b/lib/libdrm/amdgpu/amdgpu_vm.c
new file mode 100644
index 000000000..5ba7c082d
--- /dev/null
+++ b/lib/libdrm/amdgpu/amdgpu_vm.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "amdgpu.h"
+#include "amdgpu_drm.h"
+#include "xf86drm.h"
+#include "amdgpu_internal.h"
+
+int amdgpu_vm_reserve_vmid(amdgpu_device_handle dev, uint32_t flags)
+{
+	union drm_amdgpu_vm vm;
+
+	vm.in.op = AMDGPU_VM_OP_RESERVE_VMID;
+	vm.in.flags = flags;
+
+	return drmCommandWriteRead(dev->fd, DRM_AMDGPU_VM,
+				   &vm, sizeof(vm));
+}
+
+int amdgpu_vm_unreserve_vmid(amdgpu_device_handle dev, uint32_t flags)
+{
+	union drm_amdgpu_vm vm;
+
+	vm.in.op = AMDGPU_VM_OP_UNRESERVE_VMID;
+	vm.in.flags = flags;
+
+	return drmCommandWriteRead(dev->fd, DRM_AMDGPU_VM,
+				   &vm, sizeof(vm));
+}
diff --git a/lib/libdrm/data/amdgpu.ids b/lib/libdrm/data/amdgpu.ids
index 0b98c3c3a..1828e410d 100644
--- a/lib/libdrm/data/amdgpu.ids
+++ b/lib/libdrm/data/amdgpu.ids
@@ -62,6 +62,7 @@
 67DF,	C6,	Radeon RX 570 Series
 67DF,	C7,	AMD Radeon (TM) RX 480 Graphics
 67DF,	CF,	AMD Radeon (TM) RX 470 Graphics
+67DF,	D7,	Radeon(TM) RX 470 Graphics
 67DF,	E3,	Radeon RX Series
 67DF,	E7,	Radeon RX 580 Series
 67DF,	EF,	Radeon RX 570 Series
@@ -84,13 +85,16 @@
 67EF,	C5,	AMD Radeon (TM) RX 460 Graphics
 67EF,	C7,	AMD Radeon (TM) RX Graphics
 67EF,	CF,	AMD Radeon (TM) RX 460 Graphics
+67EF,	E0,	Radeon RX 560 Series
 67EF,	E1,	Radeon RX Series
 67EF,	E3,	Radeon RX Series
-67EF,	E7,	Radeon RX Series
+67EF,	E5,	Radeon RX 560 Series
 67EF,	EF,	AMD Radeon (TM) RX Graphics
-67EF,	FF,	Radeon RX Series
+67EF,	FF,	Radeon(TM) RX 460 Graphics
 67FF,	C0,	AMD Radeon (TM) RX Graphics
 67FF,	C1,	AMD Radeon (TM) RX Graphics
+67FF,	CF,	Radeon RX 560 Series
+67FF,	EF,	Radeon RX 560 Series
 67FF,	FF,	Radeon RX 550 Series
 6800,	0,	AMD Radeon HD 7970M
 6801,	0,	AMD Radeon(TM) HD8970M
@@ -122,6 +126,25 @@
 6837,	0,	AMD Radeon HD7700 Series
 683D,	0,	AMD Radeon HD 7700 Series
 683F,	0,	AMD Radeon HD 7700 Series
+6860,	00,	Radeon Instinct MI25
+6860,	01,	Radeon Pro V320
+6860,	02,	Radeon Instinct MI25
+6860,	03,	Radeon Pro V340
+6860,	04,	Radeon Instinct MI25x2
+6861,	00,	Radeon(TM) Pro WX9100
+6862,	00,	Radeon Pro SSG
+6863,	00,	Radeon Vega Frontier Edition
+6864,	03,	Radeon Pro V340
+6864,	04,	Instinct MI25x2
+6868,	00,	Radeon(TM) Pro WX8100
+686C,	00,	GLXT (Radeon Instinct MI25) MxGPU VFID
+686C,	01,	GLXT (Radeon Pro V320) MxGPU
+686C,	02,	GLXT (Radeon Instinct MI25) MxGPU
+686C,	03,	GLXT (Radeon Pro V340) MxGPU
+686C,	04,	GLXT (Radeon Instinct MI25x2) MxGPU
+687F,	C0,	Radeon RX Vega
+687F,	C1,	Radeon RX Vega
+687F,	C3,	Radeon RX Vega
 6900,	0,	AMD Radeon R7 M260
 6900,	81,	AMD Radeon (TM) R7 M360
 6900,	83,	AMD Radeon (TM) R7 M340
@@ -139,8 +162,12 @@
 6939,	F0,	AMD Radeon R9 200 Series
 6939,	0,	AMD Radeon R9 200 Series
 6939,	F1,	AMD Radeon (TM) R9 380 Series
+6980,	00,	Radeon Pro WX3100
 6985,	00,	AMD Radeon Pro WX3100
+6987,	80,	AMD Embedded Radeon E9171
 6995,	00,	AMD Radeon Pro WX2100
+6997,	00,	Radeon Pro WX2100
+699F,	81,	AMD Embedded Radeon E9170 Series
 699F,	C0,	Radeon 500 Series
 699F,	C3,	Radeon 500 Series
 699F,	C7,	Radeon RX 550 Series
@@ -153,6 +180,7 @@
 9874,	C5,	AMD Radeon R6 Graphics
 9874,	C6,	AMD Radeon R6 Graphics
 9874,	C7,	AMD Radeon R5 Graphics
+9874,	C8,	AMD Radeon R7 Graphics
 9874,	81,	AMD Radeon R6 Graphics
 9874,	87,	AMD Radeon R5 Graphics
 9874,	85,	AMD Radeon R6 Graphics
diff --git a/lib/libdrm/etnaviv/Makefile.sources b/lib/libdrm/etnaviv/Makefile.sources
index 525805674..0eb73783a 100644
--- a/lib/libdrm/etnaviv/Makefile.sources
+++ b/lib/libdrm/etnaviv/Makefile.sources
@@ -3,6 +3,7 @@ LIBDRM_ETNAVIV_FILES := \
 	etnaviv_gpu.c \
 	etnaviv_bo.c \
 	etnaviv_bo_cache.c \
+	etnaviv_perfmon.c \
 	etnaviv_pipe.c \
 	etnaviv_cmd_stream.c \
 	etnaviv_drm.h \
diff --git a/lib/libdrm/etnaviv/etnaviv-symbol-check b/lib/libdrm/etnaviv/etnaviv-symbol-check
index 0e2030e46..bc5096159 100755
--- a/lib/libdrm/etnaviv/etnaviv-symbol-check
+++ b/lib/libdrm/etnaviv/etnaviv-symbol-check
@@ -41,7 +41,12 @@ etna_cmd_stream_timestamp
 etna_cmd_stream_flush
 etna_cmd_stream_flush2
 etna_cmd_stream_finish
+etna_cmd_stream_perf
 etna_cmd_stream_reloc
+etna_perfmon_create
+etna_perfmon_del
+etna_perfmon_get_dom_by_name
+etna_perfmon_get_sig_by_name
 EOF
 done)
 
diff --git a/lib/libdrm/etnaviv/etnaviv_bo.c b/lib/libdrm/etnaviv/etnaviv_bo.c
index 4ad0434e7..78b9cd272 100644
--- a/lib/libdrm/etnaviv/etnaviv_bo.c
+++ b/lib/libdrm/etnaviv/etnaviv_bo.c
@@ -173,7 +173,7 @@ struct etna_bo *etna_bo_from_name(struct etna_device *dev, uint32_t name)
 	pthread_mutex_lock(&table_lock);
 
 	/* check name table first, to see if bo is already open: */
-	bo = lookup_bo(dev->name_table, req.handle);
+	bo = lookup_bo(dev->name_table, name);
 	if (bo)
 		goto out_unlock;
 
@@ -206,10 +206,15 @@ struct etna_bo *etna_bo_from_dmabuf(struct etna_device *dev, int fd)
 	int ret, size;
 	uint32_t handle;
 
+	/* take the lock before calling drmPrimeFDToHandle to avoid
+	 * racing against etna_bo_del, which might invalidate the
+	 * returned handle.
+	 */
 	pthread_mutex_lock(&table_lock);
 
 	ret = drmPrimeFDToHandle(dev->fd, fd, &handle);
 	if (ret) {
+		pthread_mutex_unlock(&table_lock);
 		return NULL;
 	}
 
diff --git a/lib/libdrm/etnaviv/etnaviv_bo_cache.c b/lib/libdrm/etnaviv/etnaviv_bo_cache.c
index 8924651f0..6208230dc 100644
--- a/lib/libdrm/etnaviv/etnaviv_bo_cache.c
+++ b/lib/libdrm/etnaviv/etnaviv_bo_cache.c
@@ -124,20 +124,32 @@ static int is_idle(struct etna_bo *bo)
 
 static struct etna_bo *find_in_bucket(struct etna_bo_bucket *bucket, uint32_t flags)
 {
-	struct etna_bo *bo = NULL;
+	struct etna_bo *bo = NULL, *tmp;
 
 	pthread_mutex_lock(&table_lock);
-	while (!LIST_IS_EMPTY(&bucket->list)) {
-		bo = LIST_ENTRY(struct etna_bo, bucket->list.next, list);
 
-		if (bo->flags == flags && is_idle(bo)) {
-			list_del(&bo->list);
-			break;
+	if (LIST_IS_EMPTY(&bucket->list))
+		goto out_unlock;
+
+	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &bucket->list, list) {
+		/* skip BOs with different flags */
+		if (bo->flags != flags)
+			continue;
+
+		/* check if the first BO with matching flags is idle */
+		if (is_idle(bo)) {
+			list_delinit(&bo->list);
+			goto out_unlock;
 		}
 
-		bo = NULL;
+		/* If the oldest BO is still busy, don't try younger ones */
 		break;
 	}
+
+	/* There was no matching buffer found */
+	bo = NULL;
+
+out_unlock:
 	pthread_mutex_unlock(&table_lock);
 
 	return bo;
diff --git a/lib/libdrm/etnaviv/etnaviv_cmd_stream.c b/lib/libdrm/etnaviv/etnaviv_cmd_stream.c
index 8d0e8135e..e8c58cd5d 100644
--- a/lib/libdrm/etnaviv/etnaviv_cmd_stream.c
+++ b/lib/libdrm/etnaviv/etnaviv_cmd_stream.c
@@ -105,6 +105,7 @@ void etna_cmd_stream_del(struct etna_cmd_stream *stream)
 
 	free(stream->buffer);
 	free(priv->submit.relocs);
+	free(priv->submit.pmrs);
 	free(priv);
 }
 
@@ -115,6 +116,7 @@ static void reset_buffer(struct etna_cmd_stream *stream)
 	stream->offset = 0;
 	priv->submit.nr_bos = 0;
 	priv->submit.nr_relocs = 0;
+	priv->submit.nr_pmrs = 0;
 	priv->nr_bos = 0;
 
 	if (priv->reset_notify)
@@ -191,6 +193,8 @@ static void flush(struct etna_cmd_stream *stream, int in_fence_fd,
 		.nr_bos = priv->submit.nr_bos,
 		.relocs = VOID2U64(priv->submit.relocs),
 		.nr_relocs = priv->submit.nr_relocs,
+		.pmrs = VOID2U64(priv->submit.pmrs),
+		.nr_pmrs = priv->submit.nr_pmrs,
 		.stream = VOID2U64(stream->buffer),
 		.stream_size = stream->offset * 4, /* in bytes */
 	};
@@ -260,3 +264,19 @@ void etna_cmd_stream_reloc(struct etna_cmd_stream *stream, const struct etna_rel
 
 	etna_cmd_stream_emit(stream, addr);
 }
+
+void etna_cmd_stream_perf(struct etna_cmd_stream *stream, const struct etna_perf *p)
+{
+	struct etna_cmd_stream_priv *priv = etna_cmd_stream_priv(stream);
+	struct drm_etnaviv_gem_submit_pmr *pmr;
+	uint32_t idx = APPEND(&priv->submit, pmrs);
+
+	pmr = &priv->submit.pmrs[idx];
+
+	pmr->flags = p->flags;
+	pmr->sequence = p->sequence;
+	pmr->read_offset = p->offset;
+	pmr->read_idx = bo2idx(stream, p->bo, ETNA_SUBMIT_BO_READ | ETNA_SUBMIT_BO_WRITE);
+	pmr->domain = p->signal->domain->id;
+	pmr->signal = p->signal->signal;
+}
diff --git a/lib/libdrm/etnaviv/etnaviv_drm.h b/lib/libdrm/etnaviv/etnaviv_drm.h
index 76f6f78a3..110cc73bf 100644
--- a/lib/libdrm/etnaviv/etnaviv_drm.h
+++ b/lib/libdrm/etnaviv/etnaviv_drm.h
@@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo {
 	__u64 presumed;       /* in/out, presumed buffer address */
 };
 
+/* performance monitor request (pmr) */
+#define ETNA_PM_PROCESS_PRE             0x0001
+#define ETNA_PM_PROCESS_POST            0x0002
+struct drm_etnaviv_gem_submit_pmr {
+	__u32 flags;          /* in, when to process request (ETNA_PM_PROCESS_x) */
+	__u8  domain;         /* in, pm domain */
+	__u8  pad;
+	__u16 signal;         /* in, pm signal */
+	__u32 sequence;       /* in, sequence number */
+	__u32 read_offset;    /* in, offset from read_bo */
+	__u32 read_idx;       /* in, index of read_bo buffer */
+};
+
 /* Each cmdstream submit consists of a table of buffers involved, and
  * one or more cmdstream buffers.  This allows for conditional execution
  * (context-restore), and IB buffers needed for per tile/bin draw cmds.
@@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit {
 	__u64 stream;         /* in, ptr to cmdstream */
 	__u32 flags;          /* in, mask of ETNA_SUBMIT_x */
 	__s32 fence_fd;       /* in/out, fence fd (see ETNA_SUBMIT_FENCE_FD_x) */
+	__u64 pmrs;           /* in, ptr to array of submit_pmr's */
+	__u32 nr_pmrs;        /* in, number of submit_pmr's */
+	__u32 pad;
 };
 
 /* The normal way to synchronize with the GPU is just to CPU_PREP on
@@ -210,6 +226,27 @@ struct drm_etnaviv_gem_wait {
 	struct drm_etnaviv_timespec timeout;	/* in */
 };
 
+/*
+ * Performance Monitor (PM):
+ */
+
+struct drm_etnaviv_pm_domain {
+	__u32 pipe;       /* in */
+	__u8  iter;       /* in/out, select pm domain at index iter */
+	__u8  id;         /* out, id of domain */
+	__u16 nr_signals; /* out, how many signals does this domain provide */
+	char  name[64];   /* out, name of domain */
+};
+
+struct drm_etnaviv_pm_signal {
+	__u32 pipe;       /* in */
+	__u8  domain;     /* in, pm domain index */
+	__u8  pad;
+	__u16 iter;       /* in/out, select pm source at index iter */
+	__u16 id;         /* out, id of signal */
+	char  name[64];   /* out, name of signal */
+};
+
 #define DRM_ETNAVIV_GET_PARAM          0x00
 /* placeholder:
 #define DRM_ETNAVIV_SET_PARAM          0x01
@@ -222,7 +259,9 @@ struct drm_etnaviv_gem_wait {
 #define DRM_ETNAVIV_WAIT_FENCE         0x07
 #define DRM_ETNAVIV_GEM_USERPTR        0x08
 #define DRM_ETNAVIV_GEM_WAIT           0x09
-#define DRM_ETNAVIV_NUM_IOCTLS         0x0a
+#define DRM_ETNAVIV_PM_QUERY_DOM       0x0a
+#define DRM_ETNAVIV_PM_QUERY_SIG       0x0b
+#define DRM_ETNAVIV_NUM_IOCTLS         0x0c
 
 #define DRM_IOCTL_ETNAVIV_GET_PARAM    DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GET_PARAM, struct drm_etnaviv_param)
 #define DRM_IOCTL_ETNAVIV_GEM_NEW      DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_NEW, struct drm_etnaviv_gem_new)
@@ -233,6 +272,8 @@ struct drm_etnaviv_gem_wait {
 #define DRM_IOCTL_ETNAVIV_WAIT_FENCE   DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_WAIT_FENCE, struct drm_etnaviv_wait_fence)
 #define DRM_IOCTL_ETNAVIV_GEM_USERPTR  DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_USERPTR, struct drm_etnaviv_gem_userptr)
 #define DRM_IOCTL_ETNAVIV_GEM_WAIT     DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_WAIT, struct drm_etnaviv_gem_wait)
+#define DRM_IOCTL_ETNAVIV_PM_QUERY_DOM DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_PM_QUERY_DOM, struct drm_etnaviv_pm_domain)
+#define DRM_IOCTL_ETNAVIV_PM_QUERY_SIG DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_PM_QUERY_SIG, struct drm_etnaviv_pm_signal)
 
 #if defined(__cplusplus)
 }
diff --git a/lib/libdrm/etnaviv/etnaviv_drmif.h b/lib/libdrm/etnaviv/etnaviv_drmif.h
index 87704acd3..5a6bef8d1 100644
--- a/lib/libdrm/etnaviv/etnaviv_drmif.h
+++ b/lib/libdrm/etnaviv/etnaviv_drmif.h
@@ -35,6 +35,9 @@ struct etna_pipe;
 struct etna_gpu;
 struct etna_device;
 struct etna_cmd_stream;
+struct etna_perfmon;
+struct etna_perfmon_domain;
+struct etna_perfmon_signal;
 
 enum etna_pipe_id {
 	ETNA_PIPE_3D = 0,
@@ -190,4 +193,24 @@ struct etna_reloc {
 
 void etna_cmd_stream_reloc(struct etna_cmd_stream *stream, const struct etna_reloc *r);
 
+/* performance monitoring functions:
+ */
+
+struct etna_perfmon *etna_perfmon_create(struct etna_pipe *pipe);
+void etna_perfmon_del(struct etna_perfmon *perfmon);
+struct etna_perfmon_domain *etna_perfmon_get_dom_by_name(struct etna_perfmon *pm, const char *name);
+struct etna_perfmon_signal *etna_perfmon_get_sig_by_name(struct etna_perfmon_domain *dom, const char *name);
+
+struct etna_perf {
+#define ETNA_PM_PROCESS_PRE             0x0001
+#define ETNA_PM_PROCESS_POST            0x0002
+	uint32_t flags;
+	uint32_t sequence;
+	struct etna_perfmon_signal *signal;
+	struct etna_bo *bo;
+	uint32_t offset;
+};
+
+void etna_cmd_stream_perf(struct etna_cmd_stream *stream, const struct etna_perf *p);
+
 #endif /* ETNAVIV_DRMIF_H_ */
diff --git a/lib/libdrm/etnaviv/etnaviv_perfmon.c b/lib/libdrm/etnaviv/etnaviv_perfmon.c
new file mode 100644
index 000000000..aa5130a65
--- /dev/null
+++ b/lib/libdrm/etnaviv/etnaviv_perfmon.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2017 Etnaviv Project
+ * Copyright (C) 2017 Zodiac Inflight Innovations
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Christian Gmeiner <christian.gmeiner@gmail.com>
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include "etnaviv_priv.h"
+
+static int etna_perfmon_query_signals(struct etna_perfmon *pm, struct etna_perfmon_domain *dom)
+{
+	struct etna_device *dev = pm->pipe->gpu->dev;
+	struct drm_etnaviv_pm_signal req = {
+		.pipe = pm->pipe->id,
+		.domain = dom->id
+	};
+	/* Query one signal of 'dom' per pass and append it to dom->signals. */
+	do {
+		struct etna_perfmon_signal *sig;
+		int ret;
+
+		ret = drmCommandWriteRead(dev->fd, DRM_ETNAVIV_PM_QUERY_SIG, &req, sizeof(req));
+		if (ret)
+			break; /* ioctl failed: stop querying, still return 0 */
+
+		sig = calloc(1, sizeof(*sig));
+		if (!sig)
+			return -ENOMEM;
+
+		INFO_MSG("perfmon signal:");
+		INFO_MSG("id         = %d", req.id);
+		INFO_MSG("name       = %s", req.name);
+
+		sig->domain = dom;
+		sig->signal = req.id;
+		strncpy(sig->name, req.name, sizeof(sig->name) - 1); /* calloc'd: last byte stays NUL */
+		list_addtail(&sig->head, &dom->signals);
+	} while (req.iter != 0xffff);
+
+	return 0;
+}
+
+static int etna_perfmon_query_domains(struct etna_perfmon *pm)
+{
+	struct etna_device *dev = pm->pipe->gpu->dev;
+	struct drm_etnaviv_pm_domain req = {
+		.pipe = pm->pipe->id
+	};
+	/* Query one domain per pass, append it to pm->domains, then its signals. */
+	do {
+		struct etna_perfmon_domain *dom;
+		int ret;
+
+		ret = drmCommandWriteRead(dev->fd, DRM_ETNAVIV_PM_QUERY_DOM, &req, sizeof(req));
+		if (ret)
+			break; /* ioctl failed: stop querying, still return 0 */
+
+		dom = calloc(1, sizeof(*dom));
+		if (!dom)
+			return -ENOMEM;
+
+		list_inithead(&dom->signals);
+		dom->id = req.id;
+		strncpy(dom->name, req.name, sizeof(dom->name) - 1); /* calloc'd: last byte stays NUL */
+		list_addtail(&dom->head, &pm->domains);
+
+		INFO_MSG("perfmon domain:");
+		INFO_MSG("id         = %d", req.id);
+		INFO_MSG("name       = %s", req.name);
+		INFO_MSG("nr_signals = %d", req.nr_signals);
+
+		/* Query all available signals for this domain. */
+		if (req.nr_signals > 0) {
+			ret = etna_perfmon_query_signals(pm, dom);
+			if (ret)
+				return ret;
+		}
+	} while (req.iter != 0xff);
+
+	return 0;
+}
+
+static void etna_perfmon_free_signals(struct etna_perfmon_domain *dom)
+{
+	struct etna_perfmon_signal *sig, *next;
+
+	LIST_FOR_EACH_ENTRY_SAFE(sig, next, &dom->signals, head) {
+		list_del(&sig->head);
+		free(sig);
+	}
+}
+
+static void etna_perfmon_free_domains(struct etna_perfmon *pm)
+{
+	struct etna_perfmon_domain *dom, *next;
+
+	LIST_FOR_EACH_ENTRY_SAFE(dom, next, &pm->domains, head) {
+		etna_perfmon_free_signals(dom);
+		list_del(&dom->head);
+		free(dom);
+	}
+}
+
+struct etna_perfmon *etna_perfmon_create(struct etna_pipe *pipe)
+{
+	struct etna_perfmon *pm;
+	int ret;
+
+	pm = calloc(1, sizeof(*pm));
+	if (!pm) {
+		ERROR_MSG("allocation failed");
+		return NULL;
+	}
+
+	list_inithead(&pm->domains);
+	pm->pipe = pipe;
+
+	/* query all available domains and sources for this device */
+	ret = etna_perfmon_query_domains(pm);
+	if (ret)
+		goto fail;
+
+	return pm;
+
+fail:
+	etna_perfmon_del(pm);
+	return NULL;
+}
+
+void etna_perfmon_del(struct etna_perfmon *pm)
+{
+	if (!pm)
+		return;
+
+	etna_perfmon_free_domains(pm);
+	free(pm);
+}
+
+struct etna_perfmon_domain *etna_perfmon_get_dom_by_name(struct etna_perfmon *pm, const char *name)
+{
+	struct etna_perfmon_domain *dom;
+
+	if (pm) {
+		LIST_FOR_EACH_ENTRY(dom, &pm->domains, head) {
+			if (!strcmp(dom->name, name))
+				return dom;
+		}
+	}
+
+	return NULL;
+}
+
+struct etna_perfmon_signal *etna_perfmon_get_sig_by_name(struct etna_perfmon_domain *dom, const char *name)
+{
+	struct etna_perfmon_signal *signal;
+
+	if (dom) {
+		LIST_FOR_EACH_ENTRY(signal, &dom->signals, head) {
+			if (!strcmp(signal->name, name))
+				return signal;
+		}
+	}
+
+	return NULL;
+}
diff --git a/lib/libdrm/etnaviv/etnaviv_priv.h b/lib/libdrm/etnaviv/etnaviv_priv.h
index 1334ba3f0..e45d364cc 100644
--- a/lib/libdrm/etnaviv/etnaviv_priv.h
+++ b/lib/libdrm/etnaviv/etnaviv_priv.h
@@ -140,6 +140,10 @@ struct etna_cmd_stream_priv {
 		/* reloc's table: */
 		struct drm_etnaviv_gem_submit_reloc *relocs;
 		uint32_t nr_relocs, max_relocs;
+
+		/* perf's table: */
+		struct drm_etnaviv_gem_submit_pmr *pmrs;
+		uint32_t nr_pmrs, max_pmrs;
 	} submit;
 
 	/* should have matching entries in submit.bos: */
@@ -151,6 +155,27 @@ struct etna_cmd_stream_priv {
 	void *reset_notify_priv;
 };
 
+struct etna_perfmon {
+	struct list_head domains;
+	struct etna_pipe *pipe;
+};
+
+struct etna_perfmon_domain
+{
+	struct list_head head;
+	struct list_head signals;
+	uint8_t id;
+	char name[64];
+};
+
+struct etna_perfmon_signal
+{
+	struct list_head head;               /* link in etna_perfmon_domain.signals */
+	struct etna_perfmon_domain *domain;  /* domain this signal was queried for */
+	uint16_t signal;                     /* 16 bits wide, like drm_etnaviv_pm_signal.id */
+	char name[64];
+};
+
 #define ALIGN(v,a) (((v) + (a) - 1) & ~((a) - 1))
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
diff --git a/lib/libdrm/exynos/exynos_fimg2d.c b/lib/libdrm/exynos/exynos_fimg2d.c
index 61340c36c..5658a48e0 100644
--- a/lib/libdrm/exynos/exynos_fimg2d.c
+++ b/lib/libdrm/exynos/exynos_fimg2d.c
@@ -3,11 +3,24 @@
  * Authors:
  *	Inki Dae <inki.dae@samsung.com>
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #ifdef HAVE_CONFIG_H
diff --git a/lib/libdrm/exynos/exynos_fimg2d.h b/lib/libdrm/exynos/exynos_fimg2d.h
index a825c6831..a4dfbe734 100644
--- a/lib/libdrm/exynos/exynos_fimg2d.h
+++ b/lib/libdrm/exynos/exynos_fimg2d.h
@@ -3,11 +3,24 @@
  * Authors:
  *	Inki Dae <inki.dae@samsung.com>
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef _FIMG2D_H_
diff --git a/lib/libdrm/exynos/fimg2d_reg.h b/lib/libdrm/exynos/fimg2d_reg.h
index 07dd6349d..d42296d4e 100644
--- a/lib/libdrm/exynos/fimg2d_reg.h
+++ b/lib/libdrm/exynos/fimg2d_reg.h
@@ -3,11 +3,24 @@
  * Authors:
  *	Inki Dae <inki.dae@samsung.com>
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef _FIMG2D_REG_H_
diff --git a/lib/libdrm/freedreno/freedreno-symbol-check b/lib/libdrm/freedreno/freedreno-symbol-check
index 42f2c4395..6b81040c7 100755
--- a/lib/libdrm/freedreno/freedreno-symbol-check
+++ b/lib/libdrm/freedreno/freedreno-symbol-check
@@ -33,6 +33,7 @@ fd_device_version
 fd_pipe_del
 fd_pipe_get_param
 fd_pipe_new
+fd_pipe_new2
 fd_pipe_wait
 fd_pipe_wait_timeout
 fd_ringbuffer_cmd_count
diff --git a/lib/libdrm/freedreno/freedreno_bo.c b/lib/libdrm/freedreno/freedreno_bo.c
index 10949ebf0..7f8ea59c2 100644
--- a/lib/libdrm/freedreno/freedreno_bo.c
+++ b/lib/libdrm/freedreno/freedreno_bo.c
@@ -138,6 +138,7 @@ fd_bo_from_dmabuf(struct fd_device *dev, int fd)
 	pthread_mutex_lock(&table_lock);
 	ret = drmPrimeFDToHandle(dev->fd, fd, &handle);
 	if (ret) {
+		pthread_mutex_unlock(&table_lock);
 		return NULL;
 	}
 
diff --git a/lib/libdrm/freedreno/freedreno_drmif.h b/lib/libdrm/freedreno/freedreno_drmif.h
index 7a8073ff7..c3b0d02a3 100644
--- a/lib/libdrm/freedreno/freedreno_drmif.h
+++ b/lib/libdrm/freedreno/freedreno_drmif.h
@@ -61,6 +61,7 @@ enum fd_param_id {
 	FD_CHIP_ID,
 	FD_MAX_FREQ,
 	FD_TIMESTAMP,
+	FD_NR_RINGS,      /* # of rings == # of distinct priority levels */
 };
 
 /* bo flags: */
@@ -93,6 +94,7 @@ enum fd_version {
 	FD_VERSION_MADVISE = 1,            /* kernel supports madvise */
 	FD_VERSION_UNLIMITED_CMDS = 1,     /* submits w/ >4 cmd buffers (growable ringbuffer) */
 	FD_VERSION_FENCE_FD = 2,           /* submit command supports in/out fences */
+	FD_VERSION_SUBMIT_QUEUES = 3,      /* submit queues and multiple priority levels */
 };
 enum fd_version fd_device_version(struct fd_device *dev);
 
@@ -100,6 +102,7 @@ enum fd_version fd_device_version(struct fd_device *dev);
  */
 
 struct fd_pipe * fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id);
+struct fd_pipe * fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio);
 void fd_pipe_del(struct fd_pipe *pipe);
 int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param,
 		uint64_t *value);
diff --git a/lib/libdrm/freedreno/freedreno_pipe.c b/lib/libdrm/freedreno/freedreno_pipe.c
index 3f8c8342c..1540474bd 100644
--- a/lib/libdrm/freedreno/freedreno_pipe.c
+++ b/lib/libdrm/freedreno/freedreno_pipe.c
@@ -33,21 +33,30 @@
 #include "freedreno_drmif.h"
 #include "freedreno_priv.h"
 
+/**
+ * priority of zero is highest priority, and higher numeric values are
+ * lower priorities
+ */
 struct fd_pipe *
-fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id)
+fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio)
 {
-	struct fd_pipe *pipe = NULL;
+	struct fd_pipe *pipe;
 	uint64_t val;
 
 	if (id > FD_PIPE_MAX) {
 		ERROR_MSG("invalid pipe id: %d", id);
-		goto fail;
+		return NULL;
 	}
 
-	pipe = dev->funcs->pipe_new(dev, id);
+	if ((prio != 1) && (fd_device_version(dev) < FD_VERSION_SUBMIT_QUEUES)) {
+		ERROR_MSG("invalid priority!");
+		return NULL;
+	}
+
+	pipe = dev->funcs->pipe_new(dev, id, prio);
 	if (!pipe) {
 		ERROR_MSG("allocation failed");
-		goto fail;
+		return NULL;
 	}
 
 	pipe->dev = dev;
@@ -57,10 +66,12 @@ fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id)
 	pipe->gpu_id = val;
 
 	return pipe;
-fail:
-	if (pipe)
-		fd_pipe_del(pipe);
-	return NULL;
+}
+
+struct fd_pipe *
+fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id)
+{
+	return fd_pipe_new2(dev, id, 1);
 }
 
 void fd_pipe_del(struct fd_pipe *pipe)
diff --git a/lib/libdrm/freedreno/freedreno_priv.h b/lib/libdrm/freedreno/freedreno_priv.h
index 8dd3ee694..273074727 100644
--- a/lib/libdrm/freedreno/freedreno_priv.h
+++ b/lib/libdrm/freedreno/freedreno_priv.h
@@ -66,7 +66,8 @@ struct fd_device_funcs {
 			uint32_t flags, uint32_t *handle);
 	struct fd_bo * (*bo_from_handle)(struct fd_device *dev,
 			uint32_t size, uint32_t handle);
-	struct fd_pipe * (*pipe_new)(struct fd_device *dev, enum fd_pipe_id id);
+	struct fd_pipe * (*pipe_new)(struct fd_device *dev, enum fd_pipe_id id,
+			unsigned prio);
 	void (*destroy)(struct fd_device *dev);
 };
 
diff --git a/lib/libdrm/freedreno/kgsl/kgsl_pipe.c b/lib/libdrm/freedreno/kgsl/kgsl_pipe.c
index 8a39eb49e..80bd13133 100644
--- a/lib/libdrm/freedreno/kgsl/kgsl_pipe.c
+++ b/lib/libdrm/freedreno/kgsl/kgsl_pipe.c
@@ -52,6 +52,7 @@ static int kgsl_pipe_get_param(struct fd_pipe *pipe,
 		return 0;
 	case FD_MAX_FREQ:
 	case FD_TIMESTAMP:
+	case FD_NR_RINGS:
 		/* unsupported on kgsl */
 		return -1;
 	default:
@@ -210,7 +211,7 @@ static int getprop(int fd, enum kgsl_property_type type,
 
 
 drm_private struct fd_pipe * kgsl_pipe_new(struct fd_device *dev,
-		enum fd_pipe_id id)
+		enum fd_pipe_id id, uint32_t prio)
 {
 	static const char *paths[] = {
 			[FD_PIPE_3D] = "/dev/kgsl-3d0",
diff --git a/lib/libdrm/freedreno/kgsl/kgsl_priv.h b/lib/libdrm/freedreno/kgsl/kgsl_priv.h
index 6ab649650..41b13920e 100644
--- a/lib/libdrm/freedreno/kgsl/kgsl_priv.h
+++ b/lib/libdrm/freedreno/kgsl/kgsl_priv.h
@@ -103,7 +103,7 @@ drm_private void kgsl_pipe_post_submit(struct kgsl_pipe *pipe,
 drm_private void kgsl_pipe_process_pending(struct kgsl_pipe *pipe,
 		uint32_t timestamp);
 drm_private struct fd_pipe * kgsl_pipe_new(struct fd_device *dev,
-		enum fd_pipe_id id);
+		enum fd_pipe_id id, uint32_t prio);
 
 drm_private struct fd_ringbuffer * kgsl_ringbuffer_new(struct fd_pipe *pipe,
 		uint32_t size);
diff --git a/lib/libdrm/freedreno/kgsl/kgsl_ringbuffer.c b/lib/libdrm/freedreno/kgsl/kgsl_ringbuffer.c
index e4696b1bf..f09c433bb 100644
--- a/lib/libdrm/freedreno/kgsl/kgsl_ringbuffer.c
+++ b/lib/libdrm/freedreno/kgsl/kgsl_ringbuffer.c
@@ -146,7 +146,7 @@ static int kgsl_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_star
 		ibdesc.gpuaddr = kgsl_ring->bo->gpuaddr;
 		ibdesc.hostptr = kgsl_ring->bo->hostptr;
 		ibdesc.sizedwords = 0x145;
-		req.timestamp = (uint32_t)kgsl_ring->bo->hostptr;
+		req.timestamp = (uintptr_t)kgsl_ring->bo->hostptr;
 	}
 
 	do {
diff --git a/lib/libdrm/freedreno/msm/msm_drm.h b/lib/libdrm/freedreno/msm/msm_drm.h
index ed4c8d475..dac49e59b 100644
--- a/lib/libdrm/freedreno/msm/msm_drm.h
+++ b/lib/libdrm/freedreno/msm/msm_drm.h
@@ -73,6 +73,8 @@ struct drm_msm_timespec {
 #define MSM_PARAM_CHIP_ID    0x03
 #define MSM_PARAM_MAX_FREQ   0x04
 #define MSM_PARAM_TIMESTAMP  0x05
+#define MSM_PARAM_GMEM_BASE  0x06
+#define MSM_PARAM_NR_RINGS   0x07
 
 struct drm_msm_param {
 	__u32 pipe;           /* in, MSM_PIPE_x */
@@ -104,10 +106,14 @@ struct drm_msm_gem_new {
 	__u32 handle;         /* out */
 };
 
+#define MSM_INFO_IOVA	0x01
+
+#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-	__u32 pad;
-	__u64 offset;         /* out, offset to pass to mmap() */
+	__u32 flags;	      /* in - combination of MSM_INFO_* flags */
+	__u64 offset;         /* out, mmap() offset or iova */
 };
 
 #define MSM_PREP_READ        0x01
@@ -167,7 +173,7 @@ struct drm_msm_gem_submit_cmd {
 	__u32 size;           /* in, cmdstream size */
 	__u32 pad;
 	__u32 nr_relocs;      /* in, number of submit_reloc's */
-	__u64 __user relocs;  /* in, ptr to array of submit_reloc's */
+	__u64 relocs;         /* in, ptr to array of submit_reloc's */
 };
 
 /* Each buffer referenced elsewhere in the cmdstream submit (ie. the
@@ -211,9 +217,10 @@ struct drm_msm_gem_submit {
 	__u32 fence;          /* out */
 	__u32 nr_bos;         /* in, number of submit_bo's */
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
-	__u64 __user bos;     /* in, ptr to array of submit_bo's */
-	__u64 __user cmds;    /* in, ptr to array of submit_cmd's */
+	__u64 bos;            /* in, ptr to array of submit_bo's */
+	__u64 cmds;           /* in, ptr to array of submit_cmd's */
 	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
+	__u32 queueid;         /* in, submitqueue id */
 };
 
 /* The normal way to synchronize with the GPU is just to CPU_PREP on
@@ -227,6 +234,7 @@ struct drm_msm_wait_fence {
 	__u32 fence;          /* in */
 	__u32 pad;
 	struct drm_msm_timespec timeout;   /* in */
+	__u32 queueid;         /* in, submitqueue id */
 };
 
 /* madvise provides a way to tell the kernel in case a buffers contents
@@ -250,6 +258,20 @@ struct drm_msm_gem_madvise {
 	__u32 retained;       /* out, whether backing store still exists */
 };
 
+/*
+ * Draw queues allow the user to set specific submission parameters. Command
+ * submissions specify a specific submitqueue to use.  ID 0 is reserved for
+ * backwards compatibility as a "default" submitqueue
+ */
+
+#define MSM_SUBMITQUEUE_FLAGS (0)
+
+struct drm_msm_submitqueue {
+	__u32 flags;   /* in, MSM_SUBMITQUEUE_x */
+	__u32 prio;    /* in, Priority level */
+	__u32 id;      /* out, identifier */
+};
+
 #define DRM_MSM_GET_PARAM              0x00
 /* placeholder:
 #define DRM_MSM_SET_PARAM              0x01
@@ -261,7 +283,11 @@ struct drm_msm_gem_madvise {
 #define DRM_MSM_GEM_SUBMIT             0x06
 #define DRM_MSM_WAIT_FENCE             0x07
 #define DRM_MSM_GEM_MADVISE            0x08
-#define DRM_MSM_NUM_IOCTLS             0x09
+/* placeholder:
+#define DRM_MSM_GEM_SVM_NEW            0x09
+ */
+#define DRM_MSM_SUBMITQUEUE_NEW        0x0A
+#define DRM_MSM_SUBMITQUEUE_CLOSE      0x0B
 
 #define DRM_IOCTL_MSM_GET_PARAM        DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
 #define DRM_IOCTL_MSM_GEM_NEW          DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
@@ -271,6 +297,8 @@ struct drm_msm_gem_madvise {
 #define DRM_IOCTL_MSM_GEM_SUBMIT       DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit)
 #define DRM_IOCTL_MSM_WAIT_FENCE       DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence)
 #define DRM_IOCTL_MSM_GEM_MADVISE      DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise)
+#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW    DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
+#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE  DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
 
 #if defined(__cplusplus)
 }
diff --git a/lib/libdrm/freedreno/msm/msm_pipe.c b/lib/libdrm/freedreno/msm/msm_pipe.c
index f872e2459..7395e573f 100644
--- a/lib/libdrm/freedreno/msm/msm_pipe.c
+++ b/lib/libdrm/freedreno/msm/msm_pipe.c
@@ -71,6 +71,8 @@ static int msm_pipe_get_param(struct fd_pipe *pipe,
 		return query_param(pipe, MSM_PARAM_MAX_FREQ, value);
 	case FD_TIMESTAMP:
 		return query_param(pipe, MSM_PARAM_TIMESTAMP, value);
+	case FD_NR_RINGS:
+		return query_param(pipe, MSM_PARAM_NR_RINGS, value);
 	default:
 		ERROR_MSG("invalid param id: %d", param);
 		return -1;
@@ -83,6 +85,7 @@ static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp,
 	struct fd_device *dev = pipe->dev;
 	struct drm_msm_wait_fence req = {
 			.fence = timestamp,
+			.queueid = to_msm_pipe(pipe)->queue_id,
 	};
 	int ret;
 
@@ -97,9 +100,42 @@ static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp,
 	return 0;
 }
 
+static int open_submitqueue(struct fd_device *dev, uint32_t prio,
+		uint32_t *queue_id)
+{
+	struct drm_msm_submitqueue req = {
+		.flags = 0,
+		.prio = prio,
+	};
+	int ret;
+
+	if (fd_device_version(dev) < FD_VERSION_SUBMIT_QUEUES) {
+		*queue_id = 0;
+		return 0;
+	}
+
+	ret = drmCommandWriteRead(dev->fd, DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
+	if (ret) {
+		ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno));
+		return ret;
+	}
+
+	*queue_id = req.id;
+	return 0;
+}
+
+static void close_submitqueue(struct fd_device *dev, uint32_t queue_id)
+{
+	if (fd_device_version(dev) < FD_VERSION_SUBMIT_QUEUES)
+		return;
+
+	drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, &queue_id, sizeof(queue_id));
+}
+
 static void msm_pipe_destroy(struct fd_pipe *pipe)
 {
 	struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
+	close_submitqueue(pipe->dev, msm_pipe->queue_id);
 	free(msm_pipe);
 }
 
@@ -122,7 +158,7 @@ static uint64_t get_param(struct fd_pipe *pipe, uint32_t param)
 }
 
 drm_private struct fd_pipe * msm_pipe_new(struct fd_device *dev,
-		enum fd_pipe_id id)
+		enum fd_pipe_id id, uint32_t prio)
 {
 	static const uint32_t pipe_id[] = {
 			[FD_PIPE_3D] = MSM_PIPE_3D0,
@@ -157,6 +193,9 @@ drm_private struct fd_pipe * msm_pipe_new(struct fd_device *dev,
 	INFO_MSG(" Chip-id:         0x%08x", msm_pipe->chip_id);
 	INFO_MSG(" GMEM size:       0x%08x", msm_pipe->gmem);
 
+	if (open_submitqueue(dev, prio, &msm_pipe->queue_id))
+		goto fail;
+
 	return pipe;
 fail:
 	if (pipe)
diff --git a/lib/libdrm/freedreno/msm/msm_priv.h b/lib/libdrm/freedreno/msm/msm_priv.h
index 6d670aab2..88ac3aa42 100644
--- a/lib/libdrm/freedreno/msm/msm_priv.h
+++ b/lib/libdrm/freedreno/msm/msm_priv.h
@@ -56,6 +56,7 @@ struct msm_pipe {
 	uint32_t gpu_id;
 	uint32_t gmem;
 	uint32_t chip_id;
+	uint32_t queue_id;
 };
 
 static inline struct msm_pipe * to_msm_pipe(struct fd_pipe *x)
@@ -64,7 +65,7 @@ static inline struct msm_pipe * to_msm_pipe(struct fd_pipe *x)
 }
 
 drm_private struct fd_pipe * msm_pipe_new(struct fd_device *dev,
-		enum fd_pipe_id id);
+		enum fd_pipe_id id, uint32_t prio);
 
 drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe,
 		uint32_t size);
diff --git a/lib/libdrm/freedreno/msm/msm_ringbuffer.c b/lib/libdrm/freedreno/msm/msm_ringbuffer.c
index c3b2ededf..c75bb1692 100644
--- a/lib/libdrm/freedreno/msm/msm_ringbuffer.c
+++ b/lib/libdrm/freedreno/msm/msm_ringbuffer.c
@@ -401,6 +401,7 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start
 	struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
 	struct drm_msm_gem_submit req = {
 			.flags = to_msm_pipe(ring->pipe)->pipe,
+			.queueid = to_msm_pipe(ring->pipe)->queue_id,
 	};
 	uint32_t i;
 	int ret;
@@ -589,12 +590,12 @@ drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe,
 		uint32_t size)
 {
 	struct msm_ringbuffer *msm_ring;
-	struct fd_ringbuffer *ring = NULL;
+	struct fd_ringbuffer *ring;
 
 	msm_ring = calloc(1, sizeof(*msm_ring));
 	if (!msm_ring) {
 		ERROR_MSG("allocation failed");
-		goto fail;
+		return NULL;
 	}
 
 	if (size == 0) {
@@ -614,8 +615,4 @@ drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe,
 	ring_cmd_new(ring, size);
 
 	return ring;
-fail:
-	if (ring)
-		fd_ringbuffer_del(ring);
-	return NULL;
 }
diff --git a/lib/libdrm/include/drm/README b/lib/libdrm/include/drm/README
index 870b0b5b6..5b518ddf8 100644
--- a/lib/libdrm/include/drm/README
+++ b/lib/libdrm/include/drm/README
@@ -86,43 +86,25 @@ Outdated or Broken Headers
 This section contains a list of headers and the respective "issues" they might
 have relative to their kernel equivalent.
 
-Nearly all headers:
- - Missing extern C notation.
-Status: Trivial.
-
 Most UMS headers:
  - Not using fixed size integers - compat ioctls are broken.
 Status: ?
 Promote to fixed size ints, which match the current (32bit) ones.
 
-
-drm_mode.h
- - Missing DPI encode/connector pair.
-Status: Trivial.
-
 i915_drm.h
  - Missing PARAMS - HAS_POOLED_EU, MIN_EU_IN_POOL CONTEXT_PARAM_NO_ERROR_CAPTURE
 Status: Trivial.
 
-mga_drm.h
- - Typo fix, use struct over typedef.
-Status: Trivial.
-
 nouveau_drm.h
  - Missing macros NOUVEAU_GETPARAM*, NOUVEAU_DRM_HEADER_PATCHLEVEL, structs,
-enums, using stdint.h over the __u* types.
+enums
 Status: ?
 
-qxl_drm.h
- - Using the stdint.h uint*_t over the respective __u* ones
-Status: Trivial.
-
 r128_drm.h
  - Broken compat ioctls.
 
 radeon_drm.h
- - Missing RADEON_TILING_R600_NO_SCANOUT, CIK_TILE_MODE_*, broken UMS ioctls,
-using stdint types.
+ - Missing RADEON_TILING_R600_NO_SCANOUT, CIK_TILE_MODE_*, broken UMS ioctls
  - Both kernel and libdrm: missing padding -
 drm_radeon_gem_{create,{g,s}et_tiling,set_domain} others ?
 Status: ?
diff --git a/lib/libdrm/include/drm/amdgpu_drm.h b/lib/libdrm/include/drm/amdgpu_drm.h
index d9aa4a339..919248fb4 100644
--- a/lib/libdrm/include/drm/amdgpu_drm.h
+++ b/lib/libdrm/include/drm/amdgpu_drm.h
@@ -52,6 +52,8 @@ extern "C" {
 #define DRM_AMDGPU_GEM_USERPTR		0x11
 #define DRM_AMDGPU_WAIT_FENCES		0x12
 #define DRM_AMDGPU_VM			0x13
+#define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
+#define DRM_AMDGPU_SCHED		0x15
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -67,6 +69,8 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
 #define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
+#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
+#define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -87,6 +91,10 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is always valid in this VM */
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
+/* Flag that BO sharing will be explicitly synchronized */
+#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC		(1 << 7)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
@@ -162,13 +170,22 @@ union drm_amdgpu_bo_list {
 /* unknown cause */
 #define AMDGPU_CTX_UNKNOWN_RESET	3
 
+/* Context priority level */
+#define AMDGPU_CTX_PRIORITY_UNSET       -2048
+#define AMDGPU_CTX_PRIORITY_VERY_LOW    -1023
+#define AMDGPU_CTX_PRIORITY_LOW         -512
+#define AMDGPU_CTX_PRIORITY_NORMAL      0
+/* Selecting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER */
+#define AMDGPU_CTX_PRIORITY_HIGH        512
+#define AMDGPU_CTX_PRIORITY_VERY_HIGH   1023
+
 struct drm_amdgpu_ctx_in {
 	/** AMDGPU_CTX_OP_* */
 	__u32	op;
 	/** For future use, no flags defined so far */
 	__u32	flags;
 	__u32	ctx_id;
-	__u32	_pad;
+	__s32	priority;
 };
 
 union drm_amdgpu_ctx_out {
@@ -212,6 +229,21 @@ union drm_amdgpu_vm {
 	struct drm_amdgpu_vm_out out;
 };
 
+/* sched ioctl */
+#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE	1
+
+struct drm_amdgpu_sched_in {
+	/* AMDGPU_SCHED_OP_* */
+	__u32	op;
+	__u32	fd;
+	__s32	priority;
+	__u32	flags;
+};
+
+union drm_amdgpu_sched {
+	struct drm_amdgpu_sched_in in;
+};
+
 /*
  * This is not a reliable API and you should expect it to fail for any
  * number of reasons and have fallback path that do not use userptr to
@@ -513,6 +545,21 @@ struct drm_amdgpu_cs_chunk_sem {
 	__u32 handle;
 };
 
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ	0
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD	1
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD	2
+
+union drm_amdgpu_fence_to_handle {
+	struct {
+		struct drm_amdgpu_fence fence;
+		__u32 what;
+		__u32 pad;
+	} in;
+	struct {
+		__u32 handle;
+	} out;
+};
+
 struct drm_amdgpu_cs_chunk_data {
 	union {
 		struct drm_amdgpu_cs_chunk_ib		ib_data;
@@ -611,6 +658,7 @@ struct drm_amdgpu_cs_chunk_data {
 	#define AMDGPU_INFO_SENSOR_VDDGFX		0x7
 /* Number of VRAM page faults on CPU access. */
 #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS	0x1E
+#define AMDGPU_INFO_VRAM_LOST_COUNTER		0x1F
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
@@ -764,6 +812,7 @@ struct drm_amdgpu_info_device {
 	__u64 max_memory_clock;
 	/* cu information */
 	__u32 cu_active_number;
+	/* NOTE: cu_ao_mask is INVALID, DON'T use it */
 	__u32 cu_ao_mask;
 	__u32 cu_bitmap[4][4];
 	/** Render backend pipe mask. One render backend is CB+DB. */
@@ -818,6 +867,8 @@ struct drm_amdgpu_info_device {
 	/* max gs wavefront per vgt*/
 	__u32 max_gs_waves_per_vgt;
 	__u32 _pad1;
+	/* always on cu bitmap */
+	__u32 cu_ao_bitmap[4][4];
 };
 
 struct drm_amdgpu_info_hw_ip {
diff --git a/lib/libdrm/include/drm/drm_fourcc.h b/lib/libdrm/include/drm/drm_fourcc.h
index 7586c46f6..3ad838d3f 100644
--- a/lib/libdrm/include/drm/drm_fourcc.h
+++ b/lib/libdrm/include/drm/drm_fourcc.h
@@ -185,6 +185,8 @@ extern "C" {
 #define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
 /* add more to the end as needed */
 
+#define DRM_FORMAT_RESERVED	      ((1ULL << 56) - 1)
+
 #define fourcc_mod_code(vendor, val) \
 	((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL))
 
@@ -197,6 +199,15 @@ extern "C" {
  */
 
 /*
+ * Invalid Modifier
+ *
+ * This modifier can be used as a sentinel to terminate the format modifiers
+ * list, or to initialize a variable with an invalid modifier. It might also be
+ * used to report an error back to userspace for certain APIs.
+ */
+#define DRM_FORMAT_MOD_INVALID	fourcc_mod_code(NONE, DRM_FORMAT_RESERVED)
+
+/*
  * Linear Layout
  *
  * Just plain linear layout. Note that this is different from no specifying any
@@ -253,6 +264,26 @@ extern "C" {
 #define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3)
 
 /*
+ * Intel color control surface (CCS) for render compression
+ *
+ * The framebuffer format must be one of the 8:8:8:8 RGB formats.
+ * The main surface will be plane index 0 and must be Y/Yf-tiled,
+ * the CCS will be plane index 1.
+ *
+ * Each CCS tile matches a 1024x512 pixel area of the main surface.
+ * To match certain aspects of the 3D hardware the CCS is
+ * considered to be made up of normal 128Bx32 Y tiles. Thus
+ * the CCS pitch must be specified in multiples of 128 bytes.
+ *
+ * In reality the CCS tile appears to be a 64Bx64 Y tile, composed
+ * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks.
+ * But that fact is not relevant unless the memory is accessed
+ * directly.
+ */
+#define I915_FORMAT_MOD_Y_TILED_CCS	fourcc_mod_code(INTEL, 4)
+#define I915_FORMAT_MOD_Yf_TILED_CCS	fourcc_mod_code(INTEL, 5)
+
+/*
  * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
  *
  * Macroblocks are laid in a Z-shape, and each pixel data is following the
diff --git a/lib/libdrm/include/drm/drm_mode.h b/lib/libdrm/include/drm/drm_mode.h
index 70571af60..5597a8715 100644
--- a/lib/libdrm/include/drm/drm_mode.h
+++ b/lib/libdrm/include/drm/drm_mode.h
@@ -75,7 +75,7 @@ extern "C" {
   * (define not exposed to user space).
   */
 #define DRM_MODE_FLAG_3D_MASK			(0x1f<<14)
-#define  DRM_MODE_FLAG_3D_NONE			(0<<14)
+#define  DRM_MODE_FLAG_3D_NONE		(0<<14)
 #define  DRM_MODE_FLAG_3D_FRAME_PACKING		(1<<14)
 #define  DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE	(2<<14)
 #define  DRM_MODE_FLAG_3D_LINE_ALTERNATIVE	(3<<14)
@@ -85,6 +85,19 @@ extern "C" {
 #define  DRM_MODE_FLAG_3D_TOP_AND_BOTTOM	(7<<14)
 #define  DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF	(8<<14)
 
+/* Picture aspect ratio options */
+#define DRM_MODE_PICTURE_ASPECT_NONE		0
+#define DRM_MODE_PICTURE_ASPECT_4_3		1
+#define DRM_MODE_PICTURE_ASPECT_16_9		2
+
+/* Aspect ratio flag bitmask (4 bits 22:19) */
+#define DRM_MODE_FLAG_PIC_AR_MASK		(0x0F<<19)
+#define  DRM_MODE_FLAG_PIC_AR_NONE \
+			(DRM_MODE_PICTURE_ASPECT_NONE<<19)
+#define  DRM_MODE_FLAG_PIC_AR_4_3 \
+			(DRM_MODE_PICTURE_ASPECT_4_3<<19)
+#define  DRM_MODE_FLAG_PIC_AR_16_9 \
+			(DRM_MODE_PICTURE_ASPECT_16_9<<19)
 
 /* DPMS flags */
 /* bit compatible with the xorg definitions. */
@@ -100,11 +113,6 @@ extern "C" {
 #define DRM_MODE_SCALE_CENTER		2 /* Centered, no scaling */
 #define DRM_MODE_SCALE_ASPECT		3 /* Full screen, preserve aspect */
 
-/* Picture aspect ratio options */
-#define DRM_MODE_PICTURE_ASPECT_NONE	0
-#define DRM_MODE_PICTURE_ASPECT_4_3	1
-#define DRM_MODE_PICTURE_ASPECT_16_9	2
-
 /* Dithering mode options */
 #define DRM_MODE_DITHERING_OFF	0
 #define DRM_MODE_DITHERING_ON	1
@@ -119,6 +127,53 @@ extern "C" {
 #define DRM_MODE_LINK_STATUS_GOOD	0
 #define DRM_MODE_LINK_STATUS_BAD	1
 
+/*
+ * DRM_MODE_ROTATE_<degrees>
+ *
+ * Signals that a drm plane has been rotated <degrees> degrees in the
+ * counter-clockwise direction.
+ *
+ * This define is provided as a convenience, looking up the property id
+ * using the name->prop id lookup is the preferred method.
+ */
+#define DRM_MODE_ROTATE_0       (1<<0)
+#define DRM_MODE_ROTATE_90      (1<<1)
+#define DRM_MODE_ROTATE_180     (1<<2)
+#define DRM_MODE_ROTATE_270     (1<<3)
+
+/*
+ * DRM_MODE_ROTATE_MASK
+ *
+ * Bitmask used to look for drm plane rotations.
+ */
+#define DRM_MODE_ROTATE_MASK (\
+		DRM_MODE_ROTATE_0  | \
+		DRM_MODE_ROTATE_90  | \
+		DRM_MODE_ROTATE_180 | \
+		DRM_MODE_ROTATE_270)
+
+/*
+ * DRM_MODE_REFLECT_<axis>
+ *
+ * Signals that the contents of a drm plane are reflected along the <axis>
+ * axis, in the same way as mirroring.
+ *
+ * This define is provided as a convenience, looking up the property id
+ * using the name->prop id lookup is the preferred method.
+ */
+#define DRM_MODE_REFLECT_X      (1<<4)
+#define DRM_MODE_REFLECT_Y      (1<<5)
+
+/*
+ * DRM_MODE_REFLECT_MASK
+ *
+ * Bitmask used to look for drm plane reflections.
+ */
+#define DRM_MODE_REFLECT_MASK (\
+		DRM_MODE_REFLECT_X | \
+		DRM_MODE_REFLECT_Y)
+
+
 struct drm_mode_modeinfo {
 	__u32 clock;
 	__u16 hdisplay;
@@ -657,6 +712,56 @@ struct drm_mode_atomic {
 	__u64 user_data;
 };
 
+struct drm_format_modifier_blob {
+#define FORMAT_BLOB_CURRENT 1
+	/* Version of this blob format */
+	__u32 version;
+
+	/* Flags */
+	__u32 flags;
+
+	/* Number of fourcc formats supported */
+	__u32 count_formats;
+
+	/* Where in this blob the formats exist (in bytes) */
+	__u32 formats_offset;
+
+	/* Number of drm_format_modifiers */
+	__u32 count_modifiers;
+
+	/* Where in this blob the modifiers exist (in bytes) */
+	__u32 modifiers_offset;
+
+	/* __u32 formats[] */
+	/* struct drm_format_modifier modifiers[] */
+};
+
+struct drm_format_modifier {
+	/* Bitmask of formats in the get_plane format list this info applies
+	 * to. The offset selects which window of 64 formats (bits) it covers.
+	 *
+	 * Some examples:
+	 * In today's world with < 65 formats, where formats 0 and 2 are
+	 * supported:
+	 * 0x0000000000000005
+	 *		  ^-offset = 0, formats = 5
+	 *
+	 * If the number of formats grew to 128, and formats 98-102 are
+	 * supported with the modifier:
+	 *
+	 * 0x0000007c00000000 0000000000000000
+	 *		  ^
+	 *		  |__offset = 64, formats = 0x7c00000000
+	 *
+	 */
+	__u64 formats;
+	__u32 offset;
+	__u32 pad;
+
+	/* The modifier that applies to the get_plane format list bitmask. */
+	__u64 modifier;
+};
+
 /**
  * Create a new 'blob' data property, copying length bytes from data pointer,
  * and returning new blob ID.
@@ -677,6 +782,72 @@ struct drm_mode_destroy_blob {
 	__u32 blob_id;
 };
 
+/**
+ * Lease mode resources, creating another drm_master.
+ */
+struct drm_mode_create_lease {
+	/** Pointer to array of object ids (__u32) */
+	__u64 object_ids;
+	/** Number of object ids */
+	__u32 object_count;
+	/** flags for new FD (O_CLOEXEC, etc) */
+	__u32 flags;
+
+	/** Return: unique identifier for lessee. */
+	__u32 lessee_id;
+	/** Return: file descriptor to new drm_master file */
+	__u32 fd;
+};
+
+/**
+ * List lessees of a drm_master
+ */
+struct drm_mode_list_lessees {
+	/** Number of lessees.
+	 * On input, provides length of the array.
+	 * On output, provides total number. No
+	 * more than the input number will be written
+	 * back, so two calls can be used to get
+	 * the size and then the data.
+	 */
+	__u32 count_lessees;
+	__u32 pad;
+
+	/** Pointer to lessees.
+	 * pointer to __u64 array of lessee ids
+	 */
+	__u64 lessees_ptr;
+};
+
+/**
+ * Get leased objects
+ */
+struct drm_mode_get_lease {
+	/** Number of leased objects.
+	 * On input, provides length of the array.
+	 * On output, provides total number. No
+	 * more than the input number will be written
+	 * back, so two calls can be used to get
+	 * the size and then the data.
+	 */
+	__u32 count_objects;
+	__u32 pad;
+
+	/** Pointer to objects.
+	 * pointer to __u32 array of object ids
+	 */
+	__u64 objects_ptr;
+};
+
+/**
+ * Revoke lease
+ */
+struct drm_mode_revoke_lease {
+	/** Unique ID of lessee
+	 */
+	__u32 lessee_id;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/lib/libdrm/include/drm/mga_drm.h b/lib/libdrm/include/drm/mga_drm.h
index b630e8fad..793001114 100644
--- a/lib/libdrm/include/drm/mga_drm.h
+++ b/lib/libdrm/include/drm/mga_drm.h
@@ -37,6 +37,10 @@
 
 #include "drm.h"
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the Xserver file (mga_sarea.h)
  */
@@ -107,7 +111,7 @@
  */
 #define MGA_NR_SAREA_CLIPRECTS	8
 
-/* 2 heaps (1 for card, 1 for agp), each divided into upto 128
+/* 2 heaps (1 for card, 1 for agp), each divided into up to 128
  * regions, subject to a minimum region size of (1<<16) == 64k.
  *
  * Clients may subdivide regions internally, but when sharing between
@@ -248,7 +252,7 @@ typedef struct _drm_mga_sarea {
 #define DRM_MGA_DMA_BOOTSTRAP  0x0c
 
 #define DRM_IOCTL_MGA_INIT     DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t)
-#define DRM_IOCTL_MGA_FLUSH    DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, drm_lock_t)
+#define DRM_IOCTL_MGA_FLUSH    DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, struct drm_lock)
 #define DRM_IOCTL_MGA_RESET    DRM_IO(  DRM_COMMAND_BASE + DRM_MGA_RESET)
 #define DRM_IOCTL_MGA_SWAP     DRM_IO(  DRM_COMMAND_BASE + DRM_MGA_SWAP)
 #define DRM_IOCTL_MGA_CLEAR    DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_CLEAR, drm_mga_clear_t)
@@ -416,4 +420,8 @@ typedef struct drm_mga_getparam {
 	void *value;
 } drm_mga_getparam_t;
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif
diff --git a/lib/libdrm/include/drm/nouveau_drm.h b/lib/libdrm/include/drm/nouveau_drm.h
index e418f9f38..cb077821c 100644
--- a/lib/libdrm/include/drm/nouveau_drm.h
+++ b/lib/libdrm/include/drm/nouveau_drm.h
@@ -27,6 +27,12 @@
 
 #define NOUVEAU_DRM_HEADER_PATCHLEVEL 16
 
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 struct drm_nouveau_channel_alloc {
 	uint32_t     fb_ctxdma_handle;
 	uint32_t     tt_ctxdma_handle;
@@ -105,34 +111,34 @@ struct drm_nouveau_setparam {
 #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
 
 struct drm_nouveau_gem_info {
-	uint32_t handle;
-	uint32_t domain;
-	uint64_t size;
-	uint64_t offset;
-	uint64_t map_handle;
-	uint32_t tile_mode;
-	uint32_t tile_flags;
+	__u32 handle;
+	__u32 domain;
+	__u64 size;
+	__u64 offset;
+	__u64 map_handle;
+	__u32 tile_mode;
+	__u32 tile_flags;
 };
 
 struct drm_nouveau_gem_new {
 	struct drm_nouveau_gem_info info;
-	uint32_t channel_hint;
-	uint32_t align;
+	__u32 channel_hint;
+	__u32 align;
 };
 
 #define NOUVEAU_GEM_MAX_BUFFERS 1024
 struct drm_nouveau_gem_pushbuf_bo_presumed {
-	uint32_t valid;
-	uint32_t domain;
-	uint64_t offset;
+	__u32 valid;
+	__u32 domain;
+	__u64 offset;
 };
 
 struct drm_nouveau_gem_pushbuf_bo {
-	uint64_t user_priv;
-	uint32_t handle;
-	uint32_t read_domains;
-	uint32_t write_domains;
-	uint32_t valid_domains;
+	__u64 user_priv;
+	__u32 handle;
+	__u32 read_domains;
+	__u32 write_domains;
+	__u32 valid_domains;
 	struct drm_nouveau_gem_pushbuf_bo_presumed presumed;
 };
 
@@ -141,47 +147,47 @@ struct drm_nouveau_gem_pushbuf_bo {
 #define NOUVEAU_GEM_RELOC_OR   (1 << 2)
 #define NOUVEAU_GEM_MAX_RELOCS 1024
 struct drm_nouveau_gem_pushbuf_reloc {
-	uint32_t reloc_bo_index;
-	uint32_t reloc_bo_offset;
-	uint32_t bo_index;
-	uint32_t flags;
-	uint32_t data;
-	uint32_t vor;
-	uint32_t tor;
+	__u32 reloc_bo_index;
+	__u32 reloc_bo_offset;
+	__u32 bo_index;
+	__u32 flags;
+	__u32 data;
+	__u32 vor;
+	__u32 tor;
 };
 
 #define NOUVEAU_GEM_MAX_PUSH 512
 struct drm_nouveau_gem_pushbuf_push {
-	uint32_t bo_index;
-	uint32_t pad;
-	uint64_t offset;
-	uint64_t length;
+	__u32 bo_index;
+	__u32 pad;
+	__u64 offset;
+	__u64 length;
 };
 
 struct drm_nouveau_gem_pushbuf {
-	uint32_t channel;
-	uint32_t nr_buffers;
-	uint64_t buffers;
-	uint32_t nr_relocs;
-	uint32_t nr_push;
-	uint64_t relocs;
-	uint64_t push;
-	uint32_t suffix0;
-	uint32_t suffix1;
-	uint64_t vram_available;
-	uint64_t gart_available;
+	__u32 channel;
+	__u32 nr_buffers;
+	__u64 buffers;
+	__u32 nr_relocs;
+	__u32 nr_push;
+	__u64 relocs;
+	__u64 push;
+	__u32 suffix0;
+	__u32 suffix1;
+	__u64 vram_available;
+	__u64 gart_available;
 };
 
 #define NOUVEAU_GEM_CPU_PREP_NOWAIT                                  0x00000001
 #define NOUVEAU_GEM_CPU_PREP_NOBLOCK                                 0x00000002
 #define NOUVEAU_GEM_CPU_PREP_WRITE                                   0x00000004
 struct drm_nouveau_gem_cpu_prep {
-	uint32_t handle;
-	uint32_t flags;
+	__u32 handle;
+	__u32 flags;
 };
 
 struct drm_nouveau_gem_cpu_fini {
-	uint32_t handle;
+	__u32 handle;
 };
 
 enum nouveau_bus_type {
@@ -207,4 +213,8 @@ struct drm_nouveau_sarea {
 #define DRM_NOUVEAU_GEM_CPU_FINI       0x43
 #define DRM_NOUVEAU_GEM_INFO           0x44
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif /* __NOUVEAU_DRM_H__ */
diff --git a/lib/libdrm/include/drm/qxl_drm.h b/lib/libdrm/include/drm/qxl_drm.h
index 1e331a867..38a0dbdfd 100644
--- a/lib/libdrm/include/drm/qxl_drm.h
+++ b/lib/libdrm/include/drm/qxl_drm.h
@@ -27,10 +27,14 @@
 #include <stddef.h>
 #include "drm.h"
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
  *
- * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel
+ * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel
  * compatibility Keep fields aligned to their size
  */
 
@@ -48,14 +52,14 @@
 #define DRM_QXL_ALLOC_SURF  0x06
 
 struct drm_qxl_alloc {
-	uint32_t size;
-	uint32_t handle; /* 0 is an invalid handle */
+	__u32 size;
+	__u32 handle; /* 0 is an invalid handle */
 };
 
 struct drm_qxl_map {
-	uint64_t offset; /* use for mmap system call */
-	uint32_t handle;
-	uint32_t pad;
+	__u64 offset; /* use for mmap system call */
+	__u32 handle;
+	__u32 pad;
 };
 
 /*
@@ -68,59 +72,59 @@ struct drm_qxl_map {
 #define QXL_RELOC_TYPE_SURF 2
 
 struct drm_qxl_reloc {
-	uint64_t src_offset; /* offset into src_handle or src buffer */
-	uint64_t dst_offset; /* offset in dest handle */
-	uint32_t src_handle; /* dest handle to compute address from */
-	uint32_t dst_handle; /* 0 if to command buffer */
-	uint32_t reloc_type;
-	uint32_t pad;
+	__u64 src_offset; /* offset into src_handle or src buffer */
+	__u64 dst_offset; /* offset in dest handle */
+	__u32 src_handle; /* dest handle to compute address from */
+	__u32 dst_handle; /* 0 if to command buffer */
+	__u32 reloc_type;
+	__u32 pad;
 };
 
 struct drm_qxl_command {
-	uint64_t	 command; /* void* */
-	uint64_t	 relocs; /* struct drm_qxl_reloc* */
-	uint32_t		type;
-	uint32_t		command_size;
-	uint32_t		relocs_num;
-	uint32_t                pad;
+	__u64		command; /* void* */
+	__u64		relocs; /* struct drm_qxl_reloc* */
+	__u32		type;
+	__u32		command_size;
+	__u32		relocs_num;
+	__u32                pad;
 };
 
 /* XXX: call it drm_qxl_commands? */
 struct drm_qxl_execbuffer {
-	uint32_t		flags;		/* for future use */
-	uint32_t		commands_num;
-	uint64_t	 commands;	/* struct drm_qxl_command* */
+	__u32		flags;		/* for future use */
+	__u32		commands_num;
+	__u64		commands;	/* struct drm_qxl_command* */
 };
 
 struct drm_qxl_update_area {
-	uint32_t handle;
-	uint32_t top;
-	uint32_t left;
-	uint32_t bottom;
-	uint32_t right;
-	uint32_t pad;
+	__u32 handle;
+	__u32 top;
+	__u32 left;
+	__u32 bottom;
+	__u32 right;
+	__u32 pad;
 };
 
 #define QXL_PARAM_NUM_SURFACES 1 /* rom->n_surfaces */
 #define QXL_PARAM_MAX_RELOCS 2
 struct drm_qxl_getparam {
-	uint64_t param;
-	uint64_t value;
+	__u64 param;
+	__u64 value;
 };
 
 /* these are one bit values */
 struct drm_qxl_clientcap {
-	uint32_t index;
-	uint32_t pad;
+	__u32 index;
+	__u32 pad;
 };
 
 struct drm_qxl_alloc_surf {
-	uint32_t format;
-	uint32_t width;
-	uint32_t height;
-	int32_t stride;
-	uint32_t handle;
-	uint32_t pad;
+	__u32 format;
+	__u32 width;
+	__u32 height;
+	__s32 stride;
+	__u32 handle;
+	__u32 pad;
 };
 
 #define DRM_IOCTL_QXL_ALLOC \
@@ -149,4 +153,8 @@ struct drm_qxl_alloc_surf {
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_QXL_ALLOC_SURF,\
 		struct drm_qxl_alloc_surf)
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif
diff --git a/lib/libdrm/include/drm/r128_drm.h b/lib/libdrm/include/drm/r128_drm.h
index ede78ff9d..bf431a023 100644
--- a/lib/libdrm/include/drm/r128_drm.h
+++ b/lib/libdrm/include/drm/r128_drm.h
@@ -33,6 +33,12 @@
 #ifndef __R128_DRM_H__
 #define __R128_DRM_H__
 
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the X server file (r128_sarea.h)
  */
@@ -323,4 +329,8 @@ typedef struct drm_r128_getparam {
 	void *value;
 } drm_r128_getparam_t;
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif
diff --git a/lib/libdrm/include/drm/radeon_drm.h b/lib/libdrm/include/drm/radeon_drm.h
index f09cc04ce..a1e385d6e 100644
--- a/lib/libdrm/include/drm/radeon_drm.h
+++ b/lib/libdrm/include/drm/radeon_drm.h
@@ -797,9 +797,9 @@ typedef struct drm_radeon_surface_free {
 #define RADEON_GEM_DOMAIN_VRAM		0x4
 
 struct drm_radeon_gem_info {
-	uint64_t	gart_size;
-	uint64_t	vram_size;
-	uint64_t	vram_visible;
+	__u64	gart_size;
+	__u64	vram_size;
+	__u64	vram_visible;
 };
 
 #define RADEON_GEM_NO_BACKING_STORE	(1 << 0)
@@ -811,11 +811,11 @@ struct drm_radeon_gem_info {
 #define RADEON_GEM_NO_CPU_ACCESS	(1 << 4)
 
 struct drm_radeon_gem_create {
-	uint64_t	size;
-	uint64_t	alignment;
-	uint32_t	handle;
-	uint32_t	initial_domain;
-	uint32_t	flags;
+	__u64	size;
+	__u64	alignment;
+	__u32	handle;
+	__u32	initial_domain;
+	__u32	flags;
 };
 
 /*
@@ -829,10 +829,10 @@ struct drm_radeon_gem_create {
 #define RADEON_GEM_USERPTR_REGISTER	(1 << 3)
 
 struct drm_radeon_gem_userptr {
-	uint64_t		addr;
-	uint64_t		size;
-	uint32_t		flags;
-	uint32_t		handle;
+	__u64		addr;
+	__u64		size;
+	__u32		flags;
+	__u32		handle;
 };
 
 #define RADEON_TILING_MACRO				0x1
@@ -855,72 +855,72 @@ struct drm_radeon_gem_userptr {
 #define RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK	0xf
 
 struct drm_radeon_gem_set_tiling {
-	uint32_t	handle;
-	uint32_t	tiling_flags;
-	uint32_t	pitch;
+	__u32	handle;
+	__u32	tiling_flags;
+	__u32	pitch;
 };
 
 struct drm_radeon_gem_get_tiling {
-	uint32_t	handle;
-	uint32_t	tiling_flags;
-	uint32_t	pitch;
+	__u32	handle;
+	__u32	tiling_flags;
+	__u32	pitch;
 };
 
 struct drm_radeon_gem_mmap {
-	uint32_t	handle;
-	uint32_t	pad;
-	uint64_t	offset;
-	uint64_t	size;
-	uint64_t	addr_ptr;
+	__u32	handle;
+	__u32	pad;
+	__u64	offset;
+	__u64	size;
+	__u64	addr_ptr;
 };
 
 struct drm_radeon_gem_set_domain {
-	uint32_t	handle;
-	uint32_t	read_domains;
-	uint32_t	write_domain;
+	__u32	handle;
+	__u32	read_domains;
+	__u32	write_domain;
 };
 
 struct drm_radeon_gem_wait_idle {
-	uint32_t	handle;
-	uint32_t	pad;
+	__u32	handle;
+	__u32	pad;
 };
 
 struct drm_radeon_gem_busy {
-	uint32_t	handle;
-	uint32_t        domain;
+	__u32	handle;
+	__u32        domain;
 };
 
 struct drm_radeon_gem_pread {
 	/** Handle for the object being read. */
-	uint32_t handle;
-	uint32_t pad;
+	__u32 handle;
+	__u32 pad;
 	/** Offset into the object to read from */
-	uint64_t offset;
+	__u64 offset;
 	/** Length of data to read */
-	uint64_t size;
+	__u64 size;
 	/** Pointer to write the data into. */
 	/* void *, but pointers are not 32/64 compatible */
-	uint64_t data_ptr;
+	__u64 data_ptr;
 };
 
 struct drm_radeon_gem_pwrite {
 	/** Handle for the object being written to. */
-	uint32_t handle;
-	uint32_t pad;
+	__u32 handle;
+	__u32 pad;
 	/** Offset into the object to write to */
-	uint64_t offset;
+	__u64 offset;
 	/** Length of data to write */
-	uint64_t size;
+	__u64 size;
 	/** Pointer to read the data from. */
 	/* void *, but pointers are not 32/64 compatible */
-	uint64_t data_ptr;
+	__u64 data_ptr;
 };
 
 /* Sets or returns a value associated with a buffer. */
 struct drm_radeon_gem_op {
-	uint32_t	handle; /* buffer */
-	uint32_t	op;     /* RADEON_GEM_OP_* */
-	uint64_t	value;  /* input or return value */
+	__u32	handle; /* buffer */
+	__u32	op;     /* RADEON_GEM_OP_* */
+	__u64	value;  /* input or return value */
 };
 
 #define RADEON_GEM_OP_GET_INITIAL_DOMAIN	0
@@ -940,11 +940,11 @@ struct drm_radeon_gem_op {
 #define RADEON_VM_PAGE_SNOOPED		(1 << 4)
 
 struct drm_radeon_gem_va {
-	uint32_t		handle;
-	uint32_t		operation;
-	uint32_t		vm_id;
-	uint32_t		flags;
-	uint64_t		offset;
+	__u32		handle;
+	__u32		operation;
+	__u32		vm_id;
+	__u32		flags;
+	__u64		offset;
 };
 
 #define RADEON_CHUNK_ID_RELOCS	0x01
@@ -966,29 +966,29 @@ struct drm_radeon_gem_va {
 /* 0 = normal, + = higher priority, - = lower priority */
 
 struct drm_radeon_cs_chunk {
-	uint32_t		chunk_id;
-	uint32_t		length_dw;
-	uint64_t		chunk_data;
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
 };
 
 /* drm_radeon_cs_reloc.flags */
 #define RADEON_RELOC_PRIO_MASK		(0xf << 0)
 
 struct drm_radeon_cs_reloc {
-	uint32_t		handle;
-	uint32_t		read_domains;
-	uint32_t		write_domain;
-	uint32_t		flags;
+	__u32		handle;
+	__u32		read_domains;
+	__u32		write_domain;
+	__u32		flags;
 };
 
 struct drm_radeon_cs {
-	uint32_t		num_chunks;
-	uint32_t		cs_id;
-	/* this points to uint64_t * which point to cs chunks */
-	uint64_t		chunks;
+	__u32		num_chunks;
+	__u32		cs_id;
+	/* this points to __u64 * which point to cs chunks */
+	__u64		chunks;
 	/* updates to the limits after this CS ioctl */
-	uint64_t		gart_limit;
-	uint64_t		vram_limit;
+	__u64		gart_limit;
+	__u64		vram_limit;
 };
 
 #define RADEON_INFO_DEVICE_ID		0x00
@@ -1047,9 +1047,9 @@ struct drm_radeon_cs {
 #define RADEON_INFO_GPU_RESET_COUNTER	0x26
 
 struct drm_radeon_info {
-	uint32_t		request;
-	uint32_t		pad;
-	uint64_t		value;
+	__u32		request;
+	__u32		pad;
+	__u64		value;
 };
 
 /* Those correspond to the tile index to use, this is to explicitly state
diff --git a/lib/libdrm/include/drm/savage_drm.h b/lib/libdrm/include/drm/savage_drm.h
index f7a75eff0..1a91234e6 100644
--- a/lib/libdrm/include/drm/savage_drm.h
+++ b/lib/libdrm/include/drm/savage_drm.h
@@ -26,10 +26,16 @@
 #ifndef __SAVAGE_DRM_H__
 #define __SAVAGE_DRM_H__
 
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 #ifndef __SAVAGE_SAREA_DEFINES__
 #define __SAVAGE_SAREA_DEFINES__
 
-/* 2 heaps (1 for card, 1 for agp), each divided into upto 128
+/* 2 heaps (1 for card, 1 for agp), each divided into up to 128
  * regions, subject to a minimum region size of (1<<16) == 64k.
  *
  * Clients may subdivide regions internally, but when sharing between
@@ -63,10 +69,10 @@ typedef struct _drm_savage_sarea {
 #define DRM_SAVAGE_BCI_EVENT_EMIT	0x02
 #define DRM_SAVAGE_BCI_EVENT_WAIT	0x03
 
-#define DRM_IOCTL_SAVAGE_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
-#define DRM_IOCTL_SAVAGE_CMDBUF		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
-#define DRM_IOCTL_SAVAGE_EVENT_EMIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
-#define DRM_IOCTL_SAVAGE_EVENT_WAIT	DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
+#define DRM_IOCTL_SAVAGE_BCI_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
+#define DRM_IOCTL_SAVAGE_BCI_CMDBUF		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_EMIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_WAIT	DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
 
 #define SAVAGE_DMA_PCI	1
 #define SAVAGE_DMA_AGP	3
@@ -207,4 +213,8 @@ union drm_savage_cmd_header {
 	} clear1;		/* SAVAGE_CMD_CLEAR data */
 };
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif
diff --git a/lib/libdrm/include/drm/sis_drm.h b/lib/libdrm/include/drm/sis_drm.h
index 30f7b3827..8e51bb9a5 100644
--- a/lib/libdrm/include/drm/sis_drm.h
+++ b/lib/libdrm/include/drm/sis_drm.h
@@ -27,6 +27,12 @@
 #ifndef __SIS_DRM_H__
 #define __SIS_DRM_H__
 
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 /* SiS specific ioctls */
 #define NOT_USED_0_3
 #define DRM_SIS_FB_ALLOC	0x04
@@ -64,4 +70,8 @@ typedef struct {
 	unsigned int offset, size;
 } drm_sis_fb_t;
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif				/* __SIS_DRM_H__ */
diff --git a/lib/libdrm/include/drm/tegra_drm.h b/lib/libdrm/include/drm/tegra_drm.h
index 7c0fe0ed5..12f9bf848 100644
--- a/lib/libdrm/include/drm/tegra_drm.h
+++ b/lib/libdrm/include/drm/tegra_drm.h
@@ -20,10 +20,14 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#ifndef _UAPI_TEGRA_DRM_H_
-#define _UAPI_TEGRA_DRM_H_
+#ifndef _TEGRA_DRM_H_
+#define _TEGRA_DRM_H_
 
-#include <drm.h>
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
 
 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
@@ -198,4 +202,8 @@ struct drm_tegra_gem_get_flags {
 #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags)
 #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags)
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif
diff --git a/lib/libdrm/include/drm/vc4_drm.h b/lib/libdrm/include/drm/vc4_drm.h
index 6ac4c5c01..3415a4b71 100644
--- a/lib/libdrm/include/drm/vc4_drm.h
+++ b/lib/libdrm/include/drm/vc4_drm.h
@@ -21,8 +21,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef _UAPI_VC4_DRM_H_
-#define _UAPI_VC4_DRM_H_
+#ifndef _VC4_DRM_H_
+#define _VC4_DRM_H_
 
 #include "drm.h"
 
@@ -40,6 +40,8 @@ extern "C" {
 #define DRM_VC4_GET_PARAM                         0x07
 #define DRM_VC4_SET_TILING                        0x08
 #define DRM_VC4_GET_TILING                        0x09
+#define DRM_VC4_LABEL_BO                          0x0a
+#define DRM_VC4_GEM_MADVISE                       0x0b
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -51,6 +53,8 @@ extern "C" {
 #define DRM_IOCTL_VC4_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
 #define DRM_IOCTL_VC4_SET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
 #define DRM_IOCTL_VC4_GET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
+#define DRM_IOCTL_VC4_LABEL_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
+#define DRM_IOCTL_VC4_GEM_MADVISE         DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise)
 
 struct drm_vc4_submit_rcl_surface {
 	__u32 hindex; /* Handle index, or ~0 if not present. */
@@ -153,6 +157,16 @@ struct drm_vc4_submit_cl {
 	__u32 pad:24;
 
 #define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
+/* By default, the kernel gets to choose the order that the tiles are
+ * rendered in.  If this is set, then the tiles will be rendered in a
+ * raster order, with the right-to-left vs left-to-right and
+ * top-to-bottom vs bottom-to-top dictated by
+ * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*.  This allows overlapping
+ * blits to be implemented using the 3D engine.
+ */
+#define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
 	__u32 flags;
 
 	/* Returned value of the seqno of this render job (for the
@@ -292,6 +306,8 @@ struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_SUPPORTS_BRANCHES		3
 #define DRM_VC4_PARAM_SUPPORTS_ETC1		4
 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
+#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
+#define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
 
 struct drm_vc4_get_param {
 	__u32 param;
@@ -311,8 +327,33 @@ struct drm_vc4_set_tiling {
 	__u64 modifier;
 };
 
+/**
+ * struct drm_vc4_label_bo - Attach a name to a BO for debug purposes.
+ */
+struct drm_vc4_label_bo {
+	__u32 handle;
+	__u32 len;
+	__u64 name;
+};
+
+/*
+ * States prefixed with '__' are internal states and cannot be passed to the
+ * DRM_IOCTL_VC4_GEM_MADVISE ioctl.
+ */
+#define VC4_MADV_WILLNEED			0
+#define VC4_MADV_DONTNEED			1
+#define __VC4_MADV_PURGED			2
+#define __VC4_MADV_NOTSUPP			3
+
+struct drm_vc4_gem_madvise {
+	__u32 handle;
+	__u32 madv;
+	__u32 retained;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
 
-#endif /* _UAPI_VC4_DRM_H_ */
+#endif /* _VC4_DRM_H_ */
diff --git a/lib/libdrm/include/drm/via_drm.h b/lib/libdrm/include/drm/via_drm.h
index 182f8792f..8b69e8197 100644
--- a/lib/libdrm/include/drm/via_drm.h
+++ b/lib/libdrm/include/drm/via_drm.h
@@ -26,6 +26,10 @@
 
 #include "drm.h"
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
  */
@@ -272,4 +276,8 @@ typedef struct drm_via_dmablit {
 	drm_via_blitsync_t sync;
 } drm_via_dmablit_t;
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif				/* _VIA_DRM_H_ */
diff --git a/lib/libdrm/include/drm/vmwgfx_drm.h b/lib/libdrm/include/drm/vmwgfx_drm.h
index 5b68b4d10..d325a4107 100644
--- a/lib/libdrm/include/drm/vmwgfx_drm.h
+++ b/lib/libdrm/include/drm/vmwgfx_drm.h
@@ -30,6 +30,10 @@
 
 #include "drm.h"
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 #define DRM_VMW_MAX_SURFACE_FACES 6
 #define DRM_VMW_MAX_MIP_LEVELS 24
 
@@ -1087,4 +1091,9 @@ union drm_vmw_extended_context_arg {
 	enum drm_vmw_extended_context req;
 	struct drm_vmw_context_arg rep;
 };
+
+#if defined(__cplusplus)
+}
+#endif
+
 #endif
diff --git a/lib/libdrm/libkms/exynos.c b/lib/libdrm/libkms/exynos.c
index 0e97fb519..c20b6b05d 100644
--- a/lib/libdrm/libkms/exynos.c
+++ b/lib/libdrm/libkms/exynos.c
@@ -5,10 +5,24 @@
  *	SooChan Lim <sc1.lim@samsung.com>
  *      Sangjin LEE <lsj119@samsung.com>
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifdef HAVE_CONFIG_H
diff --git a/lib/libdrm/tests/amdgpu/Makefile.am b/lib/libdrm/tests/amdgpu/Makefile.am
index 9e0857844..e79c1bd3f 100644
--- a/lib/libdrm/tests/amdgpu/Makefile.am
+++ b/lib/libdrm/tests/amdgpu/Makefile.am
@@ -1,7 +1,8 @@
 AM_CFLAGS = \
 	-I $(top_srcdir)/include/drm \
 	-I $(top_srcdir)/amdgpu \
-	-I $(top_srcdir)
+	-I $(top_srcdir) \
+	-pthread
 
 LDADD = $(top_builddir)/libdrm.la \
 	$(top_builddir)/amdgpu/libdrm_amdgpu.la \
@@ -27,4 +28,8 @@ amdgpu_test_SOURCES = \
 	vce_tests.c \
 	vce_ib.h \
 	frame.h \
-	vcn_tests.c
+	uvd_enc_tests.c \
+	vcn_tests.c \
+	uve_ib.h \
+	deadlock_tests.c \
+	vm_tests.c
diff --git a/lib/libdrm/tests/amdgpu/amdgpu_test.c b/lib/libdrm/tests/amdgpu/amdgpu_test.c
index 1d44b09ef..8fa3399a4 100644
--- a/lib/libdrm/tests/amdgpu/amdgpu_test.c
+++ b/lib/libdrm/tests/amdgpu/amdgpu_test.c
@@ -49,6 +49,17 @@
 #include "CUnit/Basic.h"
 
 #include "amdgpu_test.h"
+#include "amdgpu_internal.h"
+
+/* Test suite names */
+#define BASIC_TESTS_STR "Basic Tests"
+#define BO_TESTS_STR "BO Tests"
+#define CS_TESTS_STR "CS Tests"
+#define VCE_TESTS_STR "VCE Tests"
+#define VCN_TESTS_STR "VCN Tests"
+#define UVD_ENC_TESTS_STR "UVD ENC Tests"
+#define DEADLOCK_TESTS_STR "Deadlock Tests"
+#define VM_TESTS_STR "VM Tests"
 
 /**
  *  Open handles for amdgpu devices
@@ -62,55 +73,150 @@ int open_render_node = 0;	/* By default run most tests on primary node */
 /** The table of all known test suites to run */
 static CU_SuiteInfo suites[] = {
 	{
-		.pName = "Basic Tests",
+		.pName = BASIC_TESTS_STR,
 		.pInitFunc = suite_basic_tests_init,
 		.pCleanupFunc = suite_basic_tests_clean,
 		.pTests = basic_tests,
 	},
 	{
-		.pName = "BO Tests",
+		.pName = BO_TESTS_STR,
 		.pInitFunc = suite_bo_tests_init,
 		.pCleanupFunc = suite_bo_tests_clean,
 		.pTests = bo_tests,
 	},
 	{
-		.pName = "CS Tests",
+		.pName = CS_TESTS_STR,
 		.pInitFunc = suite_cs_tests_init,
 		.pCleanupFunc = suite_cs_tests_clean,
 		.pTests = cs_tests,
 	},
 	{
-		.pName = "VCE Tests",
+		.pName = VCE_TESTS_STR,
 		.pInitFunc = suite_vce_tests_init,
 		.pCleanupFunc = suite_vce_tests_clean,
 		.pTests = vce_tests,
 	},
 	{
-		.pName = "VCN Tests",
+		.pName = VCN_TESTS_STR,
 		.pInitFunc = suite_vcn_tests_init,
 		.pCleanupFunc = suite_vcn_tests_clean,
 		.pTests = vcn_tests,
 	},
+	{
+		.pName = UVD_ENC_TESTS_STR,
+		.pInitFunc = suite_uvd_enc_tests_init,
+		.pCleanupFunc = suite_uvd_enc_tests_clean,
+		.pTests = uvd_enc_tests,
+	},
+	{
+		.pName = DEADLOCK_TESTS_STR,
+		.pInitFunc = suite_deadlock_tests_init,
+		.pCleanupFunc = suite_deadlock_tests_clean,
+		.pTests = deadlock_tests,
+	},
+	{
+		.pName = VM_TESTS_STR,
+		.pInitFunc = suite_vm_tests_init,
+		.pCleanupFunc = suite_vm_tests_clean,
+		.pTests = vm_tests,
+	},
+
 	CU_SUITE_INFO_NULL,
 };
 
+typedef CU_BOOL (*active__stat_func)(void);
+
+typedef struct Suites_Active_Status {
+	char*             pName;
+	active__stat_func pActive;
+}Suites_Active_Status;
+
+static CU_BOOL always_active()
+{
+	return CU_TRUE;
+}
+
+static Suites_Active_Status suites_active_stat[] = {
+		{
+			.pName = BASIC_TESTS_STR,
+			.pActive = always_active,
+		},
+		{
+			.pName = BO_TESTS_STR,
+			.pActive = always_active,
+		},
+		{
+			.pName = CS_TESTS_STR,
+			.pActive = suite_cs_tests_enable,
+		},
+		{
+			.pName = VCE_TESTS_STR,
+			.pActive = suite_vce_tests_enable,
+		},
+		{
+			.pName = VCN_TESTS_STR,
+			.pActive = suite_vcn_tests_enable,
+		},
+		{
+			.pName = UVD_ENC_TESTS_STR,
+			.pActive = suite_uvd_enc_tests_enable,
+		},
+		{
+			.pName = DEADLOCK_TESTS_STR,
+			.pActive = suite_deadlock_tests_enable,
+		},
+		{
+			.pName = VM_TESTS_STR,
+			.pActive = always_active,
+		},
+};
+
 
-/** Display information about all  suites and their tests */
+/*
+ * Display information about all suites and their tests
+ *
+ * NOTE: Must be run after registry is initialized and suites registered.
+ */
 static void display_test_suites(void)
 {
 	int iSuite;
 	int iTest;
+	CU_pSuite pSuite = NULL;
+	CU_pTest  pTest  = NULL;
 
 	printf("Suites\n");
 
 	for (iSuite = 0; suites[iSuite].pName != NULL; iSuite++) {
-		printf("Suite id = %d: Name '%s'\n",
-				iSuite + 1, suites[iSuite].pName);
+
+		pSuite = CU_get_suite_by_index((unsigned int) iSuite + 1,
+						      CU_get_registry());
+
+		if (!pSuite) {
+			fprintf(stderr, "Invalid suite id : %d\n", iSuite + 1);
+			continue;
+		}
+
+		printf("Suite id = %d: Name '%s status: %s'\n",
+				iSuite + 1, suites[iSuite].pName,
+				pSuite->fActive ? "ENABLED" : "DISABLED");
+
+
 
 		for (iTest = 0; suites[iSuite].pTests[iTest].pName != NULL;
 			iTest++) {
-			printf("	Test id %d: Name: '%s'\n", iTest + 1,
-					suites[iSuite].pTests[iTest].pName);
+
+			pTest = CU_get_test_by_index((unsigned int) iTest + 1,
+									pSuite);
+
+			if (!pTest) {
+				fprintf(stderr, "Invalid test id : %d\n", iTest + 1);
+				continue;
+			}
+
+			printf("Test id %d: Name: '%s status: %s'\n", iTest + 1,
+					suites[iSuite].pTests[iTest].pName,
+					pSuite->fActive && pTest->fActive ?
+						     "ENABLED" : "DISABLED");
 		}
 	}
 }
@@ -118,7 +224,7 @@ static void display_test_suites(void)
 
 /** Help string for command line parameters */
 static const char usage[] =
-	"Usage: %s [-hlpr] [<-s <suite id>> [-t <test id>]] "
+	"Usage: %s [-hlpr] [<-s <suite id>> [-t <test id>] [-f]] "
 	"[-b <pci_bus_id> [-d <pci_device_id>]]\n"
 	"where:\n"
 	"       l - Display all suites and their tests\n"
@@ -126,9 +232,10 @@ static const char usage[] =
 	"       b - Specify device's PCI bus id to run tests\n"
 	"       d - Specify device's PCI device id to run tests (optional)\n"
 	"       p - Display information of AMDGPU devices in system\n"
+	"       f - Force executing inactive suite or test\n"
 	"       h - Display this help\n";
 /** Specified options strings for getopt */
-static const char options[]   = "hlrps:t:b:d:";
+static const char options[]   = "hlrps:t:b:d:f";
 
 /* Open AMD devices.
  * Return the number of AMD device openned.
@@ -293,6 +400,46 @@ static int amdgpu_find_device(uint8_t bus, uint16_t dev)
 	return -1;
 }
 
+static void amdgpu_disable_suits()
+{
+	amdgpu_device_handle device_handle;
+	uint32_t major_version, minor_version, family_id;
+	int i;
+	int size = sizeof(suites_active_stat) / sizeof(suites_active_stat[0]);
+
+	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+				   &minor_version, &device_handle))
+		return;
+
+	family_id = device_handle->info.family_id;
+
+	if (amdgpu_device_deinitialize(device_handle))
+		return;
+
+	/* Set active status for suites based on their policies */
+	for (i = 0; i < size; ++i)
+		if (amdgpu_set_suite_active(suites_active_stat[i].pName,
+				suites_active_stat[i].pActive()))
+			fprintf(stderr, "suit deactivation failed - %s\n", CU_get_error_msg());
+
+	/* Explicitly disable specific tests due to known bugs or preferences */
+	/*
+	* BUG: Compute ring stalls and never recovers when the address is
+	* written after the command has already been submitted
+	*/
+	if (amdgpu_set_test_active(DEADLOCK_TESTS_STR, "compute ring block test", CU_FALSE))
+		fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+
+	if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE))
+		fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+
+
+	/* This test has only been run on GFX8 and GFX9 */
+	if (family_id < AMDGPU_FAMILY_VI || family_id > AMDGPU_FAMILY_RV)
+		if (amdgpu_set_test_active(BASIC_TESTS_STR, "Sync dependency Test", CU_FALSE))
+			fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+}
+
 /* The main() function for setting up and running the tests.
  * Returns a CUE_SUCCESS on successful running, another
  * CUnit error code on failure.
@@ -309,6 +456,8 @@ int main(int argc, char **argv)
 	CU_pSuite pSuite = NULL;
 	CU_pTest  pTest  = NULL;
 	int test_device_index;
+	int display_list = 0;
+	int force_run = 0;
 
 	for (i = 0; i < MAX_CARDS_SUPPORTED; i++)
 		drm_amdgpu[i] = -1;
@@ -319,8 +468,8 @@ int main(int argc, char **argv)
 	while ((c = getopt(argc, argv, options)) != -1) {
 		switch (c) {
 		case 'l':
-			display_test_suites();
-			exit(EXIT_SUCCESS);
+			display_list = 1;
+			break;
 		case 's':
 			suite_id = atoi(optarg);
 			break;
@@ -339,6 +488,9 @@ int main(int argc, char **argv)
 		case 'r':
 			open_render_node = 1;
 			break;
+		case 'f':
+			force_run = 1;
+			break;
 		case '?':
 		case 'h':
 			fprintf(stderr, usage, argv[0]);
@@ -404,17 +556,33 @@ int main(int argc, char **argv)
 	/* Run tests using the CUnit Basic interface */
 	CU_basic_set_mode(CU_BRM_VERBOSE);
 
+	/* Disable suites and individual tests based on misc. conditions */
+	amdgpu_disable_suits();
+
+	if (display_list) {
+		display_test_suites();
+		goto end;
+	}
+
 	if (suite_id != -1) {	/* If user specify particular suite? */
 		pSuite = CU_get_suite_by_index((unsigned int) suite_id,
 						CU_get_registry());
 
 		if (pSuite) {
+
+			if (force_run)
+				CU_set_suite_active(pSuite, CU_TRUE);
+
 			if (test_id != -1) {   /* If user specify test id */
 				pTest = CU_get_test_by_index(
 						(unsigned int) test_id,
 						pSuite);
-				if (pTest)
+				if (pTest) {
+					if (force_run)
+						CU_set_test_active(pTest, CU_TRUE);
+
 					CU_basic_run_test(pSuite, pTest);
+				}
 				else {
 					fprintf(stderr, "Invalid test id: %d\n",
 								test_id);
@@ -434,6 +602,7 @@ int main(int argc, char **argv)
 	} else
 		CU_basic_run_tests();
 
+end:
 	CU_cleanup_registry();
 	amdgpu_close_devices();
 	return CU_get_error();
diff --git a/lib/libdrm/tests/amdgpu/amdgpu_test.h b/lib/libdrm/tests/amdgpu/amdgpu_test.h
index c75a07a44..3238e05f6 100644
--- a/lib/libdrm/tests/amdgpu/amdgpu_test.h
+++ b/lib/libdrm/tests/amdgpu/amdgpu_test.h
@@ -85,6 +85,11 @@ int suite_cs_tests_init();
 int suite_cs_tests_clean();
 
 /**
+ * Decide if the suite is enabled by default or not.
+ */
+CU_BOOL suite_cs_tests_enable(void);
+
+/**
  * Tests in cs test suite
  */
 extern CU_TestInfo cs_tests[];
@@ -100,6 +105,11 @@ int suite_vce_tests_init();
 int suite_vce_tests_clean();
 
 /**
+ * Decide if the suite is enabled by default or not.
+ */
+CU_BOOL suite_vce_tests_enable(void);
+
+/**
  * Tests in vce test suite
  */
 extern CU_TestInfo vce_tests[];
@@ -115,11 +125,71 @@ int suite_vcn_tests_init();
 int suite_vcn_tests_clean();
 
 /**
+ * Decide if the suite is enabled by default or not.
+ */
+CU_BOOL suite_vcn_tests_enable(void);
+
+/**
 + * Tests in vcn test suite
 + */
 extern CU_TestInfo vcn_tests[];
 
 /**
+ * Initialize uvd enc test suite
+ */
+int suite_uvd_enc_tests_init();
+
+/**
+ * Deinitialize uvd enc test suite
+ */
+int suite_uvd_enc_tests_clean();
+
+/**
+ * Decide if the suite is enabled by default or not.
+ */
+CU_BOOL suite_uvd_enc_tests_enable(void);
+
+/**
+ * Tests in uvd enc test suite
+ */
+extern CU_TestInfo uvd_enc_tests[];
+
+/**
+ * Initialize deadlock test suite
+ */
+int suite_deadlock_tests_init();
+
+/**
+ * Deinitialize deadlock test suite
+ */
+int suite_deadlock_tests_clean();
+
+/**
+ * Decide if the suite is enabled by default or not.
+ */
+CU_BOOL suite_deadlock_tests_enable(void);
+
+/**
+ * Tests in deadlock test suite
+ */
+extern CU_TestInfo deadlock_tests[];
+
+/**
+ * Initialize vm test suite
+ */
+int suite_vm_tests_init();
+
+/**
+ * Deinitialize vm test suite
+ */
+int suite_vm_tests_clean();
+
+/**
+ * Tests in vm test suite
+ */
+extern CU_TestInfo vm_tests[];
+
+/**
  * Helper functions
  */
 static inline amdgpu_bo_handle gpu_mem_alloc(
@@ -251,4 +321,35 @@ amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1,
 	return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list);
 }
 
+
+static inline CU_ErrorCode amdgpu_set_suite_active(const char *suit_name,
+							  CU_BOOL active)
+{
+	CU_ErrorCode r = CU_set_suite_active(CU_get_suite(suit_name), active);
+
+	if (r != CUE_SUCCESS)
+		fprintf(stderr, "Failed to obtain suite %s\n", suit_name);
+
+	return r;
+}
+
+static inline CU_ErrorCode amdgpu_set_test_active(const char *suit_name,
+				  const char *test_name, CU_BOOL active)
+{
+	CU_ErrorCode r;
+	CU_pSuite pSuite = CU_get_suite(suit_name);
+
+	if (!pSuite) {
+		fprintf(stderr, "Failed to obtain suite %s\n",
+				suit_name);
+		return CUE_NOSUITE;
+	}
+
+	r = CU_set_test_active(CU_get_test(pSuite, test_name), active);
+	if (r != CUE_SUCCESS)
+		fprintf(stderr, "Failed to obtain test %s\n", test_name);
+
+	return r;
+}
+
 #endif  /* #ifdef _AMDGPU_TEST_H_ */
diff --git a/lib/libdrm/tests/amdgpu/basic_tests.c b/lib/libdrm/tests/amdgpu/basic_tests.c
index 8d5844b67..474a679c0 100644
--- a/lib/libdrm/tests/amdgpu/basic_tests.c
+++ b/lib/libdrm/tests/amdgpu/basic_tests.c
@@ -31,6 +31,7 @@
 #ifdef HAVE_ALLOCA_H
 # include <alloca.h>
 #endif
+#include <sys/wait.h>
 
 #include "CUnit/Basic.h"
 
@@ -40,15 +41,16 @@
 static  amdgpu_device_handle device_handle;
 static  uint32_t  major_version;
 static  uint32_t  minor_version;
+static  uint32_t  family_id;
 
 static void amdgpu_query_info_test(void);
-static void amdgpu_memory_alloc(void);
 static void amdgpu_command_submission_gfx(void);
 static void amdgpu_command_submission_compute(void);
 static void amdgpu_command_submission_multi_fence(void);
 static void amdgpu_command_submission_sdma(void);
 static void amdgpu_userptr_test(void);
 static void amdgpu_semaphore_test(void);
+static void amdgpu_sync_dependency_test(void);
 
 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
@@ -56,13 +58,13 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
 
 CU_TestInfo basic_tests[] = {
 	{ "Query Info Test",  amdgpu_query_info_test },
-	{ "Memory alloc Test",  amdgpu_memory_alloc },
 	{ "Userptr Test",  amdgpu_userptr_test },
 	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
 	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
 	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
 	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
 	{ "SW semaphore Test",  amdgpu_semaphore_test },
+	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
 	CU_TEST_INFO_NULL,
 };
 #define BUFFER_SIZE (8 * 1024)
@@ -199,22 +201,110 @@ CU_TestInfo basic_tests[] = {
 #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
 #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
 
+#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
+						(((b) & 0x1) << 26) |		\
+						(((t) & 0x1) << 23) |		\
+						(((s) & 0x1) << 22) |		\
+						(((cnt) & 0xFFFFF) << 0))
+#define	SDMA_OPCODE_COPY_SI	3
+#define SDMA_OPCODE_CONSTANT_FILL_SI	13
+#define SDMA_NOP_SI  0xf
+#define GFX_COMPUTE_NOP_SI 0x80000000
+#define	PACKET3_DMA_DATA_SI	0x41
+#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
+		/* 0 - ME
+		 * 1 - PFP
+		 */
+#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
+		/* 0 - DST_ADDR using DAS
+		 * 1 - GDS
+		 * 3 - DST_ADDR using L2
+		 */
+#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
+		/* 0 - SRC_ADDR using SAS
+		 * 1 - GDS
+		 * 2 - DATA
+		 * 3 - SRC_ADDR using L2
+		 */
+#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
+
+
+#define PKT3_CONTEXT_CONTROL                   0x28
+#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
+#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
+#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
+
+#define PKT3_CLEAR_STATE                       0x12
+
+#define PKT3_SET_SH_REG                        0x76
+#define		PACKET3_SET_SH_REG_START			0x00002c00
+
+#define	PACKET3_DISPATCH_DIRECT				0x15
+
+
+/* gfx 8 */
+#define mmCOMPUTE_PGM_LO                                                        0x2e0c
+#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
+#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
+#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
+#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
+#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
+#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
+
+
+/* Reverse the byte order of a 32-bit value; evaluated as uint32_t and fully parenthesized. */
+#define SWAP_32(num) (((((uint32_t)(num)) >> 24) & 0xff) | \
+			((((uint32_t)(num)) << 8) & 0xff0000) | \
+			((((uint32_t)(num)) >> 8) & 0xff00) | \
+			((((uint32_t)(num)) << 24) & 0xff000000))
+
+
+/* Shader code:
+ * void main()
+ * {
+ *
+ *	float x = some_input;
+ *	for (unsigned i = 0; i < 1000000; i++)
+ *		x = sin(x);
+ *
+ *	u[0] = 42u;
+ * }
+ */
+
+static  uint32_t shader_bin[] = {
+	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
+	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
+	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
+	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
+};
+
+#define CODE_OFFSET 512
+#define DATA_OFFSET 1024
+
+
 int suite_basic_tests_init(void)
 {
+	struct amdgpu_gpu_info gpu_info = {0};
 	int r;
 
 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
 				   &minor_version, &device_handle);
 
-	if (r == 0)
-		return CUE_SUCCESS;
-	else {
+	if (r) {
 		if ((r == -EACCES) && (errno == EACCES))
 			printf("\n\nError:%s. "
 				"Hint:Try to run this test program as root.",
 				strerror(errno));
 		return CUE_SINIT_FAILED;
 	}
+
+	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+	if (r)
+		return CUE_SINIT_FAILED;
+
+	family_id = gpu_info.family_id;
+
+	return CUE_SUCCESS;
 }
 
 int suite_basic_tests_clean(void)
@@ -241,53 +331,6 @@ static void amdgpu_query_info_test(void)
 	CU_ASSERT_EQUAL(r, 0);
 }
 
-static void amdgpu_memory_alloc(void)
-{
-	amdgpu_bo_handle bo;
-	amdgpu_va_handle va_handle;
-	uint64_t bo_mc;
-	int r;
-
-	/* Test visible VRAM */
-	bo = gpu_mem_alloc(device_handle,
-			4096, 4096,
-			AMDGPU_GEM_DOMAIN_VRAM,
-			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
-			&bo_mc, &va_handle);
-
-	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
-	CU_ASSERT_EQUAL(r, 0);
-
-	/* Test invisible VRAM */
-	bo = gpu_mem_alloc(device_handle,
-			4096, 4096,
-			AMDGPU_GEM_DOMAIN_VRAM,
-			AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
-			&bo_mc, &va_handle);
-
-	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
-	CU_ASSERT_EQUAL(r, 0);
-
-	/* Test GART Cacheable */
-	bo = gpu_mem_alloc(device_handle,
-			4096, 4096,
-			AMDGPU_GEM_DOMAIN_GTT,
-			0, &bo_mc, &va_handle);
-
-	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
-	CU_ASSERT_EQUAL(r, 0);
-
-	/* Test GART USWC */
-	bo = gpu_mem_alloc(device_handle,
-			4096, 4096,
-			AMDGPU_GEM_DOMAIN_GTT,
-			AMDGPU_GEM_CREATE_CPU_GTT_USWC,
-			&bo_mc, &va_handle);
-
-	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
-	CU_ASSERT_EQUAL(r, 0);
-}
-
 static void amdgpu_command_submission_gfx_separate_ibs(void)
 {
 	amdgpu_context_handle context_handle;
@@ -301,7 +344,7 @@ static void amdgpu_command_submission_gfx_separate_ibs(void)
 	uint32_t expired;
 	amdgpu_bo_list_handle bo_list;
 	amdgpu_va_handle va_handle, va_handle_ce;
-	int r;
+	int r, i = 0;
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	CU_ASSERT_EQUAL(r, 0);
@@ -326,12 +369,14 @@ static void amdgpu_command_submission_gfx_separate_ibs(void)
 
 	/* IT_SET_CE_DE_COUNTERS */
 	ptr = ib_result_ce_cpu;
-	ptr[0] = 0xc0008900;
-	ptr[1] = 0;
-	ptr[2] = 0xc0008400;
-	ptr[3] = 1;
+	if (family_id != AMDGPU_FAMILY_SI) {
+		ptr[i++] = 0xc0008900;
+		ptr[i++] = 0;
+	}
+	ptr[i++] = 0xc0008400;
+	ptr[i++] = 1;
 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
-	ib_info[0].size = 4;
+	ib_info[0].size = i;
 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
 
 	/* IT_WAIT_ON_CE_COUNTER */
@@ -390,7 +435,7 @@ static void amdgpu_command_submission_gfx_shared_ib(void)
 	uint32_t expired;
 	amdgpu_bo_list_handle bo_list;
 	amdgpu_va_handle va_handle;
-	int r;
+	int r, i = 0;
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	CU_ASSERT_EQUAL(r, 0);
@@ -409,12 +454,14 @@ static void amdgpu_command_submission_gfx_shared_ib(void)
 
 	/* IT_SET_CE_DE_COUNTERS */
 	ptr = ib_result_cpu;
-	ptr[0] = 0xc0008900;
-	ptr[1] = 0;
-	ptr[2] = 0xc0008400;
-	ptr[3] = 1;
+	if (family_id != AMDGPU_FAMILY_SI) {
+		ptr[i++] = 0xc0008900;
+		ptr[i++] = 0;
+	}
+	ptr[i++] = 0xc0008400;
+	ptr[i++] = 1;
 	ib_info[0].ib_mc_address = ib_result_mc_address;
-	ib_info[0].size = 4;
+	ib_info[0].size = i;
 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
 
 	ptr = (uint32_t *)ib_result_cpu + 4;
@@ -495,10 +542,19 @@ static void amdgpu_semaphore_test(void)
 	struct amdgpu_cs_fence fence_status = {0};
 	uint32_t *ptr;
 	uint32_t expired;
+	uint32_t sdma_nop, gfx_nop;
 	amdgpu_bo_list_handle bo_list[2];
 	amdgpu_va_handle va_handle[2];
 	int r, i;
 
+	if (family_id == AMDGPU_FAMILY_SI) {
+		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
+		gfx_nop = GFX_COMPUTE_NOP_SI;
+	} else {
+		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
+		gfx_nop = GFX_COMPUTE_NOP;
+	}
+
 	r = amdgpu_cs_create_semaphore(&sem);
 	CU_ASSERT_EQUAL(r, 0);
 	for (i = 0; i < 2; i++) {
@@ -518,7 +574,7 @@ static void amdgpu_semaphore_test(void)
 
 	/* 1. same context different engine */
 	ptr = ib_result_cpu[0];
-	ptr[0] = SDMA_NOP;
+	ptr[0] = sdma_nop;
 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
 	ib_info[0].size = 1;
 
@@ -535,7 +591,7 @@ static void amdgpu_semaphore_test(void)
 	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
 	CU_ASSERT_EQUAL(r, 0);
 	ptr = ib_result_cpu[1];
-	ptr[0] = GFX_COMPUTE_NOP;
+	ptr[0] = gfx_nop;
 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
 	ib_info[1].size = 1;
 
@@ -559,7 +615,7 @@ static void amdgpu_semaphore_test(void)
 
 	/* 2. same engine different context */
 	ptr = ib_result_cpu[0];
-	ptr[0] = GFX_COMPUTE_NOP;
+	ptr[0] = gfx_nop;
 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
 	ib_info[0].size = 1;
 
@@ -576,7 +632,7 @@ static void amdgpu_semaphore_test(void)
 	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
 	CU_ASSERT_EQUAL(r, 0);
 	ptr = ib_result_cpu[1];
-	ptr[0] = GFX_COMPUTE_NOP;
+	ptr[0] = gfx_nop;
 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
 	ib_info[1].size = 1;
 
@@ -597,6 +653,7 @@ static void amdgpu_semaphore_test(void)
 					 500000000, 0, &expired);
 	CU_ASSERT_EQUAL(r, 0);
 	CU_ASSERT_EQUAL(expired, true);
+
 	for (i = 0; i < 2; i++) {
 		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
 					     ib_result_mc_address[i], 4096);
@@ -627,11 +684,15 @@ static void amdgpu_command_submission_compute_nop(void)
 	int i, r, instance;
 	amdgpu_bo_list_handle bo_list;
 	amdgpu_va_handle va_handle;
+	struct drm_amdgpu_info_hw_ip info;
+
+	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
+	CU_ASSERT_EQUAL(r, 0);
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	CU_ASSERT_EQUAL(r, 0);
 
-	for (instance = 0; instance < 8; instance++) {
+	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
 					    AMDGPU_GEM_DOMAIN_GTT, 0,
 					    &ib_result_handle, &ib_result_cpu,
@@ -643,8 +704,8 @@ static void amdgpu_command_submission_compute_nop(void)
 		CU_ASSERT_EQUAL(r, 0);
 
 		ptr = ib_result_cpu;
-		for (i = 0; i < 16; ++i)
-			ptr[i] = 0xffff1000;
+		memset(ptr, 0, 16);
+		ptr[0]=PACKET3(PACKET3_NOP, 14);
 
 		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
 		ib_info.ib_mc_address = ib_result_mc_address;
@@ -805,16 +866,12 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
 	uint32_t *pm4;
 	struct amdgpu_cs_ib_info *ib_info;
 	struct amdgpu_cs_request *ibs_request;
-	struct amdgpu_gpu_info gpu_info = {0};
 	uint64_t bo_mc;
 	volatile uint32_t *bo_cpu;
 	int i, j, r, loop;
 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
 	amdgpu_va_handle va_handle;
 
-	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
-	CU_ASSERT_EQUAL(r, 0);
-
 	pm4 = calloc(pm4_dw, sizeof(*pm4));
 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
 
@@ -850,13 +907,17 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
 		/* fulfill PM4: test DMA write-linear */
 		i = j = 0;
 		if (ip_type == AMDGPU_HW_IP_DMA) {
-			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
-					       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+			if (family_id == AMDGPU_FAMILY_SI)
+				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
+							  sdma_write_length);
+			else
+				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+						       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 			pm4[i++] = 0xffffffff & bo_mc;
 			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
-			if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
+			if (family_id >= AMDGPU_FAMILY_AI)
 				pm4[i++] = sdma_write_length - 1;
-			else
+			else if (family_id != AMDGPU_FAMILY_SI)
 				pm4[i++] = sdma_write_length;
 			while(j++ < sdma_write_length)
 				pm4[i++] = 0xdeadbeaf;
@@ -913,16 +974,12 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
 	uint32_t *pm4;
 	struct amdgpu_cs_ib_info *ib_info;
 	struct amdgpu_cs_request *ibs_request;
-	struct amdgpu_gpu_info gpu_info = {0};
 	uint64_t bo_mc;
 	volatile uint32_t *bo_cpu;
 	int i, j, r, loop;
 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
 	amdgpu_va_handle va_handle;
 
-	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
-	CU_ASSERT_EQUAL(r, 0);
-
 	pm4 = calloc(pm4_dw, sizeof(*pm4));
 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
 
@@ -957,27 +1014,47 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
 		/* fulfill PM4: test DMA const fill */
 		i = j = 0;
 		if (ip_type == AMDGPU_HW_IP_DMA) {
-			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
-					       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
-			pm4[i++] = 0xffffffff & bo_mc;
-			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
-			pm4[i++] = 0xdeadbeaf;
-			if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
-				pm4[i++] = sdma_write_length - 1;
-			else
-				pm4[i++] = sdma_write_length;
+			if (family_id == AMDGPU_FAMILY_SI) {
+				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 0, 0, 0,
+							  sdma_write_length / 4);
+				pm4[i++] = 0xfffffffc & bo_mc;
+				pm4[i++] = 0xdeadbeaf;
+				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
+			} else {
+				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
+						       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
+				pm4[i++] = 0xffffffff & bo_mc;
+				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
+				pm4[i++] = 0xdeadbeaf;
+				if (family_id >= AMDGPU_FAMILY_AI)
+					pm4[i++] = sdma_write_length - 1;
+				else
+					pm4[i++] = sdma_write_length;
+			}
 		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
 			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
-			pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
-			pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
-				PACKET3_DMA_DATA_DST_SEL(0) |
-				PACKET3_DMA_DATA_SRC_SEL(2) |
-				PACKET3_DMA_DATA_CP_SYNC;
-			pm4[i++] = 0xdeadbeaf;
-			pm4[i++] = 0;
-			pm4[i++] = 0xfffffffc & bo_mc;
-			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
-			pm4[i++] = sdma_write_length;
+			if (family_id == AMDGPU_FAMILY_SI) {
+				pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
+				pm4[i++] = 0xdeadbeaf;
+				pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
+					PACKET3_DMA_DATA_SI_DST_SEL(0) |
+					PACKET3_DMA_DATA_SI_SRC_SEL(2) |
+					PACKET3_DMA_DATA_SI_CP_SYNC;
+				pm4[i++] = 0xffffffff & bo_mc;
+				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
+				pm4[i++] = sdma_write_length;
+			} else {
+				pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
+				pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
+					PACKET3_DMA_DATA_DST_SEL(0) |
+					PACKET3_DMA_DATA_SRC_SEL(2) |
+					PACKET3_DMA_DATA_CP_SYNC;
+				pm4[i++] = 0xdeadbeaf;
+				pm4[i++] = 0;
+				pm4[i++] = 0xfffffffc & bo_mc;
+				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
+				pm4[i++] = sdma_write_length;
+			}
 		}
 
 		amdgpu_test_exec_cs_helper(context_handle,
@@ -1023,16 +1100,12 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 	uint32_t *pm4;
 	struct amdgpu_cs_ib_info *ib_info;
 	struct amdgpu_cs_request *ibs_request;
-	struct amdgpu_gpu_info gpu_info = {0};
 	uint64_t bo1_mc, bo2_mc;
 	volatile unsigned char *bo1_cpu, *bo2_cpu;
 	int i, j, r, loop1, loop2;
 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
 
-	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
-	CU_ASSERT_EQUAL(r, 0);
-
 	pm4 = calloc(pm4_dw, sizeof(*pm4));
 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
 
@@ -1083,28 +1156,51 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 			/* fulfill PM4: test DMA copy linear */
 			i = j = 0;
 			if (ip_type == AMDGPU_HW_IP_DMA) {
-				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
-				if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
-					pm4[i++] = sdma_write_length - 1;
-				else
-					pm4[i++] = sdma_write_length;
-				pm4[i++] = 0;
-				pm4[i++] = 0xffffffff & bo1_mc;
-				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
-				pm4[i++] = 0xffffffff & bo2_mc;
-				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
+				if (family_id == AMDGPU_FAMILY_SI) {
+					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
+								  sdma_write_length);
+					pm4[i++] = 0xffffffff & bo2_mc;
+					pm4[i++] = 0xffffffff & bo1_mc;
+					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
+					pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
+				} else {
+					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+					if (family_id >= AMDGPU_FAMILY_AI)
+						pm4[i++] = sdma_write_length - 1;
+					else
+						pm4[i++] = sdma_write_length;
+					pm4[i++] = 0;
+					pm4[i++] = 0xffffffff & bo1_mc;
+					pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
+					pm4[i++] = 0xffffffff & bo2_mc;
+					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
+				}
+
 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
 				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
-				pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
-				pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
-					PACKET3_DMA_DATA_DST_SEL(0) |
-					PACKET3_DMA_DATA_SRC_SEL(0) |
-					PACKET3_DMA_DATA_CP_SYNC;
-				pm4[i++] = 0xfffffffc & bo1_mc;
-				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
-				pm4[i++] = 0xfffffffc & bo2_mc;
-				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
-				pm4[i++] = sdma_write_length;
+				if (family_id == AMDGPU_FAMILY_SI) {
+					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
+					pm4[i++] = 0xfffffffc & bo1_mc;
+					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
+						PACKET3_DMA_DATA_SI_DST_SEL(0) |
+						PACKET3_DMA_DATA_SI_SRC_SEL(0) |
+						PACKET3_DMA_DATA_SI_CP_SYNC |
+						(0xffff00000000 & bo1_mc) >> 32;
+					pm4[i++] = 0xfffffffc & bo2_mc;
+					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
+					pm4[i++] = sdma_write_length;
+				} else {
+					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
+					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
+						PACKET3_DMA_DATA_DST_SEL(0) |
+						PACKET3_DMA_DATA_SRC_SEL(0) |
+						PACKET3_DMA_DATA_CP_SYNC;
+					pm4[i++] = 0xfffffffc & bo1_mc;
+					pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
+					pm4[i++] = 0xfffffffc & bo2_mc;
+					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
+					pm4[i++] = sdma_write_length;
+				}
 			}
 
 			amdgpu_test_exec_cs_helper(context_handle,
@@ -1165,7 +1261,7 @@ static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
 	amdgpu_bo_list_handle bo_list;
 	amdgpu_va_handle va_handle, va_handle_ce;
 	int r;
-	int i, ib_cs_num = 2;
+	int i = 0, ib_cs_num = 2;
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	CU_ASSERT_EQUAL(r, 0);
@@ -1190,12 +1286,14 @@ static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
 
 	/* IT_SET_CE_DE_COUNTERS */
 	ptr = ib_result_ce_cpu;
-	ptr[0] = 0xc0008900;
-	ptr[1] = 0;
-	ptr[2] = 0xc0008400;
-	ptr[3] = 1;
+	if (family_id != AMDGPU_FAMILY_SI) {
+		ptr[i++] = 0xc0008900;
+		ptr[i++] = 0;
+	}
+	ptr[i++] = 0xc0008400;
+	ptr[i++] = 1;
 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
-	ib_info[0].size = 4;
+	ib_info[0].size = i;
 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
 
 	/* IT_WAIT_ON_CE_COUNTER */
@@ -1296,15 +1394,28 @@ static void amdgpu_userptr_test(void)
 	handle = buf_handle;
 
 	j = i = 0;
-	pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
-			       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+
+	if (family_id == AMDGPU_FAMILY_SI)
+		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
+				sdma_write_length);
+	else
+		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 	pm4[i++] = 0xffffffff & bo_mc;
 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
-	pm4[i++] = sdma_write_length;
+	if (family_id >= AMDGPU_FAMILY_AI)
+		pm4[i++] = sdma_write_length - 1;
+	else if (family_id != AMDGPU_FAMILY_SI)
+		pm4[i++] = sdma_write_length;
 
 	while (j++ < sdma_write_length)
 		pm4[i++] = 0xdeadbeaf;
 
+	if (!fork()) {
+		pm4[0] = 0x0;
+		exit(0);
+	}
+
 	amdgpu_test_exec_cs_helper(context_handle,
 				   AMDGPU_HW_IP_DMA, 0,
 				   i, pm4,
@@ -1328,4 +1439,212 @@ static void amdgpu_userptr_test(void)
 
 	r = amdgpu_cs_ctx_free(context_handle);
 	CU_ASSERT_EQUAL(r, 0);
+
+	wait(NULL);
+}
+
+/*
+ * Cross-context dependency test: a dispatch is submitted on context 1, then
+ * a WRITE_DATA on context 0 that lists the first submission's fence as a
+ * dependency. After the second fence signals, ptr[DATA_OFFSET] must hold the
+ * WRITE_DATA value (99).
+ */
+static void amdgpu_sync_dependency_test(void)
+{
+	/* Single size shared by the allocation and the matching free. */
+	const unsigned bo_size = 8192;
+	amdgpu_context_handle context_handle[2];
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status;
+	uint32_t expired;
+	int i, j, r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t *ptr;
+	uint64_t seq_no;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+			       &bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	ptr = ib_result_cpu;
+	i = 0;
+
+	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
+
+	/* Dispatch minimal init config and verify it's executed */
+	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
+	ptr[i++] = 0x80000000;
+	ptr[i++] = 0x80000000;
+
+	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
+	ptr[i++] = 0x80000000;
+
+	/* Program compute regs */
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
+	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
+	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
+	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
+	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
+	/*
+	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
+	 *                                       SGPRS = 1
+	 *                                       PRIORITY = 0
+	 *                                       FLOAT_MODE = 192 (0xc0)
+	 *                                       PRIV = 0
+	 *                                       DX10_CLAMP = 1
+	 *                                       DEBUG_MODE = 0
+	 *                                       IEEE_MODE = 0
+	 *                                       BULKY = 0
+	 *                                       CDBG_USER = 0
+	 *
+	 */
+	ptr[i++] = 0x002c0040;
+
+	/*
+	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
+	 *                                       USER_SGPR = 8
+	 *                                       TRAP_PRESENT = 0
+	 *                                       TGID_X_EN = 0
+	 *                                       TGID_Y_EN = 0
+	 *                                       TGID_Z_EN = 0
+	 *                                       TG_SIZE_EN = 0
+	 *                                       TIDIG_COMP_CNT = 0
+	 *                                       EXCP_EN_MSB = 0
+	 *                                       LDS_SIZE = 0
+	 *                                       EXCP_EN = 0
+	 *
+	 */
+	ptr[i++] = 0x00000010;
+
+	/*
+	 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
+	 *                                          WAVESIZE = 0
+	 *
+	 */
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
+	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 0x00000100;
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
+	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
+	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
+	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 0;
+
+	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
+	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+
+	/* Dispatch */
+	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+	ptr[i++] = 1;
+	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
+
+	while (i & 7)
+		ptr[i++] =  0xffff1000; /* type3 nop packet */
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = i;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+
+	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
+	CU_ASSERT_EQUAL(r, 0);
+	seq_no = ibs_request.seq_no;
+
+	/* Prepare second command with dependency on the first */
+	j = i;
+	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
+	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
+	ptr[i++] = 99;
+
+	while (i & 7)
+		ptr[i++] =  0xffff1000; /* type3 nop packet */
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
+	ib_info.size = i - j;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+
+	ibs_request.number_of_dependencies = 1;
+
+	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
+	CU_ASSERT_NOT_EQUAL(ibs_request.dependencies, NULL);
+	ibs_request.dependencies[0].context = context_handle[1];
+	ibs_request.dependencies[0].ip_instance = 0;
+	ibs_request.dependencies[0].ring = 0;
+	ibs_request.dependencies[0].fence = seq_no;
+
+	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
+	CU_ASSERT_EQUAL(r, 0);
+
+	/* Wait for the dependent (second) submission's fence. */
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle[0];
+	fence_status.ip_type = AMDGPU_HW_IP_GFX;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = ibs_request.seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
+	CU_ASSERT_EQUAL(r, 0);
+
+	/* Expect the second command to wait for shader to complete */
+	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, bo_size);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_cs_ctx_free(context_handle[0]);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_cs_ctx_free(context_handle[1]);
+	CU_ASSERT_EQUAL(r, 0);
+
+	free(ibs_request.dependencies);
+}
diff --git a/lib/libdrm/tests/amdgpu/bo_tests.c b/lib/libdrm/tests/amdgpu/bo_tests.c
index 74b5e77b2..24698bcb0 100644
--- a/lib/libdrm/tests/amdgpu/bo_tests.c
+++ b/lib/libdrm/tests/amdgpu/bo_tests.c
@@ -46,13 +46,15 @@ static amdgpu_va_handle va_handle;
 static void amdgpu_bo_export_import(void);
 static void amdgpu_bo_metadata(void);
 static void amdgpu_bo_map_unmap(void);
+static void amdgpu_memory_alloc(void);
+static void amdgpu_mem_fail_alloc(void);
 
 CU_TestInfo bo_tests[] = {
 	{ "Export/Import",  amdgpu_bo_export_import },
-#if 0
 	{ "Metadata",  amdgpu_bo_metadata },
-#endif
 	{ "CPU map/unmap",  amdgpu_bo_map_unmap },
+	{ "Memory alloc Test",  amdgpu_memory_alloc },
+	{ "Memory fail alloc Test",  amdgpu_mem_fail_alloc },
 	CU_TEST_INFO_NULL,
 };
 
@@ -195,3 +197,72 @@ static void amdgpu_bo_map_unmap(void)
 	r = amdgpu_bo_cpu_unmap(buffer_handle);
 	CU_ASSERT_EQUAL(r, 0);
 }
+
+/*
+ * Exercise gpu_mem_alloc()/gpu_mem_free() once per heap/flag combination.
+ */
+static void amdgpu_memory_alloc(void)
+{
+	const uint64_t sz = 4096;
+	amdgpu_va_handle va;
+	amdgpu_bo_handle buf;
+	uint64_t gpu_addr;
+	int r;
+
+	/* Test visible VRAM */
+	buf = gpu_mem_alloc(device_handle, sz, sz,
+			    AMDGPU_GEM_DOMAIN_VRAM,
+			    AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+			    &gpu_addr, &va);
+
+	r = gpu_mem_free(buf, va, gpu_addr, sz);
+	CU_ASSERT_EQUAL(r, 0);
+
+	/* Test invisible VRAM */
+	buf = gpu_mem_alloc(device_handle, sz, sz,
+			    AMDGPU_GEM_DOMAIN_VRAM,
+			    AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+			    &gpu_addr, &va);
+
+	r = gpu_mem_free(buf, va, gpu_addr, sz);
+	CU_ASSERT_EQUAL(r, 0);
+
+	/* Test GART Cacheable */
+	buf = gpu_mem_alloc(device_handle, sz, sz,
+			    AMDGPU_GEM_DOMAIN_GTT, 0,
+			    &gpu_addr, &va);
+
+	r = gpu_mem_free(buf, va, gpu_addr, sz);
+	CU_ASSERT_EQUAL(r, 0);
+
+	/* Test GART USWC */
+	buf = gpu_mem_alloc(device_handle, sz, sz,
+			    AMDGPU_GEM_DOMAIN_GTT,
+			    AMDGPU_GEM_CREATE_CPU_GTT_USWC,
+			    &gpu_addr, &va);
+
+	r = gpu_mem_free(buf, va, gpu_addr, sz);
+	CU_ASSERT_EQUAL(r, 0);
+}
+
+/* Negative test: an oversized VRAM request must be rejected with -ENOMEM. */
+static void amdgpu_mem_fail_alloc(void)
+{
+	struct amdgpu_bo_alloc_request req = {0};
+	amdgpu_bo_handle buf_handle;
+	int r;
+
+	/* Test impossible mem allocation, 1TB */
+	req.alloc_size = 0xE8D4A51000;
+	req.phys_alignment = 4096;
+	req.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;
+	req.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+
+	r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
+	CU_ASSERT_EQUAL(r, -ENOMEM);
+	/* If it unexpectedly succeeded, free the handle that was returned. */
+	if (!r) {
+		r = amdgpu_bo_free(buf_handle);
+		CU_ASSERT_EQUAL(r, 0);
+	}
+}
diff --git a/lib/libdrm/tests/amdgpu/cs_tests.c b/lib/libdrm/tests/amdgpu/cs_tests.c
index 081ec9c26..4880b74f8 100644
--- a/lib/libdrm/tests/amdgpu/cs_tests.c
+++ b/lib/libdrm/tests/amdgpu/cs_tests.c
@@ -66,6 +66,26 @@ CU_TestInfo cs_tests[] = {
 	CU_TEST_INFO_NULL,
 };
 
+/* Enable hook: probe the device family; disabled on SI and on RV or newer. */
+CU_BOOL suite_cs_tests_enable(void)
+{
+	int r;
+
+	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+				     &minor_version, &device_handle);
+	if (r != 0)
+		return CU_FALSE;
+
+	family_id = device_handle->info.family_id;
+	if (amdgpu_device_deinitialize(device_handle) != 0)
+		return CU_FALSE;
+
+	if (family_id < AMDGPU_FAMILY_RV && family_id != AMDGPU_FAMILY_SI)
+		return CU_TRUE;
+	printf("\n\nThe ASIC NOT support UVD, suite disabled\n");
+	return CU_FALSE;
+}
+
 int suite_cs_tests_init(void)
 {
 	amdgpu_bo_handle ib_result_handle;
diff --git a/lib/libdrm/tests/amdgpu/deadlock_tests.c b/lib/libdrm/tests/amdgpu/deadlock_tests.c
new file mode 100644
index 000000000..84f4debe3
--- /dev/null
+++ b/lib/libdrm/tests/amdgpu/deadlock_tests.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#ifdef HAVE_ALLOCA_H
+# include <alloca.h>
+#endif
+
+#include "CUnit/Basic.h"
+
+#include "amdgpu_test.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+
+#include <pthread.h>
+
+
+/*
+ * This defines the delay in ms after which the memory location designated for
+ * comparison against the reference value is written to, unblocking the command
+ * processor.
+ */
+#define WRITE_MEM_ADDRESS_DELAY_MS 100
+
+#define	PACKET_TYPE3	3U	/* unsigned: 3 << 30 does not fit in an int */
+/* Type-3 packet header: type in bits 31:30, n in bits 29:16, op in bits 15:8. */
+#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
+			 (((op) & 0xFF) << 8) |				\
+			 (((n) & 0x3FFF) << 16))
+
+#define	PACKET3_WAIT_REG_MEM				0x3C
+#define		WAIT_REG_MEM_FUNCTION(x)                ((x) << 0)
+		/* 0 - always
+		 * 1 - <
+		 * 2 - <=
+		 * 3 - ==
+		 * 4 - !=
+		 * 5 - >=
+		 * 6 - >
+		 */
+#define		WAIT_REG_MEM_MEM_SPACE(x)               ((x) << 4)
+		/* 0 - reg
+		 * 1 - mem
+		 */
+#define		WAIT_REG_MEM_OPERATION(x)               ((x) << 6)
+		/* 0 - wait_reg_mem
+		 * 1 - wr_wait_wr_reg
+		 */
+#define		WAIT_REG_MEM_ENGINE(x)                  ((x) << 8)
+		/* 0 - me
+		 * 1 - pfp
+		 */
+
+static  amdgpu_device_handle device_handle;
+static  uint32_t  major_version;
+static  uint32_t  minor_version;
+
+static pthread_t stress_thread;
+static uint32_t *ptr;
+
+static void amdgpu_deadlock_helper(unsigned ip_type);
+static void amdgpu_deadlock_gfx(void);
+static void amdgpu_deadlock_compute(void);
+
+/* Enable hook: the deadlock suite is disabled on AMDGPU_FAMILY_AI. */
+CU_BOOL suite_deadlock_tests_enable(void)
+{
+	uint32_t family;
+
+	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+					     &minor_version, &device_handle))
+		return CU_FALSE;
+	/* Sample the family first: the handle is not used after deinitialize. */
+	family = device_handle->info.family_id;
+	if (amdgpu_device_deinitialize(device_handle))
+		return CU_FALSE;
+	if (family != AMDGPU_FAMILY_AI)
+		return CU_TRUE;
+	printf("\n\nCurrently hangs the CP on this ASIC, deadlock suite disabled\n");
+	return CU_FALSE;
+}
+
+/* Suite init: open the device; print a hint when access is denied. */
+int suite_deadlock_tests_init(void)
+{
+	int r;
+
+	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+				   &minor_version, &device_handle);
+
+	if (r) {
+		if ((r == -EACCES) && (errno == EACCES))
+			printf("\n\nError:%s. "
+				"Hint:Try to run this test program as root.",
+				strerror(errno));
+		return CUE_SINIT_FAILED;
+	}
+
+	return CUE_SUCCESS;
+}
+
+/* Suite teardown: deinitialize the device handle and report the outcome. */
+int suite_deadlock_tests_clean(void)
+{
+	const int failed = amdgpu_device_deinitialize(device_handle) != 0;
+
+	if (failed)
+		return CUE_SCLEAN_FAILED;
+	return CUE_SUCCESS;
+}
+
+
+CU_TestInfo deadlock_tests[] = {	/* test list, ended by CU_TEST_INFO_NULL */
+	{ "gfx ring block test",  amdgpu_deadlock_gfx },
+	{ "compute ring block test",  amdgpu_deadlock_compute },
+	CU_TEST_INFO_NULL,
+};
+
+/* Thread body: sleep WRITE_MEM_ADDRESS_DELAY_MS ms, then set ptr[256] to 1. */
+static void *write_mem_address(void *data)
+{
+	int remaining_ms = WRITE_MEM_ADDRESS_DELAY_MS;
+
+	/* useconds_t range is [0, 1,000,000] so use loop for waits > 1s */
+	while (remaining_ms-- > 0)
+		usleep(1000);
+
+	ptr[256] = 0x1;
+	return NULL;
+}
+
+static void amdgpu_deadlock_gfx(void)
+{
+	amdgpu_deadlock_helper(AMDGPU_HW_IP_GFX);	/* GFX variant */
+}
+
+static void amdgpu_deadlock_compute(void)
+{
+	amdgpu_deadlock_helper(AMDGPU_HW_IP_COMPUTE);	/* compute variant */
+}
+
+/*
+ * Queue 200 WAIT_REG_MEM IBs on ring 0 of ip_type that block until ptr[256]
+ * is non-zero; a helper thread writes it and the last fence is waited on.
+ */
+static void amdgpu_deadlock_helper(unsigned ip_type)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status;
+	uint32_t expired;
+	int i, r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+			       &bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	ptr = ib_result_cpu;
+	ptr[0] = PACKET3(PACKET3_WAIT_REG_MEM, 5);
+	ptr[1] = (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+			 WAIT_REG_MEM_FUNCTION(4) | /* != */
+			 WAIT_REG_MEM_ENGINE(0));  /* me */
+	ptr[2] = (ib_result_mc_address + 256*4) & 0xfffffffc;
+	ptr[3] = ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff;
+	ptr[4] = 0x00000000; /* reference value */
+	ptr[5] = 0xffffffff; /* and mask */
+	ptr[6] = 0x00000004; /* poll interval */
+
+	for (i = 7; i < 16; ++i)
+		ptr[i] = 0xffff1000;
+
+	ptr[256] = 0x0; /* the memory we wait on to change */
+
+	/* Start the releaser only once ptr and the wait dword are set up. */
+	r = pthread_create(&stress_thread, NULL, write_mem_address, NULL);
+	CU_ASSERT_EQUAL(r, 0);
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = 16;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = ip_type;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+
+	for (i = 0; i < 200; i++) {
+		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
+		CU_ASSERT_EQUAL(r, 0);
+	}
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = ip_type;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = ibs_request.seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE,0, &expired);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	pthread_join(stress_thread, NULL);
+}
diff --git a/lib/libdrm/tests/amdgpu/frame.h b/lib/libdrm/tests/amdgpu/frame.h
index 4c946c272..335401c1d 100644
--- a/lib/libdrm/tests/amdgpu/frame.h
+++ b/lib/libdrm/tests/amdgpu/frame.h
@@ -24,7 +24,7 @@
 #ifndef _frame_h_
 #define _frame_h_
 
-const uint8_t frame[] = {
+static const uint8_t frame[] = {
 	0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb,
 	0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2,
 	0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xaa, 0xaa, 0xaa,
diff --git a/lib/libdrm/tests/amdgpu/uvd_enc_tests.c b/lib/libdrm/tests/amdgpu/uvd_enc_tests.c
new file mode 100644
index 000000000..bed8494a8
--- /dev/null
+++ b/lib/libdrm/tests/amdgpu/uvd_enc_tests.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "CUnit/Basic.h"
+
+#include "util_math.h"
+
+#include "amdgpu_test.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+#include "frame.h"
+#include "uve_ib.h"
+
+#define IB_SIZE		4096
+#define MAX_RESOURCES	16
+
+struct amdgpu_uvd_enc_bo {
+	amdgpu_bo_handle handle;	/* BO returned by amdgpu_bo_alloc() */
+	amdgpu_va_handle va_handle;	/* range from amdgpu_va_range_alloc() */
+	uint64_t addr;			/* GPU VA the BO is mapped at */
+	uint64_t size;			/* requested size rounded up to 4096 */
+	uint8_t *ptr;			/* CPU pointer from amdgpu_bo_cpu_map() */
+};
+
+struct amdgpu_uvd_enc {
+	unsigned width;				/* 160 in the create test */
+	unsigned height;			/* 128 in the create test */
+	struct amdgpu_uvd_enc_bo session;	/* allocated by the create test */
+	struct amdgpu_uvd_enc_bo vbuf;		/* presumably the input frame - TODO confirm */
+	struct amdgpu_uvd_enc_bo bs;		/* bytes summed by check_result() */
+	struct amdgpu_uvd_enc_bo fb;		/* dword 6 read as the bs byte count */
+	struct amdgpu_uvd_enc_bo cpb;		/* use not visible in this chunk */
+};
+
+static amdgpu_device_handle device_handle;
+static uint32_t major_version;
+static uint32_t minor_version;
+static uint32_t family_id;
+
+static amdgpu_context_handle context_handle;
+static amdgpu_bo_handle ib_handle;
+static amdgpu_va_handle ib_va_handle;
+static uint64_t ib_mc_address;
+static uint32_t *ib_cpu;
+
+static struct amdgpu_uvd_enc enc;
+static amdgpu_bo_handle resources[MAX_RESOURCES];
+static unsigned num_resources;
+
+static void amdgpu_cs_uvd_enc_create(void);
+static void amdgpu_cs_uvd_enc_session_init(void);
+static void amdgpu_cs_uvd_enc_encode(void);
+static void amdgpu_cs_uvd_enc_destroy(void);
+
+
+CU_TestInfo uvd_enc_tests[] = {	/* test list, ended by CU_TEST_INFO_NULL */
+	{ "UVD ENC create",  amdgpu_cs_uvd_enc_create },
+	{ "UVD ENC session init",  amdgpu_cs_uvd_enc_session_init },
+	{ "UVD ENC encode",  amdgpu_cs_uvd_enc_encode },
+	{ "UVD ENC destroy",  amdgpu_cs_uvd_enc_destroy },
+	CU_TEST_INFO_NULL,
+};
+
+/* Enable hook: run the suite only when a UVD ENC ring is reported. */
+CU_BOOL suite_uvd_enc_tests_enable(void)
+{
+	/* Zeroed so a failed query is read as "no rings", not as garbage. */
+	struct drm_amdgpu_info_hw_ip info = {0};
+	int r;
+
+	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+					     &minor_version, &device_handle))
+		return CU_FALSE;
+
+	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_UVD_ENC, 0, &info);
+	if (amdgpu_device_deinitialize(device_handle))
+		return CU_FALSE;
+
+	if (!info.available_rings)
+		printf("\n\nThe ASIC NOT support UVD ENC, suite disabled.\n");
+	return (r == 0 && (info.available_rings ? CU_TRUE : CU_FALSE));
+}
+
+
+/* Suite init: open the device, create a context and map the shared IB. */
+int suite_uvd_enc_tests_init(void)
+{
+	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+				     &minor_version, &device_handle))
+		return CUE_SINIT_FAILED;
+	family_id = device_handle->info.family_id;
+
+	if (amdgpu_cs_ctx_create(device_handle, &context_handle))
+		goto err_device;
+	if (amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_handle, (void**)&ib_cpu,
+				    &ib_mc_address, &ib_va_handle))
+		goto err_ctx;
+	return CUE_SUCCESS;
+
+	/* Unwind in reverse order so a failed init leaves nothing open. */
+err_ctx:
+	amdgpu_cs_ctx_free(context_handle);
+err_device:
+	amdgpu_device_deinitialize(device_handle);
+	return CUE_SINIT_FAILED;
+}
+
+/*
+ * Suite teardown: release the IB buffer, the context and the device, in that
+ * order; the first failing step ends teardown with CUE_SCLEAN_FAILED.
+ */
+int suite_uvd_enc_tests_clean(void)
+{
+	if (amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
+				     ib_mc_address, IB_SIZE) != 0)
+		return CUE_SCLEAN_FAILED;
+
+	if (amdgpu_cs_ctx_free(context_handle) != 0)
+		return CUE_SCLEAN_FAILED;
+
+	if (amdgpu_device_deinitialize(device_handle) != 0)
+		return CUE_SCLEAN_FAILED;
+
+	return CUE_SUCCESS;
+}
+
+/*
+ * Submit the first ndw dwords of the shared IB to ring type ip and wait for
+ * its fence. Returns 0 on success, otherwise the first non-zero error code.
+ */
+static int submit(unsigned ndw, unsigned ip)
+{
+	struct amdgpu_cs_request ibs_request = {0};
+	struct amdgpu_cs_ib_info ib_info = {0};
+	struct amdgpu_cs_fence fence_status = {0};
+	uint32_t expired;
+	int r, r_list;
+
+	ib_info.ib_mc_address = ib_mc_address;
+	ib_info.size = ndw;
+
+	ibs_request.ip_type = ip;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.fence_info.handle = NULL;
+
+	r = amdgpu_bo_list_create(device_handle, num_resources, resources,
+				  NULL, &ibs_request.resources);
+	if (r)
+		return r;
+
+	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
+
+	/* Destroy the list on the failure path too, so it is never leaked. */
+	r_list = amdgpu_bo_list_destroy(ibs_request.resources);
+	if (r)
+		return r;
+	if (r_list)
+		return r_list;
+
+	fence_status.context = context_handle;
+	fence_status.ip_type = ip;
+	fence_status.fence = ibs_request.seq_no;
+
+	return amdgpu_cs_query_fence_status(&fence_status,
+					    AMDGPU_TIMEOUT_INFINITE,
+					    0, &expired);
+}
+
+/* Allocate a BO (size rounded up to 4096), map it at a GPU VA and zero it. */
+static void alloc_resource(struct amdgpu_uvd_enc_bo *uvd_enc_bo,
+			unsigned size, unsigned domain)
+{
+	struct amdgpu_bo_alloc_request req = {
+		.alloc_size = ALIGN(size, 4096),
+		.preferred_heap = domain,
+	};
+	amdgpu_va_handle va_range;
+	amdgpu_bo_handle bo;
+	uint64_t gpu_va = 0;
+	int r;
+
+	r = amdgpu_bo_alloc(device_handle, &req, &bo);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_va_range_alloc(device_handle, amdgpu_gpu_va_range_general,
+				  req.alloc_size, 1, 0, &gpu_va, &va_range, 0);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_bo_va_op(bo, 0, req.alloc_size, gpu_va, 0, AMDGPU_VA_OP_MAP);
+	CU_ASSERT_EQUAL(r, 0);
+
+	uvd_enc_bo->handle = bo;
+	uvd_enc_bo->va_handle = va_range;
+	uvd_enc_bo->addr = gpu_va;
+	uvd_enc_bo->size = req.alloc_size;
+	r = amdgpu_bo_cpu_map(bo, (void **)&uvd_enc_bo->ptr);
+	CU_ASSERT_EQUAL(r, 0);
+	memset(uvd_enc_bo->ptr, 0, size);
+	r = amdgpu_bo_cpu_unmap(bo);
+	CU_ASSERT_EQUAL(r, 0);
+}
+
+/* Unmap the BO's GPU VA, free the VA range and the BO, then zero the record. */
+static void free_resource(struct amdgpu_uvd_enc_bo *uvd_enc_bo)
+{
+	int r;
+
+	r = amdgpu_bo_va_op(uvd_enc_bo->handle, 0, uvd_enc_bo->size,
+			    uvd_enc_bo->addr, 0, AMDGPU_VA_OP_UNMAP);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_va_range_free(uvd_enc_bo->va_handle);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_bo_free(uvd_enc_bo->handle);
+	CU_ASSERT_EQUAL(r, 0);
+	memset(uvd_enc_bo, 0, sizeof(*uvd_enc_bo));
+}
+
+/* Set the 160x128 frame size, allocate the 128 KiB session BO in GTT and
+ * start the global resource list with that BO followed by the IB. */
+static void amdgpu_cs_uvd_enc_create(void)
+{
+	enc.width = 160;
+	enc.height = 128;
+
+	num_resources  = 0;
+	alloc_resource(&enc.session, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT);
+	resources[num_resources++] = enc.session.handle;
+	resources[num_resources++] = ib_handle;
+}
+
+/* Sum the encoded bitstream bytes (length taken from dword 6 of the feedback
+ * buffer, clamped to the bitstream BO size) and compare to the known value. */
+static void check_result(struct amdgpu_uvd_enc *enc)
+{
+	uint64_t sum = 0;
+	uint32_t s = 175602;
+	uint32_t size, j;
+	int r;
+
+	r = amdgpu_bo_cpu_map(enc->fb.handle, (void **)&enc->fb.ptr);
+	CU_ASSERT_EQUAL(r, 0);
+	size = ((uint32_t *)enc->fb.ptr)[6];
+	r = amdgpu_bo_cpu_unmap(enc->fb.handle);
+	CU_ASSERT_EQUAL(r, 0);
+	r = amdgpu_bo_cpu_map(enc->bs.handle, (void **)&enc->bs.ptr);
+	CU_ASSERT_EQUAL(r, 0);
+	for (j = 0; j < size && j < enc->bs.size; ++j)
+		sum += enc->bs.ptr[j];
+	CU_ASSERT_EQUAL(sum, s);
+	r = amdgpu_bo_cpu_unmap(enc->bs.handle);
+	CU_ASSERT_EQUAL(r, 0);
+}
+
+/* Build the session-initialisation command stream in ib_cpu and submit it. */
+static void amdgpu_cs_uvd_enc_session_init(void)
+{
+	int len, r;
+
+	len = 0;	/* running write offset into ib_cpu, advanced by sizeof/4 */
+	memcpy((ib_cpu + len), uve_session_info, sizeof(uve_session_info));
+	len += sizeof(uve_session_info) / 4;
+	ib_cpu[len++] = enc.session.addr >> 32;	/* session BO address, high half */
+	ib_cpu[len++] = enc.session.addr;	/* ... then low half */
+
+	memcpy((ib_cpu + len), uve_task_info, sizeof(uve_task_info));
+	len += sizeof(uve_task_info) / 4;
+	ib_cpu[len++] = 0x000000d8;	/* 216 = bytes from uve_task_info to IB end */
+	ib_cpu[len++] = 0x00000000;
+	ib_cpu[len++] = 0x00000000;
+	/* The remaining packets are fixed tables from uve_ib.h, copied verbatim. */
+	memcpy((ib_cpu + len), uve_op_init, sizeof(uve_op_init));
+	len += sizeof(uve_op_init) / 4;
+
+	memcpy((ib_cpu + len), uve_session_init, sizeof(uve_session_init));
+	len += sizeof(uve_session_init) / 4;
+
+	memcpy((ib_cpu + len), uve_layer_ctrl, sizeof(uve_layer_ctrl));
+	len += sizeof(uve_layer_ctrl) / 4;
+
+	memcpy((ib_cpu + len), uve_slice_ctrl, sizeof(uve_slice_ctrl));
+	len += sizeof(uve_slice_ctrl) / 4;
+
+	memcpy((ib_cpu + len), uve_spec_misc, sizeof(uve_spec_misc));
+	len += sizeof(uve_spec_misc) / 4;
+
+	memcpy((ib_cpu + len), uve_rc_session_init, sizeof(uve_rc_session_init));
+	len += sizeof(uve_rc_session_init) / 4;
+
+	memcpy((ib_cpu + len), uve_deblocking_filter, sizeof(uve_deblocking_filter));
+	len += sizeof(uve_deblocking_filter) / 4;
+
+	memcpy((ib_cpu + len), uve_quality_params, sizeof(uve_quality_params));
+	len += sizeof(uve_quality_params) / 4;
+
+	memcpy((ib_cpu + len), uve_op_init_rc, sizeof(uve_op_init_rc));
+	len += sizeof(uve_op_init_rc) / 4;
+	memcpy((ib_cpu + len), uve_op_init_rc_vbv_level, sizeof(uve_op_init_rc_vbv_level));
+	len += sizeof(uve_op_init_rc_vbv_level) / 4;
+
+	r = submit(len, AMDGPU_HW_IP_UVD_ENC);
+	CU_ASSERT_EQUAL(r, 0);
+}
+
+/* Encode one frame: upload picture, build + submit the IB, verify output. */
+static void amdgpu_cs_uvd_enc_encode(void)
+{
+	int len, r, i;
+	uint64_t luma_offset, chroma_offset;
+	uint32_t vbuf_size, bs_size = 0x003f4800, cpb_size;
+	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;	/* row pitch alignment */
+	vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;	/* luma + half-height chroma */
+	cpb_size = vbuf_size * 10;
+
+	num_resources  = 0;
+	alloc_resource(&enc.fb, 4096, AMDGPU_GEM_DOMAIN_VRAM);
+	resources[num_resources++] = enc.fb.handle;
+	alloc_resource(&enc.bs, bs_size, AMDGPU_GEM_DOMAIN_VRAM);
+	resources[num_resources++] = enc.bs.handle;
+	alloc_resource(&enc.vbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM);
+	resources[num_resources++] = enc.vbuf.handle;
+	alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM);
+	resources[num_resources++] = enc.cpb.handle;
+	resources[num_resources++] = ib_handle;
+
+	r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
+	CU_ASSERT_EQUAL(r, 0);
+	/* Upload `frame` row by row; each destination row uses the aligned pitch. */
+	memset(enc.vbuf.ptr, 0, vbuf_size);
+	for (i = 0; i < enc.height; ++i) {
+		memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
+		enc.vbuf.ptr += ALIGN(enc.width, align);
+	}
+	for (i = 0; i < enc.height / 2; ++i) {
+		memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
+		enc.vbuf.ptr += ALIGN(enc.width, align);
+	}
+
+	r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	len = 0;	/* running write offset into ib_cpu, advanced by sizeof/4 */
+	memcpy((ib_cpu + len), uve_session_info, sizeof(uve_session_info));
+	len += sizeof(uve_session_info) / 4;
+	ib_cpu[len++] = enc.session.addr >> 32;
+	ib_cpu[len++] = enc.session.addr;
+
+	memcpy((ib_cpu + len), uve_task_info, sizeof(uve_task_info));
+	len += sizeof(uve_task_info) / 4;
+	ib_cpu[len++] = 0x000005e0;	/* 1504 = bytes from uve_task_info to IB end */
+	ib_cpu[len++] = 0x00000001;
+	ib_cpu[len++] = 0x00000001;
+
+	memcpy((ib_cpu + len), uve_nalu_buffer_1, sizeof(uve_nalu_buffer_1));
+	len += sizeof(uve_nalu_buffer_1) / 4;
+
+	memcpy((ib_cpu + len), uve_nalu_buffer_2, sizeof(uve_nalu_buffer_2));
+	len += sizeof(uve_nalu_buffer_2) / 4;
+
+	memcpy((ib_cpu + len), uve_nalu_buffer_3, sizeof(uve_nalu_buffer_3));
+	len += sizeof(uve_nalu_buffer_3) / 4;
+
+	memcpy((ib_cpu + len), uve_nalu_buffer_4, sizeof(uve_nalu_buffer_4));
+	len += sizeof(uve_nalu_buffer_4) / 4;
+
+	memcpy((ib_cpu + len), uve_slice_header, sizeof(uve_slice_header));
+	len += sizeof(uve_slice_header) / 4;
+	/* 0x254 = 596 bytes: 2 header dwords + CPB address + uve_ctx_buffer. */
+	ib_cpu[len++] = 0x00000254;
+	ib_cpu[len++] = 0x00000010;
+	ib_cpu[len++] = enc.cpb.addr >> 32;
+	ib_cpu[len++] = enc.cpb.addr;
+	memcpy((ib_cpu + len), uve_ctx_buffer, sizeof(uve_ctx_buffer));
+	len += sizeof(uve_ctx_buffer) / 4;
+
+	memcpy((ib_cpu + len), uve_bitstream_buffer, sizeof(uve_bitstream_buffer));
+	len += sizeof(uve_bitstream_buffer) / 4;
+	ib_cpu[len++] = 0x00000000;
+	ib_cpu[len++] = enc.bs.addr >> 32;
+	ib_cpu[len++] = enc.bs.addr;
+	ib_cpu[len++] = 0x003f4800;	/* same value as bs_size above */
+	ib_cpu[len++] = 0x00000000;
+
+	memcpy((ib_cpu + len), uve_feedback_buffer, sizeof(uve_feedback_buffer));
+	len += sizeof(uve_feedback_buffer) / 4;
+	ib_cpu[len++] = enc.fb.addr >> 32;
+	ib_cpu[len++] = enc.fb.addr;
+	ib_cpu[len++] = 0x00000010;
+	ib_cpu[len++] = 0x00000028;
+
+	memcpy((ib_cpu + len), uve_feedback_buffer_additional, sizeof(uve_feedback_buffer_additional));
+	len += sizeof(uve_feedback_buffer_additional) / 4;
+
+	memcpy((ib_cpu + len), uve_intra_refresh, sizeof(uve_intra_refresh));
+	len += sizeof(uve_intra_refresh) / 4;
+
+	memcpy((ib_cpu + len), uve_layer_select, sizeof(uve_layer_select));
+	len += sizeof(uve_layer_select) / 4;
+
+	memcpy((ib_cpu + len), uve_rc_layer_init, sizeof(uve_rc_layer_init));
+	len += sizeof(uve_rc_layer_init) / 4;
+
+	memcpy((ib_cpu + len), uve_layer_select, sizeof(uve_layer_select));
+	len += sizeof(uve_layer_select) / 4;
+
+	memcpy((ib_cpu + len), uve_rc_per_pic, sizeof(uve_rc_per_pic));
+	len += sizeof(uve_rc_per_pic) / 4;
+	/* Chroma plane starts right after the aligned luma plane inside vbuf. */
+	unsigned luma_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16);
+	luma_offset = enc.vbuf.addr;
+	chroma_offset = luma_offset + luma_size;
+	ib_cpu[len++] = 0x00000054;	/* 84 bytes = these 8 dwords + uve_encode_param */
+	ib_cpu[len++] = 0x0000000c;
+	ib_cpu[len++] = 0x00000002;
+	ib_cpu[len++] = 0x003f4800;	/* same value as bs_size; meaning not shown here */
+	ib_cpu[len++] = luma_offset >> 32;
+	ib_cpu[len++] = luma_offset;
+	ib_cpu[len++] = chroma_offset >> 32;
+	ib_cpu[len++] = chroma_offset;
+	memcpy((ib_cpu + len), uve_encode_param, sizeof(uve_encode_param));
+	ib_cpu[len] = ALIGN(enc.width, align);	/* patch table dwords 0-1 with the row pitch */
+	ib_cpu[len + 1] = ALIGN(enc.width, align);
+	len += sizeof(uve_encode_param) / 4;
+
+	memcpy((ib_cpu + len), uve_op_speed_enc_mode, sizeof(uve_op_speed_enc_mode));
+	len += sizeof(uve_op_speed_enc_mode) / 4;
+
+	memcpy((ib_cpu + len), uve_op_encode, sizeof(uve_op_encode));
+	len += sizeof(uve_op_encode) / 4;
+
+	r = submit(len, AMDGPU_HW_IP_UVD_ENC);
+	CU_ASSERT_EQUAL(r, 0);
+
+	check_result(&enc);
+
+	free_resource(&enc.fb);
+	free_resource(&enc.bs);
+	free_resource(&enc.vbuf);
+	free_resource(&enc.cpb);
+}
+
+/* Submit the session-close command stream, then free the session BO. */
+static void amdgpu_cs_uvd_enc_destroy(void)
+{
+	int len, r;
+
+	num_resources  = 0;
+	resources[num_resources++] = ib_handle;
+
+	len = 0;
+	memcpy((ib_cpu + len), uve_session_info, sizeof(uve_session_info));
+	len += sizeof(uve_session_info) / 4;
+	ib_cpu[len++] = enc.session.addr >> 32;
+	ib_cpu[len++] = enc.session.addr;
+
+	memcpy((ib_cpu + len), uve_task_info, sizeof(uve_task_info));
+	len += sizeof(uve_task_info) / 4;
+	ib_cpu[len++] = 0xffffffff;
+	ib_cpu[len++] = 0x00000002;
+	ib_cpu[len++] = 0x00000000;
+
+	memcpy((ib_cpu + len), uve_op_close, sizeof(uve_op_close));
+	len += sizeof(uve_op_close) / 4;
+
+	r = submit(len, AMDGPU_HW_IP_UVD_ENC);
+	CU_ASSERT_EQUAL(r, 0);
+
+	free_resource(&enc.session);
+}
diff --git a/lib/libdrm/tests/amdgpu/uve_ib.h b/lib/libdrm/tests/amdgpu/uve_ib.h
new file mode 100644
index 000000000..cb72be228
--- /dev/null
+++ b/lib/libdrm/tests/amdgpu/uve_ib.h
@@ -0,0 +1,527 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#ifndef _uve_ib_h_
+#define _uve_ib_h_
+
+static const uint32_t uve_session_info[] = {
+	0x00000018,	/* 24 bytes = 4 dwords here + 2 address dwords the tests append; presumably the packet size */
+	0x00000001,
+	0x00000000,
+	0x00010000,
+};
+
+static const uint32_t uve_task_info[] = {
+	0x00000014,	/* 20 bytes = 2 dwords here + 3 dwords each test appends; presumably the packet size */
+	0x00000002,
+};
+
+static const uint32_t uve_session_init[] = {
+	0x00000020,
+	0x00000003,
+	0x000000c0,
+	0x00000080,
+	0x00000020,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+static const uint32_t uve_layer_ctrl[] = {
+	0x00000010,
+	0x00000004,
+	0x00000001,
+	0x00000001,
+};
+
+static const uint32_t uve_layer_select[] = {
+	0x0000000c,
+	0x00000005,
+	0x00000000,
+};
+
+static const uint32_t uve_slice_ctrl[] = {
+	0x00000014,
+	0x00000006,
+	0x00000000,
+	0x00000006,
+	0x00000006,
+};
+
+static const uint32_t uve_spec_misc[] = {
+	0x00000024,
+	0x00000007,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000001,
+	0x00000001,
+};
+
+static const uint32_t uve_rc_session_init[] = {
+	0x00000010,
+	0x00000008,
+	0x00000000,
+	0x00000040,
+};
+
+static const uint32_t uve_rc_layer_init[] = {
+	0x00000028,
+	0x00000009,
+	0x001e8480,
+	0x001e8480,
+	0x0000001e,
+	0x00000001,
+	0x0001046a,
+	0x0001046a,
+	0x0001046a,
+	0xaaaaaaaa,
+};
+
+static const uint32_t uve_deblocking_filter[] = {
+	0x00000020,
+	0x0000000e,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+static const uint32_t uve_quality_params[] = {
+	0x00000014,
+	0x0000000d,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+static const uint32_t uve_feedback_buffer[] = {
+	0x0000001c,	/* 28 bytes = 3 dwords here + 4 appended by the encode test; presumably the packet size */
+	0x00000012,
+	0x00000000,
+};
+
+static const uint32_t uve_feedback_buffer_additional[] = {
+	0x00000108,
+	0x00000014,
+	0x00000001,
+	0x00000010,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+static const uint32_t uve_nalu_buffer_1[] = {
+	0x00000018,
+	0x00000013,
+	0x00000001,
+	0x00000007,
+	0x00000001,
+	0x46011000,
+};
+
+static const uint32_t uve_nalu_buffer_2[] = {
+	0x0000002c,
+	0x00000013,
+	0x00000002,
+	0x0000001b,
+	0x00000001,
+	0x40010c01,
+	0xffff0160,
+	0x00000300,
+	0xb0000003,
+	0x00000300,
+	0x962c0900,
+};
+
+static const uint32_t uve_nalu_buffer_3[] = {
+	0x00000034,
+	0x00000013,
+	0x00000003,
+	0x00000023,
+	0x00000001,
+	0x42010101,
+	0x60000003,
+	0x00b00000,
+	0x03000003,
+	0x0096a018,
+	0x2020708f,
+	0xcb924295,
+	0x12e08000,
+};
+
+static const uint32_t uve_nalu_buffer_4[] = {
+	0x0000001c,
+	0x00000013,
+	0x00000004,
+	0x0000000b,
+	0x00000001,
+	0x4401e0f1,
+	0x80992000,
+};
+
+static const uint32_t uve_slice_header[] = {
+	0x000000c8,
+	0x0000000b,
+	0x28010000,
+	0x40000000,
+	0x60000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000002,
+	0x00000010,
+	0x00000003,
+	0x00000000,
+	0x00000002,
+	0x00000002,
+	0x00000004,
+	0x00000000,
+	0x00000001,
+	0x00000000,
+	0x00000002,
+	0x00000003,
+	0x00000005,
+	0x00000000,
+	0x00000002,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+static const uint32_t uve_encode_param[] = {	/* no size dword: the encode test writes 8 dwords before this table */
+	0x00000000,	/* dwords 0 and 1 are overwritten by the encode test after the copy */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xffffffff,
+	0x00000001,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+static const uint32_t uve_intra_refresh[] = {
+	0x00000014,
+	0x0000000f,
+	0x00000000,
+	0x00000000,
+	0x00000001,
+};
+
+static const uint32_t uve_ctx_buffer[] = {
+	0x00000000,
+	0x00000000,
+	0x000000a0,
+	0x000000a0,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+static const uint32_t uve_bitstream_buffer[] = {
+	0x0000001c,	/* 28 bytes = 2 dwords here + 5 appended by the encode test; presumably the packet size */
+	0x00000011,
+};
+
+static const uint32_t uve_rc_per_pic[] = {
+	0x00000024,
+	0x0000000a,
+	0x0000001a,
+	0x00000000,
+	0x00000033,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000001,
+};
+
+static const uint32_t uve_op_init[] = {
+	0x00000008,
+	0x08000001,
+};
+
+static const uint32_t uve_op_close[] = {
+	0x00000008,
+	0x08000002,
+};
+
+static const uint32_t uve_op_encode[] = {
+	0x00000008,
+	0x08000003,
+};
+
+static const uint32_t uve_op_init_rc[] = {
+	0x00000008,
+	0x08000004,
+};
+
+static const uint32_t uve_op_init_rc_vbv_level[] = {
+	0x00000008,
+	0x08000005,
+};
+
+static const uint32_t uve_op_speed_enc_mode[] = {
+	0x00000008,
+	0x08000006,
+};
+
+static const uint32_t uve_op_balance_enc_mode[] = {
+	0x00000008,
+	0x08000007,
+};
+
+static const uint32_t uve_op_quality_enc_mode[] = {
+	0x00000008,
+	0x08000008,
+};
+#endif /* _uve_ib_h_ */
diff --git a/lib/libdrm/tests/amdgpu/vce_tests.c b/lib/libdrm/tests/amdgpu/vce_tests.c
index b03807b26..75821bbb1 100644
--- a/lib/libdrm/tests/amdgpu/vce_tests.c
+++ b/lib/libdrm/tests/amdgpu/vce_tests.c
@@ -88,6 +88,27 @@ CU_TestInfo vce_tests[] = {
 	CU_TEST_INFO_NULL,
 };
 
+
+/* Suite gate: enable the VCE tests on every family except SI and RV or
+ * newer; returns CU_FALSE as well when the device cannot be probed. */
+CU_BOOL suite_vce_tests_enable(void)
+{
+	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+					     &minor_version, &device_handle))
+		return CU_FALSE;
+
+	/* The device is opened only long enough to read its family id. */
+	family_id = device_handle->info.family_id;
+	if (amdgpu_device_deinitialize(device_handle))
+		return CU_FALSE;
+
+	if (family_id != AMDGPU_FAMILY_SI && family_id < AMDGPU_FAMILY_RV)
+		return CU_TRUE;
+
+	printf("\n\nThe ASIC NOT support VCE, suite disabled\n");
+	return CU_FALSE;
+}
+
 int suite_vce_tests_init(void)
 {
 	int r;
diff --git a/lib/libdrm/tests/amdgpu/vcn_tests.c b/lib/libdrm/tests/amdgpu/vcn_tests.c
index 2b1696dd0..9224bc371 100644
--- a/lib/libdrm/tests/amdgpu/vcn_tests.c
+++ b/lib/libdrm/tests/amdgpu/vcn_tests.c
@@ -82,6 +82,27 @@ CU_TestInfo vcn_tests[] = {
 	CU_TEST_INFO_NULL,
 };
 
+/* Suite gate: enable the VCN tests only on family RV and newer; the device
+ * is opened just long enough to read its family id. */
+CU_BOOL suite_vcn_tests_enable(void)
+{
+	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+				   &minor_version, &device_handle))
+		return CU_FALSE;
+
+	family_id = device_handle->info.family_id;
+
+	if (amdgpu_device_deinitialize(device_handle))
+		return CU_FALSE;
+
+	if (family_id >= AMDGPU_FAMILY_RV)
+		return CU_TRUE;
+
+	/* Anything older is reported and the suite is switched off. */
+	printf("\n\nThe ASIC NOT support VCN, suite disabled\n");
+	return CU_FALSE;
+}
+
 int suite_vcn_tests_init(void)
 {
 	int r;
@@ -93,11 +114,6 @@ int suite_vcn_tests_init(void)
 
 	family_id = device_handle->info.family_id;
 
-	if (family_id < AMDGPU_FAMILY_RV) {
-		printf("\n\nThe ASIC NOT support VCN, all sub-tests will pass\n");
-		return CUE_SUCCESS;
-	}
-
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	if (r)
 		return CUE_SINIT_FAILED;
@@ -116,24 +132,18 @@ int suite_vcn_tests_clean(void)
 {
 	int r;
 
-	if (family_id < AMDGPU_FAMILY_RV) {
-		r = amdgpu_device_deinitialize(device_handle);
-		if (r)
-			return CUE_SCLEAN_FAILED;
-	} else {
-		r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
-				     ib_mc_address, IB_SIZE);
-		if (r)
-			return CUE_SCLEAN_FAILED;
-
-		r = amdgpu_cs_ctx_free(context_handle);
-		if (r)
-			return CUE_SCLEAN_FAILED;
-
-		r = amdgpu_device_deinitialize(device_handle);
-		if (r)
-			return CUE_SCLEAN_FAILED;
-	}
+	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
+			     ib_mc_address, IB_SIZE);
+	if (r)
+		return CUE_SCLEAN_FAILED;
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	if (r)
+		return CUE_SCLEAN_FAILED;
+
+	r = amdgpu_device_deinitialize(device_handle);
+	if (r)
+		return CUE_SCLEAN_FAILED;
 
 	return CUE_SUCCESS;
 }
@@ -244,9 +254,6 @@ static void amdgpu_cs_vcn_dec_create(void)
 	struct amdgpu_vcn_bo msg_buf;
 	int len, r;
 
-	if (family_id < AMDGPU_FAMILY_RV)
-		return;
-
 	num_resources  = 0;
 	alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
 	resources[num_resources++] = msg_buf.handle;
@@ -282,9 +289,6 @@ static void amdgpu_cs_vcn_dec_decode(void)
 	int size, len, i, r;
 	uint8_t *dec;
 
-	if (family_id < AMDGPU_FAMILY_RV)
-		return;
-
 	size = 4*1024; /* msg */
 	size += 4*1024; /* fb */
 	size += 4096; /*it_scaling_table*/
@@ -355,9 +359,6 @@ static void amdgpu_cs_vcn_dec_destroy(void)
 	struct amdgpu_vcn_bo msg_buf;
 	int len, r;
 
-	if (family_id < AMDGPU_FAMILY_RV)
-		return;
-
 	num_resources  = 0;
 	alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT);
 	resources[num_resources++] = msg_buf.handle;
@@ -387,24 +388,15 @@ static void amdgpu_cs_vcn_dec_destroy(void)
 
 static void amdgpu_cs_vcn_enc_create(void)
 {
-	if (family_id < AMDGPU_FAMILY_RV)
-		return;
-
 	/* TODO */
 }
 
 static void amdgpu_cs_vcn_enc_encode(void)
 {
-	if (family_id < AMDGPU_FAMILY_RV)
-		return;
-
 	/* TODO */
 }
 
 static void amdgpu_cs_vcn_enc_destroy(void)
 {
-	if (family_id < AMDGPU_FAMILY_RV)
-		return;
-
 	/* TODO */
 }
diff --git a/lib/libdrm/tests/amdgpu/vm_tests.c b/lib/libdrm/tests/amdgpu/vm_tests.c
new file mode 100644
index 000000000..5f1831076
--- /dev/null
+++ b/lib/libdrm/tests/amdgpu/vm_tests.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#include "CUnit/Basic.h"
+
+#include "amdgpu_test.h"
+#include "amdgpu_drm.h"
+
+static  amdgpu_device_handle device_handle;
+static  uint32_t  major_version;
+static  uint32_t  minor_version;
+
+
+static void amdgpu_vmid_reserve_test(void);
+
+/* Suite setup: open drm_amdgpu[0]; print a run-as-root hint on EACCES. */
+int suite_vm_tests_init(void)
+{
+	int r;
+
+	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
+				   &minor_version, &device_handle);
+
+	if (r) {
+		if ((r == -EACCES) && (errno == EACCES))
+			printf("\n\nError:%s. "
+				"Hint:Try to run this test program as root.",
+				strerror(errno));
+		return CUE_SINIT_FAILED;
+	}
+
+	return CUE_SUCCESS;
+}
+
+/* Suite teardown: close the device opened by suite_vm_tests_init(). */
+int suite_vm_tests_clean(void)
+{
+	/* Any non-zero status from deinitialize is a cleanup failure. */
+	if (amdgpu_device_deinitialize(device_handle) != 0)
+		return CUE_SCLEAN_FAILED;
+
+	return CUE_SUCCESS;
+}
+
+
+CU_TestInfo vm_tests[] = {	/* test table, ended by CU_TEST_INFO_NULL */
+	{ "reserve vmid test",  amdgpu_vmid_reserve_test },
+	CU_TEST_INFO_NULL,
+};
+
+/* Reserve a VMID on the device, run a 16-dword IB on the GFX ring while the
+ * reservation is held, wait for its fence, then release the VMID again.
+ * The IB payload 0xffff1000 is presumably a NOP packet - TODO confirm. */
+static void amdgpu_vmid_reserve_test(void)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status;
+	uint32_t expired, flags;
+	int i, r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	uint32_t *ptr;
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	flags = 0;
+	r = amdgpu_vm_reserve_vmid(device_handle, flags);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+			       &bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	ptr = ib_result_cpu;
+
+	for (i = 0; i < 16; ++i)
+		ptr[i] = 0xffff1000;
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = 16;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+
+	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
+	CU_ASSERT_EQUAL(r, 0);
+
+	/* Wait without timeout for the fence of the submission just made. */
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = AMDGPU_HW_IP_GFX;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = ibs_request.seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+	CU_ASSERT_EQUAL(r, 0);
+
+	flags = 0;
+	r = amdgpu_vm_unreserve_vmid(device_handle, flags);
+	CU_ASSERT_EQUAL(r, 0);
+
+	r = amdgpu_cs_ctx_free(context_handle);
+	CU_ASSERT_EQUAL(r, 0);
+}
diff --git a/lib/libdrm/tests/drmsl.c b/lib/libdrm/tests/drmsl.c
index d0ac0efaa..d1b59a86f 100644
--- a/lib/libdrm/tests/drmsl.c
+++ b/lib/libdrm/tests/drmsl.c
@@ -106,7 +106,9 @@ static double do_time(int size, int iter)
     return usec;
 }
 
-static void print_neighbors(void *list, unsigned long key)
+static void print_neighbors(void *list, unsigned long key,
+                            unsigned long expected_prev,
+                            unsigned long expected_next)
 {
     unsigned long prev_key = 0;
     unsigned long next_key = 0;
@@ -119,6 +121,16 @@ static void print_neighbors(void *list, unsigned long key)
 				  &next_key, &next_value);
     printf("Neighbors of %5lu: %d %5lu %5lu\n",
 	   key, retval, prev_key, next_key);
+    if (prev_key != expected_prev) {
+        fprintf(stderr, "Unexpected neighbor: %5lu. Expected: %5lu\n",
+                prev_key, expected_prev);
+	exit(1);
+    }
+    if (next_key != expected_next) {
+        fprintf(stderr, "Unexpected neighbor: %5lu. Expected: %5lu\n",
+                next_key, expected_next);
+	exit(1);
+    }
 }
 
 int main(void)
@@ -138,13 +150,13 @@ int main(void)
     print(list);
     printf("\n==============================\n\n");
 
-    print_neighbors(list, 0);
-    print_neighbors(list, 50);
-    print_neighbors(list, 51);
-    print_neighbors(list, 123);
-    print_neighbors(list, 200);
-    print_neighbors(list, 213);
-    print_neighbors(list, 256);
+    print_neighbors(list, 0, 0, 50);
+    print_neighbors(list, 50, 0, 50);
+    print_neighbors(list, 51, 50, 123);
+    print_neighbors(list, 123, 50, 123);
+    print_neighbors(list, 200, 123, 213);
+    print_neighbors(list, 213, 123, 213);
+    print_neighbors(list, 256, 213, 256);
     printf("\n==============================\n\n");
 
     drmSLDelete(list, 50);
diff --git a/lib/libdrm/tests/exynos/exynos_fimg2d_event.c b/lib/libdrm/tests/exynos/exynos_fimg2d_event.c
index 9ed5a307d..353e087b3 100644
--- a/lib/libdrm/tests/exynos/exynos_fimg2d_event.c
+++ b/lib/libdrm/tests/exynos/exynos_fimg2d_event.c
@@ -1,17 +1,24 @@
 /*
  * Copyright (C) 2015 - Tobias Jakobi
  *
- * This is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published
- * by the Free Software Foundation, either version 2 of the License,
- * or (at your option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
- * It is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with it. If not, see <http://www.gnu.org/licenses/>.
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include <unistd.h>
diff --git a/lib/libdrm/tests/exynos/exynos_fimg2d_perf.c b/lib/libdrm/tests/exynos/exynos_fimg2d_perf.c
index 1699bba7b..a2d5c1929 100644
--- a/lib/libdrm/tests/exynos/exynos_fimg2d_perf.c
+++ b/lib/libdrm/tests/exynos/exynos_fimg2d_perf.c
@@ -1,17 +1,24 @@
 /*
  * Copyright (C) 2015 - Tobias Jakobi
  *
- * This is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published
- * by the Free Software Foundation, either version 2 of the License,
- * or (at your option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
- * It is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with it. If not, see <http://www.gnu.org/licenses/>.
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include <stdlib.h>
diff --git a/lib/libdrm/tests/exynos/exynos_fimg2d_test.c b/lib/libdrm/tests/exynos/exynos_fimg2d_test.c
index ab1028e8b..b71cf5935 100644
--- a/lib/libdrm/tests/exynos/exynos_fimg2d_test.c
+++ b/lib/libdrm/tests/exynos/exynos_fimg2d_test.c
@@ -3,11 +3,24 @@
  * Authors:
  *	Inki Dae <inki.dae@samsung.com>
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #ifdef HAVE_CONFIG_H
diff --git a/lib/libdrm/tests/modetest/modetest.c b/lib/libdrm/tests/modetest/modetest.c
index b8891ff5e..62d933272 100644
--- a/lib/libdrm/tests/modetest/modetest.c
+++ b/lib/libdrm/tests/modetest/modetest.c
@@ -251,6 +251,89 @@ static void dump_blob(struct device *dev, uint32_t blob_id)
 	drmModeFreePropertyBlob(blob);
 }
 
+/*
+ * Translate a DRM framebuffer format modifier into the short label that
+ * is printed for it in the decoded in_formats output.
+ *
+ * Only the modifiers listed in the table below are recognised; any other
+ * value yields the "(UNKNOWN MODIFIER)" placeholder.
+ *
+ * The returned pointer always refers to a string literal.
+ */
+static const char *modifier_to_string(uint64_t modifier)
+{
+	/* Modifier values paired with the label printed for each one. */
+	static const struct {
+		uint64_t token;
+		const char *label;
+	} known[] = {
+		{ DRM_FORMAT_MOD_INVALID, "INVALID" },
+		{ DRM_FORMAT_MOD_LINEAR, "LINEAR" },
+		{ I915_FORMAT_MOD_X_TILED, "X_TILED" },
+		{ I915_FORMAT_MOD_Y_TILED, "Y_TILED" },
+		{ I915_FORMAT_MOD_Yf_TILED, "Yf_TILED" },
+		{ I915_FORMAT_MOD_Y_TILED_CCS, "Y_TILED_CCS" },
+		{ I915_FORMAT_MOD_Yf_TILED_CCS, "Yf_TILED_CCS" },
+		{ DRM_FORMAT_MOD_SAMSUNG_64_32_TILE, "SAMSUNG_64_32_TILE" },
+		{ DRM_FORMAT_MOD_VIVANTE_TILED, "VIVANTE_TILED" },
+		{ DRM_FORMAT_MOD_VIVANTE_SUPER_TILED, "VIVANTE_SUPER_TILED" },
+		{ DRM_FORMAT_MOD_VIVANTE_SPLIT_TILED, "VIVANTE_SPLIT_TILED" },
+		{ DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED, "VIVANTE_SPLIT_SUPER_TILED" },
+		{ NV_FORMAT_MOD_TEGRA_TILED, "MOD_TEGRA_TILED" },
+		{ NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(0), "MOD_TEGRA_16BX2_BLOCK(0)" },
+		{ NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(1), "MOD_TEGRA_16BX2_BLOCK(1)" },
+		{ NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(2), "MOD_TEGRA_16BX2_BLOCK(2)" },
+		{ NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(3), "MOD_TEGRA_16BX2_BLOCK(3)" },
+		{ NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(4), "MOD_TEGRA_16BX2_BLOCK(4)" },
+		{ NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(5), "MOD_TEGRA_16BX2_BLOCK(5)" },
+		{ DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, "MOD_BROADCOM_VC4_T_TILED" },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+		if (known[idx].token == modifier)
+			return known[idx].label;
+	}
+
+	/* Fell through the table: not a modifier this tool can name. */
+	return "(UNKNOWN MODIFIER)";
+}
+
+/* Decode an IN_FORMATS blob: print every format followed by the modifiers
+ * whose bitmask (a 64-format window starting at 'offset') includes it. */
+static void dump_in_formats(struct device *dev, uint32_t blob_id)
+{
+	uint32_t i, j;
+	drmModePropertyBlobPtr blob;
+	struct drm_format_modifier_blob *header;
+	uint32_t *formats;
+	struct drm_format_modifier *modifiers;
+
+	printf("\t\tin_formats blob decoded:\n");
+	blob = drmModeGetPropertyBlob(dev->fd, blob_id);
+	if (!blob) {
+		printf("\n");
+		return;
+	}
+	header = blob->data;
+	formats = (uint32_t *) ((char *) header + header->formats_offset);
+	modifiers = (struct drm_format_modifier *)
+		((char *) header + header->modifiers_offset);
+
+	for (i = 0; i < header->count_formats; i++) {
+		printf("\t\t\t");
+		dump_fourcc(formats[i]);
+		printf(": ");
+		for (j = 0; j < header->count_modifiers; j++) {
+			uint32_t bit = i - modifiers[j].offset; /* wraps past 63 if i < offset */
+			if (bit < 64 && (modifiers[j].formats & (1ULL << bit)))
+				printf(" %s", modifier_to_string(modifiers[j].modifier));
+		}
+		printf("\n");
+	}
+	drmModeFreePropertyBlob(blob);
+}
+
 static void dump_prop(struct device *dev, drmModePropertyPtr prop,
 		      uint32_t prop_id, uint64_t value)
 {
@@ -328,6 +411,9 @@ static void dump_prop(struct device *dev, drmModePropertyPtr prop,
 		printf(" %"PRId64"\n", value);
 	else
 		printf(" %"PRIu64"\n", value);
+
+	if (strcmp(prop->name, "IN_FORMATS") == 0)
+		dump_in_formats(dev, value);
 }
 
 static void dump_connectors(struct device *dev)
@@ -1005,7 +1091,8 @@ static int set_plane(struct device *dev, struct plane_arg *p)
 		if (!format_support(ovr, p->fourcc))
 			continue;
 
-		if ((ovr->possible_crtcs & (1 << pipe)) && !ovr->crtc_id) {
+		if ((ovr->possible_crtcs & (1 << pipe)) &&
+		    (ovr->crtc_id == 0 || ovr->crtc_id == p->crtc_id)) {
 			plane_id = ovr->plane_id;
 			break;
 		}