diff options
author:    Chris Wilson <chris@chris-wilson.co.uk>	2013-01-10 19:14:21 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk>	2013-01-11 12:56:08 +0000
commit:    441ef916ae6569c88b3d6abaf7fea4d69be49d76 (patch)
tree:      78c05f0f440238cad0571a3f0a67f7721bf54b03
parent:    a37d56f338c5fae832d5eeea1283b6dbde827678 (diff)
intel: Throttle harder
Filling the rings is a very unpleasant user experience, so cap the
number of batches we allow to be inflight at any one time.
Interestingly, as also found with SNA, throttling can improve
performance by reducing RSS. However, typically throughput is improved
(at the expense of latency) by oversubscribing work to the GPU and a
10-20% slowdown is commonplace for cairo-traces. Notably, x11perf is
less affected and in particular application level benchmarks show no
change.
Note that this exposes another bug in libdrm-intel 2.4.40 on gen2/3.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
 src/intel.h             |  2 +-
 src/intel_batchbuffer.c | 41 +++++++++++++++++++++++++++-----------
 2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/src/intel.h b/src/intel.h
index 53ce33c1..d4c9aff2 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -182,7 +182,7 @@ typedef struct intel_screen_private {
 	unsigned int batch_emit_start;
 	/** Number of bytes to be emitted in the current BEGIN_BATCH. */
 	uint32_t batch_emitting;
-	dri_bo *batch_bo;
+	dri_bo *batch_bo, *last_batch_bo[2];
 	/** Whether we're in a section of code that can't tolerate flushing */
 	Bool in_batch_atomic;
 	/** Ending batch_used that was verified by intel_start_batch_atomic() */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 334deb79..4e74a0f1 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -67,17 +67,26 @@ void intel_next_vertex(intel_screen_private *intel)
 	    dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096);
 }
 
-static void intel_next_batch(ScrnInfoPtr scrn)
+static dri_bo *bo_alloc(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-
+	int size = 4 * 4096;
 	/* The 865 has issues with larger-than-page-sized batch buffers. */
 	if (IS_I865G(intel))
-		intel->batch_bo =
-		    dri_bo_alloc(intel->bufmgr, "batch", 4096, 4096);
-	else
-		intel->batch_bo =
-		    dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096);
+		size = 4096;
+	return dri_bo_alloc(intel->bufmgr, "batch", size, 4096);
+}
+
+static void intel_next_batch(ScrnInfoPtr scrn, int mode)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	dri_bo *tmp;
+
+	drm_intel_gem_bo_clear_relocs(intel->batch_bo, 0);
+
+	tmp = intel->last_batch_bo[mode];
+	intel->last_batch_bo[mode] = intel->batch_bo;
+	intel->batch_bo = tmp;
 
 	intel->batch_used = 0;
@@ -95,12 +104,25 @@ void intel_batch_init(ScrnInfoPtr scrn)
 	intel->batch_emitting = 0;
 	intel->vertex_id = 0;
 
-	intel_next_batch(scrn);
+	intel->last_batch_bo[0] = bo_alloc(scrn);
+	intel->last_batch_bo[1] = bo_alloc(scrn);
+
+	intel->batch_bo = bo_alloc(scrn);
+	intel->batch_used = 0;
+
+	intel->last_3d = LAST_3D_OTHER;
 }
 
 void intel_batch_teardown(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(intel->last_batch_bo); i++) {
+		if (intel->last_batch_bo[i] != NULL) {
+			dri_bo_unreference(intel->last_batch_bo[i]);
+			intel->last_batch_bo[i] = NULL;
+		}
+	}
 
 	if (intel->batch_bo != NULL) {
 		dri_bo_unreference(intel->batch_bo);
@@ -273,8 +295,7 @@ void intel_batch_submit(ScrnInfoPtr scrn)
 	if (intel->debug_flush & DEBUG_FLUSH_WAIT)
 		drm_intel_bo_wait_rendering(intel->batch_bo);
 
-	dri_bo_unreference(intel->batch_bo);
-	intel_next_batch(scrn);
+	intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT);
 
 	if (intel->batch_commit_notify)
 		intel->batch_commit_notify(intel);