diff options
author:    Chris Wilson <chris@chris-wilson.co.uk>	2013-01-10 19:14:21 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk>	2013-01-11 12:56:08 +0000
commit:    441ef916ae6569c88b3d6abaf7fea4d69be49d76 (patch)
tree:      78c05f0f440238cad0571a3f0a67f7721bf54b03
parent:    a37d56f338c5fae832d5eeea1283b6dbde827678 (diff)
intel: Throttle harder
Filling the rings is a very unpleasant user experience, so cap the
number of batches we allow to be inflight at any one time.
Interestingly, as also found with SNA, throttling can improve
performance by reducing RSS. However, typically throughput is improved
(at the expense of latency) by oversubscribing work to the GPU and a
10-20% slowdown is commonplace for cairo-traces. Notably, x11perf is
less affected and in particular application level benchmarks show no
change.
Note that this exposes another bug in libdrm-intel 2.4.40 on gen2/3.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
 src/intel.h             |  2 +-
 src/intel_batchbuffer.c | 41 +++++++++++++++++++++++++++-----------
 2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/src/intel.h b/src/intel.h
index 53ce33c1..d4c9aff2 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -182,7 +182,7 @@ typedef struct intel_screen_private {
 	unsigned int batch_emit_start;
 	/** Number of bytes to be emitted in the current BEGIN_BATCH. */
 	uint32_t batch_emitting;
-	dri_bo *batch_bo;
+	dri_bo *batch_bo, *last_batch_bo[2];
 	/** Whether we're in a section of code that can't tolerate flushing */
 	Bool in_batch_atomic;
 	/** Ending batch_used that was verified by intel_start_batch_atomic() */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 334deb79..4e74a0f1 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -67,17 +67,26 @@ void intel_next_vertex(intel_screen_private *intel)
 	    dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096);
 }
 
-static void intel_next_batch(ScrnInfoPtr scrn)
+static dri_bo *bo_alloc(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-
+	int size = 4 * 4096;
 	/* The 865 has issues with larger-than-page-sized batch buffers. */
 	if (IS_I865G(intel))
-		intel->batch_bo =
-		    dri_bo_alloc(intel->bufmgr, "batch", 4096, 4096);
-	else
-		intel->batch_bo =
-		    dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096);
+		size = 4096;
+	return dri_bo_alloc(intel->bufmgr, "batch", size, 4096);
+}
+
+static void intel_next_batch(ScrnInfoPtr scrn, int mode)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	dri_bo *tmp;
+
+	drm_intel_gem_bo_clear_relocs(intel->batch_bo, 0);
+
+	tmp = intel->last_batch_bo[mode];
+	intel->last_batch_bo[mode] = intel->batch_bo;
+	intel->batch_bo = tmp;
 
 	intel->batch_used = 0;
@@ -95,12 +104,25 @@ void intel_batch_init(ScrnInfoPtr scrn)
 	intel->batch_emitting = 0;
 	intel->vertex_id = 0;
 
-	intel_next_batch(scrn);
+	intel->last_batch_bo[0] = bo_alloc(scrn);
+	intel->last_batch_bo[1] = bo_alloc(scrn);
+
+	intel->batch_bo = bo_alloc(scrn);
+	intel->batch_used = 0;
+
+	intel->last_3d = LAST_3D_OTHER;
 }
 
 void intel_batch_teardown(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(intel->last_batch_bo); i++) {
+		if (intel->last_batch_bo[i] != NULL) {
+			dri_bo_unreference(intel->last_batch_bo[i]);
+			intel->last_batch_bo[i] = NULL;
+		}
+	}
 
 	if (intel->batch_bo != NULL) {
 		dri_bo_unreference(intel->batch_bo);
@@ -273,8 +295,7 @@ void intel_batch_submit(ScrnInfoPtr scrn)
 	if (intel->debug_flush & DEBUG_FLUSH_WAIT)
 		drm_intel_bo_wait_rendering(intel->batch_bo);
 
-	dri_bo_unreference(intel->batch_bo);
-	intel_next_batch(scrn);
+	intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT);
 
 	if (intel->batch_commit_notify)
 		intel->batch_commit_notify(intel);