diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2010-05-19 10:57:46 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2010-05-24 09:33:37 +0100 |
commit | 2b050f330f78d02e7f476e55be29d760271ac61c (patch) | |
tree | 7bbaf0b6aa581373e4a212498f11e949aa86d612 | |
parent | dcef703a7cdcf360f12312a338361697acffc3e9 (diff) |
Use pwrite to upload the batch buffer
By using pwrite() instead of dri_bo_map() we can write to the batch buffer
through the GTT and not be forced to map it back into the CPU domain and
out again, eliminating a double clflush.
Measuring x11perf text performance on PineView:
Before:
16000000 trep @ 0.0020 msec (511000.0/sec): Char in 80-char aa line (Charter 10)
16000000 trep @ 0.0021 msec (480000.0/sec): Char in 80-char rgb line (Charter 10)
After:
16000000 trep @ 0.0019 msec (532000.0/sec): Char in 80-char aa line (Charter 10)
16000000 trep @ 0.0020 msec (496000.0/sec): Char in 80-char rgb line (Charter 10)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/i830.h | 2 | ||||
-rw-r--r-- | src/i830_batchbuffer.c | 38 | ||||
-rw-r--r-- | src/i830_batchbuffer.h | 16 | ||||
-rw-r--r-- | src/i915_3d.h | 9 | ||||
-rw-r--r-- | src/i915_render.c | 2 |
5 files changed, 25 insertions, 42 deletions
@@ -252,7 +252,7 @@ typedef struct intel_screen_private { dri_bufmgr *bufmgr; - uint8_t *batch_ptr; + uint32_t batch_ptr[4096]; /** Byte offset in batch_ptr for the next dword to be emitted. */ unsigned int batch_used; /** Position in batch_ptr at the start of the current BEGIN_BATCH */ diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c index 0fe81d03..80539b33 100644 --- a/src/i830_batchbuffer.c +++ b/src/i830_batchbuffer.c @@ -42,7 +42,6 @@ static void intel_next_batch(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - int ret; /* The 865 has issues with larger-than-page-sized batch buffers. */ if (IS_I865G(intel)) @@ -52,12 +51,7 @@ static void intel_next_batch(ScrnInfoPtr scrn) intel->batch_bo = dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096); - ret = dri_bo_map(intel->batch_bo, 1); - if (ret != 0) - FatalError("Failed to map batchbuffer: %s\n", strerror(-ret)); - intel->batch_used = 0; - intel->batch_ptr = intel->batch_bo->virtual; /* We don't know when another client has executed, so we have * to reinitialize our 3D state per batch. @@ -80,9 +74,6 @@ void intel_batch_teardown(ScrnInfoPtr scrn) intel_screen_private *intel = intel_get_screen_private(scrn); if (intel->batch_ptr != NULL) { - dri_bo_unmap(intel->batch_bo); - intel->batch_ptr = NULL; - dri_bo_unreference(intel->batch_bo); intel->batch_bo = NULL; @@ -168,31 +159,24 @@ void intel_batch_submit(ScrnInfoPtr scrn) if (intel->vertex_flush) intel->vertex_flush(intel); - /* Emit a padding dword if we aren't going to be quad-word aligned. */ - if ((intel->batch_used & 4) == 0) { - *(uint32_t *) (intel->batch_ptr + intel->batch_used) = MI_NOOP; - intel->batch_used += 4; - } - /* Mark the end of the batchbuffer. */ - *(uint32_t *) (intel->batch_ptr + intel->batch_used) = - MI_BATCH_BUFFER_END; - intel->batch_used += 4; + OUT_BATCH(MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned. 
*/ + if (intel->batch_used & 1) + OUT_BATCH(MI_NOOP); if (DUMP_BATCHBUFFERS) { FILE *file = fopen(DUMP_BATCHBUFFERS, "a"); if (file) { - fwrite (intel->batch_ptr, intel->batch_used, 1, file); + fwrite (intel->batch_ptr, intel->batch_used*4, 1, file); fclose(file); } } - dri_bo_unmap(intel->batch_bo); - intel->batch_ptr = NULL; - - ret = - dri_bo_exec(intel->batch_bo, intel->batch_used, NULL, 0, - 0xffffffff); + ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr); + if (ret == 0) + ret = dri_bo_exec(intel->batch_bo, intel->batch_used*4, + NULL, 0, 0xffffffff); if (ret != 0) { static int once; @@ -269,6 +253,6 @@ void intel_batch_wait_last(ScrnInfoPtr scrn) /* Map it CPU write, which guarantees it's done. This is a completely * non performance path, so we don't need anything better. */ - drm_intel_bo_map(intel->last_batch_bo, TRUE); - drm_intel_bo_unmap(intel->last_batch_bo); + drm_intel_gem_bo_map_gtt(intel->last_batch_bo); + drm_intel_gem_bo_unmap_gtt(intel->last_batch_bo); } diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h index c912a1d9..874916dd 100644 --- a/src/i830_batchbuffer.h +++ b/src/i830_batchbuffer.h @@ -41,7 +41,7 @@ void intel_batch_wait_last(ScrnInfoPtr scrn); static inline int intel_batch_space(intel_screen_private *intel) { - return (intel->batch_bo->size - BATCH_RESERVED) - (intel->batch_used); + return (intel->batch_bo->size - BATCH_RESERVED) - (4*intel->batch_used); } static inline void @@ -60,7 +60,7 @@ static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz) intel_batch_require_space(scrn, intel, sz * 4); intel->in_batch_atomic = TRUE; - intel->batch_atomic_limit = intel->batch_used + sz * 4; + intel->batch_atomic_limit = intel->batch_used + sz; } static inline void intel_batch_end_atomic(ScrnInfoPtr scrn) @@ -74,19 +74,19 @@ static inline void intel_batch_end_atomic(ScrnInfoPtr scrn) static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword) { - 
*(uint32_t *) (intel->batch_ptr + intel->batch_used) = dword; - intel->batch_used += 4; + intel->batch_ptr[intel->batch_used++] = dword; } static inline void intel_batch_align(intel_screen_private *intel, uint32_t align) { uint32_t delta; + align /= 4; assert(align); if ((delta = intel->batch_used & (align - 1))) { delta = align - delta; - memset (intel->batch_ptr + intel->batch_used, 0, delta); + memset (intel->batch_ptr + intel->batch_used, 0, 4*delta); intel->batch_used += delta; } } @@ -99,11 +99,11 @@ intel_batch_emit_reloc(intel_screen_private *intel, { if (needs_fence) drm_intel_bo_emit_reloc_fence(intel->batch_bo, - intel->batch_used, + intel->batch_used * 4, bo, delta, read_domains, write_domains); else - drm_intel_bo_emit_reloc(intel->batch_bo, intel->batch_used, + drm_intel_bo_emit_reloc(intel->batch_bo, intel->batch_used * 4, bo, delta, read_domains, write_domains); @@ -175,7 +175,7 @@ do { \ "ADVANCE_BATCH\n", __FUNCTION__); \ assert(!intel->in_batch_atomic); \ intel_batch_require_space(scrn, intel, (n) * 4); \ - intel->batch_emitting = (n) * 4; \ + intel->batch_emitting = (n); \ intel->batch_emit_start = intel->batch_used; \ } while (0) diff --git a/src/i915_3d.h b/src/i915_3d.h index ab4fbb57..043a6d56 100644 --- a/src/i915_3d.h +++ b/src/i915_3d.h @@ -423,8 +423,7 @@ do { \ #define FS_BEGIN() \ do { \ - _shader_offset = intel->batch_used; \ - intel->batch_used += 4; \ + _shader_offset = intel->batch_used++; \ } while (0) #define FS_OUT(_shaderop) \ @@ -436,7 +435,7 @@ do { \ #define FS_END() \ do { \ - *(uint32_t *)(intel->batch_ptr + _shader_offset) = \ - (_3DSTATE_PIXEL_SHADER_PROGRAM | \ - ((intel->batch_used - _shader_offset) / 4 - 2)); \ + intel->batch_ptr[_shader_offset] = \ + _3DSTATE_PIXEL_SHADER_PROGRAM | \ + (intel->batch_used - _shader_offset - 2); \ } while (0); diff --git a/src/i915_render.c b/src/i915_render.c index 87f1ca48..de68c5c4 100644 --- a/src/i915_render.c +++ b/src/i915_render.c @@ -1067,7 +1067,7 @@ void 
i915_vertex_flush(intel_screen_private *intel) { if (intel->prim_offset) { - *(uint32_t *) (intel->batch_ptr + intel->prim_offset) |= intel->prim_count - 1; + intel->batch_ptr[intel->prim_offset] |= intel->prim_count - 1; intel->prim_offset = 0; } } |