diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2010-12-02 11:25:05 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2010-12-03 14:05:30 +0000 |
commit | a1fa0dbfdafea32139d4457d81b9d722df955eaf (patch) | |
tree | 0e84f3ea2d10e46471b88d637d98172c75b285a6 | |
parent | 23437fe6769322d48cc3d264660717475b5d0d74 (diff) |
i965: Upload an entire vbo in a single pwrite, rather than per-rectangle
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/i965_render.c | 259 | ||||
-rw-r--r-- | src/intel.h | 8 | ||||
-rw-r--r-- | src/intel_batchbuffer.c | 5 | ||||
-rw-r--r-- | src/intel_uxa.c | 3 |
4 files changed, 129 insertions(+), 146 deletions(-)
diff --git a/src/i965_render.c b/src/i965_render.c index 22e9f8b4..b1308662 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -43,15 +43,6 @@ #include "brw_defines.h" #include "brw_structs.h" -/* 24 = 4 vertices/composite * 3 texcoords/vertex * 2 floats/texcoord - * - * This is an upper-bound based on the case of a non-affine - * transformation and with a mask, but useful for sizing all cases for - * simplicity. - */ -#define VERTEX_FLOATS_PER_COMPOSITE 24 -#define VERTEX_BUFFER_SIZE (256 * VERTEX_FLOATS_PER_COMPOSITE) - struct blendinfo { Bool dst_alpha; Bool src_alpha; @@ -707,8 +698,6 @@ struct gen4_cc_unit_state { [BRW_BLENDFACTOR_COUNT]; }; -typedef float gen4_vertex_buffer[VERTEX_BUFFER_SIZE]; - typedef struct gen4_composite_op { int op; drm_intel_bo *surface_state_binding_table_bo; @@ -734,7 +723,6 @@ struct gen4_render_state { drm_intel_bo *wm_kernel_bo[WM_KERNEL_COUNT]; drm_intel_bo *sip_kernel_bo; - dri_bo *vertex_buffer_bo; drm_intel_bo *cc_vp_bo; drm_intel_bo *gen6_blend_bo; @@ -744,9 +732,6 @@ struct gen4_render_state { [SAMPLER_STATE_FILTER_COUNT] [SAMPLER_STATE_EXTEND_COUNT]; gen4_composite_op composite_op; - - int vb_offset; - int vertex_size; }; static void gen6_emit_composite_state(ScrnInfoPtr scrn); @@ -1451,8 +1436,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) uint32_t w_component; uint32_t src_format; - render_state->vertex_size = 4 * (2 + nelem * selem); - if (is_affine) { src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; w_component = BRW_VFCOMPONENT_STORE_1_FLT; @@ -1566,8 +1549,8 @@ static Bool i965_composite_check_aperture(ScrnInfoPtr scrn) gen4_composite_op *composite_op = &render_state->composite_op; drm_intel_bo *bo_table[] = { intel->batch_bo, + intel->vertex_bo, composite_op->surface_state_binding_table_bo, - render_state->vertex_buffer_bo, render_state->vs_state_bo, render_state->sf_state_bo, render_state->sf_mask_state_bo, @@ -1581,8 +1564,8 @@ static Bool i965_composite_check_aperture(ScrnInfoPtr scrn) }; 
drm_intel_bo *gen6_bo_table[] = { intel->batch_bo, + intel->vertex_bo, composite_op->surface_state_binding_table_bo, - render_state->vertex_buffer_bo, render_state->wm_kernel_bo[composite_op->wm_kernel], render_state->ps_sampler_state_bo[composite_op->src_filter] [composite_op->src_extend] @@ -1764,6 +1747,9 @@ i965_prepare_composite(int op, PicturePtr source_picture, composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; } + intel->floats_per_vertex = + 2 + (mask ? 2 : 1) * (composite_op->is_affine ? 2: 3); + if (!i965_composite_check_aperture(scrn)) { intel_batch_submit(scrn, FALSE); if (!i965_composite_check_aperture(scrn)) { @@ -1779,32 +1765,38 @@ i965_prepare_composite(int op, PicturePtr source_picture, return TRUE; } -static drm_intel_bo *i965_get_vb_space(ScrnInfoPtr scrn) +static void i965_select_vertex_buffer(struct intel_screen_private *intel) { - intel_screen_private *intel = intel_get_screen_private(scrn); - struct gen4_render_state *render_state = intel->gen4_render_state; + int vertex_size = intel->floats_per_vertex; - /* If the vertex buffer is too full, then we free the old and a new one - * gets made. - */ - if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > - VERTEX_BUFFER_SIZE) { - drm_intel_bo_unreference(render_state->vertex_buffer_bo); - render_state->vertex_buffer_bo = NULL; - } + /* Set up the pointer to our (single) vertex buffer */ + OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); - /* Alloc a new vertex buffer if necessary. */ - if (render_state->vertex_buffer_bo == NULL) { - render_state->vertex_buffer_bo = - drm_intel_bo_alloc(intel->bufmgr, "vb", - sizeof(gen4_vertex_buffer), 4096); - render_state->vb_offset = 0; + /* XXX could use multiple vbo to reduce relocations if + * frequently switching between vertex sizes, like rgb10text. 
+ */ + if (INTEL_INFO(intel)->gen >= 60) { + OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | + GEN6_VB0_VERTEXDATA | + (4*vertex_size << VB0_BUFFER_PITCH_SHIFT)); + } else { + OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | + VB0_VERTEXDATA | + (4*vertex_size << VB0_BUFFER_PITCH_SHIFT)); } + OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + if (INTEL_INFO(intel)->gen >= 50) + OUT_RELOC(intel->vertex_bo, + I915_GEM_DOMAIN_VERTEX, 0, + sizeof(intel->vertex_ptr) - 1); + else + OUT_BATCH(0); + OUT_BATCH(0); // ignore for VERTEXDATA, but still there - drm_intel_bo_reference(render_state->vertex_buffer_bo); - return render_state->vertex_buffer_bo; + intel->last_floats_per_vertex = vertex_size; } + void i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h) @@ -1814,9 +1806,6 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, struct gen4_render_state *render_state = intel->gen4_render_state; Bool has_mask; float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; - int i; - drm_intel_bo *vb_bo; - float vb[24]; /* 3 * (2 dst + 3 src + 3 mask) */ Bool is_affine = render_state->composite_op.is_affine; if (is_affine) { @@ -1850,9 +1839,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, return; } - if (intel->scale_units[1][0] == -1 || intel->scale_units[1][1] == -1) { - has_mask = FALSE; - } else { + if (intel->render_mask) { has_mask = TRUE; if (is_affine) { if (!intel_get_transformed_coordinates(maskX, maskY, @@ -1888,55 +1875,10 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, &mask_x[2], &mask_y[2], &mask_w[2])) return; } + } else { + has_mask = FALSE; } - vb_bo = i965_get_vb_space(scrn); - if (vb_bo == NULL) - return; - i = 0; - /* rect (x2,y2) */ - vb[i++] = (float)(dstX + w); - vb[i++] = (float)(dstY + h); - vb[i++] = src_x[2] * intel->scale_units[0][0]; - vb[i++] = src_y[2] * intel->scale_units[0][1]; - if (!is_affine) - 
vb[i++] = src_w[2]; - if (has_mask) { - vb[i++] = mask_x[2] * intel->scale_units[1][0]; - vb[i++] = mask_y[2] * intel->scale_units[1][1]; - if (!is_affine) - vb[i++] = mask_w[2]; - } - - /* rect (x1,y2) */ - vb[i++] = (float)dstX; - vb[i++] = (float)(dstY + h); - vb[i++] = src_x[1] * intel->scale_units[0][0]; - vb[i++] = src_y[1] * intel->scale_units[0][1]; - if (!is_affine) - vb[i++] = src_w[1]; - if (has_mask) { - vb[i++] = mask_x[1] * intel->scale_units[1][0]; - vb[i++] = mask_y[1] * intel->scale_units[1][1]; - if (!is_affine) - vb[i++] = mask_w[1]; - } - - /* rect (x1,y1) */ - vb[i++] = (float)dstX; - vb[i++] = (float)dstY; - vb[i++] = src_x[0] * intel->scale_units[0][0]; - vb[i++] = src_y[0] * intel->scale_units[0][1]; - if (!is_affine) - vb[i++] = src_w[0]; - if (has_mask) { - vb[i++] = mask_x[0] * intel->scale_units[1][0]; - vb[i++] = mask_y[0] * intel->scale_units[1][1]; - if (!is_affine) - vb[i++] = mask_w[0]; - } - drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb); - if (!i965_composite_check_aperture(scrn)) intel_batch_submit(scrn, FALSE); @@ -1950,40 +1892,80 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, OUT_BATCH(MI_FLUSH); } - /* Set up the pointer to our (single) vertex buffer */ - OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); + if (intel->vertex_offset == 0) { + if (intel->vertex_used && + intel->floats_per_vertex != intel->last_floats_per_vertex) { + intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; + intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; + } + if (intel->floats_per_vertex != intel->last_floats_per_vertex || + intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { + intel_next_vertex(intel); + i965_select_vertex_buffer(intel); + intel->vertex_index = 0; + } - if (INTEL_INFO(intel)->gen >= 60) { - OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | - GEN6_VB0_VERTEXDATA | - (render_state->vertex_size << 
VB0_BUFFER_PITCH_SHIFT)); - } else { - OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | - VB0_VERTEXDATA | - (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT)); + OUT_BATCH(BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + intel->vertex_offset = intel->batch_used; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(intel->vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + intel->vertex_count = intel->vertex_index; } - OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, - render_state->vb_offset * 4); - - if (INTEL_INFO(intel)->gen >= 50) - OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, - render_state->vb_offset * 4 + i * 4); - else - OUT_BATCH(3); + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); + if (!is_affine) + OUT_VERTEX(src_w[2]); + if (has_mask) { + OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]); + OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]); + if (!is_affine) + OUT_VERTEX(mask_w[2]); + } - OUT_BATCH(0); // ignore for VERTEXDATA, but still there + /* rect (x1,y2) */ + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); + if (!is_affine) + OUT_VERTEX(src_w[1]); + if (has_mask) { + OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]); + OUT_VERTEX(mask_y[1] * intel->scale_units[1][1]); + if (!is_affine) + OUT_VERTEX(mask_w[1]); + } - OUT_BATCH(BRW_3DPRIMITIVE | BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | /* CTG - indirect vertex count */ - 4); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); /* start vertex offset */ - OUT_BATCH(1); /* single instance */ - OUT_BATCH(0); /* start instance location */ - 
OUT_BATCH(0); /* index buffer offset, ignored */ + /* rect (x1,y1) */ + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); + if (!is_affine) + OUT_VERTEX(src_w[0]); + if (has_mask) { + OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]); + OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]); + if (!is_affine) + OUT_VERTEX(mask_w[0]); + } + intel->vertex_index += 3; - render_state->vb_offset += i; - drm_intel_bo_unreference(vb_bo); + if (INTEL_INFO(intel)->gen < 50) { + /* XXX OMG! */ + i965_vertex_flush(intel); + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + } intel_batch_end_atomic(scrn); } @@ -1991,17 +1973,10 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, void i965_batch_flush_notify(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - struct gen4_render_state *render_state = intel->gen4_render_state; - - /* Once a batch is emitted, we never want to map again any buffer - * object being referenced by that batch, (which would be very - * expensive). 
*/ - if (render_state->vertex_buffer_bo) { - dri_bo_unreference(render_state->vertex_buffer_bo); - render_state->vertex_buffer_bo = NULL; - } intel->needs_render_state_emit = TRUE; + intel->last_floats_per_vertex = 0; + intel->vertex_index = 0; } /** @@ -2022,7 +1997,6 @@ void gen4_render_state_init(ScrnInfoPtr scrn) intel->gen4_render_state = calloc(sizeof(*render_state), 1); render_state = intel->gen4_render_state; - render_state->vb_offset = 0; render_state->vs_state_bo = gen4_create_vs_unit_state(scrn); @@ -2136,8 +2110,6 @@ void gen4_render_state_cleanup(ScrnInfoPtr scrn) gen4_composite_op *composite_op = &render_state->composite_op; drm_intel_bo_unreference(composite_op->surface_state_binding_table_bo); - drm_intel_bo_unreference(render_state->vertex_buffer_bo); - drm_intel_bo_unreference(render_state->vs_state_bo); drm_intel_bo_unreference(render_state->sf_state_bo); drm_intel_bo_unreference(render_state->sf_mask_state_bo); @@ -2514,7 +2486,6 @@ static void gen6_composite_vertex_element_state(ScrnInfoPtr scrn, Bool has_mask, Bool is_affine) { intel_screen_private *intel = intel_get_screen_private(scrn); - struct gen4_render_state *render_state = intel->gen4_render_state; /* * vertex data in vertex buffer * position: (x, y) @@ -2526,8 +2497,6 @@ gen6_composite_vertex_element_state(ScrnInfoPtr scrn, Bool has_mask, Bool is_aff uint32_t w_component; uint32_t src_format; - render_state->vertex_size = 4 * (2 + nelem * selem); - if (is_affine) { src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; w_component = BRW_VFCOMPONENT_STORE_1_FLT; @@ -2546,10 +2515,10 @@ gen6_composite_vertex_element_state(ScrnInfoPtr scrn, Bool has_mask, Bool is_aff */ OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (2 + nelem)) + 1 - 2)); - + OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | - GEN6_VE0_VALID | - (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (0 << VE0_OFFSET_SHIFT)); 
OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | @@ -2589,7 +2558,7 @@ gen6_composite_vertex_element_state(ScrnInfoPtr scrn, Bool has_mask, Bool is_aff } } -static void +static void gen6_emit_composite_state(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); @@ -2647,7 +2616,7 @@ gen6_emit_composite_state(ScrnInfoPtr scrn) gen6_composite_vertex_element_state(scrn, mask != 0, is_affine); } -static void +static void gen6_render_state_init(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); @@ -2659,7 +2628,6 @@ gen6_render_state_init(ScrnInfoPtr scrn) intel->gen4_render_state = calloc(sizeof(*render_state), 1); render_state = intel->gen4_render_state; - render_state->vb_offset = 0; for (m = 0; m < WM_KERNEL_COUNT; m++) { render_state->wm_kernel_bo[m] = @@ -2680,7 +2648,7 @@ gen6_render_state_init(ScrnInfoPtr scrn) i, j, k, l, border_color_bo); - } + } } } } @@ -2691,3 +2659,12 @@ gen6_render_state_init(ScrnInfoPtr scrn) render_state->gen6_blend_bo = gen6_composite_create_blend_state(scrn); render_state->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(scrn); } + +void i965_vertex_flush(struct intel_screen_private *intel) +{ + if (intel->vertex_offset) { + intel->batch_ptr[intel->vertex_offset] = + intel->vertex_index - intel->vertex_count; + intel->vertex_offset = 0; + } +} diff --git a/src/intel.h b/src/intel.h index ed21b1e8..51faaf17 100644 --- a/src/intel.h +++ b/src/intel.h @@ -426,9 +426,10 @@ typedef struct intel_screen_private { int w, int h); int floats_per_vertex; int last_floats_per_vertex; - uint32_t vertex_count; - uint32_t vertex_index; - uint32_t vertex_used; + uint16_t vertex_offset; + uint16_t vertex_count; + uint16_t vertex_index; + uint16_t vertex_used; float vertex_ptr[4*1024]; dri_bo *vertex_bo; @@ -580,6 +581,7 @@ Bool i965_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, void 
i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h); +void i965_vertex_flush(struct intel_screen_private *intel); void i965_batch_flush_notify(ScrnInfoPtr scrn); Bool intel_transform_is_affine(PictTransformPtr t); diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 9f8ca6f3..34c6cc90 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -45,8 +45,10 @@ static void intel_end_vertex(intel_screen_private *intel) { if (intel->vertex_bo) { - if (intel->vertex_used) + if (intel->vertex_used) { dri_bo_subdata(intel->vertex_bo, 0, intel->vertex_used*4, intel->vertex_ptr); + intel->vertex_used = 0; + } dri_bo_unreference(intel->vertex_bo); intel->vertex_bo = NULL; @@ -59,7 +61,6 @@ void intel_next_vertex(intel_screen_private *intel) intel->vertex_bo = dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096); - intel->vertex_used = 0; } static void intel_next_batch(ScrnInfoPtr scrn) diff --git a/src/intel_uxa.c b/src/intel_uxa.c index 8db0ef0d..9310e895 100644 --- a/src/intel_uxa.c +++ b/src/intel_uxa.c @@ -1178,6 +1178,8 @@ Bool intel_uxa_init(ScreenPtr screen) intel->render_current_dest = NULL; intel->prim_offset = 0; intel->vertex_count = 0; + intel->vertex_offset = 0; + intel->vertex_used = 0; intel->floats_per_vertex = 0; intel->last_floats_per_vertex = 0; intel->vertex_bo = NULL; @@ -1221,6 +1223,7 @@ Bool intel_uxa_init(ScreenPtr screen) intel->uxa_driver->composite = i965_composite; intel->uxa_driver->done_composite = i830_done_composite; + intel->vertex_flush = i965_vertex_flush; intel->batch_flush_notify = i965_batch_flush_notify; } |