| author | Carl Worth <cworth@cworth.org> | 2008-10-22 17:12:47 -0700 |
|---|---|---|
| committer | Carl Worth <cworth@cworth.org> | 2008-11-03 22:46:17 -0800 |
| commit | fcb2a5a1253c505913e66b08107c0a9f57b07bad (patch) | |
| tree | 480d37fdd80f06ca47331ebb02afa5bad9cf6cba | |
| parent | 7e68786cf746b6b984b184080578db1947b268ed (diff) | |
Use buffer object for vertex buffer (in new gen4_dynamic_state)
This begins the process of separating the dynamic data from the
static data (still to move are the surface state and binding
table objects). The new dynamic_state is stored in a buffer
object, so this patch restores the buffer-object-for-vertex-buffer
functionality originally introduced in commit 1abf4d3a7a and later
reverted in 5c9a62a29f.

A notable difference is that this time we actually do use
check_aperture_space to ensure things will fit (assuming there's
a non-empty implementation under that).
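
In other words, the allocation path checks the aperture once against the current batch and, on failure, flushes and checks once more against an empty batch before giving up. Here is a minimal sketch of that check-twice idiom, condensed from the `_allocate_dynamic_state_internal` helper in the diff below; the wrapper name `fits_after_possible_flush` is ours for illustration, not the driver's:

```c
/* Hypothetical wrapper illustrating the check-twice idiom used by
 * _allocate_dynamic_state_internal below; not the driver's own API. */
static Bool
fits_after_possible_flush (ScrnInfoPtr pScrn,
			   dri_bo *batch_bo, dri_bo *dynamic_state_bo)
{
    dri_bo *bo_table[2] = { batch_bo, dynamic_state_bo };

    /* First check: do both buffer objects fit alongside whatever is
     * already queued in the current batch? */
    if (dri_bufmgr_check_aperture_space (bo_table, 2) >= 0)
	return TRUE;

    /* They don't: flush, then check once more against an empty batch.
     * A second failure means the operation is simply too big for the
     * hardware, and the caller must fall back to software. */
    intel_batch_flush (pScrn, FALSE);
    return dri_bufmgr_check_aperture_space (bo_table, 2) >= 0;
}
```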
-rw-r--r-- | src/i965_render.c | 131 |
1 file changed, 116 insertions, 15 deletions
```diff
diff --git a/src/i965_render.c b/src/i965_render.c
index a9d7f66c..7a3ff7ff 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -59,8 +59,14 @@ do { \
 } while(0)
 #endif
 
-#define MAX_VERTEX_PER_COMPOSITE 24
-#define MAX_VERTEX_BUFFERS 256
+/* 24 = 4 vertices/composite * 3 texcoords/vertex * 2 floats/texcoord
+ *
+ * This is an upper bound based on the case of a non-affine
+ * transformation and with a mask, but useful for sizing all cases for
+ * simplicity.
+ */
+#define VERTEX_FLOATS_PER_COMPOSITE 24
+#define VERTEX_BUFFER_SIZE (256 * VERTEX_FLOATS_PER_COMPOSITE)
 
 struct blendinfo {
     Bool dst_alpha;
@@ -445,11 +451,16 @@ typedef struct brw_surface_state_padded {
 /**
  * Gen4 rendering state buffer structure.
  *
- * Ideally this structure would contain static data for all of the
- * combinations of state that we use for Render acceleration, and
- * another buffer would contain the dynamic surface state, binding
- * table, and vertex data. We'll be moving to that organization soon,
- * so we use that naming already.
+ * This structure contains static data for all of the combinations of
+ * state that we use for Render acceleration.
+ *
+ * Meanwhile, gen4_dynamic_state_t should contain all dynamic data,
+ * but we're still in the process of migrating some data out of
+ * gen4_static_state_t to gen4_dynamic_state_t. Things remaining to be
+ * migrated include
+ *
+ *	surface_state
+ *	binding_table
  */
 typedef struct _gen4_static_state {
     uint8_t wm_scratch[128 * PS_MAX_THREADS];
@@ -503,15 +514,19 @@ typedef struct _gen4_static_state {
 	[BRW_BLENDFACTOR_COUNT];
     struct brw_cc_viewport cc_viewport;
     PAD64 (brw_cc_viewport, 0);
-
-    float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS];
 } gen4_static_state_t;
 
+typedef struct gen4_dynamic_state_state {
+    float vb[VERTEX_BUFFER_SIZE];
+} gen4_dynamic_state;
+
 /** Private data for gen4 render accel implementation. */
 struct gen4_render_state {
     gen4_static_state_t *static_state;
     uint32_t static_state_offset;
 
+    dri_bo* dynamic_state_bo;
+
     int binding_table_index;
     int surface_state_index;
     int vb_offset;
@@ -917,6 +932,73 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
     return offset;
 }
 
+static Bool
+_allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice);
+
+/* Allocate the dynamic state needed for a composite operation,
+ * flushing the current batch if needed to create sufficient space.
+ *
+ * Even after flushing we check again and return FALSE if the
+ * operation still can't fit with an empty batch. Otherwise, returns
+ * TRUE.
+ */
+static Bool
+_allocate_dynamic_state_check_twice (ScrnInfoPtr pScrn)
+{
+    return _allocate_dynamic_state_internal (pScrn, TRUE);
+}
+
+/* Allocate the dynamic state needed for a composite operation,
+ * flushing the current batch if needed to create sufficient space.
+ */
+static void
+_allocate_dynamic_state (ScrnInfoPtr pScrn)
+{
+    _allocate_dynamic_state_internal (pScrn, FALSE);
+}
+
+/* Number of buffer objects in our call to check_aperture_space:
+ *
+ *	batch_bo
+ *	dynamic_state_bo
+ */
+#define NUM_BO 2
+
+static Bool
+_allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state = pI830->gen4_render_state;
+    dri_bo *bo_table[NUM_BO];
+
+    if (render_state->dynamic_state_bo == NULL) {
+	render_state->dynamic_state_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+						       sizeof (gen4_dynamic_state),
+						       4096);
+    }
+
+    bo_table[0] = pI830->batch_bo;
+    bo_table[1] = render_state->dynamic_state_bo;
+
+    /* If this command won't fit in the current batch, flush. */
+    if (dri_bufmgr_check_aperture_space (bo_table, NUM_BO) < 0) {
+	intel_batch_flush (pScrn, FALSE);
+
+	if (check_twice) {
+	    /* If the command still won't fit in an empty batch, then it's
+	     * just plain too big for the hardware---fallback to software.
+	     */
+	    if (dri_bufmgr_check_aperture_space (bo_table, NUM_BO) < 0) {
+		dri_bo_unreference (render_state->dynamic_state_bo);
+		render_state->dynamic_state_bo = NULL;
+		return FALSE;
+	    }
+	}
+    }
+
+    return TRUE;
+}
+#undef NUM_BO
+
 Bool
 i965_prepare_composite(int op, PicturePtr pSrcPicture,
 		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
@@ -940,6 +1022,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     int state_base_offset;
     uint32_t src_blend, dst_blend;
     uint32_t *binding_table;
+    Bool success;
+
+    /* Fallback if we can't make this operation fit. */
+    success = _allocate_dynamic_state_check_twice (pScrn);
+    if (! success)
+	return FALSE;
 
     IntelEmitInvarientState(pScrn);
     *pI830->last_3d = LAST_3D_RENDER;
@@ -1288,11 +1376,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state = pI830->gen4_render_state;
-    gen4_static_state_t *static_state = render_state->static_state;
+    gen4_dynamic_state *dynamic_state;
     Bool has_mask;
     Bool is_affine_src, is_affine_mask, is_affine;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
-    float *vb = static_state->vb;
+    float *vb;
     int i;
 
     is_affine_src = i830_transform_is_affine (pI830->transform[0]);
@@ -1369,11 +1457,21 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	}
     }
 
-    if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(static_state->vb)) {
-	i830WaitSync(pScrn);
+    /* Arrange for a dynamic_state buffer object with sufficient space
+     * for our vertices. */
+    if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > VERTEX_BUFFER_SIZE) {
+	dri_bo_unreference (render_state->dynamic_state_bo);
+	render_state->dynamic_state_bo = NULL;
 	render_state->vb_offset = 0;
+	_allocate_dynamic_state (pScrn);
     }
 
+    /* Map the dynamic_state buffer object so we can write to the
+     * vertex buffer within it.
+     */
+    dri_bo_map (render_state->dynamic_state_bo, 1);
+    dynamic_state = render_state->dynamic_state_bo->virtual;
+    vb = dynamic_state->vb;
+
     i = render_state->vb_offset;
     /* rect (x2,y2) */
     vb[i++] = (float)(dstX + w);
@@ -1416,7 +1514,9 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	if (!is_affine)
 	    vb[i++] = mask_w[0];
     }
-    assert (i * 4 <= sizeof(static_state->vb));
+    assert (i <= VERTEX_BUFFER_SIZE);
+
+    dri_bo_unmap (render_state->dynamic_state_bo);
 
     BEGIN_BATCH(12);
     OUT_BATCH(MI_FLUSH);
@@ -1425,7 +1525,8 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
 	      VB0_VERTEXDATA |
 	      (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
-    OUT_BATCH(render_state->static_state_offset + offsetof(gen4_static_state_t, vb) +
-	      render_state->vb_offset * 4);
+    OUT_RELOC(render_state->dynamic_state_bo, I915_GEM_DOMAIN_VERTEX, 0,
+	      offsetof(gen4_dynamic_state, vb) +
+	      render_state->vb_offset * 4);
     OUT_BATCH(3);
     OUT_BATCH(0); // ignore for VERTEXDATA, but still there
```
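
Taken together, the i965_composite() hunks reduce the per-composite vertex path to the following shape (a sketch condensed from the hunks above, with the vertex emission elided; not the verbatim driver code):

```c
/* Write this composite's vertices through a mapping of the buffer
 * object, then unmap before emitting the batch commands. */
dri_bo_map (render_state->dynamic_state_bo, 1);	/* 1 => map writable */
dynamic_state = render_state->dynamic_state_bo->virtual;
vb = dynamic_state->vb;

i = render_state->vb_offset;
/* ... vb[i++] = position and texcoord floats, as in the hunks above ... */

dri_bo_unmap (render_state->dynamic_state_bo);

/* The vertex-buffer address is now emitted as a relocation against
 * the buffer object rather than as an offset into the pinned static
 * state, leaving the kernel free to place dynamic_state_bo wherever
 * it fits. */
OUT_RELOC(render_state->dynamic_state_bo, I915_GEM_DOMAIN_VERTEX, 0,
	  offsetof(gen4_dynamic_state, vb) + render_state->vb_offset * 4);
```

The design win is that the vertex data no longer has to live inside the single pinned static-state allocation, which is what makes the check_aperture_space accounting in _allocate_dynamic_state_internal meaningful.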