3 files changed, 46 insertions, 29 deletions
diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h
index 2b898c26..c9b84215 100644
--- a/src/i830_batchbuffer.h
+++ b/src/i830_batchbuffer.h
@@ -60,6 +60,20 @@ intel_batch_emit_dword(I830Ptr pI830, uint32_t dword)
 }
 
 static inline void
+intel_batch_emit_reloc (I830Ptr  pI830,
+			dri_bo  *bo,		/* buffer whose ->offset forms the base of the emitted dword */
+			uint32_t read_domains,	/* forwarded unchanged to intel_bo_emit_reloc */
+			uint32_t write_domains,	/* forwarded unchanged to intel_bo_emit_reloc */
+			uint32_t delta)		/* added to bo->offset in the emitted dword */
+{   /* Store bo->offset + delta as the next batch dword and hand the relocation to intel_bo_emit_reloc. */
+    assert(intel_batch_space(pI830) >= 4); /* the single uint32_t written below needs 4 bytes */
+    *(uint32_t *)(pI830->batch_ptr + pI830->batch_used) = bo->offset + delta; /* value based on bo's current offset */
+    intel_bo_emit_reloc (pI830->batch_bo, read_domains, write_domains, delta,
+			 pI830->batch_used, bo); /* NOTE(review): batch_used is presumably the location to patch -- confirm */
+    pI830->batch_used += 4; /* step past the dword just written */
+}
+
+static inline void
 intel_batch_emit_reloc_pixmap(I830Ptr pI830, PixmapPtr pPixmap, uint32_t delta)
 {
     assert(pI830->batch_ptr != NULL);
@@ -71,6 +85,9 @@ intel_batch_emit_reloc_pixmap(I830Ptr pI830, PixmapPtr pPixmap, uint32_t delta)
 
 #define OUT_BATCH(dword) intel_batch_emit_dword(pI830, dword)
 
+#define OUT_RELOC(bo, read_domains, write_domains, delta) \
+	intel_batch_emit_reloc (pI830, bo, read_domains, write_domains, delta) /* like OUT_BATCH, relies on a pI830 in scope at the call site */
+
 #define OUT_RELOC_PIXMAP(pPixmap, delta)	\
 	intel_batch_emit_reloc_pixmap(pI830, pPixmap, delta)
 
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 4cede907..b6fac9f8 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -2510,6 +2510,9 @@ I830BlockHandler(int i,
        intel_batch_flush(pScrn);
 
        pI830->need_mi_flush = FALSE;
+#ifdef XF86DRI
+       drmCommandNone(pI830->drmSubFD, DRM_I915_GEM_THROTTLE);
+#endif
     }
 
     /*
@@ -2768,6 +2771,7 @@ i830_init_bufmgr(ScrnInfoPtr pScrn)
 	 batch_size = 4096;
 
       pI830->bufmgr = intel_bufmgr_gem_init(pI830->drmSubFD, batch_size);
+      intel_bufmgr_gem_enable_reuse(pI830->bufmgr);
    } else {
       pI830->bufmgr = intel_bufmgr_fake_init(pI830->fake_bufmgr_mem->offset,
 					     pI830->FbBase +
diff --git a/src/i965_render.c b/src/i965_render.c
index 8360be4a..62d0035f 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -37,6 +37,7 @@
 #include "xf86.h"
 #include "i830.h"
 #include "i915_reg.h"
+#include "i915_drm.h"
 
 /* bring in brw structs */
 #include "brw_defines.h"
@@ -60,7 +61,7 @@ do { 							\
 #endif
 
 #define MAX_VERTEX_PER_COMPOSITE    24
-#define MAX_VERTEX_BUFFERS	    256
+#define VERTEX_BUFFER_SIZE	    (16 * MAX_VERTEX_PER_COMPOSITE)
 
 struct blendinfo {
     Bool dst_alpha;
@@ -502,14 +503,14 @@ typedef struct _gen4_state {
 				     [BRW_BLENDFACTOR_COUNT];
     struct brw_cc_viewport cc_viewport;
     PAD64 (brw_cc_viewport, 0);
-
-    float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS];
 } gen4_state_t;
 
 /** Private data for gen4 render accel implementation. */
 struct gen4_render_state {
     gen4_state_t *card_state;
     uint32_t card_state_offset;
+    dri_bo *vb_bo;
+    int vb_bo_busy;
 
     int binding_table_index;
     int surface_state_index;
@@ -1270,12 +1271,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 {
     ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
-    gen4_state_t *card_state = pI830->gen4_render_state->card_state;
     struct gen4_render_state *render_state = pI830->gen4_render_state;
     Bool has_mask;
     Bool is_affine_src, is_affine_mask, is_affine;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
-    float *vb = card_state->vb;
+    float *vb;
     int i;
 
     is_affine_src = i830_transform_is_affine (pI830->transform[0]);
@@ -1352,11 +1352,25 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	}
     }
 
-    if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(card_state->vb)) {
-	i830WaitSync(pScrn);
+    /* Arrange for a buffer object with sufficient space for our
+     * vertices, and that isn't "busy", that is, it is not already
+     * referenced by a batch that has been flushed. */
+    if (! render_state->vb_bo || render_state->vb_bo_busy ||
+	render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE > VERTEX_BUFFER_SIZE)
+    {
+	if (render_state->vb_bo)
+	    dri_bo_unreference (render_state->vb_bo);
+
+	render_state->vb_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+					    VERTEX_BUFFER_SIZE * sizeof (float),
+					    4096);
 	render_state->vb_offset = 0;
     }
 
+    /* Map the vertex buffer object so we can write to it. */
+    dri_bo_map (render_state->vb_bo, 1);
+    vb = render_state->vb_bo->virtual;
+
     i = render_state->vb_offset;
     /* rect (x2,y2) */
     vb[i++] = (float)(dstX + w);
@@ -1399,7 +1413,9 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	if (!is_affine)
 	    vb[i++] = mask_w[0];
     }
-    assert (i * 4 <= sizeof(card_state->vb));
+    assert (i <= VERTEX_BUFFER_SIZE);
+
+    dri_bo_unmap (render_state->vb_bo);
 
     BEGIN_BATCH(12);
     OUT_BATCH(MI_FLUSH);
@@ -1408,7 +1424,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
 	      VB0_VERTEXDATA |
 	      (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
-    OUT_BATCH(render_state->card_state_offset + offsetof(gen4_state_t, vb) +
+    OUT_RELOC(render_state->vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
 	      render_state->vb_offset * 4);
     OUT_BATCH(3);
     OUT_BATCH(0); // ignore for VERTEXDATA, but still there
@@ -1431,26 +1447,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     ErrorF("sync after 3dprimitive\n");
     I830Sync(pScrn);
 #endif
-    /* we must be sure that the pipeline is flushed before next exa draw,
-       because that will be new state, binding state and instructions*/
-    {
-	BEGIN_BATCH(4);
-	OUT_BATCH(BRW_PIPE_CONTROL |
-		  BRW_PIPE_CONTROL_NOWRITE |
-		  BRW_PIPE_CONTROL_WC_FLUSH |
-		  BRW_PIPE_CONTROL_IS_FLUSH |
-		  (1 << 10) |  /* XXX texture cache flush for BLC/CTG */
-		  2);
-	OUT_BATCH(0); /* Destination address */
-	OUT_BATCH(0); /* Immediate data low DW */
-	OUT_BATCH(0); /* Immediate data high DW */
-	ADVANCE_BATCH();
-    }
-
-    /* Mark sync so we can wait for it before setting up the VB on the next
-     * rectangle.
-     */
-    i830MarkSync(pScrn);
 }
 
 /**