author    Chris Wilson <chris@chris-wilson.co.uk>  2011-04-04 16:34:58 +0100
committer Chris Wilson <chris@chris-wilson.co.uk>  2011-04-04 16:42:57 +0100
commit    79444291a39c42039192a5baa3a71d52300cf4ee (patch)
tree      6830160e83f07c30627ff5d00bde0dce9d04a4dd
parent    d2106384be6f9df498392127c3ff64d0a2b17457 (diff)
i965: segregate each vertex element into its own buffer
Reduce the number of relocations emitted by only emitting one relocation
per vertex element per vertex buffer.

References: https://bugs.freedesktop.org/show_bug.cgi?id=35733
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--  src/i965_render.c        314
-rw-r--r--  src/intel.h                1
-rw-r--r--  src/intel_batchbuffer.c    3
3 files changed, 171 insertions(+), 147 deletions(-)
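The core of the change is a small bookkeeping scheme visible in the diff below: each of the four possible vertex layouts (mask yes/no times affine yes/no) gets its own vertex-buffer slot, identified by `id = has_mask << 1 | is_affine`, and a bitmask (`intel->vertex_id`) records which slots already have a 3DSTATE_VERTEX_BUFFERS packet, and hence a relocation, emitted for the current vertex bo. The sketch below restates that scheme in isolation as a hedged illustration only; the slot encoding and the bit test mirror `gen4_composite_vertex_elements()` and `i965_select_vertex_buffer()`, but the standalone struct, the stubbed packet emission, and `main()` are assumptions for demonstration, not the driver's actual API.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Illustrative stand-in for the driver state this commit touches. */
struct fake_intel {
	uint32_t vertex_id;   /* bitmask: slots already bound to the current bo */
};

/* One slot per (has_mask, is_affine) combination, as in
 * gen4_composite_vertex_elements(): id = has_mask << 1 | is_affine. */
static int vertex_slot(bool has_mask, bool is_affine)
{
	return (has_mask << 1) | is_affine;
}

/* Mimics i965_select_vertex_buffer(): emit the vertex-buffer packet (and its
 * relocation) only the first time a given slot is used with the current bo. */
static void select_vertex_buffer(struct fake_intel *intel, int id)
{
	if (intel->vertex_id & (1u << id))
		return;   /* relocation for this slot already emitted */

	printf("emit 3DSTATE_VERTEX_BUFFERS + relocation for slot %d\n", id);
	intel->vertex_id |= 1u << id;
}

int main(void)
{
	struct fake_intel intel = { .vertex_id = 0 };

	select_vertex_buffer(&intel, vertex_slot(true, true));    /* emits */
	select_vertex_buffer(&intel, vertex_slot(true, true));    /* skipped */
	select_vertex_buffer(&intel, vertex_slot(false, true));   /* emits */

	/* intel_end_vertex() clears the mask when the vertex bo is retired,
	 * so the next bo re-emits its relocations. */
	intel.vertex_id = 0;
	return 0;
}
```

Because repeat composites that reuse a layout now skip the packet entirely, only one relocation is emitted per layout per vertex bo instead of one per state emission.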
diff --git a/src/i965_render.c b/src/i965_render.c
index 90e2b638..e504bfe6 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -724,6 +724,7 @@ typedef struct gen4_composite_op {
sampler_state_extend_t mask_extend;
Bool is_affine;
wm_kernel_t wm_kernel;
+ int vertex_id;
} gen4_composite_op;
/** Private data for gen4 render accel implementation. */
@@ -1127,6 +1128,125 @@ i965_set_picture_surface_state(intel_screen_private *intel,
return offset;
}
+static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
+{
+ struct gen4_render_state *render_state = intel->gen4_render_state;
+ gen4_composite_op *composite_op = &render_state->composite_op;
+ Bool has_mask = intel->render_mask != NULL;
+ Bool is_affine = composite_op->is_affine;
+ /*
+ * number of extra parameters per vertex
+ */
+ int nelem = has_mask ? 2 : 1;
+ /*
+ * size of extra parameters:
+ * 3 for homogenous (xyzw)
+ * 2 for cartesian (xy)
+ */
+ int selem = is_affine ? 2 : 3;
+ uint32_t w_component;
+ uint32_t src_format;
+ int id;
+
+ id = has_mask << 1 | is_affine;
+
+ if (composite_op->vertex_id == id)
+ return;
+
+ composite_op->vertex_id = id;
+
+ if (is_affine) {
+ src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
+ w_component = BRW_VFCOMPONENT_STORE_1_FLT;
+ } else {
+ src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ w_component = BRW_VFCOMPONENT_STORE_SRC;
+ }
+
+ if (IS_GEN5(intel)) {
+ /*
+ * The reason to add this extra vertex element in the header is that
+ * Ironlake has different vertex header definition and origin method to
+ * set destination element offset doesn't exist anymore, which means
+ * hardware requires a predefined vertex element layout.
+ *
+ * haihao proposed this approach to fill the first vertex element, so
+ * origin layout for Gen4 doesn't need to change, and origin shader
+ * programs behavior is also kept.
+ *
+ * I think this is not bad. - zhenyu
+ */
+
+ OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
+ ((2 * (2 + nelem)) - 1));
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+ } else {
+ /* Set up our vertex elements, sourced from the single vertex buffer.
+ * that will be set up later.
+ */
+ OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
+ ((2 * (1 + nelem)) - 1));
+ }
+
+ /* x,y */
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+
+ if (IS_GEN5(intel))
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ /* u0, v0, w0 */
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
+
+ if (IS_GEN5(intel))
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
+ /* u1, v1, w1 */
+ if (has_mask) {
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
+
+ if (IS_GEN5(intel))
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
+ }
+}
+
static void i965_emit_composite_state(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1141,7 +1261,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
sampler_state_filter_t mask_filter = composite_op->mask_filter;
sampler_state_extend_t src_extend = composite_op->src_extend;
sampler_state_extend_t mask_extend = composite_op->mask_extend;
- Bool is_affine = composite_op->is_affine;
uint32_t src_blend, dst_blend;
intel->needs_render_state_emit = FALSE;
@@ -1299,111 +1418,7 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
(URB_CS_ENTRIES << 0));
}
- {
- /*
- * number of extra parameters per vertex
- */
- int nelem = mask ? 2 : 1;
- /*
- * size of extra parameters:
- * 3 for homogenous (xyzw)
- * 2 for cartesian (xy)
- */
- int selem = is_affine ? 2 : 3;
- uint32_t w_component;
- uint32_t src_format;
-
- if (is_affine) {
- src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
- w_component = BRW_VFCOMPONENT_STORE_1_FLT;
- } else {
- src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
- w_component = BRW_VFCOMPONENT_STORE_SRC;
- }
-
- if (IS_GEN5(intel)) {
- /*
- * The reason to add this extra vertex element in the header is that
- * Ironlake has different vertex header definition and origin method to
- * set destination element offset doesn't exist anymore, which means
- * hardware requires a predefined vertex element layout.
- *
- * haihao proposed this approach to fill the first vertex element, so
- * origin layout for Gen4 doesn't need to change, and origin shader
- * programs behavior is also kept.
- *
- * I think this is not bad. - zhenyu
- */
-
- OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
- ((2 * (2 + nelem)) - 1));
- OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT));
-
- OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
- } else {
- /* Set up our vertex elements, sourced from the single vertex buffer.
- * that will be set up later.
- */
- OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
- ((2 * (1 + nelem)) - 1));
- }
-
- /* x,y */
- OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT));
-
- if (IS_GEN5(intel))
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
- else
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
- /* u0, v0, w0 */
- OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
- (src_format << VE0_FORMAT_SHIFT) |
- ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
-
- if (IS_GEN5(intel))
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
- else
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
- /* u1, v1, w1 */
- if (mask) {
- OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
- (src_format << VE0_FORMAT_SHIFT) |
- (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
-
- if (IS_GEN5(intel))
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
- else
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
- }
- }
+ gen4_composite_vertex_elements(intel);
}
/**
@@ -1597,7 +1612,10 @@ i965_prepare_composite(int op, PicturePtr source_picture,
static void i965_select_vertex_buffer(struct intel_screen_private *intel)
{
- int vertex_size = intel->floats_per_vertex;
+ int id = intel->gen4_render_state->composite_op.vertex_id;
+
+ if (intel->vertex_id & (1 << id))
+ return;
/* Set up the pointer to our (single) vertex buffer */
OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
@@ -1606,13 +1624,13 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
* frequently switching between vertex sizes, like rgb10text.
*/
if (INTEL_INFO(intel)->gen >= 60) {
- OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+ OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
GEN6_VB0_VERTEXDATA |
- (4*vertex_size << VB0_BUFFER_PITCH_SHIFT));
+ (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
} else {
- OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
+ OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) |
VB0_VERTEXDATA |
- (4*vertex_size << VB0_BUFFER_PITCH_SHIFT));
+ (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
}
OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
if (INTEL_INFO(intel)->gen >= 50)
@@ -1623,7 +1641,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
OUT_BATCH(0);
OUT_BATCH(0); // ignore for VERTEXDATA, but still there
- intel->last_floats_per_vertex = vertex_size;
+ intel->vertex_id |= 1 << id;
}
static void i965_bind_surfaces(struct intel_screen_private *intel)
@@ -1754,14 +1772,14 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
intel->floats_per_vertex != intel->last_floats_per_vertex) {
intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex;
intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
+ intel->last_floats_per_vertex = intel->floats_per_vertex;
}
- if (intel->floats_per_vertex != intel->last_floats_per_vertex ||
- intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
+ if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
i965_vertex_flush(intel);
intel_next_vertex(intel);
- i965_select_vertex_buffer(intel);
intel->vertex_index = 0;
}
+ i965_select_vertex_buffer(intel);
if (intel->vertex_offset == 0) {
OUT_BATCH(BRW_3DPRIMITIVE |
@@ -2306,17 +2324,19 @@ gen6_composite_vertex_element_state(intel_screen_private *intel,
* texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is TRUE): same as above
*/
+ gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op;
int nelem = has_mask ? 2 : 1;
int selem = is_affine ? 2 : 3;
uint32_t w_component;
uint32_t src_format;
+ int id;
+
+ id = has_mask << 1 | is_affine;
- if (intel->gen6_render_state.vertex_size == nelem &&
- intel->gen6_render_state.vertex_type == selem)
+ if (composite_op->vertex_id == id)
return;
- intel->gen6_render_state.vertex_size = nelem;
- intel->gen6_render_state.vertex_type = selem;
+ composite_op->vertex_id = id;
if (is_affine) {
src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
@@ -2337,45 +2357,45 @@ gen6_composite_vertex_element_state(intel_screen_private *intel,
OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
((2 * (2 + nelem)) + 1 - 2));
- OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- GEN6_VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT));
+ OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+ (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
/* x,y */
- OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- GEN6_VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
+ OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
/* u0, v0, w0 */
- OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- GEN6_VE0_VALID |
- (src_format << VE0_FORMAT_SHIFT) |
- ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
+ OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
/* u1, v1, w1 */
if (has_mask) {
- OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- GEN6_VE0_VALID |
- (src_format << VE0_FORMAT_SHIFT) |
- (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
+ OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
}
diff --git a/src/intel.h b/src/intel.h
index b86d293f..8a54aefa 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -407,6 +407,7 @@ typedef struct intel_screen_private {
uint16_t vertex_count;
uint16_t vertex_index;
uint16_t vertex_used;
+ uint32_t vertex_id;
float vertex_ptr[4*1024];
dri_bo *vertex_bo;
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 8c7ab3d8..2bc00f90 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -53,6 +53,8 @@ static void intel_end_vertex(intel_screen_private *intel)
dri_bo_unreference(intel->vertex_bo);
intel->vertex_bo = NULL;
}
+
+ intel->vertex_id = 0;
}
void intel_next_vertex(intel_screen_private *intel)
@@ -89,6 +91,7 @@ void intel_batch_init(ScrnInfoPtr scrn)
intel->batch_emit_start = 0;
intel->batch_emitting = 0;
+ intel->vertex_id = 0;
intel_next_batch(scrn);
}