author     Chris Wilson <chris@chris-wilson.co.uk>    2011-04-04 16:34:58 +0100
committer  Chris Wilson <chris@chris-wilson.co.uk>    2011-04-04 16:42:57 +0100
commit     79444291a39c42039192a5baa3a71d52300cf4ee (patch)
tree       6830160e83f07c30627ff5d00bde0dce9d04a4dd
parent     d2106384be6f9df498392127c3ff64d0a2b17457 (diff)
i965: segregate each vertex element into its own buffer
Reduce the number of relocations by emitting only one relocation per
vertex element per vertex buffer.
References: https://bugs.freedesktop.org/show_bug.cgi?id=35733
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
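
In outline: each of the four composite vertex layouts (mask x affine) is keyed
by a small id, bound to its own vertex-buffer slot, and the vertex-buffer
packet for an id is emitted at most once per vertex bo, so each layout costs a
single relocation per buffer. A toy model of that caching scheme follows; the
struct and helper names here are illustrative only, not driver API — the real
logic lives in i965_select_vertex_buffer and gen4_composite_vertex_elements in
the diff below.

    /* Toy model of the per-buffer vertex-element caching in this patch. */
    #include <stdint.h>
    #include <stdio.h>

    struct vb_cache {
        uint32_t vertex_id;     /* bitmask of ids already bound to the current bo */
    };

    static int vertex_id_for(int has_mask, int is_affine)
    {
        return has_mask << 1 | is_affine;       /* four layouts -> ids 0..3 */
    }

    /* Returns 1 if a vertex-buffer packet (and its relocation) would actually
     * be emitted, 0 if the cached binding for this id is reused. */
    static int select_vertex_buffer(struct vb_cache *c, int id)
    {
        if (c->vertex_id & (1u << id))
            return 0;
        c->vertex_id |= 1u << id;
        return 1;
    }

    int main(void)
    {
        struct vb_cache c = { 0 };
        printf("%d\n", select_vertex_buffer(&c, vertex_id_for(1, 0))); /* 1: emitted */
        printf("%d\n", select_vertex_buffer(&c, vertex_id_for(1, 0))); /* 0: cached  */
        c.vertex_id = 0;        /* new vertex bo -> cache cleared, re-emit */
        printf("%d\n", select_vertex_buffer(&c, vertex_id_for(1, 0))); /* 1 */
        return 0;
    }
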
-rw-r--r--  src/i965_render.c       | 314
-rw-r--r--  src/intel.h             |   1
-rw-r--r--  src/intel_batchbuffer.c |   3
3 files changed, 171 insertions, 147 deletions
diff --git a/src/i965_render.c b/src/i965_render.c
index 90e2b638..e504bfe6 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -724,6 +724,7 @@ typedef struct gen4_composite_op {
 	sampler_state_extend_t mask_extend;
 	Bool is_affine;
 	wm_kernel_t wm_kernel;
+	int vertex_id;
 } gen4_composite_op;
 
 /** Private data for gen4 render accel implementation. */
@@ -1127,6 +1128,125 @@ i965_set_picture_surface_state(intel_screen_private *intel,
 	return offset;
 }
 
+static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
+{
+	struct gen4_render_state *render_state = intel->gen4_render_state;
+	gen4_composite_op *composite_op = &render_state->composite_op;
+	Bool has_mask = intel->render_mask != NULL;
+	Bool is_affine = composite_op->is_affine;
+	/*
+	 * number of extra parameters per vertex
+	 */
+	int nelem = has_mask ? 2 : 1;
+	/*
+	 * size of extra parameters:
+	 * 3 for homogenous (xyzw)
+	 * 2 for cartesian (xy)
+	 */
+	int selem = is_affine ? 2 : 3;
+	uint32_t w_component;
+	uint32_t src_format;
+	int id;
+
+	id = has_mask << 1 | is_affine;
+
+	if (composite_op->vertex_id == id)
+		return;
+
+	composite_op->vertex_id = id;
+
+	if (is_affine) {
+		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
+		w_component = BRW_VFCOMPONENT_STORE_1_FLT;
+	} else {
+		src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+		w_component = BRW_VFCOMPONENT_STORE_SRC;
+	}
+
+	if (IS_GEN5(intel)) {
+		/*
+		 * The reason to add this extra vertex element in the header is that
+		 * Ironlake has different vertex header definition and origin method to
+		 * set destination element offset doesn't exist anymore, which means
+		 * hardware requires a predefined vertex element layout.
+		 *
+		 * haihao proposed this approach to fill the first vertex element, so
+		 * origin layout for Gen4 doesn't need to change, and origin shader
+		 * programs behavior is also kept.
+		 *
+		 * I think this is not bad. - zhenyu
+		 */
+
+		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
+			  ((2 * (2 + nelem)) - 1));
+		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+			  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+			  (0 << VE0_OFFSET_SHIFT));
+
+		OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+	} else {
+		/* Set up our vertex elements, sourced from the single vertex buffer.
+		 * that will be set up later.
+		 */
+		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
+			  ((2 * (1 + nelem)) - 1));
+	}
+
+	/* x,y */
+	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+		  (0 << VE0_OFFSET_SHIFT));
+
+	if (IS_GEN5(intel))
+		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+	else
+		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+			  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+	/* u0, v0, w0 */
+	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+		  (src_format << VE0_FORMAT_SHIFT) |
+		  ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
+
+	if (IS_GEN5(intel))
+		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+	else
+		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+			  ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
+	/* u1, v1, w1 */
+	if (has_mask) {
+		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+			  (src_format << VE0_FORMAT_SHIFT) |
+			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
+
+		if (IS_GEN5(intel))
+			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+		else
+			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+				  ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
+	}
+}
+
 static void i965_emit_composite_state(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1141,7 +1261,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
 	sampler_state_filter_t mask_filter = composite_op->mask_filter;
 	sampler_state_extend_t src_extend = composite_op->src_extend;
 	sampler_state_extend_t mask_extend = composite_op->mask_extend;
-	Bool is_affine = composite_op->is_affine;
 	uint32_t src_blend, dst_blend;
 
 	intel->needs_render_state_emit = FALSE;
@@ -1299,111 +1418,7 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
 			  (URB_CS_ENTRIES << 0));
 	}
 
-	{
-		/*
-		 * number of extra parameters per vertex
-		 */
-		int nelem = mask ? 2 : 1;
-		/*
-		 * size of extra parameters:
-		 * 3 for homogenous (xyzw)
-		 * 2 for cartesian (xy)
-		 */
-		int selem = is_affine ? 2 : 3;
-		uint32_t w_component;
-		uint32_t src_format;
-
-		if (is_affine) {
-			src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
-			w_component = BRW_VFCOMPONENT_STORE_1_FLT;
-		} else {
-			src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
-			w_component = BRW_VFCOMPONENT_STORE_SRC;
-		}
-
-		if (IS_GEN5(intel)) {
-			/*
-			 * The reason to add this extra vertex element in the header is that
-			 * Ironlake has different vertex header definition and origin method to
-			 * set destination element offset doesn't exist anymore, which means
-			 * hardware requires a predefined vertex element layout.
-			 *
-			 * haihao proposed this approach to fill the first vertex element, so
-			 * origin layout for Gen4 doesn't need to change, and origin shader
-			 * programs behavior is also kept.
-			 *
-			 * I think this is not bad. - zhenyu
-			 */
-
-			OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
-				  ((2 * (2 + nelem)) - 1));
-			OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
-				  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
-				  (0 << VE0_OFFSET_SHIFT));
-
-			OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
-		} else {
-			/* Set up our vertex elements, sourced from the single vertex buffer.
-			 * that will be set up later.
-			 */
-			OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
-				  ((2 * (1 + nelem)) - 1));
-		}
-
-		/* x,y */
-		OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
-			  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
-			  (0 << VE0_OFFSET_SHIFT));
-
-		if (IS_GEN5(intel))
-			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
-		else
-			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
-				  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
-		/* u0, v0, w0 */
-		OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
-			  (src_format << VE0_FORMAT_SHIFT) |
-			  ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
-
-		if (IS_GEN5(intel))
-			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
-		else
-			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
-				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
-				  ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
-		/* u1, v1, w1 */
-		if (mask) {
-			OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
-				  (src_format << VE0_FORMAT_SHIFT) |
-				  (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
-
-			if (IS_GEN5(intel))
-				OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-					  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-					  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
-					  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
-			else
-				OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-					  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-					  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
-					  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
-					  ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
-		}
-	}
+	gen4_composite_vertex_elements(intel);
 }
 
 /**
@@ -1597,7 +1612,10 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 
 static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 {
-	int vertex_size = intel->floats_per_vertex;
+	int id = intel->gen4_render_state->composite_op.vertex_id;
+
+	if (intel->vertex_id & (1 << id))
+		return;
 
 	/* Set up the pointer to our (single) vertex buffer */
 	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
@@ -1606,13 +1624,13 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 	 * frequently switching between vertex sizes, like rgb10text.
 	 */
 	if (INTEL_INFO(intel)->gen >= 60) {
-		OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+		OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
 			  GEN6_VB0_VERTEXDATA |
-			  (4*vertex_size << VB0_BUFFER_PITCH_SHIFT));
+			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
 	} else {
-		OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
+		OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) |
 			  VB0_VERTEXDATA |
-			  (4*vertex_size << VB0_BUFFER_PITCH_SHIFT));
+			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
 	}
 	OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
 	if (INTEL_INFO(intel)->gen >= 50)
@@ -1623,7 +1641,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 		OUT_BATCH(0);
 	OUT_BATCH(0);		// ignore for VERTEXDATA, but still there
 
-	intel->last_floats_per_vertex = vertex_size;
+	intel->vertex_id |= 1 << id;
 }
 
 static void i965_bind_surfaces(struct intel_screen_private *intel)
@@ -1754,14 +1772,14 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	    intel->floats_per_vertex != intel->last_floats_per_vertex) {
 		intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex;
 		intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
+		intel->last_floats_per_vertex = intel->floats_per_vertex;
 	}
-	if (intel->floats_per_vertex != intel->last_floats_per_vertex ||
-	    intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
+	if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
 		i965_vertex_flush(intel);
 		intel_next_vertex(intel);
-		i965_select_vertex_buffer(intel);
 		intel->vertex_index = 0;
 	}
+	i965_select_vertex_buffer(intel);
 
 	if (intel->vertex_offset == 0) {
 		OUT_BATCH(BRW_3DPRIMITIVE |
@@ -2306,17 +2324,19 @@ gen6_composite_vertex_element_state(intel_screen_private *intel,
 	 * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
 	 * texture coordinate 1 if (has_mask is TRUE): same as above
 	 */
+	gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op;
 	int nelem = has_mask ? 2 : 1;
 	int selem = is_affine ? 2 : 3;
 	uint32_t w_component;
 	uint32_t src_format;
+	int id;
+
+	id = has_mask << 1 | is_affine;
 
-	if (intel->gen6_render_state.vertex_size == nelem &&
-	    intel->gen6_render_state.vertex_type == selem)
+	if (composite_op->vertex_id == id)
 		return;
 
-	intel->gen6_render_state.vertex_size = nelem;
-	intel->gen6_render_state.vertex_type = selem;
+	composite_op->vertex_id = id;
 
 	if (is_affine) {
 		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
@@ -2337,45 +2357,45 @@ gen6_composite_vertex_element_state(intel_screen_private *intel,
 	OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
 		  ((2 * (2 + nelem)) + 1 - 2));
 
-	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
-		  GEN6_VE0_VALID |
-		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
-		  (0 << VE0_OFFSET_SHIFT));
+	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+		  GEN6_VE0_VALID |
+		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+		  (0 << VE0_OFFSET_SHIFT));
 	OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
 
 	/* x,y */
-	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
-		  GEN6_VE0_VALID |
-		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
-		  (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
+	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+		  GEN6_VE0_VALID |
+		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+		  (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
 	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
 
 	/* u0, v0, w0 */
-	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
-		  GEN6_VE0_VALID |
-		  (src_format << VE0_FORMAT_SHIFT) |
-		  ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
+	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+		  GEN6_VE0_VALID |
+		  (src_format << VE0_FORMAT_SHIFT) |
+		  ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
 	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-		  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
-		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+		  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
 
 	/* u1, v1, w1 */
 	if (has_mask) {
-		OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
-			  GEN6_VE0_VALID |
-			  (src_format << VE0_FORMAT_SHIFT) |
-			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
+		OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+			  GEN6_VE0_VALID |
+			  (src_format << VE0_FORMAT_SHIFT) |
+			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
 		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
-			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
 	}
 }
diff --git a/src/intel.h b/src/intel.h
index b86d293f..8a54aefa 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -407,6 +407,7 @@ typedef struct intel_screen_private {
 	uint16_t vertex_count;
 	uint16_t vertex_index;
 	uint16_t vertex_used;
+	uint32_t vertex_id;
 	float vertex_ptr[4*1024];
 	dri_bo *vertex_bo;
 
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 8c7ab3d8..2bc00f90 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -53,6 +53,8 @@ static void intel_end_vertex(intel_screen_private *intel)
 		dri_bo_unreference(intel->vertex_bo);
 		intel->vertex_bo = NULL;
 	}
+
+	intel->vertex_id = 0;
 }
 
 void intel_next_vertex(intel_screen_private *intel)
@@ -89,6 +91,7 @@ void intel_batch_init(ScrnInfoPtr scrn)
 
 	intel->batch_emit_start = 0;
 	intel->batch_emitting = 0;
+	intel->vertex_id = 0;
 
 	intel_next_batch(scrn);
 }