diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2010-12-02 12:07:44 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2010-12-03 14:05:30 +0000 |
commit | 3cc74044ce3546cc7dc2e918cbabbb41a77f4026 (patch) | |
tree | 39aee30cee0323ba96da44c0247cb028262f1a75 | |
parent | a1fa0dbfdafea32139d4457d81b9d722df955eaf (diff) |
i965: Amalgamate surface binding tables
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/i830_render.c | 4 | ||||
-rw-r--r-- | src/i915_render.c | 4 | ||||
-rw-r--r-- | src/i965_render.c | 403 | ||||
-rw-r--r-- | src/intel.h | 21 | ||||
-rw-r--r-- | src/intel_batchbuffer.c | 50 | ||||
-rw-r--r-- | src/intel_batchbuffer.h | 10 | ||||
-rw-r--r-- | src/intel_uxa.c | 59 |
7 files changed, 265 insertions, 286 deletions
diff --git a/src/i830_render.c b/src/i830_render.c index 52646d36..2c40b5df 100644 --- a/src/i830_render.c +++ b/src/i830_render.c @@ -859,9 +859,7 @@ i830_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, intel_batch_end_atomic(scrn); } -void i830_batch_flush_notify(ScrnInfoPtr scrn) +void i830_batch_commit_notify(intel_screen_private *intel) { - intel_screen_private *intel = intel_get_screen_private(scrn); - intel->needs_render_state_emit = TRUE; } diff --git a/src/i915_render.c b/src/i915_render.c index fafdac5c..6d844e53 100644 --- a/src/i915_render.c +++ b/src/i915_render.c @@ -1197,10 +1197,8 @@ i915_vertex_flush(intel_screen_private *intel) } void -i915_batch_flush_notify(ScrnInfoPtr scrn) +i915_batch_commit_notify(intel_screen_private *intel) { - intel_screen_private *intel = intel_get_screen_private(scrn); - intel->needs_render_state_emit = TRUE; intel->render_current_dest = NULL; intel->last_floats_per_vertex = 0; diff --git a/src/i965_render.c b/src/i965_render.c index b1308662..2e1c3f78 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -307,29 +307,6 @@ i965_check_composite_texture(ScreenPtr screen, PicturePtr picture) #define URB_SF_ENTRY_SIZE 2 #define URB_SF_ENTRIES 1 -static const uint32_t sip_kernel_static[][4] = { -/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ - {0x00000030, 0x20000108, 0x00001220, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -/* nop (4) g0<1>UD { align1 + } */ - {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, -}; - /* * this program computes dA/dx and dA/dy for the texture coordinates along * with the base texture coordinate. It was extracted from the Mesa driver @@ -690,8 +667,6 @@ typedef struct brw_surface_state_padded { char pad[32 - sizeof(struct brw_surface_state)]; } brw_surface_state_padded; -#define PS_BINDING_TABLE_OFFSET (3 * sizeof(struct brw_surface_state_padded)) - struct gen4_cc_unit_state { /* Index by [src_blend][dst_blend] */ brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT] @@ -700,7 +675,6 @@ struct gen4_cc_unit_state { typedef struct gen4_composite_op { int op; - drm_intel_bo *surface_state_binding_table_bo; sampler_state_filter_t src_filter; sampler_state_filter_t mask_filter; sampler_state_extend_t src_extend; @@ -722,8 +696,6 @@ struct gen4_render_state { [SAMPLER_STATE_EXTEND_COUNT]; drm_intel_bo *wm_kernel_bo[WM_KERNEL_COUNT]; - drm_intel_bo *sip_kernel_bo; - drm_intel_bo *cc_vp_bo; drm_intel_bo *gen6_blend_bo; drm_intel_bo *gen6_depth_stencil_bo; @@ -1144,55 +1116,40 @@ static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type) * Sets up the common fields for a surface state buffer for the given * picture in the given surface state buffer. */ -static void +static int i965_set_picture_surface_state(intel_screen_private *intel, - dri_bo * ss_bo, int ss_index, PicturePtr picture, PixmapPtr pixmap, Bool is_dst) { - struct brw_surface_state_padded *ss; - struct brw_surface_state local_ss; struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + struct brw_surface_state *ss; + int offset; - ss = (struct brw_surface_state_padded *)ss_bo->virtual + ss_index; + ss = (struct brw_surface_state *) + (intel->surface_data + intel->surface_used); - /* Since ss is a pointer to WC memory, do all of our bit operations - * into a local temporary first. - */ - memset(&local_ss, 0, sizeof(local_ss)); - local_ss.ss0.surface_type = BRW_SURFACE_2D; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = BRW_SURFACE_2D; if (is_dst) { uint32_t dst_format = 0; - Bool ret = TRUE; + Bool ret; ret = i965_get_dest_format(picture, &dst_format); assert(ret == TRUE); - local_ss.ss0.surface_format = dst_format; + ss->ss0.surface_format = dst_format; } else { - local_ss.ss0.surface_format = i965_get_card_format(picture); + ss->ss0.surface_format = i965_get_card_format(picture); } - local_ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; - local_ss.ss0.writedisable_alpha = 0; - local_ss.ss0.writedisable_red = 0; - local_ss.ss0.writedisable_green = 0; - local_ss.ss0.writedisable_blue = 0; - local_ss.ss0.color_blend = 1; - local_ss.ss0.vert_line_stride = 0; - local_ss.ss0.vert_line_stride_ofs = 0; - local_ss.ss0.mipmap_layout_mode = 0; - local_ss.ss0.render_cache_read_mode = 0; - local_ss.ss1.base_addr = priv->bo->offset; - - local_ss.ss2.mip_count = 0; - local_ss.ss2.render_target_rotation = 0; - local_ss.ss2.height = pixmap->drawable.height - 1; - local_ss.ss2.width = pixmap->drawable.width - 1; - local_ss.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; - local_ss.ss3.tile_walk = 0; /* Tiled X */ - local_ss.ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; - - memcpy(ss, &local_ss, sizeof(local_ss)); + ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; + ss->ss0.color_blend = 1; + ss->ss1.base_addr = priv->bo->offset; + + ss->ss2.height = pixmap->drawable.height - 1; + ss->ss2.width = pixmap->drawable.width - 1; + ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; + ss->ss3.tile_walk = 0; /* Tiled X */ + ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; if (priv->bo != NULL) { uint32_t write_domain, read_domains; @@ -1206,12 +1163,18 @@ i965_set_picture_surface_state(intel_screen_private *intel, } intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain); - dri_bo_emit_reloc(ss_bo, read_domains, write_domain, + dri_bo_emit_reloc(intel->surface_bo, + read_domains, write_domain, 0, - ss_index * sizeof(*ss) + + intel->surface_used + offsetof(struct brw_surface_state, ss1), priv->bo); } + + offset = intel->surface_used; + intel->surface_used += sizeof(struct brw_surface_state_padded); + + return offset; } static void i965_emit_composite_state(ScrnInfoPtr scrn) @@ -1229,71 +1192,45 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) sampler_state_extend_t src_extend = composite_op->src_extend; sampler_state_extend_t mask_extend = composite_op->mask_extend; Bool is_affine = composite_op->is_affine; - int urb_vs_start, urb_vs_size; - int urb_gs_start, urb_gs_size; - int urb_clip_start, urb_clip_size; - int urb_sf_start, urb_sf_size; - int urb_cs_start, urb_cs_size; uint32_t src_blend, dst_blend; - dri_bo *surface_state_binding_table_bo = composite_op->surface_state_binding_table_bo; intel->needs_render_state_emit = FALSE; - IntelEmitInvarientState(scrn); - intel->last_3d = LAST_3D_RENDER; - - /* Mark the destination dirty within this batch */ - intel_batch_mark_pixmap_domains(intel, - intel_get_pixmap_private(dest), - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - - urb_vs_start = 0; - urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; - urb_gs_start = urb_vs_start + urb_vs_size; - urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; - urb_clip_start = urb_gs_start + urb_gs_size; - urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; - urb_sf_start = urb_clip_start + urb_clip_size; - urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; - urb_cs_start = urb_sf_start + urb_sf_size; - urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; - - i965_get_blend_cntl(op, mask_picture, dest_picture->format, - &src_blend, &dst_blend); - /* Begin the long sequence of commands needed to set up the 3D * rendering pipe */ - /* URB fence. Erratum (Vol 1a, p32): URB_FENCE must not cross a - * cache-line (64 bytes). Start by aligning this sequence of ops to - * a cache-line... - */ - ALIGN_BATCH(64); + if (intel->needs_3d_invariant) { + if (IS_GEN5(intel)) { + /* Ironlake errata workaround: Before disabling the clipper, + * you have to MI_FLUSH to get the pipeline idle. + */ + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + } - assert(intel->in_batch_atomic); - { /* Match Mesa driver setup */ - OUT_BATCH(MI_FLUSH | - MI_STATE_INSTRUCTION_CACHE_FLUSH | - BRW_MI_GLOBAL_SNAPSHOT_RESET); if (INTEL_INFO(intel)->gen >= 45) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); else OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); - OUT_BATCH(BRW_CS_URB_STATE | 0); - OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ - (0 << 0)); /* Number of URB Entries */ + /* Set system instruction pointer */ + OUT_BATCH(BRW_STATE_SIP | 0); + OUT_BATCH(0); + + intel->needs_3d_invariant = FALSE; + } + if (intel->surface_reloc == 0) { /* Zero out the two base address registers so all offsets are * absolute. */ if (IS_GEN5(intel)) { OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ - OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + intel->surface_reloc = intel->batch_used; + intel_batch_emit_dword(intel, + intel->surface_bo->offset | BASE_ADDRESS_MODIFY); OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ /* general state max addr, disabled */ @@ -1305,42 +1242,21 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) } else { OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ - OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + intel->surface_reloc = intel->batch_used; + intel_batch_emit_dword(intel, + intel->surface_bo->offset | BASE_ADDRESS_MODIFY); OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ /* general state max addr, disabled */ OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media object state max addr, disabled */ OUT_BATCH(0 | BASE_ADDRESS_MODIFY); } - /* Set system instruction pointer */ - OUT_BATCH(BRW_STATE_SIP | 0); - OUT_RELOC(render_state->sip_kernel_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, 0); } - if (IS_GEN5(intel)) { - /* Ironlake errata workaround: Before disabling the clipper, - * you have to MI_FLUSH to get the pipeline idle. - */ - OUT_BATCH(MI_FLUSH); - } + i965_get_blend_cntl(op, mask_picture, dest_picture->format, + &src_blend, &dst_blend); { - int pipe_ctrl; - /* Pipe control */ - - if (IS_GEN5(intel)) - pipe_ctrl = BRW_PIPE_CONTROL_NOWRITE; - else - pipe_ctrl = - BRW_PIPE_CONTROL_NOWRITE | - BRW_PIPE_CONTROL_IS_FLUSH; - - OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctrl | 2); - OUT_BATCH(0); /* Destination address */ - OUT_BATCH(0); /* Immediate data low DW */ - OUT_BATCH(0); /* Immediate data high DW */ - /* Binding table pointers */ OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); OUT_BATCH(0); /* vs */ @@ -1348,14 +1264,15 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ /* Only the PS uses the binding table */ - OUT_BATCH(PS_BINDING_TABLE_OFFSET); + OUT_BATCH(intel->surface_table); /* The drawing rectangle clipping is always on. Set it to values that * shouldn't do any clipping. */ OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); OUT_BATCH(0x00000000); /* ymin, xmin */ - OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | DRAW_XMAX(dest->drawable.width - 1)); /* ymax, xmax */ + OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | + DRAW_XMAX(dest->drawable.width - 1)); /* ymax, xmax */ OUT_BATCH(0x00000000); /* yorigin, xorigin */ /* skip the depth buffer */ @@ -1386,42 +1303,54 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) I915_GEM_DOMAIN_INSTRUCTION, 0, offsetof(struct gen4_cc_unit_state, cc_state[src_blend][dst_blend])); + } - /* URB fence. Erratum (Vol 1a, p32): URB_FENCE must not cross a - * cache-line (64 bytes). - * - * 21 preceding dwords since start of section: 84 bytes. - * 12 bytes for URB_FENCE, implies that the end-of-instruction - * does not cross the cache-line boundary... - * - * A total of 33 or 35 dwords since alignment: 132, 140 bytes. - * Again, the URB_FENCE will not cross a cache-line. + { + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + + urb_vs_start = 0; + urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + urb_gs_start = urb_vs_start + urb_vs_size; + urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + urb_clip_start = urb_gs_start + urb_gs_size; + urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + urb_sf_start = urb_clip_start + urb_clip_size; + urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + urb_cs_start = urb_sf_start + urb_sf_size; + urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + + /* Erratum (Vol 1a, p32): + * URB_FENCE must not cross a cache-line (64 bytes). */ + if ((intel->batch_used & 15) > (16 - 3)) { + int cnt = 16 - (intel->batch_used & 15); + while (cnt--) + OUT_BATCH(MI_NOOP); + } + OUT_BATCH(BRW_URB_FENCE | UF0_CS_REALLOC | UF0_SF_REALLOC | UF0_CLIP_REALLOC | - UF0_GS_REALLOC | UF0_VS_REALLOC | 1); - OUT_BATCH(((urb_clip_start + - urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | - ((urb_gs_start + - urb_gs_size) << UF1_GS_FENCE_SHIFT) | ((urb_vs_start - + - urb_vs_size) - << - UF1_VS_FENCE_SHIFT)); - OUT_BATCH(((urb_cs_start + - urb_cs_size) << UF2_CS_FENCE_SHIFT) | ((urb_sf_start - + - urb_sf_size) - << - UF2_SF_FENCE_SHIFT)); + UF0_GS_REALLOC | + UF0_VS_REALLOC | + 1); + OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); /* Constant buffer state */ OUT_BATCH(BRW_CS_URB_STATE | 0); OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | (URB_CS_ENTRIES << 0)); } + { /* * number of extra parameters per vertex @@ -1550,7 +1479,7 @@ static Bool i965_composite_check_aperture(ScrnInfoPtr scrn) drm_intel_bo *bo_table[] = { intel->batch_bo, intel->vertex_bo, - composite_op->surface_state_binding_table_bo, + intel->surface_bo, render_state->vs_state_bo, render_state->sf_state_bo, render_state->sf_mask_state_bo, @@ -1560,12 +1489,11 @@ static Bool i965_composite_check_aperture(ScrnInfoPtr scrn) [composite_op->mask_filter] [composite_op->mask_extend], render_state->cc_state_bo, - render_state->sip_kernel_bo, }; drm_intel_bo *gen6_bo_table[] = { intel->batch_bo, intel->vertex_bo, - composite_op->surface_state_binding_table_bo, + intel->surface_bo, render_state->wm_kernel_bo[composite_op->wm_kernel], render_state->ps_sampler_state_bo[composite_op->src_filter] [composite_op->src_extend] @@ -1585,6 +1513,26 @@ static Bool i965_composite_check_aperture(ScrnInfoPtr scrn) ARRAY_SIZE(bo_table)) == 0; } +static void i965_surface_flush(struct intel_screen_private *intel) +{ + drm_intel_bo_subdata(intel->surface_bo, + 0, intel->surface_used, + intel->surface_data); + intel->surface_used = 0; + + assert (intel->surface_reloc != 0); + drm_intel_bo_emit_reloc(intel->batch_bo, + intel->surface_reloc * 4, + intel->surface_bo, BASE_ADDRESS_MODIFY, + I915_GEM_DOMAIN_INSTRUCTION, 0); + intel->surface_reloc = 0; + + drm_intel_bo_unreference(intel->surface_bo); + intel->surface_bo = + drm_intel_bo_alloc(intel->bufmgr, "surface data", + sizeof(intel->surface_data), 4096); +} + Bool i965_prepare_composite(int op, PicturePtr source_picture, PicturePtr mask_picture, PicturePtr dest_picture, @@ -1594,8 +1542,6 @@ i965_prepare_composite(int op, PicturePtr source_picture, intel_screen_private *intel = intel_get_screen_private(scrn); struct gen4_render_state *render_state = intel->gen4_render_state; gen4_composite_op *composite_op = &render_state->composite_op; - uint32_t *binding_table; - drm_intel_bo *surface_state_binding_table_bo; composite_op->src_filter = sampler_state_filter_from_picture(source_picture->filter); @@ -1653,39 +1599,6 @@ i965_prepare_composite(int op, PicturePtr source_picture, (mask && intel_pixmap_is_dirty(mask))) intel_batch_emit_flush(scrn); - - /* Set up the surface states. */ - surface_state_binding_table_bo = dri_bo_alloc(intel->bufmgr, "surface_state", - 3 * (sizeof(struct brw_surface_state_padded) + sizeof(uint32_t)), - 4096); - if (dri_bo_map(surface_state_binding_table_bo, 1) != 0) { - dri_bo_unreference(surface_state_binding_table_bo); - return FALSE; - } - /* Set up the state buffer for the destination surface */ - i965_set_picture_surface_state(intel, surface_state_binding_table_bo, 0, - dest_picture, dest, TRUE); - /* Set up the source surface state buffer */ - i965_set_picture_surface_state(intel, surface_state_binding_table_bo, 1, - source_picture, source, FALSE); - if (mask) { - /* Set up the mask surface state buffer */ - i965_set_picture_surface_state(intel, surface_state_binding_table_bo, 2, - mask_picture, mask, FALSE); - } - - /* Set up the binding table of surface indices to surface state. */ - binding_table = (uint32_t *)((char *)surface_state_binding_table_bo->virtual + PS_BINDING_TABLE_OFFSET); - binding_table[0] = 0; - binding_table[1] = sizeof(struct brw_surface_state_padded); - - if (mask) { - binding_table[2] = 2 * sizeof(struct brw_surface_state_padded); - } else { - binding_table[2] = 0; - } - dri_bo_unmap(surface_state_binding_table_bo); - composite_op->op = op; intel->render_source_picture = source_picture; intel->render_mask_picture = mask_picture; @@ -1693,8 +1606,6 @@ i965_prepare_composite(int op, PicturePtr source_picture, intel->render_source = source; intel->render_mask = mask; intel->render_dest = dest; - drm_intel_bo_unreference(composite_op->surface_state_binding_table_bo); - composite_op->surface_state_binding_table_bo = surface_state_binding_table_bo; intel->scale_units[0][0] = 1. / source->drawable.width; intel->scale_units[0][1] = 1. / source->drawable.height; @@ -1760,6 +1671,10 @@ i965_prepare_composite(int op, PicturePtr source_picture, } } + if (sizeof(intel->surface_data) - intel->surface_used < + 4 * sizeof(struct brw_surface_state_padded)) + i965_surface_flush(intel); + intel->needs_render_state_emit = TRUE; return TRUE; @@ -1796,6 +1711,35 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel) intel->last_floats_per_vertex = vertex_size; } +static void i965_bind_surfaces(struct intel_screen_private *intel) +{ + uint32_t *binding_table; + + assert(intel->surface_used + 4 * sizeof(struct brw_surface_state_padded) < + sizeof(intel->surface_data)); + + binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); + intel->surface_table = intel->surface_used; + intel->surface_used += sizeof(struct brw_surface_state_padded); + + binding_table[0] = + i965_set_picture_surface_state(intel, + intel->render_dest_picture, + intel->render_dest, + TRUE); + binding_table[1] = + i965_set_picture_surface_state(intel, + intel->render_source_picture, + intel->render_source, + FALSE); + if (intel->render_mask) { + binding_table[2] = + i965_set_picture_surface_state(intel, + intel->render_mask_picture, + intel->render_mask, + FALSE); + } +} void i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, @@ -1884,12 +1828,12 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, intel_batch_start_atomic(scrn, 200); if (intel->needs_render_state_emit) { + i965_bind_surfaces(intel); + if (INTEL_INFO(intel)->gen >= 60) gen6_emit_composite_state(scrn); else i965_emit_composite_state(scrn); - } else { - OUT_BATCH(MI_FLUSH); } if (intel->vertex_offset == 0) { @@ -1970,11 +1914,10 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, intel_batch_end_atomic(scrn); } -void i965_batch_flush_notify(ScrnInfoPtr scrn) +void i965_batch_commit_notify(intel_screen_private *intel) { - intel_screen_private *intel = intel_get_screen_private(scrn); - intel->needs_render_state_emit = TRUE; + intel->needs_3d_invariant = TRUE; intel->last_floats_per_vertex = 0; intel->vertex_index = 0; } @@ -1990,6 +1933,13 @@ void gen4_render_state_init(ScrnInfoPtr scrn) drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; drm_intel_bo *border_color_bo; + intel->needs_3d_invariant = TRUE; + + intel->surface_bo = + drm_intel_bo_alloc(intel->bufmgr, "surface data", + sizeof(intel->surface_data), 4096); + intel->surface_used = 0; + if (INTEL_INFO(intel)->gen >= 60) return gen6_render_state_init(scrn); @@ -2092,11 +2042,6 @@ void gen4_render_state_init(ScrnInfoPtr scrn) drm_intel_bo_unreference(border_color_bo); render_state->cc_state_bo = gen4_create_cc_unit_state(scrn); - render_state->sip_kernel_bo = intel_bo_alloc_for_data(scrn, - sip_kernel_static, - sizeof - (sip_kernel_static), - "sip kernel"); } /** @@ -2107,9 +2052,8 @@ void gen4_render_state_cleanup(ScrnInfoPtr scrn) intel_screen_private *intel = intel_get_screen_private(scrn); struct gen4_render_state *render_state = intel->gen4_render_state; int i, j, k, l, m; - gen4_composite_op *composite_op = &render_state->composite_op; - drm_intel_bo_unreference(composite_op->surface_state_binding_table_bo); + drm_intel_bo_unreference(intel->surface_bo); drm_intel_bo_unreference(render_state->vs_state_bo); drm_intel_bo_unreference(render_state->sf_state_bo); drm_intel_bo_unreference(render_state->sf_mask_state_bo); @@ -2134,7 +2078,6 @@ void gen4_render_state_cleanup(ScrnInfoPtr scrn) drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]); drm_intel_bo_unreference(render_state->cc_state_bo); - drm_intel_bo_unreference(render_state->sip_kernel_bo); drm_intel_bo_unreference(render_state->cc_vp_bo); drm_intel_bo_unreference(render_state->gen6_blend_bo); @@ -2235,9 +2178,6 @@ gen6_composite_invarient_states(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - OUT_BATCH(MI_FLUSH | - MI_STATE_INSTRUCTION_CACHE_FLUSH | - BRW_MI_GLOBAL_SNAPSHOT_RESET); OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); @@ -2254,13 +2194,15 @@ gen6_composite_invarient_states(ScrnInfoPtr scrn) } static void -gen6_composite_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo) +gen6_composite_state_base_address(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ - OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + intel->surface_reloc = intel->batch_used; + intel_batch_emit_dword(intel, + intel->surface_bo->offset | BASE_ADDRESS_MODIFY); OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ @@ -2450,7 +2392,7 @@ gen6_composite_binding_table_pointers(ScrnInfoPtr scrn) OUT_BATCH(0); /* vs */ OUT_BATCH(0); /* gs */ /* Only the PS uses the binding table */ - OUT_BATCH(PS_BINDING_TABLE_OFFSET); + OUT_BATCH(intel->surface_table); } static void @@ -2575,26 +2517,21 @@ gen6_emit_composite_state(ScrnInfoPtr scrn) sampler_state_extend_t mask_extend = composite_op->mask_extend; Bool is_affine = composite_op->is_affine; uint32_t src_blend, dst_blend; - drm_intel_bo *surface_state_binding_table_bo = composite_op->surface_state_binding_table_bo; drm_intel_bo *ps_sampler_state_bo = render_state->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend]; intel->needs_render_state_emit = FALSE; IntelEmitInvarientState(scrn); intel->last_3d = LAST_3D_RENDER; - /* Mark the destination dirty within this batch */ - intel_batch_mark_pixmap_domains(intel, - intel_get_pixmap_private(dest), - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - i965_get_blend_cntl(op, - mask_picture, - dest_picture->format, - &src_blend, - &dst_blend); + i965_get_blend_cntl(op, + mask_picture, + dest_picture->format, + &src_blend, + &dst_blend); assert(intel->in_batch_atomic); gen6_composite_invarient_states(scrn); - gen6_composite_state_base_address(scrn, surface_state_binding_table_bo); + if (intel->surface_reloc == 0) + gen6_composite_state_base_address(scrn); gen6_composite_viewport_state_pointers(scrn, render_state->cc_vp_bo); gen6_composite_urb(scrn); gen6_composite_cc_state_pointers(scrn, @@ -2668,3 +2605,9 @@ void i965_vertex_flush(struct intel_screen_private *intel) intel->vertex_offset = 0; } } + +void i965_batch_flush(struct intel_screen_private *intel) +{ + if (intel->surface_used) + i965_surface_flush(intel); +} diff --git a/src/intel.h b/src/intel.h index 51faaf17..57ce2cfd 100644 --- a/src/intel.h +++ b/src/intel.h @@ -352,8 +352,11 @@ typedef struct intel_screen_private { CloseScreenProcPtr CloseScreen; + void (*context_switch) (struct intel_screen_private *intel, + int new_mode); void (*vertex_flush) (struct intel_screen_private *intel); - void (*batch_flush_notify) (ScrnInfoPtr scrn); + void (*batch_flush) (struct intel_screen_private *intel); + void (*batch_commit_notify) (struct intel_screen_private *intel); uxa_driver_t *uxa_driver; Bool need_sync; @@ -400,6 +403,7 @@ typedef struct intel_screen_private { PixmapPtr render_current_dest; Bool render_source_is_solid; Bool render_mask_is_solid; + Bool needs_3d_invariant; Bool needs_render_state_emit; Bool needs_render_vertex_emit; Bool needs_render_ca_pass; @@ -433,6 +437,12 @@ typedef struct intel_screen_private { float vertex_ptr[4*1024]; dri_bo *vertex_bo; + uint8_t surface_data[16*1024]; + uint16_t surface_used; + uint16_t surface_table; + uint32_t surface_reloc; + dri_bo *surface_bo; + /* 965 render acceleration state */ struct gen4_render_state *gen4_render_state; @@ -565,8 +575,8 @@ Bool i915_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, void i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h); void i915_vertex_flush(intel_screen_private *intel); -void i915_batch_flush_notify(ScrnInfoPtr scrn); -void i830_batch_flush_notify(ScrnInfoPtr scrn); +void i915_batch_commit_notify(intel_screen_private *intel); +void i830_batch_commit_notify(intel_screen_private *intel); /* i965_render.c */ unsigned int gen4_render_state_size(ScrnInfoPtr scrn); void gen4_render_state_init(ScrnInfoPtr scrn); @@ -581,8 +591,9 @@ Bool i965_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, void i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h); -void i965_vertex_flush(struct intel_screen_private *intel); -void i965_batch_flush_notify(ScrnInfoPtr scrn); +void i965_vertex_flush(intel_screen_private *intel); +void i965_batch_flush(intel_screen_private *intel); +void i965_batch_commit_notify(intel_screen_private *intel); Bool intel_transform_is_affine(PictTransformPtr t); Bool diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 34c6cc90..378a78c7 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -40,7 +40,7 @@ #include "i915_drm.h" #include "i965_reg.h" -#define DUMP_BATCHBUFFERS NULL /* "/tmp/i915-batchbuffers.dump" */ +#define DUMP_BATCHBUFFERS NULL // "/tmp/i915-batchbuffers.dump" static void intel_end_vertex(intel_screen_private *intel) { @@ -149,23 +149,23 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn) assert (!intel->in_batch_atomic); /* Big hammer, look to the pipelined flushes in future. */ - if (intel->current_batch == BLT_BATCH) { - BEGIN_BATCH_BLT(4); - OUT_BATCH(MI_FLUSH_DW | 2); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else if ((INTEL_INFO(intel)->gen >= 60)) { - BEGIN_BATCH(4); - OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); /* Mesa does so */ - OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH | - BRW_PIPE_CONTROL_WC_FLUSH | - BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH | - BRW_PIPE_CONTROL_NOWRITE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + if ((INTEL_INFO(intel)->gen >= 60)) { + if (intel->current_batch == BLT_BATCH) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW | 2); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_WC_FLUSH | + BRW_PIPE_CONTROL_NOWRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } } else { flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE; if (INTEL_INFO(intel)->gen >= 40) @@ -189,6 +189,9 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush) intel->vertex_flush(intel); intel_end_vertex(intel); + if (intel->batch_flush) + intel->batch_flush(intel); + if (flush) intel_batch_emit_flush(scrn); @@ -210,11 +213,12 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush) } ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr); - if (ret == 0) + if (ret == 0) { ret = drm_intel_bo_mrb_exec(intel->batch_bo, intel->batch_used*4, NULL, 0, 0xffffffff, - intel->current_batch); + IS_GEN6(intel) ? intel->current_batch: I915_EXEC_DEFAULT); + } if (ret != 0) { if (ret == -EIO) { @@ -275,8 +279,10 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush) if (intel->debug_flush & DEBUG_FLUSH_WAIT) intel_batch_wait_last(scrn); - if (intel->batch_flush_notify) - intel->batch_flush_notify(scrn); + if (intel->batch_commit_notify) + intel->batch_commit_notify(intel); + + intel->current_batch = 0; } /** Waits on the last emitted batchbuffer to be completed. */ diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h index 21890c44..5b9ff5ed 100644 --- a/src/intel_batchbuffer.h +++ b/src/intel_batchbuffer.h @@ -184,16 +184,18 @@ do { \ FatalError("%s: BEGIN_BATCH called without closing " \ "ADVANCE_BATCH\n", __FUNCTION__); \ assert(!intel->in_batch_atomic); \ - if (intel->current_batch != batch_idx) \ - intel_batch_submit(scrn, FALSE); \ + if (intel->current_batch != batch_idx) { \ + if (intel->current_batch && intel->context_switch) \ + intel->context_switch(intel, batch_idx); \ + } \ intel_batch_require_space(scrn, intel, (n) * 4); \ intel->current_batch = batch_idx; \ intel->batch_emitting = (n); \ intel->batch_emit_start = intel->batch_used; \ } while (0) -#define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH) -#define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH) +#define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH) +#define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH) #define ADVANCE_BATCH() do { \ if (intel->batch_emitting == 0) \ diff --git a/src/intel_uxa.c b/src/intel_uxa.c index 9310e895..439d1091 100644 --- a/src/intel_uxa.c +++ b/src/intel_uxa.c @@ -85,10 +85,16 @@ int uxa_pixmap_index; #endif static void -ironlake_blt_workaround(ScrnInfoPtr scrn) +gen6_context_switch(intel_screen_private *intel, + int new_mode) { - intel_screen_private *intel = intel_get_screen_private(scrn); + intel_batch_submit(intel->scrn, FALSE); +} +static void +gen5_context_switch(intel_screen_private *intel, + int new_mode) +{ /* Ironlake has a limitation that a 3D or Media command can't * be the first command after a BLT, unless it's * non-pipelined. Instead of trying to track it and emit a @@ -96,11 +102,24 @@ ironlake_blt_workaround(ScrnInfoPtr scrn) * non-pipelined 3D instruction after each blit. */ - if (IS_GEN5(intel)) { - BEGIN_BATCH(2); + if (new_mode == I915_EXEC_BLT) { + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + MI_INHIBIT_RENDER_CACHE_FLUSH); + } else { OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16); OUT_BATCH(0); - ADVANCE_BATCH(); + } +} + +static void +gen4_context_switch(intel_screen_private *intel, + int new_mode) +{ + if (new_mode == I915_EXEC_BLT) { + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + MI_INHIBIT_RENDER_CACHE_FLUSH); } } @@ -292,10 +311,7 @@ static void intel_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) pitch = intel_pixmap_pitch(pixmap); { - if (IS_GEN6(intel)) - BEGIN_BATCH_BLT(6); - else - BEGIN_BATCH(6); + BEGIN_BATCH_BLT(6); cmd = XY_COLOR_BLT_CMD; @@ -319,8 +335,6 @@ static void intel_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) OUT_BATCH(intel->BR[16]); ADVANCE_BATCH(); } - - ironlake_blt_workaround(scrn); } static void intel_uxa_done_solid(PixmapPtr pixmap) @@ -442,10 +456,7 @@ intel_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, src_pitch = intel_pixmap_pitch(intel->render_source); { - if (IS_GEN6(intel)) - BEGIN_BATCH_BLT(8); - else - BEGIN_BATCH(8); + BEGIN_BATCH_BLT(8); cmd = XY_SRC_COPY_BLT_CMD; @@ -485,7 +496,6 @@ intel_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, ADVANCE_BATCH(); } - ironlake_blt_workaround(scrn); } static void intel_uxa_done_copy(PixmapPtr dest) @@ -1183,6 +1193,8 @@ Bool intel_uxa_init(ScreenPtr screen) intel->floats_per_vertex = 0; intel->last_floats_per_vertex = 0; intel->vertex_bo = NULL; + intel->surface_used = 0; + intel->surface_reloc = 0; /* Solid fill */ intel->uxa_driver->check_solid = intel_uxa_check_solid; @@ -1205,7 +1217,7 @@ Bool intel_uxa_init(ScreenPtr screen) intel->uxa_driver->composite = i830_composite; intel->uxa_driver->done_composite = i830_done_composite; - intel->batch_flush_notify = i830_batch_flush_notify; + intel->batch_commit_notify = i830_batch_commit_notify; } else if (IS_GEN3(intel)) { intel->uxa_driver->check_composite = i915_check_composite; intel->uxa_driver->check_composite_target = i915_check_composite_target; @@ -1215,7 +1227,7 @@ Bool intel_uxa_init(ScreenPtr screen) intel->uxa_driver->done_composite = i830_done_composite; intel->vertex_flush = i915_vertex_flush; - intel->batch_flush_notify = i915_batch_flush_notify; + intel->batch_commit_notify = i915_batch_commit_notify; } else { intel->uxa_driver->check_composite = i965_check_composite; intel->uxa_driver->check_composite_texture = i965_check_composite_texture; @@ -1224,7 +1236,16 @@ Bool intel_uxa_init(ScreenPtr screen) intel->uxa_driver->done_composite = i830_done_composite; intel->vertex_flush = i965_vertex_flush; - intel->batch_flush_notify = i965_batch_flush_notify; + intel->batch_flush = i965_batch_flush; + intel->batch_commit_notify = i965_batch_commit_notify; + + if (IS_GEN4(intel)) { + intel->context_switch = gen4_context_switch; + } else if (IS_GEN5(intel)) { + intel->context_switch = gen5_context_switch; + } else { + intel->context_switch = gen6_context_switch; + } } /* PutImage */ |