diff options
author | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2009-06-25 20:16:45 +0000 |
---|---|---|
committer | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2009-06-25 20:16:45 +0000 |
commit | 1968cea3515172a985c8cad914ead74b3621d8ff (patch) | |
tree | 14f241250ecdc59597ec213a3b8f898fd5794bb8 /driver/xf86-video-intel/src/i965_render.c | |
parent | f3606649643e49df0add499c50a7d11e99f0d4f5 (diff) |
update to xf86-video-intel 2.7.1. Tested by many.
Diffstat (limited to 'driver/xf86-video-intel/src/i965_render.c')
-rw-r--r-- | driver/xf86-video-intel/src/i965_render.c | 1128 |
1 files changed, 693 insertions, 435 deletions
diff --git a/driver/xf86-video-intel/src/i965_render.c b/driver/xf86-video-intel/src/i965_render.c index 8360be4a1..c123a3695 100644 --- a/driver/xf86-video-intel/src/i965_render.c +++ b/driver/xf86-video-intel/src/i965_render.c @@ -42,25 +42,14 @@ #include "brw_defines.h" #include "brw_structs.h" -#ifdef I830DEBUG -#define DEBUG_I830FALLBACK 1 -#endif - -#ifdef DEBUG_I830FALLBACK -#define I830FALLBACK(s, arg...) \ -do { \ - DPRINTF(PFX, "EXA fallback: " s "\n", ##arg); \ - return FALSE; \ -} while(0) -#else -#define I830FALLBACK(s, arg...) \ -do { \ - return FALSE; \ -} while(0) -#endif - -#define MAX_VERTEX_PER_COMPOSITE 24 -#define MAX_VERTEX_BUFFERS 256 +/* 24 = 4 vertices/composite * 3 texcoords/vertex * 2 floats/texcoord + * + * This is an upper-bound based on the case of a non-affine + * transformation and with a mask, but useful for sizing all cases for + * simplicity. + */ +#define VERTEX_FLOATS_PER_COMPOSITE 24 +#define VERTEX_BUFFER_SIZE (256 * VERTEX_FLOATS_PER_COMPOSITE) struct blendinfo { Bool dst_alpha; @@ -161,6 +150,8 @@ static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format, static Bool i965_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format) { + ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum]; + switch (pDstPicture->format) { case PICT_a8r8g8b8: case PICT_x8r8g8b8: @@ -192,6 +183,7 @@ static Bool i965_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format) static Bool i965_check_composite_texture(PicturePtr pPict, int unit) { + ScrnInfoPtr pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum]; int w = pPict->pDrawable->width; int h = pPict->pDrawable->height; int i; @@ -209,7 +201,7 @@ static Bool i965_check_composite_texture(PicturePtr pPict, int unit) I830FALLBACK("Unsupported picture format 0x%x\n", (int)pPict->format); - if (pPict->repeat && pPict->repeatType != RepeatNormal) + if (pPict->repeatType > RepeatReflect) I830FALLBACK("extended repeat (%d) not supported\n", pPict->repeatType); @@ -226,6 +218,7 @@ Bool i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture) { + ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum]; uint32_t tmp1; /* Check for unsupported compositing operations. */ @@ -322,8 +315,6 @@ static const uint32_t sf_kernel_mask_static[][4] = { /* ps kernels */ #define PS_KERNEL_NUM_GRF 32 #define PS_MAX_THREADS 48 -#define PS_SCRATCH_SPACE 1024 -#define PS_SCRATCH_SPACE_LOG 0 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */ static const uint32_t ps_kernel_nomask_affine_static [][4] = { #include "exa_wm_xy.g4b" @@ -399,12 +390,6 @@ static const uint32_t ps_kernel_masknoca_projective_static [][4] = { #include "exa_wm_write.g4b" }; -/** - * Storage for the static kernel data with template name, rounded to 64 bytes. - */ -#define KERNEL_DECL(template) \ - uint32_t template [((sizeof (template ## _static) + 63) & ~63) / 16][4]; - #define WM_STATE_DECL(kernel) \ struct brw_wm_unit_state wm_state_ ## kernel[SAMPLER_STATE_FILTER_COUNT] \ [SAMPLER_STATE_EXTEND_COUNT] \ @@ -427,9 +412,49 @@ typedef enum { typedef enum { SAMPLER_STATE_EXTEND_NONE, SAMPLER_STATE_EXTEND_REPEAT, + SAMPLER_STATE_EXTEND_PAD, + SAMPLER_STATE_EXTEND_REFLECT, SAMPLER_STATE_EXTEND_COUNT } sampler_state_extend_t; +typedef enum { + WM_KERNEL_NOMASK_AFFINE, + WM_KERNEL_NOMASK_PROJECTIVE, + WM_KERNEL_MASKCA_AFFINE, + WM_KERNEL_MASKCA_PROJECTIVE, + WM_KERNEL_MASKCA_SRCALPHA_AFFINE, + WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + WM_KERNEL_MASKNOCA_AFFINE, + WM_KERNEL_MASKNOCA_PROJECTIVE, + WM_KERNEL_COUNT +} wm_kernel_t; + +#define KERNEL(kernel_enum, kernel, masked) \ + [kernel_enum] = {&kernel, sizeof(kernel), masked} +struct wm_kernel_info { + void *data; + unsigned int size; + Bool has_mask; +} wm_kernels[] = { + KERNEL(WM_KERNEL_NOMASK_AFFINE, + ps_kernel_nomask_affine_static, FALSE), + KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, + ps_kernel_nomask_projective_static, FALSE), + KERNEL(WM_KERNEL_MASKCA_AFFINE, + ps_kernel_maskca_affine_static, TRUE), + KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, + ps_kernel_maskca_projective_static, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, + ps_kernel_maskca_srcalpha_affine_static, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + ps_kernel_maskca_srcalpha_projective_static, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_AFFINE, + ps_kernel_masknoca_affine_static, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, + ps_kernel_masknoca_projective_static, TRUE), +}; +#undef KERNEL + typedef struct _brw_cc_unit_state_padded { struct brw_cc_unit_state state; char pad[64 - sizeof (struct brw_cc_unit_state)]; @@ -440,81 +465,53 @@ typedef struct brw_surface_state_padded { char pad[32 - sizeof (struct brw_surface_state)]; } brw_surface_state_padded; -/** - * Gen4 rendering state buffer structure. - * - * Ideally this structure would contain static data for all of the - * combinations of state that we use for Render acceleration, and another - * buffer would be the use-and-throw-away surface and vertex data. See the - * intel-batchbuffer branch for an implementation of that. For now, it - * has the static program data, and then a changing buffer containing all - * the rest. - */ -typedef struct _gen4_state { - uint8_t wm_scratch[128 * PS_MAX_THREADS]; - - KERNEL_DECL (sip_kernel); - KERNEL_DECL (sf_kernel); - KERNEL_DECL (sf_kernel_mask); - KERNEL_DECL (ps_kernel_nomask_affine); - KERNEL_DECL (ps_kernel_nomask_projective); - KERNEL_DECL (ps_kernel_maskca_affine); - KERNEL_DECL (ps_kernel_maskca_projective); - KERNEL_DECL (ps_kernel_maskca_srcalpha_affine); - KERNEL_DECL (ps_kernel_maskca_srcalpha_projective); - KERNEL_DECL (ps_kernel_masknoca_affine); - KERNEL_DECL (ps_kernel_masknoca_projective); - - struct brw_vs_unit_state vs_state; - PAD64 (brw_vs_unit_state, 0); - - struct brw_sf_unit_state sf_state; - PAD64 (brw_sf_unit_state, 0); - struct brw_sf_unit_state sf_state_mask; - PAD64 (brw_sf_unit_state, 1); - - WM_STATE_DECL (nomask_affine); - WM_STATE_DECL (nomask_projective); - WM_STATE_DECL (maskca_affine); - WM_STATE_DECL (maskca_projective); - WM_STATE_DECL (maskca_srcalpha_affine); - WM_STATE_DECL (maskca_srcalpha_projective); - WM_STATE_DECL (masknoca_affine); - WM_STATE_DECL (masknoca_projective); - - uint32_t binding_table[128]; - - struct brw_surface_state_padded surface_state[32]; - - /* Index by [src_filter][src_extend][mask_filter][mask_extend]. Two of - * the structs happen to add to 32 bytes. - */ - struct brw_sampler_state sampler_state[SAMPLER_STATE_FILTER_COUNT] - [SAMPLER_STATE_EXTEND_COUNT] - [SAMPLER_STATE_FILTER_COUNT] - [SAMPLER_STATE_EXTEND_COUNT][2]; - - struct brw_sampler_default_color sampler_default_color; - PAD64 (brw_sampler_default_color, 0); - +struct gen4_cc_unit_state { /* Index by [src_blend][dst_blend] */ brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT] [BRW_BLENDFACTOR_COUNT]; - struct brw_cc_viewport cc_viewport; - PAD64 (brw_cc_viewport, 0); +}; - float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS]; -} gen4_state_t; +typedef float gen4_vertex_buffer[VERTEX_BUFFER_SIZE]; + +typedef struct gen4_composite_op { + int op; + PicturePtr source_picture; + PicturePtr mask_picture; + PicturePtr dest_picture; + PixmapPtr source; + PixmapPtr mask; + PixmapPtr dest; + drm_intel_bo *binding_table_bo; + sampler_state_filter_t src_filter; + sampler_state_filter_t mask_filter; + sampler_state_extend_t src_extend; + sampler_state_extend_t mask_extend; + Bool is_affine; + wm_kernel_t wm_kernel; +} gen4_composite_op; /** Private data for gen4 render accel implementation. */ struct gen4_render_state { - gen4_state_t *card_state; - uint32_t card_state_offset; + drm_intel_bo *vs_state_bo; + drm_intel_bo *sf_state_bo; + drm_intel_bo *sf_mask_state_bo; + drm_intel_bo *cc_state_bo; + drm_intel_bo *wm_state_bo[WM_KERNEL_COUNT] + [SAMPLER_STATE_FILTER_COUNT] + [SAMPLER_STATE_EXTEND_COUNT] + [SAMPLER_STATE_FILTER_COUNT] + [SAMPLER_STATE_EXTEND_COUNT]; + drm_intel_bo *wm_kernel_bo[WM_KERNEL_COUNT]; + + drm_intel_bo *sip_kernel_bo; + dri_bo* vertex_buffer_bo; + + gen4_composite_op composite_op; - int binding_table_index; - int surface_state_index; int vb_offset; int vertex_size; + + Bool needs_state_emit; }; /** @@ -524,11 +521,25 @@ struct gen4_render_state { * calculate dA/dx and dA/dy. Hand these interpolation coefficients * back to SF which then hands pixels off to WM. */ -static void -sf_state_init (struct brw_sf_unit_state *sf_state, int kernel_offset) +static drm_intel_bo * +gen4_create_sf_state(ScrnInfoPtr scrn, drm_intel_bo *kernel_bo) { + I830Ptr pI830 = I830PTR(scrn); + struct brw_sf_unit_state *sf_state; + drm_intel_bo *sf_state_bo; + + sf_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 SF state", + sizeof(*sf_state), 4096); + drm_intel_bo_map(sf_state_bo, TRUE); + sf_state = sf_state_bo->virtual; + memset(sf_state, 0, sizeof(*sf_state)); sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + sf_state->thread0.kernel_start_pointer = + intel_emit_reloc(sf_state_bo, + offsetof(struct brw_sf_unit_state, thread0), + kernel_bo, sf_state->thread0.grf_reg_count << 1, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; sf_state->sf1.single_program_flow = 1; sf_state->sf1.binding_table_entry_count = 0; sf_state->sf1.thread_priority = 0; @@ -556,21 +567,49 @@ sf_state_init (struct brw_sf_unit_state *sf_state, int kernel_offset) sf_state->sf6.dest_org_vbias = 0x8; sf_state->sf6.dest_org_hbias = 0x8; - assert((kernel_offset & 63) == 0); - sf_state->thread0.kernel_start_pointer = kernel_offset >> 6; + drm_intel_bo_unmap(sf_state_bo); + + return sf_state_bo; +} + +static drm_intel_bo * +sampler_border_color_create(ScrnInfoPtr scrn) +{ + struct brw_sampler_legacy_border_color sampler_border_color; + + /* Set up the sampler border color (always transparent black) */ + memset(&sampler_border_color, 0, sizeof(sampler_border_color)); + sampler_border_color.color[0] = 0; /* R */ + sampler_border_color.color[1] = 0; /* G */ + sampler_border_color.color[2] = 0; /* B */ + sampler_border_color.color[3] = 0; /* A */ + + return intel_bo_alloc_for_data(scrn, + &sampler_border_color, + sizeof(sampler_border_color), + "gen4 render sampler border color"); } static void -sampler_state_init (struct brw_sampler_state *sampler_state, +sampler_state_init (drm_intel_bo *sampler_state_bo, + struct brw_sampler_state *sampler_state, sampler_state_filter_t filter, sampler_state_extend_t extend, - int default_color_offset) + drm_intel_bo *border_color_bo) { + uint32_t sampler_state_offset; + + sampler_state_offset = (char *)sampler_state - + (char *)sampler_state_bo->virtual; + /* PS kernel use this sampler */ memset(sampler_state, 0, sizeof(*sampler_state)); sampler_state->ss0.lod_preclamp = 1; /* GL mode */ - sampler_state->ss0.default_color_mode = 0; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. */ + sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; switch(filter) { default: @@ -596,20 +635,71 @@ sampler_state_init (struct brw_sampler_state *sampler_state, sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; break; + case SAMPLER_STATE_EXTEND_PAD: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + break; + case SAMPLER_STATE_EXTEND_REFLECT: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + break; } - assert((default_color_offset & 31) == 0); - sampler_state->ss2.default_color_pointer = default_color_offset >> 5; + sampler_state->ss2.border_color_pointer = + intel_emit_reloc(sampler_state_bo, sampler_state_offset + + offsetof(struct brw_sampler_state, ss2), + border_color_bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0) >> 5; sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ } +static drm_intel_bo * +gen4_create_sampler_state(ScrnInfoPtr scrn, + sampler_state_filter_t src_filter, + sampler_state_extend_t src_extend, + sampler_state_filter_t mask_filter, + sampler_state_extend_t mask_extend, + drm_intel_bo *border_color_bo) +{ + I830Ptr pI830 = I830PTR(scrn); + drm_intel_bo *sampler_state_bo; + struct brw_sampler_state *sampler_state; + + sampler_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 sampler state", + sizeof(struct brw_sampler_state) * 2, + 4096); + drm_intel_bo_map(sampler_state_bo, TRUE); + sampler_state = sampler_state_bo->virtual; + + sampler_state_init(sampler_state_bo, + &sampler_state[0], + src_filter, src_extend, + border_color_bo); + sampler_state_init(sampler_state_bo, + &sampler_state[1], + mask_filter, mask_extend, + border_color_bo); + + drm_intel_bo_unmap(sampler_state_bo); + + return sampler_state_bo; +} + static void -cc_state_init (struct brw_cc_unit_state *cc_state, +cc_state_init (drm_intel_bo *cc_state_bo, + uint32_t cc_state_offset, int src_blend, int dst_blend, - int cc_viewport_offset) + drm_intel_bo *cc_vp_bo) { + struct brw_cc_unit_state *cc_state; + + cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual + + cc_state_offset); + memset(cc_state, 0, sizeof(*cc_state)); cc_state->cc0.stencil_enable = 0; /* disable stencil */ cc_state->cc2.depth_test = 0; /* disable depth test */ @@ -618,8 +708,11 @@ cc_state_init (struct brw_cc_unit_state *cc_state, cc_state->cc3.blend_enable = 1; /* enable color blend */ cc_state->cc3.alpha_test = 0; /* disable alpha test */ - assert((cc_viewport_offset & 31) == 0); - cc_state->cc4.cc_viewport_state_offset = cc_viewport_offset >> 5; + cc_state->cc4.cc_viewport_state_offset = + intel_emit_reloc(cc_state_bo, cc_state_offset + + offsetof(struct brw_cc_unit_state, cc4), + cc_vp_bo, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; cc_state->cc5.dither_enable = 0; /* disable dither */ cc_state->cc5.logicop_func = 0xc; /* COPY */ @@ -639,21 +732,34 @@ cc_state_init (struct brw_cc_unit_state *cc_state, cc_state->cc6.dest_blend_factor = dst_blend; } -static void -wm_state_init (struct brw_wm_unit_state *wm_state, - Bool has_mask, - int scratch_offset, - int kernel_offset, - int sampler_state_offset) +static drm_intel_bo * +gen4_create_wm_state(ScrnInfoPtr scrn, + Bool has_mask, drm_intel_bo *kernel_bo, + drm_intel_bo *sampler_bo) { + I830Ptr pI830 = I830PTR(scrn); + struct brw_wm_unit_state *wm_state; + drm_intel_bo *wm_state_bo; + + wm_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 WM state", + sizeof(*wm_state), 4096); + drm_intel_bo_map(wm_state_bo, TRUE); + wm_state = wm_state_bo->virtual; + memset(wm_state, 0, sizeof (*wm_state)); wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + wm_state->thread0.kernel_start_pointer = + intel_emit_reloc(wm_state_bo, + offsetof(struct brw_wm_unit_state, thread0), + kernel_bo, wm_state->thread0.grf_reg_count << 1, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; + wm_state->thread1.single_program_flow = 0; - assert((scratch_offset & 1023) == 0); - wm_state->thread2.scratch_space_base_pointer = scratch_offset >> 10; + /* scratch space is not used in our kernel */ + wm_state->thread2.scratch_space_base_pointer = 0; + wm_state->thread2.per_thread_scratch_space = 0; - wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG; wm_state->thread3.const_urb_entry_read_length = 0; wm_state->thread3.const_urb_entry_read_offset = 0; @@ -662,9 +768,13 @@ wm_state_init (struct brw_wm_unit_state *wm_state, wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ wm_state->wm4.stats_enable = 1; /* statistic */ - assert((sampler_state_offset & 31) == 0); - wm_state->wm4.sampler_state_pointer = sampler_state_offset >> 5; wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ + wm_state->wm4.sampler_state_pointer = + intel_emit_reloc(wm_state_bo, offsetof(struct brw_wm_unit_state, wm4), + sampler_bo, + wm_state->wm4.stats_enable + + (wm_state->wm4.sampler_count << 2), + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; wm_state->wm5.max_threads = PS_MAX_THREADS - 1; wm_state->wm5.transposed_urb_read = 0; wm_state->wm5.thread_dispatch_enable = 1; @@ -675,9 +785,6 @@ wm_state_init (struct brw_wm_unit_state *wm_state, wm_state->wm5.enable_8_pix = 0; wm_state->wm5.early_depth_test = 1; - assert((kernel_offset & 63) == 0); - wm_state->thread0.kernel_start_pointer = kernel_offset >> 6; - /* Each pair of attributes (src/mask coords) is two URB entries */ if (has_mask) { wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ @@ -686,116 +793,76 @@ wm_state_init (struct brw_wm_unit_state *wm_state, wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ wm_state->thread3.urb_entry_read_length = 2; } + + drm_intel_bo_unmap(wm_state_bo); + + return wm_state_bo; } -/** - * Called at EnterVT to fill in our state buffer with any static information. - */ -static void -gen4_state_init (struct gen4_render_state *render_state) +static drm_intel_bo * +gen4_create_cc_viewport(ScrnInfoPtr scrn) +{ + I830Ptr pI830 = I830PTR(scrn); + drm_intel_bo *bo; + struct brw_cc_viewport cc_viewport; + + cc_viewport.min_depth = -1.e35; + cc_viewport.max_depth = 1.e35; + + bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 render unit state", + sizeof(cc_viewport), 4096); + drm_intel_bo_subdata(bo, 0, sizeof(cc_viewport), &cc_viewport); + + return bo; +} + +static drm_intel_bo * +gen4_create_vs_unit_state(ScrnInfoPtr scrn) { - int i, j, k, l; - gen4_state_t *card_state = render_state->card_state; - uint32_t state_base_offset = render_state->card_state_offset; - -#define KERNEL_COPY(kernel) \ - memcpy(card_state->kernel, kernel ## _static, sizeof(kernel ## _static)) - - KERNEL_COPY (sip_kernel); - KERNEL_COPY (sf_kernel); - KERNEL_COPY (sf_kernel_mask); - KERNEL_COPY (ps_kernel_nomask_affine); - KERNEL_COPY (ps_kernel_nomask_projective); - KERNEL_COPY (ps_kernel_maskca_affine); - KERNEL_COPY (ps_kernel_maskca_projective); - KERNEL_COPY (ps_kernel_maskca_srcalpha_affine); - KERNEL_COPY (ps_kernel_maskca_srcalpha_projective); - KERNEL_COPY (ps_kernel_masknoca_affine); - KERNEL_COPY (ps_kernel_masknoca_projective); -#undef KERNEL_COPY + struct brw_vs_unit_state vs_state; + memset(&vs_state, 0, sizeof(vs_state)); /* Set up the vertex shader to be disabled (passthrough) */ - memset(&card_state->vs_state, 0, sizeof(card_state->vs_state)); - card_state->vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; - card_state->vs_state.thread4.urb_entry_allocation_size = - URB_VS_ENTRY_SIZE - 1; - card_state->vs_state.vs6.vs_enable = 0; - card_state->vs_state.vs6.vert_cache_disable = 1; - - /* Set up the sampler default color (always transparent black) */ - memset(&card_state->sampler_default_color, 0, - sizeof(card_state->sampler_default_color)); - card_state->sampler_default_color.color[0] = 0.0; /* R */ - card_state->sampler_default_color.color[1] = 0.0; /* G */ - card_state->sampler_default_color.color[2] = 0.0; /* B */ - card_state->sampler_default_color.color[3] = 0.0; /* A */ - - card_state->cc_viewport.min_depth = -1.e35; - card_state->cc_viewport.max_depth = 1.e35; - - sf_state_init (&card_state->sf_state, - state_base_offset + - offsetof (gen4_state_t, sf_kernel)); - sf_state_init (&card_state->sf_state_mask, - state_base_offset + - offsetof (gen4_state_t, sf_kernel_mask)); + vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; + vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + vs_state.vs6.vs_enable = 0; + vs_state.vs6.vert_cache_disable = 1; - for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) { - for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) { - for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { - for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { - sampler_state_init (&card_state->sampler_state[i][j][k][l][0], - i, j, - state_base_offset + - offsetof (gen4_state_t, - sampler_default_color)); - sampler_state_init (&card_state->sampler_state[i][j][k][l][1], - k, l, - state_base_offset + - offsetof (gen4_state_t, - sampler_default_color)); - } - } - } - } + return intel_bo_alloc_for_data(scrn, &vs_state, sizeof(vs_state), + "gen4 render VS state"); +} + +/** + * Set up all combinations of cc state: each blendfactor for source and + * dest. + */ +static drm_intel_bo * +gen4_create_cc_unit_state(ScrnInfoPtr scrn) +{ + I830Ptr pI830 = I830PTR(scrn); + struct gen4_cc_unit_state *cc_state; + drm_intel_bo *cc_state_bo, *cc_vp_bo; + int i, j; + cc_vp_bo = gen4_create_cc_viewport(scrn); + cc_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 CC state", + sizeof(*cc_state), 4096); + drm_intel_bo_map(cc_state_bo, TRUE); + cc_state = cc_state_bo->virtual; for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { - cc_state_init (&card_state->cc_state[i][j].state, i, j, - state_base_offset + - offsetof (gen4_state_t, cc_viewport)); + cc_state_init(cc_state_bo, + offsetof(struct gen4_cc_unit_state, + cc_state[i][j].state), + i, j, cc_vp_bo); } } + drm_intel_bo_unmap(cc_state_bo); -#define SETUP_WM_STATE(kernel, has_mask) \ - wm_state_init(&card_state->wm_state_ ## kernel [i][j][k][l], \ - has_mask, \ - state_base_offset + offsetof(gen4_state_t, \ - wm_scratch), \ - state_base_offset + offsetof(gen4_state_t, \ - ps_kernel_ ## kernel), \ - state_base_offset + offsetof(gen4_state_t, \ - sampler_state[i][j][k][l])); - + drm_intel_bo_unreference(cc_vp_bo); - for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) { - for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) { - for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { - for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { - SETUP_WM_STATE (nomask_affine, FALSE); - SETUP_WM_STATE (nomask_projective, FALSE); - SETUP_WM_STATE (maskca_affine, TRUE); - SETUP_WM_STATE (maskca_projective, TRUE); - SETUP_WM_STATE (maskca_srcalpha_affine, TRUE); - SETUP_WM_STATE (maskca_srcalpha_projective, TRUE); - SETUP_WM_STATE (masknoca_affine, TRUE); - SETUP_WM_STATE (masknoca_projective, TRUE); - } - } - } - } -#undef SETUP_WM_STATE + return cc_state_bo; } static uint32_t @@ -828,33 +895,36 @@ sampler_state_filter_from_picture (int filter) } static sampler_state_extend_t -sampler_state_extend_from_picture (int repeat) +sampler_state_extend_from_picture (int repeat_type) { - switch (repeat) { + switch (repeat_type) { case RepeatNone: return SAMPLER_STATE_EXTEND_NONE; case RepeatNormal: return SAMPLER_STATE_EXTEND_REPEAT; + case RepeatPad: + return SAMPLER_STATE_EXTEND_PAD; + case RepeatReflect: + return SAMPLER_STATE_EXTEND_REFLECT; default: return -1; } } /** - * Sets up the common fields for a surface state buffer for the given picture - * in the surface state buffer at index, and returns the offset within the - * state buffer for this entry. + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. */ -static unsigned int -i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss, +static void +i965_set_picture_surface_state(dri_bo *ss_bo, int ss_index, PicturePtr pPicture, PixmapPtr pPixmap, Bool is_dst) { - I830Ptr pI830 = I830PTR(pScrn); - struct gen4_render_state *render_state= pI830->gen4_render_state; - gen4_state_t *card_state = render_state->card_state; + struct brw_surface_state_padded *ss; struct brw_surface_state local_ss; - uint32_t offset; + dri_bo *pixmap_bo = i830_get_pixmap_bo(pPixmap); + + ss = (struct brw_surface_state_padded *)ss_bo->virtual + ss_index; /* Since ss is a pointer to WC memory, do all of our bit operations * into a local temporary first. @@ -882,7 +952,10 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss, local_ss.ss0.vert_line_stride_ofs = 0; local_ss.ss0.mipmap_layout_mode = 0; local_ss.ss0.render_cache_read_mode = 0; - local_ss.ss1.base_addr = intel_get_pixmap_offset(pPixmap); + if (pixmap_bo != NULL) + local_ss.ss1.base_addr = pixmap_bo->offset; + else + local_ss.ss1.base_addr = intel_get_pixmap_offset(pPixmap); local_ss.ss2.mip_count = 0; local_ss.ss2.render_target_rotation = 0; @@ -894,62 +967,52 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss, memcpy(ss, &local_ss, sizeof(local_ss)); - offset = (char *)ss - (char *)card_state; - assert((offset & 31) == 0); + if (pixmap_bo != NULL) { + uint32_t write_domain, read_domains; - return offset; + if (is_dst) { + write_domain = I915_GEM_DOMAIN_RENDER; + read_domains = I915_GEM_DOMAIN_RENDER; + } else { + write_domain = 0; + read_domains = I915_GEM_DOMAIN_SAMPLER; + } + dri_bo_emit_reloc(ss_bo, read_domains, write_domain, + 0, + ss_index * sizeof(*ss) + + offsetof(struct brw_surface_state, ss1), + pixmap_bo); + } } -Bool -i965_prepare_composite(int op, PicturePtr pSrcPicture, - PicturePtr pMaskPicture, PicturePtr pDstPicture, - PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) +static void +i965_emit_composite_state(ScrnInfoPtr pScrn) { - ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum]; I830Ptr pI830 = I830PTR(pScrn); struct gen4_render_state *render_state= pI830->gen4_render_state; - gen4_state_t *card_state = render_state->card_state; - struct brw_surface_state_padded *ss; - uint32_t sf_state_offset; - sampler_state_filter_t src_filter, mask_filter; - sampler_state_extend_t src_extend, mask_extend; - Bool is_affine_src, is_affine_mask, is_affine; + gen4_composite_op *composite_op = &render_state->composite_op; + int op = composite_op->op; + PicturePtr pMaskPicture = composite_op->mask_picture; + PicturePtr pDstPicture = composite_op->dest_picture; + PixmapPtr pMask = composite_op->mask; + PixmapPtr pDst = composite_op->dest; + sampler_state_filter_t src_filter = composite_op->src_filter; + sampler_state_filter_t mask_filter = composite_op->mask_filter; + sampler_state_extend_t src_extend = composite_op->src_extend; + sampler_state_extend_t mask_extend = composite_op->mask_extend; + Bool is_affine = composite_op->is_affine; int urb_vs_start, urb_vs_size; int urb_gs_start, urb_gs_size; int urb_clip_start, urb_clip_size; int urb_sf_start, urb_sf_size; int urb_cs_start, urb_cs_size; - char *state_base; - int state_base_offset; uint32_t src_blend, dst_blend; - uint32_t *binding_table; + dri_bo *binding_table_bo = composite_op->binding_table_bo; - IntelEmitInvarientState(pScrn); - *pI830->last_3d = LAST_3D_RENDER; + render_state->needs_state_emit = FALSE; - pI830->scale_units[0][0] = pSrc->drawable.width; - pI830->scale_units[0][1] = pSrc->drawable.height; - - pI830->transform[0] = pSrcPicture->transform; - is_affine_src = i830_transform_is_affine (pI830->transform[0]); - - if (!pMask) { - pI830->transform[1] = NULL; - pI830->scale_units[1][0] = -1; - pI830->scale_units[1][1] = -1; - is_affine_mask = TRUE; - } else { - pI830->transform[1] = pMaskPicture->transform; - pI830->scale_units[1][0] = pMask->drawable.width; - pI830->scale_units[1][1] = pMask->drawable.height; - is_affine_mask = i830_transform_is_affine (pI830->transform[1]); - } - - is_affine = is_affine_src && is_affine_mask; - - state_base_offset = pI830->gen4_render_state_mem->offset; - assert((state_base_offset & 63) == 0); - state_base = (char *)(pI830->FbBase + state_base_offset); + IntelEmitInvarientState(pScrn); + pI830->last_3d = LAST_3D_RENDER; urb_vs_start = 0; urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; @@ -965,67 +1028,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format, &src_blend, &dst_blend); - if ((render_state->binding_table_index + 3 >= - ARRAY_SIZE(card_state->binding_table)) || - (render_state->surface_state_index + 3 >= - ARRAY_SIZE(card_state->surface_state))) - { - i830WaitSync(pScrn); - render_state->binding_table_index = 0; - render_state->surface_state_index = 0; - render_state->vb_offset = 0; - } - - binding_table = card_state->binding_table + - render_state->binding_table_index; - ss = card_state->surface_state + render_state->surface_state_index; - /* We only use 2 or 3 entries, but the table has to be 32-byte - * aligned. - */ - render_state->binding_table_index += 8; - render_state->surface_state_index += (pMask != NULL) ? 3 : 2; - - /* Set up and bind the state buffer for the destination surface */ - binding_table[0] = state_base_offset + - i965_set_picture_surface_state(pScrn, - &ss[0].state, - pDstPicture, pDst, TRUE); - - /* Set up and bind the source surface state buffer */ - binding_table[1] = state_base_offset + - i965_set_picture_surface_state(pScrn, - &ss[1].state, - pSrcPicture, pSrc, FALSE); - if (pMask) { - /* Set up and bind the mask surface state buffer */ - binding_table[2] = state_base_offset + - i965_set_picture_surface_state(pScrn, - &ss[2].state, - pMaskPicture, pMask, - FALSE); - } else { - binding_table[2] = 0; - } - - src_filter = sampler_state_filter_from_picture (pSrcPicture->filter); - if (src_filter < 0) - I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter); - src_extend = sampler_state_extend_from_picture (pSrcPicture->repeat); - if (src_extend < 0) - I830FALLBACK ("Bad src repeat 0x%x\n", pSrcPicture->repeat); - - if (pMaskPicture) { - mask_filter = sampler_state_filter_from_picture (pMaskPicture->filter); - if (mask_filter < 0) - I830FALLBACK ("Bad mask filter 0x%x\n", pMaskPicture->filter); - mask_extend = sampler_state_extend_from_picture (pMaskPicture->repeat); - if (mask_extend < 0) - I830FALLBACK ("Bad mask repeat 0x%x\n", pMaskPicture->repeat); - } else { - mask_filter = SAMPLER_STATE_FILTER_NEAREST; - mask_extend = SAMPLER_STATE_EXTEND_NONE; - } - /* Begin the long sequence of commands needed to set up the 3D * rendering pipe */ @@ -1041,7 +1043,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, BEGIN_BATCH(12); /* Match Mesa driver setup */ - if (IS_GM45(pI830) || IS_G4X(pI830)) + if (IS_G4X(pI830)) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); else OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); @@ -1064,7 +1066,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, /* Set system instruction pointer */ OUT_BATCH(BRW_STATE_SIP | 0); - OUT_BATCH(state_base_offset + offsetof(gen4_state_t, sip_kernel)); + OUT_RELOC(render_state->sip_kernel_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH(MI_NOOP); ADVANCE_BATCH(); } @@ -1086,8 +1089,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ /* Only the PS uses the binding table */ - assert((((unsigned char *)binding_table - pI830->FbBase) & 31) == 0); - OUT_BATCH((unsigned char *)binding_table - pI830->FbBase); + OUT_RELOC(binding_table_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* The drawing rectangle clipping is always on. Set it to values that * shouldn't do any clipping. @@ -1105,68 +1107,26 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, /* Set the pointers to the 3d pipeline state */ OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); - assert((offsetof(gen4_state_t, vs_state) & 31) == 0); - OUT_BATCH(state_base_offset + offsetof(gen4_state_t, vs_state)); + OUT_RELOC(render_state->vs_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ - if (pMask) { - sf_state_offset = state_base_offset + - offsetof(gen4_state_t, sf_state_mask); + OUT_RELOC(render_state->sf_mask_state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); } else { - sf_state_offset = state_base_offset + - offsetof(gen4_state_t, sf_state); + OUT_RELOC(render_state->sf_state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); } - assert((sf_state_offset & 31) == 0); - OUT_BATCH(sf_state_offset); - - /* Shorthand for long array lookup */ -#define OUT_WM_KERNEL(kernel) do { \ - uint32_t offset = state_base_offset + \ - offsetof(gen4_state_t, \ - wm_state_ ## kernel \ - [src_filter] \ - [src_extend] \ - [mask_filter] \ - [mask_extend]); \ - assert((offset & 31) == 0); \ - OUT_BATCH(offset); \ -} while (0) - if (pMask) { - if (pMaskPicture->componentAlpha && - PICT_FORMAT_RGB(pMaskPicture->format)) - { - if (i965_blend_op[op].src_alpha) { - if (is_affine) - OUT_WM_KERNEL(maskca_srcalpha_affine); - else - OUT_WM_KERNEL(maskca_srcalpha_projective); - } else { - if (is_affine) - OUT_WM_KERNEL(maskca_affine); - else - OUT_WM_KERNEL(maskca_projective); - } - } else { - if (is_affine) - OUT_WM_KERNEL(masknoca_affine); - else - OUT_WM_KERNEL(masknoca_projective); - } - } else { - if (is_affine) - OUT_WM_KERNEL(nomask_affine); - else - OUT_WM_KERNEL(nomask_projective); - } -#undef OUT_WM_KERNEL + OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel] + [src_filter][src_extend] + [mask_filter][mask_extend], + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - /* 64 byte aligned */ - assert((offsetof(gen4_state_t, - cc_state[src_blend][dst_blend]) & 63) == 0); - OUT_BATCH(state_base_offset + - offsetof(gen4_state_t, cc_state[src_blend][dst_blend])); + OUT_RELOC(render_state->cc_state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + offsetof(struct gen4_cc_unit_state, + cc_state[src_blend][dst_blend])); /* URB fence */ OUT_BATCH(BRW_URB_FENCE | @@ -1261,27 +1221,237 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, ErrorF("try to sync to show any errors...\n"); I830Sync(pScrn); #endif +} + +/** + * Returns whether the current set of composite state plus vertex buffer is + * expected to fit in the aperture. + */ +static Bool +i965_composite_check_aperture(ScrnInfoPtr pScrn) +{ + I830Ptr pI830 = I830PTR(pScrn); + struct gen4_render_state *render_state= pI830->gen4_render_state; + gen4_composite_op *composite_op = &render_state->composite_op; + drm_intel_bo *bo_table[] = { + pI830->batch_bo, + composite_op->binding_table_bo, + render_state->vertex_buffer_bo, + render_state->vs_state_bo, + render_state->sf_state_bo, + render_state->sf_mask_state_bo, + render_state->wm_state_bo[composite_op->wm_kernel] + [composite_op->src_filter] + [composite_op->src_extend] + [composite_op->mask_filter] + [composite_op->mask_extend], + render_state->cc_state_bo, + render_state->sip_kernel_bo, + }; + + return drm_intel_bufmgr_check_aperture_space(bo_table, + ARRAY_SIZE(bo_table)) == 0; +} + +Bool +i965_prepare_composite(int op, PicturePtr pSrcPicture, + PicturePtr pMaskPicture, PicturePtr pDstPicture, + PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) +{ + ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum]; + I830Ptr pI830 = I830PTR(pScrn); + struct gen4_render_state *render_state= pI830->gen4_render_state; + gen4_composite_op *composite_op = &render_state->composite_op; + uint32_t *binding_table; + drm_intel_bo *binding_table_bo, *surface_state_bo; + + if (composite_op->src_filter < 0) + I830FALLBACK("Bad src filter 0x%x\n", pSrcPicture->filter); + composite_op->src_extend = + sampler_state_extend_from_picture(pSrcPicture->repeatType); + if (composite_op->src_extend < 0) + I830FALLBACK("Bad src repeat 0x%x\n", pSrcPicture->repeatType); + + if (pMaskPicture) { + composite_op->mask_filter = + sampler_state_filter_from_picture(pMaskPicture->filter); + if (composite_op->mask_filter < 0) + I830FALLBACK("Bad mask filter 0x%x\n", pMaskPicture->filter); + composite_op->mask_extend = + sampler_state_extend_from_picture(pMaskPicture->repeatType); + if (composite_op->mask_extend < 0) + I830FALLBACK("Bad mask repeat 0x%x\n", pMaskPicture->repeatType); + } else { + composite_op->mask_filter = SAMPLER_STATE_FILTER_NEAREST; + composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE; + } + + /* Set up the surface states. */ + surface_state_bo = dri_bo_alloc(pI830->bufmgr, "surface_state", + 3 * sizeof (brw_surface_state_padded), + 4096); + if (dri_bo_map(surface_state_bo, 1) != 0) + return FALSE; + /* Set up the state buffer for the destination surface */ + i965_set_picture_surface_state(surface_state_bo, 0, + pDstPicture, pDst, TRUE); + /* Set up the source surface state buffer */ + i965_set_picture_surface_state(surface_state_bo, 1, + pSrcPicture, pSrc, FALSE); + if (pMask) { + /* Set up the mask surface state buffer */ + i965_set_picture_surface_state(surface_state_bo, 2, + pMaskPicture, pMask, + FALSE); + } + dri_bo_unmap(surface_state_bo); + + /* Set up the binding table of surface indices to surface state. */ + binding_table_bo = dri_bo_alloc(pI830->bufmgr, "binding_table", + 3 * sizeof(uint32_t), 4096); + if (dri_bo_map (binding_table_bo, 1) != 0) { + dri_bo_unreference(surface_state_bo); + return FALSE; + } + + binding_table = binding_table_bo->virtual; + binding_table[0] = intel_emit_reloc(binding_table_bo, + 0 * sizeof(uint32_t), + surface_state_bo, + 0 * sizeof(brw_surface_state_padded), + I915_GEM_DOMAIN_INSTRUCTION, 0); + + binding_table[1] = intel_emit_reloc(binding_table_bo, + 1 * sizeof(uint32_t), + surface_state_bo, + 1 * sizeof(brw_surface_state_padded), + I915_GEM_DOMAIN_INSTRUCTION, 0); + + if (pMask) { + binding_table[2] = intel_emit_reloc(binding_table_bo, + 2 * sizeof(uint32_t), + surface_state_bo, + 2 * sizeof(brw_surface_state_padded), + I915_GEM_DOMAIN_INSTRUCTION, 0); + } else { + binding_table[2] = 0; + } + dri_bo_unmap(binding_table_bo); + /* All refs to surface_state are now contained in binding_table_bo. */ + drm_intel_bo_unreference(surface_state_bo); + + composite_op->op = op; + composite_op->source_picture = pSrcPicture; + composite_op->mask_picture = pMaskPicture; + composite_op->dest_picture = pDstPicture; + composite_op->source = pSrc; + composite_op->mask = pMask; + composite_op->dest = pDst; + drm_intel_bo_unreference(composite_op->binding_table_bo); + composite_op->binding_table_bo = binding_table_bo; + composite_op->src_filter = + sampler_state_filter_from_picture(pSrcPicture->filter); + + pI830->scale_units[0][0] = pSrc->drawable.width; + pI830->scale_units[0][1] = pSrc->drawable.height; + + pI830->transform[0] = pSrcPicture->transform; + composite_op->is_affine = + i830_transform_is_affine(pI830->transform[0]); + + if (!pMask) { + pI830->transform[1] = NULL; + pI830->scale_units[1][0] = -1; + pI830->scale_units[1][1] = -1; + } else { + pI830->transform[1] = pMaskPicture->transform; + pI830->scale_units[1][0] = pMask->drawable.width; + pI830->scale_units[1][1] = pMask->drawable.height; + composite_op->is_affine |= + i830_transform_is_affine(pI830->transform[1]); + } + + + if (pMask) { + if (pMaskPicture->componentAlpha && + PICT_FORMAT_RGB(pMaskPicture->format)) + { + if (i965_blend_op[op].src_alpha) { + if (composite_op->is_affine) + composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_AFFINE; + else + composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; + } else { + if (composite_op->is_affine) + composite_op->wm_kernel = WM_KERNEL_MASKCA_AFFINE; + else + composite_op->wm_kernel = WM_KERNEL_MASKCA_PROJECTIVE; + } + } else { + if (composite_op->is_affine) + composite_op->wm_kernel = WM_KERNEL_MASKNOCA_AFFINE; + else + composite_op->wm_kernel = WM_KERNEL_MASKNOCA_PROJECTIVE; + } + } else { + if (composite_op->is_affine) + composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; + else + composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; + } + + if (!i965_composite_check_aperture(pScrn)) { + intel_batch_flush(pScrn, FALSE); + if (!i965_composite_check_aperture(pScrn)) + I830FALLBACK("Couldn't fit render operation in aperture\n"); + } + + render_state->needs_state_emit = TRUE; + return TRUE; } +static drm_intel_bo * +i965_get_vb_space(ScrnInfoPtr pScrn) +{ + I830Ptr pI830 = I830PTR(pScrn); + struct gen4_render_state *render_state = pI830->gen4_render_state; + + /* If the vertex buffer is too full, then we free the old and a new one + * gets made. + */ + if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > + VERTEX_BUFFER_SIZE) { + drm_intel_bo_unreference(render_state->vertex_buffer_bo); + render_state->vertex_buffer_bo = NULL; + } + + /* Alloc a new vertex buffer if necessary. */ + if (render_state->vertex_buffer_bo == NULL) { + render_state->vertex_buffer_bo = drm_intel_bo_alloc(pI830->bufmgr, "vb", + sizeof(gen4_vertex_buffer), + 4096); + render_state->vb_offset = 0; + } + + drm_intel_bo_reference(render_state->vertex_buffer_bo); + return render_state->vertex_buffer_bo; +} + void i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h) { ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; I830Ptr pI830 = I830PTR(pScrn); - gen4_state_t *card_state = pI830->gen4_render_state->card_state; struct gen4_render_state *render_state = pI830->gen4_render_state; Bool has_mask; - Bool is_affine_src, is_affine_mask, is_affine; float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; - float *vb = card_state->vb; int i; + drm_intel_bo *vb_bo; + float vb[18]; + Bool is_affine = render_state->composite_op.is_affine; - is_affine_src = i830_transform_is_affine (pI830->transform[0]); - is_affine_mask = i830_transform_is_affine (pI830->transform[1]); - is_affine = is_affine_src && is_affine_mask; - if (is_affine) { if (!i830_get_transformed_coordinates(srcX, srcY, @@ -1352,12 +1522,10 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, } } - if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(card_state->vb)) { - i830WaitSync(pScrn); - render_state->vb_offset = 0; - } - - i = render_state->vb_offset; + vb_bo = i965_get_vb_space(pScrn); + if (vb_bo == NULL) + return; + i = 0; /* rect (x2,y2) */ vb[i++] = (float)(dstX + w); vb[i++] = (float)(dstY + h); @@ -1399,7 +1567,15 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, if (!is_affine) vb[i++] = mask_w[0]; } - assert (i * 4 <= sizeof(card_state->vb)); + assert (i <= VERTEX_BUFFER_SIZE); + drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb); + + if (!i965_composite_check_aperture(pScrn)) + intel_batch_flush(pScrn, FALSE); + + intel_batch_start_atomic(pScrn, 200); + if (render_state->needs_state_emit) + i965_emit_composite_state(pScrn); BEGIN_BATCH(12); OUT_BATCH(MI_FLUSH); @@ -1408,8 +1584,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT)); - OUT_BATCH(render_state->card_state_offset + offsetof(gen4_state_t, vb) + - render_state->vb_offset * 4); + OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4); OUT_BATCH(3); OUT_BATCH(0); // ignore for VERTEXDATA, but still there @@ -1425,32 +1600,32 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, OUT_BATCH(0); /* index buffer offset, ignored */ ADVANCE_BATCH(); - render_state->vb_offset = i; + render_state->vb_offset += i; + drm_intel_bo_unreference(vb_bo); + + intel_batch_end_atomic(pScrn); #ifdef I830DEBUG ErrorF("sync after 3dprimitive\n"); I830Sync(pScrn); #endif - /* we must be sure that the pipeline is flushed before next exa draw, - because that will be new state, binding state and instructions*/ - { - BEGIN_BATCH(4); - OUT_BATCH(BRW_PIPE_CONTROL | - BRW_PIPE_CONTROL_NOWRITE | - BRW_PIPE_CONTROL_WC_FLUSH | - BRW_PIPE_CONTROL_IS_FLUSH | - (1 << 10) | /* XXX texture cache flush for BLC/CTG */ - 2); - OUT_BATCH(0); /* Destination address */ - OUT_BATCH(0); /* Immediate data low DW */ - OUT_BATCH(0); /* Immediate data high DW */ - ADVANCE_BATCH(); +} + +void +i965_batch_flush_notify(ScrnInfoPtr pScrn) +{ + I830Ptr pI830 = I830PTR(pScrn); + struct gen4_render_state *render_state = pI830->gen4_render_state; + + /* Once a batch is emitted, we never want to map again any buffer + * object being referenced by that batch, (which would be very + * expensive). */ + if (render_state->vertex_buffer_bo) { + dri_bo_unreference (render_state->vertex_buffer_bo); + render_state->vertex_buffer_bo = NULL; } - /* Mark sync so we can wait for it before setting up the VB on the next - * rectangle. - */ - i830MarkSync(pScrn); + render_state->needs_state_emit = TRUE; } /** @@ -1461,17 +1636,75 @@ gen4_render_state_init(ScrnInfoPtr pScrn) { I830Ptr pI830 = I830PTR(pScrn); struct gen4_render_state *render_state; + int i, j, k, l, m; + drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; + drm_intel_bo *border_color_bo; if (pI830->gen4_render_state == NULL) pI830->gen4_render_state = calloc(sizeof(*render_state), 1); render_state = pI830->gen4_render_state; + render_state->vb_offset = 0; + + render_state->vs_state_bo = gen4_create_vs_unit_state(pScrn); + + /* Set up the two SF states (one for blending with a mask, one without) */ + sf_kernel_bo = intel_bo_alloc_for_data(pScrn, + sf_kernel_static, + sizeof(sf_kernel_static), + "sf kernel"); + sf_kernel_mask_bo = intel_bo_alloc_for_data(pScrn, + sf_kernel_mask_static, + sizeof(sf_kernel_mask_static), + "sf mask kernel"); + render_state->sf_state_bo = gen4_create_sf_state(pScrn, sf_kernel_bo); + render_state->sf_mask_state_bo = gen4_create_sf_state(pScrn, + sf_kernel_mask_bo); + drm_intel_bo_unreference(sf_kernel_bo); + drm_intel_bo_unreference(sf_kernel_mask_bo); + + for (m = 0; m < WM_KERNEL_COUNT; m++) { + render_state->wm_kernel_bo[m] = + intel_bo_alloc_for_data(pScrn, + wm_kernels[m].data, wm_kernels[m].size, + "WM kernel"); + } - render_state->card_state_offset = pI830->gen4_render_state_mem->offset; - render_state->card_state = (gen4_state_t *) - (pI830->FbBase + render_state->card_state_offset); + /* Set up the WM states: each filter/extend type for source and mask, per + * kernel. + */ + border_color_bo = sampler_border_color_create(pScrn); + for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) { + for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) { + for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { + for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { + drm_intel_bo *sampler_state_bo; + + sampler_state_bo = + gen4_create_sampler_state(pScrn, + i, j, + k, l, + border_color_bo); + + for (m = 0; m < WM_KERNEL_COUNT; m++) { + render_state->wm_state_bo[m][i][j][k][l] = + gen4_create_wm_state(pScrn, + wm_kernels[m].has_mask, + render_state->wm_kernel_bo[m], + sampler_state_bo); + } + drm_intel_bo_unreference(sampler_state_bo); + } + } + } + } + drm_intel_bo_unreference(border_color_bo); - gen4_state_init(render_state); + render_state->cc_state_bo = gen4_create_cc_unit_state(pScrn); + render_state->sip_kernel_bo = intel_bo_alloc_for_data(pScrn, + sip_kernel_static, + sizeof(sip_kernel_static), + "sip kernel"); } /** @@ -1481,12 +1714,37 @@ void gen4_render_state_cleanup(ScrnInfoPtr pScrn) { I830Ptr pI830 = I830PTR(pScrn); + struct gen4_render_state *render_state= pI830->gen4_render_state; + int i, j, k, l, m; - pI830->gen4_render_state->card_state = NULL; -} + if (render_state->vertex_buffer_bo) { + dri_bo_unreference (render_state->vertex_buffer_bo); + render_state->vertex_buffer_bo = NULL; + } -unsigned int -gen4_render_state_size(ScrnInfoPtr pScrn) -{ - return sizeof(gen4_state_t); + drm_intel_bo_unreference(render_state->vs_state_bo); + render_state->vs_state_bo = NULL; + drm_intel_bo_unreference(render_state->sf_state_bo); + render_state->sf_state_bo = NULL; + drm_intel_bo_unreference(render_state->sf_mask_state_bo); + render_state->sf_mask_state_bo = NULL; + + for (i = 0; i < WM_KERNEL_COUNT; i++) { + drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); + render_state->wm_kernel_bo[i] = NULL; + } + + for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) + for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) + for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) + for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) + for (m = 0; m < WM_KERNEL_COUNT; m++) { + drm_intel_bo_unreference(render_state->wm_state_bo[m][i][j][k][l]); + render_state->wm_state_bo[m][i][j][k][l] = NULL; + } + + drm_intel_bo_unreference(render_state->cc_state_bo); + render_state->cc_state_bo = NULL; + drm_intel_bo_unreference(render_state->sip_kernel_bo); + render_state->sip_kernel_bo = NULL; } |