diff options
author | Keith Packard <keithp@keithp.com> | 2008-04-09 00:09:34 -0500 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2008-04-10 16:58:11 -0500 |
commit | 825d9e50c59450f07178a54fed2616e551dc0455 (patch) | |
tree | 18b82292eb43f534a538dcd0fdefc15c41be17a2 | |
parent | a03eaaa67b33c57530e92c53d28917e2563b4427 (diff) |
Add planar video decode kernel
Support for planar video reduces bus bandwidth by 25% and also reduces CPU
usage by eliminating the CPU-side planar->packed conversion.
-rw-r--r-- | src/Makefile.am | 2 | ||||
-rw-r--r-- | src/exa_wm_src_sample_planar.g4a | 66 | ||||
-rw-r--r-- | src/exa_wm_src_sample_planar.g4b | 4 | ||||
-rw-r--r-- | src/i830_video.c | 71 | ||||
-rw-r--r-- | src/i965_video.c | 176 |
5 files changed, 196 insertions, 123 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index f50d1d4c..48ea5679 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -135,6 +135,7 @@ INTEL_G4A = \ exa_wm_src_projective.g4a \ exa_wm_src_sample_argb.g4a \ exa_wm_src_sample_a.g4a \ + exa_wm_src_sample_planar.g4a \ exa_wm_src_data.g4a \ exa_wm_mask_affine.g4a \ exa_wm_mask_projective.g4a \ @@ -161,6 +162,7 @@ INTEL_G4B = \ exa_wm_src_projective.g4b \ exa_wm_src_sample_argb.g4b \ exa_wm_src_sample_a.g4b \ + exa_wm_src_sample_planar.g4b \ exa_wm_src_data.g4b \ exa_wm_mask_affine.g4b \ exa_wm_mask_projective.g4b \ diff --git a/src/exa_wm_src_sample_planar.g4a b/src/exa_wm_src_sample_planar.g4a new file mode 100644 index 00000000..92b867ea --- /dev/null +++ b/src/exa_wm_src_sample_planar.g4a @@ -0,0 +1,66 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface in planar format */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load r */ +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ + +/* sample Y */ +send (16) src_msg_ind /* msg reg index */ + src_sample_g<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + +/* sample U (Cr) */ +send (16) src_msg_ind /* msg reg index */ + src_sample_r<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + +/* sample V (Cb) */ +send (16) src_msg_ind /* msg reg index */ + src_sample_b<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + diff --git a/src/exa_wm_src_sample_planar.g4b b/src/exa_wm_src_sample_planar.g4b new file mode 100644 index 00000000..d2b9cfe5 --- /dev/null +++ b/src/exa_wm_src_sample_planar.g4b @@ -0,0 +1,4 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x01800031, 0x22001d29, 0x008d0000, 0x02520001 }, + { 0x01800031, 0x21c01d29, 0x008d0000, 
0x02520102 }, + { 0x01800031, 0x22401d29, 0x008d0000, 0x02520203 }, diff --git a/src/i830_video.c b/src/i830_video.c index 14dab8fd..4e1f725d 100644 --- a/src/i830_video.c +++ b/src/i830_video.c @@ -1350,65 +1350,6 @@ I830CopyPackedData(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, } } -/* Copies planar data in *buf to UYVY-packed data in the screen atYBufXOffset. - */ -static void -I830CopyPlanarToPackedData(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, - unsigned char *buf, int srcPitch, - int srcPitch2, int dstPitch, int srcH, - int top, int left, int h, int w, int id) -{ - I830Ptr pI830 = I830PTR(pScrn); - uint8_t *dst1, *srcy, *srcu, *srcv; - int y; - - if (pPriv->currentBuf == 0) - dst1 = pI830->FbBase + pPriv->YBuf0offset; - else - dst1 = pI830->FbBase + pPriv->YBuf1offset; - - srcy = buf + (top * srcPitch) + left; - if (id == FOURCC_YV12) { - srcu = buf + (srcH * srcPitch) + ((top / 2) * srcPitch2) + (left / 2); - srcv = buf + (srcH * srcPitch) + ((srcH / 2) * srcPitch2) + - ((top / 2) * srcPitch2) + (left / 2); - } else { - srcv = buf + (srcH * srcPitch) + ((top / 2) * srcPitch2) + (left / 2); - srcu = buf + (srcH * srcPitch) + ((srcH / 2) * srcPitch2) + - ((top / 2) * srcPitch2) + (left / 2); - } - - for (y = 0; y < h; y++) { - uint32_t *dst = (uint32_t *)dst1; - uint8_t *sy = srcy; - uint8_t *su = srcu; - uint8_t *sv = srcv; - int i; - - i = w / 2; - while(i > 4) { - dst[0] = sy[0] | (sy[1] << 16) | (sv[0] << 8) | (su[0] << 24); - dst[1] = sy[2] | (sy[3] << 16) | (sv[1] << 8) | (su[1] << 24); - dst[2] = sy[4] | (sy[5] << 16) | (sv[2] << 8) | (su[2] << 24); - dst[3] = sy[6] | (sy[7] << 16) | (sv[3] << 8) | (su[3] << 24); - dst += 4; su += 4; sv += 4; sy += 8; - i -= 4; - } - while(i--) { - dst[0] = sy[0] | (sy[1] << 16) | (sv[0] << 8) | (su[0] << 24); - dst++; su++; sv++; - sy += 2; - } - - dst1 += dstPitch; - srcy += srcPitch; - if (y & 1) { - srcu += srcPitch2; - srcv += srcPitch2; - } - } -} - static void I830CopyPlanarData(ScrnInfoPtr pScrn, I830PortPrivPtr 
pPriv, unsigned char *buf, int srcPitch, @@ -2339,8 +2280,6 @@ I830PutImage(ScrnInfoPtr pScrn, srcPitch2 = ((width >> 1) + 0x3ff) & ~0x3ff; } #endif - if (pPriv->textured && IS_I965G(pI830)) - destId = FOURCC_YUY2; break; case FOURCC_UYVY: case FOURCC_YUY2: @@ -2460,14 +2399,8 @@ I830PutImage(ScrnInfoPtr pScrn, case FOURCC_I420: top &= ~1; nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; - if (pPriv->textured && IS_I965G(pI830)) { - I830CopyPlanarToPackedData(pScrn, pPriv, buf, srcPitch, srcPitch2, - dstPitch, height, top, left, nlines, - npixels, id); - } else { - I830CopyPlanarData(pScrn, pPriv, buf, srcPitch, srcPitch2, dstPitch, - height, top, left, nlines, npixels, id); - } + I830CopyPlanarData(pScrn, pPriv, buf, srcPitch, srcPitch2, dstPitch, + height, top, left, nlines, npixels, id); break; case FOURCC_UYVY: case FOURCC_YUY2: diff --git a/src/i965_video.c b/src/i965_video.c index 68337e7e..63f11929 100644 --- a/src/i965_video.c +++ b/src/i965_video.c @@ -93,7 +93,7 @@ static const uint32_t sf_kernel_static[][4] = { #define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) -static const uint32_t ps_kernel_static[][4] = { +static const uint32_t ps_kernel_packed_static[][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_affine.g4b" #include "exa_wm_src_sample_argb.g4b" @@ -101,11 +101,17 @@ static const uint32_t ps_kernel_static[][4] = { #include "exa_wm_write.g4b" }; +static const uint32_t ps_kernel_planar_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_planar.g4b" +#include "exa_wm_yuv_rgb.g4b" +#include "exa_wm_write.g4b" +}; + #define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) #define MIN(a,b) ((a) < (b) ? 
(a) : (b)) -#define WM_BINDING_TABLE_ENTRIES 2 - static uint32_t float_to_uint (float f) { union {uint32_t i; float f;} x; x.f = f; @@ -165,8 +171,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, int urb_sf_start, urb_sf_size; int urb_cs_start, urb_cs_size; struct brw_surface_state *dest_surf_state; - struct brw_surface_state *src_surf_state; - struct brw_sampler_state *src_sampler_state; + struct brw_surface_state *src_surf_state[3]; + struct brw_sampler_state *src_sampler_state[3]; struct brw_vs_unit_state *vs_state; struct brw_sf_unit_state *sf_state; struct brw_wm_unit_state *wm_state; @@ -179,7 +185,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, float src_scale_x, src_scale_y; uint32_t *binding_table; Bool first_output = TRUE; - int dest_surf_offset, src_surf_offset, src_sampler_offset, vs_offset; + int dest_surf_offset, src_surf_offset[3], src_sampler_offset[3], vs_offset; int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset; int wm_scratch_offset; int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset; @@ -188,6 +194,16 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */ char *state_base; int state_base_offset; + int src_surf; + int n_src_surf; + uint32_t src_surf_format; + uint32_t src_surf_base[3]; + int src_width[3]; + int src_height[3]; + int src_pitch[3]; + int wm_binding_table_entries; + const uint32_t *ps_kernel_static; + int ps_kernel_static_size; #if 0 ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, height, @@ -202,7 +218,50 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, ErrorF ("INST_PM 0x%08x\n", INREG(INST_PM)); #endif - assert((id == FOURCC_UYVY) || (id == FOURCC_YUY2)); + src_surf_base[0] = pPriv->YBuf0offset; + src_surf_base[1] = pPriv->VBuf0offset; + src_surf_base[2] = pPriv->UBuf0offset; +#if 0 + ErrorF ("base 0 0x%x base 1 0x%x base 
2 0x%x\n", + src_surf_base[0], src_surf_base[1], src_surf_base[2]); +#endif + + switch (id) { + case FOURCC_UYVY: + src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; + n_src_surf = 1; + ps_kernel_static = &ps_kernel_packed_static[0][0]; + ps_kernel_static_size = sizeof (ps_kernel_packed_static); + src_width[0] = width; + src_height[0] = height; + src_pitch[0] = video_pitch; + break; + case FOURCC_YUY2: + src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; + ps_kernel_static = &ps_kernel_packed_static[0][0]; + ps_kernel_static_size = sizeof (ps_kernel_packed_static); + src_width[0] = width; + src_height[0] = height; + src_pitch[0] = video_pitch; + n_src_surf = 1; + break; + case FOURCC_I420: + case FOURCC_YV12: + src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; + ps_kernel_static = &ps_kernel_planar_static[0][0]; + ps_kernel_static_size = sizeof (ps_kernel_planar_static); + src_width[0] = width; + src_height[0] = height; + src_pitch[0] = video_pitch * 2; + src_width[1] = src_width[2] = width / 2; + src_height[1] = src_height[2] = height / 2; + src_pitch[1] = src_pitch[2] = video_pitch; + n_src_surf = 3; + break; + default: + return; + } + wm_binding_table_entries = 1 + n_src_surf; IntelEmitInvarientState(pScrn); *pI830->last_3d = LAST_3D_VIDEO; @@ -224,15 +283,17 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, sf_kernel_offset = ALIGN(next_offset, 64); next_offset = sf_kernel_offset + sizeof (sf_kernel_static); ps_kernel_offset = ALIGN(next_offset, 64); - next_offset = ps_kernel_offset + sizeof (ps_kernel_static); + next_offset = ps_kernel_offset + ps_kernel_static_size; sip_kernel_offset = ALIGN(next_offset, 64); next_offset = sip_kernel_offset + sizeof (sip_kernel_static); cc_viewport_offset = ALIGN(next_offset, 32); next_offset = cc_viewport_offset + sizeof(*cc_viewport); - src_sampler_offset = ALIGN(next_offset, 32); - next_offset = src_sampler_offset + sizeof(*src_sampler_state); - + for (src_surf = 0; src_surf < n_src_surf; src_surf++) 
{ + src_sampler_offset[src_surf] = ALIGN(next_offset, 32); + next_offset = src_sampler_offset[src_surf] + sizeof(struct brw_sampler_state); + } + /* Align VB to native size of elements, for safety */ vb_offset = ALIGN(next_offset, 8); next_offset = vb_offset + vb_size; @@ -240,10 +301,14 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, /* And then the general state: */ dest_surf_offset = ALIGN(next_offset, 32); next_offset = dest_surf_offset + sizeof(*dest_surf_state); - src_surf_offset = ALIGN(next_offset, 32); - next_offset = src_surf_offset + sizeof(*src_surf_state); + + for (src_surf = 0; src_surf < n_src_surf; src_surf++) { + src_surf_offset[src_surf] = ALIGN(next_offset, 32); + next_offset = src_surf_offset[src_surf] + sizeof(struct brw_surface_state); + } + binding_table_offset = ALIGN(next_offset, 32); - next_offset = binding_table_offset + (WM_BINDING_TABLE_ENTRIES * 4); + next_offset = binding_table_offset + (wm_binding_table_entries * 4); /* Allocate an area in framebuffer for our state layout we just set up */ total_state_size = next_offset; @@ -270,8 +335,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, cc_viewport = (void *)(state_base + cc_viewport_offset); dest_surf_state = (void *)(state_base + dest_surf_offset); - src_surf_state = (void *)(state_base + src_surf_offset); - src_sampler_state = (void *)(state_base + src_sampler_offset); + + for (src_surf = 0; src_surf < n_src_surf; src_surf++) + { + src_surf_state[src_surf] = (void *)(state_base + src_surf_offset[src_surf]); + src_sampler_state[src_surf] = (void *)(state_base + src_sampler_offset[src_surf]); + } binding_table = (void *)(state_base + binding_table_offset); vb = (void *)(state_base + vb_offset); @@ -384,50 +453,49 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, dest_surf_state->ss3.tiled_surface = i830_pixmap_tiled(pPixmap); dest_surf_state->ss3.tile_walk = 0; /* TileX */ - /* Set up the source 
surface state buffer */ - memset(src_surf_state, 0, sizeof(*src_surf_state)); - src_surf_state->ss0.surface_type = BRW_SURFACE_2D; - /* src_surf_state->ss0.data_return_format = - BRW_SURFACERETURNFORMAT_FLOAT32; */ - switch (id) { - case FOURCC_YUY2: - src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; - break; - case FOURCC_UYVY: - src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; - break; + for (src_surf = 0; src_surf < n_src_surf; src_surf++) + { + /* Set up the source surface state buffer */ + memset(src_surf_state[src_surf], 0, sizeof(struct brw_surface_state)); + src_surf_state[src_surf]->ss0.surface_type = BRW_SURFACE_2D; + src_surf_state[src_surf]->ss0.surface_format = src_surf_format; + src_surf_state[src_surf]->ss0.writedisable_alpha = 0; + src_surf_state[src_surf]->ss0.writedisable_red = 0; + src_surf_state[src_surf]->ss0.writedisable_green = 0; + src_surf_state[src_surf]->ss0.writedisable_blue = 0; + src_surf_state[src_surf]->ss0.color_blend = 1; + src_surf_state[src_surf]->ss0.vert_line_stride = 0; + src_surf_state[src_surf]->ss0.vert_line_stride_ofs = 0; + src_surf_state[src_surf]->ss0.mipmap_layout_mode = 0; + src_surf_state[src_surf]->ss0.render_cache_read_mode = 0; + + src_surf_state[src_surf]->ss1.base_addr = src_surf_base[src_surf]; + src_surf_state[src_surf]->ss2.width = src_width[src_surf] - 1; + src_surf_state[src_surf]->ss2.height = src_height[src_surf] - 1; + src_surf_state[src_surf]->ss2.mip_count = 0; + src_surf_state[src_surf]->ss2.render_target_rotation = 0; + src_surf_state[src_surf]->ss3.pitch = src_pitch[src_surf] - 1; } - src_surf_state->ss0.writedisable_alpha = 0; - src_surf_state->ss0.writedisable_red = 0; - src_surf_state->ss0.writedisable_green = 0; - src_surf_state->ss0.writedisable_blue = 0; - src_surf_state->ss0.color_blend = 1; - src_surf_state->ss0.vert_line_stride = 0; - src_surf_state->ss0.vert_line_stride_ofs = 0; - src_surf_state->ss0.mipmap_layout_mode = 0; - 
src_surf_state->ss0.render_cache_read_mode = 0; - - src_surf_state->ss1.base_addr = pPriv->YBuf0offset; - src_surf_state->ss2.width = width - 1; - src_surf_state->ss2.height = height - 1; - src_surf_state->ss2.mip_count = 0; - src_surf_state->ss2.render_target_rotation = 0; - src_surf_state->ss3.pitch = video_pitch - 1; /* FIXME: account for tiling if we ever do it */ /* Set up a binding table for our two surfaces. Only the PS will use it */ /* XXX: are these offset from the right place? */ binding_table[0] = state_base_offset + dest_surf_offset; - binding_table[1] = state_base_offset + src_surf_offset; + + for (src_surf = 0; src_surf < n_src_surf; src_surf++) + binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf]; /* Set up the packed YUV source sampler. Doesn't do colorspace conversion. */ - memset(src_sampler_state, 0, sizeof(*src_sampler_state)); - src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; - src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; - src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; - src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + for (src_surf = 0; src_surf < n_src_surf; src_surf++) + { + memset(src_sampler_state[src_surf], 0, sizeof(struct brw_sampler_state)); + src_sampler_state[src_surf]->ss0.min_filter = BRW_MAPFILTER_LINEAR; + src_sampler_state[src_surf]->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + src_sampler_state[src_surf]->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + src_sampler_state[src_surf]->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + src_sampler_state[src_surf]->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + } /* Set up the vertex shader to be disabled (passthrough) */ memset(vs_state, 0, sizeof(*vs_state)); @@ -472,13 +540,13 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, sf_state->sf6.dest_org_vbias = 0x8; sf_state->sf6.dest_org_hbias = 0x8; - memcpy (ps_kernel, 
ps_kernel_static, sizeof (ps_kernel_static)); + memcpy (ps_kernel, ps_kernel_static, ps_kernel_static_size); memset (wm_state, 0, sizeof (*wm_state)); wm_state->thread0.kernel_start_pointer = (state_base_offset + ps_kernel_offset) >> 6; wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); wm_state->thread1.single_program_flow = 1; /* XXX */ - wm_state->thread1.binding_table_entry_count = 2; + wm_state->thread1.binding_table_entry_count = 1 + n_src_surf; /* Though we never use the scratch space in our WM kernel, it has to be * set, and the minimum allocation is 1024 bytes. */ @@ -492,7 +560,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ wm_state->wm4.stats_enable = 1; wm_state->wm4.sampler_state_pointer = (state_base_offset + - src_sampler_offset) >> 5; + src_sampler_offset[0]) >> 5; wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ wm_state->wm5.max_threads = PS_MAX_THREADS - 1; wm_state->wm5.thread_dispatch_enable = 1; |