summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Packard <keithp@keithp.com>2008-04-09 00:09:34 -0500
committerKeith Packard <keithp@keithp.com>2008-04-10 16:58:11 -0500
commit825d9e50c59450f07178a54fed2616e551dc0455 (patch)
tree18b82292eb43f534a538dcd0fdefc15c41be17a2
parenta03eaaa67b33c57530e92c53d28917e2563b4427 (diff)
Add planar video decode kernel
Support for planar video reduces bus bandwidth by 25% and also reduces CPU usage during planar->packed conversion.
-rw-r--r--src/Makefile.am2
-rw-r--r--src/exa_wm_src_sample_planar.g4a66
-rw-r--r--src/exa_wm_src_sample_planar.g4b4
-rw-r--r--src/i830_video.c71
-rw-r--r--src/i965_video.c176
5 files changed, 196 insertions, 123 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index f50d1d4c..48ea5679 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -135,6 +135,7 @@ INTEL_G4A = \
exa_wm_src_projective.g4a \
exa_wm_src_sample_argb.g4a \
exa_wm_src_sample_a.g4a \
+ exa_wm_src_sample_planar.g4a \
exa_wm_src_data.g4a \
exa_wm_mask_affine.g4a \
exa_wm_mask_projective.g4a \
@@ -161,6 +162,7 @@ INTEL_G4B = \
exa_wm_src_projective.g4b \
exa_wm_src_sample_argb.g4b \
exa_wm_src_sample_a.g4b \
+ exa_wm_src_sample_planar.g4b \
exa_wm_src_data.g4b \
exa_wm_mask_affine.g4b \
exa_wm_mask_projective.g4b \
diff --git a/src/exa_wm_src_sample_planar.g4a b/src/exa_wm_src_sample_planar.g4a
new file mode 100644
index 00000000..92b867ea
--- /dev/null
+++ b/src/exa_wm_src_sample_planar.g4a
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface in planar format */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load r */
+mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable };
+
+/* src_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+
+/* sample Y */
+send (16) src_msg_ind /* msg reg index */
+ src_sample_g<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
+/* sample V (Cr) */
+send (16) src_msg_ind /* msg reg index */
+ src_sample_r<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
+/* sample U (Cb) */
+send (16) src_msg_ind /* msg reg index */
+ src_sample_b<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
diff --git a/src/exa_wm_src_sample_planar.g4b b/src/exa_wm_src_sample_planar.g4b
new file mode 100644
index 00000000..d2b9cfe5
--- /dev/null
+++ b/src/exa_wm_src_sample_planar.g4b
@@ -0,0 +1,4 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+ { 0x01800031, 0x22001d29, 0x008d0000, 0x02520001 },
+ { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520102 },
+ { 0x01800031, 0x22401d29, 0x008d0000, 0x02520203 },
diff --git a/src/i830_video.c b/src/i830_video.c
index 14dab8fd..4e1f725d 100644
--- a/src/i830_video.c
+++ b/src/i830_video.c
@@ -1350,65 +1350,6 @@ I830CopyPackedData(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv,
}
}
-/* Copies planar data in *buf to UYVY-packed data in the screen atYBufXOffset.
- */
-static void
-I830CopyPlanarToPackedData(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv,
- unsigned char *buf, int srcPitch,
- int srcPitch2, int dstPitch, int srcH,
- int top, int left, int h, int w, int id)
-{
- I830Ptr pI830 = I830PTR(pScrn);
- uint8_t *dst1, *srcy, *srcu, *srcv;
- int y;
-
- if (pPriv->currentBuf == 0)
- dst1 = pI830->FbBase + pPriv->YBuf0offset;
- else
- dst1 = pI830->FbBase + pPriv->YBuf1offset;
-
- srcy = buf + (top * srcPitch) + left;
- if (id == FOURCC_YV12) {
- srcu = buf + (srcH * srcPitch) + ((top / 2) * srcPitch2) + (left / 2);
- srcv = buf + (srcH * srcPitch) + ((srcH / 2) * srcPitch2) +
- ((top / 2) * srcPitch2) + (left / 2);
- } else {
- srcv = buf + (srcH * srcPitch) + ((top / 2) * srcPitch2) + (left / 2);
- srcu = buf + (srcH * srcPitch) + ((srcH / 2) * srcPitch2) +
- ((top / 2) * srcPitch2) + (left / 2);
- }
-
- for (y = 0; y < h; y++) {
- uint32_t *dst = (uint32_t *)dst1;
- uint8_t *sy = srcy;
- uint8_t *su = srcu;
- uint8_t *sv = srcv;
- int i;
-
- i = w / 2;
- while(i > 4) {
- dst[0] = sy[0] | (sy[1] << 16) | (sv[0] << 8) | (su[0] << 24);
- dst[1] = sy[2] | (sy[3] << 16) | (sv[1] << 8) | (su[1] << 24);
- dst[2] = sy[4] | (sy[5] << 16) | (sv[2] << 8) | (su[2] << 24);
- dst[3] = sy[6] | (sy[7] << 16) | (sv[3] << 8) | (su[3] << 24);
- dst += 4; su += 4; sv += 4; sy += 8;
- i -= 4;
- }
- while(i--) {
- dst[0] = sy[0] | (sy[1] << 16) | (sv[0] << 8) | (su[0] << 24);
- dst++; su++; sv++;
- sy += 2;
- }
-
- dst1 += dstPitch;
- srcy += srcPitch;
- if (y & 1) {
- srcu += srcPitch2;
- srcv += srcPitch2;
- }
- }
-}
-
static void
I830CopyPlanarData(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv,
unsigned char *buf, int srcPitch,
@@ -2339,8 +2280,6 @@ I830PutImage(ScrnInfoPtr pScrn,
srcPitch2 = ((width >> 1) + 0x3ff) & ~0x3ff;
}
#endif
- if (pPriv->textured && IS_I965G(pI830))
- destId = FOURCC_YUY2;
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
@@ -2460,14 +2399,8 @@ I830PutImage(ScrnInfoPtr pScrn,
case FOURCC_I420:
top &= ~1;
nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
- if (pPriv->textured && IS_I965G(pI830)) {
- I830CopyPlanarToPackedData(pScrn, pPriv, buf, srcPitch, srcPitch2,
- dstPitch, height, top, left, nlines,
- npixels, id);
- } else {
- I830CopyPlanarData(pScrn, pPriv, buf, srcPitch, srcPitch2, dstPitch,
- height, top, left, nlines, npixels, id);
- }
+ I830CopyPlanarData(pScrn, pPriv, buf, srcPitch, srcPitch2, dstPitch,
+ height, top, left, nlines, npixels, id);
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
diff --git a/src/i965_video.c b/src/i965_video.c
index 68337e7e..63f11929 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -93,7 +93,7 @@ static const uint32_t sf_kernel_static[][4] = {
#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
-static const uint32_t ps_kernel_static[][4] = {
+static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
@@ -101,11 +101,17 @@ static const uint32_t ps_kernel_static[][4] = {
#include "exa_wm_write.g4b"
};
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_planar.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#define WM_BINDING_TABLE_ENTRIES 2
-
static uint32_t float_to_uint (float f) {
union {uint32_t i; float f;} x;
x.f = f;
@@ -165,8 +171,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
int urb_sf_start, urb_sf_size;
int urb_cs_start, urb_cs_size;
struct brw_surface_state *dest_surf_state;
- struct brw_surface_state *src_surf_state;
- struct brw_sampler_state *src_sampler_state;
+ struct brw_surface_state *src_surf_state[3];
+ struct brw_sampler_state *src_sampler_state[3];
struct brw_vs_unit_state *vs_state;
struct brw_sf_unit_state *sf_state;
struct brw_wm_unit_state *wm_state;
@@ -179,7 +185,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
float src_scale_x, src_scale_y;
uint32_t *binding_table;
Bool first_output = TRUE;
- int dest_surf_offset, src_surf_offset, src_sampler_offset, vs_offset;
+ int dest_surf_offset, src_surf_offset[3], src_sampler_offset[3], vs_offset;
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int wm_scratch_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
@@ -188,6 +194,16 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */
char *state_base;
int state_base_offset;
+ int src_surf;
+ int n_src_surf;
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[3];
+ int src_width[3];
+ int src_height[3];
+ int src_pitch[3];
+ int wm_binding_table_entries;
+ const uint32_t *ps_kernel_static;
+ int ps_kernel_static_size;
#if 0
ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, height,
@@ -202,7 +218,50 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
ErrorF ("INST_PM 0x%08x\n", INREG(INST_PM));
#endif
- assert((id == FOURCC_UYVY) || (id == FOURCC_YUY2));
+ src_surf_base[0] = pPriv->YBuf0offset;
+ src_surf_base[1] = pPriv->VBuf0offset;
+ src_surf_base[2] = pPriv->UBuf0offset;
+#if 0
+ ErrorF ("base 0 0x%x base 1 0x%x base 2 0x%x\n",
+ src_surf_base[0], src_surf_base[1], src_surf_base[2]);
+#endif
+
+ switch (id) {
+ case FOURCC_UYVY:
+ src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
+ n_src_surf = 1;
+ ps_kernel_static = &ps_kernel_packed_static[0][0];
+ ps_kernel_static_size = sizeof (ps_kernel_packed_static);
+ src_width[0] = width;
+ src_height[0] = height;
+ src_pitch[0] = video_pitch;
+ break;
+ case FOURCC_YUY2:
+ src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
+ ps_kernel_static = &ps_kernel_packed_static[0][0];
+ ps_kernel_static_size = sizeof (ps_kernel_packed_static);
+ src_width[0] = width;
+ src_height[0] = height;
+ src_pitch[0] = video_pitch;
+ n_src_surf = 1;
+ break;
+ case FOURCC_I420:
+ case FOURCC_YV12:
+ src_surf_format = BRW_SURFACEFORMAT_R8_UNORM;
+ ps_kernel_static = &ps_kernel_planar_static[0][0];
+ ps_kernel_static_size = sizeof (ps_kernel_planar_static);
+ src_width[0] = width;
+ src_height[0] = height;
+ src_pitch[0] = video_pitch * 2;
+ src_width[1] = src_width[2] = width / 2;
+ src_height[1] = src_height[2] = height / 2;
+ src_pitch[1] = src_pitch[2] = video_pitch;
+ n_src_surf = 3;
+ break;
+ default:
+ return;
+ }
+ wm_binding_table_entries = 1 + n_src_surf;
IntelEmitInvarientState(pScrn);
*pI830->last_3d = LAST_3D_VIDEO;
@@ -224,15 +283,17 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
sf_kernel_offset = ALIGN(next_offset, 64);
next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
ps_kernel_offset = ALIGN(next_offset, 64);
- next_offset = ps_kernel_offset + sizeof (ps_kernel_static);
+ next_offset = ps_kernel_offset + ps_kernel_static_size;
sip_kernel_offset = ALIGN(next_offset, 64);
next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
cc_viewport_offset = ALIGN(next_offset, 32);
next_offset = cc_viewport_offset + sizeof(*cc_viewport);
- src_sampler_offset = ALIGN(next_offset, 32);
- next_offset = src_sampler_offset + sizeof(*src_sampler_state);
-
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
+ src_sampler_offset[src_surf] = ALIGN(next_offset, 32);
+ next_offset = src_sampler_offset[src_surf] + sizeof(struct brw_sampler_state);
+ }
+
/* Align VB to native size of elements, for safety */
vb_offset = ALIGN(next_offset, 8);
next_offset = vb_offset + vb_size;
@@ -240,10 +301,14 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
/* And then the general state: */
dest_surf_offset = ALIGN(next_offset, 32);
next_offset = dest_surf_offset + sizeof(*dest_surf_state);
- src_surf_offset = ALIGN(next_offset, 32);
- next_offset = src_surf_offset + sizeof(*src_surf_state);
+
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
+ src_surf_offset[src_surf] = ALIGN(next_offset, 32);
+ next_offset = src_surf_offset[src_surf] + sizeof(struct brw_surface_state);
+ }
+
binding_table_offset = ALIGN(next_offset, 32);
- next_offset = binding_table_offset + (WM_BINDING_TABLE_ENTRIES * 4);
+ next_offset = binding_table_offset + (wm_binding_table_entries * 4);
/* Allocate an area in framebuffer for our state layout we just set up */
total_state_size = next_offset;
@@ -270,8 +335,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
cc_viewport = (void *)(state_base + cc_viewport_offset);
dest_surf_state = (void *)(state_base + dest_surf_offset);
- src_surf_state = (void *)(state_base + src_surf_offset);
- src_sampler_state = (void *)(state_base + src_sampler_offset);
+
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++)
+ {
+ src_surf_state[src_surf] = (void *)(state_base + src_surf_offset[src_surf]);
+ src_sampler_state[src_surf] = (void *)(state_base + src_sampler_offset[src_surf]);
+ }
binding_table = (void *)(state_base + binding_table_offset);
vb = (void *)(state_base + vb_offset);
@@ -384,50 +453,49 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
dest_surf_state->ss3.tiled_surface = i830_pixmap_tiled(pPixmap);
dest_surf_state->ss3.tile_walk = 0; /* TileX */
- /* Set up the source surface state buffer */
- memset(src_surf_state, 0, sizeof(*src_surf_state));
- src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- /* src_surf_state->ss0.data_return_format =
- BRW_SURFACERETURNFORMAT_FLOAT32; */
- switch (id) {
- case FOURCC_YUY2:
- src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
- break;
- case FOURCC_UYVY:
- src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
- break;
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++)
+ {
+ /* Set up the source surface state buffer */
+ memset(src_surf_state[src_surf], 0, sizeof(struct brw_surface_state));
+ src_surf_state[src_surf]->ss0.surface_type = BRW_SURFACE_2D;
+ src_surf_state[src_surf]->ss0.surface_format = src_surf_format;
+ src_surf_state[src_surf]->ss0.writedisable_alpha = 0;
+ src_surf_state[src_surf]->ss0.writedisable_red = 0;
+ src_surf_state[src_surf]->ss0.writedisable_green = 0;
+ src_surf_state[src_surf]->ss0.writedisable_blue = 0;
+ src_surf_state[src_surf]->ss0.color_blend = 1;
+ src_surf_state[src_surf]->ss0.vert_line_stride = 0;
+ src_surf_state[src_surf]->ss0.vert_line_stride_ofs = 0;
+ src_surf_state[src_surf]->ss0.mipmap_layout_mode = 0;
+ src_surf_state[src_surf]->ss0.render_cache_read_mode = 0;
+
+ src_surf_state[src_surf]->ss1.base_addr = src_surf_base[src_surf];
+ src_surf_state[src_surf]->ss2.width = src_width[src_surf] - 1;
+ src_surf_state[src_surf]->ss2.height = src_height[src_surf] - 1;
+ src_surf_state[src_surf]->ss2.mip_count = 0;
+ src_surf_state[src_surf]->ss2.render_target_rotation = 0;
+ src_surf_state[src_surf]->ss3.pitch = src_pitch[src_surf] - 1;
}
- src_surf_state->ss0.writedisable_alpha = 0;
- src_surf_state->ss0.writedisable_red = 0;
- src_surf_state->ss0.writedisable_green = 0;
- src_surf_state->ss0.writedisable_blue = 0;
- src_surf_state->ss0.color_blend = 1;
- src_surf_state->ss0.vert_line_stride = 0;
- src_surf_state->ss0.vert_line_stride_ofs = 0;
- src_surf_state->ss0.mipmap_layout_mode = 0;
- src_surf_state->ss0.render_cache_read_mode = 0;
-
- src_surf_state->ss1.base_addr = pPriv->YBuf0offset;
- src_surf_state->ss2.width = width - 1;
- src_surf_state->ss2.height = height - 1;
- src_surf_state->ss2.mip_count = 0;
- src_surf_state->ss2.render_target_rotation = 0;
- src_surf_state->ss3.pitch = video_pitch - 1;
/* FIXME: account for tiling if we ever do it */
/* Set up a binding table for our two surfaces. Only the PS will use it */
/* XXX: are these offset from the right place? */
binding_table[0] = state_base_offset + dest_surf_offset;
- binding_table[1] = state_base_offset + src_surf_offset;
+
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++)
+ binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
/* Set up the packed YUV source sampler. Doesn't do colorspace conversion.
*/
- memset(src_sampler_state, 0, sizeof(*src_sampler_state));
- src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
- src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
- src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++)
+ {
+ memset(src_sampler_state[src_surf], 0, sizeof(struct brw_sampler_state));
+ src_sampler_state[src_surf]->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ src_sampler_state[src_surf]->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ src_sampler_state[src_surf]->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ src_sampler_state[src_surf]->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ src_sampler_state[src_surf]->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ }
/* Set up the vertex shader to be disabled (passthrough) */
memset(vs_state, 0, sizeof(*vs_state));
@@ -472,13 +540,13 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
sf_state->sf6.dest_org_vbias = 0x8;
sf_state->sf6.dest_org_hbias = 0x8;
- memcpy (ps_kernel, ps_kernel_static, sizeof (ps_kernel_static));
+ memcpy (ps_kernel, ps_kernel_static, ps_kernel_static_size);
memset (wm_state, 0, sizeof (*wm_state));
wm_state->thread0.kernel_start_pointer =
(state_base_offset + ps_kernel_offset) >> 6;
wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
wm_state->thread1.single_program_flow = 1; /* XXX */
- wm_state->thread1.binding_table_entry_count = 2;
+ wm_state->thread1.binding_table_entry_count = 1 + n_src_surf;
/* Though we never use the scratch space in our WM kernel, it has to be
* set, and the minimum allocation is 1024 bytes.
*/
@@ -492,7 +560,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
wm_state->wm4.stats_enable = 1;
wm_state->wm4.sampler_state_pointer = (state_base_offset +
- src_sampler_offset) >> 5;
+ src_sampler_offset[0]) >> 5;
wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
wm_state->wm5.thread_dispatch_enable = 1;