Import MesaLibs 6.5.1.

It lives in dist/ because its code is shared between lib and xserver.
author: Matthieu Herrb <matthieu@cvs.openbsd.org> 2006-11-25 18:55:19 +0000
committer: Matthieu Herrb <matthieu@cvs.openbsd.org> 2006-11-25 18:55:19 +0000
commit: 0641eddccd060a4ae333378927ffb3d9c34ddb98 (patch)
tree: 4a34bd4f0351f8b1cb676d91595b5b8e14b3c563 /dist/Mesa/src/mesa/swrast
parent: d2ecd06125d3ab42e53ffdd86d7d3103a57e810b (diff)
62 files changed, 27074 insertions, 0 deletions
diff --git a/dist/Mesa/src/mesa/swrast/descrip.mms b/dist/Mesa/src/mesa/swrast/descrip.mms
new file mode 100644
index 000000000..4d49673b5
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/descrip.mms
@@ -0,0 +1,80 @@
+# MMS description file (VMS makefile) for the swrast part of the core library
+# contributed by Jouk Jansen  joukj@hrem.nano.tudelft.nl
+# Last revision : 21 February 2006
+
+.first
+	define gl [---.include.gl]
+	define math [-.math]
+	define swrast [-.swrast]
+	define array_cache [-.array_cache]
+
+.include [---]mms-config.
+
+##### MACROS #####
+
+VPATH = RCS
+
+INCDIR = [---.include],[-.main],[-.glapi],[-.shader],[-.shader.slang]
+LIBDIR = [---.lib]
+CFLAGS = /include=($(INCDIR),[])/define=(PTHREADS=1)/name=(as_is,short)/float=ieee/ieee=denorm
+
+SOURCES = s_aaline.c s_aatriangle.c s_accum.c s_alpha.c \
+	s_bitmap.c s_blend.c s_blit.c s_buffers.c s_context.c \
+	s_copypix.c s_depth.c \
+        s_drawpix.c s_feedback.c s_fog.c s_imaging.c s_lines.c s_logic.c \
+	s_masking.c s_nvfragprog.c s_points.c s_readpix.c \
+	s_span.c s_stencil.c s_texstore.c s_texcombine.c s_texfilter.c \
+	s_triangle.c s_zoom.c s_atifragshader.c s_arbshader.c
+# OBJECTS mirrors SOURCES (same 30 files, .obj instead of .c; order differs)
+OBJECTS = s_aaline.obj,s_aatriangle.obj,s_accum.obj,s_alpha.obj,\
+	s_bitmap.obj,s_blend.obj,s_blit.obj,s_arbshader.obj,\
+	s_buffers.obj,s_context.obj,s_atifragshader.obj,\
+	s_copypix.obj,s_depth.obj,s_drawpix.obj,s_feedback.obj,s_fog.obj,\
+	s_imaging.obj,s_lines.obj,s_logic.obj,s_masking.obj,s_nvfragprog.obj,\
+	s_points.obj,s_readpix.obj,s_span.obj,s_stencil.obj,\
+	s_texstore.obj,s_texcombine.obj,s_texfilter.obj,s_triangle.obj,\
+	s_zoom.obj
+ 
+##### RULES #####
+# NOTE(review): VERSION is not referenced in this file and looks stale - confirm
+VERSION=Mesa V3.4
+
+##### TARGETS #####
+# Put all object modules listed in OBJECTS into the library $(LIBDIR)$(GL_LIB)
+$(LIBDIR)$(GL_LIB) : $(OBJECTS)
+  @ library $(LIBDIR)$(GL_LIB) $(OBJECTS)
+# clean: purge old file versions, then delete every version of the object files
+clean :
+	purge
+	delete *.obj;*
+# Per-object source dependencies (no actions given; presumably built-in rules apply)
+s_atifragshader.obj : s_atifragshader.c
+s_aaline.obj : s_aaline.c
+s_aatriangle.obj : s_aatriangle.c
+s_accum.obj : s_accum.c
+s_alpha.obj : s_alpha.c
+s_bitmap.obj : s_bitmap.c
+s_blend.obj : s_blend.c
+s_blit.obj : s_blit.c
+s_buffers.obj : s_buffers.c
+s_context.obj : s_context.c
+s_copypix.obj : s_copypix.c
+s_depth.obj : s_depth.c
+s_drawpix.obj : s_drawpix.c
+s_feedback.obj : s_feedback.c
+s_fog.obj : s_fog.c
+s_imaging.obj : s_imaging.c
+s_lines.obj : s_lines.c
+s_logic.obj : s_logic.c
+s_masking.obj : s_masking.c
+s_nvfragprog.obj : s_nvfragprog.c
+s_points.obj : s_points.c
+s_readpix.obj : s_readpix.c
+s_span.obj : s_span.c
+s_stencil.obj : s_stencil.c
+s_texstore.obj : s_texstore.c
+s_texcombine.obj : s_texcombine.c
+s_texfilter.obj : s_texfilter.c
+s_triangle.obj : s_triangle.c
+s_zoom.obj : s_zoom.c
+s_arbshader.obj : s_arbshader.c
diff --git a/dist/Mesa/src/mesa/swrast/s_aaline.c b/dist/Mesa/src/mesa/swrast/s_aaline.c
new file mode 100644
index 000000000..b8c214f68
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_aaline.c
@@ -0,0 +1,554 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.1
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "imports.h"
+#include "macros.h"
+#include "swrast/s_aaline.h"
+#include "swrast/s_context.h"
+#include "swrast/s_span.h"
+#include "swrast/swrast.h"
+#include "mtypes.h"
+
+/* Coverage is sampled on a SUB_PIXEL x SUB_PIXEL grid of points per pixel */
+#define SUB_PIXEL 4
+
+
+/*
+ * State for one AA line: geometry, quad, attribute planes and output span
+ */
+struct LineInfo
+{
+   GLfloat x0, y0;        /* start (window coords of first vertex) */
+   GLfloat x1, y1;        /* end (window coords of second vertex) */
+   GLfloat dx, dy;        /* direction vector: (x1 - x0, y1 - y0) */
+   GLfloat len;           /* length: sqrt(dx*dx + dy*dy) */
+   GLfloat halfWidth;     /* half of line width */
+   GLfloat xAdj, yAdj;    /* (dx, dy) / len * halfWidth; offsets for quad corners */
+   /* for coverage computation; recomputed by segment() for every segment */
+   GLfloat qx0, qy0;      /* quad vertices */
+   GLfloat qx1, qy1;
+   GLfloat qx2, qy2;
+   GLfloat qx3, qy3;
+   GLfloat ex0, ey0;      /* quad edge vectors: vertex n+1 minus vertex n */
+   GLfloat ex1, ey1;
+   GLfloat ex2, ey2;
+   GLfloat ex3, ey3;      /* closing edge: vertex 0 minus vertex 3 */
+
+   /* DO_Z: plane for window Z */
+   GLfloat zPlane[4];
+   /* DO_FOG: plane for the fog value */
+   GLfloat fPlane[4];
+   /* DO_RGBA: one plane per color channel */
+   GLfloat rPlane[4], gPlane[4], bPlane[4], aPlane[4];
+   /* DO_INDEX: plane for the color index */
+   GLfloat iPlane[4];
+   /* DO_SPEC: one plane per specular color channel */
+   GLfloat srPlane[4], sgPlane[4], sbPlane[4];
+   /* DO_TEX or DO_MULTITEX: per-unit planes for texcoord components 0..3 */
+   GLfloat sPlane[MAX_TEXTURE_COORD_UNITS][4];
+   GLfloat tPlane[MAX_TEXTURE_COORD_UNITS][4];
+   GLfloat uPlane[MAX_TEXTURE_COORD_UNITS][4];
+   GLfloat vPlane[MAX_TEXTURE_COORD_UNITS][4];   /* component 3 (q) */
+   GLfloat lambda[MAX_TEXTURE_COORD_UNITS];   /* NOTE(review): no use visible here */
+   GLfloat texWidth[MAX_TEXTURE_COORD_UNITS];    /* base-level image Width */
+   GLfloat texHeight[MAX_TEXTURE_COORD_UNITS];   /* base-level image Height */
+
+   struct sw_span span;   /* fragments queued by the plot functions */
+};
+
+
+
+/*
+ * Build the plane equation a*x + b*y + c*z + d = 0 used to interpolate one
+ * line attribute (color, Z, texture coord, etc.) over the line's fragments.
+ * Input: (x0, y0) and (x1, y1) are the endpoints of the line.
+ *        z0 and z1 are the attribute values at those endpoints.
+ * Output:  plane - receives the coefficients a, b, c, d.
+ *
+ * The result is meant for solve_plane(), solve_plane_recip() and
+ * solve_plane_chan(), which solve it for z at a given (x, y).
+ *
+ * Two points do not determine a plane, so a second direction is chosen:
+ * with P = (x1-x0, y1-y0, z1-z0) along the line and Q = (-Py, Px, 0)
+ * perpendicular to it in window space, the plane normal is P x Q:
+ *    a = Py*Qz - Pz*Qy = -Pz*Px
+ *    b = Pz*Qx - Px*Qz = -Pz*Py
+ *    c = Px*Qy - Py*Qx =  Px*Px + Py*Py
+ *    d = -(a*x0 + b*y0 + c*z0)
+ * The code evaluates these simplified forms directly and carries the
+ * negated Pz (z0 - z1) instead of Pz.
+ */
+static void
+compute_plane(GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1,
+              GLfloat z0, GLfloat z1, GLfloat plane[4])
+{
+   /* line direction in window space and the negated attribute delta */
+   const GLfloat lineX = x1 - x0;
+   const GLfloat lineY = y1 - y0;
+   const GLfloat negLineZ = z0 - z1;
+
+   /* plane normal (a, b, c) */
+   const GLfloat coefA = negLineZ * lineX;
+   const GLfloat coefB = negLineZ * lineY;
+   const GLfloat coefC = lineX * lineX + lineY * lineY;
+
+   /* d makes the plane pass through (x0, y0, z0) */
+   const GLfloat coefD = -(coefA * x0 + coefB * y0 + coefC * z0);
+
+   if (coefA != 0.0 || coefB != 0.0 || coefC != 0.0 || coefD != 0.0) {
+      /* usual case: store the computed coefficients */
+      plane[0] = coefA;
+      plane[1] = coefB;
+      plane[2] = coefC;
+      plane[3] = coefD;
+   }
+   else {
+      /* All coefficients are zero (e.g. coincident endpoints).  Use the
+       * plane z = 0 instead, which keeps plane[2], the divisor in
+       * solve_plane(), non-zero.
+       */
+      plane[0] = 0.0;
+      plane[1] = 0.0;
+      plane[2] = 1.0;
+      plane[3] = 0.0;
+   }
+}
+
+
+static INLINE void
+constant_plane(GLfloat value, GLfloat plane[4])
+{
+   /* a = b = 0 and c = -1: solving for z gives d, i.e. 'value', everywhere */
+   plane[3] = value;
+   plane[2] = -1.0;
+   plane[1] = plane[0] = 0.0;
+}
+
+
+static INLINE GLfloat
+solve_plane(GLfloat x, GLfloat y, const GLfloat plane[4])
+{
+   /* a*x + b*y + c*z + d = 0  =>  z = (d + a*x + b*y) / -c */
+   return (plane[3] + plane[0] * x + plane[1] * y) / -plane[2];
+}
+/* Macro form of solve_plane(): z = (d + a*X + b*Y) / -c for PLANE = {a,b,c,d} */
+#define SOLVE_PLANE(X, Y, PLANE) \
+   (((PLANE)[3] + (PLANE)[0] * (X) + (PLANE)[1] * (Y)) / -(PLANE)[2])
+
+
+/*
+ * Return 1 / solve_plane(), or 0 where solve_plane() would yield 0.
+ */
+static INLINE GLfloat
+solve_plane_recip(GLfloat x, GLfloat y, const GLfloat plane[4])
+{
+   /* numerator of solve_plane(), which is the divisor of its reciprocal */
+   const GLfloat numer = plane[3] + plane[0] * x + plane[1] * y;
+   if (numer != 0.0)
+      return -plane[2] / numer;
+   return 0.0;
+}
+
+
+/*
+ * Solve the plane for z at (x, y); return it as a GLchan limited to range.
+ */
+static INLINE GLchan
+solve_plane_chan(GLfloat x, GLfloat y, const GLfloat plane[4])
+{
+   const GLfloat value = (plane[3] + plane[0] * x + plane[1] * y) / -plane[2];
+#if CHAN_TYPE == GL_FLOAT
+   return CLAMP(value, 0.0F, CHAN_MAXF);
+#else
+   if (value > CHAN_MAX)
+      return CHAN_MAX;
+   if (value < 0)
+      return 0;
+   return (GLchan) IROUND_POS(value);
+#endif
+}
+
+
+/*
+ * Compute the mipmap level of detail, lambda = log2(rho), for a fragment.
+ */
+static INLINE GLfloat
+compute_lambda(const GLfloat sPlane[4], const GLfloat tPlane[4],
+               GLfloat invQ, GLfloat width, GLfloat height)
+{
+   /* s and t plane slopes along x and y, scaled by invQ and texture size */
+   const GLfloat uGradX = sPlane[0] / sPlane[2] * invQ * width;
+   const GLfloat uGradY = sPlane[1] / sPlane[2] * invQ * width;
+   const GLfloat vGradX = tPlane[0] / tPlane[2] * invQ * height;
+   const GLfloat vGradY = tPlane[1] / tPlane[2] * invQ * height;
+   const GLfloat uLenSq = uGradX * uGradX + uGradY * uGradY;
+   const GLfloat vLenSq = vGradX * vGradX + vGradY * vGradY;
+   const GLfloat rhoSq = uLenSq + vLenSq;
+   if (rhoSq == 0.0F)
+      return 0.0;
+   /* log2(rho) = 0.5 * ln(rho^2) / ln(2), with 1.442695 = 1/ln(2) */
+   return (GLfloat) (LOGF(rhoSq) * 1.442695 * 0.5);
+}
+
+
+
+
+/*
+ * Fill in the samples[] array with the (x,y) subpixel positions of the
+ * centers of an xSamples * ySamples grid of cells covering one pixel.
+ * The four corner cells always land in slots 0..3 (lower left, lower
+ * right, upper left, upper right); every other cell is appended from
+ * slot 4 onward.  Callers can therefore test the corners first.
+ */
+static void
+make_sample_table(GLint xSamples, GLint ySamples, GLfloat samples[][2])
+{
+   const GLfloat cellW = 1.0F / (GLfloat) xSamples;
+   const GLfloat cellH = 1.0F / (GLfloat) ySamples;
+   const GLint lastCol = xSamples - 1;
+   const GLint lastRow = ySamples - 1;
+   GLint nextSlot = 4;   /* first slot after the reserved corners */
+   GLint col, row;
+
+   for (col = 0; col < xSamples; col++) {
+      const GLboolean leftCol = (col == 0);
+      const GLboolean rightCol = (col == lastCol);
+      for (row = 0; row < ySamples; row++) {
+         const GLboolean bottomRow = (row == 0);
+         const GLboolean topRow = (row == lastRow);
+         GLint slot;
+         /* Same test order as the slot numbering, which matters when a
+          * dimension is 1 and a cell is both first and last.
+          */
+         if (leftCol && bottomRow)
+            slot = 0;
+         else if (rightCol && bottomRow)
+            slot = 1;
+         else if (leftCol && topRow)
+            slot = 2;
+         else if (rightCol && topRow)
+            slot = 3;
+         else
+            slot = nextSlot++;
+         /* sample point is the center of the cell */
+         samples[slot][0] = col * cellW + 0.5F * cellW;
+         samples[slot][1] = row * cellH + 0.5F * cellH;
+      }
+   }
+}
+
+
+
+/*
+ * Estimate how much of pixel (winx, winy) lies inside the line's quad
+ * (vertices info->qx0..qy3, edge vectors info->ex0..ey3) by point sampling.
+ * Quad vertices MUST be specified in counter-clockwise order.
+ * Return:  coverage in [0, 1].
+ */
+static GLfloat
+compute_coveragef(const struct LineInfo *info,
+                  GLint winx, GLint winy)
+{
+   static GLfloat samples[SUB_PIXEL * SUB_PIXEL][2];
+   static GLboolean haveSamples = GL_FALSE;   /* NOTE(review): lazy init, no locking visible */
+   const GLfloat x = (GLfloat) winx;
+   const GLfloat y = (GLfloat) winy;
+   GLint stop = 4, i;   /* start by testing only the 4 corner samples */
+   GLfloat insideCount = SUB_PIXEL * SUB_PIXEL;   /* decremented per outside sample */
+
+   if (!haveSamples) {
+      make_sample_table(SUB_PIXEL, SUB_PIXEL, samples);
+      haveSamples = GL_TRUE;
+   }
+
+#if 0 /*DEBUG*/
+   {
+      const GLfloat area = dx0 * dy1 - dx1 * dy0;
+      assert(area >= 0.0);
+   }
+#endif
+
+   for (i = 0; i < stop; i++) {
+      const GLfloat sx = x + samples[i][0];
+      const GLfloat sy = y + samples[i][1];
+      const GLfloat fx0 = sx - info->qx0;
+      const GLfloat fy0 = sy - info->qy0;
+      const GLfloat fx1 = sx - info->qx1;
+      const GLfloat fy1 = sy - info->qy1;
+      const GLfloat fx2 = sx - info->qx2;
+      const GLfloat fy2 = sy - info->qy2;
+      const GLfloat fx3 = sx - info->qx3;
+      const GLfloat fy3 = sy - info->qy3;
+      /* edge x (sample - vertex): a negative value means outside that edge */
+      GLfloat cross0 = (info->ex0 * fy0 - info->ey0 * fx0);
+      GLfloat cross1 = (info->ex1 * fy1 - info->ey1 * fx1);
+      GLfloat cross2 = (info->ex2 * fy2 - info->ey2 * fx2);
+      GLfloat cross3 = (info->ex3 * fy3 - info->ey3 * fx3);
+      /* Check if the sample is exactly on an edge.  If so, let cross be a
+       * positive or negative value depending on the direction of the edge.
+       */
+      if (cross0 == 0.0F)
+         cross0 = info->ex0 + info->ey0;
+      if (cross1 == 0.0F)
+         cross1 = info->ex1 + info->ey1;
+      if (cross2 == 0.0F)
+         cross2 = info->ex2 + info->ey2;
+      if (cross3 == 0.0F)
+         cross3 = info->ex3 + info->ey3;
+      if (cross0 < 0.0F || cross1 < 0.0F || cross2 < 0.0F || cross3 < 0.0F) {
+         /* point is outside quadrilateral */
+         insideCount -= 1.0F;
+         stop = SUB_PIXEL * SUB_PIXEL;   /* a sample failed: examine all samples */
+      }
+   }
+   if (stop == 4)   /* no sample was outside, so only the 4 corners were tested */
+      return 1.0F;
+   else
+      return insideCount * (1.0F / (SUB_PIXEL * SUB_PIXEL));
+}
+
+
+/**
+ * Coverage for color index mode: compute_coveragef() scaled by 15.
+ * XXX this may not be quite correct.
+ * \return coverage in [0,15].
+ */
+static GLfloat
+compute_coveragei(const struct LineInfo *info, GLint winx, GLint winy)
+{
+   const GLfloat fraction = compute_coveragef(info, winx, winy);
+   return fraction * 15.0F;
+}
+
+
+/* Callback that segment() invokes for every candidate pixel (ix, iy). */
+typedef void (*plot_func)(GLcontext *ctx, struct LineInfo *line,
+                          int ix, int iy);
+                         
+
+
+/*
+ * Draw the part of an AA line between t0 and t1 (once per dash if stippled)
+ */
+static void
+segment(GLcontext *ctx,
+        struct LineInfo *line,
+        plot_func plot,   /* called for every candidate pixel */
+        GLfloat t0, GLfloat t1)   /* range along (dx,dy): 0 = (x0,y0), 1 = (x1,y1) */
+{
+   const GLfloat absDx = (line->dx < 0.0F) ? -line->dx : line->dx;
+   const GLfloat absDy = (line->dy < 0.0F) ? -line->dy : line->dy;
+   /* compute the actual segment's endpoints */
+   const GLfloat x0 = line->x0 + t0 * line->dx;
+   const GLfloat y0 = line->y0 + t0 * line->dy;
+   const GLfloat x1 = line->x0 + t1 * line->dx;
+   const GLfloat y1 = line->y0 + t1 * line->dy;
+
+   /* vertices of the line-aligned quad: endpoints offset by (-yAdj, +xAdj) and (+yAdj, -xAdj) */
+   line->qx0 = x0 - line->yAdj;
+   line->qy0 = y0 + line->xAdj;
+   line->qx1 = x0 + line->yAdj;
+   line->qy1 = y0 - line->xAdj;
+   line->qx2 = x1 + line->yAdj;
+   line->qy2 = y1 - line->xAdj;
+   line->qx3 = x1 - line->yAdj;
+   line->qy3 = y1 + line->xAdj;
+   /* compute the quad's edge vectors (read by compute_coveragef) */
+   line->ex0 = line->qx1 - line->qx0;
+   line->ey0 = line->qy1 - line->qy0;
+   line->ex1 = line->qx2 - line->qx1;
+   line->ey1 = line->qy2 - line->qy1;
+   line->ex2 = line->qx3 - line->qx2;
+   line->ey2 = line->qy3 - line->qy2;
+   line->ex3 = line->qx0 - line->qx3;
+   line->ey3 = line->qy0 - line->qy3;
+
+   if (absDx > absDy) {
+      /* X-major line: walk pixel columns; dx != 0 here since absDx > absDy >= 0 */
+      GLfloat dydx = line->dy / line->dx;
+      GLfloat xLeft, xRight, yBot, yTop;   /* column range, and the row band at the first column */
+      GLint ix, ixRight;
+      if (x0 < x1) {
+         xLeft = x0 - line->halfWidth;
+         xRight = x1 + line->halfWidth;
+         if (line->dy >= 0.0) {
+            yBot = y0 - 3.0F * line->halfWidth;
+            yTop = y0 + line->halfWidth;
+         }
+         else {
+            yBot = y0 - line->halfWidth;
+            yTop = y0 + 3.0F * line->halfWidth;
+         }
+      }
+      else {
+         xLeft = x1 - line->halfWidth;
+         xRight = x0 + line->halfWidth;
+         if (line->dy <= 0.0) {
+            yBot = y1 - 3.0F * line->halfWidth;
+            yTop = y1 + line->halfWidth;
+         }
+         else {
+            yBot = y1 - line->halfWidth;
+            yTop = y1 + 3.0F * line->halfWidth;
+         }
+      }
+
+      /* scan along the line, left-to-right */
+      ixRight = (GLint) (xRight + 1.0F);
+
+      /*printf("avg span height: %g\n", yTop - yBot);*/
+      for (ix = (GLint) xLeft; ix < ixRight; ix++) {
+         const GLint iyBot = (GLint) yBot;
+         const GLint iyTop = (GLint) (yTop + 1.0F);
+         GLint iy;
+         /* scan across the line, bottom-to-top */
+         for (iy = iyBot; iy < iyTop; iy++) {
+            (*plot)(ctx, line, ix, iy);
+         }
+         yBot += dydx;   /* shift the row band by the slope for the next column */
+         yTop += dydx;
+      }
+   }
+   else {
+      /* Y-major line: walk pixel rows; dy is 0 only if dx is too (callers shown reject len == 0) */
+      GLfloat dxdy = line->dx / line->dy;
+      GLfloat yBot, yTop, xLeft, xRight;   /* row range, and the column band at the first row */
+      GLint iy, iyTop;
+      if (y0 < y1) {
+         yBot = y0 - line->halfWidth;
+         yTop = y1 + line->halfWidth;
+         if (line->dx >= 0.0) {
+            xLeft = x0 - 3.0F * line->halfWidth;
+            xRight = x0 + line->halfWidth;
+         }
+         else {
+            xLeft = x0 - line->halfWidth;
+            xRight = x0 + 3.0F * line->halfWidth;
+         }
+      }
+      else {
+         yBot = y1 - line->halfWidth;
+         yTop = y0 + line->halfWidth;
+         if (line->dx <= 0.0) {
+            xLeft = x1 - 3.0F * line->halfWidth;
+            xRight = x1 + line->halfWidth;
+         }
+         else {
+            xLeft = x1 - line->halfWidth;
+            xRight = x1 + 3.0F * line->halfWidth;
+         }
+      }
+
+      /* scan along the line, bottom-to-top */
+      iyTop = (GLint) (yTop + 1.0F);
+
+      /*printf("avg span width: %g\n", xRight - xLeft);*/
+      for (iy = (GLint) yBot; iy < iyTop; iy++) {
+         const GLint ixLeft = (GLint) xLeft;
+         const GLint ixRight = (GLint) (xRight + 1.0F);
+         GLint ix;
+         /* scan across the line, left-to-right */
+         for (ix = ixLeft; ix < ixRight; ix++) {
+            (*plot)(ctx, line, ix, iy);
+         }
+         xLeft += dxdy;   /* shift the column band by the inverse slope for the next row */
+         xRight += dxdy;
+      }
+   }
+}
+
+/* Color-index variant: generates aa_ci_plot() and aa_ci_line() */
+#define NAME(x) aa_ci_##x
+#define DO_Z
+#define DO_FOG
+#define DO_INDEX
+#include "s_aalinetemp.h"
+
+/* RGBA variant without texture: aa_rgba_plot() and aa_rgba_line() */
+#define NAME(x) aa_rgba_##x
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#include "s_aalinetemp.h"
+
+/* RGBA variant using texture unit 0 only: aa_tex_rgba_*() */
+#define NAME(x)  aa_tex_rgba_##x
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#define DO_TEX
+#include "s_aalinetemp.h"
+
+/* RGBA variant looping over all enabled texture units: aa_multitex_rgba_*() */
+#define NAME(x)  aa_multitex_rgba_##x
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#define DO_MULTITEX
+#include "s_aalinetemp.h"
+
+/* Multitexture variant that also interpolates specular: aa_multitex_spec_*() */
+#define NAME(x)  aa_multitex_spec_##x
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#define DO_MULTITEX
+#define DO_SPEC
+#include "s_aalinetemp.h"
+
+
+/*
+ * Point swrast->Line at the AA line function matching the current GL state.
+ */
+void
+_swrast_choose_aa_line_function(GLcontext *ctx)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   ASSERT(ctx->Line.SmoothFlag);
+
+   if (!ctx->Visual.rgbMode) {
+      /* Color Index */
+      swrast->Line = aa_ci_line;
+   }
+   else if (ctx->Texture._EnabledCoordUnits == 0) {
+      /* RGBA, no texture coordinates in use */
+      swrast->Line = aa_rgba_line;
+   }
+   else if (ctx->Texture._EnabledCoordUnits <= 1) {
+      /* RGBA, textured, but not multitextured */
+      swrast->Line = aa_tex_rgba_line;
+   }
+   else if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR ||
+            ctx->Fog.ColorSumEnabled) {
+      /* Multitextured, with a separate specular color */
+      swrast->Line = aa_multitex_spec_line;
+   }
+   else {
+      /* Multitextured! */
+      swrast->Line = aa_multitex_rgba_line;
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_aaline.h b/dist/Mesa/src/mesa/swrast/s_aaline.h
new file mode 100644
index 000000000..41e7e5fd4
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_aaline.h
@@ -0,0 +1,39 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_AALINE_H
+#define S_AALINE_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+/* Select the antialiased line function for ctx's current state. */
+extern void
+_swrast_choose_aa_line_function(GLcontext *ctx);
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_aalinetemp.h b/dist/Mesa/src/mesa/swrast/s_aalinetemp.h
new file mode 100644
index 000000000..34c95fc34
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_aalinetemp.h
@@ -0,0 +1,329 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Antialiased line template.
+ */
+
+
+/*
+ * Append the fragment at (ix, iy) to line->span unless its coverage is 0.
+ * \param ix  - integer fragment window X coordinate
+ * \param iy  - integer fragment window Y coordinate
+ */
+static void
+NAME(plot)(GLcontext *ctx, struct LineInfo *line, int ix, int iy)
+{
+   const GLfloat fx = (GLfloat) ix;
+   const GLfloat fy = (GLfloat) iy;
+#ifdef DO_INDEX
+   const GLfloat coverage = compute_coveragei(line, ix, iy);
+#else
+   const GLfloat coverage = compute_coveragef(line, ix, iy);
+#endif
+   const GLuint i = line->span.end;   /* slot this fragment will occupy */
+
+   if (coverage == 0.0)
+      return;
+
+   line->span.end++;
+   line->span.array->coverage[i] = coverage;
+   line->span.array->x[i] = ix;
+   line->span.array->y[i] = iy;
+
+   /*
+    * Compute Z, color, texture coords, fog for the fragment by
+    * solving the plane equations at (ix,iy).
+    */
+#ifdef DO_Z
+   line->span.array->z[i] = (GLuint) solve_plane(fx, fy, line->zPlane);
+#endif
+#ifdef DO_FOG
+   line->span.array->fog[i] = solve_plane(fx, fy, line->fPlane);
+#endif
+#ifdef DO_RGBA
+   line->span.array->rgba[i][RCOMP] = solve_plane_chan(fx, fy, line->rPlane);
+   line->span.array->rgba[i][GCOMP] = solve_plane_chan(fx, fy, line->gPlane);
+   line->span.array->rgba[i][BCOMP] = solve_plane_chan(fx, fy, line->bPlane);
+   line->span.array->rgba[i][ACOMP] = solve_plane_chan(fx, fy, line->aPlane);
+#endif
+#ifdef DO_INDEX
+   line->span.array->index[i] = (GLint) solve_plane(fx, fy, line->iPlane);
+#endif
+#ifdef DO_SPEC
+   line->span.array->spec[i][RCOMP] = solve_plane_chan(fx, fy, line->srPlane);
+   line->span.array->spec[i][GCOMP] = solve_plane_chan(fx, fy, line->sgPlane);
+   line->span.array->spec[i][BCOMP] = solve_plane_chan(fx, fy, line->sbPlane);
+#endif
+#ifdef DO_TEX
+   {
+      GLfloat invQ;   /* factor applied to s, t, r below */
+      if (ctx->FragmentProgram._Active) {
+         invQ = 1.0F;   /* no division by q when a fragment program is active */
+      }
+      else {
+         invQ = solve_plane_recip(fx, fy, line->vPlane[0]);
+      }
+      line->span.array->texcoords[0][i][0] = solve_plane(fx, fy, line->sPlane[0]) * invQ;
+      line->span.array->texcoords[0][i][1] = solve_plane(fx, fy, line->tPlane[0]) * invQ;
+      line->span.array->texcoords[0][i][2] = solve_plane(fx, fy, line->uPlane[0]) * invQ;
+      line->span.array->lambda[0][i] = compute_lambda(line->sPlane[0],
+                                                      line->tPlane[0], invQ,
+                                                      line->texWidth[0],
+                                                      line->texHeight[0]);
+   }
+#elif defined(DO_MULTITEX)
+   {
+      GLuint unit;
+      for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+            GLfloat invQ;   /* factor applied to s, t, r below */
+            if (ctx->FragmentProgram._Active) {
+               invQ = 1.0F;   /* no division by q when a fragment program is active */
+            }
+            else {
+               invQ = solve_plane_recip(fx, fy, line->vPlane[unit]);
+            }
+            line->span.array->texcoords[unit][i][0] = solve_plane(fx, fy, line->sPlane[unit]) * invQ;
+            line->span.array->texcoords[unit][i][1] = solve_plane(fx, fy, line->tPlane[unit]) * invQ;
+            line->span.array->texcoords[unit][i][2] = solve_plane(fx, fy, line->uPlane[unit]) * invQ;
+            line->span.array->lambda[unit][i] = compute_lambda(line->sPlane[unit],
+                                                               line->tPlane[unit], invQ,
+                                                               line->texWidth[unit], line->texHeight[unit]);
+         }
+      }
+   }
+#endif
+
+   if (line->span.end == MAX_WIDTH) {   /* flush once MAX_WIDTH fragments are queued */
+#if defined(DO_RGBA)
+      _swrast_write_rgba_span(ctx, &(line->span));
+#else
+      _swrast_write_index_span(ctx, &(line->span));
+#endif
+      line->span.end = 0; /* reset counter */
+   }
+}
+
+
+
+/*
+ * Line setup: build the attribute planes for v0->v1, then draw its segments
+ */
+static void
+NAME(line)(GLcontext *ctx, const SWvertex *v0, const SWvertex *v1)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLfloat tStart, tEnd;   /* segment start, end along line length */
+   GLboolean inSegment;    /* GL_TRUE while a run of "on" stipple bits is open */
+   GLint iLen, i;
+
+   /* Init the LineInfo struct */
+   struct LineInfo line;
+   line.x0 = v0->win[0];
+   line.y0 = v0->win[1];
+   line.x1 = v1->win[0];
+   line.y1 = v1->win[1];
+   line.dx = line.x1 - line.x0;
+   line.dy = line.y1 - line.y0;
+   line.len = SQRTF(line.dx * line.dx + line.dy * line.dy);
+   line.halfWidth = 0.5F * ctx->Line._Width;
+
+   if (line.len == 0.0 || IS_INF_OR_NAN(line.len))
+      return;   /* zero-length or non-finite line: draw nothing */
+
+   INIT_SPAN(line.span, GL_LINE, 0, 0, SPAN_XY | SPAN_COVERAGE);
+
+   line.xAdj = line.dx / line.len * line.halfWidth;
+   line.yAdj = line.dy / line.len * line.halfWidth;
+
+#ifdef DO_Z
+   line.span.arrayMask |= SPAN_Z;
+   compute_plane(line.x0, line.y0, line.x1, line.y1,
+                 v0->win[2], v1->win[2], line.zPlane);
+#endif
+#ifdef DO_FOG
+   line.span.arrayMask |= SPAN_FOG;
+   compute_plane(line.x0, line.y0, line.x1, line.y1,
+                 v0->fog, v1->fog, line.fPlane);
+#endif
+#ifdef DO_RGBA
+   line.span.arrayMask |= SPAN_RGBA;
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->color[RCOMP], v1->color[RCOMP], line.rPlane);
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->color[GCOMP], v1->color[GCOMP], line.gPlane);
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->color[BCOMP], v1->color[BCOMP], line.bPlane);
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->color[ACOMP], v1->color[ACOMP], line.aPlane);
+   }
+   else {   /* not GL_SMOOTH: constant color taken from v1 */
+      constant_plane(v1->color[RCOMP], line.rPlane);
+      constant_plane(v1->color[GCOMP], line.gPlane);
+      constant_plane(v1->color[BCOMP], line.bPlane);
+      constant_plane(v1->color[ACOMP], line.aPlane);
+   }
+#endif
+#ifdef DO_SPEC
+   line.span.arrayMask |= SPAN_SPEC;
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->specular[RCOMP], v1->specular[RCOMP], line.srPlane);
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->specular[GCOMP], v1->specular[GCOMP], line.sgPlane);
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->specular[BCOMP], v1->specular[BCOMP], line.sbPlane);
+   }
+   else {   /* not GL_SMOOTH: constant specular color taken from v1 */
+      constant_plane(v1->specular[RCOMP], line.srPlane);
+      constant_plane(v1->specular[GCOMP], line.sgPlane);
+      constant_plane(v1->specular[BCOMP], line.sbPlane);
+   }
+#endif
+#ifdef DO_INDEX
+   line.span.arrayMask |= SPAN_INDEX;
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      compute_plane(line.x0, line.y0, line.x1, line.y1,
+                    v0->index, v1->index, line.iPlane);
+   }
+   else {   /* not GL_SMOOTH: constant index taken from v1 */
+      constant_plane(v1->index, line.iPlane);
+   }
+#endif
+#ifdef DO_TEX
+   {
+      const struct gl_texture_object *obj = ctx->Texture.Unit[0]._Current;
+      const struct gl_texture_image *texImage = obj->Image[0][obj->BaseLevel];
+      const GLfloat invW0 = v0->win[3];   /* presumably 1/w, going by the name - confirm */
+      const GLfloat invW1 = v1->win[3];
+      const GLfloat s0 = v0->texcoord[0][0] * invW0;
+      const GLfloat s1 = v1->texcoord[0][0] * invW1;
+      const GLfloat t0 = v0->texcoord[0][1] * invW0;
+      const GLfloat t1 = v1->texcoord[0][1] * invW1;
+      const GLfloat r0 = v0->texcoord[0][2] * invW0;
+      const GLfloat r1 = v1->texcoord[0][2] * invW1;
+      const GLfloat q0 = v0->texcoord[0][3] * invW0;
+      const GLfloat q1 = v1->texcoord[0][3] * invW1;
+      line.span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA);
+      compute_plane(line.x0, line.y0, line.x1, line.y1, s0, s1, line.sPlane[0]);
+      compute_plane(line.x0, line.y0, line.x1, line.y1, t0, t1, line.tPlane[0]);
+      compute_plane(line.x0, line.y0, line.x1, line.y1, r0, r1, line.uPlane[0]);
+      compute_plane(line.x0, line.y0, line.x1, line.y1, q0, q1, line.vPlane[0]);
+      line.texWidth[0] = (GLfloat) texImage->Width;
+      line.texHeight[0] = (GLfloat) texImage->Height;
+   }
+#elif defined(DO_MULTITEX)
+   {
+      GLuint u;
+      line.span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA);
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (ctx->Texture.Unit[u]._ReallyEnabled) {
+            const struct gl_texture_object *obj = ctx->Texture.Unit[u]._Current;
+            const struct gl_texture_image *texImage = obj->Image[0][obj->BaseLevel];
+            const GLfloat invW0 = v0->win[3];   /* presumably 1/w, going by the name - confirm */
+            const GLfloat invW1 = v1->win[3];
+            const GLfloat s0 = v0->texcoord[u][0] * invW0;
+            const GLfloat s1 = v1->texcoord[u][0] * invW1;
+            const GLfloat t0 = v0->texcoord[u][1] * invW0;
+            const GLfloat t1 = v1->texcoord[u][1] * invW1;
+            const GLfloat r0 = v0->texcoord[u][2] * invW0;
+            const GLfloat r1 = v1->texcoord[u][2] * invW1;
+            const GLfloat q0 = v0->texcoord[u][3] * invW0;
+            const GLfloat q1 = v1->texcoord[u][3] * invW1;
+            compute_plane(line.x0, line.y0, line.x1, line.y1, s0, s1, line.sPlane[u]);
+            compute_plane(line.x0, line.y0, line.x1, line.y1, t0, t1, line.tPlane[u]);
+            compute_plane(line.x0, line.y0, line.x1, line.y1, r0, r1, line.uPlane[u]);
+            compute_plane(line.x0, line.y0, line.x1, line.y1, q0, q1, line.vPlane[u]);
+            line.texWidth[u]  = (GLfloat) texImage->Width;
+            line.texHeight[u] = (GLfloat) texImage->Height;
+         }
+      }
+   }
+#endif
+
+   tStart = tEnd = 0.0;
+   inSegment = GL_FALSE;
+   iLen = (GLint) line.len;   /* number of stipple steps taken along this line */
+
+   if (ctx->Line.StippleFlag) {
+      for (i = 0; i < iLen; i++) {
+         const GLuint bit = (swrast->StippleCounter / ctx->Line.StippleFactor) & 0xf;
+         if ((1 << bit) & ctx->Line.StipplePattern) {
+            /* stipple bit is on */
+            const GLfloat t = (GLfloat) i / (GLfloat) line.len;
+            if (!inSegment) {
+               /* start new segment */
+               inSegment = GL_TRUE;
+               tStart = t;   /* tEnd is left at its previous value here */
+            }
+            else {
+               /* still in the segment, extend it */
+               tEnd = t;
+            }
+         }
+         else {
+            /* stipple bit is off */
+            if (inSegment && (tEnd > tStart)) {
+               /* draw the segment */
+               segment(ctx, &line, NAME(plot), tStart, tEnd);
+               inSegment = GL_FALSE;
+            }
+            else {
+               /* between segments; NOTE(review): also hit when inSegment but tEnd <= tStart */
+            }
+         }
+         swrast->StippleCounter++;
+      }
+
+      if (inSegment) {
+         /* draw the final segment of the line */
+         segment(ctx, &line, NAME(plot), tStart, 1.0F);
+      }
+   }
+   else {
+      /* non-stippled */
+      segment(ctx, &line, NAME(plot), 0.0, 1.0);
+   }
+   /* write out the fragments still queued in the span */
+#if defined(DO_RGBA)
+   _swrast_write_rgba_span(ctx, &(line.span));
+#else
+   _swrast_write_index_span(ctx, &(line.span));
+#endif
+}
+
+
+
+
+#undef DO_Z
+#undef DO_FOG
+#undef DO_RGBA
+#undef DO_INDEX
+#undef DO_SPEC
+#undef DO_TEX
+#undef DO_MULTITEX
+#undef NAME
diff --git a/dist/Mesa/src/mesa/swrast/s_aatriangle.c b/dist/Mesa/src/mesa/swrast/s_aatriangle.c
new file mode 100644
index 000000000..63a13cf3f
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_aatriangle.c
@@ -0,0 +1,494 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Antialiased Triangle rasterizers
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "context.h"
+#include "macros.h"
+#include "imports.h"
+#include "s_aatriangle.h"
+#include "s_context.h"
+#include "s_span.h"
+
+
+/*
+ * Compute the coefficients of the plane through three points whose X,Y
+ * come from elements [0],[1] of v0, v1, v2 and whose third coordinate is
+ * given separately by z0, z1, z2.
+ * Output: plane[] = {a, b, c, d}; (x,y,z) is on the plane iff
+ * a*x + b*y + c*z + d = 0.
+ */
+static INLINE void
+compute_plane(const GLfloat v0[], const GLfloat v1[], const GLfloat v2[],
+              GLfloat z0, GLfloat z1, GLfloat z2, GLfloat plane[4])
+{
+   /* first edge vector: from vertex 0 to vertex 1 */
+   const GLfloat e1x = v1[0] - v0[0];
+   const GLfloat e1y = v1[1] - v0[1];
+   const GLfloat e1z = z1 - z0;
+   /* second edge vector: from vertex 0 to vertex 2 */
+   const GLfloat e2x = v2[0] - v0[0];
+   const GLfloat e2y = v2[1] - v0[1];
+   const GLfloat e2z = z2 - z0;
+
+   /* The normal (a,b,c) of the plane is the cross product e1 x e2. */
+   const GLfloat normX = e1y * e2z - e1z * e2y;
+   const GLfloat normY = e1z * e2x - e1x * e2z;
+   const GLfloat normZ = e1x * e2y - e1y * e2x;
+
+   /* Vertex 0 lies on the plane, so d = -(normal . vertex0). */
+   const GLfloat offset = -(normX * v0[0] + normY * v0[1] + normZ * z0);
+
+   plane[0] = normX;
+   plane[1] = normY;
+   plane[2] = normZ;
+   plane[3] = offset;
+}
+
+
+/*
+ * Fill plane[] with the plane z = value, i.e. (a,b,c,d) = (0, 0, -1, value).
+ */
+static INLINE void
+constant_plane(GLfloat value, GLfloat plane[4])
+{
+   plane[3] = value;
+   plane[2] = -1.0F;
+   plane[1] = 0.0F;
+   plane[0] = 0.0F;
+}
+
+/* Macro form of constant_plane().  Arguments are parenthesized so any
+ * expression is accepted; note that PLANE is evaluated four times. */
+#define CONSTANT_PLANE(VALUE, PLANE)	\
+do {					\
+   (PLANE)[0] = 0.0F;			\
+   (PLANE)[1] = 0.0F;			\
+   (PLANE)[2] = -1.0F;			\
+   (PLANE)[3] = (VALUE);		\
+} while (0)
+
+/* Evaluate the plane equation a*x + b*y + c*z + d = 0 for z at (x, y),
+ * i.e. return (d + a*x + b*y) / -c.  plane[2] (c) must be non-zero. */
+static INLINE GLfloat
+solve_plane(GLfloat x, GLfloat y, const GLfloat plane[4])
+{
+   const GLfloat negC = -plane[2];
+   ASSERT(plane[2] != 0.0F);
+   return (plane[3] + plane[0] * x + plane[1] * y) / negC;
+}
+
+/* Macro form of solve_plane(); PLANE may be any pointer expression. */
+#define SOLVE_PLANE(X, Y, PLANE) \
+   (((PLANE)[3] + (PLANE)[0] * (X) + (PLANE)[1] * (Y)) / -(PLANE)[2])
+
+
+/*
+ * Return the reciprocal of solve_plane(x, y, plane), i.e.
+ * -c / (d + a*x + b*y), or 0 when that denominator is exactly zero.
+ */
+static INLINE GLfloat
+solve_plane_recip(GLfloat x, GLfloat y, const GLfloat plane[4])
+{
+   const GLfloat sum = plane[3] + plane[0] * x + plane[1] * y;
+
+   /* avoid dividing by zero */
+   return (sum != 0.0F) ? -plane[2] / sum : 0.0F;
+}
+
+
+/*
+ * Solve the plane for z at (x, y) and return it clamped to a GLchan value.
+ */
+static INLINE GLchan
+solve_plane_chan(GLfloat x, GLfloat y, const GLfloat plane[4])
+{
+   const GLfloat chan = (plane[3] + plane[0] * x + plane[1] * y) / -plane[2];
+#if CHAN_TYPE == GL_FLOAT
+   return CLAMP(chan, 0.0F, CHAN_MAXF);
+#else
+   /* clamp below at 0 and above at CHAN_MAX, otherwise round */
+   if (chan < 0)
+      return 0;
+   return (chan > CHAN_MAX) ? CHAN_MAX
+                            : (GLchan) IROUND_POS(chan);
+#endif
+}
+
+
+
+/*
+ * Compute how much (area) of the pixel at (winx, winy) is inside the triangle.
+ * Vertices MUST be specified in counter-clockwise order.
+ * Return:  coverage in [0, 1], i.e. the fraction of 16 samples inside.
+ */
+static GLfloat
+compute_coveragef(const GLfloat v0[3], const GLfloat v1[3],
+                  const GLfloat v2[3], GLint winx, GLint winy)
+{
+   /* POS(a, b): given a position in [0,3]x[0,3], return the sub-pixel
+    * sample position.  Contributed by Ray Tice.
+    * (Left defined: compute_coveragei() below uses it as well.)
+    * Jitter sample positions -
+    * - average should be .5 in x & y for each column
+    * - each of the 16 rows and columns should be used once
+    * - the rectangle formed by the first four points
+    *   should contain the other points
+    * - the distribution should be fairly even in any given direction
+    *
+    * The pattern drawn below isn't optimal, but it's better than a regular
+    * grid.  In the drawing, the center of each subpixel is surrounded by
+    * four dots.  The "x" marks the jittered position relative to the
+    * subpixel center.
+    */
+#define POS(a, b) ((0.5 + (a) * 4 + (b)) / 16)
+   static const GLfloat samples[16][2] = {
+      /* start with the four corners */
+      { POS(0, 2), POS(0, 0) },
+      { POS(3, 3), POS(0, 2) },
+      { POS(0, 0), POS(3, 1) },
+      { POS(3, 1), POS(3, 3) },
+      /* continue with interior samples */
+      { POS(1, 1), POS(0, 1) },
+      { POS(2, 0), POS(0, 3) },
+      { POS(0, 3), POS(1, 3) },
+      { POS(1, 2), POS(1, 0) },
+      { POS(2, 3), POS(1, 2) },
+      { POS(3, 2), POS(1, 1) },
+      { POS(0, 1), POS(2, 2) },
+      { POS(1, 0), POS(2, 1) },
+      { POS(2, 1), POS(2, 3) },
+      { POS(3, 0), POS(2, 0) },
+      { POS(1, 3), POS(3, 0) },
+      { POS(2, 2), POS(3, 2) }
+   };
+
+   const GLfloat x = (GLfloat) winx;
+   const GLfloat y = (GLfloat) winy;
+   const GLfloat dx0 = v1[0] - v0[0];
+   const GLfloat dy0 = v1[1] - v0[1];
+   const GLfloat dx1 = v2[0] - v1[0];
+   const GLfloat dy1 = v2[1] - v1[1];
+   const GLfloat dx2 = v0[0] - v2[0];
+   const GLfloat dy2 = v0[1] - v2[1];
+   GLint stop = 4, i;   /* test only the 4 corner samples at first */
+   GLfloat insideCount = 16.0F;
+
+#ifdef DEBUG
+   {
+      const GLfloat area = dx0 * dy1 - dx1 * dy0;
+      ASSERT(area >= 0.0);
+   }
+#endif
+
+   for (i = 0; i < stop; i++) {
+      const GLfloat sx = x + samples[i][0];
+      const GLfloat sy = y + samples[i][1];
+      /* cross product determines if sample is inside or outside each edge */
+      GLfloat cross = (dx0 * (sy - v0[1]) - dy0 * (sx - v0[0]));
+      /* Check if the sample is exactly on an edge.  If so, let cross be a
+       * positive or negative value depending on the direction of the edge.
+       */
+      if (cross == 0.0F)
+         cross = dx0 + dy0;
+      if (cross < 0.0F) {
+         /* sample point is outside first edge */
+         insideCount -= 1.0F;
+         stop = 16;   /* partial coverage: test all 16 samples */
+      }
+      else {
+         /* sample point is inside first edge */
+         cross = (dx1 * (sy - v1[1]) - dy1 * (sx - v1[0]));
+         if (cross == 0.0F)
+            cross = dx1 + dy1;
+         if (cross < 0.0F) {
+            /* sample point is outside second edge */
+            insideCount -= 1.0F;
+            stop = 16;
+         }
+         else {
+            /* sample point is inside first and second edges */
+            cross = (dx2 * (sy - v2[1]) -  dy2 * (sx - v2[0]));
+            if (cross == 0.0F)
+               cross = dx2 + dy2;
+            if (cross < 0.0F) {
+               /* sample point is outside third edge */
+               insideCount -= 1.0F;
+               stop = 16;
+            }
+         }
+      }
+   }
+   if (stop == 4)
+      return 1.0F;   /* all four corner samples are inside */
+   else
+      return insideCount * (1.0F / 16.0F);
+}
+
+
+
+/*
+ * Integer version of compute_coveragef(): count how many of 15 sample
+ * points of the pixel at (winx, winy) are inside the triangle.
+ * Vertices MUST be specified in counter-clockwise order.
+ * Return:  coverage in [0, 15].
+ */
+static GLint
+compute_coveragei(const GLfloat v0[3], const GLfloat v1[3],
+                  const GLfloat v2[3], GLint winx, GLint winy)
+{
+   /* NOTE: 15 samples instead of 16. */
+   static const GLfloat samples[15][2] = {
+      /* start with the four corners */
+      { POS(0, 2), POS(0, 0) },
+      { POS(3, 3), POS(0, 2) },
+      { POS(0, 0), POS(3, 1) },
+      { POS(3, 1), POS(3, 3) },
+      /* continue with interior samples */
+      { POS(1, 1), POS(0, 1) },
+      { POS(2, 0), POS(0, 3) },
+      { POS(0, 3), POS(1, 3) },
+      { POS(1, 2), POS(1, 0) },
+      { POS(2, 3), POS(1, 2) },
+      { POS(3, 2), POS(1, 1) },
+      { POS(0, 1), POS(2, 2) },
+      { POS(1, 0), POS(2, 1) },
+      { POS(2, 1), POS(2, 3) },
+      { POS(3, 0), POS(2, 0) },
+      { POS(1, 3), POS(3, 0) }
+   };
+   const GLfloat px = (GLfloat) winx;
+   const GLfloat py = (GLfloat) winy;
+   /* edge vectors: v0->v1, v1->v2 and v2->v0 */
+   const GLfloat e0x = v1[0] - v0[0], e0y = v1[1] - v0[1];
+   const GLfloat e1x = v2[0] - v1[0], e1y = v2[1] - v1[1];
+   const GLfloat e2x = v0[0] - v2[0], e2y = v0[1] - v2[1];
+   GLint numOutside = 0;
+   /* Only the four corner samples are tested at first; the remaining
+    * ones are tested only if at least one sample was found outside.
+    */
+   GLint limit = 4, i;
+
+#ifdef DEBUG
+   {
+      const GLfloat area = e0x * e1y - e1x * e0y;
+      ASSERT(area >= 0.0);
+   }
+#endif
+
+   for (i = 0; i < limit; i++) {
+      const GLfloat sx = px + samples[i][0];
+      const GLfloat sy = py + samples[i][1];
+      /* sample position relative to the start vertex of each edge */
+      const GLfloat r0x = sx - v0[0], r0y = sy - v0[1];
+      const GLfloat r1x = sx - v1[0], r1y = sy - v1[1];
+      const GLfloat r2x = sx - v2[0], r2y = sy - v2[1];
+      /* the sign of each cross product tells on which side of the edge
+       * the sample lies
+       */
+      GLfloat side0 = (e0x * r0y - e0y * r0x);
+      GLfloat side1 = (e1x * r1y - e1y * r1x);
+      GLfloat side2 = (e2x * r2y - e2y * r2x);
+      /* A sample exactly on an edge is assigned a side according to the
+       * direction of that edge.
+       */
+      if (side0 == 0.0F)
+         side0 = e0x + e0y;
+      if (side1 == 0.0F)
+         side1 = e1x + e1y;
+      if (side2 == 0.0F)
+         side2 = e2x + e2y;
+      if (side0 < 0.0F || side1 < 0.0F || side2 < 0.0F) {
+         /* sample is outside the triangle: examine all 15 samples */
+         numOutside++;
+         limit = 15;
+      }
+   }
+   /* limit is still 4 only if nothing was outside, so this is 15 then */
+   return 15 - numOutside;
+}
+
+/* Antialiased RGBA triangle: instantiates the s_aatritemp.h template
+ * with Z (DO_Z), fog (DO_FOG) and RGBA color (DO_RGBA) computation. */
+static void
+rgba_aa_tri(GLcontext *ctx,
+	    const SWvertex *v0,
+	    const SWvertex *v1,
+	    const SWvertex *v2)
+{
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#include "s_aatritemp.h"
+}
+
+/* Antialiased color-index triangle: s_aatritemp.h with Z, fog and index. */
+static void
+index_aa_tri(GLcontext *ctx,
+	     const SWvertex *v0,
+	     const SWvertex *v1,
+	     const SWvertex *v2)
+{
+#define DO_Z
+#define DO_FOG
+#define DO_INDEX
+#include "s_aatritemp.h"
+}
+
+
+/*
+ * Compute mipmap level of detail (lambda) at fragment center (cx, cy);
+ * invQ is the caller's 1/Q at that position.
+ * XXX the R coordinate should be included to do 3-D texture mipmapping.
+ */
+static INLINE GLfloat
+compute_lambda(const GLfloat sPlane[4], const GLfloat tPlane[4],
+               const GLfloat qPlane[4], GLfloat cx, GLfloat cy,
+               GLfloat invQ, GLfloat texWidth, GLfloat texHeight)
+{
+   const GLfloat sHere = solve_plane(cx, cy, sPlane);
+   const GLfloat tHere = solve_plane(cx, cy, tPlane);
+   /* the same planes one pixel further in X and in Y */
+   const GLfloat sNextX = sHere - sPlane[0] / sPlane[2];
+   const GLfloat sNextY = sHere - sPlane[1] / sPlane[2];
+   const GLfloat tNextX = tHere - tPlane[0] / tPlane[2];
+   const GLfloat tNextY = tHere - tPlane[1] / tPlane[2];
+   const GLfloat invQNextX = solve_plane_recip(cx+1.0F, cy, qPlane);
+   const GLfloat invQNextY = solve_plane_recip(cx, cy+1.0F, qPlane);
+   /* change of the projected (divided by Q) S and T per pixel */
+   GLfloat ds_dx = sNextX * invQNextX - sHere * invQ;
+   GLfloat ds_dy = sNextY * invQNextY - sHere * invQ;
+   GLfloat dt_dx = tNextX * invQNextX - tHere * invQ;
+   GLfloat dt_dy = tNextY * invQNextY - tHere * invQ;
+   GLfloat rhoS, rhoT;
+   ds_dx = FABSF(ds_dx);
+   ds_dy = FABSF(ds_dy);
+   dt_dx = FABSF(dt_dx);
+   dt_dy = FABSF(dt_dy);
+   rhoS = MAX2(ds_dx, ds_dy) * texWidth;
+   rhoT = MAX2(dt_dx, dt_dy) * texHeight;
+   return LOG2(MAX2(rhoS, rhoT));
+}
+
+/* Antialiased textured RGBA triangle: adds unit 0 texcoords (DO_TEX). */
+static void
+tex_aa_tri(GLcontext *ctx,
+	   const SWvertex *v0,
+	   const SWvertex *v1,
+	   const SWvertex *v2)
+{
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#define DO_TEX
+#include "s_aatritemp.h"
+}
+
+/* Like tex_aa_tri, plus specular RGB values (DO_SPEC). */
+static void
+spec_tex_aa_tri(GLcontext *ctx,
+		const SWvertex *v0,
+		const SWvertex *v1,
+		const SWvertex *v2)
+{
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#define DO_TEX
+#define DO_SPEC
+#include "s_aatritemp.h"
+}
+
+/* Antialiased RGBA triangle with texcoords for all units (DO_MULTITEX). */
+static void
+multitex_aa_tri(GLcontext *ctx,
+		const SWvertex *v0,
+		const SWvertex *v1,
+		const SWvertex *v2)
+{
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#define DO_MULTITEX
+#include "s_aatritemp.h"
+}
+/* Like multitex_aa_tri, plus specular RGB values (DO_SPEC). */
+static void
+spec_multitex_aa_tri(GLcontext *ctx,
+		     const SWvertex *v0,
+		     const SWvertex *v1,
+		     const SWvertex *v2)
+{
+#define DO_Z
+#define DO_FOG
+#define DO_RGBA
+#define DO_MULTITEX
+#define DO_SPEC
+#include "s_aatritemp.h"
+}
+
+
+/*
+ * Examine GL state and set swrast->Triangle to the antialiased triangle
+ * rasterizer that matches it: textured or not, with or without secondary
+ * (specular) color, RGBA or color-index visual.
+ * NOTE(review): the "_EnabledCoordUnits > 1" tests presumably detect
+ * enabled units other than unit 0 - confirm.
+ */
+void
+_swrast_set_aa_triangle_function(GLcontext *ctx)
+{
+   ASSERT(ctx->Polygon.SmoothFlag);
+
+   if (ctx->Texture._EnabledCoordUnits == 0) {
+      /* untextured: pick by visual type */
+      if (ctx->Visual.rgbMode) {
+         SWRAST_CONTEXT(ctx)->Triangle = rgba_aa_tri;
+      }
+      else {
+         SWRAST_CONTEXT(ctx)->Triangle = index_aa_tri;
+      }
+   }
+   else if (NEED_SECONDARY_COLOR(ctx)) {
+      /* textured, with secondary (specular) color */
+      SWRAST_CONTEXT(ctx)->Triangle =
+         (ctx->Texture._EnabledCoordUnits > 1) ? spec_multitex_aa_tri
+                                               : spec_tex_aa_tri;
+   }
+   else {
+      /* textured, no secondary color */
+      SWRAST_CONTEXT(ctx)->Triangle =
+         (ctx->Texture._EnabledCoordUnits > 1) ? multitex_aa_tri
+                                               : tex_aa_tri;
+   }
+
+   ASSERT(SWRAST_CONTEXT(ctx)->Triangle);
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_aatriangle.h b/dist/Mesa/src/mesa/swrast/s_aatriangle.h
new file mode 100644
index 000000000..ebb828eb1
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_aatriangle.h
@@ -0,0 +1,39 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_AATRIANGLE_H
+#define S_AATRIANGLE_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+/* Set swrast->Triangle to the appropriate antialiased triangle rasterizer. */
+extern void
+_swrast_set_aa_triangle_function(GLcontext *ctx);
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_aatritemp.h b/dist/Mesa/src/mesa/swrast/s_aatritemp.h
new file mode 100644
index 000000000..23c262f83
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_aatritemp.h
@@ -0,0 +1,545 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Antialiased Triangle Rasterizer Template
+ *
+ * This file is #include'd to generate custom AA triangle rasterizers.
+ * NOTE: this code hasn't been optimized yet.  That'll come after it
+ * works correctly.
+ *
+ * The following macros may be defined to indicate what auxiliary information
+ * must be computed across the triangle:
+ *    DO_Z         - if defined, compute Z values
+ *    DO_RGBA      - if defined, compute RGBA values
+ *    DO_INDEX     - if defined, compute color index values
+ *    DO_SPEC      - if defined, compute specular RGB values
+ *    DO_TEX       - if defined, compute unit 0 STRQ texcoords
+ *    DO_MULTITEX  - if defined, compute all units' STRQ texcoords
+ */
+
+/*void triangle( GLcontext *ctx, const SWvertex *v0, const SWvertex *v1, const SWvertex *v2 )*/
+{
+   const GLfloat *p0 = v0->win;
+   const GLfloat *p1 = v1->win;
+   const GLfloat *p2 = v2->win;
+   const SWvertex *vMin, *vMid, *vMax;
+   GLint iyMin, iyMax;
+   GLfloat yMin, yMax;
+   GLboolean ltor;
+   GLfloat majDx, majDy;  /* major (i.e. long) edge dx and dy */
+   
+   struct sw_span span;
+   
+#ifdef DO_Z
+   GLfloat zPlane[4];
+#endif
+#ifdef DO_FOG
+   GLfloat fogPlane[4];
+#else
+   GLfloat *fog = NULL;
+#endif
+#ifdef DO_RGBA
+   GLfloat rPlane[4], gPlane[4], bPlane[4], aPlane[4];
+#endif
+#ifdef DO_INDEX
+   GLfloat iPlane[4];
+#endif
+#ifdef DO_SPEC
+   GLfloat srPlane[4], sgPlane[4], sbPlane[4];
+#endif
+#ifdef DO_TEX
+   GLfloat sPlane[4], tPlane[4], uPlane[4], vPlane[4];
+   GLfloat texWidth, texHeight;
+#elif defined(DO_MULTITEX)
+   GLfloat sPlane[MAX_TEXTURE_COORD_UNITS][4];  /* texture S */
+   GLfloat tPlane[MAX_TEXTURE_COORD_UNITS][4];  /* texture T */
+   GLfloat uPlane[MAX_TEXTURE_COORD_UNITS][4];  /* texture R */
+   GLfloat vPlane[MAX_TEXTURE_COORD_UNITS][4];  /* texture Q */
+   GLfloat texWidth[MAX_TEXTURE_COORD_UNITS];
+   GLfloat texHeight[MAX_TEXTURE_COORD_UNITS];
+#endif
+   GLfloat bf = SWRAST_CONTEXT(ctx)->_BackfaceSign;
+   
+   
+   INIT_SPAN(span, GL_POLYGON, 0, 0, SPAN_COVERAGE);
+
+   /* determine bottom to top order of vertices */
+   {
+      GLfloat y0 = v0->win[1];
+      GLfloat y1 = v1->win[1];
+      GLfloat y2 = v2->win[1];
+      if (y0 <= y1) {
+	 if (y1 <= y2) {
+	    vMin = v0;   vMid = v1;   vMax = v2;   /* y0<=y1<=y2 */
+	 }
+	 else if (y2 <= y0) {
+	    vMin = v2;   vMid = v0;   vMax = v1;   /* y2<=y0<=y1 */
+	 }
+	 else {
+	    vMin = v0;   vMid = v2;   vMax = v1;  bf = -bf; /* y0<=y2<=y1 */
+	 }
+      }
+      else {
+	 if (y0 <= y2) {
+	    vMin = v1;   vMid = v0;   vMax = v2;  bf = -bf; /* y1<=y0<=y2 */
+	 }
+	 else if (y2 <= y1) {
+	    vMin = v2;   vMid = v1;   vMax = v0;  bf = -bf; /* y2<=y1<=y0 */
+	 }
+	 else {
+	    vMin = v1;   vMid = v2;   vMax = v0;   /* y1<=y2<=y0 */
+	 }
+      }
+   }
+
+   majDx = vMax->win[0] - vMin->win[0];
+   majDy = vMax->win[1] - vMin->win[1];
+
+   {
+      const GLfloat botDx = vMid->win[0] - vMin->win[0];
+      const GLfloat botDy = vMid->win[1] - vMin->win[1];
+      const GLfloat area = majDx * botDy - botDx * majDy;
+      /* Do backface culling */
+      if (area * bf < 0 || area == 0 || IS_INF_OR_NAN(area))
+	 return;
+      ltor = (GLboolean) (area < 0.0F);
+   }
+
+   /* Plane equation setup:
+    * We evaluate plane equations at window (x,y) coordinates in order
+    * to compute color, Z, fog, texcoords, etc.  This isn't terribly
+    * efficient but it's easy and reliable.
+    */
+#ifdef DO_Z
+   compute_plane(p0, p1, p2, p0[2], p1[2], p2[2], zPlane);
+   span.arrayMask |= SPAN_Z;
+#endif
+#ifdef DO_FOG
+   compute_plane(p0, p1, p2, v0->fog, v1->fog, v2->fog, fogPlane);
+   span.arrayMask |= SPAN_FOG;
+#endif
+#ifdef DO_RGBA
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      compute_plane(p0, p1, p2, v0->color[RCOMP], v1->color[RCOMP], v2->color[RCOMP], rPlane);
+      compute_plane(p0, p1, p2, v0->color[GCOMP], v1->color[GCOMP], v2->color[GCOMP], gPlane);
+      compute_plane(p0, p1, p2, v0->color[BCOMP], v1->color[BCOMP], v2->color[BCOMP], bPlane);
+      compute_plane(p0, p1, p2, v0->color[ACOMP], v1->color[ACOMP], v2->color[ACOMP], aPlane);
+   }
+   else {
+      constant_plane(v2->color[RCOMP], rPlane);
+      constant_plane(v2->color[GCOMP], gPlane);
+      constant_plane(v2->color[BCOMP], bPlane);
+      constant_plane(v2->color[ACOMP], aPlane);
+   }
+   span.arrayMask |= SPAN_RGBA;
+#endif
+#ifdef DO_INDEX
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      compute_plane(p0, p1, p2, (GLfloat) v0->index,
+                    v1->index, v2->index, iPlane);
+   }
+   else {
+      constant_plane(v2->index, iPlane);
+   }
+   span.arrayMask |= SPAN_INDEX;
+#endif
+#ifdef DO_SPEC
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      compute_plane(p0, p1, p2, v0->specular[RCOMP], v1->specular[RCOMP], v2->specular[RCOMP], srPlane);
+      compute_plane(p0, p1, p2, v0->specular[GCOMP], v1->specular[GCOMP], v2->specular[GCOMP], sgPlane);
+      compute_plane(p0, p1, p2, v0->specular[BCOMP], v1->specular[BCOMP], v2->specular[BCOMP], sbPlane);
+   }
+   else {
+      constant_plane(v2->specular[RCOMP], srPlane);
+      constant_plane(v2->specular[GCOMP], sgPlane);
+      constant_plane(v2->specular[BCOMP], sbPlane);
+   }
+   span.arrayMask |= SPAN_SPEC;
+#endif
+#ifdef DO_TEX
+   {
+      const struct gl_texture_object *obj = ctx->Texture.Unit[0]._Current;
+      const struct gl_texture_image *texImage = obj->Image[0][obj->BaseLevel];
+      const GLfloat invW0 = v0->win[3];
+      const GLfloat invW1 = v1->win[3];
+      const GLfloat invW2 = v2->win[3];
+      const GLfloat s0 = v0->texcoord[0][0] * invW0;
+      const GLfloat s1 = v1->texcoord[0][0] * invW1;
+      const GLfloat s2 = v2->texcoord[0][0] * invW2;
+      const GLfloat t0 = v0->texcoord[0][1] * invW0;
+      const GLfloat t1 = v1->texcoord[0][1] * invW1;
+      const GLfloat t2 = v2->texcoord[0][1] * invW2;
+      const GLfloat r0 = v0->texcoord[0][2] * invW0;
+      const GLfloat r1 = v1->texcoord[0][2] * invW1;
+      const GLfloat r2 = v2->texcoord[0][2] * invW2;
+      const GLfloat q0 = v0->texcoord[0][3] * invW0;
+      const GLfloat q1 = v1->texcoord[0][3] * invW1;
+      const GLfloat q2 = v2->texcoord[0][3] * invW2;
+      compute_plane(p0, p1, p2, s0, s1, s2, sPlane);
+      compute_plane(p0, p1, p2, t0, t1, t2, tPlane);
+      compute_plane(p0, p1, p2, r0, r1, r2, uPlane);
+      compute_plane(p0, p1, p2, q0, q1, q2, vPlane);
+      texWidth = (GLfloat) texImage->Width;
+      texHeight = (GLfloat) texImage->Height;
+   }
+   span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA);
+#elif defined(DO_MULTITEX)
+   {
+      GLuint u;
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (ctx->Texture.Unit[u]._ReallyEnabled) {
+            const struct gl_texture_object *obj = ctx->Texture.Unit[u]._Current;
+            const struct gl_texture_image *texImage = obj->Image[0][obj->BaseLevel];
+            const GLfloat invW0 = v0->win[3];
+            const GLfloat invW1 = v1->win[3];
+            const GLfloat invW2 = v2->win[3];
+            const GLfloat s0 = v0->texcoord[u][0] * invW0;
+            const GLfloat s1 = v1->texcoord[u][0] * invW1;
+            const GLfloat s2 = v2->texcoord[u][0] * invW2;
+            const GLfloat t0 = v0->texcoord[u][1] * invW0;
+            const GLfloat t1 = v1->texcoord[u][1] * invW1;
+            const GLfloat t2 = v2->texcoord[u][1] * invW2;
+            const GLfloat r0 = v0->texcoord[u][2] * invW0;
+            const GLfloat r1 = v1->texcoord[u][2] * invW1;
+            const GLfloat r2 = v2->texcoord[u][2] * invW2;
+            const GLfloat q0 = v0->texcoord[u][3] * invW0;
+            const GLfloat q1 = v1->texcoord[u][3] * invW1;
+            const GLfloat q2 = v2->texcoord[u][3] * invW2;
+            compute_plane(p0, p1, p2, s0, s1, s2, sPlane[u]);
+            compute_plane(p0, p1, p2, t0, t1, t2, tPlane[u]);
+            compute_plane(p0, p1, p2, r0, r1, r2, uPlane[u]);
+            compute_plane(p0, p1, p2, q0, q1, q2, vPlane[u]);
+            texWidth[u]  = (GLfloat) texImage->Width;
+            texHeight[u] = (GLfloat) texImage->Height;
+         }
+      }
+   }
+   span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA);
+#endif
+
+   /* Begin bottom-to-top scan over the triangle.
+    * The long edge will either be on the left or right side of the
+    * triangle.  We always scan from the long edge toward the shorter
+    * edges, stopping when we find that coverage = 0.  If the long edge
+    * is on the left we scan left-to-right.  Else, we scan right-to-left.
+    */
+   yMin = vMin->win[1];
+   yMax = vMax->win[1];
+   iyMin = (GLint) yMin;
+   iyMax = (GLint) yMax + 1;
+
+   if (ltor) {
+      /* scan left to right */
+      const GLfloat *pMin = vMin->win;
+      const GLfloat *pMid = vMid->win;
+      const GLfloat *pMax = vMax->win;
+      const GLfloat dxdy = majDx / majDy;
+      const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F;
+      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
+      GLint iy;
+      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+         GLint ix, startX = (GLint) (x - xAdj);
+         GLuint count;
+         GLfloat coverage = 0.0F;
+
+         /* skip over fragments with zero coverage */
+         while (startX < MAX_WIDTH) {
+            coverage = compute_coveragef(pMin, pMid, pMax, startX, iy);
+            if (coverage > 0.0F)
+               break;
+            startX++;
+         }
+
+         /* enter interior of triangle */
+         ix = startX;
+         count = 0;
+         while (coverage > 0.0F) {
+            /* (cx,cy) = center of fragment */
+            const GLfloat cx = ix + 0.5F, cy = iy + 0.5F;
+            struct span_arrays *array = span.array;
+#ifdef DO_INDEX
+            array->coverage[count] = (GLfloat) compute_coveragei(pMin, pMid, pMax, ix, iy);
+#else
+            array->coverage[count] = coverage;
+#endif
+#ifdef DO_Z
+            array->z[count] = (GLuint) solve_plane(cx, cy, zPlane);
+#endif
+#ifdef DO_FOG
+	    array->fog[count] = solve_plane(cx, cy, fogPlane);
+#endif
+#ifdef DO_RGBA
+            array->rgba[count][RCOMP] = solve_plane_chan(cx, cy, rPlane);
+            array->rgba[count][GCOMP] = solve_plane_chan(cx, cy, gPlane);
+            array->rgba[count][BCOMP] = solve_plane_chan(cx, cy, bPlane);
+            array->rgba[count][ACOMP] = solve_plane_chan(cx, cy, aPlane);
+#endif
+#ifdef DO_INDEX
+            array->index[count] = (GLint) solve_plane(cx, cy, iPlane);
+#endif
+#ifdef DO_SPEC
+            array->spec[count][RCOMP] = solve_plane_chan(cx, cy, srPlane);
+            array->spec[count][GCOMP] = solve_plane_chan(cx, cy, sgPlane);
+            array->spec[count][BCOMP] = solve_plane_chan(cx, cy, sbPlane);
+#endif
+#ifdef DO_TEX
+            {
+               const GLfloat invQ = solve_plane_recip(cx, cy, vPlane);
+               array->texcoords[0][count][0] = solve_plane(cx, cy, sPlane) * invQ;
+               array->texcoords[0][count][1] = solve_plane(cx, cy, tPlane) * invQ;
+               array->texcoords[0][count][2] = solve_plane(cx, cy, uPlane) * invQ;
+               array->lambda[0][count] = compute_lambda(sPlane, tPlane, vPlane,
+                                                      cx, cy, invQ,
+                                                      texWidth, texHeight);
+            }
+#elif defined(DO_MULTITEX)
+            {
+               GLuint unit;
+               for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+                  if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+                     GLfloat invQ = solve_plane_recip(cx, cy, vPlane[unit]);
+                     array->texcoords[unit][count][0] = solve_plane(cx, cy, sPlane[unit]) * invQ;
+                     array->texcoords[unit][count][1] = solve_plane(cx, cy, tPlane[unit]) * invQ;
+                     array->texcoords[unit][count][2] = solve_plane(cx, cy, uPlane[unit]) * invQ;
+                     array->lambda[unit][count] = compute_lambda(sPlane[unit],
+                                      tPlane[unit], vPlane[unit], cx, cy, invQ,
+                                      texWidth[unit], texHeight[unit]);
+                  }
+               }
+            }
+#endif
+            ix++;
+            count++;
+            coverage = compute_coveragef(pMin, pMid, pMax, ix, iy);
+         }
+         
+         if (ix <= startX)
+            continue;
+         
+         span.x = startX;
+         span.y = iy;
+         span.end = (GLuint) ix - (GLuint) startX;
+         ASSERT(span.interpMask == 0);
+#if defined(DO_RGBA)
+         _swrast_write_rgba_span(ctx, &span);
+#else
+         _swrast_write_index_span(ctx, &span);
+#endif
+      }
+   }
+   else {
+      /* scan right to left */
+      const GLfloat *pMin = vMin->win;
+      const GLfloat *pMid = vMid->win;
+      const GLfloat *pMax = vMax->win;
+      const GLfloat dxdy = majDx / majDy;
+      const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F;
+      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
+      GLint iy;
+      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+         GLint ix, left, startX = (GLint) (x + xAdj);
+         GLuint count, n;
+         GLfloat coverage = 0.0F;
+         
+         /* make sure we're not past the window edge */
+         if (startX >= ctx->DrawBuffer->_Xmax) {
+            startX = ctx->DrawBuffer->_Xmax - 1;
+         }
+
+         /* skip fragments with zero coverage */
+         while (startX >= 0) {
+            coverage = compute_coveragef(pMin, pMax, pMid, startX, iy);
+            if (coverage > 0.0F)
+               break;
+            startX--;
+         }
+         
+         /* enter interior of triangle */
+         ix = startX;
+         count = 0;
+         while (coverage > 0.0F) {
+            /* (cx,cy) = center of fragment */
+            const GLfloat cx = ix + 0.5F, cy = iy + 0.5F;
+            struct span_arrays *array = span.array;
+#ifdef DO_INDEX
+            array->coverage[ix] = (GLfloat) compute_coveragei(pMin, pMax, pMid, ix, iy);
+#else
+            array->coverage[ix] = coverage;
+#endif
+#ifdef DO_Z
+            array->z[ix] = (GLuint) solve_plane(cx, cy, zPlane);
+#endif
+#ifdef DO_FOG
+            array->fog[ix] = solve_plane(cx, cy, fogPlane);
+#endif
+#ifdef DO_RGBA
+            array->rgba[ix][RCOMP] = solve_plane_chan(cx, cy, rPlane);
+            array->rgba[ix][GCOMP] = solve_plane_chan(cx, cy, gPlane);
+            array->rgba[ix][BCOMP] = solve_plane_chan(cx, cy, bPlane);
+            array->rgba[ix][ACOMP] = solve_plane_chan(cx, cy, aPlane);
+#endif
+#ifdef DO_INDEX
+            array->index[ix] = (GLint) solve_plane(cx, cy, iPlane);
+#endif
+#ifdef DO_SPEC
+            array->spec[ix][RCOMP] = solve_plane_chan(cx, cy, srPlane);
+            array->spec[ix][GCOMP] = solve_plane_chan(cx, cy, sgPlane);
+            array->spec[ix][BCOMP] = solve_plane_chan(cx, cy, sbPlane);
+#endif
+#ifdef DO_TEX
+            {
+               const GLfloat invQ = solve_plane_recip(cx, cy, vPlane);
+               array->texcoords[0][ix][0] = solve_plane(cx, cy, sPlane) * invQ;
+               array->texcoords[0][ix][1] = solve_plane(cx, cy, tPlane) * invQ;
+               array->texcoords[0][ix][2] = solve_plane(cx, cy, uPlane) * invQ;
+               array->lambda[0][ix] = compute_lambda(sPlane, tPlane, vPlane,
+                                          cx, cy, invQ, texWidth, texHeight);
+            }
+#elif defined(DO_MULTITEX)
+            {
+               GLuint unit;
+               for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+                  if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+                     GLfloat invQ = solve_plane_recip(cx, cy, vPlane[unit]);
+                     array->texcoords[unit][ix][0] = solve_plane(cx, cy, sPlane[unit]) * invQ;
+                     array->texcoords[unit][ix][1] = solve_plane(cx, cy, tPlane[unit]) * invQ;
+                     array->texcoords[unit][ix][2] = solve_plane(cx, cy, uPlane[unit]) * invQ;
+                     array->lambda[unit][ix] = compute_lambda(sPlane[unit],
+                                                            tPlane[unit],
+                                                            vPlane[unit],
+                                                            cx, cy, invQ,
+                                                            texWidth[unit],
+                                                            texHeight[unit]);
+                  }
+               }
+            }
+#endif
+            ix--;
+            count++;
+            coverage = compute_coveragef(pMin, pMax, pMid, ix, iy);
+         }
+         
+         if (startX <= ix)
+            continue;
+
+         n = (GLuint) startX - (GLuint) ix;
+
+         left = ix + 1;
+
+         /* shift all values to the left */
+         /* XXX this is temporary */
+         {
+            struct span_arrays *array = span.array;
+            GLint j;
+            for (j = 0; j < (GLint) n; j++) {
+#ifdef DO_RGBA
+               COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
+#endif
+#ifdef DO_SPEC
+               COPY_CHAN4(array->spec[j], array->spec[j + left]);
+#endif
+#ifdef DO_INDEX
+               array->index[j] = array->index[j + left];
+#endif
+#ifdef DO_Z
+               array->z[j] = array->z[j + left];
+#endif
+#ifdef DO_FOG
+               array->fog[j] = array->fog[j + left];
+#endif
+#ifdef DO_TEX
+               COPY_4V(array->texcoords[0][j], array->texcoords[0][j + left]);
+#endif
+#if defined(DO_MULTITEX) || defined(DO_TEX)
+               array->lambda[0][j] = array->lambda[0][j + left];
+#endif
+               array->coverage[j] = array->coverage[j + left];
+            }
+         }
+#ifdef DO_MULTITEX
+         /* shift texcoords */
+         {
+            struct span_arrays *array = span.array;
+            GLuint unit;
+            for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+               if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+                  GLint j;
+                  for (j = 0; j < (GLint) n; j++) {
+		     array->texcoords[unit][j][0] = array->texcoords[unit][j + left][0];
+                     array->texcoords[unit][j][1] = array->texcoords[unit][j + left][1];
+                     array->texcoords[unit][j][2] = array->texcoords[unit][j + left][2];
+                     array->lambda[unit][j] = array->lambda[unit][j + left];
+                  }
+               }
+            }
+         }
+#endif
+
+         span.x = left;
+         span.y = iy;
+         span.end = n;
+         ASSERT(span.interpMask == 0);
+#if defined(DO_RGBA)
+         _swrast_write_rgba_span(ctx, &span);
+#else
+         _swrast_write_index_span(ctx, &span);
+#endif
+      }
+   }
+}
+
+
+/*
+ * Undefine every DO_* option macro tested by the code above, so that none
+ * of them is still defined after this point.
+ * NOTE(review): presumably this lets the file be #included several times
+ * with a different option set each time - confirm against the includer.
+ */
+#ifdef DO_Z
+#undef DO_Z
+#endif
+
+#ifdef DO_FOG
+#undef DO_FOG
+#endif
+
+#ifdef DO_RGBA
+#undef DO_RGBA
+#endif
+
+#ifdef DO_INDEX
+#undef DO_INDEX
+#endif
+
+#ifdef DO_SPEC
+#undef DO_SPEC
+#endif
+
+#ifdef DO_TEX
+#undef DO_TEX
+#endif
+
+#ifdef DO_MULTITEX
+#undef DO_MULTITEX
+#endif
+
+#ifdef DO_OCCLUSION_TEST
+#undef DO_OCCLUSION_TEST
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_accum.c b/dist/Mesa/src/mesa/swrast/s_accum.c
new file mode 100644
index 000000000..a159e8049
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_accum.c
@@ -0,0 +1,584 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "imports.h"
+#include "fbobject.h"
+
+#include "s_accum.h"
+#include "s_context.h"
+#include "s_masking.h"
+#include "s_span.h"
+
+
+/* Factor used to convert between floating point color values and the
+ * scaled 16-bit integers stored in the accumulation buffer.
+ * XXX this would have to change for accum buffers with more or less
+ * than 16 bits per color channel.
+ */
+#define ACCUM_SCALE16 32767.0
+
+
+/*
+ * Accumulation buffer notes
+ *
+ * Normally, accumulation buffer values are GLshorts with values in
+ * [-32767, 32767] which represent floating point colors in [-1, 1],
+ * as defined by the OpenGL specification.
+ *
+ * We optimize for the common case used for full-scene antialiasing:
+ *    // start with accum buffer cleared to zero
+ *    glAccum(GL_LOAD, w);   // or GL_ACCUM the first image
+ *    glAccum(GL_ACCUM, w);
+ *    ...
+ *    glAccum(GL_ACCUM, w);
+ *    glAccum(GL_RETURN, 1.0);
+ * That is, we start with an empty accumulation buffer and accumulate
+ * n images, each with weight w = 1/n.
+ * In this scenario, we can simply store unscaled integer values in
+ * the accum buffer instead of scaled integers.  We'll also keep track
+ * of the w value so when we do GL_RETURN we simply divide the accumulated
+ * values by n (n=1/w).
+ * This lets us avoid _many_ int->float->int conversions.
+ */
+
+
+/* USE_OPTIMIZED_ACCUM selects whether the unscaled-integer accumulation
+ * mode may be entered at all; it is only turned on when color channels
+ * are 8 bits wide.
+ */
+#if CHAN_BITS == 8
+/* enable the optimization */
+#define USE_OPTIMIZED_ACCUM  1
+#else
+#define USE_OPTIMIZED_ACCUM  0
+#endif
+
+
+/**
+ * Leave the optimized (unscaled integer) accumulation mode.
+ * Every value in the accumulation buffer is multiplied by
+ * _IntegerAccumScaler * 32767 / CHAN_MAXF so that it becomes a scaled
+ * value representing the range [-1, 1], and _IntegerAccumMode is cleared.
+ * Must only be called while _IntegerAccumMode is set.
+ */
+static void
+rescale_accum( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *rb
+      = ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer;
+   const GLfloat factor
+      = swrast->_IntegerAccumScaler * (32767.0F / CHAN_MAXF);
+   GLuint row, k;
+
+   assert(rb);
+   assert(rb->_BaseFormat == GL_RGBA);
+   /* add other types in future? */
+   assert(rb->DataType == GL_SHORT || rb->DataType == GL_UNSIGNED_SHORT);
+   assert(swrast->_IntegerAccumMode);
+
+   if (rb->GetPointer(ctx, rb, 0, 0) == NULL) {
+      /* no direct access: fetch each row, rescale it, store it back */
+      for (row = 0; row < rb->Height; row++) {
+         GLshort tmp[MAX_WIDTH * 4];
+         rb->GetRow(ctx, rb, rb->Width, 0, row, tmp);
+         for (k = 0; k < 4 * rb->Width; k++) {
+            tmp[k] = (GLshort) (tmp[k] * factor);
+         }
+         rb->PutRow(ctx, rb, rb->Width, 0, row, tmp, NULL);
+      }
+   }
+   else {
+      /* buffer memory is directly addressable: rescale in place */
+      for (row = 0; row < rb->Height; row++) {
+         GLshort *dst = (GLshort *) rb->GetPointer(ctx, rb, 0, row);
+         for (k = 0; k < 4 * rb->Width; k++) {
+            dst[k] = (GLshort) (dst[k] * factor);
+         }
+      }
+   }
+
+   swrast->_IntegerAccumMode = GL_FALSE;
+}
+
+
+
+/**
+ * Clear the accumulation buffer to ctx->Accum.ClearColor.
+ * Only the region between the draw buffer's _Xmin/_Ymin and _Xmax/_Ymax
+ * bounds is written.  Does nothing if the visual has no accumulation bits.
+ * Afterwards the optimized-accum state is reset: a clear to all zeros
+ * marks the buffer as empty (scaler 0), any other clear color turns the
+ * integer mode off.
+ */
+void
+_swrast_clear_accum_buffer( GLcontext *ctx, struct gl_renderbuffer *rb )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint x, y, width, height;
+   GLboolean zeroClear;
+
+   if (ctx->Visual.accumRedBits == 0) {
+      /* No accumulation buffer! Not an error. */
+      return;
+   }
+
+   assert(rb);
+   assert(rb->_BaseFormat == GL_RGBA);
+   /* add other types in future? */
+   assert(rb->DataType == GL_SHORT || rb->DataType == GL_UNSIGNED_SHORT);
+
+   /* bounds, with scissor */
+   x = ctx->DrawBuffer->_Xmin;
+   y = ctx->DrawBuffer->_Ymin;
+   width = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+   height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+
+   if (rb->DataType == GL_SHORT || rb->DataType == GL_UNSIGNED_SHORT) {
+      const GLfloat accScale = 32767.0;
+      GLshort clearVal[4];
+      GLuint comp, row, remaining;
+
+      for (comp = 0; comp < 4; comp++) {
+         clearVal[comp] = (GLshort) (ctx->Accum.ClearColor[comp] * accScale);
+      }
+
+      /* one mono-colored row at a time, top of region downwards in y */
+      row = y;
+      for (remaining = height; remaining > 0; remaining--) {
+         rb->PutMonoRow(ctx, rb, width, x, row, clearVal, NULL);
+         row++;
+      }
+   }
+   /* else: someday support other sizes */
+
+   /* update optimized accum state vars */
+   zeroClear = (ctx->Accum.ClearColor[0] == 0.0 &&
+                ctx->Accum.ClearColor[1] == 0.0 &&
+                ctx->Accum.ClearColor[2] == 0.0 &&
+                ctx->Accum.ClearColor[3] == 0.0);
+
+   if (!zeroClear) {
+      swrast->_IntegerAccumMode = GL_FALSE;
+      return;
+   }
+
+   swrast->_IntegerAccumMode = USE_OPTIMIZED_ACCUM ? GL_TRUE : GL_FALSE;
+   swrast->_IntegerAccumScaler = 0.0;  /* denotes empty accum buffer */
+}
+
+
+/**
+ * Add the constant 'value' (scaled by ACCUM_SCALE16) to every component
+ * in the given rectangle of the accumulation buffer.
+ * Leaves the optimized integer accum mode first, if it is active.
+ */
+static void
+accum_add(GLcontext *ctx, GLfloat value,
+          GLint xpos, GLint ypos, GLint width, GLint height )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *rb
+      = ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer;
+   GLint row, k;
+
+   assert(rb);
+
+   /* Leave optimized accum buffer mode */
+   if (swrast->_IntegerAccumMode)
+      rescale_accum(ctx);
+
+   if (rb->DataType != GL_SHORT && rb->DataType != GL_UNSIGNED_SHORT) {
+      /* other types someday */
+      return;
+   }
+
+   {
+      const GLshort delta = (GLshort) (value * ACCUM_SCALE16);
+
+      if (rb->GetPointer(ctx, rb, 0, 0) == NULL) {
+         /* go through a temporary row */
+         for (row = 0; row < height; row++) {
+            GLshort tmp[4 * MAX_WIDTH];
+            rb->GetRow(ctx, rb, width, xpos, ypos + row, tmp);
+            for (k = 0; k < 4 * width; k++) {
+               tmp[k] += delta;
+            }
+            rb->PutRow(ctx, rb, width, xpos, ypos + row, tmp, NULL);
+         }
+      }
+      else {
+         /* modify the buffer memory in place */
+         for (row = 0; row < height; row++) {
+            GLshort *dst
+               = (GLshort *) rb->GetPointer(ctx, rb, xpos, ypos + row);
+            for (k = 0; k < 4 * width; k++) {
+               dst[k] += delta;
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Multiply every component in the given rectangle of the accumulation
+ * buffer by 'mult'.
+ * Leaves the optimized integer accum mode first, if it is active.
+ */
+static void
+accum_mult(GLcontext *ctx, GLfloat mult,
+           GLint xpos, GLint ypos, GLint width, GLint height )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *rb
+      = ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer;
+   GLint row, k;
+
+   assert(rb);
+
+   /* Leave optimized accum buffer mode */
+   if (swrast->_IntegerAccumMode)
+      rescale_accum(ctx);
+
+   if (rb->DataType != GL_SHORT && rb->DataType != GL_UNSIGNED_SHORT) {
+      /* other types someday */
+      return;
+   }
+
+   if (rb->GetPointer(ctx, rb, 0, 0) == NULL) {
+      /* go through a temporary row */
+      for (row = 0; row < height; row++) {
+         GLshort tmp[4 * MAX_WIDTH];
+         rb->GetRow(ctx, rb, width, xpos, ypos + row, tmp);
+         for (k = 0; k < 4 * width; k++) {
+            tmp[k] = (GLshort) (tmp[k] * mult);
+         }
+         rb->PutRow(ctx, rb, width, xpos, ypos + row, tmp, NULL);
+      }
+   }
+   else {
+      /* modify the buffer memory in place */
+      for (row = 0; row < height; row++) {
+         GLshort *dst = (GLshort *) rb->GetPointer(ctx, rb, xpos, ypos + row);
+         for (k = 0; k < 4 * width; k++) {
+            dst[k] = (GLshort) (dst[k] * mult);
+         }
+      }
+   }
+}
+
+
+
+/**
+ * Read colors from the current color read buffer, scale them by 'value'
+ * and add them to the given rectangle of the accumulation buffer.
+ * Returns without doing anything if there is no color read buffer.
+ *
+ * While the optimized integer mode is active and 'value' equals the
+ * recorded _IntegerAccumScaler, the raw channel values are added
+ * unscaled; otherwise the buffer is rescaled first and scaled values
+ * are accumulated.
+ */
+static void
+accum_accum(GLcontext *ctx, GLfloat value,
+            GLint xpos, GLint ypos, GLint width, GLint height )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *rb
+      = ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer;
+   GLboolean directAccess;
+
+   /* check rb before its first dereference so the assert can fire */
+   assert(rb);
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer) {
+      /* no read buffer - OK */
+      return;
+   }
+
+   directAccess = (rb->GetPointer(ctx, rb, 0, 0) != NULL);
+
+   /* May have to leave optimized accum buffer mode */
+   if (swrast->_IntegerAccumScaler == 0.0 && value > 0.0 && value <= 1.0)
+      swrast->_IntegerAccumScaler = value;
+   if (swrast->_IntegerAccumMode && value != swrast->_IntegerAccumScaler)
+      rescale_accum(ctx);
+
+   if (rb->DataType == GL_SHORT || rb->DataType == GL_UNSIGNED_SHORT) {
+      const GLfloat scale = value * ACCUM_SCALE16 / CHAN_MAXF;
+      GLshort accumRow[4 * MAX_WIDTH];
+      GLchan rgba[MAX_WIDTH][4];
+      GLint i;
+
+      for (i = 0; i < height; i++) {
+         GLshort *acc;
+         if (directAccess) {
+            acc = (GLshort *) rb->GetPointer(ctx, rb, xpos, ypos + i);
+         }
+         else {
+            /* work on a copy of the row; written back below */
+            rb->GetRow(ctx, rb, width, xpos, ypos + i, accumRow);
+            acc = accumRow;
+         }
+
+         /* read colors from color buffer */
+         _swrast_read_rgba_span(ctx, ctx->ReadBuffer->_ColorReadBuffer, width,
+                                xpos, ypos + i, rgba);
+
+         /* do accumulation */
+         if (swrast->_IntegerAccumMode) {
+            /* simply add integer color values into accum buffer */
+            GLint j;
+            for (j = 0; j < width; j++) {
+               acc[j * 4 + 0] += rgba[j][RCOMP];
+               acc[j * 4 + 1] += rgba[j][GCOMP];
+               acc[j * 4 + 2] += rgba[j][BCOMP];
+               acc[j * 4 + 3] += rgba[j][ACOMP];
+            }
+         }
+         else {
+            /* scaled integer (or float) accum buffer */
+            GLint j;
+            for (j = 0; j < width; j++) {
+               acc[j * 4 + 0] += (GLshort) ((GLfloat) rgba[j][RCOMP] * scale);
+               acc[j * 4 + 1] += (GLshort) ((GLfloat) rgba[j][GCOMP] * scale);
+               acc[j * 4 + 2] += (GLshort) ((GLfloat) rgba[j][BCOMP] * scale);
+               acc[j * 4 + 3] += (GLshort) ((GLfloat) rgba[j][ACOMP] * scale);
+            }
+         }
+
+         if (!directAccess) {
+            rb->PutRow(ctx, rb, width, xpos, ypos + i, accumRow, NULL);
+         }
+      }
+   }
+   else {
+      /* other types someday */
+   }
+}
+
+
+
+/**
+ * Read colors from the current color read buffer, scale them by 'value'
+ * and store them into the given rectangle of the accumulation buffer,
+ * replacing the previous contents.
+ * Returns without doing anything if there is no color read buffer.
+ *
+ * A 'value' in (0, 1] (re)enters the optimized integer mode when
+ * USE_OPTIMIZED_ACCUM is enabled; the raw channel values are then stored
+ * unscaled and 'value' is remembered in _IntegerAccumScaler.
+ */
+static void
+accum_load(GLcontext *ctx, GLfloat value,
+           GLint xpos, GLint ypos, GLint width, GLint height )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *rb
+      = ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer;
+   GLboolean directAccess;
+
+   /* check rb before its first dereference so the assert can fire */
+   assert(rb);
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer) {
+      /* no read buffer - OK */
+      return;
+   }
+
+   directAccess = (rb->GetPointer(ctx, rb, 0, 0) != NULL);
+
+   /* This is a chance to go into optimized accum buffer mode */
+   if (value > 0.0 && value <= 1.0) {
+#if USE_OPTIMIZED_ACCUM
+      swrast->_IntegerAccumMode = GL_TRUE;
+#else
+      swrast->_IntegerAccumMode = GL_FALSE;
+#endif
+      swrast->_IntegerAccumScaler = value;
+   }
+   else {
+      swrast->_IntegerAccumMode = GL_FALSE;
+      swrast->_IntegerAccumScaler = 0.0;
+   }
+
+   if (rb->DataType == GL_SHORT || rb->DataType == GL_UNSIGNED_SHORT) {
+      const GLfloat scale = value * ACCUM_SCALE16 / CHAN_MAXF;
+      GLshort accumRow[4 * MAX_WIDTH];
+      GLchan rgba[MAX_WIDTH][4];
+      GLint i;
+
+      for (i = 0; i < height; i++) {
+         GLshort *acc;
+         if (directAccess) {
+            acc = (GLshort *) rb->GetPointer(ctx, rb, xpos, ypos + i);
+         }
+         else {
+            /* work on a copy of the row; written back below */
+            rb->GetRow(ctx, rb, width, xpos, ypos + i, accumRow);
+            acc = accumRow;
+         }
+
+         /* read colors from color buffer */
+         _swrast_read_rgba_span(ctx, ctx->ReadBuffer->_ColorReadBuffer, width,
+                                xpos, ypos + i, rgba);
+
+         /* do load */
+         if (swrast->_IntegerAccumMode) {
+            /* just copy values in */
+            GLint j;
+            assert(swrast->_IntegerAccumScaler > 0.0);
+            assert(swrast->_IntegerAccumScaler <= 1.0);
+            for (j = 0; j < width; j++) {
+               acc[j * 4 + 0] = rgba[j][RCOMP];
+               acc[j * 4 + 1] = rgba[j][GCOMP];
+               acc[j * 4 + 2] = rgba[j][BCOMP];
+               acc[j * 4 + 3] = rgba[j][ACOMP];
+            }
+         }
+         else {
+            /* scaled integer (or float) accum buffer */
+            GLint j;
+            for (j = 0; j < width; j++) {
+               acc[j * 4 + 0] = (GLshort) ((GLfloat) rgba[j][RCOMP] * scale);
+               acc[j * 4 + 1] = (GLshort) ((GLfloat) rgba[j][GCOMP] * scale);
+               acc[j * 4 + 2] = (GLshort) ((GLfloat) rgba[j][BCOMP] * scale);
+               acc[j * 4 + 3] = (GLshort) ((GLfloat) rgba[j][ACOMP] * scale);
+            }
+         }
+
+         if (!directAccess) {
+            rb->PutRow(ctx, rb, width, xpos, ypos + i, accumRow, NULL);
+         }
+      }
+   }
+}
+
+
+/**
+ * Scale the given rectangle of the accumulation buffer by 'value' and
+ * write the resulting colors to every color draw buffer, honoring the
+ * color mask.
+ *
+ * If the optimized integer mode is active and 'value' is 1.0, the
+ * unscaled accum values are converted through a lookup table indexed by
+ * the accum value (table[j] = round(j * _IntegerAccumScaler)).  Any other
+ * 'value' first forces the buffer back to scaled mode.
+ *
+ * NOTE: in integer mode the accum values are used directly as table
+ * indices and are only range-checked by ASSERT.
+ */
+static void
+accum_return(GLcontext *ctx, GLfloat value,
+             GLint xpos, GLint ypos, GLint width, GLint height )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_renderbuffer *accumRb = fb->Attachment[BUFFER_ACCUM].Renderbuffer;
+   const GLboolean masking = (!ctx->Color.ColorMask[RCOMP] ||
+                              !ctx->Color.ColorMask[GCOMP] ||
+                              !ctx->Color.ColorMask[BCOMP] ||
+                              !ctx->Color.ColorMask[ACOMP]);
+
+   static GLchan multTable[32768];
+   static GLfloat prevMult = 0.0;
+   const GLfloat mult = swrast->_IntegerAccumScaler;
+   GLboolean directAccess;
+   /* Number of valid multTable entries.  A scaler of 0 denotes an empty
+    * accum buffer, for which only entry 0 (always 0) may be referenced.
+    */
+   GLint max = 1;
+
+   assert(accumRb);
+   directAccess = (accumRb->GetPointer(ctx, accumRb, 0, 0) != NULL);
+
+   /* May have to leave optimized accum buffer mode */
+   if (swrast->_IntegerAccumMode && value != 1.0)
+      rescale_accum(ctx);
+
+   if (swrast->_IntegerAccumMode && swrast->_IntegerAccumScaler > 0) {
+      /* build lookup table to avoid many floating point multiplies */
+      /* Only divide once mult is known to be > 0, and clamp in floating
+       * point before converting so the int conversion cannot overflow.
+       */
+      const GLfloat limit = 256.0F / mult;
+      GLint j;
+      assert(swrast->_IntegerAccumScaler <= 1.0);
+      max = (limit < 32767.0F) ? (GLint) limit : 32767;
+      if (mult != prevMult) {
+         for (j = 0; j < max; j++)
+            multTable[j] = IROUND((GLfloat) j * mult);
+         prevMult = mult;
+      }
+   }
+
+   if (accumRb->DataType == GL_SHORT ||
+       accumRb->DataType == GL_UNSIGNED_SHORT) {
+      const GLfloat scale = value * CHAN_MAXF / ACCUM_SCALE16;
+      GLuint buffer;
+      GLint i;
+
+      /* XXX maybe transpose the 'i' and 'buffer' loops??? */
+      for (i = 0; i < height; i++) {
+         GLchan rgba[MAX_WIDTH][4];
+         GLshort accumRow[4 * MAX_WIDTH];
+         GLshort *acc;
+
+         if (directAccess) {
+            acc = (GLshort *) accumRb->GetPointer(ctx, accumRb, xpos, ypos +i);
+         }
+         else {
+            accumRb->GetRow(ctx, accumRb, width, xpos, ypos + i, accumRow);
+            acc = accumRow;
+         }
+
+         /* get the colors to return */
+         if (swrast->_IntegerAccumMode) {
+            GLint j;
+            for (j = 0; j < width; j++) {
+               ASSERT(acc[j * 4 + 0] < max);
+               ASSERT(acc[j * 4 + 1] < max);
+               ASSERT(acc[j * 4 + 2] < max);
+               ASSERT(acc[j * 4 + 3] < max);
+               rgba[j][RCOMP] = multTable[acc[j * 4 + 0]];
+               rgba[j][GCOMP] = multTable[acc[j * 4 + 1]];
+               rgba[j][BCOMP] = multTable[acc[j * 4 + 2]];
+               rgba[j][ACOMP] = multTable[acc[j * 4 + 3]];
+            }
+         }
+         else {
+            /* scaled integer (or float) accum buffer */
+            GLint j;
+            for (j = 0; j < width; j++) {
+#if CHAN_BITS==32
+               GLchan r = acc[j * 4 + 0] * scale;
+               GLchan g = acc[j * 4 + 1] * scale;
+               GLchan b = acc[j * 4 + 2] * scale;
+               GLchan a = acc[j * 4 + 3] * scale;
+#else
+               GLint r = IROUND( (GLfloat) (acc[j * 4 + 0]) * scale );
+               GLint g = IROUND( (GLfloat) (acc[j * 4 + 1]) * scale );
+               GLint b = IROUND( (GLfloat) (acc[j * 4 + 2]) * scale );
+               GLint a = IROUND( (GLfloat) (acc[j * 4 + 3]) * scale );
+#endif
+               rgba[j][RCOMP] = CLAMP( r, 0, CHAN_MAX );
+               rgba[j][GCOMP] = CLAMP( g, 0, CHAN_MAX );
+               rgba[j][BCOMP] = CLAMP( b, 0, CHAN_MAX );
+               rgba[j][ACOMP] = CLAMP( a, 0, CHAN_MAX );
+            }
+         }
+
+         /* store colors */
+         for (buffer = 0; buffer < fb->_NumColorDrawBuffers[0]; buffer++) {
+            struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0][buffer];
+            if (masking) {
+               _swrast_mask_rgba_array(ctx, rb, width, xpos, ypos + i, rgba);
+            }
+            rb->PutRow(ctx, rb, width, xpos, ypos + i, rgba, NULL);
+         }
+      }
+   }
+   else {
+      /* other types someday */
+   }
+}
+
+
+
+/**
+ * Software fallback for glAccum.
+ * Validates derived swrast state if needed, then dispatches on 'op' to the
+ * matching accum_* helper for the given rectangle.  Operations that would
+ * not change anything (adding 0, multiplying by 1, accumulating with
+ * weight 0) are skipped.  Emits a warning and returns if the draw buffer
+ * has no accumulation renderbuffer.
+ */
+void
+_swrast_Accum( GLcontext *ctx, GLenum op, GLfloat value,
+               GLint xpos, GLint ypos,
+               GLint width, GLint height )
+
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   if (swrast->NewState) {
+      _swrast_validate_derived( ctx );
+   }
+
+   if (ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer == NULL) {
+      _mesa_warning(ctx, "Calling glAccum() without an accumulation buffer");
+      return;
+   }
+
+   RENDER_START(swrast, ctx);
+
+   switch (op) {
+      case GL_ADD:
+         if (value == 0.0F)
+            break;
+         accum_add(ctx, value, xpos, ypos, width, height);
+         break;
+      case GL_MULT:
+         if (value == 1.0F)
+            break;
+         accum_mult(ctx, value, xpos, ypos, width, height);
+         break;
+      case GL_ACCUM:
+         if (value == 0.0F)
+            break;
+         accum_accum(ctx, value, xpos, ypos, width, height);
+         break;
+      case GL_LOAD:
+         accum_load(ctx, value, xpos, ypos, width, height);
+         break;
+      case GL_RETURN:
+         accum_return(ctx, value, xpos, ypos, width, height);
+         break;
+      default:
+         _mesa_problem(ctx, "invalid mode in _swrast_Accum()");
+         break;
+   }
+
+   RENDER_FINISH(swrast, ctx);
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_accum.h b/dist/Mesa/src/mesa/swrast/s_accum.h
new file mode 100644
index 000000000..97d2bef4c
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_accum.h
@@ -0,0 +1,37 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_ACCUM_H
+#define S_ACCUM_H
+
+
+#include "mtypes.h"
+
+
+/* Clear the accumulation buffer 'rb' to the context's accum clear color
+ * (implemented in s_accum.c).
+ */
+extern void
+_swrast_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb);
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_alpha.c b/dist/Mesa/src/mesa/swrast/s_alpha.c
new file mode 100644
index 000000000..55c1e6a58
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_alpha.c
@@ -0,0 +1,224 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.1
+ *
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file swrast/s_alpha.c
+ * \brief Functions to apply alpha test.
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "macros.h"
+
+#include "s_alpha.h"
+#include "s_context.h"
+
+
+/**
+ * \fn GLint _swrast_alpha_test( const GLcontext *ctx, struct sw_span *span )
+ * \brief Apply the alpha test to a span of pixels.
+ *
+ * Each pixel's alpha is compared against ctx->Color.AlphaRef using
+ * ctx->Color.AlphaFunc, and span->array->mask[i] is cleared for every
+ * pixel that fails.  Alpha comes from span->array->rgba when the span has
+ * SPAN_RGBA set in arrayMask, otherwise it is stepped from span->alpha by
+ * span->alphaStep.
+ *
+ * \return
+ *      - "0" = nothing to draw: the function is GL_NEVER, the alpha
+ *        function is invalid, or the span is empty (start >= end).
+ *      - "1" = otherwise.  Note that the mask is not examined, so "1" may
+ *        be returned even if every pixel failed the test; callers must
+ *        consult the mask.
+ */
+GLint
+_swrast_alpha_test( const GLcontext *ctx, struct sw_span *span )
+{
+   const GLchan (*rgba)[4] = (const GLchan (*)[4]) span->array->rgba;
+   GLchan ref;
+   const GLuint n = span->end;
+   GLubyte *mask = span->array->mask;
+   GLuint i;
+
+   CLAMPED_FLOAT_TO_CHAN(ref, ctx->Color.AlphaRef);
+
+   if (span->arrayMask & SPAN_RGBA) {
+      /* Use the array values */
+      switch (ctx->Color.AlphaFunc) {
+         case GL_LESS:
+            for (i = 0; i < n; i++)
+               mask[i] &= (rgba[i][ACOMP] < ref);
+            break;
+         case GL_LEQUAL:
+            for (i = 0; i < n; i++)
+               mask[i] &= (rgba[i][ACOMP] <= ref);
+            break;
+         case GL_GEQUAL:
+            for (i = 0; i < n; i++)
+               mask[i] &= (rgba[i][ACOMP] >= ref);
+            break;
+         case GL_GREATER:
+            for (i = 0; i < n; i++)
+               mask[i] &= (rgba[i][ACOMP] > ref);
+            break;
+         case GL_NOTEQUAL:
+            for (i = 0; i < n; i++)
+               mask[i] &= (rgba[i][ACOMP] != ref);
+            break;
+         case GL_EQUAL:
+            for (i = 0; i < n; i++)
+               mask[i] &= (rgba[i][ACOMP] == ref);
+            break;
+         case GL_ALWAYS:
+            /* do nothing */
+            return 1;
+         case GL_NEVER:
+            /* caller should check for zero! */
+            span->writeAll = GL_FALSE;
+            return 0;
+         default:
+            _mesa_problem( ctx, "Invalid alpha test in _swrast_alpha_test" );
+            return 0;
+      }
+   }
+   else {
+      /* Use the interpolation values */
+#if CHAN_TYPE == GL_FLOAT
+      const GLfloat alphaStep = span->alphaStep;
+      GLfloat alpha = span->alpha;
+      ASSERT(span->interpMask & SPAN_RGBA);
+      switch (ctx->Color.AlphaFunc) {
+         case GL_LESS:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (alpha < ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_LEQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (alpha <= ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_GEQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (alpha >= ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_GREATER:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (alpha > ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_NOTEQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (alpha != ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_EQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (alpha == ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_ALWAYS:
+            /* do nothing */
+            return 1;
+         case GL_NEVER:
+            /* caller should check for zero! */
+            span->writeAll = GL_FALSE;
+            return 0;
+         default:
+            _mesa_problem( ctx, "Invalid alpha test in _swrast_alpha_test" );
+            return 0;
+      }
+#else
+      /* 8 or 16-bit channel interpolation */
+      const GLfixed alphaStep = span->alphaStep;
+      GLfixed alpha = span->alpha;
+      ASSERT(span->interpMask & SPAN_RGBA);
+      switch (ctx->Color.AlphaFunc) {
+         case GL_LESS:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (FixedToChan(alpha) < ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_LEQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (FixedToChan(alpha) <= ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_GEQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (FixedToChan(alpha) >= ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_GREATER:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (FixedToChan(alpha) > ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_NOTEQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (FixedToChan(alpha) != ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_EQUAL:
+            for (i = 0; i < n; i++) {
+               mask[i] &= (FixedToChan(alpha) == ref);
+               alpha += alphaStep;
+            }
+            break;
+         case GL_ALWAYS:
+            /* do nothing */
+            return 1;
+         case GL_NEVER:
+            /* caller should check for zero! */
+            span->writeAll = GL_FALSE;
+            return 0;
+         default:
+            _mesa_problem( ctx, "Invalid alpha test in _swrast_alpha_test" );
+            return 0;
+      }
+#endif /* CHAN_TYPE */
+   }
+
+#if 0
+   /* XXXX This causes conformance failures!!!! */
+   while ((span->start <= span->end)  &&
+          (mask[span->start] == 0))
+     span->start ++;
+
+   while ((span->end >= span->start)  &&
+          (mask[span->end] == 0))
+     span->end --;
+#endif
+
+   /* some pixels may have been masked off, so not all will be written */
+   span->writeAll = GL_FALSE;
+
+   if (span->start >= span->end)
+     return 0;
+   else
+     return 1;
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_alpha.h b/dist/Mesa/src/mesa/swrast/s_alpha.h
new file mode 100644
index 000000000..add0f74c4
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_alpha.h
@@ -0,0 +1,39 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.1
+ *
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_ALPHA_H
+#define S_ALPHA_H
+
+
+#include "mtypes.h"
+#include "s_context.h"
+
+/* Alpha test on a span.  NOTE(review): result is presumably 0 or 1 - confirm. */
+extern GLint
+_swrast_alpha_test( const GLcontext *ctx, struct sw_span *span );
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_arbshader.c b/dist/Mesa/src/mesa/swrast/s_arbshader.c
new file mode 100644
index 000000000..c3f85ee15
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_arbshader.c
@@ -0,0 +1,120 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Michal Krol
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "s_arbshader.h"
+#include "s_context.h"
+#include "shaderobjects.h"
+#include "shaderobjects_3dlabs.h"
+#include "slang_utility.h"
+#include "slang_link.h"
+
+#if FEATURE_ARB_fragment_shader
+
+void
+_swrast_exec_arbshader(GLcontext *ctx, struct sw_span *span)
+{
+   /* Run the current ARB fragment shader on every active fragment of span. */
+   struct gl2_program_intf **prog;
+   GLuint frag;
+
+   if (!ctx->ShaderObjects._FragmentShaderPresent)
+      return;
+
+   prog = ctx->ShaderObjects.CurrentProgram;
+   if (!ctx->ShaderObjects._VertexShaderPresent)
+      (**prog).UpdateFixedUniforms(prog);
+
+   for (frag = span->start; frag < span->end; frag++) {
+      GLfloat vec[4];
+      GLuint unit, row, comp;
+      GLboolean discard;
+
+      /* fragments that are already masked out never reach the shader */
+      if (!span->array->mask[frag])
+         continue;
+
+      /* fragment coordinate: x, y, z / _DepthMaxF, interpolated w */
+      vec[0] = (GLfloat) span->x + frag;
+      vec[1] = (GLfloat) span->y;
+      vec[2] = (GLfloat) span->array->z[frag] / ctx->DrawBuffer->_DepthMaxF;
+      vec[3] = span->w + span->dwdx * frag;
+      (**prog).UpdateFixedVarying(prog, SLANG_FRAGMENT_FIXED_FRAGCOORD, vec,
+                                  0, 4 * sizeof(GLfloat), GL_TRUE);
+
+      /* primary color, then secondary color, as floats */
+      vec[0] = CHAN_TO_FLOAT(span->array->rgba[frag][RCOMP]);
+      vec[1] = CHAN_TO_FLOAT(span->array->rgba[frag][GCOMP]);
+      vec[2] = CHAN_TO_FLOAT(span->array->rgba[frag][BCOMP]);
+      vec[3] = CHAN_TO_FLOAT(span->array->rgba[frag][ACOMP]);
+      (**prog).UpdateFixedVarying(prog, SLANG_FRAGMENT_FIXED_COLOR, vec, 0,
+                                  4 * sizeof(GLfloat), GL_TRUE);
+      vec[0] = CHAN_TO_FLOAT(span->array->spec[frag][RCOMP]);
+      vec[1] = CHAN_TO_FLOAT(span->array->spec[frag][GCOMP]);
+      vec[2] = CHAN_TO_FLOAT(span->array->spec[frag][BCOMP]);
+      vec[3] = CHAN_TO_FLOAT(span->array->spec[frag][ACOMP]);
+      (**prog).UpdateFixedVarying(prog, SLANG_FRAGMENT_FIXED_SECONDARYCOLOR,
+                                  vec, 0, 4 * sizeof(GLfloat), GL_TRUE);
+      /* one coordinate vector per texture coordinate unit */
+      for (unit = 0; unit < ctx->Const.MaxTextureCoordUnits; unit++) {
+         for (comp = 0; comp < 4; comp++)
+            vec[comp] = span->array->texcoords[unit][frag][comp];
+         (**prog).UpdateFixedVarying(prog, SLANG_FRAGMENT_FIXED_TEXCOORD,
+                                     vec, unit, 4 * sizeof(GLfloat), GL_TRUE);
+      }
+
+      /* generic varyings are handed over one scalar at a time */
+      for (row = 0; row < MAX_VARYING_VECTORS; row++) {
+         for (comp = 0; comp < VARYINGS_PER_VECTOR; comp++) {
+            (**prog).UpdateVarying(prog, row * VARYINGS_PER_VECTOR + comp,
+                                   &span->array->varying[frag][row][comp],
+                                   GL_FALSE);
+         }
+      }
+
+      _slang_exec_fragment_shader(prog);
+      _slang_fetch_discard(prog, &discard);
+      if (discard) {
+         span->array->mask[frag] = GL_FALSE;
+         span->writeAll = GL_FALSE;
+         continue;
+      }
+
+      /* fetch the shader's output color and store it in the span's rgba */
+      (**prog).UpdateFixedVarying(prog, SLANG_FRAGMENT_FIXED_FRAGCOLOR,
+                                  vec, 0, 4 * sizeof(GLfloat), GL_FALSE);
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[frag][RCOMP], vec[0]);
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[frag][GCOMP], vec[1]);
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[frag][BCOMP], vec[2]);
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[frag][ACOMP], vec[3]);
+   }
+}
+
+#endif /* FEATURE_ARB_fragment_shader */
+
diff --git a/dist/Mesa/src/mesa/swrast/s_arbshader.h b/dist/Mesa/src/mesa/swrast/s_arbshader.h
new file mode 100644
index 000000000..805cd4da3
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_arbshader.h
@@ -0,0 +1,38 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 2006  David Airlie   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_ARBSHADER_H
+#define S_ARBSHADER_H
+
+#include "s_context.h"
+
+#if FEATURE_ARB_fragment_shader
+/* Runs the current ARB fragment shader on the active fragments of span. */
+extern void _swrast_exec_arbshader (GLcontext *ctx, struct sw_span *span);
+
+#endif /* FEATURE_ARB_fragment_shader */
+
+#endif
+
diff --git a/dist/Mesa/src/mesa/swrast/s_atifragshader.c b/dist/Mesa/src/mesa/swrast/s_atifragshader.c
new file mode 100644
index 000000000..08640e146
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_atifragshader.c
@@ -0,0 +1,617 @@
+/*
+ *
+ * Copyright (C) 2004  David Airlie   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "glheader.h"
+#include "colormac.h"
+#include "context.h"
+#include "atifragshader.h"
+#include "macros.h"
+#include "program.h"
+
+#include "s_atifragshader.h"
+
+
+/**
+ * Sample one texel from texture unit 'unit' at 'texcoord', handing
+ * 'lambda' through to the sampler, and return it in 'color' as floats.
+ */
+static void
+fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
+            GLuint unit, GLfloat color[4])
+{
+   SWcontext *sw = SWRAST_CONTEXT(ctx);
+   GLchan texel[4];
+   GLuint c;
+
+   /* XXX use a float-valued TextureSample routine here!!! */
+   sw->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current, 1,
+                           (const GLfloat(*)[4]) texcoord, &lambda, &texel);
+
+   for (c = 0; c < 4; c++)
+      color[c] = CHAN_TO_FLOAT(texel[c]);
+}
+
+static void
+apply_swizzle(GLfloat values[4], GLuint swizzle)
+{
+   /* Rearrange (and for the _DR/_DQ variants project) the first three
+    * components of values as selected by swizzle; values[3] is always
+    * cleared.  An unrecognized swizzle leaves values[0..2] untouched.
+    */
+   const GLfloat s = values[0];
+   const GLfloat t = values[1];
+   const GLfloat r = values[2];
+   GLfloat q = values[3];
+
+   switch (swizzle) {
+   case GL_SWIZZLE_STR_ATI:
+      /* (s, t, r) is what values already holds */
+      break;
+   case GL_SWIZZLE_STQ_ATI:
+      values[2] = q;
+      break;
+   case GL_SWIZZLE_STR_DR_ATI:
+      /* note: unlike q below, r is not checked for zero */
+      values[0] = s / r;
+      values[1] = t / r;
+      values[2] = 1 / r;
+      break;
+   case GL_SWIZZLE_STQ_DQ_ATI:
+      /* keep q away from 0 so later texture lookups see no infinities? */
+      if (q == 0.0F)
+         q = 0.000000001;
+      values[0] = s / q;
+      values[1] = t / q;
+      values[2] = 1 / q;
+      break;
+   }
+   values[3] = 0.0;
+}
+
+static void
+apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
+{
+   /* Copy the component of val named by rep over val[3] (optype != 0) or
+    * over all four components; a zero or unknown rep changes nothing. */
+   const GLint first = optype ? 3 : 0;
+   GLfloat picked;
+   GLint c;
+
+   switch (rep) {
+   case GL_RED:
+      picked = val[0];
+      break;
+   case GL_GREEN:
+      picked = val[1];
+      break;
+   case GL_BLUE:
+      picked = val[2];
+      break;
+   case GL_ALPHA:
+      picked = val[3];
+      break;
+   default:
+      return;
+   }
+   for (c = first; c < 4; c++)
+      val[c] = picked;
+}
+
+static void
+apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
+{
+   /* Apply the source modifier bits set in mod to val[3] (optype != 0)
+    * or to all four components of val.  The order is fixed: complement,
+    * bias, 2x, negate.  A zero mod changes nothing.
+    */
+   GLint c = optype ? 3 : 0;
+
+   if (mod == 0)
+      return;
+
+   for (; c < 4; c++) {
+      if (mod & GL_COMP_BIT_ATI)
+         val[c] = 1 - val[c];
+
+      if (mod & GL_BIAS_BIT_ATI)
+         val[c] -= 0.5;
+
+      if (mod & GL_2X_BIT_ATI)
+         val[c] *= 2;
+
+      if (mod & GL_NEGATE_BIT_ATI)
+         val[c] = -val[c];
+   }
+}
+
+static void
+apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
+{
+   /* Scale and clamp an op result in place.  A color op (optype == 0)
+    * touches val[0..2], an alpha op touches val[3] only.  The scale is
+    * chosen by mod with the saturate bit stripped; the result is then
+    * clamped to [0, 1] when the saturate bit was set and to [-8, 8]
+    * otherwise.
+    */
+   const GLint saturate = mod & GL_SATURATE_BIT_ATI;
+   const GLfloat lo = saturate ? 0.0F : -8.0F;
+   const GLfloat hi = saturate ? 1.0F : 8.0F;
+   const GLint first = optype ? 3 : 0;
+   const GLint last = optype ? 4 : 3;
+   GLfloat scale = 1.0F;
+   GLint c;
+
+   /* pick the multiplier; anything unrecognized leaves the value as is */
+   switch (mod & ~GL_SATURATE_BIT_ATI) {
+   case GL_2X_BIT_ATI:
+      scale = 2.0F;
+      break;
+   case GL_4X_BIT_ATI:
+      scale = 4.0F;
+      break;
+   case GL_8X_BIT_ATI:
+      scale = 8.0F;
+      break;
+   case GL_HALF_BIT_ATI:
+      scale = 0.5F;
+      break;
+   case GL_QUARTER_BIT_ATI:
+      scale = 0.25F;
+      break;
+   case GL_EIGHTH_BIT_ATI:
+      scale = 0.125F;
+      break;
+   }
+
+   for (c = first; c < last; c++) {
+      val[c] *= scale;
+
+      if (val[c] < lo)
+         val[c] = lo;
+      else if (val[c] > hi)
+         val[c] = hi;
+   }
+}
+
+
+static void
+write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
+               GLfloat * dst)
+{
+   /* Run the destination modifier over src (in place) and store the
+    * result in dst.  A color op writes the channels enabled in mask,
+    * or all of dst[0..2] when mask is zero; any other op writes dst[3]
+    * only.
+    */
+   apply_dst_mod(optype, mod, src);
+
+   if (optype != ATI_FRAGMENT_SHADER_COLOR_OP) {
+      dst[3] = src[3];
+      return;
+   }
+
+   if (mask == 0)   /* an empty write mask selects all three channels */
+      mask = GL_RED_BIT_ATI | GL_GREEN_BIT_ATI | GL_BLUE_BIT_ATI;
+
+   if (mask & GL_RED_BIT_ATI)
+      dst[0] = src[0];
+   if (mask & GL_GREEN_BIT_ATI)
+      dst[1] = src[1];
+   if (mask & GL_BLUE_BIT_ATI)
+      dst[2] = src[2];
+}
+
+static void
+finish_pass(struct atifs_machine *machine)
+{
+   /* Save all six registers so the next pass can read this pass's results. */
+   GLint reg = 0;
+
+   for (; reg < 6; reg++)
+      COPY_4V(machine->PrevPassRegisters[reg], machine->Registers[reg]);
+}
+
+/**
+ * Opcode table; the remaining notes describe execute_shader() below.
+ * NOTE: we do everything in single-precision floating point; we don't
+ * currently observe the single/half/fixed-precision qualifiers.
+ * \param ctx - rendering context
+ * \param shader - the fragment shader to execute
+ * \param machine - machine state (register file)
+ * \param maxInst - max number of instructions to execute (currently unused)
+ * \return GL_TRUE always; no path in execute_shader() returns GL_FALSE.
+ */
+
+struct ati_fs_opcode_st ati_fs_opcodes[] = {
+   {GL_ADD_ATI, 2},
+   {GL_SUB_ATI, 2},
+   {GL_MUL_ATI, 2},
+   {GL_MAD_ATI, 3},
+   {GL_LERP_ATI, 3},
+   {GL_MOV_ATI, 1},
+   {GL_CND_ATI, 3},
+   {GL_CND0_ATI, 3},
+   {GL_DOT2_ADD_ATI, 3},
+   {GL_DOT3_ATI, 2},
+   {GL_DOT4_ATI, 2}
+};
+
+
+
+static void
+handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
+               const struct sw_span *span, GLuint column, GLuint idx)
+{
+   /* Load register idx from a texcoord set of the fragment or from a
+    * register saved by the previous pass, then swizzle it in place.
+    */
+   const GLuint source = texinst->src;
+   GLfloat *reg = machine->Registers[idx];
+
+   if (source >= GL_TEXTURE0_ARB && source <= GL_TEXTURE7_ARB) {
+      COPY_4V(reg, span->array->texcoords[source - GL_TEXTURE0_ARB][column]);
+   }
+   else if (source >= GL_REG_0_ATI && source <= GL_REG_5_ATI) {
+      COPY_4V(reg, machine->PrevPassRegisters[source - GL_REG_0_ATI]);
+   }
+
+   apply_swizzle(reg, texinst->swizzle);
+}
+
+static void
+handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
+                 struct atifs_setupinst *texinst, const struct sw_span *span,
+                 GLuint column, GLuint idx)
+{
+   /* Sample texture unit idx into register idx.  The coordinates come
+    * from texinst->src (a texcoord set of the fragment or a register of
+    * the previous pass) and are swizzled before the lookup. */
+   const GLuint source = texinst->src;
+   GLfloat coords[4];
+
+   if (source >= GL_TEXTURE0_ARB && source <= GL_TEXTURE7_ARB) {
+      COPY_4V(coords, span->array->texcoords[source - GL_TEXTURE0_ARB][column]);
+   }
+   else if (source >= GL_REG_0_ATI && source <= GL_REG_5_ATI) {
+      COPY_4V(coords, machine->PrevPassRegisters[source - GL_REG_0_ATI]);
+   }
+
+   apply_swizzle(coords, texinst->swizzle);
+   fetch_texel(ctx, coords, 0.0F, idx, machine->Registers[idx]);
+}
+
+#define SETUP_SRC_REG(optype, i, x)		\
+do {						\
+   COPY_4V(src[optype][i], x); 			\
+} while (0)
+/* Run every pass of shader for the fragment at index column of span. */
+static GLboolean
+execute_shader(GLcontext * ctx,
+	       const struct ati_fragment_shader *shader, GLuint maxInst,
+	       struct atifs_machine *machine, const struct sw_span *span,
+	       GLuint column)
+{
+   GLuint pc;
+   struct atifs_instruction *inst;
+   struct atifs_setupinst *texinst;
+   GLint optype;
+   GLint i, j, pass;
+   GLint dstreg;
+   GLfloat src[2][3][4];   /* [color/alpha op][argument][component] */
+   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
+   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
+   GLfloat dst[2][4], *dstp;   /* dst: [color/alpha op][component] */
+
+   for (pass = 0; pass < shader->NumPasses; pass++) {
+      if (pass > 0)
+	 finish_pass(machine);   /* keep the previous pass's registers */
+      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
+	 texinst = &shader->SetupInst[pass][j];
+	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
+	    handle_pass_op(machine, texinst, span, column, j);
+	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
+	    handle_sample_op(ctx, machine, texinst, span, column, j);
+      }
+
+      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
+	 inst = &shader->Instructions[pass][pc];
+
+	 /* setup the source registers for color and alpha ops */
+	 for (optype = 0; optype < 2; optype++) {
+	    for (i = 0; i < inst->ArgCount[optype]; i++) {
+	       GLint index = inst->SrcReg[optype][i].Index;
+
+	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
+		  SETUP_SRC_REG(optype, i,
+				machine->Registers[index - GL_REG_0_ATI]);
+	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
+		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
+		     SETUP_SRC_REG(optype, i,
+				shader->Constants[index - GL_CON_0_ATI]);
+		  } else {
+		     SETUP_SRC_REG(optype, i,
+				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
+		  }
+	       }
+	       else if (index == GL_ONE)
+		  SETUP_SRC_REG(optype, i, ones);
+	       else if (index == GL_ZERO)
+		  SETUP_SRC_REG(optype, i, zeros);
+	       else if (index == GL_PRIMARY_COLOR_EXT)
+		  SETUP_SRC_REG(optype, i,
+				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
+	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
+		  SETUP_SRC_REG(optype, i,
+				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
+
+	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
+			     src[optype][i]);
+	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
+			     src[optype][i]);
+	    }
+	 }
+
+	 /* Execute the operations - color then alpha */
+	 for (optype = 0; optype < 2; optype++) {
+	    if (inst->Opcode[optype]) {
+	       switch (inst->Opcode[optype]) {
+	       case GL_ADD_ATI:
+		  if (!optype)
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] =
+			   src[optype][0][i] + src[optype][1][i];
+		     }
+		  else
+		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
+		  break;
+	       case GL_SUB_ATI:
+		  if (!optype)
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] =
+			   src[optype][0][i] - src[optype][1][i];
+		     }
+		  else
+		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
+		  break;
+	       case GL_MUL_ATI:
+		  if (!optype)
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] =
+			   src[optype][0][i] * src[optype][1][i];
+		     }
+		  else
+		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
+		  break;
+	       case GL_MAD_ATI:
+		  if (!optype)
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] =
+			   src[optype][0][i] * src[optype][1][i] +
+			   src[optype][2][i];
+		     }
+		  else
+		     dst[optype][3] =
+			src[optype][0][3] * src[optype][1][3] +
+			src[optype][2][3];
+		  break;
+	       case GL_LERP_ATI:
+		  if (!optype)
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] =
+			   src[optype][0][i] * src[optype][1][i] + (1 -
+								    src
+								    [optype]
+								    [0][i]) *
+			   src[optype][2][i];
+		     }
+		  else
+		     dst[optype][3] =
+			src[optype][0][3] * src[optype][1][3] + (1 -
+								 src[optype]
+								 [0][3]) *
+			src[optype][2][3];
+		  break;
+
+	       case GL_MOV_ATI:
+		  if (!optype)
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] = src[optype][0][i];
+		     }
+		  else
+		     dst[optype][3] = src[optype][0][3];
+		  break;
+	       case GL_CND_ATI:
+		  if (!optype) {
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] =
+			   (src[optype][2][i] >
+			    0.5) ? src[optype][0][i] : src[optype][1][i];
+		     }
+		  }
+		  else {
+		     dst[optype][3] =
+			(src[optype][2][3] >
+			 0.5) ? src[optype][0][3] : src[optype][1][3];
+		  }
+		  break;
+
+	       case GL_CND0_ATI:
+		  if (!optype)
+		     for (i = 0; i < 3; i++) {
+			dst[optype][i] =
+			   (src[optype][2][i] >=
+			    0) ? src[optype][0][i] : src[optype][1][i];
+		     }
+		  else {
+		     dst[optype][3] =
+			(src[optype][2][3] >=
+			 0) ? src[optype][0][3] : src[optype][1][3];
+		  }
+		  break;
+	       case GL_DOT2_ADD_ATI:
+		  {
+		     GLfloat result;
+
+		     /* DOT 2 always uses the source from the color op */
+		     /* could save recalculation of dot products for alpha inst */
+		     result = src[0][0][0] * src[0][1][0] +
+			src[0][0][1] * src[0][1][1] + src[0][2][2];
+		     if (!optype) {
+			for (i = 0; i < 3; i++) {
+			   dst[optype][i] = result;
+			}
+		     }
+		     else
+			dst[optype][3] = result;
+		  }
+		  break;
+	       case GL_DOT3_ATI:
+		  {
+		     GLfloat result;
+
+		     /* DOT 3 always uses the source from the color op */
+		     result = src[0][0][0] * src[0][1][0] +
+			src[0][0][1] * src[0][1][1] +
+			src[0][0][2] * src[0][1][2];
+
+		     if (!optype) {
+			for (i = 0; i < 3; i++) {
+			   dst[optype][i] = result;
+			}
+		     }
+		     else
+			dst[optype][3] = result;
+		  }
+		  break;
+	       case GL_DOT4_ATI:
+		  {
+		     GLfloat result;
+
+		     /* DOT 4 always uses the source from the color op */
+		     result = src[0][0][0] * src[0][1][0] +
+			src[0][0][1] * src[0][1][1] +
+			src[0][0][2] * src[0][1][2] +
+			src[0][0][3] * src[0][1][3];
+		     if (!optype) {
+			for (i = 0; i < 3; i++) {
+			   dst[optype][i] = result;
+			}
+		     }
+		     else
+			dst[optype][3] = result;
+		  }
+		  break;
+
+	       }
+	    }
+	 }
+
+	 /* write out the destination registers */
+	 for (optype = 0; optype < 2; optype++) {
+	    if (inst->Opcode[optype]) {
+	       dstreg = inst->DstReg[optype].Index;
+	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
+
+	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
+		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
+	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
+			      inst->DstReg[optype].dstMask, dst[optype],
+			      dstp);
+	       else
+		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
+	    }
+	 }
+      }
+   }
+   return GL_TRUE;   /* no path returns GL_FALSE; maxInst is never read */
+}
+
+static void
+init_machine(GLcontext * ctx, struct atifs_machine *machine,
+	     const struct ati_fragment_shader *shader,
+	     const struct sw_span *span, GLuint col)
+{
+   /* Prepare machine for running the shader on fragment col of span:
+    * clear the six registers and load the primary and secondary color
+    * inputs, converted to float.  Everything is written through the
+    * machine argument, so the caller decides which machine is set up;
+    * ctx and shader are currently not needed.
+    */
+   GLint i, j;
+
+   (void) ctx;
+   (void) shader;
+
+   /* every register starts out as (0, 0, 0, 0) */
+   for (i = 0; i < 6; i++) {
+      for (j = 0; j < 4; j++)
+         machine->Registers[i][j] = 0.0;
+   }
+
+   /* color inputs of this fragment */
+   for (j = 0; j < 4; j++) {
+      machine->Inputs[ATI_FS_INPUT_PRIMARY][j] =
+         CHAN_TO_FLOAT(span->array->rgba[col][j]);
+      machine->Inputs[ATI_FS_INPUT_SECONDARY][j] =
+         CHAN_TO_FLOAT(span->array->spec[col][j]);
+   }
+}
+
+
+
+/**
+ * Run the current ATI fragment shader on every unmasked fragment of the
+ * span and write the contents of register 0 back as the fragment color.
+ */
+void
+_swrast_exec_fragment_shader(GLcontext * ctx, struct sw_span *span)
+{
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   struct atifs_machine *machine = &ctx->ATIFragmentShader.Machine;
+   GLuint col;
+
+   ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
+
+   for (col = 0; col < span->end; col++) {
+      const GLfloat *colOut = machine->Registers[0];
+
+      /* fragments that are masked out are skipped entirely */
+      if (!span->array->mask[col])
+         continue;
+
+      init_machine(ctx, machine, shader, span, col);
+
+      /* can't really happen... */
+      if (!execute_shader(ctx, shader, ~0, machine, span, col)) {
+         span->array->mask[col] = GL_FALSE;
+         span->writeAll = GL_FALSE;
+      }
+
+      /* register 0 holds the shader's output color */
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[col][RCOMP], colOut[0]);
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[col][GCOMP], colOut[1]);
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[col][BCOMP], colOut[2]);
+      UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[col][ACOMP], colOut[3]);
+   }
+
+   ctx->_CurrentProgram = 0;
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_atifragshader.h b/dist/Mesa/src/mesa/swrast/s_atifragshader.h
new file mode 100644
index 000000000..269be1f2e
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_atifragshader.h
@@ -0,0 +1,37 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.1
+ *
+ * Copyright (C) 1999-2003  David Airlie   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_ATIFRAGSHADER_H
+#define S_ATIFRAGSHADER_H
+
+
+#include "s_context.h"
+
+/* Runs the current ATI fragment shader on the unmasked fragments of span. */
+extern void
+_swrast_exec_fragment_shader( GLcontext *ctx, struct sw_span *span );
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_bitmap.c b/dist/Mesa/src/mesa/swrast/s_bitmap.c
new file mode 100644
index 000000000..dd5008cf6
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_bitmap.c
@@ -0,0 +1,282 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file swrast/s_bitmap.c
+ * \brief glBitmap rendering.
+ * \author Brian Paul
+ */
+
+#include "glheader.h"
+#include "bufferobj.h"
+#include "image.h"
+#include "macros.h"
+#include "pixel.h"
+
+#include "s_context.h"
+#include "s_span.h"
+
+
+
+/*
+ * Render a bitmap: each bit set in 'bitmap' becomes one fragment at
+ * (px + column, py + row) with the current raster color (RGBA visuals)
+ * or the current raster index (color-index visuals).  The fragments
+ * are collected in a span, which is flushed once another full row
+ * might not fit any more, and after the last row.
+ * If 'unpack' refers to a pixel buffer object, 'bitmap' is an offset
+ * into that buffer and the buffer stays mapped during the call.
+ *
+ * px, py         - position of the fragment for row 0, column 0
+ * width, height  - number of bits per row and number of rows
+ * unpack, bitmap - pixel-store state and the bitmap data (or PBO offset)
+ */
+void
+_swrast_Bitmap( GLcontext *ctx, GLint px, GLint py,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct sw_span span;
+   GLuint nFrags = 0;
+   GLint row, col;
+
+   ASSERT(ctx->RenderMode == GL_RENDER);
+
+   if (unpack->BufferObj->Name) {
+      /* unpack from PBO */
+      GLubyte *mapped;
+
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     GL_COLOR_INDEX, GL_BITMAP,
+                                     (GLvoid *) bitmap)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
+         return;
+      }
+
+      mapped = (GLubyte *) ctx->Driver.MapBuffer(ctx,
+                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
+                                                 GL_READ_ONLY_ARB,
+                                                 unpack->BufferObj);
+      if (!mapped) {
+         /* buffer is already mapped - that's an error */
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
+         return;
+      }
+      bitmap = ADD_POINTERS(mapped, bitmap);
+   }
+
+   RENDER_START(swrast,ctx);
+
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+
+   INIT_SPAN(span, GL_BITMAP, width, 0, SPAN_XY);
+
+   /* the same color (or index) applies to every fragment: zero steps */
+   if (!ctx->Visual.rgbMode) {
+      span.interpMask |= SPAN_INDEX;
+      span.index = FloatToFixed(ctx->Current.RasterIndex);
+      span.indexStep = 0;
+   }
+   else {
+      span.interpMask |= SPAN_RGBA;
+      span.red   = FloatToFixed(ctx->Current.RasterColor[0] * CHAN_MAXF);
+      span.green = FloatToFixed(ctx->Current.RasterColor[1] * CHAN_MAXF);
+      span.blue  = FloatToFixed(ctx->Current.RasterColor[2] * CHAN_MAXF);
+      span.alpha = FloatToFixed(ctx->Current.RasterColor[3] * CHAN_MAXF);
+      span.redStep = span.greenStep = span.blueStep = span.alphaStep = 0;
+   }
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   if (ctx->Texture._EnabledCoordUnits)
+      _swrast_span_default_texcoords(ctx, &span);
+
+   for (row = 0; row < height; row++) {
+      const GLubyte *src = (const GLubyte *) _mesa_image_address2d(unpack,
+                 bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0);
+      /* bits of a byte are consumed 1 -> 128 when LsbFirst is set and
+       * 128 -> 1 otherwise; SkipPixels picks the starting bit
+       */
+      const GLint lsbFirst = unpack->LsbFirst ? 1 : 0;
+      const GLubyte firstBit = lsbFirst ? 1U : 128U;
+      const GLubyte lastBit = lsbFirst ? 128U : 1U;
+      const GLuint skip = unpack->SkipPixels & 0x7;
+      GLubyte mask = lsbFirst ? (1U << skip) : (128U >> skip);
+
+      for (col = 0; col < width; col++) {
+         if (*src & mask) {
+            /* bit is set: record the fragment's window position */
+            span.array->x[nFrags] = px + col;
+            span.array->y[nFrags] = py + row;
+            nFrags++;
+         }
+
+         if (mask == lastBit) {
+            /* this byte is used up, continue with the next one */
+            src++;
+            mask = firstBit;
+         }
+         else if (lsbFirst) {
+            mask = mask << 1;
+         }
+         else {
+            mask = mask >> 1;
+         }
+      }
+
+      /* no advance of src is needed here: it is recomputed per row */
+
+      if (nFrags + width >= MAX_WIDTH || row + 1 == height) {
+         /* flush the span */
+         span.end = nFrags;
+         if (ctx->Visual.rgbMode)
+            _swrast_write_rgba_span(ctx, &span);
+         else
+            _swrast_write_index_span(ctx, &span);
+         span.end = 0;
+         nFrags = 0;
+      }
+   }
+
+   RENDER_FINISH(swrast,ctx);
+
+   if (unpack->BufferObj->Name) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+}
+
+
+#if 0
+/*
+ * XXX this is another way to implement Bitmap.  Use horizontal runs of
+ * fragments, initializing the mask array to indicate which fragments to
+ * draw or skip.  One span is written per bitmap row.
+ */
+void
+_swrast_Bitmap( GLcontext *ctx, GLint px, GLint py,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLint row, col;
+   struct sw_span span;
+
+   ASSERT(ctx->RenderMode == GL_RENDER);
+   ASSERT(bitmap);
+
+   RENDER_START(swrast,ctx);
+
+   if (SWRAST_CONTEXT(ctx)->NewState)
+      _swrast_validate_derived( ctx );
+
+   INIT_SPAN(span, GL_BITMAP, width, 0, SPAN_MASK);
+
+   /*span.arrayMask |= SPAN_MASK;*/  /* we'll init span.mask[] */
+   span.x = px;
+   span.y = py;
+   /*span.end = width;*/
+   if (ctx->Visual.rgbMode) {
+      /* constant color: current raster color with zero per-pixel steps */
+      span.interpMask |= SPAN_RGBA;
+      span.red   = FloatToFixed(ctx->Current.RasterColor[0] * CHAN_MAXF);
+      span.green = FloatToFixed(ctx->Current.RasterColor[1] * CHAN_MAXF);
+      span.blue  = FloatToFixed(ctx->Current.RasterColor[2] * CHAN_MAXF);
+      span.alpha = FloatToFixed(ctx->Current.RasterColor[3] * CHAN_MAXF);
+      span.redStep = span.greenStep = span.blueStep = span.alphaStep = 0;
+   }
+   else {
+      /* constant color index: current raster index with a zero step */
+      span.interpMask |= SPAN_INDEX;
+      span.index = FloatToFixed(ctx->Current.RasterIndex);
+      span.indexStep = 0;
+   }
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   if (ctx->Texture._EnabledCoordUnits)
+      _swrast_span_default_texcoords(ctx, &span);
+
+   for (row=0; row<height; row++, span.y++) {  /* span.y advances with row */
+      const GLubyte *src = (const GLubyte *) _mesa_image_address2d(unpack,
+                 bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0);
+
+      if (unpack->LsbFirst) {
+         /* Lsb first */
+         GLubyte mask = 1U << (unpack->SkipPixels & 0x7);  /* first bit tested in *src */
+         for (col=0; col<width; col++) {
+            span.array->mask[col] = (*src & mask) ? GL_TRUE : GL_FALSE;
+            if (mask == 128U) {
+               src++;
+               mask = 1U;
+            }
+            else {
+               mask = mask << 1;
+            }
+         }
+
+         if (ctx->Visual.rgbMode)
+            _swrast_write_rgba_span(ctx, &span);
+         else
+	    _swrast_write_index_span(ctx, &span);
+
+         /* get ready for next row */
+         if (mask != 1)
+            src++;  /* src is fetched again at the top of the row loop */
+      }
+      else {
+         /* Msb first */
+         GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);  /* first bit tested in *src */
+         for (col=0; col<width; col++) {
+            span.array->mask[col] = (*src & mask) ? GL_TRUE : GL_FALSE;
+            if (mask == 1U) {
+               src++;
+               mask = 128U;
+            }
+            else {
+               mask = mask >> 1;
+            }
+         }
+
+         if (ctx->Visual.rgbMode)
+            _swrast_write_rgba_span(ctx, &span);
+         else
+            _swrast_write_index_span(ctx, &span);
+
+         /* get ready for next row */
+         if (mask != 128)
+            src++;  /* src is fetched again at the top of the row loop */
+      }
+   }
+
+   RENDER_FINISH(swrast,ctx);
+}
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_blend.c b/dist/Mesa/src/mesa/swrast/s_blend.c
new file mode 100644
index 000000000..d94ff3923
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_blend.c
@@ -0,0 +1,872 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Regarding GL_NV_blend_square:
+ *
+ * Portions of this software may use or implement intellectual
+ * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
+ * any and all warranties with respect to such intellectual property,
+ * including any use thereof or modifications thereto.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "macros.h"
+
+#include "s_blend.h"
+#include "s_context.h"
+#include "s_span.h"
+
+
+#if defined(USE_MMX_ASM)
+#include "x86/mmx.h"
+#include "x86/common_x86_asm.h"
+#define _BLENDAPI _ASMAPI
+#else
+#define _BLENDAPI
+#endif
+
+
+/*
+ * glBlendFunc(GL_ZERO, GL_ONE): the result is the destination color itself.
+ */
+static void _BLENDAPI
+blend_noop( GLcontext *ctx, GLuint n, const GLubyte mask[],
+            GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   GLuint pix;
+   ASSERT(ctx->Color.BlendEquationRGB==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendEquationA==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendSrcRGB==GL_ZERO);
+   ASSERT(ctx->Color.BlendDstRGB==GL_ONE);
+   (void) ctx;
+
+   for (pix = 0; pix < n; pix++) {
+      if (!mask[pix])
+         continue;   /* masked-out pixel: leave rgba[pix] untouched */
+      COPY_CHAN4( rgba[pix], dest[pix] );
+   }
+}
+
+
+/*
+ * glBlendFunc(GL_ONE, GL_ZERO): rgba[] already holds the result; do nothing.
+ */
+static void _BLENDAPI
+blend_replace( GLcontext *ctx, GLuint n, const GLubyte mask[],
+               GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   ASSERT(ctx->Color.BlendEquationRGB==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendEquationA==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendSrcRGB==GL_ONE);
+   ASSERT(ctx->Color.BlendDstRGB==GL_ZERO);
+
+   /* No parameter is needed; the casts mark each one as deliberately unused. */
+   (void) dest;
+   (void) rgba;  (void) mask;
+   (void) n;  (void) ctx;
+}
+
+
+/*
+ * Common transparency blending mode: (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA).
+ */
+static void _BLENDAPI
+blend_transparency( GLcontext *ctx, GLuint n, const GLubyte mask[],
+                    GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   GLuint i;
+   ASSERT(ctx->Color.BlendEquationRGB==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendEquationA==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendSrcRGB==GL_SRC_ALPHA);
+   ASSERT(ctx->Color.BlendDstRGB==GL_ONE_MINUS_SRC_ALPHA);
+   (void) ctx;
+
+   for (i=0;i<n;i++) {
+      if (mask[i]) {
+         const GLchan t = rgba[i][ACOMP];  /* source alpha, t in [0, CHAN_MAX] */
+         if (t == 0) {
+            /* 0% alpha: the result is the destination color */
+            rgba[i][RCOMP] = dest[i][RCOMP];
+            rgba[i][GCOMP] = dest[i][GCOMP];
+            rgba[i][BCOMP] = dest[i][BCOMP];
+            rgba[i][ACOMP] = dest[i][ACOMP];
+         }
+         else if (t == CHAN_MAX) {
+            /* 100% alpha, no-op: rgba[i] already holds the result */
+         }
+         else {
+#if 0
+            /* This is pretty close, but Glean complains */
+            const GLint s = CHAN_MAX - t;
+            const GLint r = (rgba[i][RCOMP] * t + dest[i][RCOMP] * s + 1) >> 8;
+            const GLint g = (rgba[i][GCOMP] * t + dest[i][GCOMP] * s + 1) >> 8;
+            const GLint b = (rgba[i][BCOMP] * t + dest[i][BCOMP] * s + 1) >> 8;
+            const GLint a = (rgba[i][ACOMP] * t + dest[i][ACOMP] * s + 1) >> 8;
+#elif 0
+            /* This is slower but satisfies Glean */
+            const GLint s = CHAN_MAX - t;
+            const GLint r = (rgba[i][RCOMP] * t + dest[i][RCOMP] * s) / 255;
+            const GLint g = (rgba[i][GCOMP] * t + dest[i][GCOMP] * s) / 255;
+            const GLint b = (rgba[i][BCOMP] * t + dest[i][BCOMP] * s) / 255;
+            const GLint a = (rgba[i][ACOMP] * t + dest[i][ACOMP] * s) / 255;
+#else
+#if CHAN_BITS == 8
+            /* This satisfies Glean and should be reasonably fast */
+            /* Contributed by Nathan Hand.  DIV255(X) is (X * 257 + 256) >> 16. */
+#if 0  /* disabled variant: expands its argument X twice */
+#define DIV255(X)  (((X) << 8) + (X) + 256) >> 16
+#else  /* NOTE(review): (src - dest) * t is negative when src < dest, so temp << 8 may shift a negative value */
+	    GLint temp;  /* DIV255 stores its argument here so it is evaluated once */
+#define DIV255(X)  (temp = (X), ((temp << 8) + temp + 256) >> 16)
+#endif
+            const GLint r = DIV255((rgba[i][RCOMP] - dest[i][RCOMP]) * t) + dest[i][RCOMP];
+            const GLint g = DIV255((rgba[i][GCOMP] - dest[i][GCOMP]) * t) + dest[i][GCOMP];
+            const GLint b = DIV255((rgba[i][BCOMP] - dest[i][BCOMP]) * t) + dest[i][BCOMP];
+            const GLint a = DIV255((rgba[i][ACOMP] - dest[i][ACOMP]) * t) + dest[i][ACOMP]; 
+
+#undef DIV255
+#elif CHAN_BITS == 16
+            const GLfloat tt = (GLfloat) t / CHAN_MAXF;  /* alpha scaled by 1 / CHAN_MAXF */
+            const GLint r = (GLint) ((rgba[i][RCOMP] - dest[i][RCOMP]) * tt + dest[i][RCOMP]);
+            const GLint g = (GLint) ((rgba[i][GCOMP] - dest[i][GCOMP]) * tt + dest[i][GCOMP]);
+            const GLint b = (GLint) ((rgba[i][BCOMP] - dest[i][BCOMP]) * tt + dest[i][BCOMP]);
+            const GLint a = (GLint) ((rgba[i][ACOMP] - dest[i][ACOMP]) * tt + dest[i][ACOMP]);
+#else /* CHAN_BITS == 32; NOTE(review): alpha below is weighted by t, not tt -- confirm intended */
+            const GLfloat tt = (GLfloat) t / CHAN_MAXF;
+            const GLfloat r = (rgba[i][RCOMP] - dest[i][RCOMP]) * tt + dest[i][RCOMP];
+            const GLfloat g = (rgba[i][GCOMP] - dest[i][GCOMP]) * tt + dest[i][GCOMP];
+            const GLfloat b = (rgba[i][BCOMP] - dest[i][BCOMP]) * tt + dest[i][BCOMP];
+            const GLfloat a = CLAMP( rgba[i][ACOMP], 0.0F, CHAN_MAXF ) * t +
+                              CLAMP( dest[i][ACOMP], 0.0F, CHAN_MAXF ) * (1.0F - t);
+#endif
+#endif
+            ASSERT(r <= CHAN_MAX);
+            ASSERT(g <= CHAN_MAX);
+            ASSERT(b <= CHAN_MAX);
+            ASSERT(a <= CHAN_MAX);
+            rgba[i][RCOMP] = (GLchan) r;
+            rgba[i][GCOMP] = (GLchan) g;
+            rgba[i][BCOMP] = (GLchan) b;
+            rgba[i][ACOMP] = (GLchan) a;
+         }
+      }
+   }
+}
+
+
+
+/*
+ * Additive blend for glBlendFunc(GL_ONE, GL_ONE):  result = src + dest.
+ */
+static void _BLENDAPI
+blend_add( GLcontext *ctx, GLuint n, const GLubyte mask[],
+           GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   GLuint px;
+   ASSERT(ctx->Color.BlendEquationRGB==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendEquationA==GL_FUNC_ADD);
+   ASSERT(ctx->Color.BlendSrcRGB==GL_ONE);
+   ASSERT(ctx->Color.BlendDstRGB==GL_ONE);
+   (void) ctx;
+
+   for (px = 0; px < n; px++) {
+      if (mask[px]) {
+#if CHAN_TYPE == GL_FLOAT
+         /* RGB sums are stored unclamped; only alpha is limited to CHAN_MAXF */
+         const GLfloat sumA = CLAMP(rgba[px][ACOMP], 0.0F, CHAN_MAXF) + dest[px][ACOMP];
+         rgba[px][RCOMP] = rgba[px][RCOMP] + dest[px][RCOMP];
+         rgba[px][GCOMP] = rgba[px][GCOMP] + dest[px][GCOMP];
+         rgba[px][BCOMP] = rgba[px][BCOMP] + dest[px][BCOMP];
+         rgba[px][ACOMP] = (GLchan) MIN2( sumA, CHAN_MAXF );
+#else
+         const GLint sumR = rgba[px][RCOMP] + dest[px][RCOMP];
+         const GLint sumG = rgba[px][GCOMP] + dest[px][GCOMP];
+         const GLint sumB = rgba[px][BCOMP] + dest[px][BCOMP];
+         const GLint sumA = rgba[px][ACOMP] + dest[px][ACOMP];
+         rgba[px][RCOMP] = (GLchan) MIN2( sumR, CHAN_MAX );
+         rgba[px][GCOMP] = (GLchan) MIN2( sumG, CHAN_MAX );
+         rgba[px][BCOMP] = (GLchan) MIN2( sumB, CHAN_MAX );
+         rgba[px][ACOMP] = (GLchan) MIN2( sumA, CHAN_MAX );
+#endif
+      }
+   }
+}
+
+
+
+/*
+ * GL_MIN blend equation (GL_EXT_blend_minmax): keep the smaller of src, dest.
+ */
+static void _BLENDAPI
+blend_min( GLcontext *ctx, GLuint n, const GLubyte mask[],
+           GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   GLuint px;
+   ASSERT(ctx->Color.BlendEquationRGB==GL_MIN);
+   ASSERT(ctx->Color.BlendEquationA==GL_MIN);
+   (void) ctx;
+
+   for (px = 0; px < n; px++) {
+      if (!mask[px])
+         continue;   /* masked-out pixel: leave rgba[px] untouched */
+      rgba[px][RCOMP] = (GLchan) MIN2( rgba[px][RCOMP], dest[px][RCOMP] );
+      rgba[px][GCOMP] = (GLchan) MIN2( rgba[px][GCOMP], dest[px][GCOMP] );
+      rgba[px][BCOMP] = (GLchan) MIN2( rgba[px][BCOMP], dest[px][BCOMP] );
+#if CHAN_TYPE == GL_FLOAT
+      rgba[px][ACOMP] = (GLchan) MIN2(CLAMP(rgba[px][ACOMP], 0.0F, CHAN_MAXF),
+                                      dest[px][ACOMP]);
+#else
+      rgba[px][ACOMP] = (GLchan) MIN2( rgba[px][ACOMP], dest[px][ACOMP] );
+#endif
+   }
+}
+
+
+
+/*
+ * GL_MAX blend equation (GL_EXT_blend_minmax): keep the larger of src, dest.
+ */
+static void _BLENDAPI
+blend_max( GLcontext *ctx, GLuint n, const GLubyte mask[],
+           GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   GLuint px;
+   ASSERT(ctx->Color.BlendEquationRGB==GL_MAX);
+   ASSERT(ctx->Color.BlendEquationA==GL_MAX);
+   (void) ctx;
+
+   for (px = 0; px < n; px++) {
+      if (!mask[px])
+         continue;   /* masked-out pixel: leave rgba[px] untouched */
+      rgba[px][RCOMP] = (GLchan) MAX2( rgba[px][RCOMP], dest[px][RCOMP] );
+      rgba[px][GCOMP] = (GLchan) MAX2( rgba[px][GCOMP], dest[px][GCOMP] );
+      rgba[px][BCOMP] = (GLchan) MAX2( rgba[px][BCOMP], dest[px][BCOMP] );
+#if CHAN_TYPE == GL_FLOAT
+      rgba[px][ACOMP] = (GLchan) MAX2(CLAMP(rgba[px][ACOMP], 0.0F, CHAN_MAXF),
+                                      dest[px][ACOMP]);
+#else
+      rgba[px][ACOMP] = (GLchan) MAX2( rgba[px][ACOMP], dest[px][ACOMP] );
+#endif
+   }
+}
+
+
+
+/*
+ * Modulate:  result = src * dest.  For each of the n pixels with mask[i] set,
+ * rgba[i] (in/out) is multiplied per channel by dest[i]; integer channel
+ * products get a round-up bias and are shifted back into channel range. */
+static void _BLENDAPI
+blend_modulate( GLcontext *ctx, GLuint n, const GLubyte mask[],
+                GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   GLuint i;
+   (void) ctx;
+
+   for (i=0;i<n;i++) {
+      if (mask[i]) {
+#if CHAN_TYPE == GL_FLOAT
+         rgba[i][RCOMP] = rgba[i][RCOMP] * dest[i][RCOMP];
+         rgba[i][GCOMP] = rgba[i][GCOMP] * dest[i][GCOMP];
+         rgba[i][BCOMP] = rgba[i][BCOMP] * dest[i][BCOMP];
+         rgba[i][ACOMP] = rgba[i][ACOMP] * dest[i][ACOMP];
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT
+         /* Multiply as GLuint: the 16-bit operands would promote to signed
+          * int, where 65535 * 65535 overflows (undefined behavior); unsigned,
+          * product + 65535 is at most 65535 * 65536 and fits in 32 bits. */
+         rgba[i][RCOMP] = (GLchan) (((GLuint) rgba[i][RCOMP] * dest[i][RCOMP] + 65535) >> 16);
+         rgba[i][GCOMP] = (GLchan) (((GLuint) rgba[i][GCOMP] * dest[i][GCOMP] + 65535) >> 16);
+         rgba[i][BCOMP] = (GLchan) (((GLuint) rgba[i][BCOMP] * dest[i][BCOMP] + 65535) >> 16);
+         rgba[i][ACOMP] = (GLchan) (((GLuint) rgba[i][ACOMP] * dest[i][ACOMP] + 65535) >> 16);
+#else
+         GLint r = (rgba[i][RCOMP] * dest[i][RCOMP] + 255) >> 8;
+         GLint g = (rgba[i][GCOMP] * dest[i][GCOMP] + 255) >> 8;
+         GLint b = (rgba[i][BCOMP] * dest[i][BCOMP] + 255) >> 8;
+         GLint a = (rgba[i][ACOMP] * dest[i][ACOMP] + 255) >> 8;
+         rgba[i][RCOMP] = (GLchan) r;
+         rgba[i][GCOMP] = (GLchan) g;
+         rgba[i][BCOMP] = (GLchan) b;
+         rgba[i][ACOMP] = (GLchan) a;
+#endif
+      }
+   }
+}
+
+
+
+/*
+ * General case blend pixels; factors and equations are read from ctx->Color.
+ * Input:  n - number of pixels
+ *         mask - the usual write mask; pixels with mask[i] == 0 are skipped
+ * In/Out:  rgba - the incoming and modified pixels
+ * Input:  dest - the pixels from the dest color buffer
+ */
+static void _BLENDAPI
+blend_general( GLcontext *ctx, GLuint n, const GLubyte mask[],
+               GLchan rgba[][4], CONST GLchan dest[][4] )
+{
+   const GLfloat rscale = 1.0F / CHAN_MAXF;  /* scales a channel value by 1 / CHAN_MAXF */
+   const GLfloat gscale = 1.0F / CHAN_MAXF;
+   const GLfloat bscale = 1.0F / CHAN_MAXF;
+   const GLfloat ascale = 1.0F / CHAN_MAXF;
+   GLuint i;
+
+   for (i=0;i<n;i++) {
+      if (mask[i]) {
+#if CHAN_TYPE == GL_FLOAT
+         GLfloat Rs, Gs, Bs, As;  /* Source colors */
+         GLfloat Rd, Gd, Bd, Ad;  /* Dest colors */
+#else
+         GLint Rs, Gs, Bs, As;  /* Source colors */
+         GLint Rd, Gd, Bd, Ad;  /* Dest colors */
+#endif
+         GLfloat sR, sG, sB, sA;  /* Source scaling */
+         GLfloat dR, dG, dB, dA;  /* Dest scaling */
+         GLfloat r, g, b, a;      /* result color */
+
+         /* Incoming/source Color */
+         Rs = rgba[i][RCOMP];
+         Gs = rgba[i][GCOMP];
+         Bs = rgba[i][BCOMP];
+         As = rgba[i][ACOMP];
+#if CHAN_TYPE == GL_FLOAT
+         /* clamp (upper bound only) */
+         Rs = MIN2(Rs, CHAN_MAXF);
+         Gs = MIN2(Gs, CHAN_MAXF);
+         Bs = MIN2(Bs, CHAN_MAXF);
+         As = MIN2(As, CHAN_MAXF);
+#endif
+
+         /* Frame buffer/dest color */
+         Rd = dest[i][RCOMP];
+         Gd = dest[i][GCOMP];
+         Bd = dest[i][BCOMP];
+         Ad = dest[i][ACOMP];
+#if CHAN_TYPE == GL_FLOAT
+         /* clamp (upper bound only) */
+         Rd = MIN2(Rd, CHAN_MAXF);
+         Gd = MIN2(Gd, CHAN_MAXF);
+         Bd = MIN2(Bd, CHAN_MAXF);
+         Ad = MIN2(Ad, CHAN_MAXF);
+#endif
+
+         /* Source RGB factor */
+         switch (ctx->Color.BlendSrcRGB) {
+            case GL_ZERO:
+               sR = sG = sB = 0.0F;
+               break;
+            case GL_ONE:
+               sR = sG = sB = 1.0F;
+               break;
+            case GL_DST_COLOR:
+               sR = (GLfloat) Rd * rscale;
+               sG = (GLfloat) Gd * gscale;
+               sB = (GLfloat) Bd * bscale;
+               break;
+            case GL_ONE_MINUS_DST_COLOR:
+               sR = 1.0F - (GLfloat) Rd * rscale;
+               sG = 1.0F - (GLfloat) Gd * gscale;
+               sB = 1.0F - (GLfloat) Bd * bscale;
+               break;
+            case GL_SRC_ALPHA:
+               sR = sG = sB = (GLfloat) As * ascale;
+               break;
+            case GL_ONE_MINUS_SRC_ALPHA:
+               sR = sG = sB = 1.0F - (GLfloat) As * ascale;
+               break;
+            case GL_DST_ALPHA:
+               sR = sG = sB = (GLfloat) Ad * ascale;
+               break;
+            case GL_ONE_MINUS_DST_ALPHA:
+               sR = sG = sB = 1.0F - (GLfloat) Ad * ascale;
+               break;
+            case GL_SRC_ALPHA_SATURATE:
+               if (As < CHAN_MAX - Ad) {  /* factor is the smaller of As and (CHAN_MAX - Ad) */
+                  sR = sG = sB = (GLfloat) As * ascale;
+               }
+               else {
+                  sR = sG = sB = 1.0F - (GLfloat) Ad * ascale;
+               }
+               break;
+            case GL_CONSTANT_COLOR:
+               sR = ctx->Color.BlendColor[0];
+               sG = ctx->Color.BlendColor[1];
+               sB = ctx->Color.BlendColor[2];
+               break;
+            case GL_ONE_MINUS_CONSTANT_COLOR:
+               sR = 1.0F - ctx->Color.BlendColor[0];
+               sG = 1.0F - ctx->Color.BlendColor[1];
+               sB = 1.0F - ctx->Color.BlendColor[2];
+               break;
+            case GL_CONSTANT_ALPHA:
+               sR = sG = sB = ctx->Color.BlendColor[3];
+               break;
+            case GL_ONE_MINUS_CONSTANT_ALPHA:
+               sR = sG = sB = 1.0F - ctx->Color.BlendColor[3];
+               break;
+            case GL_SRC_COLOR: /* GL_NV_blend_square */
+               sR = (GLfloat) Rs * rscale;
+               sG = (GLfloat) Gs * gscale;
+               sB = (GLfloat) Bs * bscale;
+               break;
+            case GL_ONE_MINUS_SRC_COLOR: /* GL_NV_blend_square */
+               sR = 1.0F - (GLfloat) Rs * rscale;
+               sG = 1.0F - (GLfloat) Gs * gscale;
+               sB = 1.0F - (GLfloat) Bs * bscale;
+               break;
+            default:
+               /* this should never happen */
+               _mesa_problem(ctx, "Bad blend source RGB factor in do_blend");
+               return;  /* leaves this and all later pixels unblended */
+         }
+
+         /* Source Alpha factor */
+         switch (ctx->Color.BlendSrcA) {
+            case GL_ZERO:
+               sA = 0.0F;
+               break;
+            case GL_ONE:
+               sA = 1.0F;
+               break;
+            case GL_DST_COLOR:
+               sA = (GLfloat) Ad * ascale;
+               break;
+            case GL_ONE_MINUS_DST_COLOR:
+               sA = 1.0F - (GLfloat) Ad * ascale;
+               break;
+            case GL_SRC_ALPHA:
+               sA = (GLfloat) As * ascale;
+               break;
+            case GL_ONE_MINUS_SRC_ALPHA:
+               sA = 1.0F - (GLfloat) As * ascale;
+               break;
+            case GL_DST_ALPHA:
+               sA =(GLfloat) Ad * ascale;
+               break;
+            case GL_ONE_MINUS_DST_ALPHA:
+               sA = 1.0F - (GLfloat) Ad * ascale;
+               break;
+            case GL_SRC_ALPHA_SATURATE:
+               sA = 1.0;
+               break;
+            case GL_CONSTANT_COLOR:
+               sA = ctx->Color.BlendColor[3];
+               break;
+            case GL_ONE_MINUS_CONSTANT_COLOR:
+               sA = 1.0F - ctx->Color.BlendColor[3];
+               break;
+            case GL_CONSTANT_ALPHA:
+               sA = ctx->Color.BlendColor[3];
+               break;
+            case GL_ONE_MINUS_CONSTANT_ALPHA:
+               sA = 1.0F - ctx->Color.BlendColor[3];
+               break;
+            case GL_SRC_COLOR: /* GL_NV_blend_square */
+               sA = (GLfloat) As * ascale;
+               break;
+            case GL_ONE_MINUS_SRC_COLOR: /* GL_NV_blend_square */
+               sA = 1.0F - (GLfloat) As * ascale;
+               break;
+            default:
+               /* this should never happen; unlike the RGB case above, blending continues */
+               sA = 0.0F;
+               _mesa_problem(ctx, "Bad blend source A factor in do_blend");
+         }
+
+         /* Dest RGB factor */
+         switch (ctx->Color.BlendDstRGB) {
+            case GL_ZERO:
+               dR = dG = dB = 0.0F;
+               break;
+            case GL_ONE:
+               dR = dG = dB = 1.0F;
+               break;
+            case GL_SRC_COLOR:
+               dR = (GLfloat) Rs * rscale;
+               dG = (GLfloat) Gs * gscale;
+               dB = (GLfloat) Bs * bscale;
+               break;
+            case GL_ONE_MINUS_SRC_COLOR:
+               dR = 1.0F - (GLfloat) Rs * rscale;
+               dG = 1.0F - (GLfloat) Gs * gscale;
+               dB = 1.0F - (GLfloat) Bs * bscale;
+               break;
+            case GL_SRC_ALPHA:
+               dR = dG = dB = (GLfloat) As * ascale;
+               break;
+            case GL_ONE_MINUS_SRC_ALPHA:
+               dR = dG = dB = 1.0F - (GLfloat) As * ascale;
+               break;
+            case GL_DST_ALPHA:
+               dR = dG = dB = (GLfloat) Ad * ascale;
+               break;
+            case GL_ONE_MINUS_DST_ALPHA:
+               dR = dG = dB = 1.0F - (GLfloat) Ad * ascale;
+               break;
+            case GL_CONSTANT_COLOR:
+               dR = ctx->Color.BlendColor[0];
+               dG = ctx->Color.BlendColor[1];
+               dB = ctx->Color.BlendColor[2];
+               break;
+            case GL_ONE_MINUS_CONSTANT_COLOR:
+               dR = 1.0F - ctx->Color.BlendColor[0];
+               dG = 1.0F - ctx->Color.BlendColor[1];
+               dB = 1.0F - ctx->Color.BlendColor[2];
+               break;
+            case GL_CONSTANT_ALPHA:
+               dR = dG = dB = ctx->Color.BlendColor[3];
+               break;
+            case GL_ONE_MINUS_CONSTANT_ALPHA:
+               dR = dG = dB = 1.0F - ctx->Color.BlendColor[3];
+               break;
+            case GL_DST_COLOR: /* GL_NV_blend_square */
+               dR = (GLfloat) Rd * rscale;
+               dG = (GLfloat) Gd * gscale;
+               dB = (GLfloat) Bd * bscale;
+               break;
+            case GL_ONE_MINUS_DST_COLOR: /* GL_NV_blend_square */
+               dR = 1.0F - (GLfloat) Rd * rscale;
+               dG = 1.0F - (GLfloat) Gd * gscale;
+               dB = 1.0F - (GLfloat) Bd * bscale;
+               break;
+            default:
+               /* this should never happen; blending continues with zero factors */
+               dR = dG = dB = 0.0F;
+               _mesa_problem(ctx, "Bad blend dest RGB factor in do_blend");
+         }
+
+         /* Dest Alpha factor */
+         switch (ctx->Color.BlendDstA) {
+            case GL_ZERO:
+               dA = 0.0F;
+               break;
+            case GL_ONE:
+               dA = 1.0F;
+               break;
+            case GL_SRC_COLOR:
+               dA = (GLfloat) As * ascale;
+               break;
+            case GL_ONE_MINUS_SRC_COLOR:
+               dA = 1.0F - (GLfloat) As * ascale;
+               break;
+            case GL_SRC_ALPHA:
+               dA = (GLfloat) As * ascale;
+               break;
+            case GL_ONE_MINUS_SRC_ALPHA:
+               dA = 1.0F - (GLfloat) As * ascale;
+               break;
+            case GL_DST_ALPHA:
+               dA = (GLfloat) Ad * ascale;
+               break;
+            case GL_ONE_MINUS_DST_ALPHA:
+               dA = 1.0F - (GLfloat) Ad * ascale;
+               break;
+            case GL_CONSTANT_COLOR:
+               dA = ctx->Color.BlendColor[3];
+               break;
+            case GL_ONE_MINUS_CONSTANT_COLOR:
+               dA = 1.0F - ctx->Color.BlendColor[3];
+               break;
+            case GL_CONSTANT_ALPHA:
+               dA = ctx->Color.BlendColor[3];
+               break;
+            case GL_ONE_MINUS_CONSTANT_ALPHA:
+               dA = 1.0F - ctx->Color.BlendColor[3];
+               break;
+            case GL_DST_COLOR: /* GL_NV_blend_square */
+               dA = (GLfloat) Ad * ascale;
+               break;
+            case GL_ONE_MINUS_DST_COLOR: /* GL_NV_blend_square */
+               dA = 1.0F - (GLfloat) Ad * ascale;
+               break;
+            default:
+               /* this should never happen */
+               dA = 0.0F;
+               _mesa_problem(ctx, "Bad blend dest A factor in do_blend");
+               return;  /* leaves this and all later pixels unblended */
+         }
+
+         /* Due to round-off problems we have to clamp against zero. */
+         /* Optimization: we don't have to do this for all src & dst factors */
+         if (dA < 0.0F)  dA = 0.0F;
+         if (dR < 0.0F)  dR = 0.0F;
+         if (dG < 0.0F)  dG = 0.0F;
+         if (dB < 0.0F)  dB = 0.0F;
+         if (sA < 0.0F)  sA = 0.0F;
+         if (sR < 0.0F)  sR = 0.0F;
+         if (sG < 0.0F)  sG = 0.0F;
+         if (sB < 0.0F)  sB = 0.0F;
+
+         ASSERT( sR <= 1.0 );
+         ASSERT( sG <= 1.0 );
+         ASSERT( sB <= 1.0 );
+         ASSERT( sA <= 1.0 );
+         ASSERT( dR <= 1.0 );
+         ASSERT( dG <= 1.0 );
+         ASSERT( dB <= 1.0 );
+         ASSERT( dA <= 1.0 );
+
+         /* compute blended color */
+#if CHAN_TYPE == GL_FLOAT
+         if (ctx->Color.BlendEquationRGB==GL_FUNC_ADD) {
+            r = Rs * sR + Rd * dR;
+            g = Gs * sG + Gd * dG;
+            b = Bs * sB + Bd * dB;
+            a = As * sA + Ad * dA;  /* overwritten by the BlendEquationA chain below */
+         }
+         else if (ctx->Color.BlendEquationRGB==GL_FUNC_SUBTRACT) {
+            r = Rs * sR - Rd * dR;
+            g = Gs * sG - Gd * dG;
+            b = Bs * sB - Bd * dB;
+            a = As * sA - Ad * dA;  /* overwritten by the BlendEquationA chain below */
+         }
+         else if (ctx->Color.BlendEquationRGB==GL_FUNC_REVERSE_SUBTRACT) {
+            r = Rd * dR - Rs * sR;
+            g = Gd * dG - Gs * sG;
+            b = Bd * dB - Bs * sB;
+            a = Ad * dA - As * sA;  /* overwritten by the BlendEquationA chain below */
+         }
+         else if (ctx->Color.BlendEquationRGB==GL_MIN) {
+	    r = MIN2( Rd, Rs );
+	    g = MIN2( Gd, Gs );
+	    b = MIN2( Bd, Bs );
+	 }
+         else if (ctx->Color.BlendEquationRGB==GL_MAX) {
+	    r = MAX2( Rd, Rs );
+	    g = MAX2( Gd, Gs );
+	    b = MAX2( Bd, Bs );
+	 }
+         else {
+            /* should never get here */
+            r = g = b = 0.0F;  /* silence uninitialized var warning */
+            _mesa_problem(ctx, "unexpected BlendEquation in blend_general()");
+         }
+
+         if (ctx->Color.BlendEquationA==GL_FUNC_ADD) {
+            a = As * sA + Ad * dA;
+         }
+         else if (ctx->Color.BlendEquationA==GL_FUNC_SUBTRACT) {
+            a = As * sA - Ad * dA;
+         }
+         else if (ctx->Color.BlendEquationA==GL_FUNC_REVERSE_SUBTRACT) {
+            a = Ad * dA - As * sA;
+         }
+         else if (ctx->Color.BlendEquationA==GL_MIN) {
+	    a = MIN2( Ad, As );
+	 }
+         else if (ctx->Color.BlendEquationA==GL_MAX) {
+	    a = MAX2( Ad, As );
+	 }
+         else {
+            /* should never get here */
+            a = 0.0F;  /* silence uninitialized var warning */
+            _mesa_problem(ctx, "unexpected BlendEquation in blend_general()");
+         }
+
+         /* final clamping: RGB only against zero, alpha to [0, CHAN_MAXF] */
+         rgba[i][RCOMP] = MAX2( r, 0.0F );
+         rgba[i][GCOMP] = MAX2( g, 0.0F );
+         rgba[i][BCOMP] = MAX2( b, 0.0F );
+         rgba[i][ACOMP] = CLAMP( a, 0.0F, CHAN_MAXF );
+#else
+         if (ctx->Color.BlendEquationRGB==GL_FUNC_ADD) {
+            r = Rs * sR + Rd * dR + 0.5F;  /* + 0.5F rounds when cast to GLint below */
+            g = Gs * sG + Gd * dG + 0.5F;
+            b = Bs * sB + Bd * dB + 0.5F;
+         }
+         else if (ctx->Color.BlendEquationRGB==GL_FUNC_SUBTRACT) {
+            r = Rs * sR - Rd * dR + 0.5F;
+            g = Gs * sG - Gd * dG + 0.5F;
+            b = Bs * sB - Bd * dB + 0.5F;
+         }
+         else if (ctx->Color.BlendEquationRGB==GL_FUNC_REVERSE_SUBTRACT) {
+            r = Rd * dR - Rs * sR + 0.5F;
+            g = Gd * dG - Gs * sG + 0.5F;
+            b = Bd * dB - Bs * sB + 0.5F;
+         }
+         else if (ctx->Color.BlendEquationRGB==GL_MIN) {
+	    r = MIN2( Rd, Rs );
+	    g = MIN2( Gd, Gs );
+	    b = MIN2( Bd, Bs );
+	 }
+         else if (ctx->Color.BlendEquationRGB==GL_MAX) {
+	    r = MAX2( Rd, Rs );
+	    g = MAX2( Gd, Gs );
+	    b = MAX2( Bd, Bs );
+	 }
+         else {
+            /* should never get here */
+            r = g = b = 0.0F;  /* silence uninitialized var warning */
+            _mesa_problem(ctx, "unexpected BlendEquation in blend_general()");
+         }
+
+         if (ctx->Color.BlendEquationA==GL_FUNC_ADD) {
+            a = As * sA + Ad * dA + 0.5F;
+         }
+         else if (ctx->Color.BlendEquationA==GL_FUNC_SUBTRACT) {
+            a = As * sA - Ad * dA + 0.5F;
+         }
+         else if (ctx->Color.BlendEquationA==GL_FUNC_REVERSE_SUBTRACT) {
+            a = Ad * dA - As * sA + 0.5F;
+         }
+         else if (ctx->Color.BlendEquationA==GL_MIN) {
+	    a = MIN2( Ad, As );
+	 }
+         else if (ctx->Color.BlendEquationA==GL_MAX) {
+	    a = MAX2( Ad, As );
+	 }
+         else {
+            /* should never get here */
+            a = 0.0F;  /* silence uninitialized var warning */
+            _mesa_problem(ctx, "unexpected BlendEquation in blend_general()");
+         }
+
+         /* final clamping to [0, CHAN_MAXF], then truncation to GLchan */
+         rgba[i][RCOMP] = (GLchan) (GLint) CLAMP( r, 0.0F, CHAN_MAXF );
+         rgba[i][GCOMP] = (GLchan) (GLint) CLAMP( g, 0.0F, CHAN_MAXF );
+         rgba[i][BCOMP] = (GLchan) (GLint) CLAMP( b, 0.0F, CHAN_MAXF );
+         rgba[i][ACOMP] = (GLchan) (GLint) CLAMP( a, 0.0F, CHAN_MAXF );
+#endif
+      }
+   }
+}
+
+
+/*
+ * Analyze current blending parameters to pick fastest blending function.
+ * Result: the SWRAST_CONTEXT(ctx)->BlendFunc pointer is updated.
+ */
+void _swrast_choose_blend_func( GLcontext *ctx )
+{
+   const GLenum eq = ctx->Color.BlendEquationRGB;
+   const GLenum srcRGB = ctx->Color.BlendSrcRGB;
+   const GLenum dstRGB = ctx->Color.BlendDstRGB;
+   const GLenum srcA = ctx->Color.BlendSrcA;
+   const GLenum dstA = ctx->Color.BlendDstA;
+
+   if (ctx->Color.BlendEquationRGB != ctx->Color.BlendEquationA) {
+      /* separate RGB / alpha equations: only the general routine handles them */
+      SWRAST_CONTEXT(ctx)->BlendFunc = blend_general;
+   }
+   else if (eq==GL_MIN) {
+      /* Note: GL_MIN ignores the blending weight factors */
+#if defined(USE_MMX_ASM)
+      if ( cpu_has_mmx ) {
+         SWRAST_CONTEXT(ctx)->BlendFunc = _mesa_mmx_blend_min;
+      }
+      else
+#endif
+         SWRAST_CONTEXT(ctx)->BlendFunc = blend_min;
+   }
+   else if (eq==GL_MAX) {
+      /* Note: GL_MAX ignores the blending weight factors */
+#if defined(USE_MMX_ASM)
+      if ( cpu_has_mmx ) {
+         SWRAST_CONTEXT(ctx)->BlendFunc = _mesa_mmx_blend_max;
+      }
+      else
+#endif
+         SWRAST_CONTEXT(ctx)->BlendFunc = blend_max;
+   }
+   else if (srcRGB != srcA || dstRGB != dstA) {
+      /* the special cases below test only the RGB factors, so RGB and A must match */
+      SWRAST_CONTEXT(ctx)->BlendFunc = blend_general;
+   }
+   else if (eq==GL_FUNC_ADD && srcRGB==GL_SRC_ALPHA
+            && dstRGB==GL_ONE_MINUS_SRC_ALPHA) {
+#if defined(USE_MMX_ASM)
+      if ( cpu_has_mmx ) {
+         SWRAST_CONTEXT(ctx)->BlendFunc = _mesa_mmx_blend_transparency;
+      }
+      else
+#endif
+	 SWRAST_CONTEXT(ctx)->BlendFunc = blend_transparency;
+   }
+   else if (eq==GL_FUNC_ADD && srcRGB==GL_ONE && dstRGB==GL_ONE) {
+#if defined(USE_MMX_ASM)
+      if ( cpu_has_mmx ) {
+         SWRAST_CONTEXT(ctx)->BlendFunc = _mesa_mmx_blend_add;
+      }
+      else
+#endif
+         SWRAST_CONTEXT(ctx)->BlendFunc = blend_add;
+   }
+   else if (((eq==GL_FUNC_ADD || eq==GL_FUNC_REVERSE_SUBTRACT)
+	     && (srcRGB==GL_ZERO && dstRGB==GL_SRC_COLOR))
+	    ||
+	    ((eq==GL_FUNC_ADD || eq==GL_FUNC_SUBTRACT)
+	     && (srcRGB==GL_DST_COLOR && dstRGB==GL_ZERO))) {  /* each combination reduces to src * dest */
+#if defined(USE_MMX_ASM)
+      if ( cpu_has_mmx ) {
+         SWRAST_CONTEXT(ctx)->BlendFunc = _mesa_mmx_blend_modulate;
+      }
+      else
+#endif
+         SWRAST_CONTEXT(ctx)->BlendFunc = blend_modulate;
+   }
+   else if (eq==GL_FUNC_ADD && srcRGB == GL_ZERO && dstRGB == GL_ONE) {
+      SWRAST_CONTEXT(ctx)->BlendFunc = blend_noop;
+   }
+   else if (eq==GL_FUNC_ADD && srcRGB == GL_ONE && dstRGB == GL_ZERO) {
+      SWRAST_CONTEXT(ctx)->BlendFunc = blend_replace;
+   }
+   else {
+      SWRAST_CONTEXT(ctx)->BlendFunc = blend_general;
+   }
+}
+
+
+
+/*
+ * Blend the incoming span colors in rgba[] with the colors already stored
+ * in renderbuffer rb.  The destination pixels are fetched either as a
+ * horizontal run or from the span's x/y coordinate arrays.
+ */
+void
+_swrast_blend_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                   const struct sw_span *span, GLchan rgba[][4])
+{
+   GLchan destColors[MAX_WIDTH][4];
+
+   ASSERT(span->end <= MAX_WIDTH);
+   ASSERT(span->arrayMask & SPAN_RGBA);
+   ASSERT(!ctx->Color._LogicOpEnabled);
+
+   /* Fetch the pixels currently in the color buffer */
+   if (!(span->arrayMask & SPAN_XY)) {
+      /* contiguous run starting at (span->x, span->y) */
+      _swrast_read_rgba_span(ctx, rb, span->end, span->x, span->y,
+                             destColors);
+   }
+   else {
+      /* scattered pixels addressed by span->array->x[] / y[] */
+      _swrast_get_values(ctx, rb, span->end, span->array->x, span->array->y,
+                         destColors, sizeof(destColors[0]));
+   }
+
+   SWRAST_CONTEXT(ctx)->BlendFunc( ctx, span->end, span->array->mask, rgba,
+                                   (const GLchan (*)[4]) destColors );
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_blend.h b/dist/Mesa/src/mesa/swrast/s_blend.h
new file mode 100644
index 000000000..c95193215
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_blend.h
@@ -0,0 +1,43 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_BLEND_H
+#define S_BLEND_H
+
+
+#include "mtypes.h"
+#include "s_context.h"
+
+
+/**
+ * Apply the blending operator to a span of pixels.
+ * The span may be a horizontal run of pixels or an array of x/y pixel
+ * coordinates.
+ */
+extern void
+_swrast_blend_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                   const struct sw_span *span, GLchan rgba[][4]);
+
+
+/**
+ * Pick the blend routine stored in the swrast context's BlendFunc pointer,
+ * according to the blend equation and source/destination factors.
+ */
+extern void
+_swrast_choose_blend_func(GLcontext *ctx);
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_blit.c b/dist/Mesa/src/mesa/swrast/s_blit.c
new file mode 100644
index 000000000..5aec4aad0
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_blit.c
@@ -0,0 +1,790 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "s_context.h"
+
+
+/*
+ * Absolute value.  X appears more than once in the expansion, so the
+ * argument must not have side effects.
+ */
+#define ABS(X)   ((X) < 0 ? -(X) : (X))
+
+
+/**
+ * Generate a row resampler function for GL_NEAREST mode.
+ *
+ * NAME is the name of the generated function, PIXELTYPE the element type
+ * used to move data and SIZE the number of PIXELTYPE elements per pixel
+ * (1, 2 or 4; any other value copies nothing).
+ *
+ * The generated function fills dstWidth pixels of dstBuffer; destination
+ * column c takes source column (c * srcWidth) / dstWidth, mirrored within
+ * the source row when 'flip' is true.
+ */
+#define RESAMPLE(NAME, PIXELTYPE, SIZE)                  \
+static void                                              \
+NAME(GLint srcWidth, GLint dstWidth,                     \
+     const GLvoid *srcBuffer, GLvoid *dstBuffer,         \
+     GLboolean flip)                                     \
+{                                                        \
+   const PIXELTYPE *in = (const PIXELTYPE *) srcBuffer;  \
+   PIXELTYPE *out = (PIXELTYPE *) dstBuffer;             \
+   GLint col;                                            \
+                                                         \
+   for (col = 0; col < dstWidth; col++) {                \
+      GLint from = (col * srcWidth) / dstWidth;          \
+      ASSERT(from >= 0);                                 \
+      ASSERT(from < srcWidth);                           \
+      if (flip) {                                        \
+         from = srcWidth - 1 - from; /* mirror */        \
+      }                                                  \
+      switch (SIZE) {                                    \
+      case 1:                                            \
+         out[col] = in[from];                            \
+         break;                                          \
+      case 2:                                            \
+         out[col*2+0] = in[from*2+0];                    \
+         out[col*2+1] = in[from*2+1];                    \
+         break;                                          \
+      case 4:                                            \
+         out[col*4+0] = in[from*4+0];                    \
+         out[col*4+1] = in[from*4+1];                    \
+         out[col*4+2] = in[from*4+2];                    \
+         out[col*4+3] = in[from*4+3];                    \
+         break;                                          \
+      default:                                           \
+         break;                                          \
+      }                                                  \
+   }                                                     \
+}
+
+/**
+ * Resamplers for 1, 2, 4, 8 and 16-byte pixels.
+ *
+ * The first three move one element of the named type per pixel; the 8- and
+ * 16-byte variants move two and four GLuint elements per pixel (this
+ * assumes GLuint is 4 bytes wide -- TODO confirm for the target platform).
+ */
+RESAMPLE(resample_row_1, GLubyte, 1)
+RESAMPLE(resample_row_2, GLushort, 1)
+RESAMPLE(resample_row_4, GLuint, 1)
+RESAMPLE(resample_row_8, GLuint, 2)
+RESAMPLE(resample_row_16, GLuint, 4)
+
+
+/**
+ * Blit color, depth or stencil with GL_NEAREST filtering.
+ *
+ * The source and destination rectangles may differ in size and either may
+ * be flipped (X0 > X1 and/or Y0 > Y1).  For every destination row the
+ * matching source row is fetched with GetRow(), resampled to the
+ * destination width and stored with PutRow().  A source row is fetched and
+ * resampled only when it differs from the one used for the previous
+ * destination row.
+ *
+ * \param buffer  one of GL_COLOR_BUFFER_BIT, GL_DEPTH_BUFFER_BIT or
+ *                GL_STENCIL_BUFFER_BIT; anything else is reported through
+ *                _mesa_problem() and nothing is drawn
+ */
+static void
+blit_nearest(GLcontext *ctx,
+             GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+             GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+             GLenum buffer)
+{
+   typedef void (*resample_func)(GLint srcWidth, GLint dstWidth,
+                                 const GLvoid *srcBuffer, GLvoid *dstBuffer,
+                                 GLboolean flip);
+
+   /* rectangle extents, independent of orientation */
+   const GLint srcW = ABS(srcX1 - srcX0);
+   const GLint dstW = ABS(dstX1 - dstX0);
+   const GLint srcH = ABS(srcY1 - srcY0);
+   const GLint dstH = ABS(dstY1 - dstY0);
+
+   /* lower-left corner of each rectangle */
+   const GLint srcLeft = MIN2(srcX0, srcX1);
+   const GLint srcBottom = MIN2(srcY0, srcY1);
+   const GLint dstLeft = MIN2(dstX0, dstX1);
+   const GLint dstBottom = MIN2(dstY0, dstY1);
+
+   /* mirror only when exactly one of the two rectangles is flipped */
+   const GLboolean mirrorX = (srcX1 < srcX0) ^ (dstX1 < dstX0);
+   const GLboolean mirrorY = (srcY1 < srcY0) ^ (dstY1 < dstY0);
+
+   struct gl_renderbuffer *readRb, *drawRb;
+   resample_func resampleRow;
+   GLvoid *srcRowBuf, *dstRowBuf;
+   GLint comps, chanSize, pixelSize;
+   GLint cachedSrcY = -1;   /* source row currently held in dstRowBuf */
+   GLint row;
+
+   /* pick the renderbuffers and the number of components per pixel */
+   if (buffer == GL_COLOR_BUFFER_BIT) {
+      readRb = ctx->ReadBuffer->_ColorReadBuffer;
+      drawRb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+      comps = 4;
+   }
+   else if (buffer == GL_DEPTH_BUFFER_BIT) {
+      readRb = ctx->ReadBuffer->_DepthBuffer;
+      drawRb = ctx->DrawBuffer->_DepthBuffer;
+      comps = 1;
+   }
+   else if (buffer == GL_STENCIL_BUFFER_BIT) {
+      readRb = ctx->ReadBuffer->_StencilBuffer;
+      drawRb = ctx->DrawBuffer->_StencilBuffer;
+      comps = 1;
+   }
+   else {
+      _mesa_problem(ctx, "unexpected buffer in blit_nearest()");
+      return;
+   }
+
+   /* bytes per component, taken from the read buffer's data type */
+   switch (readRb->DataType) {
+   case GL_UNSIGNED_BYTE:
+      chanSize = (GLint) sizeof(GLubyte);
+      break;
+   case GL_UNSIGNED_SHORT:
+      chanSize = (GLint) sizeof(GLushort);
+      break;
+   case GL_UNSIGNED_INT:
+      chanSize = (GLint) sizeof(GLuint);
+      break;
+   case GL_FLOAT:
+      chanSize = (GLint) sizeof(GLfloat);
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected buffer type (0x%x) in blit_nearest",
+                    readRb->DataType);
+      return;
+   }
+   pixelSize = comps * chanSize;
+
+   /* choose the row resampler matching the pixel size */
+   switch (pixelSize) {
+   case 1:
+      resampleRow = resample_row_1;
+      break;
+   case 2:
+      resampleRow = resample_row_2;
+      break;
+   case 4:
+      resampleRow = resample_row_4;
+      break;
+   case 8:
+      resampleRow = resample_row_8;
+      break;
+   case 16:
+      resampleRow = resample_row_16;
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected pixel size (%d) in blit_nearest",
+                    pixelSize);
+      return;
+   }
+
+   /* one temporary row for the source, one for the resampled result */
+   srcRowBuf = _mesa_malloc(pixelSize * srcW);
+   if (!srcRowBuf) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBlitFrameBufferEXT");
+      return;
+   }
+   dstRowBuf = _mesa_malloc(pixelSize * dstW);
+   if (!dstRowBuf) {
+      _mesa_free(srcRowBuf);
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBlitFrameBufferEXT");
+      return;
+   }
+
+   for (row = 0; row < dstH; row++) {
+      GLint srcRow = (row * srcH) / dstH;
+      GLint srcY;
+
+      ASSERT(srcRow >= 0);
+      ASSERT(srcRow < srcH);
+
+      if (mirrorY) {
+         srcRow = srcH - 1 - srcRow;
+      }
+      srcY = srcBottom + srcRow;
+
+      /* fetch and resample only when the source row changes */
+      if (srcY != cachedSrcY) {
+         readRb->GetRow(ctx, readRb, srcW, srcLeft, srcY, srcRowBuf);
+         (*resampleRow)(srcW, dstW, srcRowBuf, dstRowBuf, mirrorX);
+         cachedSrcY = srcY;
+      }
+
+      /* write the resampled row to the destination */
+      drawRb->PutRow(ctx, drawRb, dstW, dstLeft, dstBottom + row,
+                     dstRowBuf, NULL);
+   }
+
+   _mesa_free(srcRowBuf);
+   _mesa_free(dstRowBuf);
+}
+
+
+
+/** Linear interpolation: yields A for T == 0 and B for T == 1. */
+#define LERP(T, A, B)  ( (A) + (T) * ((B) - (A)) )
+
+/**
+ * Bilinear interpolation of four samples.
+ *
+ * \param a  weight applied between v00 and v10, and between v01 and v11
+ * \param b  weight applied between those two intermediate results
+ * \return the interpolated value
+ */
+static INLINE GLfloat
+lerp_2d(GLfloat a, GLfloat b,
+        GLfloat v00, GLfloat v10, GLfloat v01, GLfloat v11)
+{
+   GLfloat first, second;
+
+   first = LERP(a, v00, v10);
+   second = LERP(a, v01, v11);
+   return LERP(b, first, second);
+}
+
+
+/**
+ * Bilinear interpolation of two source rows.
+ * GLubyte pixels.
+ *
+ * \param srcWidth    number of pixels in each source row
+ * \param dstWidth    number of pixels to produce
+ * \param srcBuffer0  first source row, 4 GLubytes per pixel
+ * \param srcBuffer1  second source row, 4 GLubytes per pixel
+ * \param dstBuffer   output row, 4 GLubytes per pixel
+ * \param flip        if true, sample the source rows mirrored
+ * \param rowWeight   interpolation weight between the two source rows
+ */
+static void
+resample_linear_row_ub(GLint srcWidth, GLint dstWidth,
+                       const GLvoid *srcBuffer0, const GLvoid *srcBuffer1,
+                       GLvoid *dstBuffer, GLboolean flip, GLfloat rowWeight)
+{
+   const GLubyte (*row0)[4] = (const GLubyte (*)[4]) srcBuffer0;
+   const GLubyte (*row1)[4] = (const GLubyte (*)[4]) srcBuffer1;
+   GLubyte (*out)[4] = (GLubyte (*)[4]) dstBuffer;
+   const GLfloat dstWidthF = (GLfloat) dstWidth;
+   GLint i;
+
+   for (i = 0; i < dstWidth; i++) {
+      /* fractional source position of this destination pixel */
+      const GLfloat pos = (i * srcWidth) / dstWidthF;
+      GLint left = IFLOOR(pos);
+      GLint right = left + 1;
+      GLfloat colWeight = pos - left;
+      const GLubyte *p00, *p10, *p01, *p11;
+      GLfloat red, green, blue, alpha;
+
+      ASSERT(left >= 0);
+      ASSERT(left < srcWidth);
+      ASSERT(right <= srcWidth);
+
+      if (right == srcWidth) {
+         /* no neighbour past the last column: reuse it with zero weight */
+         right--;
+         colWeight = 0.0;
+      }
+
+      if (flip) {
+         left = srcWidth - 1 - left;
+         right = srcWidth - 1 - right;
+      }
+
+      /* the four texels surrounding the sample position */
+      p00 = row0[left];
+      p10 = row0[right];
+      p01 = row1[left];
+      p11 = row1[right];
+
+      red = lerp_2d(colWeight, rowWeight,
+                    p00[RCOMP], p10[RCOMP], p01[RCOMP], p11[RCOMP]);
+      green = lerp_2d(colWeight, rowWeight,
+                      p00[GCOMP], p10[GCOMP], p01[GCOMP], p11[GCOMP]);
+      blue = lerp_2d(colWeight, rowWeight,
+                     p00[BCOMP], p10[BCOMP], p01[BCOMP], p11[BCOMP]);
+      alpha = lerp_2d(colWeight, rowWeight,
+                      p00[ACOMP], p10[ACOMP], p01[ACOMP], p11[ACOMP]);
+
+      out[i][RCOMP] = IFLOOR(red);
+      out[i][GCOMP] = IFLOOR(green);
+      out[i][BCOMP] = IFLOOR(blue);
+      out[i][ACOMP] = IFLOOR(alpha);
+   }
+}
+
+
+
+/**
+ * Bilinear filtered blit (color only).
+ *
+ * Reads from the current color read buffer and writes to the first color
+ * draw buffer.  Only GL_UNSIGNED_BYTE color buffers can be resampled; any
+ * other channel type is reported through _mesa_problem() and nothing is
+ * drawn.
+ *
+ * Two adjacent source rows are kept in memory at a time.  When the next
+ * destination row needs the row already held in the second buffer as its
+ * first row, the buffers are swapped and only one new row is fetched.
+ *
+ * The rectangles may differ in size and either may be flipped
+ * (X0 > X1 and/or Y0 > Y1).
+ */
+static void
+blit_linear(GLcontext *ctx,
+            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+            GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1)
+{
+   struct gl_renderbuffer *readRb = ctx->ReadBuffer->_ColorReadBuffer;
+   struct gl_renderbuffer *drawRb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+
+   const GLint srcWidth = ABS(srcX1 - srcX0);
+   const GLint dstWidth = ABS(dstX1 - dstX0);
+   const GLint srcHeight = ABS(srcY1 - srcY0);
+   const GLint dstHeight = ABS(dstY1 - dstY0);
+   const GLfloat dstHeightF = (GLfloat) dstHeight;
+
+   const GLint srcXpos = MIN2(srcX0, srcX1);
+   const GLint srcYpos = MIN2(srcY0, srcY1);
+   const GLint dstXpos = MIN2(dstX0, dstX1);
+   const GLint dstYpos = MIN2(dstY0, dstY1);
+
+   /* mirror only when exactly one of the two rectangles is flipped */
+   const GLboolean invertX = (srcX1 < srcX0) ^ (dstX1 < dstX0);
+   const GLboolean invertY = (srcY1 < srcY0) ^ (dstY1 < dstY0);
+
+   GLint dstRow;
+
+   GLint pixelSize;
+   GLvoid *srcBuffer0, *srcBuffer1;
+   /* window Y of the rows currently held in srcBuffer0 / srcBuffer1 */
+   GLint srcBufferY0 = -1, srcBufferY1 = -1;
+   GLvoid *dstBuffer;
+
+   switch (readRb->DataType) {
+   case GL_UNSIGNED_BYTE:
+      pixelSize = 4 * sizeof(GLubyte);
+      break;
+   case GL_UNSIGNED_SHORT:
+      pixelSize = 4 * sizeof(GLushort);
+      break;
+   case GL_UNSIGNED_INT:
+      pixelSize = 4 * sizeof(GLuint);
+      break;
+   case GL_FLOAT:
+      pixelSize = 4 * sizeof(GLfloat);
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected buffer type (0x%x) in blit_linear",
+                    readRb->DataType);
+      return;
+   }
+
+   /* Only a GLubyte row resampler exists.  Reject other channel types up
+    * front instead of allocating buffers and reading rows first.
+    */
+   if (readRb->DataType != GL_UNSIGNED_BYTE) {
+      _mesa_problem(ctx, "Unsupported color channel type in sw blit");
+      return;
+   }
+
+   /* Allocate the src/dst row buffers.
+    * Keep two adjacent src rows around for bilinear sampling.
+    */
+   srcBuffer0 = _mesa_malloc(pixelSize * srcWidth);
+   if (!srcBuffer0) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBlitFrameBufferEXT");
+      return;
+   }
+   srcBuffer1 = _mesa_malloc(pixelSize * srcWidth);
+   if (!srcBuffer1) {
+      _mesa_free(srcBuffer0);
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBlitFrameBufferEXT");
+      return;
+   }
+   dstBuffer = _mesa_malloc(pixelSize * dstWidth);
+   if (!dstBuffer) {
+      _mesa_free(srcBuffer0);
+      _mesa_free(srcBuffer1);
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBlitFrameBufferEXT");
+      return;
+   }
+
+   for (dstRow = 0; dstRow < dstHeight; dstRow++) {
+      const GLint dstY = dstYpos + dstRow;
+      const GLfloat srcRow = (dstRow * srcHeight) / dstHeightF;
+      GLint srcRow0 = IFLOOR(srcRow);
+      GLint srcRow1 = srcRow0 + 1;
+      GLfloat rowWeight = srcRow - srcRow0; /* fractional part of srcRow */
+      GLint rowY0, rowY1;  /* window Y of the two source rows to sample */
+
+      ASSERT(srcRow >= 0);
+      ASSERT(srcRow < srcHeight);
+
+      if (srcRow1 == srcHeight) {
+         /* last row fudge: no row above, sample the same row twice */
+         srcRow1 = srcRow0;
+         rowWeight = 0.0;
+      }
+
+      if (invertY) {
+         srcRow0 = srcHeight - 1 - srcRow0;
+         srcRow1 = srcHeight - 1 - srcRow1;
+      }
+
+      rowY0 = srcYpos + srcRow0;
+      rowY1 = srcYpos + srcRow1;
+
+      /* get the two source rows */
+      if (rowY0 == srcBufferY0 && rowY1 == srcBufferY1) {
+         /* use same source row buffers again */
+      }
+      else if (rowY0 == srcBufferY1) {
+         /* move buffer1 into buffer0 by swapping pointers */
+         GLvoid *tmp = srcBuffer0;
+         srcBuffer0 = srcBuffer1;
+         srcBuffer1 = tmp;
+         /* get y1 row */
+         readRb->GetRow(ctx, readRb, srcWidth, srcXpos, rowY1, srcBuffer1);
+         srcBufferY0 = rowY0;
+         srcBufferY1 = rowY1;
+      }
+      else {
+         /* get both new rows */
+         readRb->GetRow(ctx, readRb, srcWidth, srcXpos, rowY0, srcBuffer0);
+         readRb->GetRow(ctx, readRb, srcWidth, srcXpos, rowY1, srcBuffer1);
+         srcBufferY0 = rowY0;
+         srcBufferY1 = rowY1;
+      }
+
+      /* channel type was verified to be GL_UNSIGNED_BYTE above */
+      resample_linear_row_ub(srcWidth, dstWidth, srcBuffer0, srcBuffer1,
+                             dstBuffer, invertX, rowWeight);
+
+      /* store pixel row in destination */
+      drawRb->PutRow(ctx, drawRb, dstWidth, dstXpos, dstY, dstBuffer, NULL);
+   }
+
+   _mesa_free(srcBuffer0);
+   _mesa_free(srcBuffer1);
+   _mesa_free(dstBuffer);
+}
+
+
+/**
+ * Simple case:  Blit color, depth or stencil with no scaling or flipping.
+ * XXX we could easily support vertical flipping here.
+ *
+ * Rows are copied one at a time through a temporary row buffer.  The copy
+ * direction is chosen from the relative position of the two rectangles:
+ * upward from the bottom row when the source starts above the destination,
+ * otherwise downward from the top row.
+ *
+ * \param buffer  one of GL_COLOR_BUFFER_BIT, GL_DEPTH_BUFFER_BIT or
+ *                GL_STENCIL_BUFFER_BIT
+ */
+static void
+simple_blit(GLcontext *ctx,
+            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+            GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+            GLenum buffer)
+{
+   const GLint width = srcX1 - srcX0;
+   const GLint height = srcY1 - srcY0;
+   /* src above dst: bottom-to-top; otherwise top-to-bottom */
+   const GLint yStep = (srcY0 > dstY0) ? 1 : -1;
+   GLint srcY = (srcY0 > dstY0) ? srcY0 : srcY1 - 1;
+   GLint dstY = (srcY0 > dstY0) ? dstY0 : dstY1 - 1;
+   struct gl_renderbuffer *readRb, *drawRb;
+   GLint comps, bytesPerRow, rowsLeft;
+   void *rowBuffer;
+
+   /* only one buffer */
+   ASSERT(_mesa_bitcount(buffer) == 1);
+   /* no flipping checks */
+   ASSERT(srcX0 < srcX1);
+   ASSERT(srcY0 < srcY1);
+   ASSERT(dstX0 < dstX1);
+   ASSERT(dstY0 < dstY1);
+   /* size checks */
+   ASSERT(srcX1 - srcX0 == dstX1 - dstX0);
+   ASSERT(srcY1 - srcY0 == dstY1 - dstY0);
+
+   /* pick the renderbuffers and the number of components per pixel */
+   if (buffer == GL_COLOR_BUFFER_BIT) {
+      readRb = ctx->ReadBuffer->_ColorReadBuffer;
+      drawRb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+      comps = 4;
+   }
+   else if (buffer == GL_DEPTH_BUFFER_BIT) {
+      readRb = ctx->ReadBuffer->_DepthBuffer;
+      drawRb = ctx->DrawBuffer->_DepthBuffer;
+      comps = 1;
+   }
+   else if (buffer == GL_STENCIL_BUFFER_BIT) {
+      readRb = ctx->ReadBuffer->_StencilBuffer;
+      drawRb = ctx->DrawBuffer->_StencilBuffer;
+      comps = 1;
+   }
+   else {
+      _mesa_problem(ctx, "unexpected buffer in simple_blit()");
+      return;
+   }
+
+   ASSERT(readRb->DataType == drawRb->DataType);
+
+   /* size in bytes of one row of the rectangle */
+   switch (readRb->DataType) {
+   case GL_UNSIGNED_BYTE:
+      bytesPerRow = comps * width * sizeof(GLubyte);
+      break;
+   case GL_UNSIGNED_SHORT:
+      bytesPerRow = comps * width * sizeof(GLushort);
+      break;
+   case GL_UNSIGNED_INT:
+      bytesPerRow = comps * width * sizeof(GLuint);
+      break;
+   case GL_FLOAT:
+      bytesPerRow = comps * width * sizeof(GLfloat);
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected buffer type in simple_blit");
+      return;
+   }
+
+   rowBuffer = _mesa_malloc(bytesPerRow);
+   if (!rowBuffer) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBlitFrameBufferEXT");
+      return;
+   }
+
+   /* copy row by row through the temporary buffer */
+   rowsLeft = height;
+   while (rowsLeft > 0) {
+      readRb->GetRow(ctx, readRb, width, srcX0, srcY, rowBuffer);
+      drawRb->PutRow(ctx, drawRb, width, dstX0, dstY, rowBuffer, NULL);
+      srcY += yStep;
+      dstY += yStep;
+      rowsLeft--;
+   }
+
+   _mesa_free(rowBuffer);
+}
+
+
+/**
+ * Clip dst coords against Xmax (or Ymax).
+ *
+ * If one end of the [dstX0, dstX1] interval lies beyond maxValue, that end
+ * is set to maxValue and the matching end of the [srcX0, srcX1] interval is
+ * moved by the same fraction of the source extent, rounded to the nearest
+ * integer.  Nothing changes when neither end exceeds maxValue.
+ *
+ * X0 may be greater than X1 (flipped rectangle) for either interval.
+ * The caller is expected to have rejected intervals lying entirely beyond
+ * maxValue (see the ASSERTs).
+ */
+static INLINE void
+clip_right_or_top(GLint *srcX0, GLint *srcX1,
+                  GLint *dstX0, GLint *dstX1,
+                  GLint maxValue)
+{
+   GLfloat t, bias;
+
+   if (*dstX1 > maxValue) {
+      /* X1 outside right edge */
+      ASSERT(*dstX0 < maxValue); /* X0 should be inside right edge */
+      /* t = fraction of the dst interval, measured from X0, that is kept */
+      t = (GLfloat) (maxValue - *dstX0) / (GLfloat) (*dstX1 - *dstX0);
+      /* chop off [t, 1] part */
+      ASSERT(t >= 0.0 && t <= 1.0);
+      *dstX1 = maxValue;
+      /* bias has the sign of (srcX1 - srcX0) so the cast rounds to nearest */
+      bias = (*srcX0 < *srcX1) ? 0.5 : -0.5;
+      *srcX1 = *srcX0 + (GLint) (t * (*srcX1 - *srcX0) + bias);
+   }
+   else if (*dstX0 > maxValue) {
+      /* X0 outside right edge */
+      ASSERT(*dstX1 < maxValue); /* X1 should be inside right edge */
+      /* t = fraction of the dst interval, measured from X1, that is kept */
+      t = (GLfloat) (maxValue - *dstX1) / (GLfloat) (*dstX0 - *dstX1);
+      /* chop off [t, 1] part */
+      ASSERT(t >= 0.0 && t <= 1.0);
+      *dstX0 = maxValue;
+      /* bias has the sign of (srcX0 - srcX1) so the cast rounds to nearest */
+      bias = (*srcX0 < *srcX1) ? -0.5 : 0.5;
+      *srcX0 = *srcX1 + (GLint) (t * (*srcX0 - *srcX1) + bias);
+   }
+}
+
+
+/**
+ * Clip dst coords against Xmin (or Ymin).
+ *
+ * If one end of the [dstX0, dstX1] interval lies below minValue, that end
+ * is set to minValue and the matching end of the [srcX0, srcX1] interval is
+ * moved toward the other end by the same fraction of the source extent,
+ * rounded to the nearest integer.  Nothing changes when neither end is
+ * below minValue.
+ *
+ * X0 may be greater than X1 (flipped rectangle) for either interval.
+ * The caller is expected to have rejected intervals lying entirely below
+ * minValue (see the ASSERTs).
+ */
+static INLINE void
+clip_left_or_bottom(GLint *srcX0, GLint *srcX1,
+                    GLint *dstX0, GLint *dstX1,
+                    GLint minValue)
+{
+   GLfloat t, bias;
+
+   if (*dstX0 < minValue) {
+      /* X0 outside left edge */
+      ASSERT(*dstX1 > minValue); /* X1 should be inside left edge */
+      /* t = fraction of the dst interval, measured from X0, that is cut */
+      t = (GLfloat) (minValue - *dstX0) / (GLfloat) (*dstX1 - *dstX0);
+      /* chop off [0, t] part */
+      ASSERT(t >= 0.0 && t <= 1.0);
+      *dstX0 = minValue;
+      /* bias has the sign of (srcX1 - srcX0) so the cast rounds to nearest */
+      bias = (*srcX0 < *srcX1) ? 0.5 : -0.5;
+      *srcX0 = *srcX0 + (GLint) (t * (*srcX1 - *srcX0) + bias);
+   }
+   else if (*dstX1 < minValue) {
+      /* X1 outside left edge */
+      ASSERT(*dstX0 > minValue); /* X0 should be inside left edge */
+      /* t = fraction of the dst interval, measured from X1, that is cut */
+      t = (GLfloat) (minValue - *dstX1) / (GLfloat) (*dstX0 - *dstX1);
+      /* chop off [0, t] part */
+      ASSERT(t >= 0.0 && t <= 1.0);
+      *dstX1 = minValue;
+      /* The offset t * (srcX0 - srcX1) is negative when srcX0 < srcX1, so
+       * the bias must be negative in that case for the truncating cast to
+       * round to nearest.  With the opposite sign even exact results are
+       * off by one: src 0..10, dst 10..-10, min 0 must give srcX1 = 5.
+       */
+      bias = (*srcX0 < *srcX1) ? -0.5 : 0.5;
+      *srcX1 = *srcX1 + (GLint) (t * (*srcX0 - *srcX1) + bias);
+   }
+}
+
+
+/**
+ * Do clipping of blit src/dest rectangles.
+ * The dest rect is clipped against both the buffer bounds and scissor bounds.
+ * The src rect is just clipped against the buffer bounds.
+ *
+ * When either the src or dest rect is clipped, the other is also clipped
+ * proportionately!
+ *
+ * Note that X0 need not be less than X1 (same for Y) for either the source
+ * or dest rect.  That makes the clipping a little trickier.
+ *
+ * The coordinates are updated in place.  Because the proportional clipping
+ * rounds to integers, a rectangle can end up with zero width or height;
+ * that case is reported as totally clipped so the blit routines never see
+ * an empty rectangle.
+ *
+ * \return GL_TRUE if anything is left to draw, GL_FALSE if totally clipped
+ */
+static GLboolean
+clip_blit(GLcontext *ctx,
+          GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
+          GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1)
+{
+   const GLint srcXmin = 0;
+   const GLint srcXmax = ctx->ReadBuffer->Width;
+   const GLint srcYmin = 0;
+   const GLint srcYmax = ctx->ReadBuffer->Height;
+
+   /* these include scissor bounds */
+   const GLint dstXmin = ctx->DrawBuffer->_Xmin;
+   const GLint dstXmax = ctx->DrawBuffer->_Xmax;
+   const GLint dstYmin = ctx->DrawBuffer->_Ymin;
+   const GLint dstYmax = ctx->DrawBuffer->_Ymax;
+
+   /*
+   printf("PreClipX:  src: %d .. %d  dst: %d .. %d\n",
+          *srcX0, *srcX1, *dstX0, *dstX1);
+   printf("PreClipY:  src: %d .. %d  dst: %d .. %d\n",
+          *srcY0, *srcY1, *dstY0, *dstY1);
+   */
+
+   /* trivial rejection tests */
+   if (*dstX0 == *dstX1)
+      return GL_FALSE; /* no width */
+   if (*dstX0 <= dstXmin && *dstX1 <= dstXmin)
+      return GL_FALSE; /* totally out (left) of bounds */
+   if (*dstX0 >= dstXmax && *dstX1 >= dstXmax)
+      return GL_FALSE; /* totally out (right) of bounds */
+
+   if (*dstY0 == *dstY1)
+      return GL_FALSE;
+   if (*dstY0 <= dstYmin && *dstY1 <= dstYmin)
+      return GL_FALSE;
+   if (*dstY0 >= dstYmax && *dstY1 >= dstYmax)
+      return GL_FALSE;
+
+   if (*srcX0 == *srcX1)
+      return GL_FALSE;
+   if (*srcX0 <= srcXmin && *srcX1 <= srcXmin)
+      return GL_FALSE;
+   if (*srcX0 >= srcXmax && *srcX1 >= srcXmax)
+      return GL_FALSE;
+
+   if (*srcY0 == *srcY1)
+      return GL_FALSE;
+   if (*srcY0 <= srcYmin && *srcY1 <= srcYmin)
+      return GL_FALSE;
+   if (*srcY0 >= srcYmax && *srcY1 >= srcYmax)
+      return GL_FALSE;
+
+   /*
+    * dest clip
+    */
+   clip_right_or_top(srcX0, srcX1, dstX0, dstX1, dstXmax);
+   clip_right_or_top(srcY0, srcY1, dstY0, dstY1, dstYmax);
+   clip_left_or_bottom(srcX0, srcX1, dstX0, dstX1, dstXmin);
+   clip_left_or_bottom(srcY0, srcY1, dstY0, dstY1, dstYmin);
+
+   /*
+    * src clip (just swap src/dst values from above)
+    */
+   clip_right_or_top(dstX0, dstX1, srcX0, srcX1, srcXmax);
+   clip_right_or_top(dstY0, dstY1, srcY0, srcY1, srcYmax);
+   clip_left_or_bottom(dstX0, dstX1, srcX0, srcX1, srcXmin);
+   clip_left_or_bottom(dstY0, dstY1, srcY0, srcY1, srcYmin);
+
+   /*
+   printf("PostClipX: src: %d .. %d  dst: %d .. %d\n",
+          *srcX0, *srcX1, *dstX0, *dstX1);
+   printf("PostClipY: src: %d .. %d  dst: %d .. %d\n",
+          *srcY0, *srcY1, *dstY0, *dstY1);
+   */
+
+   /* Rounding in the proportional clipping above can collapse a rectangle
+    * (e.g. src 0..2 with dst 0..1000 clipped to a 100 pixel wide buffer
+    * yields src 0..0).  An empty rectangle has nothing to read or draw,
+    * and the blit routines would index an empty row buffer, so reject it.
+    */
+   if (*srcX0 == *srcX1 || *srcY0 == *srcY1 ||
+       *dstX0 == *dstX1 || *dstY0 == *dstY1)
+      return GL_FALSE;
+
+   ASSERT(*dstX0 >= dstXmin);
+   ASSERT(*dstX0 <= dstXmax);
+   ASSERT(*dstX1 >= dstXmin);
+   ASSERT(*dstX1 <= dstXmax);
+
+   ASSERT(*dstY0 >= dstYmin);
+   ASSERT(*dstY0 <= dstYmax);
+   ASSERT(*dstY1 >= dstYmin);
+   ASSERT(*dstY1 <= dstYmax);
+
+   ASSERT(*srcX0 >= srcXmin);
+   ASSERT(*srcX0 <= srcXmax);
+   ASSERT(*srcX1 >= srcXmin);
+   ASSERT(*srcX1 <= srcXmax);
+
+   ASSERT(*srcY0 >= srcYmin);
+   ASSERT(*srcY0 <= srcYmax);
+   ASSERT(*srcY1 >= srcYmin);
+   ASSERT(*srcY1 <= srcYmax);
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Software fallback for glBlitFramebufferEXT().
+ *
+ * The rectangles are clipped first; if nothing remains the call returns
+ * without rendering.  Otherwise each buffer selected in 'mask' is copied:
+ *  - with simple_blit() when the rectangles have equal size and neither is
+ *    flipped (the filter is irrelevant then),
+ *  - with blit_nearest() when filter is GL_NEAREST,
+ *  - with blit_linear() otherwise, for the color buffer only.
+ */
+void
+_swrast_BlitFramebuffer(GLcontext *ctx,
+                        GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+                        GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+                        GLbitfield mask, GLenum filter)
+{
+   static const GLint buffers[3] = {
+      GL_COLOR_BUFFER_BIT,
+      GL_DEPTH_BUFFER_BIT,
+      GL_STENCIL_BUFFER_BIT
+   };
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLboolean sameSizeNoFlip;
+   GLint b;
+
+   if (!clip_blit(ctx, &srcX0, &srcY0, &srcX1, &srcY1,
+                  &dstX0, &dstY0, &dstX1, &dstY1)) {
+      return;
+   }
+
+   RENDER_START(swrast, ctx);
+
+   /* equal extents and both rectangles in natural orientation */
+   sameSizeNoFlip = (srcX1 - srcX0 == dstX1 - dstX0 &&
+                     srcY1 - srcY0 == dstY1 - dstY0 &&
+                     srcX0 < srcX1 &&
+                     srcY0 < srcY1 &&
+                     dstX0 < dstX1 &&
+                     dstY0 < dstY1);
+
+   if (sameSizeNoFlip) {
+      /* plain copy; filter doesn't matter */
+      for (b = 0; b < 3; b++) {
+         if (mask & buffers[b]) {
+            simple_blit(ctx, srcX0, srcY0, srcX1, srcY1,
+                        dstX0, dstY0, dstX1, dstY1, buffers[b]);
+         }
+      }
+   }
+   else if (filter == GL_NEAREST) {
+      for (b = 0; b < 3; b++) {
+         if (mask & buffers[b]) {
+            blit_nearest(ctx,  srcX0, srcY0, srcX1, srcY1,
+                         dstX0, dstY0, dstX1, dstY1, buffers[b]);
+         }
+      }
+   }
+   else {
+      ASSERT(filter == GL_LINEAR);
+      if (mask & GL_COLOR_BUFFER_BIT) {  /* depth/stencil not allowed */
+         blit_linear(ctx,  srcX0, srcY0, srcX1, srcY1,
+                     dstX0, dstY0, dstX1, dstY1);
+      }
+   }
+
+   RENDER_FINISH(swrast, ctx);
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_buffers.c b/dist/Mesa/src/mesa/swrast/s_buffers.c
new file mode 100644
index 000000000..57f23ee82
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_buffers.c
@@ -0,0 +1,309 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** XXX This file should be named s_clear.c */
+
+#include "glheader.h"
+#include "colormac.h"
+#include "macros.h"
+#include "imports.h"
+#include "mtypes.h"
+
+#include "s_accum.h"
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_masking.h"
+#include "s_stencil.h"
+
+
+/**
+ * Clear the color buffer when glColorMask is in effect.
+ *
+ * Every row of the draw buffer's _Xmin.._Xmax / _Ymin.._Ymax region is
+ * filled with the clear color, passed through _swrast_mask_rgba_array()
+ * and then written with the renderbuffer's PutRow().
+ */
+static void
+clear_rgba_buffer_with_masking(GLcontext *ctx, struct gl_renderbuffer *rb)
+{
+   const GLint x = ctx->DrawBuffer->_Xmin;
+   const GLint y = ctx->DrawBuffer->_Ymin;
+   const GLint height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+   const GLint width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+   GLchan rowColors[MAX_WIDTH][4];
+   GLchan clearColor[4];
+   GLint row, col;
+
+   ASSERT(ctx->Visual.rgbMode);
+   ASSERT(rb->PutRow);
+
+   CLAMPED_FLOAT_TO_CHAN(clearColor[RCOMP], ctx->Color.ClearColor[0]);
+   CLAMPED_FLOAT_TO_CHAN(clearColor[GCOMP], ctx->Color.ClearColor[1]);
+   CLAMPED_FLOAT_TO_CHAN(clearColor[BCOMP], ctx->Color.ClearColor[2]);
+   CLAMPED_FLOAT_TO_CHAN(clearColor[ACOMP], ctx->Color.ClearColor[3]);
+
+   row = 0;
+   while (row < height) {
+      const GLint rowY = y + row;
+
+      /* the row is refilled each time; it is passed non-const to masking */
+      for (col = 0; col < width; col++) {
+         COPY_CHAN4(rowColors[col], clearColor);
+      }
+      _swrast_mask_rgba_array( ctx, rb, width, x, rowY, rowColors );
+      rb->PutRow(ctx, rb, width, x, rowY, rowColors, NULL);
+      row++;
+   }
+}
+
+
+/**
+ * Clear color index buffer with masking.
+ *
+ * Every row of the draw buffer's _Xmin.._Xmax / _Ymin.._Ymax region is
+ * filled with the clear index, passed through _swrast_mask_ci_array() and
+ * then written with the renderbuffer's PutRow().
+ */
+static void
+clear_ci_buffer_with_masking(GLcontext *ctx, struct gl_renderbuffer *rb)
+{
+   const GLint x = ctx->DrawBuffer->_Xmin;
+   const GLint y = ctx->DrawBuffer->_Ymin;
+   const GLint height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+   const GLint width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+   GLuint rowIndexes[MAX_WIDTH];
+   GLint row, col;
+
+   ASSERT(!ctx->Visual.rgbMode);
+   ASSERT(rb->PutRow);
+   ASSERT(rb->DataType == GL_UNSIGNED_INT);
+
+   row = 0;
+   while (row < height) {
+      const GLint rowY = y + row;
+
+      /* the row is refilled each time; it is passed non-const to masking */
+      for (col = 0; col < width; col++) {
+         rowIndexes[col] = ctx->Color.ClearIndex;
+      }
+      _swrast_mask_ci_array(ctx, rb, width, x, rowY, rowIndexes);
+      rb->PutRow(ctx, rb, width, x, rowY, rowIndexes, NULL);
+      row++;
+   }
+}
+
+
+/**
+ * Clear an rgba color buffer without channel masking.
+ *
+ * The clear color is converted to the renderbuffer's data type
+ * (GLubyte, GLushort or GLfloat) and written row by row with PutMonoRow()
+ * over the draw buffer's _Xmin.._Xmax / _Ymin.._Ymax region.  Any other
+ * data type is reported through _mesa_problem() and nothing is cleared.
+ */
+static void
+clear_rgba_buffer(GLcontext *ctx, struct gl_renderbuffer *rb)
+{
+   const GLint x = ctx->DrawBuffer->_Xmin;
+   const GLint y = ctx->DrawBuffer->_Ymin;
+   const GLint height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+   const GLint width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+   GLubyte ubColor[4];
+   GLushort usColor[4];
+   GLvoid *clearVal;
+   GLint row, c;
+
+   ASSERT(ctx->Visual.rgbMode);
+
+   ASSERT(ctx->Color.ColorMask[0] &&
+          ctx->Color.ColorMask[1] &&
+          ctx->Color.ColorMask[2] &&
+          ctx->Color.ColorMask[3]);
+
+   ASSERT(rb->PutMonoRow);
+
+   /* build the clear value in the renderbuffer's own channel type */
+   if (rb->DataType == GL_UNSIGNED_BYTE) {
+      for (c = 0; c < 4; c++) {
+         UNCLAMPED_FLOAT_TO_UBYTE(ubColor[c], ctx->Color.ClearColor[c]);
+      }
+      clearVal = ubColor;
+   }
+   else if (rb->DataType == GL_UNSIGNED_SHORT) {
+      for (c = 0; c < 4; c++) {
+         UNCLAMPED_FLOAT_TO_USHORT(usColor[c], ctx->Color.ClearColor[c]);
+      }
+      clearVal = usColor;
+   }
+   else if (rb->DataType == GL_FLOAT) {
+      /* float buffers take the clear color as is */
+      clearVal = ctx->Color.ClearColor;
+   }
+   else {
+      _mesa_problem(ctx, "Bad rb DataType in clear_color_buffer");
+      return;
+   }
+
+   row = 0;
+   while (row < height) {
+      rb->PutMonoRow(ctx, rb, width, x, y + row, clearVal, NULL);
+      row++;
+   }
+}
+
+
+/**
+ * Clear color index buffer without masking.
+ *
+ * The clear index is converted to the renderbuffer's data type
+ * (GLubyte, GLushort or GLuint) and written row by row with PutMonoRow()
+ * over the draw buffer's _Xmin.._Xmax / _Ymin.._Ymax region.  Any other
+ * data type is reported through _mesa_problem() and nothing is cleared.
+ */
+static void
+clear_ci_buffer(GLcontext *ctx, struct gl_renderbuffer *rb)
+{
+   const GLint x = ctx->DrawBuffer->_Xmin;
+   const GLint y = ctx->DrawBuffer->_Ymin;
+   const GLint height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+   const GLint width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+   GLubyte index8;
+   GLushort index16;
+   GLuint index32;
+   GLvoid *clearVal;
+   GLint row;
+
+   ASSERT(!ctx->Visual.rgbMode);
+
+   ASSERT((ctx->Color.IndexMask & ((1 << rb->IndexBits) - 1))
+          == (GLuint) ((1 << rb->IndexBits) - 1));
+
+   ASSERT(rb->PutMonoRow);
+
+   /* store the clear index in a variable of the renderbuffer's type */
+   if (rb->DataType == GL_UNSIGNED_BYTE) {
+      index8 = (GLubyte) ctx->Color.ClearIndex;
+      clearVal = &index8;
+   }
+   else if (rb->DataType == GL_UNSIGNED_SHORT) {
+      index16 = (GLushort) ctx->Color.ClearIndex;
+      clearVal = &index16;
+   }
+   else if (rb->DataType == GL_UNSIGNED_INT) {
+      index32 = ctx->Color.ClearIndex;
+      clearVal = &index32;
+   }
+   else {
+      _mesa_problem(ctx, "Bad rb DataType in clear_color_buffer");
+      return;
+   }
+
+   row = 0;
+   while (row < height) {
+      rb->PutMonoRow(ctx, rb, width, x, y + row, clearVal, NULL);
+      row++;
+   }
+}
+
+
+/**
+ * Clear the front/back/left/right/aux color buffers currently selected
+ * for drawing.
+ * Usually reached only when the device driver can't clear its own color
+ * buffers for some reason (such as with masking).
+ * Each buffer is cleared either with or without write masking, as
+ * determined once up front from the color mask or index mask.
+ * \param ctx  context whose ctx->DrawBuffer color draw buffers are cleared
+ */
+static void
+clear_color_buffers(GLcontext *ctx)
+{
+   GLboolean masking;
+   GLuint buf;
+
+   /* Masking is required unless every channel (RGBA mode) or every
+    * index bit (color index mode) is write-enabled.
+    */
+   if (ctx->Visual.rgbMode) {
+      const GLboolean allChannels = (ctx->Color.ColorMask[0] &&
+                                     ctx->Color.ColorMask[1] &&
+                                     ctx->Color.ColorMask[2] &&
+                                     ctx->Color.ColorMask[3]);
+
+      masking = allChannels ? GL_FALSE : GL_TRUE;
+   }
+   else {
+      /* the first draw buffer's IndexBits decides for all buffers */
+      struct gl_renderbuffer *first = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+      const GLuint allBits = (1 << first->IndexBits) - 1;
+
+      masking = ((ctx->Color.IndexMask & allBits) == allBits)
+         ? GL_FALSE : GL_TRUE;
+   }
+
+   /* Clear each buffer with the routine matching the mode and masking. */
+   for (buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers[0]; buf++) {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0][buf];
+
+      if (ctx->Visual.rgbMode && masking) {
+         clear_rgba_buffer_with_masking(ctx, rb);
+      }
+      else if (ctx->Visual.rgbMode) {
+         clear_rgba_buffer(ctx, rb);
+      }
+      else if (masking) {
+         clear_ci_buffer_with_masking(ctx, rb);
+      }
+      else {
+         clear_ci_buffer(ctx, rb);
+      }
+   }
+}
+
+
+/**
+ * Software fallback for clearing renderbuffers.  Called via the device
+ * driver's ctx->Driver.Clear() function if the device driver can't clear
+ * one or more of the buffers itself.
+ * \param mask  bitfield of BUFFER_BIT_* values indicating which
+ *              renderbuffers are to be cleared.
+ * \param all  if GL_TRUE, clear whole buffer, else clear specified region.
+ *             This function ignores it, along with x, y, width, height.
+ */
+void
+_swrast_Clear(GLcontext *ctx, GLbitfield mask,
+              GLboolean all, GLint x, GLint y, GLint width, GLint height)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   (void) all;
+   (void) x; (void) y; (void) width; (void) height;
+
+#ifdef DEBUG_FOO
+   {
+      const GLbitfield legalBits = (BUFFER_BIT_FRONT_LEFT |
+                                    BUFFER_BIT_FRONT_RIGHT |
+                                    BUFFER_BIT_BACK_LEFT |
+                                    BUFFER_BIT_BACK_RIGHT |
+                                    BUFFER_BIT_DEPTH |
+                                    BUFFER_BIT_STENCIL |
+                                    BUFFER_BIT_ACCUM |
+                                    BUFFER_BIT_AUX0 |
+                                    BUFFER_BIT_AUX1 |
+                                    BUFFER_BIT_AUX2 |
+                                    BUFFER_BIT_AUX3);
+      assert((mask & (~legalBits)) == 0);
+   }
+#endif
+
+   RENDER_START(swrast,ctx);
+
+   /* Software clears; a zero mask matches none of the tests below. */
+   if (mask & ctx->DrawBuffer->_ColorDrawBufferMask[0]) {
+      clear_color_buffers(ctx);
+   }
+   if (mask & BUFFER_BIT_DEPTH) {
+      _swrast_clear_depth_buffer(ctx, ctx->DrawBuffer->_DepthBuffer);
+   }
+   if (mask & BUFFER_BIT_ACCUM) {
+      _swrast_clear_accum_buffer(ctx,
+                       ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
+   }
+   if (mask & BUFFER_BIT_STENCIL) {
+      _swrast_clear_stencil_buffer(ctx, ctx->DrawBuffer->_StencilBuffer);
+   }
+
+   RENDER_FINISH(swrast,ctx);
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_context.c b/dist/Mesa/src/mesa/swrast/s_context.c
new file mode 100644
index 000000000..a265d9e6c
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_context.c
@@ -0,0 +1,810 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ *    Brian Paul
+ */
+
+#include "imports.h"
+#include "bufferobj.h"
+#include "context.h"
+#include "colormac.h"
+#include "mtypes.h"
+#include "program.h"
+#include "teximage.h"
+#include "swrast.h"
+#include "s_blend.h"
+#include "s_context.h"
+#include "s_lines.h"
+#include "s_points.h"
+#include "s_span.h"
+#include "s_triangle.h"
+#include "s_texfilter.h"
+
+
+/**
+ * Recompute the value of swrast->_RasterMask according to the current
+ * context.  The _RasterMask field can be easily tested by drivers to
+ * determine certain basic GL state (does the primitive need stenciling,
+ * logic-op, fog, etc?).
+ *
+ * The RGBA write mask is tested one channel at a time: reading ColorMask
+ * through a GLuint pointer breaks C aliasing rules and assumes alignment.
+ */
+static void
+_swrast_update_rasterflags( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLbitfield rasterMask = 0;
+
+   if (ctx->Color.AlphaEnabled)           rasterMask |= ALPHATEST_BIT;
+   if (ctx->Color.BlendEnabled)           rasterMask |= BLEND_BIT;
+   if (ctx->Depth.Test)                   rasterMask |= DEPTH_BIT;
+   if (swrast->_FogEnabled)               rasterMask |= FOG_BIT;
+   if (ctx->Scissor.Enabled)              rasterMask |= CLIP_BIT;
+   if (ctx->Stencil.Enabled)              rasterMask |= STENCIL_BIT;
+   if (ctx->Visual.rgbMode) {
+      /* Matches the old packed compare against 0xffffffff, assuming
+       * ColorMask holds four 8-bit flags (TODO confirm in mtypes.h). */
+      if (ctx->Color.ColorMask[0] != 0xff || ctx->Color.ColorMask[1] != 0xff ||
+          ctx->Color.ColorMask[2] != 0xff || ctx->Color.ColorMask[3] != 0xff)
+         rasterMask |= MASKING_BIT;
+      if (ctx->Color._LogicOpEnabled)     rasterMask |= LOGIC_OP_BIT;
+      if (ctx->Texture._EnabledUnits)     rasterMask |= TEXTURE_BIT;
+   }
+   else {
+      if (ctx->Color.IndexMask != 0xffffffff) rasterMask |= MASKING_BIT;
+      if (ctx->Color.IndexLogicOpEnabled)     rasterMask |= LOGIC_OP_BIT;
+   }
+
+   if (   ctx->Viewport.X < 0
+       || ctx->Viewport.X + ctx->Viewport.Width > (GLint) ctx->DrawBuffer->Width
+       || ctx->Viewport.Y < 0
+       || ctx->Viewport.Y + ctx->Viewport.Height > (GLint) ctx->DrawBuffer->Height) {
+      rasterMask |= CLIP_BIT;
+   }
+
+   if (ctx->Query.CurrentOcclusionObject)
+      rasterMask |= OCCLUSION_BIT;
+
+   /* If we're not drawing to exactly one color buffer set the
+    * MULTI_DRAW_BIT flag.  Also set it if we're drawing to no
+    * buffers or the RGBA or CI mask disables all writes.
+    */
+   if (ctx->DrawBuffer->_NumColorDrawBuffers[0] != 1) {
+      /* more than one color buffer designated for writing (or zero buffers) */
+      rasterMask |= MULTI_DRAW_BIT;
+   }
+   else if (ctx->Visual.rgbMode &&
+            ctx->Color.ColorMask[0] == 0 && ctx->Color.ColorMask[1] == 0 &&
+            ctx->Color.ColorMask[2] == 0 && ctx->Color.ColorMask[3] == 0) {
+      rasterMask |= MULTI_DRAW_BIT; /* all RGBA channels disabled */
+   }
+   else if (!ctx->Visual.rgbMode && ctx->Color.IndexMask==0) {
+      rasterMask |= MULTI_DRAW_BIT; /* all color index bits disabled */
+   }
+
+   if (ctx->FragmentProgram._Active ||
+       ctx->ShaderObjects._FragmentShaderPresent) {
+      rasterMask |= FRAGPROG_BIT;
+   }
+   if (ctx->ATIFragmentShader._Enabled)
+      rasterMask |= ATIFRAGSHADER_BIT;
+#if CHAN_TYPE == GL_FLOAT
+   if (ctx->Color.ClampFragmentColor == GL_TRUE)
+      rasterMask |= CLAMPING_BIT;
+#endif
+
+   swrast->_RasterMask = rasterMask;
+}
+
+
+/**
+ * Examine polygon cull state to compute the _BackfaceSign field.
+ * _BackfaceSign is 0 when culling is disabled or when the cull mode is
+ * neither GL_BACK nor GL_FRONT.  Otherwise the Polygon FrontFace state
+ * factors in:
+ *   - culling GL_BACK:  -1 if FrontFace is GL_CCW, else 1
+ *   - culling GL_FRONT:  1 if FrontFace is GL_CCW, else -1
+ * \param ctx  context whose Polygon state is read
+ */
+static void
+_swrast_update_polygon( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLboolean frontIsCCW = (ctx->Polygon.FrontFace == GL_CCW);
+   GLfloat sign;
+
+   if (!ctx->Polygon.CullFlag) {
+      /* culling disabled */
+      sign = 0;
+   }
+   else if (ctx->Polygon.CullFaceMode == GL_BACK) {
+      sign = frontIsCCW ? -1 : 1;
+   }
+   else if (ctx->Polygon.CullFaceMode == GL_FRONT) {
+      sign = frontIsCCW ? 1 : -1;
+   }
+   else {
+      /* GL_FRONT_AND_BACK, or any other value */
+      sign = 0;
+   }
+
+   /* publish the result in the swrast context */
+   swrast->_BackfaceSign = sign;
+}
+
+
+/**
+ * Update the _PreferPixelFog field, which indicates whether fog factors
+ * need to be computed per-fragment.
+ */
+static void
+_swrast_update_fog_hint( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLboolean nicestFog = (ctx->Hint.Fog == GL_NICEST);
+   /* the fragment program test uses _Enabled, not _Active */
+   swrast->_PreferPixelFog = (ctx->FragmentProgram._Enabled ||
+      !swrast->AllowVertexFog || (nicestFog && swrast->AllowPixelFog));
+}
+
+
+
+/**
+ * Recompute swrast->_AnyTextureCombine: GL_TRUE if any texture unit's
+ * environment mode is GL_COMBINE_EXT or GL_COMBINE4_NV, else GL_FALSE.
+ */
+static void
+_swrast_update_texture_env( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLboolean anyCombine = GL_FALSE;
+   GLuint unit;
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits && !anyCombine; unit++) {
+      const GLenum mode = ctx->Texture.Unit[unit].EnvMode;
+      if (mode == GL_COMBINE_EXT || mode == GL_COMBINE4_NV)
+         anyCombine = GL_TRUE;
+   }
+   swrast->_AnyTextureCombine = anyCombine;
+}
+
+
+/**
+ * Update swrast->_FogColor, swrast->_FogEnabled and swrast->_FogMode.
+ */
+static void
+_swrast_update_fog_state( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const struct gl_fragment_program *fp = ctx->FragmentProgram._Current;
+
+   /* convert fog color to GLchan values */
+   CLAMPED_FLOAT_TO_CHAN(swrast->_FogColor[RCOMP], ctx->Fog.Color[RCOMP]);
+   CLAMPED_FLOAT_TO_CHAN(swrast->_FogColor[GCOMP], ctx->Fog.Color[GCOMP]);
+   CLAMPED_FLOAT_TO_CHAN(swrast->_FogColor[BCOMP], ctx->Fog.Color[BCOMP]);
+
+   /* Fog is off unless enabled below; _FogMode is only written when on.
+    * An active fragment program overrides the fixed-function fog state.
+    */
+   swrast->_FogEnabled = GL_FALSE;
+   if (!ctx->FragmentProgram._Active) {
+      if (ctx->Fog.Enabled) {
+         swrast->_FogEnabled = GL_TRUE;
+         swrast->_FogMode = ctx->Fog.Mode;
+      }
+   }
+   else if (fp->Base.Target == GL_FRAGMENT_PROGRAM_ARB
+            && fp->FogOption != GL_NONE) {
+      swrast->_FogEnabled = GL_TRUE;
+      swrast->_FogMode = fp->FogOption;
+   }
+}
+
+
+/**
+ * Update state for running fragment programs: when a fragment program
+ * is active, load its parameters with current state values.
+ */
+static void
+_swrast_update_fragment_program( GLcontext *ctx )
+{
+   if (!ctx->FragmentProgram._Active)
+      return;
+   _mesa_load_state_parameters(ctx,
+                    ctx->FragmentProgram._Current->Base.Parameters);
+}
+
+
+/* State flags shared by the triangle, line and point masks below. */
+#define _SWRAST_NEW_DERIVED (_SWRAST_NEW_RASTERMASK |	\
+			     _NEW_TEXTURE |		\
+			     _NEW_HINT |		\
+			     _NEW_POLYGON )
+
+/* State referenced by _swrast_choose_triangle, _swrast_choose_line.
+ * _swrast_CreateContext installs this as swrast->InvalidateTriangleMask. */
+#define _SWRAST_NEW_TRIANGLE (_SWRAST_NEW_DERIVED |		\
+			      _NEW_RENDERMODE|			\
+                              _NEW_POLYGON|			\
+                              _NEW_DEPTH|			\
+                              _NEW_STENCIL|			\
+                              _NEW_COLOR|			\
+                              _NEW_TEXTURE|			\
+                              _SWRAST_NEW_RASTERMASK|		\
+                              _NEW_LIGHT|			\
+                              _NEW_FOG |			\
+			      _DD_NEW_SEPARATE_SPECULAR)
+/* _swrast_CreateContext installs this as swrast->InvalidateLineMask. */
+#define _SWRAST_NEW_LINE (_SWRAST_NEW_DERIVED |		\
+			  _NEW_RENDERMODE|		\
+                          _NEW_LINE|			\
+                          _NEW_TEXTURE|			\
+                          _NEW_LIGHT|			\
+                          _NEW_FOG|			\
+                          _NEW_DEPTH |			\
+                          _DD_NEW_SEPARATE_SPECULAR)
+/* _swrast_CreateContext installs this as swrast->InvalidatePointMask. */
+#define _SWRAST_NEW_POINT (_SWRAST_NEW_DERIVED |	\
+			   _NEW_RENDERMODE |		\
+			   _NEW_POINT |			\
+			   _NEW_TEXTURE |		\
+			   _NEW_LIGHT |			\
+			   _NEW_FOG |			\
+                           _DD_NEW_SEPARATE_SPECULAR)
+/* Bits that make _swrast_invalidate_state reset TextureSample[] to NULL. */
+#define _SWRAST_NEW_TEXTURE_SAMPLE_FUNC _NEW_TEXTURE
+/* Bits that make _swrast_validate_derived rerun the texture env update. */
+#define _SWRAST_NEW_TEXTURE_ENV_MODE _NEW_TEXTURE
+/* Bits that make _swrast_invalidate_state reset swrast->BlendFunc. */
+#define _SWRAST_NEW_BLEND_FUNC _NEW_COLOR
+
+
+
+/**
+ * Stub installed in swrast->Triangle after a state change: validates
+ * derived state, selects a true triangle function and then calls it.
+ */
+static void
+_swrast_validate_triangle( GLcontext *ctx,
+                           const SWvertex *v0,
+                           const SWvertex *v1,
+                           const SWvertex *v2 )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   _swrast_validate_derived( ctx );
+   swrast->choose_triangle( ctx );
+
+   /* separate specular color, but no texture or fragment program */
+   if (NEED_SECONDARY_COLOR(ctx) && ctx->Texture._EnabledUnits == 0
+       && !ctx->FragmentProgram._Active) {
+      swrast->SpecTriangle = swrast->Triangle;
+      swrast->Triangle = _swrast_add_spec_terms_triangle;
+   }
+
+   /* run whichever triangle function is now installed */
+   swrast->Triangle( ctx, v0, v1, v2 );
+}
+
+/**
+ * Called via swrast->Line.  Validate derived state, choose a software
+ * line routine for the current GL state, then call it.
+ */
+static void
+_swrast_validate_line( GLcontext *ctx, const SWvertex *v0, const SWvertex *v1 )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   _swrast_validate_derived( ctx );
+   swrast->choose_line( ctx );
+
+   /* separate specular color, but no texture or fragment program */
+   if (NEED_SECONDARY_COLOR(ctx) && ctx->Texture._EnabledUnits == 0
+       && !ctx->FragmentProgram._Active) {
+      swrast->SpecLine = swrast->Line;
+      swrast->Line = _swrast_add_spec_terms_line;
+   }
+
+   /* run whichever line function is now installed */
+   swrast->Line( ctx, v0, v1 );
+}
+
+/**
+ * Called via swrast->Point.  Validate derived state, choose a software
+ * point routine for the current GL state, then call it.
+ */
+static void
+_swrast_validate_point( GLcontext *ctx, const SWvertex *v0 )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   _swrast_validate_derived( ctx );
+   swrast->choose_point( ctx );
+
+   /* separate specular color, but no texture or fragment program */
+   if (NEED_SECONDARY_COLOR(ctx) && ctx->Texture._EnabledUnits == 0
+       && !ctx->FragmentProgram._Active) {
+      swrast->SpecPoint = swrast->Point;
+      swrast->Point = _swrast_add_spec_terms_point;
+   }
+
+   swrast->Point( ctx, v0 );
+}
+
+
+/**
+ * Called via swrast->BlendFunc.  Validates derived state, lets
+ * _swrast_choose_blend_func() examine GL state to choose a blending
+ * function, then forwards the arguments to swrast->BlendFunc.
+ */
+static void _ASMAPI
+_swrast_validate_blend_func( GLcontext *ctx, GLuint n,
+                             const GLubyte mask[],
+                             GLchan src[][4],
+                             CONST GLchan dst[][4] )
+{
+   _swrast_validate_derived( ctx );
+   _swrast_choose_blend_func( ctx );
+
+   /* BlendFunc is presumably replaced by the call above - TODO confirm */
+   SWRAST_CONTEXT(ctx)->BlendFunc( ctx, n, mask, src, dst );
+}
+
+
+/**
+ * Make sure we have texture image data for all the textures we may need
+ * for subsequent rendering.
+ */
+static void
+_swrast_validate_texture_images(GLcontext *ctx)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint unit;
+
+   /* nothing to do without enabled textures or a validation callback */
+   if (!ctx->Texture._EnabledUnits || !swrast->ValidateTextureImage)
+      return;
+
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+      GLuint numFaces, face, level;
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+      texObj = ctx->Texture.Unit[unit]._Current;
+      ASSERT(texObj);
+      if (!texObj)
+         continue;
+
+      numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+      for (face = 0; face < numFaces; face++) {
+         for (level = texObj->BaseLevel; level <= texObj->_MaxLevel; level++) {
+            struct gl_texture_image *img = texObj->Image[face][level];
+            if (img && !img->Data) {
+               swrast->ValidateTextureImage(ctx, texObj, face, level);
+               ASSERT(texObj->Image[face][level]->Data);
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Free the texture image data attached to all currently enabled
+ * textures.  Meant to be called by device drivers when transitioning
+ * from software to hardware rendering.
+ */
+void
+_swrast_eject_texture_images(GLcontext *ctx)
+{
+   GLuint unit;
+
+   if (!ctx->Texture._EnabledUnits)
+      return;   /* no textures enabled */
+
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+      GLuint numFaces, face, level;
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+
+      texObj = ctx->Texture.Unit[unit]._Current;
+      ASSERT(texObj);
+      if (!texObj)
+         continue;
+
+      numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+      for (face = 0; face < numFaces; face++) {
+         for (level = texObj->BaseLevel; level <= texObj->_MaxLevel; level++) {
+            struct gl_texture_image *img = texObj->Image[face][level];
+            if (img && img->Data) {
+               _mesa_free_texmemory(img->Data);
+               img->Data = NULL;
+            }
+         }
+      }
+   }
+}
+
+/** No-op InvalidateState handler: every state change is ignored. */
+static void
+_swrast_sleep( GLcontext *ctx, GLbitfield new_state )
+{
+   (void) new_state;
+   (void) ctx;
+}
+
+/**
+ * InvalidateState handler that tracks state changes.  Records new_state
+ * in swrast->NewState, re-installs the validate stubs for Triangle, Line,
+ * Point and BlendFunc as needed and resets TextureSample[] to NULL.
+ */
+static void
+_swrast_invalidate_state( GLcontext *ctx, GLbitfield new_state )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint unit;
+
+   swrast->NewState |= new_state;
+   /* After 10 statechanges without any swrast functions being called,
+    * put the module to sleep.
+    */
+   if (++swrast->StateChanges > 10) {
+      swrast->InvalidateState = _swrast_sleep;
+      swrast->NewState = ~0;
+      new_state = ~0;
+   }
+
+   if (new_state & swrast->InvalidateTriangleMask)
+      swrast->Triangle = _swrast_validate_triangle;
+   if (new_state & swrast->InvalidateLineMask)
+      swrast->Line = _swrast_validate_line;
+   if (new_state & swrast->InvalidatePointMask)
+      swrast->Point = _swrast_validate_point;
+   if (new_state & _SWRAST_NEW_BLEND_FUNC)
+      swrast->BlendFunc = _swrast_validate_blend_func;
+   if (new_state & _SWRAST_NEW_TEXTURE_SAMPLE_FUNC) {
+      for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++)
+         swrast->TextureSample[unit] = NULL;
+   }
+}
+
+/** Choose a sampling function for each unit that has a current texture. */
+static void
+_swrast_update_texture_samplers(GLcontext *ctx)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint i;
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+      const struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+      if (!texObj)
+         continue;   /* entry left unchanged */
+      swrast->TextureSample[i] =
+         _swrast_choose_texture_sample_func(ctx, texObj);
+   }
+}
+
+/**
+ * Bring derived swrast state up to date.  Each update routine runs only
+ * if a state bit it depends on is set in swrast->NewState.  Afterwards
+ * the dirty state is cleared and _swrast_invalidate_state re-installed.
+ */
+void
+_swrast_validate_derived( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   if (!swrast->NewState)
+      return;   /* no pending state changes */
+
+   if (swrast->NewState & _NEW_POLYGON)
+      _swrast_update_polygon( ctx );
+   if (swrast->NewState & (_NEW_HINT | _NEW_PROGRAM))
+      _swrast_update_fog_hint( ctx );
+   if (swrast->NewState & _SWRAST_NEW_TEXTURE_ENV_MODE)
+      _swrast_update_texture_env( ctx );
+   if (swrast->NewState & (_NEW_FOG | _NEW_PROGRAM))
+      _swrast_update_fog_state( ctx );
+   if (swrast->NewState & _NEW_PROGRAM)
+      _swrast_update_fragment_program( ctx );
+   if (swrast->NewState & _NEW_TEXTURE)
+      _swrast_update_texture_samplers( ctx );
+   if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM))
+      _swrast_validate_texture_images( ctx );
+   /* reads swrast->_FogEnabled, so must follow the fog state update */
+   if (swrast->NewState & _SWRAST_NEW_RASTERMASK)
+      _swrast_update_rasterflags( ctx );
+
+   /* everything is valid again: resume tracking state changes */
+   swrast->NewState = 0;
+   swrast->StateChanges = 0;
+   swrast->InvalidateState = _swrast_invalidate_state;
+}
+
+#define SWRAST_DEBUG 0
+
+/* Public entrypoints follow.  See also s_accum.c, s_bitmap.c, etc. */
+/** Draw a quad as the two triangles (v0, v1, v3) and (v1, v2, v3). */
+void
+_swrast_Quad( GLcontext *ctx, const SWvertex *v0, const SWvertex *v1,
+              const SWvertex *v2, const SWvertex *v3 )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (SWRAST_DEBUG) {
+      _mesa_debug(ctx, "_swrast_Quad\n");
+      _swrast_print_vertex( ctx, v0 );
+      _swrast_print_vertex( ctx, v1 );
+      _swrast_print_vertex( ctx, v2 );
+      _swrast_print_vertex( ctx, v3 );
+   }
+   swrast->Triangle( ctx, v0, v1, v3 );
+   swrast->Triangle( ctx, v1, v2, v3 );
+}
+
+/** Public entrypoint: draw a triangle via the current swrast->Triangle. */
+void _swrast_Triangle( GLcontext *ctx, const SWvertex *v0,
+                       const SWvertex *v1, const SWvertex *v2 )
+{
+   if (SWRAST_DEBUG) {
+      _mesa_debug(ctx, "_swrast_Triangle\n");
+      _swrast_print_vertex( ctx, v0 );
+      _swrast_print_vertex( ctx, v1 );
+      _swrast_print_vertex( ctx, v2 );
+   }
+   SWRAST_CONTEXT(ctx)->Triangle( ctx, v0, v1, v2 );
+}
+
+/** Public entrypoint: draw a line via the current swrast->Line. */
+void _swrast_Line( GLcontext *ctx, const SWvertex *v0, const SWvertex *v1 )
+{
+   if (SWRAST_DEBUG) {
+      _mesa_debug(ctx, "_swrast_Line\n");
+      _swrast_print_vertex( ctx, v0 );
+      _swrast_print_vertex( ctx, v1 );
+   }
+   SWRAST_CONTEXT(ctx)->Line( ctx, v0, v1 );
+}
+
+/** Public entrypoint: draw a point via the current swrast->Point. */
+void _swrast_Point( GLcontext *ctx, const SWvertex *v0 )
+{
+   if (SWRAST_DEBUG) {
+      _mesa_debug(ctx, "_swrast_Point\n");
+      _swrast_print_vertex( ctx, v0 );
+   }
+   SWRAST_CONTEXT(ctx)->Point( ctx, v0 );
+}
+
+/** Public entrypoint: forward a state change to the current handler. */
+void _swrast_InvalidateState( GLcontext *ctx, GLbitfield new_state )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (SWRAST_DEBUG)
+      _mesa_debug(ctx, "_swrast_InvalidateState\n");
+   swrast->InvalidateState( ctx, new_state );
+}
+
+/** Public entrypoint: reset swrast's StippleCounter to zero. */
+void _swrast_ResetLineStipple( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (SWRAST_DEBUG)
+      _mesa_debug(ctx, "_swrast_ResetLineStipple\n");
+   swrast->StippleCounter = 0;
+}
+
+/** Set AllowVertexFog, after flagging _NEW_HINT state as changed. */
+void _swrast_allow_vertex_fog( GLcontext *ctx, GLboolean value )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (SWRAST_DEBUG)
+      _mesa_debug(ctx, "_swrast_allow_vertex_fog %d\n", value);
+   swrast->InvalidateState( ctx, _NEW_HINT );
+   swrast->AllowVertexFog = value;
+}
+
+/** Set AllowPixelFog, after flagging _NEW_HINT state as changed. */
+void _swrast_allow_pixel_fog( GLcontext *ctx, GLboolean value )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (SWRAST_DEBUG)
+      _mesa_debug(ctx, "_swrast_allow_pixel_fog %d\n", value);
+   swrast->InvalidateState( ctx, _NEW_HINT );
+   swrast->AllowPixelFog = value;
+}
+
+/**
+ * Allocate and initialize the swrast context for ctx and store it in
+ * ctx->swrast_context.
+ * \return GL_TRUE on success, GL_FALSE if an allocation failed (anything
+ *         allocated here is freed again before returning).
+ */
+GLboolean
+_swrast_CreateContext( GLcontext *ctx )
+{
+   SWcontext *swrast = (SWcontext *) CALLOC(sizeof(SWcontext));
+   GLuint unit;
+
+   if (SWRAST_DEBUG)
+      _mesa_debug(ctx, "_swrast_CreateContext\n");
+
+   if (swrast == NULL)
+      return GL_FALSE;
+
+   swrast->NewState = ~0;
+   swrast->choose_point = _swrast_choose_point;
+   swrast->choose_line = _swrast_choose_line;
+   swrast->choose_triangle = _swrast_choose_triangle;
+   swrast->InvalidatePointMask = _SWRAST_NEW_POINT;
+   swrast->InvalidateLineMask = _SWRAST_NEW_LINE;
+   swrast->InvalidateTriangleMask = _SWRAST_NEW_TRIANGLE;
+
+   /* start with the validate stubs installed and the module asleep */
+   swrast->Point = _swrast_validate_point;
+   swrast->Line = _swrast_validate_line;
+   swrast->Triangle = _swrast_validate_triangle;
+   swrast->BlendFunc = _swrast_validate_blend_func;
+   swrast->InvalidateState = _swrast_sleep;
+
+   swrast->AllowVertexFog = GL_TRUE;
+   swrast->AllowPixelFog = GL_TRUE;
+   /* Optimized Accum buffer */
+   swrast->_IntegerAccumMode = GL_FALSE;
+   swrast->_IntegerAccumScaler = 0.0;
+   for (unit = 0; unit < MAX_TEXTURE_IMAGE_UNITS; unit++)
+      swrast->TextureSample[unit] = NULL;
+
+   swrast->SpanArrays = MALLOC_STRUCT(span_arrays);
+   if (swrast->SpanArrays == NULL) {
+      FREE(swrast);
+      return GL_FALSE;
+   }
+
+   /* init point span buffer */
+   swrast->PointSpan.array = swrast->SpanArrays;
+   swrast->PointSpan.primitive = GL_POINT;
+   swrast->PointSpan.start = 0;
+   swrast->PointSpan.end = 0;
+   swrast->PointSpan.facing = 0;
+
+   swrast->TexelBuffer = (GLchan *) MALLOC(ctx->Const.MaxTextureImageUnits *
+                                           MAX_WIDTH * 4 * sizeof(GLchan));
+   if (swrast->TexelBuffer == NULL) {
+      FREE(swrast->SpanArrays);
+      FREE(swrast);
+      return GL_FALSE;
+   }
+
+   ctx->swrast_context = swrast;
+   return GL_TRUE;
+}
+
+/** Free ctx's swrast context; does nothing if none is attached. */
+void
+_swrast_DestroyContext( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (SWRAST_DEBUG)
+      _mesa_debug(ctx, "_swrast_DestroyContext\n");
+   if (!swrast)
+      return;   /* never created, or already destroyed */
+
+   FREE( swrast->SpanArrays );
+   FREE( swrast->TexelBuffer );
+   FREE( swrast );
+   ctx->swrast_context = NULL;
+}
+
+
+/** Return the address of the Driver member of ctx's swrast context. */
+struct swrast_device_driver *
+_swrast_GetDeviceDriverReference( GLcontext *ctx )
+{
+   return &SWRAST_CONTEXT(ctx)->Driver;
+}
+
+/** Write out any point fragments still pending in swrast->PointSpan. */
+void
+_swrast_flush( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   if (!(swrast->PointSpan.end > 0))
+      return;   /* nothing buffered */
+
+   if (ctx->Visual.rgbMode)
+      _swrast_write_rgba_span(ctx, &swrast->PointSpan);
+   else
+      _swrast_write_index_span(ctx, &swrast->PointSpan);
+   swrast->PointSpan.end = 0;
+}
+
+/** Set the current primitive; flush pending points on leaving GL_POINTS. */
+void
+_swrast_render_primitive( GLcontext *ctx, GLenum prim )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (prim != GL_POINTS && swrast->Primitive == GL_POINTS)
+      _swrast_flush(ctx);
+   swrast->Primitive = prim;
+}
+
+/** Call the driver's SpanRenderStart hook, if set; empty PointSpan. */
+void
+_swrast_render_start( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   if (swrast->Driver.SpanRenderStart != NULL)
+      swrast->Driver.SpanRenderStart( ctx );
+   swrast->PointSpan.end = 0;   /* no point fragments pending */
+}
+ 
+/* Flush pending point fragments first: flushing writes spans, so it
+ * has to happen before the driver's SpanRenderFinish hook is called. */
+void _swrast_render_finish( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   _swrast_flush(ctx);
+   if (swrast->Driver.SpanRenderFinish)
+      swrast->Driver.SpanRenderFinish( ctx );
+}
+
+
+#define SWRAST_DEBUG_VERTICES 0
+
+/** Debug helper: print a vertex; a no-op unless SWRAST_DEBUG_VERTICES. */
+void
+_swrast_print_vertex( GLcontext *ctx, const SWvertex *v )
+{
+   GLuint unit;
+
+   if (!SWRAST_DEBUG_VERTICES)
+      return;
+
+   _mesa_debug(ctx, "win %f %f %f %f\n",
+               v->win[0], v->win[1], v->win[2], v->win[3]);
+   for (unit = 0; unit < ctx->Const.MaxTextureCoordUnits; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+      _mesa_debug(ctx, "texcoord[%d] %f %f %f %f\n", unit,
+                  v->texcoord[unit][0], v->texcoord[unit][1],
+                  v->texcoord[unit][2], v->texcoord[unit][3]);
+   }
+#if CHAN_TYPE == GL_FLOAT
+   _mesa_debug(ctx, "color %f %f %f %f\n",
+               v->color[0], v->color[1], v->color[2], v->color[3]);
+   _mesa_debug(ctx, "spec %f %f %f %f\n",
+               v->specular[0], v->specular[1], v->specular[2], v->specular[3]);
+#else
+   _mesa_debug(ctx, "color %d %d %d %d\n",
+               v->color[0], v->color[1], v->color[2], v->color[3]);
+   _mesa_debug(ctx, "spec %d %d %d %d\n",
+               v->specular[0], v->specular[1], v->specular[2], v->specular[3]);
+#endif
+   _mesa_debug(ctx, "fog %f\n", v->fog);
+   _mesa_debug(ctx, "index %d\n", v->index);
+   _mesa_debug(ctx, "pointsize %f\n", v->pointSize);
+   _mesa_debug(ctx, "\n");
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_context.h b/dist/Mesa/src/mesa/swrast/s_context.h
new file mode 100644
index 000000000..3c5a4c322
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_context.h
@@ -0,0 +1,416 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file swrast/s_context.h
+ * \brief Software rasterization context and private types.
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/**
+ * \mainpage swrast module
+ *
+ * This module, software rasterization, contains the software fallback
+ * routines for drawing points, lines, triangles, bitmaps and images.
+ * All rendering boils down to writing spans (arrays) of pixels with
+ * particular colors.  The span-writing routines must be implemented
+ * by the device driver.
+ */
+
+
+#ifndef S_CONTEXT_H
+#define S_CONTEXT_H
+
+#include "mtypes.h"
+#include "swrast.h"
+
+
+/**
+ * \defgroup SpanFlags SPAN_XXX-flags
+ * Single-bit flags, OR'ed into sw_span::interpMask (values given as base/step,
+ * still to be computed) or sw_span::arrayMask (span_arrays already filled in).
+ */
+/*@{*/
+#define SPAN_RGBA         0x001
+#define SPAN_SPEC         0x002
+#define SPAN_INDEX        0x004
+#define SPAN_Z            0x008
+#define SPAN_W            0x010
+#define SPAN_FOG          0x020
+#define SPAN_TEXTURE      0x040
+#define SPAN_INT_TEXTURE  0x080
+#define SPAN_LAMBDA       0x100
+#define SPAN_COVERAGE     0x200
+#define SPAN_FLAT         0x400  /**< flat shading? */
+#define SPAN_XY           0x800
+#define SPAN_MASK        0x1000
+#define SPAN_VARYING     0x2000
+/*@}*/
+
+
+/**
+ * \struct span_arrays 
+ * \brief Arrays of fragment values.
+ *
+ * These will either be computed from the base/step values in struct sw_span
+ * or filled in by glDraw/CopyPixels, etc.
+ * These arrays are separated out of sw_span to conserve memory.
+ */
+struct span_arrays {
+   /* XXX the next three fields could go into a union */
+   GLchan  rgb[MAX_WIDTH][3];
+   GLchan  rgba[MAX_WIDTH][4];
+   GLuint  index[MAX_WIDTH];
+   GLchan  spec[MAX_WIDTH][4]; /* specular color */
+   GLint   x[MAX_WIDTH];  /**< X/Y used for point/line rendering only */
+   GLint   y[MAX_WIDTH];  /**< X/Y used for point/line rendering only */
+   GLuint  z[MAX_WIDTH];
+   GLfloat fog[MAX_WIDTH];
+   GLfloat texcoords[MAX_TEXTURE_COORD_UNITS][MAX_WIDTH][4];
+   GLfloat lambda[MAX_TEXTURE_COORD_UNITS][MAX_WIDTH];
+   GLfloat coverage[MAX_WIDTH];
+   GLfloat varying[MAX_WIDTH][MAX_VARYING_VECTORS][VARYINGS_PER_VECTOR];
+
+   /** This mask indicates which fragments are alive or culled */
+   GLubyte mask[MAX_WIDTH];
+};
+
+
+/**
+ * \struct sw_span
+ * \brief Contains data for either a horizontal line or a set of
+ * pixels that are passed through a pipeline of functions before being
+ * drawn.
+ *
+ * The sw_span structure describes the colors, Z, fogcoord, texcoords,
+ * etc for either a horizontal run or an array of independent pixels.
+ * We can either specify a base/step to indicate interpolated values, or
+ * fill in arrays of values.  The interpMask and arrayMask bitfields
+ * indicate which are active.
+ *
+ * With this structure it's easy to hand-off span rasterization to
+ * subroutines instead of doing it all inline in the triangle functions
+ * like we used to do.
+ * It also cleans up the local variable namespace a great deal.
+ *
+ * It would be interesting to experiment with multiprocessor rasterization
+ * with this structure.  The triangle rasterizer could simply emit a
+ * stream of these structures which would be consumed by one or more
+ * span-processing threads which could run in parallel.
+ */
+struct sw_span {
+   GLint x, y;
+
+   /** Only need to process pixels between start <= i < end */
+   /** At this time, start is always zero. */
+   GLuint start, end;
+
+   /** This flag indicates that mask[] array is effectively filled with ones */
+   GLboolean writeAll;
+
+   /** one of GL_LINE, GL_POLYGON, GL_BITMAP (and presumably GL_POINT) */
+   GLenum primitive;
+
+   /** 0 = front-facing span, 1 = back-facing span (for two-sided stencil) */
+   GLuint facing;
+
+   /**
+    * This bitmask (of  \link SpanFlags SPAN_* flags\endlink) indicates
+    * which of the x/xStep variables are relevant.
+    */
+   GLbitfield interpMask;
+
+   /* For horizontal spans, step is the partial derivative wrt X.
+    * For lines, step is the delta from one fragment to the next.
+    */
+#if CHAN_TYPE == GL_FLOAT
+   GLfloat red, redStep;
+   GLfloat green, greenStep;
+   GLfloat blue, blueStep;
+   GLfloat alpha, alphaStep;
+   GLfloat specRed, specRedStep;
+   GLfloat specGreen, specGreenStep;
+   GLfloat specBlue, specBlueStep;
+#else /* CHAN_TYPE == GL_UNSIGNED_BYTE or GL_UNSIGNED_SHORT */
+   GLfixed red, redStep;
+   GLfixed green, greenStep;
+   GLfixed blue, blueStep;
+   GLfixed alpha, alphaStep;
+   GLfixed specRed, specRedStep;
+   GLfixed specGreen, specGreenStep;
+   GLfixed specBlue, specBlueStep;
+#endif
+   GLfixed index, indexStep;
+   GLfixed z, zStep;    /* XXX z should probably be GLuint */
+   GLfloat fog, fogStep;
+   GLfloat tex[MAX_TEXTURE_COORD_UNITS][4];  /* s, t, r, q */
+   GLfloat texStepX[MAX_TEXTURE_COORD_UNITS][4];
+   GLfloat texStepY[MAX_TEXTURE_COORD_UNITS][4];
+   GLfixed intTex[2], intTexStep[2];  /* s, t only */
+   GLfloat var[MAX_VARYING_VECTORS][VARYINGS_PER_VECTOR];
+   GLfloat varStepX[MAX_VARYING_VECTORS][VARYINGS_PER_VECTOR];
+   GLfloat varStepY[MAX_VARYING_VECTORS][VARYINGS_PER_VECTOR];
+
+   /* partial derivatives wrt X and Y. */
+   GLfloat dzdx, dzdy;
+   GLfloat w, dwdx, dwdy;
+   GLfloat drdx, drdy;
+   GLfloat dgdx, dgdy;
+   GLfloat dbdx, dbdy;
+   GLfloat dadx, dady;
+   GLfloat dsrdx, dsrdy;
+   GLfloat dsgdx, dsgdy;
+   GLfloat dsbdx, dsbdy;
+   GLfloat dfogdx, dfogdy;
+
+   /**
+    * This bitmask (of \link SpanFlags SPAN_* flags\endlink) indicates
+    * which of the fragment arrays in the span_arrays struct are relevant.
+    */
+   GLbitfield arrayMask;
+
+   /**
+    * We store the arrays of fragment values in a separate struct so
+    * that we can allocate sw_span structs on the stack without using
+    * a lot of memory.  The span_arrays struct is about 400KB while the
+    * sw_span struct is only about 512 bytes.
+    */
+   struct span_arrays *array;
+};
+
+/** Set up sw_span S; expands to use a caller variable named 'ctx'.  x, y and writeAll are left unset. */
+#define INIT_SPAN(S, PRIMITIVE, END, INTERP_MASK, ARRAY_MASK)	\
+do {								\
+   (S).primitive = (PRIMITIVE);					\
+   (S).interpMask = (INTERP_MASK);				\
+   (S).arrayMask = (ARRAY_MASK);				\
+   (S).start = 0;						\
+   (S).end = (END);						\
+   (S).facing = 0;						\
+   (S).array = SWRAST_CONTEXT(ctx)->SpanArrays;			\
+} while (0)
+
+/* type of the SWcontext::TextureSample[] entries */
+typedef void (*texture_sample_func)(GLcontext *ctx,
+                                    const struct gl_texture_object *tObj,
+                                    GLuint n, const GLfloat texcoords[][4],
+                                    const GLfloat lambda[], GLchan rgba[][4]);
+/* type of SWcontext::BlendFunc */
+typedef void (_ASMAPIP blend_func)( GLcontext *ctx, GLuint n,
+                                    const GLubyte mask[],
+                                    GLchan src[][4], CONST GLchan dst[][4] );
+/* types of SWcontext::Point, ::Line and ::Triangle (and the Spec* variants) */
+typedef void (*swrast_point_func)( GLcontext *ctx, const SWvertex *);
+
+typedef void (*swrast_line_func)( GLcontext *ctx,
+                                  const SWvertex *, const SWvertex *);
+
+typedef void (*swrast_tri_func)( GLcontext *ctx, const SWvertex *,
+                                 const SWvertex *, const SWvertex *);
+
+/* type of SWcontext::ValidateTextureImage */
+typedef void (*validate_texture_image_func)(GLcontext *ctx,
+                                            struct gl_texture_object *texObj,
+                                            GLuint face, GLuint level);
+
+
+/** \defgroup Bitmasks
+ * Bit flags recording which rasterization options are enabled
+ * (RasterMask, see SWcontext::_RasterMask).  0x100 and 0x200 are not assigned.
+ */
+/*@{*/
+#define ALPHATEST_BIT		0x001	/**< Alpha-test pixels */
+#define BLEND_BIT		0x002	/**< Blend pixels */
+#define DEPTH_BIT		0x004	/**< Depth-test pixels */
+#define FOG_BIT			0x008	/**< Fog pixels */
+#define LOGIC_OP_BIT		0x010	/**< Apply logic op in software */
+#define CLIP_BIT		0x020	/**< Scissor or window clip pixels */
+#define STENCIL_BIT		0x040	/**< Stencil pixels */
+#define MASKING_BIT		0x080	/**< Do glColorMask or glIndexMask */
+#define MULTI_DRAW_BIT		0x400	/**< Write to more than one color- */
+                                        /**< buffer or no buffers. */
+#define OCCLUSION_BIT           0x800   /**< GL_HP_occlusion_test enabled */
+#define TEXTURE_BIT		0x1000	/**< Texturing really enabled */
+#define FRAGPROG_BIT            0x2000  /**< Fragment program enabled */
+#define ATIFRAGSHADER_BIT       0x4000  /**< ATI Fragment shader enabled */
+#define CLAMPING_BIT            0x8000  /**< Clamp colors to [0,1] */
+/*@}*/
+/** OR of _NEW_* state flags; presumably the changes that invalidate _RasterMask - confirm at its use. */
+#define _SWRAST_NEW_RASTERMASK (_NEW_BUFFERS|	\
+			        _NEW_SCISSOR|	\
+			        _NEW_COLOR|	\
+			        _NEW_DEPTH|	\
+			        _NEW_FOG|	\
+                                _NEW_PROGRAM|   \
+			        _NEW_STENCIL|	\
+			        _NEW_TEXTURE|	\
+			        _NEW_VIEWPORT|	\
+			        _NEW_DEPTH) /* _NEW_DEPTH is listed twice; harmless in a bitwise OR */
+
+
+/**
+ * \struct SWcontext
+ * \brief Software rasterizer state; fetched from a GLcontext by SWRAST_CONTEXT().
+ */
+typedef struct
+{
+   /** Driver interface:
+    */
+   struct swrast_device_driver Driver;
+
+   /** Configuration mechanisms to make software rasterizer match
+    * characteristics of the hardware rasterizer (if present):
+    */
+   GLboolean AllowVertexFog;
+   GLboolean AllowPixelFog;
+
+   /** Derived values, invalidated on statechanges, updated from
+    * _swrast_validate_derived():
+    */
+   GLbitfield _RasterMask;
+   GLfloat _BackfaceSign;
+   GLboolean _PreferPixelFog;    /* Compute fog blend factor per fragment? */
+   GLboolean _AnyTextureCombine;
+   GLchan _FogColor[3];
+   GLboolean _FogEnabled;
+   GLenum _FogMode;  /* either GL_FOG_MODE or fragment program's fog mode */
+
+   /* Accum buffer temporaries.
+    */
+   GLboolean _IntegerAccumMode;	/**< Storing unscaled integers? */
+   GLfloat _IntegerAccumScaler;	/**< Implicit scale factor */
+
+   /* Working values:
+    */
+   GLuint StippleCounter;    /**< Line stipple counter */
+   GLbitfield NewState;
+   GLuint StateChanges;
+   GLenum Primitive;    /* current primitive being drawn (ala glBegin) */
+
+   void (*InvalidateState)( GLcontext *ctx, GLbitfield new_state );
+
+   /**
+    * When the NewState mask intersects these masks, we invalidate the
+    * Point/Line/Triangle function pointers below.
+    */
+   /*@{*/
+   GLbitfield InvalidatePointMask;
+   GLbitfield InvalidateLineMask;
+   GLbitfield InvalidateTriangleMask;
+   /*@}*/
+
+   /**
+    * Device drivers plug in functions for these callbacks.
+    * Will be called when the GL state change mask intersects the above masks.
+    */
+   /*@{*/
+   void (*choose_point)( GLcontext * );
+   void (*choose_line)( GLcontext * );
+   void (*choose_triangle)( GLcontext * );
+   /*@}*/
+
+   /**
+    * Current point, line and triangle drawing functions.
+    */
+   /*@{*/
+   swrast_point_func Point;
+   swrast_line_func Line;
+   swrast_tri_func Triangle;
+   /*@}*/
+
+   /**
+    * Placeholders for when separate specular (or secondary color) is
+    * enabled but texturing is not.
+    */
+   /*@{*/
+   swrast_point_func SpecPoint;
+   swrast_line_func SpecLine;
+   swrast_tri_func SpecTriangle;
+   /*@}*/
+
+   /**
+    * Typically, we'll allocate a sw_span structure as a local variable
+    * and set its 'array' pointer to point to this object.  The reason is
+    * this object is big and causes problems when allocated on the stack
+    * on some systems.
+    */
+   struct span_arrays *SpanArrays;
+
+   /**
+    * Used to buffer N GL_POINTS, instead of rendering one by one.
+    */
+   struct sw_span PointSpan;
+
+   /** Internal hooks, kept up to date by the same mechanism as above.
+    */
+   blend_func BlendFunc;
+   texture_sample_func TextureSample[MAX_TEXTURE_IMAGE_UNITS];
+
+   /** Buffer for saving the sampled texture colors.
+    * Needed for GL_ARB_texture_env_crossbar implementation.
+    */
+   GLchan *TexelBuffer;
+
+   validate_texture_image_func ValidateTextureImage;
+
+} SWcontext;
+
+/** Updates the "derived values" fields of SWcontext (per the comment on those fields). */
+extern void
+_swrast_validate_derived( GLcontext *ctx );
+
+/** Fetch the SWcontext attached to a GLcontext; the argument is parenthesized so any pointer expression works. */
+#define SWRAST_CONTEXT(ctx) ((SWcontext *)(ctx)->swrast_context)
+/** Call the driver's SpanRenderStart hook if one is installed (the hook is optional). */
+#define RENDER_START(SWctx, GLctx)			\
+   do {							\
+      if ((SWctx)->Driver.SpanRenderStart) {		\
+         (*(SWctx)->Driver.SpanRenderStart)(GLctx);	\
+      }							\
+   } while (0)
+/** Call the driver's SpanRenderFinish hook if one is installed (the hook is optional). */
+#define RENDER_FINISH(SWctx, GLctx)			\
+   do {							\
+      if ((SWctx)->Driver.SpanRenderFinish) {		\
+         (*(SWctx)->Driver.SpanRenderFinish)(GLctx);	\
+      }							\
+   } while (0)
+
+
+
+/*
+ * XXX these macros are just bandages for now in order to make
+ * CHAN_BITS==32 compile cleanly: identity when CHAN_TYPE is GL_FLOAT,
+ * IntToFixed/FixedToInt otherwise.  Should probably go elsewhere some day.
+ */
+#if CHAN_TYPE == GL_FLOAT
+#define ChanToFixed(X)  (X)
+#define FixedToChan(X)  (X)
+#else
+#define ChanToFixed(X)  IntToFixed(X)
+#define FixedToChan(X)  FixedToInt(X)
+#endif
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_copypix.c b/dist/Mesa/src/mesa/swrast/s_copypix.c
new file mode 100644
index 000000000..88fd44513
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_copypix.c
@@ -0,0 +1,943 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "convolve.h"
+#include "histogram.h"
+#include "image.h"
+#include "macros.h"
+#include "imports.h"
+#include "pixel.h"
+
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_span.h"
+#include "s_stencil.h"
+#include "s_zoom.h"
+
+
+
+/**
+ * Determine if there's overlap in an image copy.
+ * This test also compensates for the fact that copies are done from
+ * bottom to top and overlaps can sometimes be handled correctly
+ * without making a temporary image copy.
+ * \return GL_TRUE if the source must be buffered; negative zoomX is handled.
+ */
+static GLboolean
+regions_overlap(GLint srcx, GLint srcy,
+                GLint dstx, GLint dsty,
+                GLint width, GLint height,
+                GLfloat zoomX, GLfloat zoomY)
+{
+   if (zoomX == 1.0 && zoomY == 1.0) {
+      /* no zoom */
+      if (srcx >= dstx + width || (srcx + width <= dstx)) {
+         return GL_FALSE;
+      }
+      else if (srcy < dsty) { /* this is OK */
+         return GL_FALSE;
+      }
+      else if (srcy > dsty + height) {
+         return GL_FALSE;
+      }
+      return GL_TRUE;
+   }
+   else {
+      /* With zoomX < 0 the destination lies left of dstx: the edges swap. */
+      const GLfloat dstMin = (zoomX < 0.0F) ? dstx + (width * zoomX) : dstx;
+      const GLfloat dstMax = (zoomX < 0.0F) ? dstx : dstx + (width * zoomX);
+      /* add one pixel of slop when zooming, just to be safe */
+      if ((srcx > dstMax + 1) || (srcx + width + 1 < dstMin)) {
+         return GL_FALSE;
+      }
+      else if ((srcy < dsty) && (srcy + height < dsty + (height * zoomY))) {
+         return GL_FALSE;
+      }
+      else if ((srcy > dsty) && (srcy + height > dsty + (height * zoomY))) {
+         return GL_FALSE;
+      }
+      return GL_TRUE;
+   }
+}
+
+
+/** Convert n RGBA colors from GLfloat[4] to GLchan[4], one pixel per pass.
+ *  XXX maybe move into image.c */
+static void
+float_span_to_chan(GLuint n, CONST GLfloat in[][4], GLchan out[][4])
+{
+   GLuint pix;
+   for (pix = 0; pix < n; pix++) {
+      const GLfloat *src = in[pix];
+      GLchan *dst = out[pix];
+      UNCLAMPED_FLOAT_TO_CHAN(dst[RCOMP], src[RCOMP]);
+      UNCLAMPED_FLOAT_TO_CHAN(dst[GCOMP], src[GCOMP]);
+      UNCLAMPED_FLOAT_TO_CHAN(dst[BCOMP], src[BCOMP]);
+      UNCLAMPED_FLOAT_TO_CHAN(dst[ACOMP], src[ACOMP]);
+   }
+}
+
+
+/** Convert n RGBA colors from GLchan[4] to GLfloat[4] with CHAN_TO_FLOAT.
+ *  XXX maybe move into image.c */
+static void
+chan_span_to_float(GLuint n, CONST GLchan in[][4], GLfloat out[][4])
+{
+   GLuint pix;
+   for (pix = 0; pix < n; pix++) {
+      const GLchan *src = in[pix];
+      GLfloat *dst = out[pix];
+      dst[RCOMP] = CHAN_TO_FLOAT(src[RCOMP]);
+      dst[GCOMP] = CHAN_TO_FLOAT(src[GCOMP]);
+      dst[BCOMP] = CHAN_TO_FLOAT(src[BCOMP]);
+      dst[ACOMP] = CHAN_TO_FLOAT(src[ACOMP]);
+   }
+}
+
+
+
+/*
+ * RGBA copypixels with convolution: read, pre-ops, convolve, post-ops, write.
+ */
+static void
+copy_conv_rgba_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
+                      GLint width, GLint height, GLint destx, GLint desty)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *drawRb = NULL;
+   GLboolean quick_draw;
+   GLint row;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
+   const GLuint transferOps = ctx->_ImageTransferState;
+   GLfloat *dest, *tmpImage, *convImage;
+   struct sw_span span;
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_RGBA);
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+
+   /* quick_draw: no raster ops, no zoom, row fits the buffer width -> use PutRow */
+   if (SWRAST_CONTEXT(ctx)->_RasterMask == 0
+       && !zoom
+       && destx >= 0
+       && destx + width <= (GLint) ctx->DrawBuffer->Width) {
+      quick_draw = GL_TRUE;
+      drawRb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+   }
+   else {
+      quick_draw = GL_FALSE;
+   }
+
+   /* allocate space for GLfloat image */
+   tmpImage = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
+   if (!tmpImage) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels");
+      return;
+   }
+   convImage = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
+   if (!convImage) {
+      _mesa_free(tmpImage);
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels");
+      return;
+   }
+
+   /* read source image */
+   dest = tmpImage;
+   for (row = 0; row < height; row++) {
+      GLchan rgba[MAX_WIDTH][4];
+      /* Read GLchan and convert to GLfloat */
+      _swrast_read_rgba_span(ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                             width, srcx, srcy + row, rgba);
+      chan_span_to_float(width, (CONST GLchan (*)[4]) rgba,
+                         (GLfloat (*)[4]) dest);
+      dest += 4 * width;
+   }
+
+   /* do the image transfer ops which precede convolution */
+   for (row = 0; row < height; row++) {
+      GLfloat (*rgba)[4] = (GLfloat (*)[4]) (tmpImage + row * width * 4);
+      _mesa_apply_rgba_transfer_ops(ctx,
+                                    transferOps & IMAGE_PRE_CONVOLUTION_BITS,
+                                    width, rgba);
+   }
+
+   /* do convolution (width and height are passed by pointer and may be updated) */
+   if (ctx->Pixel.Convolution2DEnabled) {
+      _mesa_convolve_2d_image(ctx, &width, &height, tmpImage, convImage);
+   }
+   else {
+      ASSERT(ctx->Pixel.Separable2DEnabled);
+      _mesa_convolve_sep_image(ctx, &width, &height, tmpImage, convImage);
+   }
+   _mesa_free(tmpImage);
+
+   /* do remaining post-convolution image transfer ops */
+   for (row = 0; row < height; row++) {
+      GLfloat (*rgba)[4] = (GLfloat (*)[4]) (convImage + row * width * 4);
+      _mesa_apply_rgba_transfer_ops(ctx,
+                                    transferOps & IMAGE_POST_CONVOLUTION_BITS,
+                                    width, rgba);
+   }
+
+   /* write the new image */
+   for (row = 0; row < height; row++) {
+      const GLfloat *src = convImage + row * width * 4;
+      GLint dy;
+
+      /* convert floats back to chan */
+      float_span_to_chan(width, (const GLfloat (*)[4]) src, span.array->rgba);
+
+      /* write row to framebuffer */
+      dy = desty + row;
+      if (quick_draw && dy >= 0 && dy < (GLint) ctx->DrawBuffer->Height) {
+         drawRb->PutRow(ctx, drawRb, width, destx, dy, span.array->rgba, NULL);
+      }
+      else {
+         span.x = destx;
+         span.y = dy;
+         span.end = width;
+         if (zoom) {
+            _swrast_write_zoomed_rgba_span(ctx, destx, desty, &span, 
+                                        (CONST GLchan (*)[4])span.array->rgba);
+         }
+         else {
+            _swrast_write_rgba_span(ctx, &span);
+         }
+      }
+   }
+
+   _mesa_free(convImage);
+}
+
+
+/*
+ * RGBA copypixels (hands off to copy_conv_rgba_pixels() when convolution is on)
+ */
+static void
+copy_rgba_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
+                 GLint width, GLint height, GLint destx, GLint desty)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *drawRb;
+   GLchan *tmpImage,*p;
+   GLboolean quick_draw;
+   GLint sy, dy, stepy, j;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
+   GLint overlapping;
+   const GLuint transferOps = ctx->_ImageTransferState;
+   struct sw_span span;
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer) {
+      /* no readbuffer - OK */
+      return;
+   }
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_RGBA);
+
+   if (ctx->Pixel.Convolution2DEnabled || ctx->Pixel.Separable2DEnabled) {
+      copy_conv_rgba_pixels(ctx, srcx, srcy, width, height, destx, desty);
+      return;
+   }
+
+   /* Determine if copy should be done bottom-to-top or top-to-bottom */
+   if (srcy < desty) {
+      /* top-down  max-to-min */
+      sy = srcy + height - 1;
+      dy = desty + height - 1;
+      stepy = -1;
+   }
+   else {
+      /* bottom-up  min-to-max */
+      sy = srcy;
+      dy = desty;
+      stepy = 1;
+   }
+
+   if (ctx->DrawBuffer == ctx->ReadBuffer) {
+      overlapping = regions_overlap(srcx, srcy, destx, desty, width, height,
+                                    ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+   }
+   else {
+      overlapping = GL_FALSE;
+   }
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   /* quick_draw: no raster ops, no zoom, row fits the buffer width -> use PutRow */
+   if (SWRAST_CONTEXT(ctx)->_RasterMask == 0
+       && !zoom
+       && destx >= 0
+       && destx + width <= (GLint) ctx->DrawBuffer->Width) {
+      quick_draw = GL_TRUE;
+      drawRb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+   }
+   else {
+      quick_draw = GL_FALSE;
+      drawRb = NULL;
+   }
+
+   if (overlapping) {
+      GLint ssy = sy;
+      tmpImage = (GLchan *) _mesa_malloc(width * height * sizeof(GLchan) * 4);
+      if (!tmpImage) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glCopyPixels" );
+         return;
+      }
+      /* read the source image */
+      p = tmpImage;
+      for (j = 0; j < height; j++, ssy += stepy) {
+         _swrast_read_rgba_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                                 width, srcx, ssy, (GLchan (*)[4]) p );
+         p += width * 4;
+      }
+      p = tmpImage;
+   }
+   else {
+      tmpImage = NULL;  /* silence compiler warnings */
+      p = NULL;
+   }
+   /* copy one row per iteration, stepping sy/dy in the direction chosen above */
+   for (j = 0; j < height; j++, sy += stepy, dy += stepy) {
+      /* Get source pixels */
+      if (overlapping) {
+         /* get from buffered image */
+         ASSERT(width < MAX_WIDTH);
+         _mesa_memcpy(span.array->rgba, p, width * sizeof(GLchan) * 4);
+         p += width * 4;
+      }
+      else {
+         /* get from framebuffer */
+         ASSERT(width < MAX_WIDTH);
+         _swrast_read_rgba_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                                 width, srcx, sy, span.array->rgba );
+      }
+
+      if (transferOps) {
+         GLfloat rgbaFloat[MAX_WIDTH][4];
+         /* convert to float, transfer, convert back to chan */
+         chan_span_to_float(width, (CONST GLchan (*)[4]) span.array->rgba,
+                            rgbaFloat);
+         _mesa_apply_rgba_transfer_ops(ctx, transferOps, width, rgbaFloat);
+         float_span_to_chan(width, (CONST GLfloat (*)[4]) rgbaFloat,
+                            span.array->rgba);
+      }
+
+      /* Write color span */
+      if (quick_draw && dy >= 0 && dy < (GLint) ctx->DrawBuffer->Height) {
+         drawRb->PutRow(ctx, drawRb, width, destx, dy, span.array->rgba, NULL);
+      }
+      else {
+         span.x = destx;
+         span.y = dy;
+         span.end = width;
+         if (zoom) {
+            _swrast_write_zoomed_rgba_span(ctx, destx, desty, &span,
+                                       (CONST GLchan (*)[4]) span.array->rgba);
+         }
+         else {
+            _swrast_write_rgba_span(ctx, &span);
+         }
+      }
+   }
+
+   if (overlapping)
+      _mesa_free(tmpImage);
+}
+
+/* Color-index copypixels: read index rows, apply shift/offset/map, write spans. */
+static void
+copy_ci_pixels( GLcontext *ctx, GLint srcx, GLint srcy,
+                GLint width, GLint height,
+                GLint destx, GLint desty )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint *tmpImage,*p;
+   GLint sy, dy, stepy;
+   GLint j;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
+   const GLboolean shift_or_offset = ctx->Pixel.IndexShift || ctx->Pixel.IndexOffset;
+   GLint overlapping;
+   struct sw_span span;
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer) {
+      /* no readbuffer - OK */
+      return;
+   }
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_INDEX);
+
+   /* Determine if copy should be bottom-to-top or top-to-bottom */
+   if (srcy<desty) {
+      /* top-down  max-to-min */
+      sy = srcy + height - 1;
+      dy = desty + height - 1;
+      stepy = -1;
+   }
+   else {
+      /* bottom-up  min-to-max */
+      sy = srcy;
+      dy = desty;
+      stepy = 1;
+   }
+
+   if (ctx->DrawBuffer == ctx->ReadBuffer) {
+      overlapping = regions_overlap(srcx, srcy, destx, desty, width, height,
+                                    ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+   }
+   else {
+      overlapping = GL_FALSE;
+   }
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   /* when source and destination overlap, buffer the whole source image first */
+   if (overlapping) {
+      GLint ssy = sy;
+      tmpImage = (GLuint *) _mesa_malloc(width * height * sizeof(GLuint));
+      if (!tmpImage) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glCopyPixels" );
+         return;
+      }
+      /* read the image */
+      p = tmpImage;
+      for (j = 0; j < height; j++, ssy += stepy) {
+         _swrast_read_index_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                                  width, srcx, ssy, p );
+         p += width;
+      }
+      p = tmpImage;
+   }
+   else {
+      tmpImage = NULL;  /* silence compiler warning */
+      p = NULL;
+   }
+
+   for (j = 0; j < height; j++, sy += stepy, dy += stepy) {
+      /* Get color indexes */
+      if (overlapping) {
+         _mesa_memcpy(span.array->index, p, width * sizeof(GLuint));
+         p += width;
+      }
+      else {
+         _swrast_read_index_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                                  width, srcx, sy, span.array->index );
+      }
+
+      /* Apply shift, offset, look-up table */
+      if (shift_or_offset) {
+         _mesa_shift_and_offset_ci( ctx, width, span.array->index );
+      }
+      if (ctx->Pixel.MapColorFlag) {
+         _mesa_map_ci( ctx, width, span.array->index );
+      }
+
+      /* write color indexes */
+      span.x = destx;
+      span.y = dy;
+      span.end = width;
+      if (zoom)
+         _swrast_write_zoomed_index_span(ctx, destx, desty, &span);
+      else
+         _swrast_write_index_span(ctx, &span);
+   }
+
+   if (overlapping)
+      _mesa_free(tmpImage);
+}
+
+
+/**
+ * Turn a row of floating point depth values into integer Z values,
+ * applying the pixel transfer DepthScale and DepthBias.
+ * \param width  number of entries processed in depth[] and z[]
+ */
+static void
+scale_and_bias_z(GLcontext *ctx, GLuint width,
+                 const GLfloat depth[], GLuint z[])
+{
+   const GLuint zMax = ctx->DrawBuffer->_DepthMax;
+   const GLboolean identity = (ctx->Pixel.DepthScale == 1.0 &&
+                               ctx->Pixel.DepthBias == 0.0);
+   GLuint k;
+
+   if (zMax > 0xffffff || !identity) {
+      /* general path: scale, bias and clamp; need to be careful with overflow */
+      const GLdouble zMaxD = ctx->DrawBuffer->_DepthMaxF;
+      for (k = 0; k < width; k++) {
+         GLdouble d = depth[k] * ctx->Pixel.DepthScale + ctx->Pixel.DepthBias;
+         d = CLAMP(d, 0.0, 1.0) * zMaxD;
+         /* saturate at the buffer's maximum Z */
+         z[k] = (d >= zMaxD) ? zMax : (GLuint) d;
+      }
+   }
+   else {
+      /* fast path: no scale or bias and no clamping and no worry of overflow */
+      const GLfloat zMaxF = ctx->DrawBuffer->_DepthMaxF;
+      for (k = 0; k < width; k++) {
+         z[k] = (GLuint) (depth[k] * zMaxF);
+      }
+   }
+}
+
+
+
+
+/*
+ * Depth copypixels (read as float, scale/bias, write spans). TODO: Optimize!!!!
+ */
+static void
+copy_depth_pixels( GLcontext *ctx, GLint srcx, GLint srcy,
+                   GLint width, GLint height,
+                   GLint destx, GLint desty )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *readRb = fb->_DepthBuffer;
+   GLfloat *p, *tmpImage;
+   GLint sy, dy, stepy;
+   GLint j;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
+   GLint overlapping;
+   struct sw_span span;
+
+   if (!readRb) {
+      /* no readbuffer - OK */
+      return;
+   }
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_Z);
+
+   /* Determine if copy should be bottom-to-top or top-to-bottom */
+   if (srcy<desty) {
+      /* top-down  max-to-min */
+      sy = srcy + height - 1;
+      dy = desty + height - 1;
+      stepy = -1;
+   }
+   else {
+      /* bottom-up  min-to-max */
+      sy = srcy;
+      dy = desty;
+      stepy = 1;
+   }
+
+   if (ctx->DrawBuffer == ctx->ReadBuffer) {
+      overlapping = regions_overlap(srcx, srcy, destx, desty, width, height,
+                                    ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+   }
+   else {
+      overlapping = GL_FALSE;
+   }
+
+   _swrast_span_default_color(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   /* when source and destination overlap, buffer the whole source image first */
+   if (overlapping) {
+      GLint ssy = sy;
+      tmpImage = (GLfloat *) _mesa_malloc(width * height * sizeof(GLfloat));
+      if (!tmpImage) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glCopyPixels" );
+         return;
+      }
+      p = tmpImage;
+      for (j = 0; j < height; j++, ssy += stepy) {
+         _swrast_read_depth_span_float(ctx, readRb, width, srcx, ssy, p);
+         p += width;
+      }
+      p = tmpImage;
+   }
+   else {
+      tmpImage = NULL;  /* silence compiler warning */
+      p = NULL;
+   }
+
+   for (j = 0; j < height; j++, sy += stepy, dy += stepy) {
+      GLfloat depth[MAX_WIDTH];
+      /* get depth values */
+      if (overlapping) {
+         _mesa_memcpy(depth, p, width * sizeof(GLfloat));
+         p += width;
+      }
+      else {
+         _swrast_read_depth_span_float(ctx, readRb, width, srcx, sy, depth);
+      }
+
+      /* apply scale and bias */
+      scale_and_bias_z(ctx, width, depth, span.array->z);
+
+      /* write depth values */
+      span.x = destx;
+      span.y = dy;
+      span.end = width;
+      if (fb->Visual.rgbMode) {
+         if (zoom)
+            _swrast_write_zoomed_rgba_span(ctx, destx, desty, &span, 
+                                       (const GLchan (*)[4]) span.array->rgba);
+         else
+            _swrast_write_rgba_span(ctx, &span);
+      }
+      else {
+         if (zoom)
+            _swrast_write_zoomed_index_span(ctx, destx, desty, &span);
+         else
+            _swrast_write_index_span(ctx, &span);
+      }
+   }
+
+   if (overlapping)
+      _mesa_free(tmpImage);
+}
+
+/* Software glCopyPixels path for GL_STENCIL: reads height rows of width stencil
+ * values starting at (srcx, srcy) and writes them starting at (destx, desty). */
+static void
+copy_stencil_pixels( GLcontext *ctx, GLint srcx, GLint srcy,
+                     GLint width, GLint height,
+                     GLint destx, GLint desty )
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->_StencilBuffer;   /* source of the stencil values */
+   GLint sy, dy, stepy;   /* current source row, current dest row, row increment (+1/-1) */
+   GLint j;
+   GLstencil *p, *tmpImage;   /* cursor into / base of the snapshot used when regions overlap */
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
+   const GLboolean shift_or_offset = ctx->Pixel.IndexShift || ctx->Pixel.IndexOffset;
+   GLint overlapping;
+
+   if (!rb) {
+      /* the read framebuffer has no stencil renderbuffer - nothing to copy, not an error */
+      return;
+   }
+
+   /* Choose the row order: last row first when srcy < desty, first row first otherwise */
+   if (srcy < desty) {
+      /* top-down  max-to-min */
+      sy = srcy + height - 1;
+      dy = desty + height - 1;
+      stepy = -1;
+   }
+   else {
+      /* bottom-up  min-to-max */
+      sy = srcy;
+      dy = desty;
+      stepy = 1;
+   }
+
+   if (ctx->DrawBuffer == ctx->ReadBuffer) {   /* overlap is only tested for a same-framebuffer copy */
+      overlapping = regions_overlap(srcx, srcy, destx, desty, width, height,
+                                    ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+   }
+   else {
+      overlapping = GL_FALSE;
+   }
+
+   if (overlapping) {   /* snapshot every source row before the first write */
+      GLint ssy = sy;   /* separate row counter so sy is still at the first row for the write loop */
+      tmpImage = (GLstencil *) _mesa_malloc(width * height * sizeof(GLstencil));
+      if (!tmpImage) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glCopyPixels" );
+         return;
+      }
+      p = tmpImage;
+      for (j = 0; j < height; j++, ssy += stepy) {
+         _swrast_read_stencil_span( ctx, rb, width, srcx, ssy, p );
+         p += width;
+      }
+      p = tmpImage;   /* rewind: rows are consumed below in the order they were saved */
+   }
+   else {
+      tmpImage = NULL;  /* silence compiler warning */
+      p = NULL;
+   }
+
+   for (j = 0; j < height; j++, sy += stepy, dy += stepy) {
+      GLstencil stencil[MAX_WIDTH];   /* one row; NOTE(review): assumes width <= MAX_WIDTH - confirm at caller */
+
+      /* Get stencil values: from the snapshot if overlapping, else straight from the buffer */
+      if (overlapping) {
+         _mesa_memcpy(stencil, p, width * sizeof(GLstencil));
+         p += width;
+      }
+      else {
+         _swrast_read_stencil_span( ctx, rb, width, srcx, sy, stencil );
+      }
+
+      /* Apply index shift/offset, then the stencil look-up table, each only if enabled */
+      if (shift_or_offset) {
+         _mesa_shift_and_offset_stencil( ctx, width, stencil );
+      }
+      if (ctx->Pixel.MapStencilFlag) {
+         _mesa_map_stencil( ctx, width, stencil );
+      }
+
+      /* Write stencil values; NOTE(review): zoomed call presumably takes (destx, desty) as zoom origin - confirm */
+      if (zoom) {
+         _swrast_write_zoomed_stencil_span(ctx, destx, desty, width,
+                                           destx, dy, stencil);
+      }
+      else {
+         _swrast_write_stencil_span( ctx, width, destx, dy, stencil );
+      }
+   }
+
+   if (overlapping)
+      _mesa_free(tmpImage);
+}
+
+
+/**
+ * Copy depth and stencil pixels together (GL_DEPTH_STENCIL_EXT).  Stencil is
+ * skipped when the stencil write mask is 0, depth when depth writes are off.
+ * Not efficient: drivers with combined depth/stencil should do their own copy.
+ */
+static void
+copy_depth_stencil_pixels(GLcontext *ctx,
+                          const GLint srcX, const GLint srcY,
+                          const GLint width, const GLint height,
+                          const GLint destX, const GLint destY)
+{
+   struct gl_renderbuffer *stencilReadRb, *depthReadRb, *depthDrawRb;
+   GLint sy, dy, stepy;   /* current source row, current dest row, row increment (+1/-1) */
+   GLint j;
+   GLstencil *tempStencilImage = NULL, *stencilPtr = NULL;
+   GLfloat *tempDepthImage = NULL, *depthPtr = NULL;
+   const GLfloat depthScale = ctx->DrawBuffer->_DepthMaxF;
+   const GLuint stencilMask = ctx->Stencil.WriteMask[0];
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
+   const GLboolean shiftOrOffset
+      = ctx->Pixel.IndexShift || ctx->Pixel.IndexOffset;
+   const GLboolean scaleOrBias
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   GLint overlapping;
+
+   depthDrawRb = ctx->DrawBuffer->_DepthBuffer;
+   depthReadRb = ctx->ReadBuffer->_DepthBuffer;
+   stencilReadRb = ctx->ReadBuffer->_StencilBuffer;
+
+   ASSERT(depthDrawRb);
+   ASSERT(depthReadRb);
+   ASSERT(stencilReadRb);
+
+   /* Choose the row order: last row first when srcY < destY, first row first otherwise */
+   if (srcY < destY) {
+      /* top-down  max-to-min */
+      sy = srcY + height - 1;
+      dy = destY + height - 1;
+      stepy = -1;
+   }
+   else {
+      /* bottom-up  min-to-max */
+      sy = srcY;
+      dy = destY;
+      stepy = 1;
+   }
+
+   if (ctx->DrawBuffer == ctx->ReadBuffer) {   /* overlap is only tested for a same-framebuffer copy */
+      overlapping = regions_overlap(srcX, srcY, destX, destY, width, height,
+                                    ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+   }
+   else {
+      overlapping = GL_FALSE;
+   }
+
+   if (overlapping) {   /* snapshot the source rows of each enabled plane before any write */
+      GLint ssy = sy;
+
+      if (stencilMask != 0x0) {
+         tempStencilImage
+            = (GLstencil *) _mesa_malloc(width * height * sizeof(GLstencil));
+         if (!tempStencilImage) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels");
+            return;
+         }
+
+         /* get copy of stencil pixels */
+         stencilPtr = tempStencilImage;
+         for (j = 0; j < height; j++, ssy += stepy) {
+            _swrast_read_stencil_span(ctx, stencilReadRb,
+                                      width, srcX, ssy, stencilPtr);
+            stencilPtr += width;
+         }
+         stencilPtr = tempStencilImage;
+      }
+
+      if (ctx->Depth.Mask) {
+         tempDepthImage
+            = (GLfloat *) _mesa_malloc(width * height * sizeof(GLfloat));
+         if (!tempDepthImage) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels");
+            _mesa_free(tempStencilImage);
+            return;
+         }
+
+         /* get copy of depth pixels; ssy restarts at sy because the stencil loop may have advanced it */
+         depthPtr = tempDepthImage;
+         for (j = 0, ssy = sy; j < height; j++, ssy += stepy) {
+            _swrast_read_depth_span_float(ctx, depthReadRb,
+                                          width, srcX, ssy, depthPtr);
+            depthPtr += width;
+         }
+         depthPtr = tempDepthImage;
+      }
+   }
+
+   for (j = 0; j < height; j++, sy += stepy, dy += stepy) {
+      if (stencilMask != 0x0) {
+         GLstencil stencil[MAX_WIDTH];   /* NOTE(review): assumes width <= MAX_WIDTH - confirm at caller */
+
+         /* Get stencil values: from the snapshot if overlapping, else from the buffer */
+         if (overlapping) {
+            _mesa_memcpy(stencil, stencilPtr, width * sizeof(GLstencil));
+            stencilPtr += width;
+         }
+         else {
+            _swrast_read_stencil_span(ctx, stencilReadRb,
+                                      width, srcX, sy, stencil);
+         }
+
+         /* Apply shift, offset, look-up table */
+         if (shiftOrOffset) {
+            _mesa_shift_and_offset_stencil(ctx, width, stencil);
+         }
+         if (ctx->Pixel.MapStencilFlag) {
+            _mesa_map_stencil(ctx, width, stencil);
+         }
+
+         /* Write values */
+         if (zoom) {
+            _swrast_write_zoomed_stencil_span(ctx, destX, destY, width,
+                                              destX, dy, stencil);
+         }
+         else {
+            _swrast_write_stencil_span( ctx, width, destX, dy, stencil );
+         }
+      }
+
+      if (ctx->Depth.Mask) {
+         GLfloat depth[MAX_WIDTH];
+         GLuint zVals32[MAX_WIDTH];
+         GLushort zVals16[MAX_WIDTH];
+         GLvoid *zVals;   /* points at whichever of zVals16/zVals32 matches the draw buffer */
+         GLuint zBytes;   /* bytes per converted Z value: 2 or 4 */
+
+         /* get depth values: from the snapshot if overlapping, else from the buffer */
+         if (overlapping) {
+            _mesa_memcpy(depth, depthPtr, width * sizeof(GLfloat));
+            depthPtr += width;
+         }
+         else {
+            _swrast_read_depth_span_float(ctx, depthReadRb,
+                                          width, srcX, sy, depth);
+         }
+
+         /* scale & bias */
+         if (scaleOrBias) {
+            _mesa_scale_and_bias_depth(ctx, width, depth);
+         }
+         /* convert float depth to integer Z by multiplying with the draw buffer's _DepthMaxF */
+         if (depthDrawRb->DataType == GL_UNSIGNED_SHORT) {
+            GLint k;
+            for (k = 0; k < width; k++)
+               zVals16[k] = (GLushort) (depth[k] * depthScale);
+            zVals = zVals16;
+            zBytes = 2;
+         }
+         else {
+            GLint k;
+            for (k = 0; k < width; k++)
+               zVals32[k] = (GLuint) (depth[k] * depthScale);
+            zVals = zVals32;
+            zBytes = 4;
+         }
+
+         /* Write values */
+         if (zoom) {
+            _swrast_write_zoomed_z_span(ctx, destX, destY, width,
+                                        destX, dy, zVals);
+         }
+         else {
+            _swrast_put_row(ctx, depthDrawRb, width, destX, dy, zVals, zBytes);
+         }
+      }
+   }
+
+   if (tempStencilImage)
+      _mesa_free(tempStencilImage);
+
+   if (tempDepthImage)
+      _mesa_free(tempDepthImage);
+}
+
+
+/**
+ * Software glCopyPixels entry point: dispatches on `type` to the matching copy
+ * helper.  Callers are expected to have error-checked all parameters already.
+ */
+void
+_swrast_CopyPixels( GLcontext *ctx,
+		    GLint srcx, GLint srcy, GLsizei width, GLsizei height,
+		    GLint destx, GLint desty, GLenum type )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   RENDER_START(swrast,ctx);   /* paired with RENDER_FINISH at the end of this function */
+      
+   if (swrast->NewState)   /* pending state changes: recompute derived swrast state first */
+      _swrast_validate_derived( ctx );
+
+   switch (type) {
+   case GL_COLOR:
+      if (ctx->Visual.rgbMode) {   /* RGBA visual vs. color-index visual */
+         copy_rgba_pixels( ctx, srcx, srcy, width, height, destx, desty );
+      }
+      else {
+         copy_ci_pixels( ctx, srcx, srcy, width, height, destx, desty );
+      }
+      break;
+   case GL_DEPTH:
+      copy_depth_pixels( ctx, srcx, srcy, width, height, destx, desty );
+      break;
+   case GL_STENCIL:
+      copy_stencil_pixels( ctx, srcx, srcy, width, height, destx, desty );
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+      copy_depth_stencil_pixels(ctx, srcx, srcy, width, height, destx, desty);
+      break;
+   default:   /* any other type is reported via _mesa_problem; nothing is copied */
+      _mesa_problem(ctx, "unexpected type in _swrast_CopyPixels");
+   }
+
+   RENDER_FINISH(swrast,ctx);
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_depth.c b/dist/Mesa/src/mesa/swrast/s_depth.c
new file mode 100644
index 000000000..1001cea35
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_depth.c
@@ -0,0 +1,1429 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "imports.h"
+#include "fbobject.h"
+
+#include "s_depth.h"
+#include "s_context.h"
+#include "s_span.h"
+
+
+/**
+ * Depth-test a horizontal span of n fragments against 16-bit zbuffer values.
+ * Fragments with mask[i] == 0 are skipped; mask[i] is cleared on failure and,
+ * if ctx->Depth.Mask is set, zbuffer[i] takes z[i] (as GLushort) on pass.
+ * Return:  pass count (GL_ALWAYS without depth writes returns n unconditionally).
+ */
+static GLuint
+depth_test_span16( GLcontext *ctx, GLuint n,
+                   GLushort zbuffer[], const GLuint z[], GLubyte mask[] )
+{
+   GLuint passed = 0;
+
+   /* switch cases ordered from most frequent to less frequent */
+   switch (ctx->Depth.Func) {
+      case GL_LESS:
+         if (ctx->Depth.Mask) {
+	    /* Depth writes enabled: update Z buffer on pass */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  if (z[i] < zbuffer[i]) {
+		     /* pass */
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Depth writes disabled: test only, don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  if (z[i] < zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_LEQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] <= zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] <= zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GEQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] >= zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] >= zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GREATER:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] > zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] > zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_NOTEQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] != zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] != zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_EQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer (the store rewrites the value that just compared equal) */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] == zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] == zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_ALWAYS:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer for every fragment still enabled in the mask */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  zbuffer[i] = z[i];
+		  passed++;
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer or mask; n is reported without looking at mask[] */
+	    passed = n;
+	 }
+	 break;
+      case GL_NEVER:
+         _mesa_bzero(mask, n * sizeof(GLubyte));   /* every fragment fails; passed stays 0 */
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad depth func in depth_test_span16");
+   }
+
+   return passed;
+}
+/* Depth-test a span of n fragments against 32-bit Z values; clears mask[i] on
+ * failure, stores z[i] on pass if ctx->Depth.Mask.  Returns the pass count. */
+static GLuint
+depth_test_span32( GLcontext *ctx, GLuint n,
+                   GLuint zbuffer[], const GLuint z[], GLubyte mask[] )
+{
+   GLuint passed = 0;
+
+   /* switch cases ordered from most frequent to less frequent */
+   switch (ctx->Depth.Func) {
+      case GL_LESS:
+         if (ctx->Depth.Mask) {
+	    /* Depth writes enabled: update Z buffer on pass */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  if (z[i] < zbuffer[i]) {
+		     /* pass */
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Depth writes disabled: test only, don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  if (z[i] < zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_LEQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] <= zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] <= zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GEQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] >= zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] >= zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GREATER:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] > zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] > zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_NOTEQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] != zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] != zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_EQUAL:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer (the store rewrites the value that just compared equal) */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] == zbuffer[i]) {
+		     zbuffer[i] = z[i];
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  if (z[i] == zbuffer[i]) {
+		     /* pass */
+		     passed++;
+		  }
+		  else {
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_ALWAYS:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer for every fragment still enabled in the mask */
+            GLuint i;
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  zbuffer[i] = z[i];
+		  passed++;
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer or mask; n is reported without looking at mask[] */
+	    passed = n;
+	 }
+	 break;
+      case GL_NEVER:
+         _mesa_bzero(mask, n * sizeof(GLubyte));   /* every fragment fails; passed stays 0 */
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad depth func in depth_test_span32");
+   }
+
+   return passed;
+}
+
+
+
+/* Depth-test a contiguous span (span->x, span->y, span->end fragments) against
+ * the draw framebuffer's depth renderbuffer; returns the pass count and clears
+ * span->writeAll when fewer than span->end fragments pass. */
+static GLuint
+depth_test_span( GLcontext *ctx, struct sw_span *span)
+{
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_renderbuffer *rb = fb->_DepthBuffer;
+   const GLint x = span->x;
+   const GLint y = span->y;
+   const GLuint count = span->end;
+   const GLuint *zValues = span->array->z;
+   GLubyte *mask = span->array->mask;
+   GLuint passed;
+
+   ASSERT((span->arrayMask & SPAN_XY) == 0);   /* presumably: no per-fragment x/y arrays on this path - confirm */
+   ASSERT(span->arrayMask & SPAN_Z);
+   
+   if (rb->GetPointer(ctx, rb, 0, 0)) {
+      /* GetPointer gave a non-NULL address: test in place through the pointer for (x, y) */
+      if (rb->DataType == GL_UNSIGNED_SHORT) {
+         GLushort *zbuffer = (GLushort *) rb->GetPointer(ctx, rb, x, y);
+         passed = depth_test_span16(ctx, count, zbuffer, zValues, mask);
+      }
+      else {
+         GLuint *zbuffer = (GLuint *) rb->GetPointer(ctx, rb, x, y);
+         ASSERT(rb->DataType == GL_UNSIGNED_INT);
+         passed = depth_test_span32(ctx, count, zbuffer, zValues, mask);
+      }
+   }
+   else {
+      /* no direct pointer: read the row into a local array, test, write the row back */
+      if (rb->DataType == GL_UNSIGNED_SHORT) {
+         GLushort zbuffer[MAX_WIDTH];   /* NOTE(review): assumes count <= MAX_WIDTH - confirm */
+         rb->GetRow(ctx, rb, count, x, y, zbuffer);
+         passed = depth_test_span16(ctx, count, zbuffer, zValues, mask );
+         rb->PutRow(ctx, rb, count, x, y, zbuffer, NULL);   /* written back unconditionally */
+      }
+      else {
+         GLuint zbuffer[MAX_WIDTH];
+         ASSERT(rb->DataType == GL_UNSIGNED_INT);
+         rb->GetRow(ctx, rb, count, x, y, zbuffer);
+         passed = depth_test_span32(ctx, count, zbuffer, zValues, mask );
+         rb->PutRow(ctx, rb, count, x, y, zbuffer, NULL);   /* written back unconditionally */
+      }
+   }
+
+   if (passed < count) {   /* at least one fragment was not counted as passing */
+      span->writeAll = GL_FALSE;
+   }
+   return passed;
+}
+
+
+/* Address of the Z value at (X, Y); relies on `zStart` and `stride` in the scope where it expands. */
+#define Z_ADDRESS(X, Y)   (zStart + (Y) * stride + (X))
+
+
+/* Depth-test n fragments at scattered (x[i], y[i]) positions directly in a
+ * 16-bit Z buffer (zStart, `stride` values per row); clears mask[i] on failure
+ * and stores z[i] on pass when ctx->Depth.Mask is set.  No pass count. */
+static void
+direct_depth_test_pixels16(GLcontext *ctx, GLushort *zStart, GLuint stride,
+                           GLuint n, const GLint x[], const GLint y[],
+                           const GLuint z[], GLubyte mask[] )
+{
+   /* switch cases ordered from most frequent to less frequent */
+   switch (ctx->Depth.Func) {
+      case GL_LESS:
+         if (ctx->Depth.Mask) {
+	    /* Depth writes enabled: update Z buffer on pass */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);   /* NOTE(review): x/y are not range-checked here */
+		  if (z[i] < *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Depth writes disabled: test only, don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] < *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_LEQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] <= *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] <= *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GEQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] >= *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] >= *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GREATER:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] > *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] > *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_NOTEQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] != *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] != *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_EQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] == *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] == *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_ALWAYS:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer for every fragment still enabled in the mask */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLushort *zptr = Z_ADDRESS(x[i], y[i]);
+		  *zptr = z[i];
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer or mask: nothing to do */
+	 }
+	 break;
+      case GL_NEVER:
+	 /* depth test never passes: clear the whole mask */
+         _mesa_bzero(mask, n * sizeof(GLubyte));
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad depth func in direct_depth_test_pixels");
+   }
+}
+
+
+
+/* Depth-test n fragments at scattered (x[i], y[i]) positions directly in a
+ * 32-bit Z buffer (zStart, `stride` values per row); clears mask[i] on failure
+ * and stores z[i] on pass when ctx->Depth.Mask is set.  No pass count. */
+static void
+direct_depth_test_pixels32(GLcontext *ctx, GLuint *zStart, GLuint stride,
+                           GLuint n, const GLint x[], const GLint y[],
+                           const GLuint z[], GLubyte mask[] )
+{
+   /* switch cases ordered from most frequent to less frequent */
+   switch (ctx->Depth.Func) {
+      case GL_LESS:
+         if (ctx->Depth.Mask) {
+	    /* Depth writes enabled: update Z buffer on pass */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);   /* NOTE(review): x/y are not range-checked here */
+		  if (z[i] < *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Depth writes disabled: test only, don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] < *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_LEQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] <= *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] <= *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GEQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] >= *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] >= *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_GREATER:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] > *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] > *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_NOTEQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] != *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] != *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_EQUAL:
+         if (ctx->Depth.Mask) {
+	    /* Update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] == *zptr) {
+		     /* pass */
+		     *zptr = z[i];
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  if (z[i] == *zptr) {
+		     /* pass */
+		  }
+		  else {
+		     /* fail */
+		     mask[i] = 0;
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_ALWAYS:
+	 if (ctx->Depth.Mask) {
+	    /* Update Z buffer for every fragment still enabled in the mask */
+            GLuint i;
+	    for (i=0; i<n; i++) {
+	       if (mask[i]) {
+		  GLuint *zptr = Z_ADDRESS(x[i], y[i]);
+		  *zptr = z[i];
+	       }
+	    }
+	 }
+	 else {
+	    /* Don't update Z buffer or mask: nothing to do */
+	 }
+	 break;
+      case GL_NEVER:
+	 /* depth test never passes: clear the whole mask */
+         _mesa_bzero(mask, n * sizeof(GLubyte));
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad depth func in direct_depth_test_pixels");
+   }
+}
+
+
+
+
+/**
+ * Depth-test a span whose fragments are addressed through the per-fragment
+ * span->array->x[] / span->array->y[] position arrays.
+ * When the depth renderbuffer hands out a pointer via GetPointer() the Z
+ * values are tested in place; otherwise they are fetched into a temporary
+ * array, tested there, and stored back with PutValues().
+ * \return span->end (not an exact count of the fragments that passed)
+ */
+static GLuint
+depth_test_pixels( GLcontext *ctx, struct sw_span *span )
+{
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_DepthBuffer;
+   const GLuint numFrags = span->end;
+   const GLint *xs = span->array->x;
+   const GLint *ys = span->array->y;
+   const GLuint *fragZ = span->array->z;
+   GLubyte *live = span->array->mask;
+   const GLboolean direct = (rb->GetPointer(ctx, rb, 0, 0) != NULL);
+   const GLboolean is16 = (rb->DataType == GL_UNSIGNED_SHORT);
+
+   if (!is16) {
+      /* only 16-bit and 32-bit depth storage is handled here */
+      ASSERT(rb->DataType == GL_UNSIGNED_INT);
+   }
+
+   if (direct) {
+      /* Directly access values */
+      const GLuint rowStride = rb->Width;
+      if (is16) {
+         direct_depth_test_pixels16(ctx, (GLushort *) rb->Data, rowStride,
+                                    numFrags, xs, ys, fragZ, live);
+      }
+      else {
+         direct_depth_test_pixels32(ctx, (GLuint *) rb->Data, rowStride,
+                                    numFrags, xs, ys, fragZ, live);
+      }
+   }
+   else if (is16) {
+      /* read depth values from buffer, test, write back */
+      GLushort stored16[MAX_WIDTH];
+      _swrast_get_values(ctx, rb, numFrags, xs, ys, stored16,
+                         sizeof(GLushort));
+      depth_test_span16(ctx, numFrags, stored16, fragZ, live );
+      rb->PutValues(ctx, rb, numFrags, xs, ys, stored16, NULL);
+   }
+   else {
+      /* read depth values from buffer, test, write back */
+      GLuint stored32[MAX_WIDTH];
+      _swrast_get_values(ctx, rb, numFrags, xs, ys, stored32,
+                         sizeof(GLuint));
+      depth_test_span32(ctx, numFrags, stored32, fragZ, live );
+      rb->PutValues(ctx, rb, numFrags, xs, ys, stored32, NULL);
+   }
+
+   return numFrags; /* not really correct, but OK */
+}
+
+
+/**
+ * Apply depth (Z) buffer testing to the span.
+ * Spans flagged with SPAN_XY carry per-fragment positions and go through
+ * depth_test_pixels(); all other spans go through depth_test_span().
+ * \return approx number of pixels that passed (only zero is reliable)
+ */
+GLuint
+_swrast_depth_test_span( GLcontext *ctx, struct sw_span *span)
+{
+   return (span->arrayMask & SPAN_XY)
+      ? depth_test_pixels(ctx, span)
+      : depth_test_span(ctx, span);
+}
+
+
+/**
+ * GL_EXT_depth_bounds_test extension.
+ * Clear the mask entry of every live fragment whose stored Z-buffer value
+ * lies outside the range given by ctx->Depth.BoundsMin/BoundsMax (scaled by
+ * the framebuffer's _DepthMaxF and rounded).
+ * Note: the values already in the Z buffer are tested, not the fragment Z
+ * values!
+ * \return GL_TRUE if any fragments pass, GL_FALSE if no fragments pass
+ */
+GLboolean
+_swrast_depth_bounds_test( GLcontext *ctx, struct sw_span *span )
+{
+   struct gl_framebuffer *drawFb = ctx->DrawBuffer;
+   struct gl_renderbuffer *depthRb = drawFb->_DepthBuffer;
+   const GLuint lo = (GLuint) (ctx->Depth.BoundsMin * drawFb->_DepthMaxF + 0.5F);
+   const GLuint hi = (GLuint) (ctx->Depth.BoundsMax * drawFb->_DepthMaxF + 0.5F);
+   const GLuint numFrags = span->end;
+   GLubyte *live = span->array->mask;
+   GLboolean passed = GL_FALSE;
+   GLuint k;
+
+   if (depthRb->DataType == GL_UNSIGNED_SHORT) {
+      /* 16-bit depth storage */
+      GLushort copy16[MAX_WIDTH];
+      GLushort *stored;
+
+      if (span->arrayMask & SPAN_XY) {
+         /* scattered fragments: gather their Z values into the local copy */
+         _swrast_get_values(ctx, depthRb, numFrags,
+                            span->array->x, span->array->y,
+                            copy16, sizeof(GLushort));
+         stored = copy16;
+      }
+      else {
+         /* one row: use the buffer memory if available, else copy the row */
+         stored = (GLushort*) depthRb->GetPointer(ctx, depthRb,
+                                                  span->x, span->y);
+         if (!stored) {
+            depthRb->GetRow(ctx, depthRb, numFrags, span->x, span->y, copy16);
+            stored = copy16;
+         }
+      }
+      assert(stored);
+
+      for (k = 0; k < numFrags; k++) {
+         if (!live[k])
+            continue;
+         if (stored[k] >= lo && stored[k] <= hi)
+            passed = GL_TRUE;
+         else
+            live[k] = GL_FALSE;
+      }
+   }
+   else {
+      /* 32-bit depth storage */
+      GLuint copy32[MAX_WIDTH];
+      GLuint *stored;
+
+      ASSERT(depthRb->DataType == GL_UNSIGNED_INT);
+      if (span->arrayMask & SPAN_XY) {
+         /* scattered fragments: gather their Z values into the local copy */
+         _swrast_get_values(ctx, depthRb, numFrags,
+                            span->array->x, span->array->y,
+                            copy32, sizeof(GLuint));
+         stored = copy32;
+      }
+      else {
+         /* one row: use the buffer memory if available, else copy the row */
+         stored = (GLuint*) depthRb->GetPointer(ctx, depthRb,
+                                                span->x, span->y);
+         if (!stored) {
+            depthRb->GetRow(ctx, depthRb, numFrags, span->x, span->y, copy32);
+            stored = copy32;
+         }
+      }
+      assert(stored);
+
+      for (k = 0; k < numFrags; k++) {
+         if (!live[k])
+            continue;
+         if (stored[k] >= lo && stored[k] <= hi)
+            passed = GL_TRUE;
+         else
+            live[k] = GL_FALSE;
+      }
+   }
+
+   return passed;
+}
+
+
+
+/**********************************************************************/
+/*****                      Read Depth Buffer                     *****/
+/**********************************************************************/
+
+
+/**
+ * Read a span of depth values from the given depth renderbuffer, returning
+ * the values as GLfloats (raw Z multiplied by 1 / _DepthMaxF of the current
+ * draw buffer).
+ * This function does clipping to prevent reading outside the depth buffer's
+ * bounds.  Though the clipping is redundant when we're called from
+ * _swrast_ReadPixels.  Positions that fall outside the buffer are returned
+ * as 0.0.
+ *
+ * \param rb     depth renderbuffer to read; if NULL the output is zeroed
+ * \param n      number of values requested; nothing is written if n <= 0
+ * \param x, y   window position of the first value
+ * \param depth  receives the n results
+ */
+void
+_swrast_read_depth_span_float( GLcontext *ctx, struct gl_renderbuffer *rb,
+                               GLint n, GLint x, GLint y, GLfloat depth[] )
+{
+   const GLfloat scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+
+   if (n <= 0) {
+      /* nothing requested; also keeps the byte counts below non-negative */
+      return;
+   }
+
+   if (!rb) {
+      /* No depth buffer: hand back zeros and stop here, everything below
+       * dereferences rb.
+       */
+      _mesa_bzero(depth, n * sizeof(GLfloat));
+      return;
+   }
+
+   ASSERT(rb->_BaseFormat == GL_DEPTH_COMPONENT);
+
+   if (y < 0 || y >= (GLint) rb->Height ||
+       x + n <= 0 || x >= (GLint) rb->Width) {
+      /* span is completely outside framebuffer */
+      _mesa_bzero(depth, n * sizeof(GLfloat));
+      return;
+   }
+
+   if (x < 0) {
+      /* zero the part hanging off the left edge, then skip past it */
+      GLint dx = -x;
+      GLint i;
+      for (i = 0; i < dx; i++)
+         depth[i] = 0.0F;
+      x = 0;
+      n -= dx;
+      depth += dx;
+   }
+   if (x + n > (GLint) rb->Width) {
+      /* zero the part hanging off the right edge */
+      GLint dx = x + n - (GLint) rb->Width;
+      GLint i;
+      for (i = 0; i < dx; i++)
+         depth[n - i - 1] = 0.0F;
+      n -= dx;
+   }
+   if (n <= 0) {
+      return;
+   }
+
+   if (rb->DataType == GL_UNSIGNED_INT) {
+      GLuint temp[MAX_WIDTH];
+      GLint i;
+      rb->GetRow(ctx, rb, n, x, y, temp);
+      for (i = 0; i < n; i++) {
+         depth[i] = temp[i] * scale;
+      }
+   }
+   else if (rb->DataType == GL_UNSIGNED_SHORT) {
+      GLushort temp[MAX_WIDTH];
+      GLint i;
+      rb->GetRow(ctx, rb, n, x, y, temp);
+      for (i = 0; i < n; i++) {
+         depth[i] = temp[i] * scale;
+      }
+   }
+   else {
+      _mesa_problem(ctx, "Invalid depth renderbuffer data type");
+   }
+}
+
+
+/**
+ * Read a span of depth values from the given depth renderbuffer, returning
+ * them as 32-bit GLuint values.
+ * Values from a GL_UNSIGNED_INT buffer with fewer than 32 depth bits are
+ * shifted left so they occupy the most significant bits; values from a
+ * GL_UNSIGNED_SHORT buffer are shifted and replicated into both 16-bit
+ * halves of the result.
+ * The span is clipped against the buffer; positions that fall outside the
+ * buffer are returned as 0.
+ *
+ * \param rb     depth renderbuffer to read; if NULL the output is zeroed
+ * \param n      number of values requested; nothing is written if n <= 0
+ * \param x, y   window position of the first value
+ * \param depth  receives the n results
+ */
+void
+_swrast_read_depth_span_uint( GLcontext *ctx, struct gl_renderbuffer *rb,
+                              GLint n, GLint x, GLint y, GLuint depth[] )
+{
+   if (n <= 0) {
+      /* nothing requested; also keeps the byte counts below non-negative */
+      return;
+   }
+
+   if (!rb) {
+      /* No depth buffer: hand back zeros and stop here, everything below
+       * dereferences rb.
+       */
+      _mesa_bzero(depth, n * sizeof(GLuint));
+      return;
+   }
+
+   ASSERT(rb->_BaseFormat == GL_DEPTH_COMPONENT);
+
+   if (y < 0 || y >= (GLint) rb->Height ||
+       x + n <= 0 || x >= (GLint) rb->Width) {
+      /* span is completely outside framebuffer */
+      _mesa_bzero(depth, n * sizeof(GLuint));
+      return;
+   }
+
+   if (x < 0) {
+      /* zero the part hanging off the left edge, then skip past it */
+      GLint dx = -x;
+      GLint i;
+      for (i = 0; i < dx; i++)
+         depth[i] = 0;
+      x = 0;
+      n -= dx;
+      depth += dx;
+   }
+   if (x + n > (GLint) rb->Width) {
+      /* zero the part hanging off the right edge */
+      GLint dx = x + n - (GLint) rb->Width;
+      GLint i;
+      for (i = 0; i < dx; i++)
+         depth[n - i - 1] = 0;
+      n -= dx;
+   }
+   if (n <= 0) {
+      return;
+   }
+
+   if (rb->DataType == GL_UNSIGNED_INT) {
+      /* read straight into the caller's array, then widen if needed */
+      rb->GetRow(ctx, rb, n, x, y, depth);
+      if (rb->DepthBits < 32) {
+         GLuint shift = 32 - rb->DepthBits;
+         GLint i;
+         for (i = 0; i < n; i++) {
+            GLuint z = depth[i];
+            depth[i] = z << shift; /* XXX lsb bits? */
+         }
+      }
+   }
+   else if (rb->DataType == GL_UNSIGNED_SHORT) {
+      GLushort temp[MAX_WIDTH];
+      GLint i;
+      rb->GetRow(ctx, rb, n, x, y, temp);
+      if (rb->DepthBits == 16) {
+         /* replicate the 16-bit value into the upper and lower halves */
+         for (i = 0; i < n; i++) {
+            GLuint z = temp[i];
+            depth[i] = (z << 16) | z;
+         }
+      }
+      else {
+         GLuint shift = 16 - rb->DepthBits;
+         for (i = 0; i < n; i++) {
+            GLuint z = temp[i];
+            depth[i] = (z << (shift + 16)) | (z << shift); /* XXX lsb bits? */
+         }
+      }
+   }
+   else {
+      _mesa_problem(ctx, "Invalid depth renderbuffer data type");
+   }
+}
+
+
+
+/**
+ * Clear the given z/depth renderbuffer.
+ * The region cleared is the draw buffer's _Xmin.._Xmax by _Ymin.._Ymax
+ * rectangle and the value written is ctx->Depth.Clear converted to integer
+ * depth units.  Nothing happens when there is no renderbuffer or when depth
+ * writes are masked off.
+ */
+void
+_swrast_clear_depth_buffer( GLcontext *ctx, struct gl_renderbuffer *rb )
+{
+   struct gl_framebuffer *fb;
+   GLuint clearValue;
+   GLint left, bottom, cols, rows;
+   GLint r, c;
+
+   if (!rb || !ctx->Depth.Mask) {
+      /* no depth buffer, or writing to it is disabled */
+      return;
+   }
+
+   fb = ctx->DrawBuffer;
+
+   /* compute integer clearing value */
+   if (ctx->Depth.Clear != 1.0) {
+      clearValue = (GLuint) (ctx->Depth.Clear * fb->_DepthMaxF);
+   }
+   else {
+      clearValue = fb->_DepthMax;
+   }
+
+   assert(rb->_BaseFormat == GL_DEPTH_COMPONENT);
+
+   /* compute region to clear */
+   left   = fb->_Xmin;
+   bottom = fb->_Ymin;
+   cols   = fb->_Xmax - fb->_Xmin;
+   rows   = fb->_Ymax - fb->_Ymin;
+
+   if (!rb->GetPointer(ctx, rb, 0, 0)) {
+      /* Direct access not possible.  Use PutRow to write new values. */
+      switch (rb->DataType) {
+      case GL_UNSIGNED_SHORT:
+         {
+            GLushort clearVal16 = (GLushort) (clearValue & 0xffff);
+            for (r = 0; r < rows; r++) {
+               rb->PutMonoRow(ctx, rb, cols, left, bottom + r,
+                              &clearVal16, NULL);
+            }
+         }
+         break;
+      case GL_UNSIGNED_INT:
+         ASSERT(sizeof(clearValue) == sizeof(GLuint));
+         for (r = 0; r < rows; r++) {
+            rb->PutMonoRow(ctx, rb, cols, left, bottom + r,
+                           &clearValue, NULL);
+         }
+         break;
+      default:
+         _mesa_problem(ctx, "bad depth renderbuffer DataType");
+         break;
+      }
+      return;
+   }
+
+   /* Direct buffer access is possible.  Either this is just malloc'd
+    * memory, or perhaps the driver mmap'd the zbuffer memory.
+    */
+   if (rb->DataType != GL_UNSIGNED_SHORT) {
+      ASSERT(rb->DataType == GL_UNSIGNED_INT);
+      for (r = 0; r < rows; r++) {
+         GLuint *dst32 = (GLuint *) rb->GetPointer(ctx, rb, left, bottom + r);
+         for (c = 0; c < cols; c++) {
+            dst32[c] = clearValue;
+         }
+      }
+      return;
+   }
+
+   /* 16-bit buffer.  If both bytes of the clear value are equal and the
+    * start of row 1 lies exactly `cols' elements after the start of row 0,
+    * the whole region can be filled with a single memset.
+    */
+   if ((clearValue & 0xff) == ((clearValue >> 8) & 0xff) &&
+       ((GLushort *) rb->GetPointer(ctx, rb, 0, 0) + cols ==
+        (GLushort *) rb->GetPointer(ctx, rb, 0, 1))) {
+      /* optimized case */
+      GLushort *dst16 = (GLushort *) rb->GetPointer(ctx, rb, left, bottom);
+      GLuint len = cols * rows * sizeof(GLushort);
+      _mesa_memset(dst16, (clearValue & 0xff), len);
+   }
+   else {
+      /* general case */
+      for (r = 0; r < rows; r++) {
+         GLushort *dst16 = (GLushort *) rb->GetPointer(ctx, rb,
+                                                       left, bottom + r);
+         for (c = 0; c < cols; c++) {
+            dst16[c] = clearValue;
+         }
+      }
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_depth.h b/dist/Mesa/src/mesa/swrast/s_depth.h
new file mode 100644
index 000000000..f93d95fa3
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_depth.h
@@ -0,0 +1,56 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_DEPTH_H
+#define S_DEPTH_H
+
+
+#include "mtypes.h"
+#include "s_context.h"
+
+
+/*
+ * Apply depth (Z) buffer testing to the span.
+ * Returns the approximate number of pixels that passed (only zero is
+ * reliable).
+ */
+extern GLuint
+_swrast_depth_test_span( GLcontext *ctx, struct sw_span *span);
+
+
+/*
+ * GL_EXT_depth_bounds_test: discard fragments whose Z-buffer values are
+ * outside the depth bounds range.
+ * Returns GL_TRUE if any fragments pass, GL_FALSE if none pass.
+ */
+extern GLboolean
+_swrast_depth_bounds_test( GLcontext *ctx, struct sw_span *span );
+
+
+/*
+ * Read n depth values starting at (x, y) from renderbuffer rb and return
+ * them in depth[] as GLfloats.
+ */
+extern void
+_swrast_read_depth_span_float( GLcontext *ctx, struct gl_renderbuffer *rb,
+                               GLint n, GLint x, GLint y, GLfloat depth[] );
+
+
+/*
+ * Read n depth values starting at (x, y) from renderbuffer rb and return
+ * them in depth[] as 32-bit GLuint values.
+ */
+extern void
+_swrast_read_depth_span_uint( GLcontext *ctx, struct gl_renderbuffer *rb,
+                              GLint n, GLint x, GLint y, GLuint depth[] );
+
+
+/*
+ * Clear the given z/depth renderbuffer.
+ */
+extern void
+_swrast_clear_depth_buffer( GLcontext *ctx, struct gl_renderbuffer *rb );
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_drawpix.c b/dist/Mesa/src/mesa/swrast/s_drawpix.c
new file mode 100644
index 000000000..cfe516733
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_drawpix.c
@@ -0,0 +1,1098 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "bufferobj.h"
+#include "context.h"
+#include "convolve.h"
+#include "image.h"
+#include "macros.h"
+#include "imports.h"
+#include "pixel.h"
+#include "state.h"
+
+#include "s_context.h"
+#include "s_drawpix.h"
+#include "s_span.h"
+#include "s_stencil.h"
+#include "s_zoom.h"
+
+
+/*
+ * Try to do a fast and simple RGB(a) glDrawPixels.
+ *
+ * The fast path is only taken when no image transfer operations are
+ * enabled, MULTI_DRAW_BIT is not set, the raster mask contains nothing but
+ * (possibly) CLIP_BIT, no texture coordinate units are enabled, and the
+ * unpack state has Alignment 1 with neither SwapBytes nor LsbFirst set.
+ * Handled format/type combinations are GL_RGBA, GL_RGB, GL_LUMINANCE and
+ * GL_LUMINANCE_ALPHA with CHAN_TYPE, and GL_COLOR_INDEX with
+ * GL_UNSIGNED_BYTE.
+ *
+ * For pixel zoom (1, 1) and (1, -1) the image is clipped here and written
+ * with the renderbuffer's PutRow/PutRowRGB functions; for any other zoom
+ * the rows are passed to the _swrast_write_zoomed_*_span() functions.
+ *
+ * Return:  GL_TRUE if success (including the case where the image is
+ *          clipped away completely), GL_FALSE if slow path must be used
+ *          instead
+ */
+static GLboolean
+fast_draw_pixels(GLcontext *ctx, GLint x, GLint y,
+                 GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *unpack,
+                 const GLvoid *pixels)
+{
+   const GLint imgX = x, imgY = y;
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct sw_span span;
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_RGBA);
+
+   if (swrast->_RasterMask & MULTI_DRAW_BIT)
+      return GL_FALSE;
+
+   if (ctx->_ImageTransferState) {
+      /* don't handle any pixel transfer options here */
+      return GL_FALSE;
+   }
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   if (ctx->Texture._EnabledCoordUnits)
+      _swrast_span_default_texcoords(ctx, &span);
+
+   if ((swrast->_RasterMask & ~CLIP_BIT) == 0
+       && ctx->Texture._EnabledCoordUnits == 0
+       && unpack->Alignment == 1 /* XXX may not really need this */
+       && !unpack->SwapBytes
+       && !unpack->LsbFirst) {
+
+      /* XXX there's a lot of clipping code here that should be replaced
+       * by a call to _mesa_clip_drawpixels().
+       */
+      GLint destX = x;
+      GLint destY = y;
+      GLint drawWidth = width;           /* actual width drawn */
+      GLint drawHeight = height;         /* actual height drawn */
+      GLint skipPixels = unpack->SkipPixels;
+      GLint skipRows = unpack->SkipRows;
+      GLint rowLength;
+
+      if (unpack->RowLength > 0)
+         rowLength = unpack->RowLength;
+      else
+         rowLength = width;
+
+      /* If we're not using pixel zoom then do all clipping calculations
+       * now.  Otherwise, we'll let the _swrast_write_zoomed_*_span() functions
+       * handle the clipping.
+       */
+      if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==1.0F) {
+         /* horizontal clipping */
+         if (destX < ctx->DrawBuffer->_Xmin) {
+            skipPixels += (ctx->DrawBuffer->_Xmin - destX);
+            drawWidth  -= (ctx->DrawBuffer->_Xmin - destX);
+            destX = ctx->DrawBuffer->_Xmin;
+         }
+         if (destX + drawWidth > ctx->DrawBuffer->_Xmax)
+            drawWidth -= (destX + drawWidth - ctx->DrawBuffer->_Xmax);
+         if (drawWidth <= 0)
+            return GL_TRUE;
+
+         /* vertical clipping */
+         if (destY < ctx->DrawBuffer->_Ymin) {
+            skipRows   += (ctx->DrawBuffer->_Ymin - destY);
+            drawHeight -= (ctx->DrawBuffer->_Ymin - destY);
+            destY = ctx->DrawBuffer->_Ymin;
+         }
+         if (destY + drawHeight > ctx->DrawBuffer->_Ymax)
+            drawHeight -= (destY + drawHeight - ctx->DrawBuffer->_Ymax);
+         if (drawHeight <= 0)
+            return GL_TRUE;
+      }
+      else if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==-1.0F) {
+         /* upside-down image */
+         /* horizontal clipping */
+         if (destX < ctx->DrawBuffer->_Xmin) {
+            skipPixels += (ctx->DrawBuffer->_Xmin - destX);
+            drawWidth  -= (ctx->DrawBuffer->_Xmin - destX);
+            destX = ctx->DrawBuffer->_Xmin;
+         }
+         if (destX + drawWidth > ctx->DrawBuffer->_Xmax)
+            drawWidth -= (destX + drawWidth - ctx->DrawBuffer->_Xmax);
+         if (drawWidth <= 0)
+            return GL_TRUE;
+
+         /* vertical clipping (rows are written downward from destY) */
+         if (destY > ctx->DrawBuffer->_Ymax) {
+            skipRows   += (destY - ctx->DrawBuffer->_Ymax);
+            drawHeight -= (destY - ctx->DrawBuffer->_Ymax);
+            destY = ctx->DrawBuffer->_Ymax;
+         }
+         if (destY - drawHeight < ctx->DrawBuffer->_Ymin)
+            drawHeight -= (ctx->DrawBuffer->_Ymin - (destY - drawHeight));
+         if (drawHeight <= 0)
+            return GL_TRUE;
+      }
+      else {
+         if (drawWidth > MAX_WIDTH)
+            return GL_FALSE; /* fall back to general case path */
+      }
+
+
+      /*
+       * Ready to draw!
+       * The window region at (destX, destY) of size (drawWidth, drawHeight)
+       * will be written to.
+       * We'll take pixel data from buffer pointed to by "pixels" but we'll
+       * skip "skipRows" rows and skip "skipPixels" pixels/row.
+       */
+
+      if (format == GL_RGBA && type == CHAN_TYPE) {
+         if (ctx->Visual.rgbMode) {
+            const GLchan *src = (const GLchan *) pixels
+               + (skipRows * rowLength + skipPixels) * 4;
+            if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==1.0F) {
+               /* no zooming */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  rb->PutRow(ctx, rb, drawWidth, destX, destY, src, NULL);
+                  src += rowLength * 4;
+                  destY++;
+               }
+            }
+            else if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==-1.0F) {
+               /* upside-down */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  destY--;
+                  rb->PutRow(ctx, rb, drawWidth, destX, destY, src, NULL);
+                  src += rowLength * 4;
+               }
+            }
+            else {
+               /* with zooming */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  span.x = destX;
+                  span.y = destY + row;
+                  span.end = drawWidth;
+                  _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
+                                                 (CONST GLchan (*)[4]) src);
+                  src += rowLength * 4;
+               }
+            }
+         }
+         return GL_TRUE;
+      }
+      else if (format == GL_RGB && type == CHAN_TYPE) {
+         if (ctx->Visual.rgbMode) {
+            const GLchan *src = (const GLchan *) pixels
+               + (skipRows * rowLength + skipPixels) * 3;
+            if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==1.0F) {
+               /* no zooming */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  rb->PutRowRGB(ctx, rb, drawWidth, destX, destY, src, NULL);
+                  src += rowLength * 3;
+                  destY++;
+               }
+            }
+            else if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==-1.0F) {
+               /* upside-down */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  destY--;
+                  rb->PutRowRGB(ctx, rb, drawWidth, destX, destY, src, NULL);
+                  src += rowLength * 3;
+               }
+            }
+            else {
+               /* with zooming */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  span.x = destX;
+                  span.y = destY;
+                  span.end = drawWidth;
+                  _swrast_write_zoomed_rgb_span(ctx, imgX, imgY, &span,
+                                         (CONST GLchan (*)[3]) src);
+                  src += rowLength * 3;
+                  destY++;
+               }
+            }
+         }
+         return GL_TRUE;
+      }
+      else if (format == GL_LUMINANCE && type == CHAN_TYPE) {
+         if (ctx->Visual.rgbMode) {
+            const GLchan *src = (const GLchan *) pixels
+               + (skipRows * rowLength + skipPixels);
+            if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==1.0F) {
+               /* no zooming */
+               GLint row;
+               ASSERT(drawWidth <= MAX_WIDTH);
+               for (row=0; row<drawHeight; row++) {
+                  GLint i;
+                  /* expand luminance to R=G=B */
+                  for (i=0;i<drawWidth;i++) {
+                     span.array->rgb[i][0] = src[i];
+                     span.array->rgb[i][1] = src[i];
+                     span.array->rgb[i][2] = src[i];
+                  }
+                  rb->PutRowRGB(ctx, rb, drawWidth, destX, destY,
+                                span.array->rgb, NULL);
+                  src += rowLength;
+                  destY++;
+               }
+            }
+            else if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==-1.0F) {
+               /* upside-down */
+               GLint row;
+               ASSERT(drawWidth <= MAX_WIDTH);
+               for (row=0; row<drawHeight; row++) {
+                  GLint i;
+                  /* expand luminance to R=G=B */
+                  for (i=0;i<drawWidth;i++) {
+                     span.array->rgb[i][0] = src[i];
+                     span.array->rgb[i][1] = src[i];
+                     span.array->rgb[i][2] = src[i];
+                  }
+                  destY--;
+                  /* The row was built in the 3-channel rgb array, so it
+                   * has to be written with PutRowRGB (as in the no-zoom
+                   * case above), not the 4-channel PutRow.
+                   */
+                  rb->PutRowRGB(ctx, rb, drawWidth, destX, destY,
+                                span.array->rgb, NULL);
+                  src += rowLength;
+               }
+            }
+            else {
+               /* with zooming */
+               GLint row;
+               ASSERT(drawWidth <= MAX_WIDTH);
+               for (row=0; row<drawHeight; row++) {
+                  GLint i;
+                  /* expand luminance to R=G=B */
+                  for (i=0;i<drawWidth;i++) {
+                     span.array->rgb[i][0] = src[i];
+                     span.array->rgb[i][1] = src[i];
+                     span.array->rgb[i][2] = src[i];
+                  }
+                  span.x = destX;
+                  span.y = destY;
+                  span.end = drawWidth;
+                  _swrast_write_zoomed_rgb_span(ctx, imgX, imgY, &span,
+                             (CONST GLchan (*)[3]) span.array->rgb);
+                  src += rowLength;
+                  destY++;
+               }
+            }
+         }
+         return GL_TRUE;
+      }
+      else if (format == GL_LUMINANCE_ALPHA && type == CHAN_TYPE) {
+         if (ctx->Visual.rgbMode) {
+            const GLchan *src = (const GLchan *) pixels
+               + (skipRows * rowLength + skipPixels)*2;
+            if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==1.0F) {
+               /* no zooming */
+               GLint row;
+               ASSERT(drawWidth <= MAX_WIDTH);
+               for (row=0; row<drawHeight; row++) {
+                  GLint i;
+                  const GLchan *ptr = src;
+                  /* expand (L, A) pairs to (L, L, L, A) */
+                  for (i=0;i<drawWidth;i++) {
+                     span.array->rgba[i][0] = *ptr;
+                     span.array->rgba[i][1] = *ptr;
+                     span.array->rgba[i][2] = *ptr++;
+                     span.array->rgba[i][3] = *ptr++;
+                  }
+                  rb->PutRow(ctx, rb, drawWidth, destX, destY,
+                             span.array->rgba, NULL);
+                  src += rowLength*2;
+                  destY++;
+               }
+            }
+            else if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==-1.0F) {
+               /* upside-down */
+               GLint row;
+               ASSERT(drawWidth <= MAX_WIDTH);
+               for (row=0; row<drawHeight; row++) {
+                  GLint i;
+                  const GLchan *ptr = src;
+                  /* expand (L, A) pairs to (L, L, L, A) */
+                  for (i=0;i<drawWidth;i++) {
+                     span.array->rgba[i][0] = *ptr;
+                     span.array->rgba[i][1] = *ptr;
+                     span.array->rgba[i][2] = *ptr++;
+                     span.array->rgba[i][3] = *ptr++;
+                  }
+                  destY--;
+                  rb->PutRow(ctx, rb, drawWidth, destX, destY,
+                             span.array->rgba, NULL);
+                  src += rowLength*2;
+               }
+            }
+            else {
+               /* with zooming */
+               GLint row;
+               ASSERT(drawWidth <= MAX_WIDTH);
+               for (row=0; row<drawHeight; row++) {
+                  const GLchan *ptr = src;
+                  GLint i;
+                  /* expand (L, A) pairs to (L, L, L, A) */
+                  for (i=0;i<drawWidth;i++) {
+                     span.array->rgba[i][0] = *ptr;
+                     span.array->rgba[i][1] = *ptr;
+                     span.array->rgba[i][2] = *ptr++;
+                     span.array->rgba[i][3] = *ptr++;
+                  }
+                  span.x = destX;
+                  span.y = destY;
+                  span.end = drawWidth;
+                  _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
+                            (CONST GLchan (*)[4]) span.array->rgba);
+                  src += rowLength*2;
+                  destY++;
+               }
+            }
+         }
+         return GL_TRUE;
+      }
+      else if (format==GL_COLOR_INDEX && type==GL_UNSIGNED_BYTE) {
+         const GLubyte *src =
+            (const GLubyte *) pixels + skipRows * rowLength + skipPixels;
+         if (ctx->Visual.rgbMode) {
+            /* convert CI data to RGBA */
+            if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==1.0F) {
+               /* no zooming */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  ASSERT(drawWidth <= MAX_WIDTH);
+                  _mesa_map_ci8_to_rgba(ctx, drawWidth, src, span.array->rgba);
+                  rb->PutRow(ctx, rb, drawWidth, destX, destY,
+                             span.array->rgba, NULL);
+                  src += rowLength;
+                  destY++;
+               }
+               return GL_TRUE;
+            }
+            else if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==-1.0F) {
+               /* upside-down */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  ASSERT(drawWidth <= MAX_WIDTH);
+                  _mesa_map_ci8_to_rgba(ctx, drawWidth, src, span.array->rgba);
+                  destY--;
+                  rb->PutRow(ctx, rb, drawWidth, destX, destY,
+                             span.array->rgba, NULL);
+                  src += rowLength;
+               }
+               return GL_TRUE;
+            }
+            else {
+               /* with zooming */
+               GLint row;
+               for (row=0; row<drawHeight; row++) {
+                  ASSERT(drawWidth <= MAX_WIDTH);
+                  _mesa_map_ci8_to_rgba(ctx, drawWidth, src, span.array->rgba);
+                  span.x = destX;
+                  span.y = destY;
+                  span.end = drawWidth;
+                  _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
+                            (CONST GLchan (*)[4]) span.array->rgba);
+                  src += rowLength;
+                  destY++;
+               }
+               return GL_TRUE;
+            }
+         }
+         else {
+            /* write CI data to CI frame buffer */
+            GLint row;
+            if (ctx->Pixel.ZoomX==1.0F && ctx->Pixel.ZoomY==1.0F) {
+               /* no zooming */
+               for (row=0; row<drawHeight; row++) {
+                  GLuint index32[MAX_WIDTH];
+                  GLint col;
+                  /* widen the 8-bit indexes to 32 bits for PutRow */
+                  for (col = 0; col < drawWidth; col++)
+                     index32[col] = src[col];
+                  rb->PutRow(ctx, rb, drawWidth, destX, destY, index32, NULL);
+                  src += rowLength;
+                  destY++;
+               }
+               return GL_TRUE;
+            }
+            else {
+               /* with zooming */
+               return GL_FALSE;
+            }
+         }
+      }
+      else {
+         /* can't handle this pixel format and/or data type here */
+         return GL_FALSE;
+      }
+   }
+
+   /* can't do a simple draw, have to use slow path */
+   return GL_FALSE;
+}
+
+
+
+/*
+ * Draw color index image.
+ * The image is processed in vertical strips of at most MAX_WIDTH pixels;
+ * each row of a strip is unpacked into span.array->index and then written
+ * with the zoomed or plain index-span writer.
+ */
+static void
+draw_index_pixels( GLcontext *ctx, GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   const GLvoid *pixels )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLint imgX = x, imgY = y;
+   const GLboolean zoom =
+      !(ctx->Pixel.ZoomX == 1.0 && ctx->Pixel.ZoomY == 1.0);
+   GLint stripStart, stripWidth;
+   struct sw_span span;
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_INDEX);
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+
+   /*
+    * General solution
+    */
+   for (stripStart = 0; stripStart < width; stripStart += stripWidth) {
+      GLint row;
+
+      stripWidth = MIN2(width - stripStart, MAX_WIDTH);
+      ASSERT(stripWidth <= MAX_WIDTH);
+
+      for (row = 0; row < height; row++) {
+         const GLvoid *source = _mesa_image_address2d(unpack, pixels,
+                                                      width, height,
+                                                      GL_COLOR_INDEX, type,
+                                                      row, stripStart);
+         _mesa_unpack_index_span(ctx, stripWidth, GL_UNSIGNED_INT,
+                                 span.array->index, type, source, unpack,
+                                 ctx->_ImageTransferState);
+
+         /* These may get changed during writing/clipping */
+         span.x = x + stripStart;
+         span.y = y + row;
+         span.end = stripWidth;
+
+         if (!zoom)
+            _swrast_write_index_span(ctx, &span);
+         else
+            _swrast_write_zoomed_index_span(ctx, imgX, imgY, &span);
+      }
+   }
+}
+
+
+
+/*
+ * Draw a stencil image as a series of stencil spans.
+ */
+static void
+draw_stencil_pixels( GLcontext *ctx, GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum type,
+                     const struct gl_pixelstore_attrib *unpack,
+                     const GLvoid *pixels )
+{
+   /* unpack destination type is chosen to match sizeof(GLstencil) */
+   const GLenum stencilType = (sizeof(GLstencil) == sizeof(GLubyte))
+                            ? GL_UNSIGNED_BYTE : GL_UNSIGNED_SHORT;
+   const GLboolean zoomed = (ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0);
+   GLint done;   /* number of image columns already drawn */
+
+   /* if width > MAX_WIDTH, have to process image in chunks */
+   for (done = 0; done < width; ) {
+      const GLint chunk = MIN2(width - done, MAX_WIDTH);
+      const GLint left = x + done;
+      GLint r;
+      for (r = 0; r < height; r++) {
+         GLstencil stencil[MAX_WIDTH];
+         const GLvoid *rowSrc = _mesa_image_address2d(unpack, pixels,
+                                                      width, height,
+                                                      GL_COLOR_INDEX, type,
+                                                      r, done);
+
+         _mesa_unpack_index_span(ctx, chunk, stencilType, stencil,
+                                 type, rowSrc, unpack,
+                                 ctx->_ImageTransferState);
+         /* stencil shift/offset and stencil map, applied after unpacking */
+         if (ctx->_ImageTransferState & IMAGE_SHIFT_OFFSET_BIT)
+            _mesa_shift_and_offset_stencil(ctx, chunk, stencil);
+         if (ctx->Pixel.MapStencilFlag)
+            _mesa_map_stencil(ctx, chunk, stencil);
+
+         /* the zoomed writer is also given the image origin (x, y) */
+         if (zoomed) {
+            _swrast_write_zoomed_stencil_span(ctx, x, y, chunk,
+                                              left, y + r, stencil);
+         }
+         else {
+            _swrast_write_stencil_span(ctx, chunk, left, y + r, stencil);
+         }
+      }
+      done += chunk;
+   }
+}
+
+
+/*
+ * Draw depth image: Z comes from the image, all else from span defaults.
+ */
+static void
+draw_depth_pixels( GLcontext *ctx, GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   const GLvoid *pixels )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLboolean scaleOrBias
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   struct sw_span span;
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_Z);
+   /* only Z is per-pixel here; color, fog and texcoords are span defaults */
+   _swrast_span_default_color(ctx, &span);
+
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   if (ctx->Texture._EnabledCoordUnits)
+      _swrast_span_default_texcoords(ctx, &span);
+
+   if (type == GL_UNSIGNED_SHORT
+       && ctx->DrawBuffer->Visual.depthBits == 16
+       && !scaleOrBias
+       && !zoom
+       && ctx->Visual.rgbMode
+       && width <= MAX_WIDTH) {
+      /* Special case: directly write 16-bit depth values */
+      GLint row;
+      for (row = 0; row < height; row++) {
+         const GLushort *zSrc = (const GLushort *)
+            _mesa_image_address2d(unpack, pixels, width, height,
+                                  GL_DEPTH_COMPONENT, type, row, 0);
+         GLint i;
+         for (i = 0; i < width; i++)
+            span.array->z[i] = zSrc[i];
+         span.x = x;
+         span.y = y + row;
+         span.end = width;
+         _swrast_write_rgba_span(ctx, &span);
+      }
+   }
+   else if (type == GL_UNSIGNED_INT
+            && !scaleOrBias
+            && !zoom
+            && ctx->Visual.rgbMode
+            && width <= MAX_WIDTH) {
+      /* Special case: shift 32-bit values down to Visual.depthBits */
+      const GLint shift = 32 - ctx->DrawBuffer->Visual.depthBits;
+      GLint row;
+      for (row = 0; row < height; row++) {
+         const GLuint *zSrc = (const GLuint *)
+            _mesa_image_address2d(unpack, pixels, width, height,
+                                  GL_DEPTH_COMPONENT, type, row, 0);
+         if (shift == 0) {
+            _mesa_memcpy(span.array->z, zSrc, width * sizeof(GLuint));
+         }
+         else {
+            GLint col;
+            for (col = 0; col < width; col++)
+               span.array->z[col] = zSrc[col] >> shift;
+         }
+         span.x = x;
+         span.y = y + row;
+         span.end = width;
+         _swrast_write_rgba_span(ctx, &span);
+      }
+   }
+   else {
+      /* General case */
+      const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF;
+      GLint skipPixels = 0;
+
+      /* in case width > MAX_WIDTH do the copy in chunks */
+      while (skipPixels < width) {
+         const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+         GLint row;
+         ASSERT(spanWidth <= MAX_WIDTH);
+         for (row = 0; row < height; row++) {
+            const GLvoid *zSrc = _mesa_image_address2d(unpack,
+                                                      pixels, width, height,
+                                                      GL_DEPTH_COMPONENT, type,
+                                                      row, skipPixels);
+
+            /* Set these for each row since the _swrast_write_* function may
+             * change them while clipping.
+             */
+            span.x = x + skipPixels;
+            span.y = y + row;
+            span.end = spanWidth;
+
+            _mesa_unpack_depth_span(ctx, spanWidth,
+                                    GL_UNSIGNED_INT, span.array->z, depthMax,
+                                    type, zSrc, unpack);
+            if (zoom) {
+               _swrast_write_zoomed_depth_span(ctx, x, y, &span);
+            }
+            else if (ctx->Visual.rgbMode) {
+               _swrast_write_rgba_span(ctx, &span);
+            }
+            else {
+               _swrast_write_index_span(ctx, &span);
+            }
+         }
+         skipPixels += spanWidth;
+      }
+   }
+}
+
+
+
+/*
+ * Draw RGBA image: fast path first, else optional convolution, then spans.
+ */
+static void
+draw_rgba_pixels( GLcontext *ctx, GLint x, GLint y,
+                  GLsizei width, GLsizei height,
+                  GLenum format, GLenum type,
+                  const struct gl_pixelstore_attrib *unpack,
+                  const GLvoid *pixels )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLint imgX = x, imgY = y;
+   struct gl_renderbuffer *rb = NULL; /* only used for quickDraw path */
+   const GLboolean zoom = ctx->Pixel.ZoomX!=1.0 || ctx->Pixel.ZoomY!=1.0;
+   GLboolean quickDraw;
+   GLfloat *convImage = NULL;
+   GLuint transferOps = ctx->_ImageTransferState;
+   struct sw_span span;
+
+   INIT_SPAN(span, GL_BITMAP, 0, 0, SPAN_RGBA);
+
+   /* Try an optimized glDrawPixels first */
+   if (fast_draw_pixels(ctx, x, y, width, height, format, type, unpack, pixels))
+      return;
+
+   if (ctx->Depth.Test)
+      _swrast_span_default_z(ctx, &span);
+   if (swrast->_FogEnabled)
+      _swrast_span_default_fog(ctx, &span);
+   if (ctx->Texture._EnabledCoordUnits)
+      _swrast_span_default_texcoords(ctx, &span);
+   /* quickDraw: _RasterMask clear, no zoom, image inside buffer, 1 color buffer */
+   if (swrast->_RasterMask == 0 && !zoom && x >= 0 && y >= 0
+       && x + width <= (GLint) ctx->DrawBuffer->Width
+       && y + height <= (GLint) ctx->DrawBuffer->Height
+       && ctx->DrawBuffer->_NumColorDrawBuffers[0] == 1) {
+      quickDraw = GL_TRUE;
+      rb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+   }
+   else {
+      quickDraw = GL_FALSE;
+      rb = NULL;
+   }
+
+   if (ctx->Pixel.Convolution2DEnabled || ctx->Pixel.Separable2DEnabled) {
+      /* Convolution has to be handled specially.  We'll create an
+       * intermediate image, applying all pixel transfer operations
+       * up to convolution.  Then we'll convolve the image.  Then
+       * we'll proceed with the rest of the transfer operations and
+       * rasterize the image.
+       */
+      GLint row;
+      GLfloat *dest, *tmpImage;
+
+      tmpImage = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
+      if (!tmpImage) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+         return;
+      }
+      convImage = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
+      if (!convImage) {
+         _mesa_free(tmpImage);
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+         return;
+      }
+
+      /* Unpack the image and apply transfer ops up to convolution */
+      dest = tmpImage;
+      for (row = 0; row < height; row++) {
+         const GLvoid *source = _mesa_image_address2d(unpack,
+                                  pixels, width, height, format, type, row, 0);
+         _mesa_unpack_color_span_float(ctx, width, GL_RGBA, (GLfloat *) dest,
+                                     format, type, source, unpack,
+                                     transferOps & IMAGE_PRE_CONVOLUTION_BITS);
+         dest += width * 4;
+      }
+
+      /* do convolution */
+      if (ctx->Pixel.Convolution2DEnabled) {
+         _mesa_convolve_2d_image(ctx, &width, &height, tmpImage, convImage);
+      }
+      else {
+         ASSERT(ctx->Pixel.Separable2DEnabled);
+         _mesa_convolve_sep_image(ctx, &width, &height, tmpImage, convImage);
+      }
+      _mesa_free(tmpImage);
+
+      /* continue transfer ops and draw the convolved image */
+      unpack = &ctx->DefaultPacking;
+      pixels = convImage;
+      format = GL_RGBA;
+      type = GL_FLOAT;
+      transferOps &= IMAGE_POST_CONVOLUTION_BITS;
+   }
+
+   /*
+    * General solution
+    */
+   {
+      const GLbitfield interpMask = span.interpMask;
+      const GLbitfield arrayMask = span.arrayMask;
+      GLint skipPixels = 0;
+
+      /* if the span is wider than MAX_WIDTH we have to do it in chunks */
+      while (skipPixels < width) {
+         const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+         GLint row;
+
+         ASSERT(spanWidth <= MAX_WIDTH);
+
+         for (row = 0; row < height; row++) {
+            const GLvoid *source = _mesa_image_address2d(unpack,
+                     pixels, width, height, format, type, row, skipPixels);
+
+            /* Set these for each row since the _swrast_write_* function may
+             * change them while clipping.
+             */
+            span.x = x + skipPixels;
+            span.y = y + row;
+            span.end = spanWidth;
+            span.arrayMask = arrayMask;
+            span.interpMask = interpMask;
+
+            _mesa_unpack_color_span_chan(ctx, spanWidth, GL_RGBA,
+                                         (GLchan *) span.array->rgba,
+                                         format, type, source, unpack,
+                                         transferOps);
+            /* with a minmax/histogram sink enabled the row is not drawn */
+            if ((ctx->Pixel.MinMaxEnabled && ctx->MinMax.Sink) ||
+                (ctx->Pixel.HistogramEnabled && ctx->Histogram.Sink))
+               continue;
+
+            /* draw the span */
+            if (quickDraw) {
+               rb->PutRow(ctx, rb, span.end, span.x, span.y,
+                          span.array->rgba, NULL);
+            }
+            else if (zoom) {
+               _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
+                                       (CONST GLchan (*)[4]) span.array->rgba);
+            }
+            else {
+               _swrast_write_rgba_span(ctx, &span);
+            }
+         }
+
+         skipPixels += spanWidth;
+      }
+   }
+
+   if (convImage) {
+      _mesa_free(convImage);
+   }
+}
+
+
+/**
+ * This is a bit different from drawing GL_DEPTH_COMPONENT pixels.
+ * The only per-pixel operations that apply are depth scale/bias,
+ * stencil offset/shift, GL_DEPTH_WRITEMASK and GL_STENCIL_WRITEMASK,
+ * and pixel zoom.
+ * Also, only the depth buffer and stencil buffers are touched, not the
+ * color buffer(s).
+ */
+static void
+draw_depth_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
+                          GLsizei width, GLsizei height, GLenum type,
+                          const struct gl_pixelstore_attrib *unpack,
+                          const GLvoid *pixels)
+{
+   const GLint imgX = x, imgY = y;
+   const GLboolean scaleOrBias
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   const GLfloat depthScale = ctx->DrawBuffer->_DepthMaxF;
+   const GLuint stencilMask = ctx->Stencil.WriteMask[0];
+   const GLuint stencilType = (STENCIL_BITS == 8) ?
+      GL_UNSIGNED_BYTE : GL_UNSIGNED_SHORT;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   struct gl_renderbuffer *depthRb, *stencilRb;
+   struct gl_pixelstore_attrib clippedUnpack = *unpack;
+
+   if (!zoom) {
+      if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
+                                 &clippedUnpack)) {
+         /* totally clipped */
+         return;
+      }
+   }
+   /* drawing targets the draw framebuffer's attachments, not the read one's */
+   depthRb = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+   stencilRb = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+   ASSERT(depthRb);
+   ASSERT(stencilRb);
+
+   if (depthRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       stencilRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       depthRb == stencilRb &&
+       !scaleOrBias &&
+       !zoom &&
+       ctx->Depth.Mask &&
+       (stencilMask & 0xff) == 0xff) {
+      /* This is the ideal case.
+       * Drawing GL_DEPTH_STENCIL pixels into a combined depth/stencil buffer.
+       * Plus, no pixel transfer ops, zooming, or masking needed.
+       */
+      GLint i;
+      for (i = 0; i < height; i++) {
+         const GLuint *src = (const GLuint *)
+            _mesa_image_address2d(&clippedUnpack, pixels, width, height,
+                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
+         depthRb->PutRow(ctx, depthRb, width, x, y + i, src, NULL);
+      }
+   }
+   else {
+      /* sub-optimal cases:
+       * Separate depth/stencil buffers, or pixel transfer ops required.
+       */
+      /* XXX very wide images (skippixels): code below assumes width <= MAX_WIDTH */
+      GLint i;
+
+      depthRb = ctx->DrawBuffer->_DepthBuffer;
+      stencilRb = ctx->DrawBuffer->_StencilBuffer;
+
+      for (i = 0; i < height; i++) {
+         const GLuint *depthStencilSrc = (const GLuint *)
+            _mesa_image_address2d(&clippedUnpack, pixels, width, height,
+                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
+
+         if (ctx->Depth.Mask) {
+            if (!scaleOrBias && ctx->DrawBuffer->Visual.depthBits == 24) {
+               /* fast path 24-bit zbuffer */
+               GLuint zValues[MAX_WIDTH];
+               GLint j;
+               ASSERT(depthRb->DataType == GL_UNSIGNED_INT);
+               for (j = 0; j < width; j++) {
+                  zValues[j] = depthStencilSrc[j] >> 8;
+               }
+               if (zoom)
+                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width,
+                                              x, y + i, zValues);
+               else
+                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
+            }
+            else if (!scaleOrBias && ctx->DrawBuffer->Visual.depthBits == 16) {
+               /* fast path 16-bit zbuffer */
+               GLushort zValues[MAX_WIDTH];
+               GLint j;
+               ASSERT(depthRb->DataType == GL_UNSIGNED_SHORT);
+               for (j = 0; j < width; j++) {
+                  zValues[j] = depthStencilSrc[j] >> 16;
+               }
+               if (zoom)
+                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width,
+                                              x, y + i, zValues);
+               else
+                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
+            }
+            else {
+               /* general case */
+               GLuint zValues[MAX_WIDTH];  /* 16 or 32-bit Z value storage */
+               _mesa_unpack_depth_span(ctx, width,
+                                       depthRb->DataType, zValues, depthScale,
+                                       type, depthStencilSrc, &clippedUnpack);
+               if (zoom) {
+                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width, x,
+                                              y + i, zValues);
+               }
+               else {
+                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
+               }
+            }
+         }
+
+         if (stencilMask != 0x0) {
+            GLstencil stencilValues[MAX_WIDTH];
+            /* get stencil values, with shift/offset/mapping */
+            _mesa_unpack_stencil_span(ctx, width, stencilType, stencilValues,
+                                      type, depthStencilSrc, &clippedUnpack,
+                                      ctx->_ImageTransferState);
+            if (zoom)
+               _swrast_write_zoomed_stencil_span(ctx, imgX, imgY, width,
+                                                  x, y + i, stencilValues);
+            else
+               _swrast_write_stencil_span(ctx, width, x, y + i, stencilValues);
+         }
+      }
+   }
+}
+
+
+
+/**
+ * Execute software-based glDrawPixels.  API-level error checking is already
+ * done; if unpack->BufferObj is a PBO it is mapped here and unmapped on exit.
+ */
+void
+_swrast_DrawPixels( GLcontext *ctx,
+                    GLint x, GLint y,
+                    GLsizei width, GLsizei height,
+                    GLenum format, GLenum type,
+                    const struct gl_pixelstore_attrib *unpack,
+                    const GLvoid *pixels )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLboolean pboMapped = GL_FALSE;  /* true only after MapBuffer succeeded */
+
+   RENDER_START(swrast,ctx);
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+
+   if (unpack->BufferObj->Name) {
+      /* unpack from PBO */
+      GLubyte *buf;
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glDrawPixels(invalid PBO access)");
+         goto end;
+      }
+      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                                              GL_READ_ONLY_ARB,
+                                              unpack->BufferObj);
+      if (!buf) {
+         /* buffer is already mapped - that's an error */
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels(PBO is mapped)");
+         goto end;
+      }
+      pboMapped = GL_TRUE;
+      pixels = ADD_POINTERS(buf, pixels);
+   }
+
+   switch (format) {
+   case GL_STENCIL_INDEX:
+      draw_stencil_pixels( ctx, x, y, width, height, type, unpack, pixels );
+      break;
+   case GL_DEPTH_COMPONENT:
+      draw_depth_pixels( ctx, x, y, width, height, type, unpack, pixels );
+      break;
+   case GL_COLOR_INDEX:
+      if (ctx->Visual.rgbMode)
+         draw_rgba_pixels(ctx, x,y, width, height, format, type, unpack, pixels);
+      else
+         draw_index_pixels(ctx, x, y, width, height, type, unpack, pixels);
+      break;
+   case GL_RED:
+   case GL_GREEN:
+   case GL_BLUE:
+   case GL_ALPHA:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_ALPHA:
+   case GL_RGB:
+   case GL_BGR:
+   case GL_RGBA:
+   case GL_BGRA:
+   case GL_ABGR_EXT:
+      draw_rgba_pixels(ctx, x, y, width, height, format, type, unpack, pixels);
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+      draw_depth_stencil_pixels(ctx, x, y, width, height,
+                                type, unpack, pixels);
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected format in _swrast_DrawPixels");
+      /* don't return yet, clean-up */
+   }
+
+end:
+   RENDER_FINISH(swrast,ctx);
+
+   if (pboMapped) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+}
+
+
+
+#if 0  /* experimental */
+/*
+ * Execute glDrawDepthPixelsMESA().  Compiled out; depthType/depths are unused.
+ */
+void
+_swrast_DrawDepthPixelsMESA( GLcontext *ctx,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height,
+                             GLenum colorFormat, GLenum colorType,
+                             const GLvoid *colors,
+                             GLenum depthType, const GLvoid *depths,
+                             const struct gl_pixelstore_attrib *unpack )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+
+   RENDER_START(swrast,ctx);
+
+   switch (colorFormat) {
+   case GL_COLOR_INDEX:
+      if (ctx->Visual.rgbMode)
+	 draw_rgba_pixels(ctx, x,y, width, height, colorFormat, colorType,
+                          unpack, colors);
+      else
+	 draw_index_pixels(ctx, x, y, width, height, colorType,
+                           unpack, colors);
+      break;
+   case GL_RED:
+   case GL_GREEN:
+   case GL_BLUE:
+   case GL_ALPHA:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_ALPHA:
+   case GL_RGB:
+   case GL_BGR:
+   case GL_RGBA:
+   case GL_BGRA:
+   case GL_ABGR_EXT:
+      draw_rgba_pixels(ctx, x, y, width, height, colorFormat, colorType,
+                       unpack, colors);
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected format in glDrawDepthPixelsMESA");
+   }
+
+   RENDER_FINISH(swrast,ctx);
+}
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_drawpix.h b/dist/Mesa/src/mesa/swrast/s_drawpix.h
new file mode 100644
index 000000000..66067115d
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_drawpix.h
@@ -0,0 +1,36 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_DRAWPIXELS_H
+#define S_DRAWPIXELS_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+/* XXX kill this header? */
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_feedback.c b/dist/Mesa/src/mesa/swrast/s_feedback.c
new file mode 100644
index 000000000..26cb05cd5
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_feedback.c
@@ -0,0 +1,161 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "glheader.h"
+#include "colormac.h"
+#include "context.h"
+#include "enums.h"
+#include "feedback.h"
+#include "macros.h"
+
+#include "s_context.h"
+#include "s_feedback.h"
+#include "s_triangle.h"
+
+
+#define FB_3D		0x01
+#define FB_4D		0x02
+#define FB_INDEX	0x04
+#define FB_COLOR	0x08
+#define FB_TEXTURE	0X10
+
+
+
+
+/* Emit one vertex to the feedback buffer: position/texcoord of v, color of pv. */
+static void feedback_vertex( GLcontext *ctx,
+                             const SWvertex *v, const SWvertex *pv )
+{
+   const GLuint texUnit = 0;  /* See section 5.3 of 1.2.1 spec */
+   GLfloat win[4], color[4], tc[4];
+   GLuint i;
+
+   /* window position: Z is divided by _DepthMaxF, W is inverted */
+   win[0] = v->win[0];
+   win[1] = v->win[1];
+   win[2] = v->win[2] / ctx->DrawBuffer->_DepthMaxF;
+   win[3] = 1.0F / v->win[3];
+
+   for (i = 0; i < 4; i++)
+      color[i] = CHAN_TO_FLOAT(pv->color[i]);
+   if (v->texcoord[texUnit][3] == 1.0 ||
+       v->texcoord[texUnit][3] == 0.0) {
+      /* q of 1 needs no divide, q of 0 cannot be divided by */
+      COPY_4V(tc, v->texcoord[texUnit]);
+   }
+   else {
+      /* scale the first three coords by 1/q; q itself is passed unchanged */
+      const GLfloat invq = 1.0F / v->texcoord[texUnit][3];
+      tc[0] = v->texcoord[texUnit][0] * invq;
+      tc[1] = v->texcoord[texUnit][1] * invq;
+      tc[2] = v->texcoord[texUnit][2] * invq;
+      tc[3] = v->texcoord[texUnit][3];
+   }
+
+   _mesa_feedback_vertex( ctx, win, color, (GLfloat) v->index, tc );
+}
+
+
+/*
+ * Put triangle in feedback buffer: a GL_POLYGON_TOKEN, the vertex count (3),
+ * then the three vertices.  Nothing is written if the cull test returns 0.
+ */
+void _swrast_feedback_triangle( GLcontext *ctx,
+                           const SWvertex *v0,
+                           const SWvertex *v1,
+                           const SWvertex *v2)
+{
+   GLboolean smooth;
+
+   if (!_swrast_culltriangle( ctx, v0, v1, v2 ))
+      return;
+
+   FEEDBACK_TOKEN( ctx, (GLfloat) (GLint) GL_POLYGON_TOKEN );
+   FEEDBACK_TOKEN( ctx, (GLfloat) 3 );        /* three vertices */
+
+   /* smooth: each vertex supplies its own color; otherwise v2 supplies all */
+   smooth = (ctx->Light.ShadeModel == GL_SMOOTH);
+   feedback_vertex( ctx, v0, smooth ? v0 : v2 );
+   feedback_vertex( ctx, v1, smooth ? v1 : v2 );
+   feedback_vertex( ctx, v2, v2 );
+}
+
+
+/*
+ * Put line in feedback buffer.  GL_LINE_RESET_TOKEN replaces GL_LINE_TOKEN
+ * while swrast->StippleCounter is 0; the counter is incremented afterwards.
+ */
+void _swrast_feedback_line( GLcontext *ctx, const SWvertex *v0, const SWvertex *v1 )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLenum token = (swrast->StippleCounter == 0) ? GL_LINE_RESET_TOKEN
+                                                      : GL_LINE_TOKEN;
+   /* unless smooth shading is on, v1 supplies the color for both endpoints */
+   const SWvertex *pv0 = (ctx->Light.ShadeModel == GL_SMOOTH) ? v0 : v1;
+
+   /* token first, then both endpoints */
+   FEEDBACK_TOKEN( ctx, (GLfloat) (GLint) token );
+
+   feedback_vertex( ctx, v0, pv0 );
+   feedback_vertex( ctx, v1, v1 );
+
+   swrast->StippleCounter++;
+}
+
+/* Put point in feedback buffer: GL_POINT_TOKEN, then the vertex (own color). */
+void _swrast_feedback_point( GLcontext *ctx, const SWvertex *v )
+{
+   FEEDBACK_TOKEN( ctx, (GLfloat) (GLint) GL_POINT_TOKEN );
+   feedback_vertex( ctx, v, v );
+}
+
+/* Pass each vertex's scaled window Z to _mesa_update_hitflag() unless the cull test returns 0. */
+void _swrast_select_triangle( GLcontext *ctx,
+                         const SWvertex *v0,
+                         const SWvertex *v1,
+                         const SWvertex *v2)
+{
+   GLfloat zs;   /* 1 / _DepthMaxF, applied to every window Z below */
+   if (!_swrast_culltriangle( ctx, v0, v1, v2 ))
+      return;
+   zs = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   _mesa_update_hitflag( ctx, v0->win[2] * zs );
+   _mesa_update_hitflag( ctx, v1->win[2] * zs );
+   _mesa_update_hitflag( ctx, v2->win[2] * zs );
+}
+
+/* Pass both endpoints' window Z, scaled by 1/_DepthMaxF, to _mesa_update_hitflag(). */
+void _swrast_select_line( GLcontext *ctx, const SWvertex *v0, const SWvertex *v1 )
+{
+   const GLfloat zs = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   _mesa_update_hitflag( ctx, v0->win[2] * zs );
+   _mesa_update_hitflag( ctx, v1->win[2] * zs );
+}
+
+/* Pass the point's window Z, scaled by 1/_DepthMaxF, to _mesa_update_hitflag(). */
+void _swrast_select_point( GLcontext *ctx, const SWvertex *v )
+{
+   const GLfloat zs = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   _mesa_update_hitflag( ctx, v->win[2] * zs );
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_feedback.h b/dist/Mesa/src/mesa/swrast/s_feedback.h
new file mode 100644
index 000000000..73f45c10b
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_feedback.h
@@ -0,0 +1,51 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_FEEDBACK_H
+#define S_FEEDBACK_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+/* Point/line/triangle functions defined in s_feedback.c (vertex names match the definitions). */
+extern void _swrast_feedback_point( GLcontext *ctx, const SWvertex *v );
+
+extern void _swrast_feedback_line( GLcontext *ctx,
+                              const SWvertex *v0, const SWvertex *v1 );
+
+extern void _swrast_feedback_triangle( GLcontext *ctx, const SWvertex *v0,
+                                  const SWvertex *v1, const SWvertex *v2 );
+
+extern void _swrast_select_point( GLcontext *ctx, const SWvertex *v );
+
+extern void _swrast_select_line( GLcontext *ctx,
+                            const SWvertex *v0, const SWvertex *v1 );
+
+extern void _swrast_select_triangle( GLcontext *ctx, const SWvertex *v0,
+                                const SWvertex *v1, const SWvertex *v2 );
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_fog.c b/dist/Mesa/src/mesa/swrast/s_fog.c
new file mode 100644
index 000000000..e3d6274b3
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_fog.c
@@ -0,0 +1,327 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "colormac.h"
+#include "context.h"
+#include "macros.h"
+
+#include "s_context.h"
+#include "s_fog.h"
+#include "s_span.h"
+
+
+/**
+ * Convert a raster distance into a fog blend factor.
+ * The factor is computed according to ctx->Fog.Mode (linear, exp or exp2)
+ * and clamped to [0,1]; an unknown mode is reported and yields 0.
+ */
+GLfloat
+_swrast_z_to_fogfactor(GLcontext *ctx, GLfloat z)
+{
+   GLfloat factor;
+
+   switch (ctx->Fog.Mode) {
+   case GL_LINEAR: {
+      /* a zero-length fog range falls back to a unit scale */
+      const GLfloat scale = (ctx->Fog.Start == ctx->Fog.End)
+         ? 1.0F : 1.0F / (ctx->Fog.End - ctx->Fog.Start);
+      factor = (ctx->Fog.End - z) * scale;
+      break;
+   }
+   case GL_EXP:
+      factor = EXPF(-ctx->Fog.Density * z);
+      break;
+   case GL_EXP2:
+      factor = EXPF(-(ctx->Fog.Density * ctx->Fog.Density * z * z));
+      break;
+   default:
+      _mesa_problem(ctx, "Bad fog mode in _swrast_z_to_fogfactor");
+      return 0.0;
+   }
+
+   return CLAMP(factor, 0.0F, 1.0F);
+}
+
+
+/**
+ * Apply fog to a span of RGBA pixels (only R, G and B are modified).
+ * With swrast->_PreferPixelFog the span's interpolated fog/fogStep values are
+ * fog coordinates and the blend factors are computed here, per fragment.
+ * Otherwise the values (span->array->fog[] or interpolated fog/fogStep) are
+ * already blend factors; _PreferPixelFog should be in sync with that state!
+ */
+void
+_swrast_fog_rgba_span( const GLcontext *ctx, struct sw_span *span )
+{
+   const SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLchan rFog = swrast->_FogColor[RCOMP];
+   const GLchan gFog = swrast->_FogColor[GCOMP];
+   const GLchan bFog = swrast->_FogColor[BCOMP];
+   const GLuint haveW = (span->interpMask & SPAN_W);
+   GLchan (*rgba)[4] = (GLchan (*)[4]) span->array->rgba;
+
+   ASSERT(swrast->_FogEnabled);
+   ASSERT((span->interpMask | span->arrayMask) & SPAN_FOG);
+   ASSERT(span->arrayMask & SPAN_RGBA);
+
+   /* NOTE: if haveW is true, that means the fog start/step values are
+    * perspective-corrected and we have to divide each fog coord by W.
+    */
+
+   /* we need to compute fog blend factors */
+   if (swrast->_PreferPixelFog) {
+      /* The span's fog values are fog coordinates, now compute blend factors
+       * f and blend: color = f * color + (1 - f) * fogColor.
+       */
+      switch (swrast->_FogMode) {  /* NOTE(review): CI path uses ctx->Fog.Mode */
+      case GL_LINEAR:
+         {
+            const GLfloat fogEnd = ctx->Fog.End;
+            const GLfloat fogScale = (ctx->Fog.Start == ctx->Fog.End)
+               ? 1.0F : 1.0F / (ctx->Fog.End - ctx->Fog.Start); /* no div by 0 */
+            const GLfloat fogStep = span->fogStep;
+            GLfloat fogCoord = span->fog;
+            const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+            GLfloat w = haveW ? span->w : 1.0F;
+            GLuint i;
+            for (i = 0; i < span->end; i++) {
+               GLfloat f, oneMinusF;
+               f = (fogEnd - FABSF(fogCoord) / w) * fogScale;
+               f = CLAMP(f, 0.0F, 1.0F);
+               oneMinusF = 1.0F - f;
+               rgba[i][RCOMP] = (GLchan) (f * rgba[i][RCOMP] + oneMinusF * rFog);
+               rgba[i][GCOMP] = (GLchan) (f * rgba[i][GCOMP] + oneMinusF * gFog);
+               rgba[i][BCOMP] = (GLchan) (f * rgba[i][BCOMP] + oneMinusF * bFog);
+               fogCoord += fogStep;
+               w += wStep;
+            }
+         }
+         break;
+      case GL_EXP:
+         {
+            const GLfloat density = -ctx->Fog.Density;  /* negated once here */
+            const GLfloat fogStep = span->fogStep;
+            GLfloat fogCoord = span->fog;
+            const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+            GLfloat w = haveW ? span->w : 1.0F;
+            GLuint i;
+            for (i = 0; i < span->end; i++) {
+               GLfloat f, oneMinusF;
+               f = EXPF(density * FABSF(fogCoord) / w);
+               f = CLAMP(f, 0.0F, 1.0F);
+               oneMinusF = 1.0F - f;
+               rgba[i][RCOMP] = (GLchan) (f * rgba[i][RCOMP] + oneMinusF * rFog);
+               rgba[i][GCOMP] = (GLchan) (f * rgba[i][GCOMP] + oneMinusF * gFog);
+               rgba[i][BCOMP] = (GLchan) (f * rgba[i][BCOMP] + oneMinusF * bFog);
+               fogCoord += fogStep;
+               w += wStep;
+            }
+         }
+         break;
+      case GL_EXP2:
+         {
+            const GLfloat negDensitySquared = -ctx->Fog.Density * ctx->Fog.Density;
+            const GLfloat fogStep = span->fogStep;
+            GLfloat fogCoord = span->fog;
+            const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+            GLfloat w = haveW ? span->w : 1.0F;
+            GLuint i;
+            for (i = 0; i < span->end; i++) {
+               const GLfloat coord = fogCoord / w;  /* squared below: no FABSF */
+               GLfloat tmp = negDensitySquared * coord * coord;
+               GLfloat f, oneMinusF;
+#if defined(__alpha__) || defined(__alpha)
+               /* XXX this underflow check may be needed for other systems*/
+               if (tmp < FLT_MIN_10_EXP)
+                  tmp = FLT_MIN_10_EXP;
+#endif
+               f = EXPF(tmp);
+               f = CLAMP(f, 0.0F, 1.0F);
+               oneMinusF = 1.0F - f;
+               rgba[i][RCOMP] = (GLchan) (f * rgba[i][RCOMP] + oneMinusF * rFog);
+               rgba[i][GCOMP] = (GLchan) (f * rgba[i][GCOMP] + oneMinusF * gFog);
+               rgba[i][BCOMP] = (GLchan) (f * rgba[i][BCOMP] + oneMinusF * bFog);
+               fogCoord += fogStep;
+               w += wStep;
+            }
+         }
+         break;
+      default:
+         _mesa_problem(ctx, "Bad fog mode in _swrast_fog_rgba_span");
+         return;
+      }
+   }
+   else if (span->arrayMask & SPAN_FOG) {
+      /* The span's fog array values are blend factors.
+       * They were previously computed per-vertex.
+       */
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         const GLfloat f = span->array->fog[i];  /* used as is, not clamped */
+         const GLfloat oneMinusF = 1.0F - f;
+         rgba[i][RCOMP] = (GLchan) (f * rgba[i][RCOMP] + oneMinusF * rFog);
+         rgba[i][GCOMP] = (GLchan) (f * rgba[i][GCOMP] + oneMinusF * gFog);
+         rgba[i][BCOMP] = (GLchan) (f * rgba[i][BCOMP] + oneMinusF * bFog);
+      }
+   }
+   else {
+      /* The span's fog start/step values are blend factors.
+       * They were previously computed per-vertex.
+       */
+      const GLfloat fogStep = span->fogStep;
+      GLfloat fog = span->fog;
+      const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+      GLfloat w = haveW ? span->w : 1.0F;
+      GLuint i;
+      ASSERT(span->interpMask & SPAN_FOG);
+      for (i = 0; i < span->end; i++) {
+         const GLfloat fact = fog / w;  /* used as is, not clamped */
+         const GLfloat oneMinusF = 1.0F - fact;
+         rgba[i][RCOMP] = (GLchan) (fact * rgba[i][RCOMP] + oneMinusF * rFog);
+         rgba[i][GCOMP] = (GLchan) (fact * rgba[i][GCOMP] + oneMinusF * gFog);
+         rgba[i][BCOMP] = (GLchan) (fact * rgba[i][BCOMP] + oneMinusF * bFog);
+         fog += fogStep;
+         w += wStep;
+      }
+   }
+}
+
+
+/**
+ * Like _swrast_fog_rgba_span(), but for color index mode: each index becomes
+ * index + (1 - f) * ctx->Fog.Index, where f is the fragment's blend factor.
+ */
+void
+_swrast_fog_ci_span( const GLcontext *ctx, struct sw_span *span )
+{
+   const SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLuint haveW = (span->interpMask & SPAN_W);
+   const GLuint fogIndex = (GLuint) ctx->Fog.Index;
+   GLuint *index = span->array->index;
+
+   ASSERT(swrast->_FogEnabled);
+   ASSERT(span->arrayMask & SPAN_INDEX);
+   ASSERT((span->interpMask | span->arrayMask) & SPAN_FOG);
+
+   if (swrast->_PreferPixelFog) {
+      /* The span's fog values are fog coordinates: compute blend factors.
+       * As in the RGBA path the coordinate's magnitude is used (GL_EXP2
+       * squares it anyway). */
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+         {
+            const GLfloat fogEnd = ctx->Fog.End;
+            const GLfloat fogScale = (ctx->Fog.Start == ctx->Fog.End)
+               ? 1.0F : 1.0F / (ctx->Fog.End - ctx->Fog.Start);
+            const GLfloat fogStep = span->fogStep;
+            GLfloat fogCoord = span->fog;
+            const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+            GLfloat w = haveW ? span->w : 1.0F;
+            GLuint i;
+            for (i = 0; i < span->end; i++) {
+               GLfloat f = (fogEnd - FABSF(fogCoord) / w) * fogScale;
+               f = CLAMP(f, 0.0F, 1.0F);
+               index[i] = (GLuint) ((GLfloat) index[i] + (1.0F - f) * fogIndex);
+               fogCoord += fogStep;
+               w += wStep;
+            }
+         }
+         break;
+      case GL_EXP:
+         {
+            const GLfloat density = -ctx->Fog.Density;
+            const GLfloat fogStep = span->fogStep;
+            GLfloat fogCoord = span->fog;
+            const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+            GLfloat w = haveW ? span->w : 1.0F;
+            GLuint i;
+            for (i = 0; i < span->end; i++) {
+               GLfloat f = EXPF(density * FABSF(fogCoord) / w);
+               f = CLAMP(f, 0.0F, 1.0F);
+               index[i] = (GLuint) ((GLfloat) index[i] + (1.0F - f) * fogIndex);
+               fogCoord += fogStep;
+               w += wStep;
+            }
+         }
+         break;
+      case GL_EXP2:
+         {
+            const GLfloat negDensitySquared = -ctx->Fog.Density * ctx->Fog.Density;
+            const GLfloat fogStep = span->fogStep;
+            GLfloat fogCoord = span->fog;
+            const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+            GLfloat w = haveW ? span->w : 1.0F;
+            GLuint i;
+            for (i = 0; i < span->end; i++) {
+               const GLfloat coord = fogCoord / w;
+               GLfloat tmp = negDensitySquared * coord * coord;
+               GLfloat f;
+#if defined(__alpha__) || defined(__alpha)
+               /* XXX this underflow check may be needed for other systems*/
+               if (tmp < FLT_MIN_10_EXP)
+                  tmp = FLT_MIN_10_EXP;
+#endif
+               f = EXPF(tmp);
+               f = CLAMP(f, 0.0F, 1.0F);
+               index[i] = (GLuint) ((GLfloat) index[i] + (1.0F - f) * fogIndex);
+               fogCoord += fogStep;
+               w += wStep;
+            }
+         }
+         break;
+      default:
+         _mesa_problem(ctx, "Bad fog mode in _swrast_fog_ci_span");
+         return;
+      }
+   }
+   else if (span->arrayMask & SPAN_FOG) {
+      /* The span's fog array values are blend factors.
+       * They were previously computed per-vertex.
+       */
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         const GLfloat f = span->array->fog[i];
+         index[i] = (GLuint) ((GLfloat) index[i] + (1.0F - f) * fogIndex);
+      }
+   }
+   else {
+      /* The span's fog start/step values are blend factors.
+       * They were previously computed per-vertex.
+       */
+      const GLfloat fogStep = span->fogStep;
+      GLfloat fog = span->fog;
+      const GLfloat wStep = haveW ? span->dwdx : 0.0F;
+      GLfloat w = haveW ? span->w : 1.0F;
+      GLuint i;
+      ASSERT(span->interpMask & SPAN_FOG);
+      for (i = 0; i < span->end; i++) {
+         const GLfloat f = fog / w;
+         index[i] = (GLuint) ((GLfloat) index[i] + (1.0F - f) * fogIndex);
+         fog += fogStep;
+         w += wStep;
+      }
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_fog.h b/dist/Mesa/src/mesa/swrast/s_fog.h
new file mode 100644
index 000000000..f5744b1d9
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_fog.h
@@ -0,0 +1,45 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.1
+ *
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_FOG_H
+#define S_FOG_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+/* Convert a raster distance to a fog factor in [0,1]. */
+extern GLfloat
+_swrast_z_to_fogfactor(GLcontext *ctx, GLfloat z);
+/* Apply fog to a span of RGBA pixels. */
+extern void
+_swrast_fog_rgba_span( const GLcontext *ctx, struct sw_span *span );
+/* Apply fog to a span of color indexes. */
+extern void
+_swrast_fog_ci_span( const GLcontext *ctx, struct sw_span *span );
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_imaging.c b/dist/Mesa/src/mesa/swrast/s_imaging.c
new file mode 100644
index 000000000..ac69d270b
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_imaging.c
@@ -0,0 +1,199 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* KW:  Moved these here to remove knowledge of swrast from core mesa.
+ * Should probably pull the entire software implementation of these
+ * extensions into either swrast or a sister module.  
+ */
+
+#include "s_context.h"
+#include "s_span.h"
+#include "colortab.h"
+#include "convolve.h"
+
+
+/**
+ * Read a row of up to MAX_WIDTH RGBA pixels starting at (x, y) from the
+ * color read buffer and pass it to _mesa_ColorTable().
+ * Silently does nothing when there is no color read buffer.
+ */
+void
+_swrast_CopyColorTable( GLcontext *ctx,
+			GLenum target, GLenum internalformat,
+			GLint x, GLint y, GLsizei width)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_buffer_object *savedUnpackObj;
+   GLchan pixels[MAX_WIDTH][4];
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer)
+      return;  /* no readbuffer - OK */
+
+   width = (width > MAX_WIDTH) ? MAX_WIDTH : width;
+
+   /* the framebuffer read is bracketed by RENDER_START/RENDER_FINISH */
+   RENDER_START( swrast, ctx );
+   _swrast_read_rgba_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                           width, x, y, pixels );
+   RENDER_FINISH( swrast, ctx );
+
+   /* Point the unpack buffer binding at the null buffer object while the
+    * local pixel array is handed over, then put the saved binding back.
+    */
+   savedUnpackObj = ctx->Unpack.BufferObj;
+   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+
+   _mesa_ColorTable(target, internalformat, width, GL_RGBA, CHAN_TYPE, pixels);
+   ctx->Unpack.BufferObj = savedUnpackObj;
+}
+
+
+/**
+ * Read a row of up to MAX_WIDTH RGBA pixels starting at (x, y) from the
+ * color read buffer and pass it, together with 'start', to
+ * _mesa_ColorSubTable().  Does nothing when there is no color read buffer.
+ */
+void
+_swrast_CopyColorSubTable( GLcontext *ctx,GLenum target, GLsizei start,
+			   GLint x, GLint y, GLsizei width)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_buffer_object *savedUnpackObj;
+   GLchan pixels[MAX_WIDTH][4];
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer)
+      return;  /* no readbuffer - OK */
+
+   width = (width > MAX_WIDTH) ? MAX_WIDTH : width;
+
+   /* the framebuffer read is bracketed by RENDER_START/RENDER_FINISH */
+   RENDER_START( swrast, ctx );
+   _swrast_read_rgba_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                           width, x, y, pixels );
+   RENDER_FINISH( swrast, ctx );
+
+   /* Point the unpack buffer binding at the null buffer object while the
+    * local pixel array is handed over, then put the saved binding back.
+    */
+   savedUnpackObj = ctx->Unpack.BufferObj;
+   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+
+   _mesa_ColorSubTable(target, start, width, GL_RGBA, CHAN_TYPE, pixels);
+   ctx->Unpack.BufferObj = savedUnpackObj;
+}
+
+
+/**
+ * Read a row of RGBA pixels at (x, y) from the color read buffer and load
+ * it as a 1-D convolution filter via _mesa_ConvolutionFilter1D().
+ * width is clamped to MAX_CONVOLUTION_WIDTH, the size of the local buffer.
+ */
+void
+_swrast_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target,
+				GLenum internalFormat,
+				GLint x, GLint y, GLsizei width)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLchan rgba[MAX_CONVOLUTION_WIDTH][4];
+   struct gl_buffer_object *bufferSave;
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer)
+      return;  /* no readbuffer - OK */
+
+   /* never read more pixels than rgba[] can hold */
+   if (width > MAX_CONVOLUTION_WIDTH)
+      width = MAX_CONVOLUTION_WIDTH;
+
+   RENDER_START( swrast, ctx );
+   _swrast_read_rgba_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                           width, x, y, (GLchan (*)[4]) rgba );
+   RENDER_FINISH( swrast, ctx );
+
+   /* store as convolution filter, with the PBO binding saved and nulled */
+   bufferSave = ctx->Unpack.BufferObj;
+   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+   _mesa_ConvolutionFilter1D(target, internalFormat, width,
+                             GL_RGBA, CHAN_TYPE, rgba);
+   ctx->Unpack.BufferObj = bufferSave;  /* restore PBO binding */
+}
+
+
+/**
+ * Read a width x height block of RGBA pixels at (x, y) from the color read
+ * buffer and load it as a 2-D convolution filter through
+ * _mesa_ConvolutionFilter2D().  The size is clamped to the local buffer
+ * (MAX_CONVOLUTION_WIDTH x MAX_CONVOLUTION_HEIGHT).
+ */
+void
+_swrast_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target,
+				GLenum internalFormat,
+				GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_pixelstore_attrib packSave;
+   GLchan rgba[MAX_CONVOLUTION_HEIGHT][MAX_CONVOLUTION_WIDTH][4];
+   GLint i;
+
+   if (!ctx->ReadBuffer->_ColorReadBuffer)
+      return;  /* no readbuffer - OK */
+
+   /* never read more rows or pixels than rgba[] can hold */
+   if (width > MAX_CONVOLUTION_WIDTH)
+      width = MAX_CONVOLUTION_WIDTH;
+   if (height > MAX_CONVOLUTION_HEIGHT)
+      height = MAX_CONVOLUTION_HEIGHT;
+
+   RENDER_START(swrast,ctx);
+
+   /* read pixels from framebuffer */
+   for (i = 0; i < height; i++) {
+      _swrast_read_rgba_span( ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                              width, x, y + i, (GLchan (*)[4]) rgba[i] );
+   }
+
+   RENDER_FINISH(swrast,ctx);
+
+   /*
+    * HACK: save & restore context state so we can store this as a
+    * convolution filter via the GL api.  Doesn't call any callbacks
+    * hanging off ctx->Unpack statechanges.
+    */
+   packSave = ctx->Unpack;  /* save pixel packing params, incl. BufferObj */
+   ctx->Unpack.Alignment = 1;
+   ctx->Unpack.RowLength = MAX_CONVOLUTION_WIDTH;  /* row pitch of rgba[] */
+   ctx->Unpack.SkipPixels = 0;
+   ctx->Unpack.SkipRows = 0;
+   ctx->Unpack.ImageHeight = 0;
+   ctx->Unpack.SkipImages = 0;
+   ctx->Unpack.SwapBytes = GL_FALSE;
+   ctx->Unpack.LsbFirst = GL_FALSE;
+   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;  /* no PBO: use rgba[] */
+   ctx->NewState |= _NEW_PACKUNPACK;
+
+   _mesa_ConvolutionFilter2D(target, internalFormat, width, height,
+                             GL_RGBA, CHAN_TYPE, rgba);
+
+   ctx->Unpack = packSave;  /* restore pixel packing params and PBO binding */
+   ctx->NewState |= _NEW_PACKUNPACK;
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_lines.c b/dist/Mesa/src/mesa/swrast/s_lines.c
new file mode 100644
index 000000000..1abe85c54
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_lines.c
@@ -0,0 +1,337 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "macros.h"
+#include "s_aaline.h"
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_feedback.h"
+#include "s_lines.h"
+#include "s_span.h"
+
+
+/*
+ * Fill mask[0..len-1] for a stippled line.  For every entry the running
+ * swrast->StippleCounter, divided by the stipple factor and taken mod 16,
+ * selects the bit of ctx->Line.StipplePattern that decides whether mask[i]
+ * becomes GL_TRUE or GL_FALSE; the counter is then advanced by one.
+ */
+static void
+compute_stipple_mask( GLcontext *ctx, GLuint len, GLubyte mask[] )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint n;
+
+   for (n = 0; n < len; n++, swrast->StippleCounter++) {
+      const GLuint patternBit
+         = (swrast->StippleCounter / ctx->Line.StippleFactor) & 0xf;
+
+      mask[n] = ((1 << patternBit) & ctx->Line.StipplePattern)
+         ? GL_TRUE : GL_FALSE;
+   }
+}
+
+
+/*
+ * Add 'delta' to each of the first 'count' entries of coord[].
+ */
+static void
+shift_span_coords( GLint coord[], GLuint count, GLint delta )
+{
+   GLuint i;
+
+   for (i = 0; i < count; i++) {
+      coord[i] += delta;
+   }
+}
+
+
+/*
+ * To draw a wide line the span is simply written 'width' times, side by
+ * side.  Every copy lies one pixel further along the minor axis.
+ * The coordinate array is left at the position of the last copy.
+ *
+ * ctx    - supplies the line width and the visual's color mode
+ * span   - the line's fragments; its x[] or y[] array is modified in place
+ * xMajor - true: copies are stacked in Y, false: copies are stacked in X
+ */
+static void
+draw_wide_line( GLcontext *ctx, struct sw_span *span, GLboolean xMajor )
+{
+   GLint *minor = xMajor ? span->array->y : span->array->x;
+   GLint lineWidth;
+   GLint firstOffset;
+   GLint pass;
+
+   ASSERT(span->end < MAX_WIDTH);
+
+   lineWidth = (GLint) CLAMP( ctx->Line._Width, MIN_LINE_WIDTH, MAX_LINE_WIDTH );
+
+   /* the first copy is drawn 'firstOffset' pixels before the original */
+   firstOffset = lineWidth / 2;
+   if ((lineWidth & 1) == 0) {
+      firstOffset--;
+   }
+
+   for (pass = 0; pass < lineWidth; pass++) {
+      /* jump to the first position, then advance one pixel per pass */
+      const GLint delta = (pass == 0) ? -firstOffset : 1;
+
+      shift_span_coords(minor, span->end, delta);
+
+      if (ctx->Visual.rgbMode) {
+         _swrast_write_rgba_span(ctx, span);
+      }
+      else {
+         _swrast_write_index_span(ctx, span);
+      }
+   }
+}
+
+
+
+/**********************************************************************/
+/*****                    Rasterization                           *****/
+/**********************************************************************/
+
+/* Simple color index line (no stipple, width=1, no Z, no fog, no tex) */
+#define NAME simple_ci_line  /* rasterizer built from the template below */
+#define INTERP_INDEX         /* interpolate color index values */
+#define RENDER_SPAN(span) _swrast_write_index_span(ctx, &span)
+#include "s_linetemp.h"
+
+/* Simple RGBA line (no stipple, width=1, no Z, no fog, no tex) */
+#define NAME simple_rgba_line
+#define INTERP_RGBA
+#define RENDER_SPAN(span) _swrast_write_rgba_span(ctx, &span)
+#include "s_linetemp.h"
+
+/* Z, fog, wide, stipple color index line.  RENDER_SPAN builds the stipple
+ * mask when stippling is on, then draws wide (> 1.0) lines N times. */
+#define NAME general_ci_line
+#define INTERP_INDEX
+#define INTERP_Z
+#define INTERP_FOG
+#define RENDER_SPAN(span)					\
+   if (ctx->Line.StippleFlag) {					\
+      span.arrayMask |= SPAN_MASK;				\
+      compute_stipple_mask(ctx, span.end, span.array->mask);    \
+   }								\
+   if (ctx->Line._Width > 1.0) {					\
+      draw_wide_line(ctx, &span, (GLboolean)(dx > dy));		\
+   }								\
+   else {							\
+      _swrast_write_index_span(ctx, &span);			\
+   }
+#include "s_linetemp.h"
+
+/* Z, fog, wide, stipple RGBA line.  RENDER_SPAN builds the stipple mask
+ * when stippling is on, then draws wide (> 1.0) lines N times. */
+#define NAME general_rgba_line
+#define INTERP_RGBA
+#define INTERP_Z
+#define INTERP_FOG
+#define RENDER_SPAN(span)					\
+   if (ctx->Line.StippleFlag) {					\
+      span.arrayMask |= SPAN_MASK;				\
+      compute_stipple_mask(ctx, span.end, span.array->mask);	\
+   }								\
+   if (ctx->Line._Width > 1.0) {					\
+      draw_wide_line(ctx, &span, (GLboolean)(dx > dy));		\
+   }								\
+   else {							\
+      _swrast_write_rgba_span(ctx, &span);			\
+   }
+#include "s_linetemp.h"
+
+/* Single-texture line, w/ fog, Z, specular, etc.  Interpolates unit 0
+ * texcoords (INTERP_TEX); span output is as for general_rgba_line. */
+#define NAME textured_line
+#define INTERP_RGBA
+#define INTERP_Z
+#define INTERP_FOG
+#define INTERP_TEX
+#define RENDER_SPAN(span)					\
+   if (ctx->Line.StippleFlag) {					\
+      span.arrayMask |= SPAN_MASK;				\
+      compute_stipple_mask(ctx, span.end, span.array->mask);	\
+   }								\
+   if (ctx->Line._Width > 1.0) {					\
+      draw_wide_line(ctx, &span, (GLboolean)(dx > dy));		\
+   }								\
+   else {							\
+      _swrast_write_rgba_span(ctx, &span);			\
+   }
+#include "s_linetemp.h"
+
+/* Multi-texture or separate specular line, w/ fog, Z, specular, etc.
+ * Also interpolates specular RGB and multi-texcoords. */
+#define NAME multitextured_line
+#define INTERP_RGBA
+#define INTERP_SPEC
+#define INTERP_Z
+#define INTERP_FOG
+#define INTERP_MULTITEX
+#define RENDER_SPAN(span)					\
+   if (ctx->Line.StippleFlag) {					\
+      span.arrayMask |= SPAN_MASK;				\
+      compute_stipple_mask(ctx, span.end, span.array->mask);	\
+   }								\
+   if (ctx->Line._Width > 1.0) {					\
+      draw_wide_line(ctx, &span, (GLboolean)(dx > dy));		\
+   }								\
+   else {							\
+      _swrast_write_rgba_span(ctx, &span);			\
+   }
+#include "s_linetemp.h"
+
+/* Call SpecLine with specular summed into the (saved, then restored) colors. */
+void
+_swrast_add_spec_terms_line( GLcontext *ctx,
+                             const SWvertex *v0, const SWvertex *v1 )
+{
+   SWvertex *ncv0 = (SWvertex *)v0, *ncv1 = (SWvertex *)v1;  /* drop const */
+   GLchan c[2][4];   /* original colors of v0 and v1 */
+   GLuint i;
+   COPY_CHAN4( c[0], ncv0->color );
+   COPY_CHAN4( c[1], ncv1->color );
+   for (i = 0; i < 3; i++) {   /* RGB only; clamp, a plain GLchan += can wrap */
+      ncv0->color[i] = (GLchan) MIN2(ncv0->color[i] + ncv0->specular[i], CHAN_MAX);
+      ncv1->color[i] = (GLchan) MIN2(ncv1->color[i] + ncv1->specular[i], CHAN_MAX);
+   }
+   SWRAST_CONTEXT(ctx)->SpecLine( ctx, ncv0, ncv1 );
+   COPY_CHAN4( ncv0->color, c[0] );
+   COPY_CHAN4( ncv1->color, c[1] );
+}
+
+
+#ifdef DEBUG
+extern void
+_mesa_print_line_function(GLcontext *ctx);  /* silence compiler warning */
+/* Debug aid: print which line function is installed in swrast->Line. */
+void
+_mesa_print_line_function(GLcontext *ctx)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   _mesa_printf("Line Func == ");
+   if (swrast->Line == simple_ci_line)
+      _mesa_printf("simple_ci_line\n");
+   else if (swrast->Line == simple_rgba_line)
+      _mesa_printf("simple_rgba_line\n");
+   else if (swrast->Line == general_ci_line)
+      _mesa_printf("general_ci_line\n");
+   else if (swrast->Line == general_rgba_line)
+      _mesa_printf("general_rgba_line\n");
+   else if (swrast->Line == textured_line)
+      _mesa_printf("textured_line\n");
+   else if (swrast->Line == multitextured_line)
+      _mesa_printf("multitextured_line\n");
+   else  /* not one of this file's rasterizers: show its address */
+      _mesa_printf("Driver func %p\n", (void *) swrast->Line); /* %p: void* */
+}
+#endif
+
+
+
+#ifdef DEBUG
+/* USE(f) installs f as swrast->Line; it needs a local 'swrast' in scope. */
+/* DEBUG builds also record the current line function name */
+static const char *lineFuncName = NULL;
+
+#define USE(lineFunc)                   \
+do {                                    \
+    lineFuncName = #lineFunc;           \
+    /*_mesa_printf("%s\n", lineFuncName);*/   \
+    swrast->Line = lineFunc;            \
+} while (0)
+
+#else
+/* non-DEBUG builds: plain assignment, no name tracking */
+#define USE(lineFunc)  swrast->Line = lineFunc
+
+#endif
+
+
+
+/*
+ * Select the line function that matches the current rendering context
+ * and install it in swrast->Line.
+ * Please update the summary flag _SWRAST_NEW_LINE if you add or remove
+ * tests to this code.
+ */
+void
+_swrast_choose_line( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLboolean rgbmode = ctx->Visual.rgbMode;
+
+   if (ctx->RenderMode == GL_FEEDBACK) {
+      USE(_swrast_feedback_line);
+   }
+   else if (ctx->RenderMode != GL_RENDER) {
+      ASSERT(ctx->RenderMode == GL_SELECT);
+      USE(_swrast_select_line);
+   }
+   else if (ctx->Line.SmoothFlag) {
+      /* antialiased lines */
+      _swrast_choose_aa_line_function(ctx);
+      ASSERT(swrast->Line);
+   }
+   else if (ctx->Texture._EnabledCoordUnits) {
+      /* textured lines: multi-texture and/or separate specular color
+       * need the multitextured variant */
+      if (ctx->Texture._EnabledCoordUnits > 0x1
+          || NEED_SECONDARY_COLOR(ctx)) {
+         USE(multitextured_line);
+      }
+      else {
+         USE(textured_line);
+      }
+   }
+   else if (ctx->Depth.Test || swrast->_FogEnabled || ctx->Line._Width != 1.0
+            || ctx->Line.StippleFlag) {
+      /* no texture, but Z, fog, width>1, stipple, etc. */
+      if (rgbmode) {
+         USE(general_rgba_line);
+      }
+      else {
+         USE(general_ci_line);
+      }
+   }
+   else {
+      /* simplest lines */
+      if (rgbmode) {
+         USE(simple_rgba_line);
+      }
+      else {
+         USE(simple_ci_line);
+      }
+   }
+   /*_mesa_print_line_function(ctx);*/
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_lines.h b/dist/Mesa/src/mesa/swrast/s_lines.h
new file mode 100644
index 000000000..5372b99b9
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_lines.h
@@ -0,0 +1,41 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_LINES_H
+#define S_LINES_H
+
+#include "mtypes.h"
+#include "swrast.h"   /* SWvertex, as in s_feedback.h */
+/* Select the line function for the current context state. */
+void
+_swrast_choose_line( GLcontext *ctx );
+/* Call the SpecLine function with specular added to the vertex colors. */
+void
+_swrast_add_spec_terms_line( GLcontext *ctx,
+			     const SWvertex *v0,
+			     const SWvertex *v1 );
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_linetemp.h b/dist/Mesa/src/mesa/swrast/s_linetemp.h
new file mode 100644
index 000000000..f7c5ab492
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_linetemp.h
@@ -0,0 +1,454 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Line Rasterizer Template
+ *
+ * This file is #include'd to generate custom line rasterizers.
+ *
+ * The following macros may be defined to indicate what auxiliary information
+ * must be interpolated along the line:
+ *    INTERP_Z        - if defined, interpolate Z values
+ *    INTERP_FOG      - if defined, interpolate FOG values
+ *    INTERP_RGBA     - if defined, interpolate RGBA values
+ *    INTERP_SPEC     - if defined, interpolate specular RGB values
+ *    INTERP_INDEX    - if defined, interpolate color index values
+ *    INTERP_TEX      - if defined, interpolate unit 0 texcoords
+ *    INTERP_MULTITEX - if defined, interpolate multi-texcoords
+ *
+ * When one can directly address pixels in the color buffer the following
+ * macros can be defined and used to directly compute pixel addresses during
+ * rasterization (see pixelPtr):
+ *    PIXEL_TYPE          - the datatype of a pixel (GLubyte, GLushort, GLuint)
+ *    BYTES_PER_ROW       - number of bytes per row in the color buffer
+ *    PIXEL_ADDRESS(X,Y)  - returns the address of pixel at (X,Y) where
+ *                          Y==0 at bottom of screen and increases upward.
+ *
+ * Similarly, for direct depth buffer access, this type is used for depth
+ * buffer addressing:
+ *    DEPTH_TYPE          - either GLushort or GLuint
+ *
+ * Optionally, one may provide one-time setup code
+ *    SETUP_CODE    - code which is to be executed once per line
+ *
+ * To actually "plot" each pixel the PLOT macro must be defined...
+ *    PLOT(X,Y) - code to plot a pixel.  Example:
+ *                if (Z < *zPtr) {
+ *                   *zPtr = Z;
+ *                   color = pack_rgb( FixedToInt(r0), FixedToInt(g0),
+ *                                     FixedToInt(b0) );
+ *                   put_pixel( X, Y, color );
+ *                }
+ *
+ * This code was designed for the origin to be in the lower-left corner.
+ *
+ */
+
+
+/**
+ * Rasterize one line segment (template body; the including file supplies
+ * the function name through the NAME macro).
+ *
+ * The endpoints are truncated to integer window coordinates, start and
+ * step values for every enabled INTERP_* attribute are stored in a
+ * sw_span, and the pixels are then stepped through with an integer error
+ * term.  Each pixel is either handed to PLOT() or recorded in
+ * span.array->x[]/y[] for RENDER_SPAN().  MAX2(|dx|,|dy|) pixels are
+ * produced starting at vert0, so the pixel at vert1 itself is not emitted.
+ *
+ * \param ctx    GL context
+ * \param vert0  first endpoint (start of the interpolation)
+ * \param vert1  second endpoint; also the color source when the shade
+ *               model is not GL_SMOOTH
+ */
+static void
+NAME( GLcontext *ctx, const SWvertex *vert0, const SWvertex *vert1 )
+{
+   struct sw_span span;
+   GLuint interpFlags = 0;
+   GLint x0 = (GLint) vert0->win[0];
+   GLint x1 = (GLint) vert1->win[0];
+   GLint y0 = (GLint) vert0->win[1];
+   GLint y1 = (GLint) vert1->win[1];
+   GLint dx, dy;
+   GLint numPixels;
+   GLint xstep, ystep;
+#if defined(DEPTH_TYPE)
+   const GLint depthBits = ctx->Visual.depthBits;
+   /* Z is only kept in fixed point when the depth buffer is <= 16 bits */
+   const GLint fixedToDepthShift = depthBits <= 16 ? FIXED_SHIFT : 0;
+   struct gl_renderbuffer *zrb = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+#define FixedToDepth(F)  ((F) >> fixedToDepthShift)
+   GLint zPtrXstep, zPtrYstep;
+   DEPTH_TYPE *zPtr;
+#elif defined(INTERP_Z)
+   const GLint depthBits = ctx->Visual.depthBits;
+#endif
+#ifdef PIXEL_ADDRESS
+   PIXEL_TYPE *pixelPtr;
+   GLint pixelXstep, pixelYstep;
+#endif
+
+#ifdef SETUP_CODE
+   SETUP_CODE
+#endif
+
+   /* Cull primitives with malformed coordinates.
+    * Summing the four coordinates lets a single test catch an Inf or NaN
+    * in any of them.
+    */
+   {
+      GLfloat tmp = vert0->win[0] + vert0->win[1]
+                  + vert1->win[0] + vert1->win[1];
+      if (IS_INF_OR_NAN(tmp))
+	 return;
+   }
+
+   /*
+   printf("%s():\n", __FUNCTION__);
+   printf(" (%f, %f, %f) -> (%f, %f, %f)\n",
+          vert0->win[0], vert0->win[1], vert0->win[2],
+          vert1->win[0], vert1->win[1], vert1->win[2]);
+   printf(" (%d, %d, %d) -> (%d, %d, %d)\n",
+          vert0->color[0], vert0->color[1], vert0->color[2], 
+          vert1->color[0], vert1->color[1], vert1->color[2]);
+   printf(" (%d, %d, %d) -> (%d, %d, %d)\n",
+          vert0->specular[0], vert0->specular[1], vert0->specular[2], 
+          vert1->specular[0], vert1->specular[1], vert1->specular[2]);
+   */
+
+/*
+ * Despite being clipped to the view volume, the line's window coordinates
+ * may just lie outside the window bounds.  That is, if the legal window
+ * coordinates are [0,W-1][0,H-1], it's possible for x==W and/or y==H.
+ * This quick and dirty code nudges the endpoints inside the window if
+ * necessary.
+ */
+#ifdef CLIP_HACK
+   {
+      GLint w = ctx->DrawBuffer->Width;
+      GLint h = ctx->DrawBuffer->Height;
+      if ((x0==w) | (x1==w)) {
+         if ((x0==w) & (x1==w))
+           return;
+         x0 -= x0==w;
+         x1 -= x1==w;
+      }
+      if ((y0==h) | (y1==h)) {
+         if ((y0==h) & (y1==h))
+           return;
+         y0 -= y0==h;
+         y1 -= y1==h;
+      }
+   }
+#endif
+
+   dx = x1 - x0;
+   dy = y1 - y0;
+   /* zero-length line: nothing to draw, and numPixels would be 0 below */
+   if (dx == 0 && dy == 0)
+      return;
+
+#ifdef DEPTH_TYPE
+   zPtr = (DEPTH_TYPE *) zrb->GetPointer(ctx, zrb, x0, y0);
+#endif
+#ifdef PIXEL_ADDRESS
+   pixelPtr = (PIXEL_TYPE *) PIXEL_ADDRESS(x0,y0);
+#endif
+
+   if (dx<0) {
+      dx = -dx;   /* make positive */
+      xstep = -1;
+#ifdef DEPTH_TYPE
+      zPtrXstep = -((GLint)sizeof(DEPTH_TYPE));
+#endif
+#ifdef PIXEL_ADDRESS
+      pixelXstep = -((GLint)sizeof(PIXEL_TYPE));
+#endif
+   }
+   else {
+      xstep = 1;
+#ifdef DEPTH_TYPE
+      zPtrXstep = ((GLint)sizeof(DEPTH_TYPE));
+#endif
+#ifdef PIXEL_ADDRESS
+      pixelXstep = ((GLint)sizeof(PIXEL_TYPE));
+#endif
+   }
+
+   /* Note: the color-buffer row step has the opposite sign of the
+    * depth-buffer row step for the same direction of travel in Y.
+    */
+   if (dy<0) {
+      dy = -dy;   /* make positive */
+      ystep = -1;
+#ifdef DEPTH_TYPE
+      zPtrYstep = -((GLint) (ctx->DrawBuffer->Width * sizeof(DEPTH_TYPE)));
+#endif
+#ifdef PIXEL_ADDRESS
+      pixelYstep = BYTES_PER_ROW;
+#endif
+   }
+   else {
+      ystep = 1;
+#ifdef DEPTH_TYPE
+      zPtrYstep = (GLint) (ctx->DrawBuffer->Width * sizeof(DEPTH_TYPE));
+#endif
+#ifdef PIXEL_ADDRESS
+      pixelYstep = -(BYTES_PER_ROW);
+#endif
+   }
+
+   ASSERT(dx >= 0);
+   ASSERT(dy >= 0);
+
+   /* one pixel per step along the major axis */
+   numPixels = MAX2(dx, dy);
+
+   /*
+    * Span setup: compute start and step values for all interpolated values.
+    */
+#ifdef INTERP_RGBA
+   interpFlags |= SPAN_RGBA;
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      span.red   = ChanToFixed(vert0->color[0]);
+      span.green = ChanToFixed(vert0->color[1]);
+      span.blue  = ChanToFixed(vert0->color[2]);
+      span.alpha = ChanToFixed(vert0->color[3]);
+      span.redStep   = (ChanToFixed(vert1->color[0]) - span.red  ) / numPixels;
+      span.greenStep = (ChanToFixed(vert1->color[1]) - span.green) / numPixels;
+      span.blueStep  = (ChanToFixed(vert1->color[2]) - span.blue ) / numPixels;
+      span.alphaStep = (ChanToFixed(vert1->color[3]) - span.alpha) / numPixels;
+   }
+   else {
+      /* not smooth shaded: the whole line takes the second vertex's color */
+      span.red   = ChanToFixed(vert1->color[0]);
+      span.green = ChanToFixed(vert1->color[1]);
+      span.blue  = ChanToFixed(vert1->color[2]);
+      span.alpha = ChanToFixed(vert1->color[3]);
+      span.redStep   = 0;
+      span.greenStep = 0;
+      span.blueStep  = 0;
+      span.alphaStep = 0;
+   }
+#endif
+#ifdef INTERP_SPEC
+   interpFlags |= SPAN_SPEC;
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      span.specRed       = ChanToFixed(vert0->specular[0]);
+      span.specGreen     = ChanToFixed(vert0->specular[1]);
+      span.specBlue      = ChanToFixed(vert0->specular[2]);
+      /* each step is measured from that same channel's start value */
+      span.specRedStep   = (ChanToFixed(vert1->specular[0]) - span.specRed) / numPixels;
+      span.specGreenStep = (ChanToFixed(vert1->specular[1]) - span.specGreen) / numPixels;
+      span.specBlueStep  = (ChanToFixed(vert1->specular[2]) - span.specBlue) / numPixels;
+   }
+   else {
+      span.specRed       = ChanToFixed(vert1->specular[0]);
+      span.specGreen     = ChanToFixed(vert1->specular[1]);
+      span.specBlue      = ChanToFixed(vert1->specular[2]);
+      span.specRedStep   = 0;
+      span.specGreenStep = 0;
+      span.specBlueStep  = 0;
+   }
+#endif
+#ifdef INTERP_INDEX
+   interpFlags |= SPAN_INDEX;
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
+      span.index = FloatToFixed(vert0->index);
+      span.indexStep = FloatToFixed(vert1->index - vert0->index) / numPixels;
+   }
+   else {
+      span.index = FloatToFixed(vert1->index);
+      span.indexStep = 0;
+   }
+#endif
+#if defined(INTERP_Z) || defined(DEPTH_TYPE)
+   interpFlags |= SPAN_Z;
+   {
+      if (depthBits <= 16) {
+         span.z = FloatToFixed(vert0->win[2]) + FIXED_HALF;
+         span.zStep = FloatToFixed(vert1->win[2] - vert0->win[2]) / numPixels;
+      }
+      else {
+         /* don't use fixed point */
+         span.z = (GLint) vert0->win[2];
+         span.zStep = (GLint) ((vert1->win[2] - vert0->win[2]) / numPixels);
+      }
+   }
+#endif
+#ifdef INTERP_FOG
+   interpFlags |= SPAN_FOG;
+   span.fog = vert0->fog;
+   span.fogStep = (vert1->fog - vert0->fog) / numPixels;
+#endif
+#ifdef INTERP_TEX
+   interpFlags |= SPAN_TEXTURE;
+   {
+      /* texcoords are scaled by win[3] at each endpoint before being
+       * interpolated linearly along the line
+       */
+      const GLfloat invw0 = vert0->win[3];
+      const GLfloat invw1 = vert1->win[3];
+      const GLfloat invLen = 1.0F / numPixels;
+      GLfloat ds, dt, dr, dq;
+      span.tex[0][0] = invw0 * vert0->texcoord[0][0];
+      span.tex[0][1] = invw0 * vert0->texcoord[0][1];
+      span.tex[0][2] = invw0 * vert0->texcoord[0][2];
+      span.tex[0][3] = invw0 * vert0->texcoord[0][3];
+      ds = (invw1 * vert1->texcoord[0][0]) - span.tex[0][0];
+      dt = (invw1 * vert1->texcoord[0][1]) - span.tex[0][1];
+      dr = (invw1 * vert1->texcoord[0][2]) - span.tex[0][2];
+      dq = (invw1 * vert1->texcoord[0][3]) - span.tex[0][3];
+      span.texStepX[0][0] = ds * invLen;
+      span.texStepX[0][1] = dt * invLen;
+      span.texStepX[0][2] = dr * invLen;
+      span.texStepX[0][3] = dq * invLen;
+      span.texStepY[0][0] = 0.0F;
+      span.texStepY[0][1] = 0.0F;
+      span.texStepY[0][2] = 0.0F;
+      span.texStepY[0][3] = 0.0F;
+   }
+#endif
+#ifdef INTERP_MULTITEX
+   interpFlags |= SPAN_TEXTURE;
+   {
+      const GLfloat invLen = 1.0F / numPixels;
+      GLuint u;
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (ctx->Texture.Unit[u]._ReallyEnabled) {
+            const GLfloat invw0 = vert0->win[3];
+            const GLfloat invw1 = vert1->win[3];
+            GLfloat ds, dt, dr, dq;
+            span.tex[u][0] = invw0 * vert0->texcoord[u][0];
+            span.tex[u][1] = invw0 * vert0->texcoord[u][1];
+            span.tex[u][2] = invw0 * vert0->texcoord[u][2];
+            span.tex[u][3] = invw0 * vert0->texcoord[u][3];
+            ds = (invw1 * vert1->texcoord[u][0]) - span.tex[u][0];
+            dt = (invw1 * vert1->texcoord[u][1]) - span.tex[u][1];
+            dr = (invw1 * vert1->texcoord[u][2]) - span.tex[u][2];
+            dq = (invw1 * vert1->texcoord[u][3]) - span.tex[u][3];
+            span.texStepX[u][0] = ds * invLen;
+            span.texStepX[u][1] = dt * invLen;
+            span.texStepX[u][2] = dr * invLen;
+            span.texStepX[u][3] = dq * invLen;
+            span.texStepY[u][0] = 0.0F;
+            span.texStepY[u][1] = 0.0F;
+            span.texStepY[u][2] = 0.0F;
+            span.texStepY[u][3] = 0.0F;
+	 }
+      }
+   }
+#endif
+
+   INIT_SPAN(span, GL_LINE, numPixels, interpFlags, SPAN_XY);
+
+   /* Need these for fragment prog texcoord interpolation */
+   span.w = 1.0F;
+   span.dwdx = 0.0F;
+   span.dwdy = 0.0F;
+
+   /*
+    * Draw
+    */
+
+   if (dx > dy) {
+      /*** X-major line ***/
+      /* step x every iteration; step y whenever the error term is >= 0 */
+      GLint i;
+      GLint errorInc = dy+dy;
+      GLint error = errorInc-dx;
+      GLint errorDec = error-dx;
+
+      for (i = 0; i < dx; i++) {
+#ifdef DEPTH_TYPE
+         GLuint Z = FixedToDepth(span.z);
+#endif
+#ifdef PLOT
+         PLOT( x0, y0 );
+#else
+         span.array->x[i] = x0;
+         span.array->y[i] = y0;
+#endif
+         x0 += xstep;
+#ifdef DEPTH_TYPE
+         zPtr = (DEPTH_TYPE *) ((GLubyte*) zPtr + zPtrXstep);
+         span.z += span.zStep;
+#endif
+#ifdef PIXEL_ADDRESS
+         pixelPtr = (PIXEL_TYPE*) ((GLubyte*) pixelPtr + pixelXstep);
+#endif
+         if (error<0) {
+            error += errorInc;
+         }
+         else {
+            error += errorDec;
+            y0 += ystep;
+#ifdef DEPTH_TYPE
+            zPtr = (DEPTH_TYPE *) ((GLubyte*) zPtr + zPtrYstep);
+#endif
+#ifdef PIXEL_ADDRESS
+            pixelPtr = (PIXEL_TYPE*) ((GLubyte*) pixelPtr + pixelYstep);
+#endif
+         }
+      }
+   }
+   else {
+      /*** Y-major line ***/
+      /* step y every iteration; step x whenever the error term is >= 0 */
+      GLint i;
+      GLint errorInc = dx+dx;
+      GLint error = errorInc-dy;
+      GLint errorDec = error-dy;
+
+      for (i=0;i<dy;i++) {
+#ifdef DEPTH_TYPE
+         GLuint Z = FixedToDepth(span.z);
+#endif
+#ifdef PLOT
+         PLOT( x0, y0 );
+#else
+         span.array->x[i] = x0;
+         span.array->y[i] = y0;
+#endif
+         y0 += ystep;
+#ifdef DEPTH_TYPE
+         zPtr = (DEPTH_TYPE *) ((GLubyte*) zPtr + zPtrYstep);
+         span.z += span.zStep;
+#endif
+#ifdef PIXEL_ADDRESS
+         pixelPtr = (PIXEL_TYPE*) ((GLubyte*) pixelPtr + pixelYstep);
+#endif
+         if (error<0) {
+            error += errorInc;
+         }
+         else {
+            error += errorDec;
+            x0 += xstep;
+#ifdef DEPTH_TYPE
+            zPtr = (DEPTH_TYPE *) ((GLubyte*) zPtr + zPtrXstep);
+#endif
+#ifdef PIXEL_ADDRESS
+            pixelPtr = (PIXEL_TYPE*) ((GLubyte*) pixelPtr + pixelXstep);
+#endif
+         }
+      }
+   }
+
+#ifdef RENDER_SPAN
+   RENDER_SPAN( span );
+#endif
+
+   /* silence "unused variable" warnings when neither branch uses span */
+   (void)span;
+
+}
+
+
+/* Reset every template parameter so the file can be #include'd again. */
+#undef NAME
+#undef INTERP_Z
+#undef INTERP_FOG
+#undef INTERP_RGBA
+#undef INTERP_SPEC
+#undef INTERP_TEX
+#undef INTERP_MULTITEX
+#undef INTERP_INDEX
+#undef PIXEL_ADDRESS
+#undef PIXEL_TYPE
+#undef DEPTH_TYPE
+#undef BYTES_PER_ROW
+#undef SETUP_CODE
+#undef PLOT
+#undef CLIP_HACK
+#undef FixedToDepth
+#undef RENDER_SPAN
diff --git a/dist/Mesa/src/mesa/swrast/s_logic.c b/dist/Mesa/src/mesa/swrast/s_logic.c
new file mode 100644
index 000000000..60458c2b8
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_logic.c
@@ -0,0 +1,244 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "imports.h"
+#include "macros.h"
+
+#include "s_context.h"
+#include "s_logic.h"
+#include "s_span.h"
+
+
+#define LOGIC_OP_LOOP(MODE)			\
+do {						\
+   GLuint i;					\
+   switch (MODE) {				\
+      case GL_CLEAR:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = 0;			\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_SET:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~0;			\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_COPY:				\
+	 /* do nothing */			\
+	 break;					\
+      case GL_COPY_INVERTED:			\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~src[i];		\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_NOOP:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = dest[i];		\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_INVERT:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~dest[i];		\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_AND:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] &= dest[i];		\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_NAND:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~(src[i] & dest[i]);	\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_OR:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] |= dest[i];		\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_NOR:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~(src[i] | dest[i]);	\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_XOR:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] ^= dest[i];		\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_EQUIV:				\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~(src[i] ^ dest[i]);	\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_AND_REVERSE:			\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = src[i] & ~dest[i];	\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_AND_INVERTED:			\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~src[i] & dest[i];	\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_OR_REVERSE:			\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = src[i] | ~dest[i];	\
+	    }					\
+	 }					\
+	 break;					\
+      case GL_OR_INVERTED:			\
+         for (i = 0; i < n; i++) {		\
+	    if (mask[i]) {			\
+	       src[i] = ~src[i] | dest[i];	\
+	    }					\
+	 }					\
+	 break;					\
+      default:					\
+	 _mesa_problem(ctx, "bad logicop mode");\
+   }						\
+} while (0)
+
+
+
+/*
+ * Apply the context's current logic op to n GLubyte values.  src[i] is
+ * combined with dest[i] and updated in place for every element whose
+ * mask[i] is non-zero.
+ */
+static void
+logicop_ubyte(GLcontext *ctx, GLuint n, GLubyte src[], const GLubyte dest[],
+              const GLubyte mask[])
+{
+   const GLenum op = ctx->Color.LogicOp;
+
+   LOGIC_OP_LOOP(op);
+}
+
+
+/*
+ * Same as logicop_ubyte() but for n GLushort values.
+ */
+static void
+logicop_ushort(GLcontext *ctx, GLuint n, GLushort src[], const GLushort dest[],
+               const GLubyte mask[])
+{
+   const GLenum op = ctx->Color.LogicOp;
+
+   LOGIC_OP_LOOP(op);
+}
+
+
+/*
+ * Same as logicop_ubyte() but for n GLuint values.
+ */
+static void
+logicop_uint(GLcontext *ctx, GLuint n, GLuint src[], const GLuint dest[],
+             const GLubyte mask[])
+{
+   const GLenum op = ctx->Color.LogicOp;
+
+   LOGIC_OP_LOOP(op);
+}
+
+
+
+/*
+ * Logic-op a span of color-index pixels in software (for drivers that
+ * cannot do logic ops themselves).  The current framebuffer values are
+ * fetched from rb and combined into index[], which is updated in place.
+ */
+void
+_swrast_logicop_ci_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                        const struct sw_span *span, GLuint index[])
+{
+   const GLuint count = span->end;
+   GLuint fbIndex[MAX_WIDTH];
+
+   ASSERT(span->end < MAX_WIDTH);
+   ASSERT(rb->DataType == GL_UNSIGNED_INT);
+
+   /* Fetch what is currently in the buffer at the span's pixels */
+   if (!(span->arrayMask & SPAN_XY)) {
+      /* horizontal run starting at (span->x, span->y) */
+      rb->GetRow(ctx, rb, count, span->x, span->y, fbIndex);
+   }
+   else {
+      /* individually addressed pixels */
+      _swrast_get_values(ctx, rb, count, span->array->x, span->array->y,
+                         fbIndex, sizeof(GLuint));
+   }
+
+   logicop_uint(ctx, count, index, fbIndex, span->array->mask);
+}
+
+
+/**
+ * Apply the current logic operator to a span of RGBA pixels.
+ * We can handle horizontal runs of pixels (spans) or arrays of x/y
+ * pixel coordinates.
+ *
+ * \param ctx   GL context; ctx->Color.LogicOp selects the operation
+ * \param rb    renderbuffer the destination colors are read from
+ * \param span  the pixels to process; span->array->mask is indexed per
+ *              pixel
+ * \param rgba  incoming colors, updated in place with the result
+ */
+void
+_swrast_logicop_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                          const struct sw_span *span, GLchan rgba[][4])
+{
+   GLchan dest[MAX_WIDTH][4];
+#if CHAN_TYPE == GL_UNSIGNED_SHORT || CHAN_TYPE == GL_FLOAT
+   /* per-component copy of the per-pixel write mask, see below */
+   GLubyte compMask[4 * MAX_WIDTH];
+   GLuint i;
+#endif
+
+   ASSERT(span->end < MAX_WIDTH);
+   ASSERT(span->arrayMask & SPAN_RGBA);
+   ASSERT(rb->DataType == CHAN_TYPE);
+
+   /* Read dest values from frame buffer */
+   if (span->arrayMask & SPAN_XY) {
+      _swrast_get_values(ctx, rb, span->end, span->array->x, span->array->y,
+                         dest, 4 * sizeof(GLchan));
+   }
+   else {
+      _swrast_read_rgba_span(ctx, rb, span->end, span->x, span->y, dest);
+   }
+
+   /* XXX make this a runtime test */
+#if CHAN_TYPE == GL_UNSIGNED_BYTE
+   /* treat 4*GLubyte as GLuint: one element (and one mask entry) per pixel */
+   logicop_uint(ctx, span->end, (GLuint *) rgba,
+                (const GLuint *) dest, span->array->mask);
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT || CHAN_TYPE == GL_FLOAT
+   /*
+    * Here the data is processed as 4 elements per pixel, but the span mask
+    * has only one entry per pixel.  Replicate each entry four times so that
+    * element k is tested against the mask of pixel k/4 instead of running
+    * past the entries that belong to this span.
+    */
+   for (i = 0; i < span->end; i++) {
+      const GLubyte m = span->array->mask[i];
+      compMask[4 * i + 0] = m;
+      compMask[4 * i + 1] = m;
+      compMask[4 * i + 2] = m;
+      compMask[4 * i + 3] = m;
+   }
+#if CHAN_TYPE == GL_UNSIGNED_SHORT
+   logicop_ushort(ctx, 4 * span->end, (GLushort *) rgba,
+                  (const GLushort *) dest, compMask);
+#else
+   /* operate on the raw bits of the 4 float channels */
+   logicop_uint(ctx, 4 * span->end, (GLuint *) rgba,
+                (const GLuint *) dest, compMask);
+#endif
+#endif
+   /* reference all helpers so unused ones don't trigger warnings */
+   (void) logicop_ubyte;
+   (void) logicop_ushort;
+   (void) logicop_uint;
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_logic.h b/dist/Mesa/src/mesa/swrast/s_logic.h
new file mode 100644
index 000000000..ce5f183ff
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_logic.h
@@ -0,0 +1,44 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_LOGIC_H
+#define S_LOGIC_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+
+/*
+ * Apply the current logic op to a span of color-index pixels.
+ * Destination values are read from rb; index[] is updated in place.
+ */
+extern void
+_swrast_logicop_ci_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                        const struct sw_span *span, GLuint index[]);
+
+
+/*
+ * Apply the current logic op to a span of RGBA pixels.
+ * Destination colors are read from rb; rgba[] is updated in place.
+ */
+extern void
+_swrast_logicop_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                          const struct sw_span *span, GLchan rgba[][4]);
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_masking.c b/dist/Mesa/src/mesa/swrast/s_masking.c
new file mode 100644
index 000000000..65c4e7d01
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_masking.c
@@ -0,0 +1,172 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Implement the effect of glColorMask and glIndexMask in software.
+ */
+
+
+#include "glheader.h"
+#include "enums.h"
+#include "macros.h"
+
+#include "s_context.h"
+#include "s_masking.h"
+#include "s_span.h"
+
+
+
+/*
+ * Apply glColorMask to the RGBA colors of a span.  For every channel whose
+ * ctx->Color.ColorMask entry is zero, the incoming value in rgba[] is
+ * replaced by the value currently stored in the renderbuffer.
+ */
+void
+_swrast_mask_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                       const struct sw_span *span, GLchan rgba[][4])
+{
+   const GLuint n = span->end;
+   GLchan dest[MAX_WIDTH][4];
+   GLuint i;
+#if CHAN_BITS == 8
+   /* handle the four mask bytes, and each 4-byte pixel, as one GLuint */
+   GLuint writeBits = *((GLuint*)ctx->Color.ColorMask);
+   GLuint keepBits = ~writeBits;
+   GLuint *incoming = (GLuint *) rgba;
+   GLuint *existing = (GLuint *) dest;
+#else
+   const GLboolean writeR = ctx->Color.ColorMask[RCOMP];
+   const GLboolean writeG = ctx->Color.ColorMask[GCOMP];
+   const GLboolean writeB = ctx->Color.ColorMask[BCOMP];
+   const GLboolean writeA = ctx->Color.ColorMask[ACOMP];
+#endif
+
+   ASSERT(n < MAX_WIDTH);
+   ASSERT(span->arrayMask & SPAN_RGBA);
+
+   /* fetch the colors currently in the buffer */
+   if (!(span->arrayMask & SPAN_XY)) {
+      _swrast_read_rgba_span(ctx, rb, n, span->x, span->y, dest);
+   }
+   else {
+      _swrast_get_values(ctx, rb, n, span->array->x, span->array->y,
+                         dest, 4 * sizeof(GLchan));
+   }
+
+#if CHAN_BITS == 8
+   for (i = 0; i < n; i++) {
+      const GLuint fromSrc = incoming[i] & writeBits;
+      const GLuint fromDst = existing[i] & keepBits;
+      incoming[i] = fromSrc | fromDst;
+   }
+#else
+   for (i = 0; i < n; i++) {
+      if (!writeR) {
+         rgba[i][RCOMP] = dest[i][RCOMP];
+      }
+      if (!writeG) {
+         rgba[i][GCOMP] = dest[i][GCOMP];
+      }
+      if (!writeB) {
+         rgba[i][BCOMP] = dest[i][BCOMP];
+      }
+      if (!writeA) {
+         rgba[i][ACOMP] = dest[i][ACOMP];
+      }
+   }
+#endif
+}
+
+
+/*
+ * Apply glColorMask to a horizontal run of n RGBA pixels starting at
+ * (x, y).  Channels that are masked off in ctx->Color.ColorMask get the
+ * value currently stored in the renderbuffer.
+ */
+void
+_swrast_mask_rgba_array(GLcontext *ctx, struct gl_renderbuffer *rb,
+                        GLuint n, GLint x, GLint y, GLchan rgba[][4])
+{
+   GLchan dest[MAX_WIDTH][4];
+   GLuint i;
+#if CHAN_BITS == 8
+   /* handle the four mask bytes, and each 4-byte pixel, as one GLuint */
+   GLuint writeBits = *((GLuint*)ctx->Color.ColorMask);
+   GLuint keepBits = ~writeBits;
+   GLuint *incoming = (GLuint *) rgba;
+   GLuint *existing = (GLuint *) dest;
+#else
+   const GLint writeR = ctx->Color.ColorMask[RCOMP];
+   const GLint writeG = ctx->Color.ColorMask[GCOMP];
+   const GLint writeB = ctx->Color.ColorMask[BCOMP];
+   const GLint writeA = ctx->Color.ColorMask[ACOMP];
+#endif
+
+   /* fetch the colors currently in the buffer */
+   _swrast_read_rgba_span( ctx, rb, n, x, y, dest );
+
+#if CHAN_BITS == 8
+   for (i = 0; i < n; i++) {
+      const GLuint fromSrc = incoming[i] & writeBits;
+      const GLuint fromDst = existing[i] & keepBits;
+      incoming[i] = fromSrc | fromDst;
+   }
+#else
+   for (i = 0; i < n; i++) {
+      if (!writeR) {
+         rgba[i][RCOMP] = dest[i][RCOMP];
+      }
+      if (!writeG) {
+         rgba[i][GCOMP] = dest[i][GCOMP];
+      }
+      if (!writeB) {
+         rgba[i][BCOMP] = dest[i][BCOMP];
+      }
+      if (!writeA) {
+         rgba[i][ACOMP] = dest[i][ACOMP];
+      }
+   }
+#endif
+}
+
+
+
+/*
+ * Apply glIndexMask to the color indexes of a span.  Bits set in
+ * ctx->Color.IndexMask come from index[]; the remaining bits come from
+ * the value currently stored in the renderbuffer.
+ */
+void
+_swrast_mask_ci_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                     const struct sw_span *span, GLuint index[])
+{
+   GLuint fbIndex[MAX_WIDTH];
+   const GLuint writeBits = ctx->Color.IndexMask;
+   const GLuint keepBits = ~writeBits;
+   GLuint k;
+
+   ASSERT(span->arrayMask & SPAN_INDEX);
+   ASSERT(span->end <= MAX_WIDTH);
+   ASSERT(rb->DataType == GL_UNSIGNED_INT);
+
+   /* fetch the indexes currently in the buffer */
+   if (!(span->arrayMask & SPAN_XY)) {
+      _swrast_read_index_span(ctx, rb, span->end, span->x, span->y, fbIndex);
+   }
+   else {
+      _swrast_get_values(ctx, rb, span->end, span->array->x, span->array->y,
+                         fbIndex, sizeof(GLuint));
+   }
+
+   for (k = 0; k < span->end; k++) {
+      const GLuint fromSrc = index[k] & writeBits;
+      const GLuint fromDst = fbIndex[k] & keepBits;
+      index[k] = fromSrc | fromDst;
+   }
+}
+
+
+/*
+ * Apply glIndexMask to a horizontal run of n color-index pixels starting
+ * at (x, y).  Bits set in ctx->Color.IndexMask come from index[]; the
+ * remaining bits come from the renderbuffer's current contents.
+ */
+void
+_swrast_mask_ci_array(GLcontext *ctx, struct gl_renderbuffer *rb,
+                      GLuint n, GLint x, GLint y, GLuint index[])
+{
+   GLuint fbIndex[MAX_WIDTH];
+   const GLuint writeBits = ctx->Color.IndexMask;
+   const GLuint keepBits = ~writeBits;
+   GLuint k;
+
+   /* fetch the indexes currently in the buffer */
+   _swrast_read_index_span(ctx, rb, n, x, y, fbIndex);
+
+   for (k = 0; k < n; k++) {
+      const GLuint fromSrc = index[k] & writeBits;
+      const GLuint fromDst = fbIndex[k] & keepBits;
+      index[k] = fromSrc | fromDst;
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_masking.h b/dist/Mesa/src/mesa/swrast/s_masking.h
new file mode 100644
index 000000000..e2265448f
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_masking.h
@@ -0,0 +1,58 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_MASKING_H
+#define S_MASKING_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+
+/*
+ * Implement glColorMask for a span of RGBA pixels.
+ * Masked-off channels in rgba[] are replaced by the values read from rb.
+ */
+extern void
+_swrast_mask_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                       const struct sw_span *span, GLchan rgba[][4]);
+
+
+/*
+ * Implement glColorMask for a horizontal run of n RGBA pixels starting
+ * at (x, y).
+ */
+extern void
+_swrast_mask_rgba_array(GLcontext *ctx, struct gl_renderbuffer *rb,
+                        GLuint n, GLint x, GLint y, GLchan rgba[][4]);
+
+
+/*
+ * Implement glIndexMask for a span of CI pixels.
+ * Masked-off bits in index[] are replaced by the bits read from rb.
+ */
+extern void
+_swrast_mask_ci_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                     const struct sw_span *span, GLuint index[]);
+
+/*
+ * Implement glIndexMask for a horizontal run of n CI pixels starting
+ * at (x, y).
+ */
+extern void
+_swrast_mask_ci_array(GLcontext *ctx, struct gl_renderbuffer *rb,
+                      GLuint n, GLint x, GLint y, GLuint index[]);
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_nvfragprog.c b/dist/Mesa/src/mesa/swrast/s_nvfragprog.c
new file mode 100644
index 000000000..97dbed04a
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_nvfragprog.c
@@ -0,0 +1,1511 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Regarding GL_NV_fragment_program:
+ *
+ * Portions of this software may use or implement intellectual
+ * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
+ * any and all warranties with respect to such intellectual property,
+ * including any use thereof or modifications thereto.
+ */
+
+#include "glheader.h"
+#include "colormac.h"
+#include "context.h"
+#include "program_instruction.h"
+#include "program.h"
+
+#include "s_nvfragprog.h"
+#include "s_span.h"
+
+
+/* if 1, print some debugging info */
+#define DEBUG_FRAG 0
+
+/**
+ * Sample a single texel from texture unit \p unit at \p texcoord with
+ * the given \p lambda and return it in \p color, converted to floats.
+ */
+static void
+fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
+             GLuint unit, GLfloat color[4] )
+{
+   SWcontext *sw = SWRAST_CONTEXT(ctx);
+   GLchan texel[4];
+   GLuint i;
+
+   /* XXX use a float-valued TextureSample routine here!!! */
+   sw->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current, 1,
+                           (const GLfloat (*)[4]) texcoord, &lambda, &texel);
+
+   for (i = 0; i < 4; i++)
+      color[i] = CHAN_TO_FLOAT(texel[i]);
+}
+
+
+/**
+ * Fetch a texel, deriving the mipmap level of detail (lambda) from the
+ * partial derivatives of the texcoord: texdx = d/dx, texdy = d/dy.
+ */
+static void
+fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
+                   const GLfloat texdx[4], const GLfloat texdy[4],
+                   GLuint unit, GLfloat color[4] )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+   const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
+   const GLfloat texW = (GLfloat) texImg->WidthScale;
+   const GLfloat texH = (GLfloat) texImg->HeightScale;
+   GLchan rgba[4];
+   GLuint i;
+   /* q is component 3 (cf. texcoord[3] below), so both q derivatives
+    * must come from index 3; texdy[2] would be the r derivative. */
+   GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
+                                         texdx[1], texdy[1], /* dt/dx, dt/dy */
+                                         texdx[3], texdy[3], /* dq/dx, dq/dy */
+                                         texW, texH,
+                                         texcoord[0], texcoord[1], texcoord[3],
+                                         1.0F / texcoord[3]);
+
+   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
+                               1, (const GLfloat (*)[4]) texcoord,
+                               &lambda, &rgba);
+   for (i = 0; i < 4; i++)
+      color[i] = CHAN_TO_FLOAT(rgba[i]);
+}
+
+
+/**
+ * Locate the storage behind a source register.
+ *
+ * Temporaries, inputs and outputs are taken from \p machine, local and
+ * state/named parameters from \p program, and environment parameters
+ * from \p ctx.
+ *
+ * \return pointer to the register's 4-element float vector, or NULL
+ * (after calling _mesa_problem) if the register file is not handled.
+ */
+static INLINE const GLfloat *
+get_register_pointer( GLcontext *ctx,
+                      const struct prog_src_register *source,
+                      const struct fp_machine *machine,
+                      const struct gl_fragment_program *program )
+{
+   switch (source->File) {
+   case PROGRAM_TEMPORARY:
+      ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
+      return machine->Temporaries[source->Index];
+   case PROGRAM_INPUT:
+      ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
+      return machine->Inputs[source->Index];
+   case PROGRAM_OUTPUT:
+      /* This is only for PRINT */
+      ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
+      return machine->Outputs[source->Index];
+   case PROGRAM_LOCAL_PARAM:
+      ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
+      return program->Base.LocalParams[source->Index];
+   case PROGRAM_ENV_PARAM:
+      ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
+      return ctx->FragmentProgram.Parameters[source->Index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_NAMED_PARAM:
+      /* state variables share the named-parameter storage */
+      ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
+      return program->Base.Parameters->ParameterValues[source->Index];
+   default:
+      break;
+   }
+
+   _mesa_problem(ctx, "Invalid input register file %d in fetch_vector4", source->File);
+   return NULL;
+}
+
+
+/**
+ * Read a source register as a 4-component float vector.
+ *
+ * The register's swizzle is applied first; the source modifiers follow
+ * in the order NegateBase, Abs, NegateAbs.
+ * \param source  register to read (file, index, swizzle, modifiers)
+ * \param result  receives the four resulting components
+ */
+static void
+fetch_vector4( GLcontext *ctx,
+               const struct prog_src_register *source,
+               const struct fp_machine *machine,
+               const struct gl_fragment_program *program,
+               GLfloat result[4] )
+{
+   const GLfloat *reg = get_register_pointer(ctx, source, machine, program);
+   GLuint i;
+
+   ASSERT(reg);
+
+   /* swizzle */
+   for (i = 0; i < 4; i++) {
+      result[i] = reg[GET_SWZ(source->Swizzle, i)];
+   }
+
+   /* source modifiers, one component at a time */
+   for (i = 0; i < 4; i++) {
+      if (source->NegateBase) {
+         result[i] = -result[i];
+      }
+      if (source->Abs) {
+         result[i] = FABSF(result[i]);
+      }
+      if (source->NegateAbs) {
+         result[i] = -result[i];
+      }
+   }
+}
+
+
+/**
+ * Fetch the partial derivative of a source register with respect to
+ * X or Y (selected by \p xOrY) directly from the span's interpolants.
+ * Only fragment input registers (PROGRAM_INPUT) can be handled this way.
+ * \param column  pixel column within the span, used for the texcoord
+ *                perspective factor in the X direction
+ * \return GL_TRUE if it was easily computed or GL_FALSE if we
+ * need to execute another instance of the program (ugh)!
+ */
+static GLboolean
+fetch_vector4_deriv( GLcontext *ctx,
+                     const struct prog_src_register *source,
+                     const struct sw_span *span,
+                     char xOrY, GLint column, GLfloat result[4] )
+{
+   GLfloat src[4];
+   GLuint i;
+
+   ASSERT(xOrY == 'X' || xOrY == 'Y');
+
+   /* The FRAG_ATTRIB_* indices below only have meaning for the input
+    * register file; any other file must take the slow path.
+    */
+   if (source->File != PROGRAM_INPUT)
+      return GL_FALSE;
+
+   switch (source->Index) {
+   case FRAG_ATTRIB_WPOS:
+      if (xOrY == 'X') {
+         src[0] = 1.0;
+         src[1] = 0.0;
+         src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
+         src[3] = span->dwdx;
+      }
+      else {
+         src[0] = 0.0;
+         src[1] = 1.0;
+         src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
+         src[3] = span->dwdy;
+      }
+      break;
+   case FRAG_ATTRIB_COL0:
+      if (xOrY == 'X') {
+         src[0] = span->drdx * (1.0F / CHAN_MAXF);
+         src[1] = span->dgdx * (1.0F / CHAN_MAXF);
+         src[2] = span->dbdx * (1.0F / CHAN_MAXF);
+         src[3] = span->dadx * (1.0F / CHAN_MAXF);
+      }
+      else {
+         src[0] = span->drdy * (1.0F / CHAN_MAXF);
+         src[1] = span->dgdy * (1.0F / CHAN_MAXF);
+         src[2] = span->dbdy * (1.0F / CHAN_MAXF);
+         src[3] = span->dady * (1.0F / CHAN_MAXF);
+      }
+      break;
+   case FRAG_ATTRIB_COL1:
+      if (xOrY == 'X') {
+         src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
+         src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
+         src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
+         src[3] = 0.0; /* XXX need this */
+      }
+      else {
+         src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
+         src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
+         src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
+         src[3] = 0.0; /* XXX need this */
+      }
+      break;
+   case FRAG_ATTRIB_FOGC:
+      if (xOrY == 'X') {
+         src[0] = span->dfogdx;
+         src[1] = 0.0;
+         src[2] = 0.0;
+         src[3] = 0.0;
+      }
+      else {
+         src[0] = span->dfogdy;
+         src[1] = 0.0;
+         src[2] = 0.0;
+         src[3] = 0.0;
+      }
+      break;
+   case FRAG_ATTRIB_TEX0:
+   case FRAG_ATTRIB_TEX1:
+   case FRAG_ATTRIB_TEX2:
+   case FRAG_ATTRIB_TEX3:
+   case FRAG_ATTRIB_TEX4:
+   case FRAG_ATTRIB_TEX5:
+   case FRAG_ATTRIB_TEX6:
+   case FRAG_ATTRIB_TEX7:
+      if (xOrY == 'X') {
+         const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
+         /* this is a little tricky - I think I've got it right */
+         const GLfloat invQ = 1.0f / (span->tex[u][3]
+                                      + span->texStepX[u][3] * column);
+         src[0] = span->texStepX[u][0] * invQ;
+         src[1] = span->texStepX[u][1] * invQ;
+         src[2] = span->texStepX[u][2] * invQ;
+         src[3] = span->texStepX[u][3] * invQ;
+      }
+      else {
+         const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
+         /* Tricky, as above, but in Y direction */
+         const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
+         src[0] = span->texStepY[u][0] * invQ;
+         src[1] = span->texStepY[u][1] * invQ;
+         src[2] = span->texStepY[u][2] * invQ;
+         src[3] = span->texStepY[u][3] * invQ;
+      }
+      break;
+   default:
+      return GL_FALSE;
+   }
+
+   /* swizzle, then source modifiers (NegateBase, Abs, NegateAbs) */
+   for (i = 0; i < 4; i++) {
+      GLfloat v = src[GET_SWZ(source->Swizzle, i)];
+      if (source->NegateBase)
+         v = -v;
+      if (source->Abs)
+         v = FABSF(v);
+      if (source->NegateAbs)
+         v = -v;
+      result[i] = v;
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Scalar counterpart of fetch_vector4(): only the first swizzled
+ * component is fetched and modified; it is returned in result[0].
+ */
+static void
+fetch_vector1( GLcontext *ctx,
+               const struct prog_src_register *source,
+               const struct fp_machine *machine,
+               const struct gl_fragment_program *program,
+               GLfloat result[4] )
+{
+   const GLfloat *reg = get_register_pointer(ctx, source, machine, program);
+   GLfloat x;
+
+   ASSERT(reg);
+
+   x = reg[GET_SWZ(source->Swizzle, 0)];
+   if (source->NegateBase)
+      x = -x;
+   if (source->Abs)
+      x = FABSF(x);
+   if (source->NegateAbs)
+      x = -x;
+   result[0] = x;
+}
+
+
+/**
+ * Classify a value against zero: COND_GT, COND_LT, COND_EQ, or COND_UN if NaN.
+ */
+static INLINE GLuint
+generate_cc( float value )
+{
+   if (value > 0.0F)
+      return COND_GT;
+   else if (value < 0.0F)
+      return COND_LT;
+   else if (value == 0.0F)
+      return COND_EQ;
+   return COND_UN;  /* NaN: every ordered comparison above was false */
+}
+
+
+/**
+ * Decide whether a condition code passes the given mask rule.
+ * Used to mask destination writes according to the current condition code.
+ */
+static INLINE GLboolean
+test_cc(GLuint condCode, GLuint ccMaskRule)
+{
+   const GLboolean eq = (condCode == COND_EQ);
+   const GLboolean lt = (condCode == COND_LT);
+   const GLboolean gt = (condCode == COND_GT);
+   if (ccMaskRule == COND_FL)  return GL_FALSE;
+   if (ccMaskRule == COND_EQ)  return eq;
+   if (ccMaskRule == COND_NE)  return !eq;
+   if (ccMaskRule == COND_LT)  return lt;
+   if (ccMaskRule == COND_GT)  return gt;
+   if (ccMaskRule == COND_GE)  return gt || eq;
+   if (ccMaskRule == COND_LE)  return lt || eq;
+   return GL_TRUE;   /* COND_TR, and any unrecognized rule */
+}
+
+
+/**
+ * Store 4 floats into a register.  Observe the instructions saturate and
+ * set-condition-code flags.
+ *
+ * Steps: pick the destination storage, optionally clamp the value to
+ * [0,1], narrow the write mask by the condition-code test (unless the
+ * rule is COND_TR), then write each enabled component and, if the
+ * instruction requests it, update that component's condition code.
+ *
+ * A PROGRAM_WRITE_ONLY destination is backed by a scratch register so
+ * that the value is discarded while the condition codes are still
+ * updated.
+ *
+ * \param inst     instruction supplying DstReg, SaturateMode, CondUpdate
+ * \param machine  register file and condition codes to modify
+ * \param value    the four components to store
+ */
+static void
+store_vector4( const struct prog_instruction *inst,
+               struct fp_machine *machine,
+               const GLfloat value[4] )
+{
+   const struct prog_dst_register *dest = &(inst->DstReg);
+   const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
+   const GLboolean updateCC = inst->CondUpdate;
+   GLfloat *dstReg;
+   GLfloat dummyReg[4];
+   GLfloat clampedValue[4];
+   GLuint writeMask = dest->WriteMask;
+   GLuint i;
+
+   switch (dest->File) {
+      case PROGRAM_OUTPUT:
+         dstReg = machine->Outputs[dest->Index];
+         break;
+      case PROGRAM_TEMPORARY:
+         dstReg = machine->Temporaries[dest->Index];
+         break;
+      case PROGRAM_WRITE_ONLY:
+         /* Do not return here: an instruction targeting a write-only
+          * register still has to update the condition codes below.
+          */
+         dstReg = dummyReg;
+         break;
+      default:
+         _mesa_problem(NULL, "bad register file in store_vector4(fp)");
+         return;
+   }
+
+#if DEBUG_FRAG
+   if (value[0] > 1.0e10 ||
+       IS_INF_OR_NAN(value[0]) ||
+       IS_INF_OR_NAN(value[1]) ||
+       IS_INF_OR_NAN(value[2]) ||
+       IS_INF_OR_NAN(value[3])  )
+      printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
+#endif
+
+   if (clamp) {
+      for (i = 0; i < 4; i++)
+         clampedValue[i] = CLAMP(value[i], 0.0F, 1.0F);
+      value = clampedValue;
+   }
+
+   if (dest->CondMask != COND_TR) {
+      /* Drop every component whose (swizzled) condition code fails
+       * the destination's mask rule.
+       */
+      GLuint passMask = 0;
+      for (i = 0; i < 4; i++) {
+         if (GET_BIT(writeMask, i)
+             && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, i)],
+                        dest->CondMask))
+            passMask |= (1 << i);
+      }
+      writeMask = passMask;
+   }
+
+   /* write the surviving components */
+   for (i = 0; i < 4; i++) {
+      if (!GET_BIT(writeMask, i))
+         continue;
+      dstReg[i] = value[i];
+      if (updateCC)
+         machine->CondCodes[i] = generate_cc(value[i]);
+   }
+}
+
+
+/**
+ * Initialize a new machine state instance from an existing one, adding
+ * the partial derivatives onto the input registers.
+ * Used to implement DDX and DDY instructions in non-trivial cases.
+ *
+ * The source \p machine is left untouched; all changes are made to the
+ * copy in \p dMachine, which the caller then runs the program on.
+ * \param xOrY      'X' or 'Y': which set of span derivatives to add
+ * \param dMachine  receives the derivative machine state
+ */
+static void
+init_machine_deriv( GLcontext *ctx,
+                    const struct fp_machine *machine,
+                    const struct gl_fragment_program *program,
+                    const struct sw_span *span, char xOrY,
+                    struct fp_machine *dMachine )
+{
+   GLuint u;
+
+   ASSERT(xOrY == 'X' || xOrY == 'Y');
+
+   /* copy existing machine */
+   _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
+
+   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
+      /* Clear temporary registers (undefined for ARB_f_p) */
+      _mesa_bzero( (void*) dMachine->Temporaries,
+                   MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
+   }
+
+   /* Add derivatives (to the copy, never to the caller's machine) */
+   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
+      GLfloat *wpos = dMachine->Inputs[FRAG_ATTRIB_WPOS];
+      if (xOrY == 'X') {
+         wpos[0] += 1.0F;
+         wpos[1] += 0.0F;
+         wpos[2] += span->dzdx;
+         wpos[3] += span->dwdx;
+      }
+      else {
+         wpos[0] += 0.0F;
+         wpos[1] += 1.0F;
+         wpos[2] += span->dzdy;
+         wpos[3] += span->dwdy;
+      }
+   }
+   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
+      GLfloat *col0 = dMachine->Inputs[FRAG_ATTRIB_COL0];
+      if (xOrY == 'X') {
+         col0[0] += span->drdx * (1.0F / CHAN_MAXF);
+         col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
+         col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
+         col0[3] += span->dadx * (1.0F / CHAN_MAXF);
+      }
+      else {
+         col0[0] += span->drdy * (1.0F / CHAN_MAXF);
+         col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
+         col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
+         col0[3] += span->dady * (1.0F / CHAN_MAXF);
+      }
+   }
+   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
+      GLfloat *col1 = dMachine->Inputs[FRAG_ATTRIB_COL1];
+      if (xOrY == 'X') {
+         col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
+         col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
+         col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
+         col1[3] += 0.0; /*XXX fix */
+      }
+      else {
+         col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
+         col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
+         col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
+         col1[3] += 0.0; /*XXX fix */
+      }
+   }
+   if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
+      GLfloat *fogc = dMachine->Inputs[FRAG_ATTRIB_FOGC];
+      fogc[0] += (xOrY == 'X') ? span->dfogdx : span->dfogdy;
+   }
+   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+      if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
+         GLfloat *tex = dMachine->Inputs[FRAG_ATTRIB_TEX0 + u];
+         /* XXX perspective-correct interpolation */
+         if (xOrY == 'X') {
+            tex[0] += span->texStepX[u][0];
+            tex[1] += span->texStepX[u][1];
+            tex[2] += span->texStepX[u][2];
+            tex[3] += span->texStepX[u][3];
+         }
+         else {
+            tex[0] += span->texStepY[u][0];
+            tex[1] += span->texStepY[u][1];
+            tex[2] += span->texStepY[u][2];
+            tex[3] += span->texStepY[u][3];
+         }
+      }
+   }
+
+   /* init condition codes */
+   dMachine->CondCodes[0] = COND_EQ;
+   dMachine->CondCodes[1] = COND_EQ;
+   dMachine->CondCodes[2] = COND_EQ;
+   dMachine->CondCodes[3] = COND_EQ;
+}
+
+
+/**
+ * Execute the given fragment program.
+ * NOTE: we do everything in single-precision floating point; we don't
+ * currently observe the single/half/fixed-precision qualifiers.
+ * \param ctx - rendering context
+ * \param program - the fragment program to execute
+ * \param machine - machine state (register file)
+ * \param maxInst - max number of instructions to execute
+ * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
+ */
+static GLboolean
+execute_program( GLcontext *ctx,
+                 const struct gl_fragment_program *program, GLuint maxInst,
+                 struct fp_machine *machine, const struct sw_span *span,
+                 GLuint column )
+{
+   GLuint pc;
+
+#if DEBUG_FRAG
+   printf("execute fragment program --------------------\n");
+#endif
+
+   for (pc = 0; pc < maxInst; pc++) {
+      const struct prog_instruction *inst = program->Base.Instructions + pc;
+
+      if (ctx->FragmentProgram.CallbackEnabled &&
+          ctx->FragmentProgram.Callback) {
+         ctx->FragmentProgram.CurrentPosition = inst->StringPos;
+         ctx->FragmentProgram.Callback(program->Base.Target,
+                                       ctx->FragmentProgram.CallbackData);
+      }
+
+      switch (inst->Opcode) {
+         case OPCODE_ABS:
+            {
+               GLfloat a[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = FABSF(a[0]);
+               result[1] = FABSF(a[1]);
+               result[2] = FABSF(a[2]);
+               result[3] = FABSF(a[3]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_ADD:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = a[0] + b[0];
+               result[1] = a[1] + b[1];
+               result[2] = a[2] + b[2];
+               result[3] = a[3] + b[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_CMP:
+            {
+               GLfloat a[4], b[4], c[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
+               result[0] = a[0] < 0.0F ? b[0] : c[0];
+               result[1] = a[1] < 0.0F ? b[1] : c[1];
+               result[2] = a[2] < 0.0F ? b[2] : c[2];
+               result[3] = a[3] < 0.0F ? b[3] : c[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_COS:
+            {
+               GLfloat a[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_DDX: /* Partial derivative with respect to X */
+            {
+               GLfloat a[4], aNext[4], result[4];
+               struct fp_machine dMachine;
+               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
+                                        column, result)) {
+                  /* This is tricky.  Make a copy of the current machine state,
+                   * increment the input registers by the dx or dy partial
+                   * derivatives, then re-execute the program up to the
+                   * preceeding instruction, then fetch the source register.
+                   * Finally, find the difference in the register values for
+                   * the original and derivative runs.
+                   */
+                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
+                  init_machine_deriv(ctx, machine, program, span,
+                                     'X', &dMachine);
+                  execute_program(ctx, program, pc, &dMachine, span, column);
+                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
+                  result[0] = aNext[0] - a[0];
+                  result[1] = aNext[1] - a[1];
+                  result[2] = aNext[2] - a[2];
+                  result[3] = aNext[3] - a[3];
+               }
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_DDY: /* Partial derivative with respect to Y */
+            {
+               GLfloat a[4], aNext[4], result[4];
+               struct fp_machine dMachine;
+               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
+                                        column, result)) {
+                  init_machine_deriv(ctx, machine, program, span,
+                                     'Y', &dMachine);
+                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
+                  execute_program(ctx, program, pc, &dMachine, span, column);
+                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
+                  result[0] = aNext[0] - a[0];
+                  result[1] = aNext[1] - a[1];
+                  result[2] = aNext[2] - a[2];
+                  result[3] = aNext[3] - a[3];
+               }
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_DP3:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = result[1] = result[2] = result[3] = 
+                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
+               store_vector4( inst, machine, result );
+#if DEBUG_FRAG
+               printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
+                      result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
+#endif
+            }
+            break;
+         case OPCODE_DP4:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = result[1] = result[2] = result[3] = 
+                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+               store_vector4( inst, machine, result );
+#if DEBUG_FRAG
+               printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
+                      result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+#endif
+            }
+            break;
+         case OPCODE_DPH:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = result[1] = result[2] = result[3] = 
+                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_DST: /* Distance vector */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = 1.0F;
+               result[1] = a[1] * b[1];
+               result[2] = a[2];
+               result[3] = b[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_EX2: /* Exponential base 2 */
+            {
+               GLfloat a[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = result[1] = result[2] = result[3] =
+                  (GLfloat) _mesa_pow(2.0, a[0]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_FLR:
+            {
+               GLfloat a[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = FLOORF(a[0]);
+               result[1] = FLOORF(a[1]);
+               result[2] = FLOORF(a[2]);
+               result[3] = FLOORF(a[3]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_FRC:
+            {
+               GLfloat a[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = a[0] - FLOORF(a[0]);
+               result[1] = a[1] - FLOORF(a[1]);
+               result[2] = a[2] - FLOORF(a[2]);
+               result[3] = a[3] - FLOORF(a[3]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_KIL_NV: /* NV_f_p only */
+            {
+               const GLuint swizzle = inst->DstReg.CondSwizzle;
+               const GLuint condMask = inst->DstReg.CondMask;
+               if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
+                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
+                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
+                   test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
+                  return GL_FALSE;
+               }
+            }
+            break;
+         case OPCODE_KIL: /* ARB_f_p only */
+            {
+               GLfloat a[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
+                  return GL_FALSE;
+               }
+            }
+            break;
+         case OPCODE_LG2:  /* log base 2 */
+            {
+               GLfloat a[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = result[1] = result[2] = result[3]
+                  = LOG2(a[0]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_LIT:
+            {
+               const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
+               GLfloat a[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               a[0] = MAX2(a[0], 0.0F);
+               a[1] = MAX2(a[1], 0.0F);
+               /* XXX ARB version clamps a[3], NV version doesn't */
+               a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
+               result[0] = 1.0F;
+               result[1] = a[0];
+               /* XXX we could probably just use pow() here */
+               if (a[0] > 0.0F) {
+                  if (a[1] == 0.0 && a[3] == 0.0)
+                     result[2] = 1.0;
+                  else
+                     result[2] = EXPF(a[3] * LOGF(a[1]));
+               }
+               else {
+                  result[2] = 0.0;
+               }
+               result[3] = 1.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_LRP:
+            {
+               GLfloat a[4], b[4], c[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
+               result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
+               result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
+               result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
+               result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_MAD:
+            {
+               GLfloat a[4], b[4], c[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
+               result[0] = a[0] * b[0] + c[0];
+               result[1] = a[1] * b[1] + c[1];
+               result[2] = a[2] * b[2] + c[2];
+               result[3] = a[3] * b[3] + c[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_MAX:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = MAX2(a[0], b[0]);
+               result[1] = MAX2(a[1], b[1]);
+               result[2] = MAX2(a[2], b[2]);
+               result[3] = MAX2(a[3], b[3]);
+               store_vector4( inst, machine, result );
+#if DEBUG_FRAG
+               printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3], 
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+#endif
+            }
+            break;
+         case OPCODE_MIN:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = MIN2(a[0], b[0]);
+               result[1] = MIN2(a[1], b[1]);
+               result[2] = MIN2(a[2], b[2]);
+               result[3] = MIN2(a[3], b[3]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_MOV:
+            {
+               GLfloat result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
+               store_vector4( inst, machine, result );
+#if DEBUG_FRAG
+               printf("MOV (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3]);
+#endif
+            }
+            break;
+         case OPCODE_MUL:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = a[0] * b[0];
+               result[1] = a[1] * b[1];
+               result[2] = a[2] * b[2];
+               result[3] = a[3] * b[3];
+               store_vector4( inst, machine, result );
+#if DEBUG_FRAG
+               printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3], 
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+#endif
+            }
+            break;
+         case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
+            {
+               GLfloat a[4], result[4];
+               GLhalfNV hx, hy;
+               GLuint *rawResult = (GLuint *) result;
+               GLuint twoHalves;
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               hx = _mesa_float_to_half(a[0]);
+               hy = _mesa_float_to_half(a[1]);
+               twoHalves = hx | (hy << 16);
+               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
+                  = twoHalves;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
+            {
+               GLfloat a[4], result[4];
+               GLuint usx, usy, *rawResult = (GLuint *) result;
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               a[0] = CLAMP(a[0], 0.0F, 1.0F);
+               a[1] = CLAMP(a[1], 0.0F, 1.0F);
+               usx = IROUND(a[0] * 65535.0F);
+               usy = IROUND(a[1] * 65535.0F);
+               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
+                  = usx | (usy << 16);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
+            {
+               GLfloat a[4], result[4];
+               GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
+               a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
+               a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
+               a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
+               ubx = IROUND(127.0F * a[0] + 128.0F);
+               uby = IROUND(127.0F * a[1] + 128.0F);
+               ubz = IROUND(127.0F * a[2] + 128.0F);
+               ubw = IROUND(127.0F * a[3] + 128.0F);
+               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
+                  = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
+            {
+               GLfloat a[4], result[4];
+               GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               a[0] = CLAMP(a[0], 0.0F, 1.0F);
+               a[1] = CLAMP(a[1], 0.0F, 1.0F);
+               a[2] = CLAMP(a[2], 0.0F, 1.0F);
+               a[3] = CLAMP(a[3], 0.0F, 1.0F);
+               ubx = IROUND(255.0F * a[0]);
+               uby = IROUND(255.0F * a[1]);
+               ubz = IROUND(255.0F * a[2]);
+               ubw = IROUND(255.0F * a[3]);
+               rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
+                  = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_POW:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = result[1] = result[2] = result[3]
+                  = (GLfloat)_mesa_pow(a[0], b[0]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_RCP:
+            {
+               GLfloat a[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+#if DEBUG_FRAG
+               if (a[0] == 0)
+                  printf("RCP(0)\n");
+               else if (IS_INF_OR_NAN(a[0]))
+                  printf("RCP(inf)\n");
+#endif
+               result[0] = result[1] = result[2] = result[3]
+                  = 1.0F / a[0];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_RFL:
+            {
+               GLfloat axis[4], dir[4], result[4], tmp[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
+               tmp[3] = axis[0] * axis[0]
+                      + axis[1] * axis[1]
+                      + axis[2] * axis[2];
+               tmp[0] = (2.0F * (axis[0] * dir[0] +
+                                 axis[1] * dir[1] +
+                                 axis[2] * dir[2])) / tmp[3];
+               result[0] = tmp[0] * axis[0] - dir[0];
+               result[1] = tmp[0] * axis[1] - dir[1];
+               result[2] = tmp[0] * axis[2] - dir[2];
+               /* result[3] is never written! XXX enforce in parser! */
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_RSQ: /* 1 / sqrt() */
+            {
+               GLfloat a[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               a[0] = FABSF(a[0]);
+               result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
+               store_vector4( inst, machine, result );
+#if DEBUG_FRAG
+               printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
+#endif
+            }
+            break;
+         case OPCODE_SCS: /* sine and cos */
+            {
+               GLfloat a[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = (GLfloat)_mesa_cos(a[0]);
+               result[1] = (GLfloat)_mesa_sin(a[0]);
+               result[2] = 0.0;  /* undefined! */
+               result[3] = 0.0;  /* undefined! */
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SEQ: /* set on equal */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
+               result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
+               result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
+               result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SFL: /* set false, operands ignored */
+            {
+               static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SGE: /* set on greater or equal */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
+               result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
+               result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
+               result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SGT: /* set on greater */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
+               result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
+               result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
+               result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SIN:
+            {
+               GLfloat a[4], result[4];
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = result[1] = result[2] = 
+		       result[3] = (GLfloat)_mesa_sin(a[0]);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SLE: /* set on less or equal */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
+               result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
+               result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
+               result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SLT: /* set on less */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
+               result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
+               result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
+               result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SNE: /* set on not equal */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
+               result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
+               result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
+               result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_STR: /* set true, operands ignored */
+            {
+               static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SUB:
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = a[0] - b[0];
+               result[1] = a[1] - b[1];
+               result[2] = a[2] - b[2];
+               result[3] = a[3] - b[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_SWZ:
+            {
+               const struct prog_src_register *source = &inst->SrcReg[0];
+               const GLfloat *src = get_register_pointer(ctx, source,
+                                                         machine, program);
+               GLfloat result[4];
+               GLuint i;
+
+               /* do extended swizzling here */
+               for (i = 0; i < 4; i++) {
+                  if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ZERO)
+                     result[i] = 0.0;
+                  else if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ONE)
+                     result[i] = 1.0;
+                  else
+                     result[i] = src[GET_SWZ(source->Swizzle, i)];
+
+                  if (source->NegateBase & (1 << i))
+                     result[i] = -result[i];
+               }
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_TEX: /* Both ARB and NV frag prog */
+            /* Texel lookup */
+            {
+               GLfloat texcoord[4], color[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
+               /* Note: we pass 0 for LOD.  The ARB extension requires it
+                * while the NV extension says it's implementation dependent.
+                */
+               /* KW: Previously lambda was passed as zero, but I
+		* believe this is incorrect, the spec seems to
+		* indicate rather that lambda should not be
+		* changed/biased, unlike TXB where texcoord[3] is
+		* added to the lambda calculations.  The lambda should
+		* still be calculated normally for TEX & TXP though,
+		* not set to zero.  Otherwise it's very difficult to
+		* implement normal GL semantics through the fragment
+		* shader.
+		*/
+               fetch_texel( ctx, texcoord, 
+			    span->array->lambda[inst->TexSrcUnit][column],
+			    inst->TexSrcUnit, color );
+#if DEBUG_FRAG
+               if (color[3])
+                  printf("color[3] = %f\n", color[3]);
+#endif
+               store_vector4( inst, machine, color );
+            }
+            break;
+         case OPCODE_TXB: /* GL_ARB_fragment_program only */
+            /* Texel lookup with LOD bias */
+            {
+               GLfloat texcoord[4], color[4], bias, lambda;
+
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
+               /* texcoord[3] is the bias to add to lambda */
+               bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
+                    + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
+                    + texcoord[3];
+               lambda = span->array->lambda[inst->TexSrcUnit][column] + bias;
+               fetch_texel( ctx, texcoord, lambda,
+                            inst->TexSrcUnit, color );
+               store_vector4( inst, machine, color );
+            }
+            break;
+         case OPCODE_TXD: /* GL_NV_fragment_program only */
+            /* Texture lookup w/ partial derivatives for LOD */
+            {
+               GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
+               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
+               fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
+                                  color );
+               store_vector4( inst, machine, color );
+            }
+            break;
+         case OPCODE_TXP: /* GL_ARB_fragment_program only */
+            /* Texture lookup w/ projective divide */
+            {
+               GLfloat texcoord[4], color[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
+	       /* Not so sure about this test - if texcoord[3] is
+		* zero, we'd probably be fine except for an ASSERT in
+		* IROUND_POS() which gets triggered by the inf values created.
+		*/
+	       if (texcoord[3] != 0.0) {
+		  texcoord[0] /= texcoord[3];
+		  texcoord[1] /= texcoord[3];
+		  texcoord[2] /= texcoord[3];
+	       }
+               /* KW: Previously lambda was passed as zero, but I
+		* believe this is incorrect, the spec seems to
+		* indicate rather that lambda should not be
+		* changed/biased, unlike TXB where texcoord[3] is
+		* added to the lambda calculations.  The lambda should
+		* still be calculated normally for TEX & TXP though,
+		* not set to zero.
+		*/
+               fetch_texel( ctx, texcoord, 
+			    span->array->lambda[inst->TexSrcUnit][column],
+			    inst->TexSrcUnit, color );
+               store_vector4( inst, machine, color );
+            }
+            break;
+         case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
+            /* Texture lookup w/ projective divide */
+            {
+               GLfloat texcoord[4], color[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
+               if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
+		   texcoord[3] != 0.0) {
+                  texcoord[0] /= texcoord[3];
+                  texcoord[1] /= texcoord[3];
+                  texcoord[2] /= texcoord[3];
+               }
+               fetch_texel( ctx, texcoord,
+                            span->array->lambda[inst->TexSrcUnit][column],
+                            inst->TexSrcUnit, color );
+               store_vector4( inst, machine, color );
+            }
+            break;
+         case OPCODE_UP2H: /* unpack two 16-bit floats */
+            {
+               GLfloat a[4], result[4];
+               const GLuint *rawBits = (const GLuint *) a;
+               GLhalfNV hx, hy;
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               hx = rawBits[0] & 0xffff;
+               hy = rawBits[0] >> 16;
+               result[0] = result[2] = _mesa_half_to_float(hx);
+               result[1] = result[3] = _mesa_half_to_float(hy);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_UP2US: /* unpack two GLushorts */
+            {
+               GLfloat a[4], result[4];
+               const GLuint *rawBits = (const GLuint *) a;
+               GLushort usx, usy;
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               usx = rawBits[0] & 0xffff;
+               usy = rawBits[0] >> 16;
+               result[0] = result[2] = usx * (1.0f / 65535.0f);
+               result[1] = result[3] = usy * (1.0f / 65535.0f);
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_UP4B: /* unpack four GLbytes */
+            {
+               GLfloat a[4], result[4];
+               const GLuint *rawBits = (const GLuint *) a;
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
+               result[1] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
+               result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
+               result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_UP4UB: /* unpack four GLubytes */
+            {
+               GLfloat a[4], result[4];
+               const GLuint *rawBits = (const GLuint *) a;
+               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
+               result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
+               result[1] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
+               result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
+               result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_XPD: /* cross product */
+            {
+               GLfloat a[4], b[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               result[0] = a[1] * b[2] - a[2] * b[1];
+               result[1] = a[2] * b[0] - a[0] * b[2];
+               result[2] = a[0] * b[1] - a[1] * b[0];
+               result[3] = 1.0;
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_X2D: /* 2-D matrix transform */
+            {
+               GLfloat a[4], b[4], c[4], result[4];
+               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
+               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
+               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
+               result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
+               result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
+               result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
+               result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
+               store_vector4( inst, machine, result );
+            }
+            break;
+         case OPCODE_PRINT:
+            {
+               if (inst->SrcReg[0].File != -1) {
+                  GLfloat a[4];
+                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
+                  _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
+                               a[0], a[1], a[2], a[3]);
+               }
+               else {
+                  _mesa_printf("%s\n", (const char *) inst->Data);
+               }
+            }
+            break;
+         case OPCODE_END:
+            return GL_TRUE;
+         default:
+            _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
+                          inst->Opcode);
+            return GL_TRUE; /* return value doesn't matter */
+      }
+   }
+   return GL_TRUE;
+}
+
+
+/* Prepare 'machine' to run 'program' on fragment 'col' of 'span': load the
+ * input registers selected by program->Base.InputsRead (every input when
+ * ctx->FragmentProgram.CallbackEnabled is set) and reset all four condition
+ * codes to COND_EQ.  NV programs also get their temporaries zeroed. */
+static void
+init_machine( GLcontext *ctx, struct fp_machine *machine,
+              const struct gl_fragment_program *program,
+              const struct sw_span *span, GLuint col )
+{
+   GLuint inputsRead = program->Base.InputsRead;
+   GLuint u;
+
+   if (ctx->FragmentProgram.CallbackEnabled)
+      inputsRead = ~0;
+
+   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
+      /* Clear temporary registers (undefined for ARB_f_p) */
+      _mesa_bzero(machine->Temporaries,
+                  MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
+   }
+
+   /* Load input registers */
+   if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
+      GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
+      ASSERT(span->arrayMask & SPAN_Z);
+      if (span->arrayMask & SPAN_XY) {
+         wpos[0] = (GLfloat) span->array->x[col];
+         wpos[1] = (GLfloat) span->array->y[col];
+      }
+      else {
+         wpos[0] = (GLfloat) span->x + col;
+         wpos[1] = (GLfloat) span->y;
+      }
+      wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
+      wpos[3] = span->w + col * span->dwdx;
+   }
+   if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
+      GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
+      ASSERT(span->arrayMask & SPAN_RGBA);
+      col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
+      col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
+      col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
+      col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
+   }
+   if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
+      GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
+      col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
+      col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
+      col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
+      col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
+   }
+   if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
+      GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
+      ASSERT(span->arrayMask & SPAN_FOG);
+      fogc[0] = span->array->fog[col];
+      fogc[1] = fogc[2] = 0.0F;
+      /* w: ARB_f_p says fragment.fogcoord is (f,0,0,1); NV_f_p (f,0,0,0) */
+      fogc[3] = (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) ? 0.0F : 1.0F;
+   }
+   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+      if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
+         GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
+         COPY_4V(tex, span->array->texcoords[u][col]);
+      }
+   }
+
+   /* init condition codes */
+   for (u = 0; u < 4; u++)
+      machine->CondCodes[u] = COND_EQ;
+}
+
+
+
+/**
+ * Execute the current fragment program, operating on the given span.
+ * Every fragment whose mask is set is run through the program; fragments the
+ * program kills get their mask cleared.  The color output (and the depth
+ * output, if the program writes it) is copied back into the span arrays.
+ */
+void
+_swrast_exec_fragment_program( GLcontext *ctx, struct sw_span *span )
+{
+   const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
+   struct fp_machine *machine = &ctx->FragmentProgram.Machine;
+   GLuint i;
+
+   ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
+
+   if (program->Base.Parameters) {
+      _mesa_load_state_parameters(ctx, program->Base.Parameters);
+   }
+
+   for (i = 0; i < span->end; i++) {
+      if (span->array->mask[i]) {
+         init_machine(ctx, machine, program, span, i);
+         if (!execute_program(ctx, program, ~0, machine, span, i)) {
+            span->array->mask[i] = GL_FALSE;  /* killed fragment */
+            span->writeAll = GL_FALSE;
+         }
+
+         /* Store output registers */
+         {
+            const GLfloat *colOut = machine->Outputs[FRAG_RESULT_COLR];
+            UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
+            UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
+            UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
+            UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
+         }
+         /* depth value */
+         if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
+            const GLfloat depth = machine->Outputs[FRAG_RESULT_DEPR][2];
+            if (depth <= 0.0)
+               span->array->z[i] = 0;
+            else if (depth >= 1.0)
+               span->array->z[i] = ctx->DrawBuffer->_DepthMax;
+            else /* IROUND() yields an int: too narrow for depth buffers > 31 bits */
+               span->array->z[i] = (GLuint)
+                  ((GLdouble) depth * ctx->DrawBuffer->_DepthMaxF + 0.5);
+         }
+      }
+   }
+
+   if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
+      span->interpMask &= ~SPAN_Z;
+      span->arrayMask |= SPAN_Z;
+   }
+
+   ctx->_CurrentProgram = 0;
+}
+
diff --git a/dist/Mesa/src/mesa/swrast/s_nvfragprog.h b/dist/Mesa/src/mesa/swrast/s_nvfragprog.h
new file mode 100644
index 000000000..ac5a15fe9
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_nvfragprog.h
@@ -0,0 +1,37 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.1
+ *
+ * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_NVFRAGPROG_H
+#define S_NVFRAGPROG_H
+
+
+#include "s_context.h"
+
+/* Run the context's current fragment program over the fragments of 'span'. */
+extern void
+_swrast_exec_fragment_program( GLcontext *ctx, struct sw_span *span );
+
+
+#endif /* S_NVFRAGPROG_H */
diff --git a/dist/Mesa/src/mesa/swrast/s_points.c b/dist/Mesa/src/mesa/swrast/s_points.c
new file mode 100644
index 000000000..5879bccf1
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_points.c
@@ -0,0 +1,275 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.1
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "colormac.h"
+#include "context.h"
+#include "macros.h"
+#include "texstate.h"
+#include "s_context.h"
+#include "s_feedback.h"
+#include "s_points.h"
+#include "s_span.h"
+
+/* Feature bits OR-ed into FLAGS before each #include of s_pointtemp.h.
+ * They must stay preprocessor macros: the template tests them with #if. */
+#define RGBA       0x1   /* rgba instead of color index */
+#define INDEX      0x2   /* color index */
+#define SMOOTH     0x4   /* antialiasing */
+#define TEXTURE    0x8   /* texture coords */
+#define SPECULAR  0x10   /* separate specular color */
+#define LARGE     0x20   /* diameter > 1 pixel */
+#define ATTENUATE 0x40   /* point size attenuation */
+#define SPRITE    0x80   /* GL_ARB_point_sprite / GL_NV_point_sprite */
+
+
+/*
+ * Color-index (CI) points with size == 1.0 (single pixel).
+ */
+#define FLAGS (INDEX)
+#define NAME size1_ci_point
+#include "s_pointtemp.h"
+
+
+/*
+ * General CI points (LARGE: constant, non-attenuated size).
+ */
+#define FLAGS (INDEX | LARGE)
+#define NAME general_ci_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Antialiased (SMOOTH) CI points.
+ */
+#define FLAGS (INDEX | SMOOTH)
+#define NAME antialiased_ci_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Distance attenuated (ATTENUATE), general CI points.
+ */
+#define FLAGS (INDEX | ATTENUATE)
+#define NAME atten_general_ci_point
+#include "s_pointtemp.h"
+
+
+/*
+ * RGBA points with size == 1.0 (single pixel).
+ */
+#define FLAGS (RGBA)
+#define NAME size1_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * General RGBA points (LARGE: constant, non-attenuated size).
+ */
+#define FLAGS (RGBA | LARGE)
+#define NAME general_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Antialiased (SMOOTH) RGBA points.
+ */
+#define FLAGS (RGBA | SMOOTH)
+#define NAME antialiased_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Textured RGBA points, with separate specular color.
+ */
+#define FLAGS (RGBA | LARGE | TEXTURE | SPECULAR)
+#define NAME textured_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Antialiased RGBA points with texture mapping and separate specular color.
+ */
+#define FLAGS (RGBA | SMOOTH | TEXTURE | SPECULAR)
+#define NAME antialiased_tex_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Distance attenuated, general RGBA points.
+ */
+#define FLAGS (RGBA | ATTENUATE)
+#define NAME atten_general_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Distance attenuated, textured RGBA points, with separate specular color.
+ */
+#define FLAGS (RGBA | ATTENUATE | TEXTURE | SPECULAR)
+#define NAME atten_textured_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Distance attenuated, antialiased RGBA points (textured or untextured).
+ */
+#define FLAGS (RGBA | ATTENUATE | TEXTURE | SMOOTH)
+#define NAME atten_antialiased_rgba_point
+#include "s_pointtemp.h"
+
+
+/*
+ * Point sprite (textured point), non-attenuated size.
+ */
+#define FLAGS (RGBA | SPRITE | SPECULAR)
+#define NAME sprite_point
+#include "s_pointtemp.h"
+
+/* Distance attenuated point sprite. */
+#define FLAGS (RGBA | SPRITE | SPECULAR | ATTENUATE)
+#define NAME atten_sprite_point
+#include "s_pointtemp.h"
+
+/* Call the SpecPoint hook with specular temporarily accumulated into color. */
+void
+_swrast_add_spec_terms_point( GLcontext *ctx, const SWvertex *v0 )
+{
+   SWvertex *vtx = (SWvertex *) v0;   /* const cast away: color is changed, then restored */
+   GLchan savedColor[4];
+
+   COPY_CHAN4( savedColor, vtx->color );
+   ACC_3V( vtx->color, vtx->specular );
+   SWRAST_CONTEXT(ctx)->SpecPoint( ctx, vtx );
+   COPY_CHAN4( vtx->color, savedColor );
+}
+
+
+
+/* USE(f) installs f as swrast->Point; DEBUG builds also record f's name. */
+#ifdef DEBUG
+
+static const char *pntFuncName = NULL;
+
+#define USE(pntFunc)                   \
+do {                                   \
+    pntFuncName = #pntFunc;            \
+    /*printf("%s\n", pntFuncName);*/   \
+    swrast->Point = pntFunc;           \
+} while (0)
+
+#else
+
+#define USE(pntFunc)  swrast->Point = pntFunc
+
+#endif
+
+
+/*
+ * Pick the point drawing function that matches the current context
+ * state and install it as swrast->Point (via USE).
+ */
+void
+_swrast_choose_point( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLboolean rgbMode = ctx->Visual.rgbMode;
+
+   if (ctx->RenderMode==GL_RENDER) {
+      if (ctx->Point.PointSprite) {
+         /* GL_ARB_point_sprite / GL_NV_point_sprite */
+         /* XXX this might not be good enough */
+         if (ctx->Point._Attenuated)
+            USE(atten_sprite_point);
+         else
+            USE(sprite_point);
+      }
+      else if (ctx->Point.SmoothFlag) {
+         /* Smooth points */
+         if (rgbMode) {
+            if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled) {
+               USE(atten_antialiased_rgba_point);
+            }
+            else if (ctx->Texture._EnabledCoordUnits) {
+               USE(antialiased_tex_rgba_point);
+            }
+            else {
+               USE(antialiased_rgba_point);
+            }
+         }
+         else {
+            USE(antialiased_ci_point);
+         }
+      }
+      else if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled) {
+         if (rgbMode) {
+            if (ctx->Texture._EnabledCoordUnits) {
+               /* ctx->Point.SmoothFlag is always false here: smooth
+                * points were dispatched by the branch above, so no
+                * antialiased variant needs to be considered at this
+                * spot.
+                */
+               USE(atten_textured_rgba_point);
+            }
+            else {
+               USE(atten_general_rgba_point);
+            }
+         }
+         else {
+            /* ci, atten */
+            USE(atten_general_ci_point);
+         }
+      }
+      else if (ctx->Texture._EnabledCoordUnits && rgbMode) {
+         /* textured */
+         USE(textured_rgba_point);
+      }
+      else if (ctx->Point._Size != 1.0) {
+         /* large points */
+         if (rgbMode) {
+            USE(general_rgba_point);
+         }
+         else {
+            USE(general_ci_point);
+         }
+      }
+      else {
+         /* single pixel points */
+         if (rgbMode) {
+            USE(size1_rgba_point);
+         }
+         else {
+            USE(size1_ci_point);
+         }
+      }
+   }
+   else if (ctx->RenderMode==GL_FEEDBACK) {
+      USE(_swrast_feedback_point);
+   }
+   else {
+      /* GL_SELECT mode */
+      USE(_swrast_select_point);
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_points.h b/dist/Mesa/src/mesa/swrast/s_points.h
new file mode 100644
index 000000000..40b442e95
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_points.h
@@ -0,0 +1,39 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef S_POINTS_H
+#define S_POINTS_H
+
+#include "mtypes.h"
+#include "swrast.h"   /* SWvertex, so this header is self-contained */
+
+extern void
+_swrast_choose_point( GLcontext *ctx );
+
+extern void
+_swrast_add_spec_terms_point( GLcontext *ctx,
+                              const SWvertex *v0 );
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_pointtemp.h b/dist/Mesa/src/mesa/swrast/s_pointtemp.h
new file mode 100644
index 000000000..4ce261009
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_pointtemp.h
@@ -0,0 +1,419 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Regarding GL_NV_point_sprite:
+ *
+ * Portions of this software may use or implement intellectual
+ * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
+ * any and all warranties with respect to such intellectual property,
+ * including any use thereof or modifications thereto.
+ */
+
+
+/*
+ * Point rendering template code.
+ *
+ * Set FLAGS = bitwise-OR of the following tokens:
+ *
+ *   RGBA = do rgba instead of color index
+ *   SMOOTH = do antialiasing
+ *   TEXTURE = do texture coords
+ *   SPECULAR = do separate specular color
+ *   LARGE = do points with diameter > 1 pixel
+ *   ATTENUATE = compute point size attenuation
+ *   SPRITE = GL_ARB_point_sprite / GL_NV_point_sprite
+ *
+ * Notes: LARGE and ATTENUATE are exclusive of each other.
+ *        TEXTURE requires RGBA
+ */
+
+
+/*
+ * NOTES on antialiased point rasterization:
+ *
+ * Let d = squared distance of fragment center from vertex.
+ * if d < rmin2 then
+ *    fragment has 100% coverage
+ * else if d >= rmax2 then
+ *    fragment has 0% coverage
+ * else
+ *    fragment has coverage = 1 - (d - rmin2) / (rmax2 - rmin2)
+ */
+
+/* Add one point's fragments to swrast->PointSpan, flushing the span when
+ * needed.  FLAGS selects the code variant at compile time; NAME names it. */
+static void
+NAME ( GLcontext *ctx, const SWvertex *vert )
+{
+#if FLAGS & (ATTENUATE | LARGE | SMOOTH | SPRITE)
+   GLfloat size;
+#endif
+#if FLAGS & RGBA
+#if (FLAGS & ATTENUATE) && (FLAGS & SMOOTH)
+   GLfloat alphaAtten;
+#endif
+   const GLchan red   = vert->color[0];
+   const GLchan green = vert->color[1];
+   const GLchan blue  = vert->color[2];
+   const GLchan alpha = vert->color[3];
+#endif
+#if FLAGS & SPECULAR
+   const GLchan specRed   = vert->specular[0];
+   const GLchan specGreen = vert->specular[1];
+   const GLchan specBlue  = vert->specular[2];
+#endif
+#if FLAGS & INDEX
+   const GLuint colorIndex = (GLuint) vert->index; /* XXX round? */
+#endif
+#if FLAGS & TEXTURE
+   GLfloat texcoord[MAX_TEXTURE_COORD_UNITS][4];
+   GLuint u;
+#endif
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct sw_span *span = &(swrast->PointSpan);
+
+   /* Cull primitives with malformed coordinates.
+    */
+   {
+      float tmp = vert->win[0] + vert->win[1];
+      if (IS_INF_OR_NAN(tmp))
+         return;
+   }
+
+   /*
+    * Span init
+    */
+   span->interpMask = SPAN_FOG;
+   span->arrayMask = SPAN_XY | SPAN_Z;
+   span->fog = vert->fog;
+   span->fogStep = 0.0;
+#if FLAGS & RGBA
+   span->arrayMask |= SPAN_RGBA;
+#endif
+#if FLAGS & SPECULAR
+   span->arrayMask |= SPAN_SPEC;
+#endif
+#if FLAGS & INDEX
+   span->arrayMask |= SPAN_INDEX;
+#endif
+#if FLAGS & TEXTURE
+   span->arrayMask |= SPAN_TEXTURE;
+   if (ctx->FragmentProgram._Active) {
+      /* Don't divide texture s,t,r by q (use TXP to do that) */
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (ctx->Texture._EnabledCoordUnits & (1 << u)) {
+            COPY_4V(texcoord[u], vert->texcoord[u]);
+         }
+      }
+   }
+   else {
+      /* Divide texture s,t,r by q here */
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (ctx->Texture._EnabledCoordUnits & (1 << u)) {
+            const GLfloat q = vert->texcoord[u][3];
+            const GLfloat invQ = (q == 0.0F || q == 1.0F) ? 1.0F : (1.0F / q);
+            texcoord[u][0] = vert->texcoord[u][0] * invQ;
+            texcoord[u][1] = vert->texcoord[u][1] * invQ;
+            texcoord[u][2] = vert->texcoord[u][2] * invQ;
+            texcoord[u][3] = q;
+         }
+      }
+   }
+   /* need these for fragment programs */
+   span->w = 1.0F;
+   span->dwdx = 0.0F;
+   span->dwdy = 0.0F;
+#endif
+#if FLAGS & SMOOTH
+   span->arrayMask |= SPAN_COVERAGE;
+#endif
+#if FLAGS & SPRITE
+   span->arrayMask |= SPAN_TEXTURE;
+#endif
+
+   /* Compute point size if not known to be one */
+#if FLAGS & ATTENUATE
+   /* first, clamp attenuated size to the user-specified range */
+   size = CLAMP(vert->pointSize, ctx->Point.MinSize, ctx->Point.MaxSize);
+#if (FLAGS & RGBA) && (FLAGS & SMOOTH)
+   /* only if multisampling, compute the fade factor */
+   if (ctx->Multisample.Enabled) {
+      if (vert->pointSize >= ctx->Point.Threshold) {
+         alphaAtten = 1.0F;
+      }
+      else {
+         GLfloat dsize = vert->pointSize / ctx->Point.Threshold;
+         alphaAtten = dsize * dsize;
+      }
+   }
+   else {
+      alphaAtten = 1.0;
+   }
+#endif
+#elif FLAGS & (LARGE | SMOOTH | SPRITE)
+   /* constant, non-attenuated size */
+   size = ctx->Point._Size; /* this is already clamped */
+#endif
+
+
+#if FLAGS & (ATTENUATE | LARGE | SMOOTH | SPRITE)
+   /***
+    *** Multi-pixel points
+    ***/
+
+   /* do final clamping now */
+   if (ctx->Point.SmoothFlag) {
+      size = CLAMP(size, ctx->Const.MinPointSizeAA, ctx->Const.MaxPointSizeAA);
+   }
+   else {
+      size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
+   }
+
+   {{
+      GLint x, y;
+      const GLfloat radius = 0.5F * size;
+      const GLint z = (GLint) (vert->win[2] + 0.5F);
+      GLuint count;
+#if FLAGS & SMOOTH
+      const GLfloat rmin = radius - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
+      const GLfloat rmax = radius + 0.7071F;
+      const GLfloat rmin2 = MAX2(0.0F, rmin * rmin);
+      const GLfloat rmax2 = rmax * rmax;
+      const GLfloat cscale = 1.0F / (rmax2 - rmin2);
+      const GLint xmin = (GLint) (vert->win[0] - radius);
+      const GLint xmax = (GLint) (vert->win[0] + radius);
+      const GLint ymin = (GLint) (vert->win[1] - radius);
+      const GLint ymax = (GLint) (vert->win[1] + radius);
+#else
+      /* non-smooth */
+      GLint xmin, xmax, ymin, ymax;
+      GLint iSize = (GLint) (size + 0.5F);
+      GLint iRadius;
+      iSize = MAX2(1, iSize);
+      iRadius = iSize / 2;
+      if (iSize & 1) {
+         /* odd size */
+         xmin = (GLint) (vert->win[0] - iRadius);
+         xmax = (GLint) (vert->win[0] + iRadius);
+         ymin = (GLint) (vert->win[1] - iRadius);
+         ymax = (GLint) (vert->win[1] + iRadius);
+      }
+      else {
+         /* even size */
+         xmin = (GLint) vert->win[0] - iRadius + 1;
+         xmax = xmin + iSize - 1;
+         ymin = (GLint) vert->win[1] - iRadius + 1;
+         ymax = ymin + iSize - 1;
+      }
+#endif /*SMOOTH*/
+
+      /* check if we need to flush */
+      if (span->end + (xmax-xmin+1) * (ymax-ymin+1) >= MAX_WIDTH ||
+          (swrast->_RasterMask & (BLEND_BIT | LOGIC_OP_BIT | MASKING_BIT))) {
+#if FLAGS & RGBA
+         _swrast_write_rgba_span(ctx, span);
+#else
+         _swrast_write_index_span(ctx, span);
+#endif
+         span->end = 0;
+      }
+
+      /*
+       * OK, generate fragments
+       */
+      count = span->end;
+      (void) radius;  /* not otherwise referenced in the non-SMOOTH variants */
+      for (y = ymin; y <= ymax; y++) {
+         /* check if we need to flush */
+         if (count + (xmax-xmin+1) >= MAX_WIDTH) {
+            span->end = count;
+#if FLAGS & RGBA
+            _swrast_write_rgba_span(ctx, span);
+#else
+            _swrast_write_index_span(ctx, span);
+#endif
+            count = span->end = 0;
+         }
+         for (x = xmin; x <= xmax; x++) {
+#if FLAGS & (SPRITE | TEXTURE)
+            GLuint u;
+#endif
+
+#if FLAGS & RGBA
+            span->array->rgba[count][RCOMP] = red;
+            span->array->rgba[count][GCOMP] = green;
+            span->array->rgba[count][BCOMP] = blue;
+            span->array->rgba[count][ACOMP] = alpha;
+#endif
+#if FLAGS & SPECULAR
+            span->array->spec[count][RCOMP] = specRed;
+            span->array->spec[count][GCOMP] = specGreen;
+            span->array->spec[count][BCOMP] = specBlue;
+#endif
+#if FLAGS & INDEX
+            span->array->index[count] = colorIndex;
+#endif
+#if FLAGS & TEXTURE
+            for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+               if (ctx->Texture._EnabledCoordUnits & (1 << u)) {
+                  COPY_4V(span->array->texcoords[u][count], texcoord[u]);
+               }
+            }
+#endif
+
+#if FLAGS & SMOOTH
+            /* compute coverage */
+            {
+               const GLfloat dx = x - vert->win[0] + 0.5F;
+               const GLfloat dy = y - vert->win[1] + 0.5F;
+               const GLfloat dist2 = dx * dx + dy * dy;
+               if (dist2 < rmax2) {
+                  if (dist2 >= rmin2) {
+                     /* compute partial coverage */
+                     span->array->coverage[count] = 1.0F - (dist2 - rmin2) * cscale;
+#if FLAGS & INDEX
+                     /* coverage in [0,15] */
+                     span->array->coverage[count] *= 15.0;
+#endif
+                  }
+                  else {
+                     /* full coverage */
+                     span->array->coverage[count] = 1.0F;
+                  }
+
+                  span->array->x[count] = x;
+                  span->array->y[count] = y;
+                  span->array->z[count] = z;
+
+#if (FLAGS & ATTENUATE) && (FLAGS & RGBA)
+                  span->array->rgba[count][ACOMP] = (GLchan) (alpha * alphaAtten);
+#elif FLAGS & RGBA
+                  span->array->rgba[count][ACOMP] = alpha;
+#endif /*ATTENUATE*/
+                  count++;
+               } /*if*/
+            }
+
+#else /*SMOOTH*/
+
+            /* not smooth (square points) */
+            span->array->x[count] = x;
+            span->array->y[count] = y;
+            span->array->z[count] = z;
+
+#if FLAGS & SPRITE
+            for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+               if (ctx->Texture.Unit[u]._ReallyEnabled) {
+                  if (ctx->Point.CoordReplace[u]) {
+                     GLfloat s = 0.5F + (x + 0.5F - vert->win[0]) / size;
+                     GLfloat t, r;
+                     if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
+                        t = 0.5F + (y + 0.5F - vert->win[1]) / size;
+                     else /* GL_UPPER_LEFT */
+                        t = 0.5F - (y + 0.5F - vert->win[1]) / size;
+                     if (ctx->Point.SpriteRMode == GL_ZERO)
+                        r = 0.0F;
+                     else if (ctx->Point.SpriteRMode == GL_S)
+                        r = vert->texcoord[u][0];
+                     else /* GL_R */
+                        r = vert->texcoord[u][2];
+                     span->array->texcoords[u][count][0] = s;
+                     span->array->texcoords[u][count][1] = t;
+                     span->array->texcoords[u][count][2] = r;
+                     span->array->texcoords[u][count][3] = 1.0F;
+                  }
+                  else {
+                     COPY_4V(span->array->texcoords[u][count], vert->texcoord[u]);
+                  }
+               }
+            }
+#endif /*SPRITE*/
+
+            count++;  /* square point */
+
+#endif /*SMOOTH*/
+
+         } /*for x*/
+      } /*for y*/
+      span->end = count;
+   }}
+
+#else /* LARGE | ATTENUATE | SMOOTH | SPRITE */
+
+   /***
+    *** Single-pixel points
+    ***/
+   {{
+      GLuint count;
+
+      /* check if we need to flush */
+      if (span->end >= MAX_WIDTH ||
+          (swrast->_RasterMask & (BLEND_BIT | LOGIC_OP_BIT | MASKING_BIT))) {
+#if FLAGS & RGBA
+         _swrast_write_rgba_span(ctx, span);
+#else
+         _swrast_write_index_span(ctx, span);
+#endif
+         span->end = 0;
+      }
+
+      count = span->end;
+
+#if FLAGS & RGBA
+      span->array->rgba[count][RCOMP] = red;
+      span->array->rgba[count][GCOMP] = green;
+      span->array->rgba[count][BCOMP] = blue;
+      span->array->rgba[count][ACOMP] = alpha;
+#endif
+#if FLAGS & SPECULAR
+      span->array->spec[count][RCOMP] = specRed;
+      span->array->spec[count][GCOMP] = specGreen;
+      span->array->spec[count][BCOMP] = specBlue;
+#endif
+#if FLAGS & INDEX
+      span->array->index[count] = colorIndex;
+#endif
+#if FLAGS & TEXTURE
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (ctx->Texture._EnabledCoordUnits & (1 << u)) { /* same test that filled texcoord[] */
+            COPY_4V(span->array->texcoords[u][count], texcoord[u]);
+         }
+      }
+#endif
+
+      span->array->x[count] = (GLint) vert->win[0];
+      span->array->y[count] = (GLint) vert->win[1];
+      span->array->z[count] = (GLint) (vert->win[2] + 0.5F);
+      span->end = count + 1;
+   }}
+
+#endif /* LARGE || ATTENUATE || SMOOTH || SPRITE */
+
+   ASSERT(span->end <= MAX_WIDTH);
+}
+
+
+#undef FLAGS
+#undef NAME
diff --git a/dist/Mesa/src/mesa/swrast/s_readpix.c b/dist/Mesa/src/mesa/swrast/s_readpix.c
new file mode 100644
index 000000000..4c3be5a83
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_readpix.c
@@ -0,0 +1,596 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "bufferobj.h"
+#include "colormac.h"
+#include "convolve.h"
+#include "context.h"
+#include "feedback.h"
+#include "image.h"
+#include "macros.h"
+#include "imports.h"
+#include "pixel.h"
+#include "state.h"
+
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_span.h"
+#include "s_stencil.h"
+
+
+/**
+ * Read a block of color index pixels from the color read buffer,
+ * packing each row into 'pixels' according to 'type' and 'packing'.
+ */
+static void
+read_index_pixels( GLcontext *ctx,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum type, GLvoid *pixels,
+                   const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
+   GLint i;
+
+   ASSERT(rb);
+   ASSERT(rb->DataType == GL_UNSIGNED_INT);
+   /* width should never be > MAX_WIDTH since we did clipping earlier */
+   ASSERT(width <= MAX_WIDTH);
+
+   /* process image row by row */
+   for (i = 0; i < height; i++) {
+      GLuint index[MAX_WIDTH];
+      GLvoid *dest;
+
+      rb->GetRow(ctx, rb, width, x, y + i, index);
+      dest = _mesa_image_address2d(packing, pixels, width, height,
+                                   GL_COLOR_INDEX, type, i, 0);
+      /* pack with the caller's packing, the same one used for 'dest' */
+      _mesa_pack_index_span(ctx, width, type, dest, index,
+                            packing, ctx->_ImageTransferState);
+   }
+}
+
+
+
+/**
+ * Read pixels for format=GL_DEPTH_COMPONENT (fast paths for 16/24/32-bit Z).
+ */
+static void
+read_depth_pixels( GLcontext *ctx,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum type, GLvoid *pixels,
+                   const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->_DepthBuffer;
+   const GLboolean biasOrScale
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+
+   ASSERT(rb);  /* checked first: the asserts below dereference rb */
+
+   /* clipping should have been done already */
+   ASSERT(x >= 0);
+   ASSERT(y >= 0);
+   ASSERT(x + width <= rb->Width);
+   ASSERT(y + height <= rb->Height);
+   /* width should never be > MAX_WIDTH since we did clipping earlier */
+   ASSERT(width <= MAX_WIDTH);
+
+   if (type == GL_UNSIGNED_SHORT && fb->Visual.depthBits == 16
+       && !biasOrScale && !packing->SwapBytes) {
+      /* Special case: directly read 16-bit unsigned depth values. */
+      GLint j;
+      ASSERT(rb->InternalFormat == GL_DEPTH_COMPONENT16);
+      ASSERT(rb->DataType == GL_UNSIGNED_SHORT);
+      for (j = 0; j < height; j++, y++) {
+         void *dest =_mesa_image_address2d(packing, pixels, width, height,
+                                           GL_DEPTH_COMPONENT, type, j, 0);
+         rb->GetRow(ctx, rb, width, x, y, dest);
+      }
+   }
+   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 24
+            && !biasOrScale && !packing->SwapBytes) {
+      /* Special case: directly read 24-bit unsigned depth values. */
+      GLint j;
+      ASSERT(rb->InternalFormat == GL_DEPTH_COMPONENT32);
+      ASSERT(rb->DataType == GL_UNSIGNED_INT);
+      for (j = 0; j < height; j++, y++) {
+         GLuint *dest = (GLuint *)
+            _mesa_image_address2d(packing, pixels, width, height,
+                                  GL_DEPTH_COMPONENT, type, j, 0);
+         GLint k;
+         rb->GetRow(ctx, rb, width, x, y, dest);
+         /* expand 24-bit to 32-bit: replicate the 8 MSBs into the low byte */
+         for (k = 0; k < width; k++) {
+            dest[k] = (dest[k] << 8) | ((dest[k] >> 16) & 0xff);
+         }
+      }
+   }
+   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 32
+            && !biasOrScale && !packing->SwapBytes) {
+      /* Special case: directly read 32-bit unsigned depth values. */
+      GLint j;
+      ASSERT(rb->InternalFormat == GL_DEPTH_COMPONENT32);
+      ASSERT(rb->DataType == GL_UNSIGNED_INT);
+      for (j = 0; j < height; j++, y++) {
+         void *dest = _mesa_image_address2d(packing, pixels, width, height,
+                                            GL_DEPTH_COMPONENT, type, j, 0);
+         rb->GetRow(ctx, rb, width, x, y, dest);
+      }
+   }
+   else {
+      /* General case (slower) */
+      GLint j;
+      for (j = 0; j < height; j++, y++) {
+         GLfloat depthValues[MAX_WIDTH];
+         GLvoid *dest = _mesa_image_address2d(packing, pixels, width, height,
+                                              GL_DEPTH_COMPONENT, type, j, 0);
+         _swrast_read_depth_span_float(ctx, rb, width, x, y, depthValues);
+         _mesa_pack_depth_span(ctx, width, dest, type, depthValues, packing);
+      }
+   }
+}
+
+
+/**
+ * glReadPixels path for format == GL_STENCIL_INDEX: fetch each stencil
+ * row from the read framebuffer and pack it into the client image.
+ */
+static void
+read_stencil_pixels( GLcontext *ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum type, GLvoid *pixels,
+                     const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_renderbuffer *stencilRb = ctx->ReadBuffer->_StencilBuffer;
+   GLint row;
+
+   ASSERT(stencilRb);
+
+   /* clipping was done earlier, so width never exceeds MAX_WIDTH */
+   ASSERT(width <= MAX_WIDTH);
+
+   for (row = 0; row < height; row++) {
+      GLstencil values[MAX_WIDTH];
+      GLvoid *dst;
+
+      /* source row is y + row; destination row is 'row' of the image */
+      _swrast_read_stencil_span(ctx, stencilRb, width, x, y + row, values);
+
+      dst = _mesa_image_address2d(packing, pixels, width, height,
+                                  GL_STENCIL_INDEX, type, row, 0);
+
+      _mesa_pack_stencil_span(ctx, width, type, dst, values, packing);
+   }
+}
+
+
+
+/**
+ * Fast path for glReadPixels of GL_RGBA data whose type matches GLchan
+ * (GL_UNSIGNED_BYTE when CHAN_BITS is 8, GL_UNSIGNED_SHORT when 16).
+ * Only usable when pixel scaling, biasing and mapping are disabled and
+ * the packing is trivial.
+ * Returns GL_TRUE if the pixels were read here, GL_FALSE if the caller
+ * has to use another path.
+ */
+static GLboolean
+read_fast_rgba_pixels( GLcontext *ctx,
+                       GLint x, GLint y,
+                       GLsizei width, GLsizei height,
+                       GLenum format, GLenum type,
+                       GLvoid *pixels,
+                       const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
+   /* GL_TRUE when format/type are exactly GL_RGBA in the GLchan type */
+#if CHAN_BITS == 8
+   const GLboolean directCopy
+      = (format == GL_RGBA && type == GL_UNSIGNED_BYTE);
+#elif CHAN_BITS == 16
+   const GLboolean directCopy
+      = (format == GL_RGBA && type == GL_UNSIGNED_SHORT);
+#else
+   const GLboolean directCopy = GL_FALSE;
+#endif
+   GLint stride;   /* dest row length in pixels; negated when inverted */
+   GLchan *dst;
+   GLint row;
+
+   /* can't do scale, bias, mapping, etc */
+   if (ctx->_ImageTransferState) {
+      return GL_FALSE;
+   }
+
+   /* can't do fancy pixel packing */
+   if (packing->Alignment != 1 || packing->SwapBytes || packing->LsbFirst) {
+      return GL_FALSE;
+   }
+
+   /* can't do this format/type combination */
+   if (!directCopy) {
+      return GL_FALSE;
+   }
+
+   if (packing->RowLength > 0) {
+      stride = packing->RowLength;
+   }
+   else {
+      stride = width;
+   }
+
+   /*
+    * Ready to read!
+    * The window region at (x, y) of size (width, height) is read back.
+    * Pixel data goes to the buffer pointed to by "pixels", after
+    * skipping SkipRows rows and SkipPixels pixels per row.
+    */
+   dst = (GLchan *) pixels
+       + (packing->SkipRows * stride + packing->SkipPixels) * 4;
+
+   if (packing->Invert) {
+      /* start at the last destination row and walk backwards */
+      dst += (height - 1) * stride * 4;
+      stride = -stride;
+   }
+
+   ASSERT(rb->GetRow);
+   for (row = 0; row < height; row++) {
+      rb->GetRow(ctx, rb, width, x, y + row, dst);
+      dst += stride * 4;
+   }
+
+   return GL_TRUE;
+}
+
+
+
+/**
+ * Fetch one row of the color read buffer as GLchan RGBA values.
+ * In RGB mode the row is read with _swrast_read_rgba_span().  In color
+ * index mode the indexes are fetched with rb->GetRow(), passed through
+ * _mesa_map_ci() when an index shift or offset is set, and then converted
+ * with _mesa_map_ci_to_rgba_chan().
+ * \param fb  framebuffer whose Visual.rgbMode selects the mode
+ * \param rb  renderbuffer to read from
+ * \param width  number of pixels to read (caller keeps this <= MAX_WIDTH)
+ * \param x, y  position of the first pixel of the row
+ * \param rgba  receives the row
+ */
+static void
+read_row_as_rgba_chan( GLcontext *ctx, const struct gl_framebuffer *fb,
+                       struct gl_renderbuffer *rb,
+                       GLsizei width, GLint x, GLint y, GLchan rgba[][4] )
+{
+   if (fb->Visual.rgbMode) {
+      _swrast_read_rgba_span(ctx, rb, width, x, y, rgba);
+   }
+   else {
+      GLuint index[MAX_WIDTH];
+      ASSERT(rb->DataType == GL_UNSIGNED_INT);
+      rb->GetRow(ctx, rb, width, x, y, index);
+      if (ctx->Pixel.IndexShift != 0 || ctx->Pixel.IndexOffset != 0) {
+         _mesa_map_ci(ctx, width, index);
+      }
+      _mesa_map_ci_to_rgba_chan(ctx, width, index, rgba);
+   }
+}
+
+
+/**
+ * Read color pixels (R, G, B, A, RGB, L, LA and the other RGBA-derived
+ * formats) from the color read buffer into "pixels".
+ *
+ * read_fast_rgba_pixels() is tried first.  If it declines, rows are read
+ * one at a time and packed according to format/type/packing:
+ *  - with 2D or separable convolution enabled, the whole region is first
+ *    converted to a temporary float image (pre-convolution transfer ops),
+ *    convolved, and then packed with the post-convolution transfer ops;
+ *    on allocation failure GL_OUT_OF_MEMORY is recorded and nothing is
+ *    written.
+ *  - otherwise each row is packed directly, going through floats when any
+ *    of the red/green/blue channels has fewer than CHAN_BITS bits.
+ *
+ * \param x, y  lower-left corner of the (already clipped) region
+ * \param width, height  size of the region; width must be <= MAX_WIDTH
+ */
+static void
+read_rgba_pixels( GLcontext *ctx,
+                  GLint x, GLint y,
+                  GLsizei width, GLsizei height,
+                  GLenum format, GLenum type, GLvoid *pixels,
+                  const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
+
+   ASSERT(rb);
+
+   /* Try optimized path first */
+   if (read_fast_rgba_pixels( ctx, x, y, width, height,
+                              format, type, pixels, packing )) {
+      return; /* done! */
+   }
+
+   /* width should never be > MAX_WIDTH since we did clipping earlier */
+   ASSERT(width <= MAX_WIDTH);
+
+   if (ctx->Pixel.Convolution2DEnabled || ctx->Pixel.Separable2DEnabled) {
+      const GLuint transferOps = ctx->_ImageTransferState;
+      GLfloat *tmpImage, *convImage, *tmpRow, *convRow;
+      GLint row;
+
+      tmpImage = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
+      if (!tmpImage) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
+         return;
+      }
+      convImage = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
+      if (!convImage) {
+         _mesa_free(tmpImage);
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
+         return;
+      }
+
+      /* read full RGBA, FLOAT image */
+      tmpRow = tmpImage;
+      for (row = 0; row < height; row++, y++) {
+         GLchan rgba[MAX_WIDTH][4];
+         read_row_as_rgba_chan(ctx, fb, rb, width, x, y, rgba);
+         _mesa_pack_rgba_span_chan(ctx, width, (const GLchan (*)[4]) rgba,
+                              GL_RGBA, GL_FLOAT, tmpRow, &ctx->DefaultPacking,
+                              transferOps & IMAGE_PRE_CONVOLUTION_BITS);
+         tmpRow += width * 4;
+      }
+
+      /* do convolution; note that width and height are passed by address
+       * and are used with their possibly updated values below.
+       */
+      if (ctx->Pixel.Convolution2DEnabled) {
+         _mesa_convolve_2d_image(ctx, &width, &height, tmpImage, convImage);
+      }
+      else {
+         ASSERT(ctx->Pixel.Separable2DEnabled);
+         _mesa_convolve_sep_image(ctx, &width, &height, tmpImage, convImage);
+      }
+      _mesa_free(tmpImage);
+
+      /* finish transfer ops and pack the resulting image */
+      convRow = convImage;
+      for (row = 0; row < height; row++) {
+         GLvoid *dst = _mesa_image_address2d(packing, pixels, width, height,
+                                             format, type, row, 0);
+         _mesa_pack_rgba_span_float(ctx, width,
+                                    (const GLfloat (*)[4]) convRow,
+                                    format, type, dst, packing,
+                                    transferOps & IMAGE_POST_CONVOLUTION_BITS);
+         convRow += width * 4;
+      }
+      _mesa_free(convImage);
+   }
+   else {
+      /* no convolution */
+      /* With fewer than CHAN_BITS bits in a color channel the values are
+       * requantized through floating point.  This fixes conformance
+       * failures with 5/6/5 color buffers, for example.
+       */
+      const GLboolean requantize = (fb->Visual.redBits < CHAN_BITS ||
+                                    fb->Visual.greenBits < CHAN_BITS ||
+                                    fb->Visual.blueBits < CHAN_BITS);
+      GLint row;
+      for (row = 0; row < height; row++, y++) {
+         GLchan rgba[MAX_WIDTH][4];
+         GLvoid *dst;
+
+         read_row_as_rgba_chan(ctx, fb, rb, width, x, y, rgba);
+
+         dst = _mesa_image_address2d(packing, pixels, width, height,
+                                     format, type, row, 0);
+         if (requantize) {
+            GLfloat rgbaf[MAX_WIDTH][4];
+            _mesa_chan_to_float_span(ctx, width,
+                                     (CONST GLchan (*)[4]) rgba, rgbaf);
+            _mesa_pack_rgba_span_float(ctx, width,
+                                       (CONST GLfloat (*)[4]) rgbaf,
+                                       format, type, dst, packing,
+                                       ctx->_ImageTransferState);
+         }
+         else {
+            /* GLubytes are fine */
+            _mesa_pack_rgba_span_chan(ctx, width, (CONST GLchan (*)[4]) rgba,
+                                 format, type, dst, packing,
+                                 ctx->_ImageTransferState);
+         }
+      }
+   }
+}
+
+
+/**
+ * Read interleaved depth/stencil (GL_DEPTH_STENCIL_EXT) pixels.
+ * Error checking has already established that the read framebuffer has
+ * both a depth and a stencil buffer.
+ *
+ * Three strategies are used, from cheapest to most general:
+ *  1. depth and stencil are attached as one combined renderbuffer and no
+ *     depth/stencil transfer ops are active: rows are fetched straight
+ *     into the destination image.
+ *  2. separate buffers, no transfer ops, 24-bit depth: each value is
+ *     assembled as (z << 8) | (stencil & 0xff).
+ *  3. anything else: depth is read as floats and the row is packed by
+ *     _mesa_pack_depth_stencil_span().
+ */
+static void
+read_depth_stencil_pixels(GLcontext *ctx,
+                          GLint x, GLint y,
+                          GLsizei width, GLsizei height,
+                          GLenum type, GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing )
+{
+   const GLboolean scaleOrBias
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   const GLboolean stencilTransfer = ctx->Pixel.IndexShift
+      || ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag;
+   /* buffers as seen through the framebuffer's _DepthBuffer/_StencilBuffer */
+   struct gl_renderbuffer *zBuf = ctx->ReadBuffer->_DepthBuffer;
+   struct gl_renderbuffer *sBuf = ctx->ReadBuffer->_StencilBuffer;
+   /* buffers as actually attached to the framebuffer */
+   struct gl_renderbuffer *zAttached, *sAttached;
+   GLint row;
+
+   ASSERT(zBuf);
+   ASSERT(sBuf);
+
+   zAttached = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+   sAttached = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+
+   if (zAttached->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       sAttached->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       zAttached == sAttached &&
+       !scaleOrBias &&
+       !stencilTransfer) {
+      /* Best case: one combined depth/stencil renderbuffer and no pixel
+       * transfer ops, so each row can be fetched directly.
+       */
+      const GLint rowStride = _mesa_image_row_stride(packing, width,
+                                                     GL_DEPTH_STENCIL_EXT,
+                                                     type);
+      GLubyte *rowDst = (GLubyte *) _mesa_image_address2d(packing, pixels,
+                                                          width, height,
+                                                          GL_DEPTH_STENCIL_EXT,
+                                                          type, 0, 0);
+      for (row = 0; row < height; row++) {
+         zAttached->GetRow(ctx, zAttached, width, x, y + row, rowDst);
+         rowDst += rowStride;
+      }
+      return;
+   }
+
+   /* Separate depth and stencil buffers, or pixel transfer ops are needed. */
+   for (row = 0; row < height; row++) {
+      GLstencil stencilRow[MAX_WIDTH];
+      GLuint *packedDst = (GLuint *)
+         _mesa_image_address2d(packing, pixels, width, height,
+                               GL_DEPTH_STENCIL_EXT, type, row, 0);
+
+      _swrast_read_stencil_span(ctx, sBuf, width, x, y + row, stencilRow);
+
+      if (!scaleOrBias && !stencilTransfer
+          && ctx->ReadBuffer->Visual.depthBits == 24) {
+         /* no transfer ops and 24-bit Z: combine the values by hand */
+         GLuint zRow[MAX_WIDTH]; /* 24-bit values! */
+         GLint col;
+         ASSERT(zBuf->DataType == GL_UNSIGNED_INT);
+         /* note, we've already been clipped */
+         zBuf->GetRow(ctx, zBuf, width, x, y + row, zRow);
+         for (col = 0; col < width; col++) {
+            packedDst[col] = (zRow[col] << 8) | (stencilRow[col] & 0xff);
+         }
+      }
+      else {
+         /* general case: go through float depth values */
+         GLfloat depthRow[MAX_WIDTH];
+         _swrast_read_depth_span_float(ctx, zBuf, width, x, y + row,
+                                       depthRow);
+         _mesa_pack_depth_stencil_span(ctx, width, packedDst,
+                                       depthRow, stencilRow, packing);
+      }
+   }
+}
+
+
+
+/**
+ * Software fallback routine for ctx->Driver.ReadPixels().
+ * By time we get here, all error checking will have been done.
+ *
+ * The region is clipped against the read buffer, the destination is
+ * resolved (client memory, or a pixel buffer object that is mapped for the
+ * duration of the call), and the work is dispatched on "format" to the
+ * matching read_*_pixels() routine.
+ *
+ * Every exit path runs RENDER_FINISH to balance the RENDER_START issued on
+ * entry, and the PBO is unmapped only if this function mapped it.
+ *
+ * \param x, y  lower-left corner of the region to read
+ * \param width, height  size of the region to read
+ * \param format, type  layout of the destination image
+ * \param packing  pixel pack state; copied locally so clipping can adjust it
+ * \param pixels  destination address, or an offset into the bound PBO
+ */
+void
+_swrast_ReadPixels( GLcontext *ctx,
+                    GLint x, GLint y, GLsizei width, GLsizei height,
+                    GLenum format, GLenum type,
+                    const struct gl_pixelstore_attrib *packing,
+                    GLvoid *pixels )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_pixelstore_attrib clippedPacking = *packing;
+   /* set once we have successfully mapped the PBO ourselves */
+   GLboolean pboMapped = GL_FALSE;
+
+   /* Need to do RENDER_START before clipping or anything else since this
+    * is where a driver may grab the hw lock and get an updated window
+    * size.
+    */
+   RENDER_START(swrast, ctx);
+
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+
+   /* Do all needed clipping here, so that we can forget about it later */
+   if (!_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) {
+      /* The ReadPixels region is totally outside the window bounds.
+       * Still go through the common exit so RENDER_FINISH is executed.
+       */
+      goto end;
+   }
+
+   if (clippedPacking.BufferObj->Name) {
+      /* pack into PBO */
+      GLubyte *buf;
+      if (!_mesa_validate_pbo_access(2, &clippedPacking, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glReadPixels(invalid PBO access)");
+         goto end;
+      }
+      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
+                                              GL_WRITE_ONLY_ARB,
+                                              clippedPacking.BufferObj);
+      if (!buf) {
+         /* buffer is already mapped - that's an error.  We must not unmap
+          * it on the way out: the existing mapping is not ours.
+          */
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glReadPixels(PBO is mapped)");
+         goto end;
+      }
+      pboMapped = GL_TRUE;
+      pixels = ADD_POINTERS(buf, pixels);
+   }
+
+   switch (format) {
+      case GL_COLOR_INDEX:
+         read_index_pixels(ctx, x, y, width, height, type, pixels,
+                           &clippedPacking);
+         break;
+      case GL_STENCIL_INDEX:
+         read_stencil_pixels(ctx, x, y, width, height, type, pixels,
+                             &clippedPacking);
+         break;
+      case GL_DEPTH_COMPONENT:
+         read_depth_pixels(ctx, x, y, width, height, type, pixels,
+                           &clippedPacking);
+         break;
+      case GL_RED:
+      case GL_GREEN:
+      case GL_BLUE:
+      case GL_ALPHA:
+      case GL_RGB:
+      case GL_LUMINANCE:
+      case GL_LUMINANCE_ALPHA:
+      case GL_RGBA:
+      case GL_BGR:
+      case GL_BGRA:
+      case GL_ABGR_EXT:
+         read_rgba_pixels(ctx, x, y, width, height,
+                          format, type, pixels, &clippedPacking);
+         break;
+      case GL_DEPTH_STENCIL_EXT:
+         read_depth_stencil_pixels(ctx, x, y, width, height,
+                                   type, pixels, &clippedPacking);
+         break;
+      default:
+         _mesa_problem(ctx, "unexpected format in _swrast_ReadPixels");
+         /* don't return yet, clean-up */
+         break;
+   }
+
+
+end:
+   RENDER_FINISH(swrast, ctx);
+
+   if (pboMapped) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
+                              clippedPacking.BufferObj);
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_span.c b/dist/Mesa/src/mesa/swrast/s_span.c
new file mode 100644
index 000000000..d14310541
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_span.c
@@ -0,0 +1,1622 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file swrast/s_span.c
+ * \brief Span processing functions used by all rasterization functions.
+ * This is where all the per-fragment tests are performed
+ * \author Brian Paul
+ */
+
+#include "glheader.h"
+#include "colormac.h"
+#include "context.h"
+#include "macros.h"
+#include "imports.h"
+
+#include "s_atifragshader.h"
+#include "s_alpha.h"
+#include "s_arbshader.h"
+#include "s_blend.h"
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_fog.h"
+#include "s_logic.h"
+#include "s_masking.h"
+#include "s_nvfragprog.h"
+#include "s_span.h"
+#include "s_stencil.h"
+#include "s_texcombine.h"
+
+
+/**
+ * Set up a span so that every fragment gets the current raster position's
+ * Z value (zero Z step).  Used during setup for glDraw/CopyPixels.
+ * For depth buffers of 16 bits or less the Z value is kept in fixed point;
+ * for deeper buffers it is stored as a plain integer.
+ */
+void
+_swrast_span_default_z( GLcontext *ctx, struct sw_span *span )
+{
+   const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF;
+
+   if (ctx->DrawBuffer->Visual.depthBits > 16) {
+      /* deep Z buffer: integer Z */
+      span->z = (GLint) (ctx->Current.RasterPos[2] * depthMax + 0.5F);
+   }
+   else {
+      /* shallow Z buffer: fixed-point Z */
+      span->z = FloatToFixed(ctx->Current.RasterPos[2] * depthMax + 0.5F);
+   }
+
+   span->zStep = 0;
+   span->interpMask |= SPAN_Z;
+}
+
+
+/**
+ * Set up a span so that every fragment gets the fog factor derived from
+ * the current raster distance, with all fog derivatives zero.
+ * Used during setup for glDraw/CopyPixels.
+ */
+void
+_swrast_span_default_fog( GLcontext *ctx, struct sw_span *span )
+{
+   /* constant fog: no change along the span or between rows */
+   span->dfogdy = 0.0F;
+   span->dfogdx = 0.0F;
+   span->fogStep = 0.0F;
+   span->fog = _swrast_z_to_fogfactor(ctx, ctx->Current.RasterDistance);
+   span->interpMask |= SPAN_FOG;
+}
+
+
+/**
+ * Set up a span so that every fragment gets the current raster color
+ * (RGBA mode) or the current raster index (color index mode), with zero
+ * interpolation steps.  Used during setup for glDraw/CopyPixels.
+ */
+void
+_swrast_span_default_color( GLcontext *ctx, struct sw_span *span )
+{
+   if (!ctx->Visual.rgbMode) {
+      /* color index mode */
+      span->index = FloatToFixed(ctx->Current.RasterIndex);
+      span->indexStep = 0;
+      span->interpMask |= SPAN_INDEX;
+   }
+   else {
+      /* RGBA mode */
+      GLchan red, green, blue, alpha;
+
+      UNCLAMPED_FLOAT_TO_CHAN(red,   ctx->Current.RasterColor[0]);
+      UNCLAMPED_FLOAT_TO_CHAN(green, ctx->Current.RasterColor[1]);
+      UNCLAMPED_FLOAT_TO_CHAN(blue,  ctx->Current.RasterColor[2]);
+      UNCLAMPED_FLOAT_TO_CHAN(alpha, ctx->Current.RasterColor[3]);
+
+#if CHAN_TYPE == GL_FLOAT
+      /* float channels are stored as-is */
+      span->red = red;
+      span->green = green;
+      span->blue = blue;
+      span->alpha = alpha;
+#else
+      /* integer channels are stored in fixed point */
+      span->red   = IntToFixed(red);
+      span->green = IntToFixed(green);
+      span->blue  = IntToFixed(blue);
+      span->alpha = IntToFixed(alpha);
+#endif
+
+      span->redStep = 0;
+      span->greenStep = 0;
+      span->blueStep = 0;
+      span->alphaStep = 0;
+      span->interpMask |= SPAN_RGBA;
+   }
+}
+
+
+/**
+ * Set up a span so that every fragment gets the current raster position's
+ * texture coordinates, for each texture coordinate unit, with zero X and Y
+ * steps.  Used during setup for glDraw/CopyPixels.
+ *
+ * When a fragment program or ATI fragment shader is in use the (s,t,r,q)
+ * values are copied unchanged.  Otherwise they are projected to
+ * (s/q, t/q, r/q, 1) when q > 0, or replaced by (0, 0, 0, 1).
+ */
+void
+_swrast_span_default_texcoords( GLcontext *ctx, struct sw_span *span )
+{
+   /* same for every unit, so evaluate it once */
+   const GLboolean copyUnprojected =
+      ctx->FragmentProgram._Active || ctx->ATIFragmentShader._Enabled;
+   GLuint unit;
+
+   for (unit = 0; unit < ctx->Const.MaxTextureCoordUnits; unit++) {
+      const GLfloat *tc = ctx->Current.RasterTexCoords[unit];
+
+      if (copyUnprojected) {
+         COPY_4V(span->tex[unit], tc);
+      }
+      else if (tc[3] > 0.0F) {
+         /* use (s/q, t/q, r/q, 1) */
+         span->tex[unit][0] = tc[0] / tc[3];
+         span->tex[unit][1] = tc[1] / tc[3];
+         span->tex[unit][2] = tc[2] / tc[3];
+         span->tex[unit][3] = 1.0;
+      }
+      else {
+         /* q is not positive: fall back to a fixed coordinate */
+         ASSIGN_4V(span->tex[unit], 0.0F, 0.0F, 0.0F, 1.0F);
+      }
+
+      /* constant texcoords: no change in X or Y */
+      ASSIGN_4V(span->texStepX[unit], 0.0F, 0.0F, 0.0F, 0.0F);
+      ASSIGN_4V(span->texStepY[unit], 0.0F, 0.0F, 0.0F, 0.0F);
+   }
+
+   span->interpMask |= SPAN_TEXTURE;
+}
+
+
+/**
+ * Fill in the span's rgba array (span->array->rgba) for span->end
+ * fragments from the span's color interpolation values, then flag the
+ * array as valid in span->arrayMask.
+ * With SPAN_FLAT set, a single constant color is replicated; otherwise the
+ * color is stepped by the per-channel step values.
+ */
+static void
+interpolate_colors(GLcontext *ctx, struct sw_span *span)
+{
+   GLchan (*out)[4] = span->array->rgba;
+   const GLuint count = span->end;
+   GLuint k;
+   (void) ctx;
+
+   ASSERT((span->interpMask & SPAN_RGBA)  &&
+	  !(span->arrayMask & SPAN_RGBA));
+
+   if (!(span->interpMask & SPAN_FLAT)) {
+      /* smooth shading: step each channel across the span */
+#if CHAN_TYPE == GL_FLOAT
+      GLfloat red = span->red;
+      GLfloat green = span->green;
+      GLfloat blue = span->blue;
+      GLfloat alpha = span->alpha;
+      const GLfloat redInc = span->redStep;
+      const GLfloat greenInc = span->greenStep;
+      const GLfloat blueInc = span->blueStep;
+      const GLfloat alphaInc = span->alphaStep;
+#else
+      GLfixed red = span->red;
+      GLfixed green = span->green;
+      GLfixed blue = span->blue;
+      GLfixed alpha = span->alpha;
+      const GLint redInc = span->redStep;
+      const GLint greenInc = span->greenStep;
+      const GLint blueInc = span->blueStep;
+      const GLint alphaInc = span->alphaStep;
+#endif
+      for (k = 0; k < count; k++) {
+         out[k][RCOMP] = FixedToChan(red);
+         out[k][GCOMP] = FixedToChan(green);
+         out[k][BCOMP] = FixedToChan(blue);
+         out[k][ACOMP] = FixedToChan(alpha);
+         red += redInc;
+         green += greenInc;
+         blue += blueInc;
+         alpha += alphaInc;
+      }
+   }
+   else {
+      /* flat shading: one color for the whole span */
+      GLchan flat[4];
+      flat[RCOMP] = FixedToChan(span->red);
+      flat[GCOMP] = FixedToChan(span->green);
+      flat[BCOMP] = FixedToChan(span->blue);
+      flat[ACOMP] = FixedToChan(span->alpha);
+      for (k = 0; k < count; k++) {
+         COPY_CHAN4(out[k], flat);
+      }
+   }
+
+   span->arrayMask |= SPAN_RGBA;
+}
+
+
+/**
+ * Fill in the span's color index array (span->array->index) for span->end
+ * fragments from the span's fixed-point index interpolation values.
+ * Afterwards SPAN_INDEX is set in span->arrayMask and cleared from
+ * span->interpMask.
+ */
+static void
+interpolate_indexes(GLcontext *ctx, struct sw_span *span)
+{
+   const GLfixed start = span->index;
+   const GLint step = span->indexStep;
+   const GLuint count = span->end;
+   GLuint *out = span->array->index;
+   GLuint k;
+   (void) ctx;
+   ASSERT((span->interpMask & SPAN_INDEX)  &&
+	  !(span->arrayMask & SPAN_INDEX));
+
+   if (!(span->interpMask & SPAN_FLAT) && step != 0) {
+      /* the index changes along the span */
+      GLfixed current = start;
+      for (k = 0; k < count; k++) {
+         out[k] = FixedToInt(current);
+         current += step;
+      }
+   }
+   else {
+      /* flat shaded or zero step: a single index for all fragments */
+      const GLint flat = FixedToInt(start);
+      for (k = 0; k < count; k++) {
+         out[k] = flat;
+      }
+   }
+
+   span->interpMask &= ~SPAN_INDEX;
+   span->arrayMask |= SPAN_INDEX;
+}
+
+
+/**
+ * Fill in the R, G and B components of the span's specular color array
+ * (span->array->spec) for span->end fragments from the span's specular
+ * interpolation values, then flag the array as valid in span->arrayMask.
+ * The alpha component is not written.
+ */
+static void
+interpolate_specular(GLcontext *ctx, struct sw_span *span)
+{
+   const GLuint count = span->end;
+   GLuint k;
+   (void) ctx;
+
+   if (!(span->interpMask & SPAN_FLAT)) {
+      /* smooth shading: step each channel across the span */
+#if CHAN_TYPE == GL_FLOAT
+      GLfloat red = span->specRed;
+      GLfloat green = span->specGreen;
+      GLfloat blue = span->specBlue;
+#else
+      GLfixed red = span->specRed;
+      GLfixed green = span->specGreen;
+      GLfixed blue = span->specBlue;
+#endif
+      for (k = 0; k < count; k++) {
+         GLchan *texel = span->array->spec[k];
+         texel[RCOMP] = FixedToChan(red);
+         texel[GCOMP] = FixedToChan(green);
+         texel[BCOMP] = FixedToChan(blue);
+         red += span->specRedStep;
+         green += span->specGreenStep;
+         blue += span->specBlueStep;
+      }
+   }
+   else {
+      /* flat shading: one specular color for the whole span */
+      const GLchan red = FixedToChan(span->specRed);
+      const GLchan green = FixedToChan(span->specGreen);
+      const GLchan blue = FixedToChan(span->specBlue);
+      for (k = 0; k < count; k++) {
+         GLchan *texel = span->array->spec[k];
+         texel[RCOMP] = red;
+         texel[GCOMP] = green;
+         texel[BCOMP] = blue;
+      }
+   }
+
+   span->arrayMask |= SPAN_SPEC;
+}
+
+
+/**
+ * Fill in the span's fog array (span->array->fog) for span->end fragments
+ * from the span's fog interpolation values, then flag the array as valid
+ * in span->arrayMask.
+ * Each fog coordinate is divided by the interpolated W when the span
+ * carries W (SPAN_W); otherwise W is taken as a constant 1.
+ */
+static void
+interpolate_fog(const GLcontext *ctx, struct sw_span *span)
+{
+   const GLuint haveW = (span->interpMask & SPAN_W);
+   const GLfloat fogInc = span->fogStep;
+   const GLfloat wInc = haveW ? span->dwdx : 0.0F;
+   GLfloat *out = span->array->fog;
+   GLfloat fogCoord = span->fog;
+   GLfloat w = haveW ? span->w : 1.0F;
+   GLuint k;
+
+   for (k = 0; k < span->end; k++) {
+      out[k] = fogCoord / w;
+      fogCoord += fogInc;
+      w += wInc;
+   }
+
+   span->arrayMask |= SPAN_FOG;
+}
+
+
+/**
+ * Fill in the span's Z array (span->array->z) for span->end fragments from
+ * the span's Z interpolation values.
+ * Afterwards SPAN_Z is cleared from span->interpMask and set in
+ * span->arrayMask.
+ */
+void
+_swrast_span_interpolate_z( const GLcontext *ctx, struct sw_span *span )
+{
+   const GLuint count = span->end;
+   GLuint *out = span->array->z;
+   GLuint k;
+
+   ASSERT((span->interpMask & SPAN_Z)  &&
+	  !(span->arrayMask & SPAN_Z));
+
+   if (ctx->DrawBuffer->Visual.depthBits > 16) {
+      /* Deep Z buffer, no fixed->int shift */
+      GLuint zval = span->z;
+      for (k = 0; k < count; k++) {
+         out[k] = zval;
+         zval += span->zStep;
+      }
+   }
+   else {
+      /* Z is interpolated in fixed point and converted per fragment */
+      GLfixed zval = span->z;
+      for (k = 0; k < count; k++) {
+         out[k] = FixedToInt(zval);
+         zval += span->zStep;
+      }
+   }
+
+   span->arrayMask |= SPAN_Z;
+   span->interpMask &= ~SPAN_Z;
+}
+
+
+/*
+ * This is the ideal solution, as given in the OpenGL spec.
+ *
+ * NOTE: this function is compiled out (#if 0); it is kept only as a
+ * reference for the computation.
+ *
+ * The d?dx / d?dy arguments are the changes in s, t and q for one step in
+ * x and in y; texW and texH scale the projected s and t differences;
+ * invQ is used in place of 1/q for the current fragment.
+ * Returns LOG2 of rho, where rho is the larger of the lengths of the
+ * (du,dv) vectors for a step in x and for a step in y.
+ */
+#if 0
+static GLfloat
+compute_lambda(GLfloat dsdx, GLfloat dsdy, GLfloat dtdx, GLfloat dtdy,
+               GLfloat dqdx, GLfloat dqdy, GLfloat texW, GLfloat texH,
+               GLfloat s, GLfloat t, GLfloat q, GLfloat invQ)
+{
+   /* change in the scaled, projected coordinates for one step in x / y */
+   GLfloat dudx = texW * ((s + dsdx) / (q + dqdx) - s * invQ);
+   GLfloat dvdx = texH * ((t + dtdx) / (q + dqdx) - t * invQ);
+   GLfloat dudy = texW * ((s + dsdy) / (q + dqdy) - s * invQ);
+   GLfloat dvdy = texH * ((t + dtdy) / (q + dqdy) - t * invQ);
+   /* lengths of the x-step and y-step vectors */
+   GLfloat x = SQRTF(dudx * dudx + dvdx * dvdx);
+   GLfloat y = SQRTF(dudy * dudy + dvdy * dvdy);
+   GLfloat rho = MAX2(x, y);
+   GLfloat lambda = LOG2(rho);
+   return lambda;
+}
+#endif
+
+
+/**
+ * Compute the texture level-of-detail value (lambda) for one fragment,
+ * using a faster approximation: instead of vector lengths, the largest
+ * absolute per-axis change is used as rho.
+ *
+ * \param dsdx, dsdy, dtdx, dtdy, dqdx, dqdy  changes in s, t and q for one
+ *        step in x and in y
+ * \param texW, texH  scale factors applied to the projected s and t changes
+ * \param s, t, q  texture coordinates at the current fragment
+ * \param invQ  value multiplied with s and t for the current fragment
+ * \return LOG2(rho)
+ */
+GLfloat
+_swrast_compute_lambda(GLfloat dsdx, GLfloat dsdy, GLfloat dtdx, GLfloat dtdy,
+                     GLfloat dqdx, GLfloat dqdy, GLfloat texW, GLfloat texH,
+                     GLfloat s, GLfloat t, GLfloat q, GLfloat invQ)
+{
+   /* change in projected s and t for one step in x and one step in y */
+   GLfloat sStepX = (s + dsdx) / (q + dqdx) - s * invQ;
+   GLfloat tStepX = (t + dtdx) / (q + dqdx) - t * invQ;
+   GLfloat sStepY = (s + dsdy) / (q + dqdy) - s * invQ;
+   GLfloat tStepY = (t + dtdy) / (q + dqdy) - t * invQ;
+   GLfloat uExtent, vExtent, rho, lambda;
+
+   /* only the magnitudes matter */
+   sStepX = FABSF(sStepX);
+   sStepY = FABSF(sStepY);
+   tStepX = FABSF(tStepX);
+   tStepY = FABSF(tStepY);
+
+   /* largest change along each texture axis, scaled by texW / texH */
+   uExtent = MAX2(sStepX, sStepY) * texW;
+   vExtent = MAX2(tStepX, tStepY) * texH;
+
+   rho = MAX2(uExtent, vExtent);
+   lambda = LOG2(rho);
+   return lambda;
+}
+
+
+/**
+ * Fill in the span.texcoords array from the interpolation values.
+ * Note: in the places where we divide by Q (or mult by invQ) we're
+ * really doing two things: perspective correction and texcoord
+ * projection.  Remember, for texcoord (s,t,r,q) we need to index
+ * texels with (s/q, t/q, r/q).
+ * If we're using a fragment program, we never do the division
+ * for texcoord projection.  That's done by the TXP instruction
+ * or user-written code.
+ */
+static void
+interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
+{
+   ASSERT(span->interpMask & SPAN_TEXTURE);
+   ASSERT(!(span->arrayMask & SPAN_TEXTURE));
+
+   if (ctx->Texture._EnabledCoordUnits > 1) {
+      /* multitexture */
+      GLuint u;
+      span->arrayMask |= SPAN_TEXTURE;
+      /* XXX CoordUnits vs. ImageUnits */
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (ctx->Texture._EnabledCoordUnits & (1 << u)) {
+            const struct gl_texture_object *obj =ctx->Texture.Unit[u]._Current;
+            GLfloat texW, texH;
+            GLboolean needLambda;
+            if (obj) {
+               const struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
+               needLambda = (obj->MinFilter != obj->MagFilter)
+                  || ctx->FragmentProgram._Active;
+               texW = img->WidthScale;
+               texH = img->HeightScale;
+            }
+            else {
+               /* using a fragment program */
+               texW = 1.0;
+               texH = 1.0;
+               needLambda = GL_FALSE;
+            }
+            if (needLambda) {
+               GLfloat (*texcoord)[4] = span->array->texcoords[u];
+               GLfloat *lambda = span->array->lambda[u];
+               const GLfloat dsdx = span->texStepX[u][0];
+               const GLfloat dsdy = span->texStepY[u][0];
+               const GLfloat dtdx = span->texStepX[u][1];
+               const GLfloat dtdy = span->texStepY[u][1];
+               const GLfloat drdx = span->texStepX[u][2];
+               const GLfloat dqdx = span->texStepX[u][3];
+               const GLfloat dqdy = span->texStepY[u][3];
+               GLfloat s = span->tex[u][0];
+               GLfloat t = span->tex[u][1];
+               GLfloat r = span->tex[u][2];
+               GLfloat q = span->tex[u][3];
+               GLuint i;
+               if (ctx->FragmentProgram._Active || ctx->ATIFragmentShader._Enabled ||
+                   ctx->ShaderObjects._FragmentShaderPresent) {
+                  /* do perspective correction but don't divide s, t, r by q */
+                  const GLfloat dwdx = span->dwdx;
+                  GLfloat w = span->w;
+                  for (i = 0; i < span->end; i++) {
+                     const GLfloat invW = 1.0F / w;
+                     texcoord[i][0] = s * invW;
+                     texcoord[i][1] = t * invW;
+                     texcoord[i][2] = r * invW;
+                     texcoord[i][3] = q * invW;
+                     lambda[i] = _swrast_compute_lambda(dsdx, dsdy, dtdx, dtdy,
+                                                        dqdx, dqdy, texW, texH,
+                                                        s, t, q, invW);
+                     s += dsdx;
+                     t += dtdx;
+                     r += drdx;
+                     q += dqdx;
+                     w += dwdx;
+                  }
+
+               }
+               else {
+                  for (i = 0; i < span->end; i++) {
+                     const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+                     texcoord[i][0] = s * invQ;
+                     texcoord[i][1] = t * invQ;
+                     texcoord[i][2] = r * invQ;
+                     texcoord[i][3] = q;
+                     lambda[i] = _swrast_compute_lambda(dsdx, dsdy, dtdx, dtdy,
+                                                        dqdx, dqdy, texW, texH,
+                                                        s, t, q, invQ);
+                     s += dsdx;
+                     t += dtdx;
+                     r += drdx;
+                     q += dqdx;
+                  }
+               }
+               span->arrayMask |= SPAN_LAMBDA;
+            }
+            else {
+               GLfloat (*texcoord)[4] = span->array->texcoords[u];
+               GLfloat *lambda = span->array->lambda[u];
+               const GLfloat dsdx = span->texStepX[u][0];
+               const GLfloat dtdx = span->texStepX[u][1];
+               const GLfloat drdx = span->texStepX[u][2];
+               const GLfloat dqdx = span->texStepX[u][3];
+               GLfloat s = span->tex[u][0];
+               GLfloat t = span->tex[u][1];
+               GLfloat r = span->tex[u][2];
+               GLfloat q = span->tex[u][3];
+               GLuint i;
+               if (ctx->FragmentProgram._Active || ctx->ATIFragmentShader._Enabled ||
+                   ctx->ShaderObjects._FragmentShaderPresent) {
+                  /* do perspective correction but don't divide s, t, r by q */
+                  const GLfloat dwdx = span->dwdx;
+                  GLfloat w = span->w;
+                  for (i = 0; i < span->end; i++) {
+                     const GLfloat invW = 1.0F / w;
+                     texcoord[i][0] = s * invW;
+                     texcoord[i][1] = t * invW;
+                     texcoord[i][2] = r * invW;
+                     texcoord[i][3] = q * invW;
+                     lambda[i] = 0.0;
+                     s += dsdx;
+                     t += dtdx;
+                     r += drdx;
+                     q += dqdx;
+                     w += dwdx;
+                  }
+               }
+               else if (dqdx == 0.0F) {
+                  /* Ortho projection or polygon's parallel to window X axis */
+                  const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+                  for (i = 0; i < span->end; i++) {
+                     texcoord[i][0] = s * invQ;
+                     texcoord[i][1] = t * invQ;
+                     texcoord[i][2] = r * invQ;
+                     texcoord[i][3] = q;
+                     lambda[i] = 0.0;
+                     s += dsdx;
+                     t += dtdx;
+                     r += drdx;
+                  }
+               }
+               else {
+                  for (i = 0; i < span->end; i++) {
+                     const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+                     texcoord[i][0] = s * invQ;
+                     texcoord[i][1] = t * invQ;
+                     texcoord[i][2] = r * invQ;
+                     texcoord[i][3] = q;
+                     lambda[i] = 0.0;
+                     s += dsdx;
+                     t += dtdx;
+                     r += drdx;
+                     q += dqdx;
+                  }
+               }
+            } /* lambda */
+         } /* if */
+      } /* for */
+   }
+   else {
+      /* single texture */
+      const struct gl_texture_object *obj = ctx->Texture.Unit[0]._Current;
+      GLfloat texW, texH;
+      GLboolean needLambda;
+      if (obj) {
+         const struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
+         needLambda = (obj->MinFilter != obj->MagFilter)
+            || ctx->FragmentProgram._Active;
+         texW = (GLfloat) img->WidthScale;
+         texH = (GLfloat) img->HeightScale;
+      }
+      else {
+         needLambda = GL_FALSE;
+         texW = texH = 1.0;
+      }
+      span->arrayMask |= SPAN_TEXTURE;
+      if (needLambda) {
+         /* just texture unit 0, with lambda */
+         GLfloat (*texcoord)[4] = span->array->texcoords[0];
+         GLfloat *lambda = span->array->lambda[0];
+         const GLfloat dsdx = span->texStepX[0][0];
+         const GLfloat dsdy = span->texStepY[0][0];
+         const GLfloat dtdx = span->texStepX[0][1];
+         const GLfloat dtdy = span->texStepY[0][1];
+         const GLfloat drdx = span->texStepX[0][2];
+         const GLfloat dqdx = span->texStepX[0][3];
+         const GLfloat dqdy = span->texStepY[0][3];
+         GLfloat s = span->tex[0][0];
+         GLfloat t = span->tex[0][1];
+         GLfloat r = span->tex[0][2];
+         GLfloat q = span->tex[0][3];
+         GLuint i;
+         if (ctx->FragmentProgram._Active || ctx->ATIFragmentShader._Enabled ||
+             ctx->ShaderObjects._FragmentShaderPresent) {
+            /* do perspective correction but don't divide s, t, r by q */
+            const GLfloat dwdx = span->dwdx;
+            GLfloat w = span->w;
+            for (i = 0; i < span->end; i++) {
+               const GLfloat invW = 1.0F / w;
+               texcoord[i][0] = s * invW;
+               texcoord[i][1] = t * invW;
+               texcoord[i][2] = r * invW;
+               texcoord[i][3] = q * invW;
+               lambda[i] = _swrast_compute_lambda(dsdx, dsdy, dtdx, dtdy,
+                                                  dqdx, dqdy, texW, texH,
+                                                  s, t, q, invW);
+               s += dsdx;
+               t += dtdx;
+               r += drdx;
+               q += dqdx;
+               w += dwdx;
+            }
+         }
+         else {
+            /* tex.c */
+            for (i = 0; i < span->end; i++) {
+               const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+               lambda[i] = _swrast_compute_lambda(dsdx, dsdy, dtdx, dtdy,
+                                                dqdx, dqdy, texW, texH,
+                                                s, t, q, invQ);
+               texcoord[i][0] = s * invQ;
+               texcoord[i][1] = t * invQ;
+               texcoord[i][2] = r * invQ;
+               texcoord[i][3] = q;
+               s += dsdx;
+               t += dtdx;
+               r += drdx;
+               q += dqdx;
+            }
+         }
+         span->arrayMask |= SPAN_LAMBDA;
+      }
+      else {
+         /* just texture 0, without lambda */
+         GLfloat (*texcoord)[4] = span->array->texcoords[0];
+         const GLfloat dsdx = span->texStepX[0][0];
+         const GLfloat dtdx = span->texStepX[0][1];
+         const GLfloat drdx = span->texStepX[0][2];
+         const GLfloat dqdx = span->texStepX[0][3];
+         GLfloat s = span->tex[0][0];
+         GLfloat t = span->tex[0][1];
+         GLfloat r = span->tex[0][2];
+         GLfloat q = span->tex[0][3];
+         GLuint i;
+         if (ctx->FragmentProgram._Active || ctx->ATIFragmentShader._Enabled ||
+             ctx->ShaderObjects._FragmentShaderPresent) {
+            /* do perspective correction but don't divide s, t, r by q */
+            const GLfloat dwdx = span->dwdx;
+            GLfloat w = span->w;
+            for (i = 0; i < span->end; i++) {
+               const GLfloat invW = 1.0F / w;
+               texcoord[i][0] = s * invW;
+               texcoord[i][1] = t * invW;
+               texcoord[i][2] = r * invW;
+               texcoord[i][3] = q * invW;
+               s += dsdx;
+               t += dtdx;
+               r += drdx;
+               q += dqdx;
+               w += dwdx;
+            }
+         }
+         else if (dqdx == 0.0F) {
+            /* Ortho projection or polygon's parallel to window X axis */
+            const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+            for (i = 0; i < span->end; i++) {
+               texcoord[i][0] = s * invQ;
+               texcoord[i][1] = t * invQ;
+               texcoord[i][2] = r * invQ;
+               texcoord[i][3] = q;
+               s += dsdx;
+               t += dtdx;
+               r += drdx;
+            }
+         }
+         else {
+            for (i = 0; i < span->end; i++) {
+               const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+               texcoord[i][0] = s * invQ;
+               texcoord[i][1] = t * invQ;
+               texcoord[i][2] = r * invQ;
+               texcoord[i][3] = q;
+               s += dsdx;
+               t += dtdx;
+               r += drdx;
+               q += dqdx;
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Expand the interpolated "varying" attributes of a span into
+ * span->array->varying[].
+ *
+ * For each component of each varying vector the running value starts at
+ * span->var[][] and advances by span->varStepX[][] per fragment; it is
+ * multiplied by 1/w, where w starts at span->w and advances by span->dwdx.
+ * SPAN_VARYING is set in span->arrayMask.
+ *
+ * \param ctx   rendering context (not referenced here)
+ * \param span  span to process; must have SPAN_VARYING set in interpMask
+ *              and clear in arrayMask
+ */
+static void
+interpolate_varying(GLcontext *ctx, struct sw_span *span)
+{
+   GLuint vec, comp;
+
+   ASSERT(span->interpMask & SPAN_VARYING);
+   ASSERT(!(span->arrayMask & SPAN_VARYING));
+
+   span->arrayMask |= SPAN_VARYING;
+
+   for (vec = 0; vec < MAX_VARYING_VECTORS; vec++) {
+      for (comp = 0; comp < VARYINGS_PER_VECTOR; comp++) {
+         const GLfloat valueStep = span->varStepX[vec][comp];
+         const GLfloat wStep = span->dwdx;
+         GLfloat value = span->var[vec][comp];
+         GLfloat curW = span->w;   /* w restarts for every component */
+         GLuint frag = 0;
+
+         while (frag < span->end) {
+            const GLfloat oneOverW = 1.0f / curW;
+            span->array->varying[frag][vec][comp] = value * oneOverW;
+            value += valueStep;
+            curW += wStep;
+            frag++;
+         }
+      }
+   }
+}
+
+
+/**
+ * Mask out the fragments of a horizontal span that are not selected by
+ * the current polygon stipple pattern.
+ *
+ * The stipple row is chosen by span->y modulo 32.  The bit tested for the
+ * first fragment is selected by span->x modulo 32, counting from the most
+ * significant bit, and moves one bit to the right per fragment, wrapping
+ * back to the top bit.  span->writeAll is always cleared.
+ */
+static void
+stipple_polygon_span( GLcontext *ctx, struct sw_span *span )
+{
+   const GLuint topBit = 0x80000000;
+   const GLuint pattern = ctx->PolygonStipple[span->y % 32];
+   GLubyte *mask = span->array->mask;
+   GLuint bit, pos;
+
+   ASSERT(ctx->Polygon.StippleFlag);
+   ASSERT((span->arrayMask & SPAN_XY) == 0);
+
+   bit = topBit >> (GLuint) (span->x % 32);
+
+   for (pos = 0; pos < span->end; pos++) {
+      if (!(bit & pattern)) {
+         /* stipple bit is clear: kill this fragment */
+         mask[pos] = 0;
+      }
+      bit >>= 1;
+      if (!bit) {
+         /* ran off the low end of the pattern, wrap around */
+         bit = topBit;
+      }
+   }
+   span->writeAll = GL_FALSE;
+}
+
+
+/**
+ * Clip a span against the bounds of the current draw buffer
+ * (DrawBuffer->_Xmin, _Xmax, _Ymin, _Ymax).  This accomplishes both
+ * window clipping and scissoring.
+ *
+ * If the span carries per-fragment coordinates (SPAN_XY), every fragment
+ * is tested on its own and the result is stored in the mask array (ANDed
+ * with the existing mask when SPAN_MASK is set).  Otherwise the span is a
+ * horizontal run: pixels left of _Xmin are masked out and span->end is
+ * reduced so that the run stops at _Xmax.
+ *
+ * \return GL_TRUE if some pixels may still be visible, GL_FALSE if the
+ *         whole run was rejected (span->end is then set to 0).
+ */
+static GLuint
+clip_span( GLcontext *ctx, struct sw_span *span )
+{
+   const GLint left = ctx->DrawBuffer->_Xmin;
+   const GLint right = ctx->DrawBuffer->_Xmax;
+   const GLint bottom = ctx->DrawBuffer->_Ymin;
+   const GLint top = ctx->DrawBuffer->_Ymax;
+   const GLint count = span->end;
+
+   if (!(span->arrayMask & SPAN_XY)) {
+      /* horizontal run of pixels starting at (span->x, span->y) */
+      const GLint start = span->x;
+      const GLint row = span->y;
+
+      /* reject runs that lie entirely outside the bounds */
+      if (row < bottom || row >= top
+          || start + count <= left || start >= right) {
+         span->end = 0;
+         return GL_FALSE;
+      }
+
+      if (start < left) {
+         /* mask out the pixels hanging over the left edge */
+         ASSERT(start + count > left);
+         span->writeAll = GL_FALSE;
+         _mesa_bzero(span->array->mask, (left - start) * sizeof(GLubyte));
+      }
+
+      if (start + count > right) {
+         /* drop the pixels hanging over the right edge */
+         ASSERT(start < right);
+         span->end = right - start;
+      }
+   }
+   else {
+      /* arrays of individual pixel coordinates */
+      const GLint *px = span->array->x;
+      const GLint *py = span->array->y;
+      GLubyte *visible = span->array->mask;
+      GLint k;
+
+      /* note: bitwise & instead of && keeps the tests branch-free */
+      if (span->arrayMask & SPAN_MASK) {
+         /* combine with the mask that is already present */
+         for (k = 0; k < count; k++) {
+            visible[k] &= (px[k] >= left) & (px[k] < right)
+                        & (py[k] >= bottom) & (py[k] < top);
+         }
+      }
+      else {
+         for (k = 0; k < count; k++) {
+            visible[k] = (px[k] >= left) & (px[k] < right)
+                       & (py[k] >= bottom) & (py[k] < top);
+         }
+      }
+   }
+
+   return GL_TRUE;  /* some pixels may be visible */
+}
+
+
+/**
+ * Apply all the per-fragment operations to a span of color index fragments
+ * and write them to the enabled color drawbuffers.
+ *
+ * Stages, in order: mask setup, clipping, depth bounds test, polygon
+ * stipple, stencil/depth test, occlusion query counting, index
+ * interpolation, fog, antialias coverage, then logic op / index masking and
+ * the write itself for each color draw buffer.
+ *
+ * span->interpMask and span->arrayMask may be changed while processing but
+ * are restored to their original values on every exit taken after a stage
+ * that may have modified them.  The mask array, span->writeAll and (through
+ * clipping) span->end are modified and not restored.
+ *
+ * \param ctx   rendering context
+ * \param span  the span to process; SPAN_INDEX must be set in either
+ *              interpMask or arrayMask
+ */
+void
+_swrast_write_index_span( GLcontext *ctx, struct sw_span *span)
+{
+   const SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const GLuint output = 0;
+   const GLbitfield origInterpMask = span->interpMask;
+   const GLbitfield origArrayMask = span->arrayMask;
+   GLuint buf;
+
+   ASSERT(span->end <= MAX_WIDTH);
+   ASSERT(span->primitive == GL_POINT  ||  span->primitive == GL_LINE ||
+          span->primitive == GL_POLYGON  ||  span->primitive == GL_BITMAP);
+   ASSERT((span->interpMask | span->arrayMask) & SPAN_INDEX);
+   ASSERT((span->interpMask & span->arrayMask) == 0);
+
+   if (span->arrayMask & SPAN_MASK) {
+      /* mask was initialized by caller, probably glBitmap */
+      span->writeAll = GL_FALSE;
+   }
+   else {
+      _mesa_memset(span->array->mask, 1, span->end);
+      span->writeAll = GL_TRUE;
+   }
+
+   /* Clipping */
+   if ((swrast->_RasterMask & CLIP_BIT) || (span->primitive != GL_POLYGON)) {
+      if (!clip_span(ctx, span)) {
+         return;
+      }
+   }
+
+   /* Depth bounds test */
+   if (ctx->Depth.BoundsTest && ctx->DrawBuffer->Visual.depthBits > 0) {
+      if (!_swrast_depth_bounds_test(ctx, span)) {
+         return;
+      }
+   }
+
+#ifdef DEBUG
+   /* Make sure all fragments are within window bounds */
+   if (span->arrayMask & SPAN_XY) {
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         if (span->array->mask[i]) {
+            assert(span->array->x[i] >= ctx->DrawBuffer->_Xmin);
+            assert(span->array->x[i] < ctx->DrawBuffer->_Xmax);
+            assert(span->array->y[i] >= ctx->DrawBuffer->_Ymin);
+            assert(span->array->y[i] < ctx->DrawBuffer->_Ymax);
+         }
+      }
+   }
+#endif
+
+   /* Polygon Stippling */
+   if (ctx->Polygon.StippleFlag && span->primitive == GL_POLYGON) {
+      stipple_polygon_span(ctx, span);
+   }
+
+   /* Stencil and Z testing */
+   if (ctx->Depth.Test || ctx->Stencil.Enabled) {
+      if (span->interpMask & SPAN_Z)
+         _swrast_span_interpolate_z(ctx, span);
+
+      if (ctx->Stencil.Enabled) {
+         if (!_swrast_stencil_and_ztest_span(ctx, span)) {
+            /* restore both masks, exactly as the depth-only path does */
+            span->interpMask = origInterpMask;
+            span->arrayMask = origArrayMask;
+            return;
+         }
+      }
+      else {
+         ASSERT(ctx->Depth.Test);
+         if (!_swrast_depth_test_span(ctx, span)) {
+            span->interpMask = origInterpMask;
+            span->arrayMask = origArrayMask;
+            return;
+         }
+      }
+   }
+
+#if FEATURE_ARB_occlusion_query
+   if (ctx->Query.CurrentOcclusionObject) {
+      /* update count of 'passed' fragments */
+      struct gl_query_object *q = ctx->Query.CurrentOcclusionObject;
+      GLuint i;
+      for (i = 0; i < span->end; i++)
+         q->Result += span->array->mask[i];
+   }
+#endif
+
+   /* we have to wait until after occlusion to do this test */
+   if (ctx->Color.DrawBuffer == GL_NONE || ctx->Color.IndexMask == 0) {
+      /* write no pixels; Z interpolation above may have touched both masks */
+      span->interpMask = origInterpMask;
+      span->arrayMask = origArrayMask;
+      return;
+   }
+
+   /* Interpolate the color indexes if needed */
+   if (swrast->_FogEnabled ||
+       ctx->Color.IndexLogicOpEnabled ||
+       ctx->Color.IndexMask != 0xffffffff ||
+       (span->arrayMask & SPAN_COVERAGE)) {
+      if (span->interpMask & SPAN_INDEX) {
+         interpolate_indexes(ctx, span);
+      }
+   }
+
+   /* Fog */
+   if (swrast->_FogEnabled) {
+      _swrast_fog_ci_span(ctx, span);
+   }
+
+   /* Antialias coverage application */
+   if (span->arrayMask & SPAN_COVERAGE) {
+      const GLfloat *coverage = span->array->coverage;
+      GLuint *index = span->array->index;
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         ASSERT(coverage[i] < 16);
+         /* the low four bits of the index carry the coverage value */
+         index[i] = (index[i] & ~0xf) | ((GLuint) coverage[i]);
+      }
+   }
+
+   /* Loop over drawing buffers */
+   for (buf = 0; buf < fb->_NumColorDrawBuffers[output]; buf++) {
+      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[output][buf];
+      GLuint indexTemp[MAX_WIDTH], *index32;
+
+      ASSERT(rb->_BaseFormat == GL_COLOR_INDEX);
+
+      if (ctx->Color.IndexLogicOpEnabled ||
+          ctx->Color.IndexMask != 0xffffffff) {
+         /* make copy of incoming indexes */
+         MEMCPY(indexTemp, span->array->index, span->end * sizeof(GLuint));
+
+         if (ctx->Color.IndexLogicOpEnabled) {
+            _swrast_logicop_ci_span(ctx, rb, span, indexTemp);
+         }
+
+         if (ctx->Color.IndexMask != 0xffffffff) {
+            _swrast_mask_ci_span(ctx, rb, span, indexTemp);
+         }
+         index32 = indexTemp;
+      }
+      else {
+         index32 = span->array->index;
+      }
+
+      if ((span->interpMask & SPAN_INDEX) && span->indexStep == 0) {
+         /* all fragments have same color index */
+         GLubyte monoIndex8;
+         GLushort monoIndex16;
+         GLuint monoIndex32;   /* distinct name: must not shadow index32 */
+         void *value;
+
+         if (rb->DataType == GL_UNSIGNED_BYTE) {
+            monoIndex8 = FixedToInt(span->index);
+            value = &monoIndex8;
+         }
+         else if (rb->DataType == GL_UNSIGNED_SHORT) {
+            monoIndex16 = FixedToInt(span->index);
+            value = &monoIndex16;
+         }
+         else {
+            ASSERT(rb->DataType == GL_UNSIGNED_INT);
+            monoIndex32 = FixedToInt(span->index);
+            value = &monoIndex32;
+         }
+
+         if (span->arrayMask & SPAN_XY) {
+            rb->PutMonoValues(ctx, rb, span->end, span->array->x,
+                              span->array->y, value, span->array->mask);
+         }
+         else {
+            rb->PutMonoRow(ctx, rb, span->end, span->x, span->y,
+                           value, span->array->mask);
+         }
+      }
+      else {
+         /* each fragment is a different color */
+         GLubyte index8[MAX_WIDTH];
+         GLushort index16[MAX_WIDTH];
+         void *values;
+
+         /* narrow the 32-bit indexes to the renderbuffer's storage type */
+         if (rb->DataType == GL_UNSIGNED_BYTE) {
+            GLuint k;
+            for (k = 0; k < span->end; k++) {
+               index8[k] = (GLubyte) index32[k];
+            }
+            values = index8;
+         }
+         else if (rb->DataType == GL_UNSIGNED_SHORT) {
+            GLuint k;
+            for (k = 0; k < span->end; k++) {
+               index16[k] = (GLushort) index32[k];
+            }
+            values = index16;
+         }
+         else {
+            ASSERT(rb->DataType == GL_UNSIGNED_INT);
+            values = index32;
+         }
+
+         if (span->arrayMask & SPAN_XY) {
+            rb->PutValues(ctx, rb, span->end, span->array->x, span->array->y,
+                          values, span->array->mask);
+         }
+         else {
+            rb->PutRow(ctx, rb, span->end, span->x, span->y,
+                       values, span->array->mask);
+         }
+      }
+   }
+
+   span->interpMask = origInterpMask;
+   span->arrayMask = origArrayMask;
+}
+
+
+/**
+ * Add the specular color to the base color of each of the first n
+ * fragments, in place.  Only the red, green and blue components are
+ * touched.  For non-float channel types the sums are clamped to CHAN_MAX.
+ * This is used only when
+ * GL_LIGHT_MODEL_COLOR_CONTROL = GL_SEPARATE_SPECULAR_COLOR.
+ *
+ * \param n         number of fragments
+ * \param rgba      base colors, updated with the sums
+ * \param specular  specular colors to add
+ */
+static void
+add_colors(GLuint n, GLchan rgba[][4], GLchan specular[][4] )
+{
+   GLuint px;
+   for (px = 0; px < n; px++) {
+      GLchan *base = rgba[px];
+      const GLchan *spec = specular[px];
+#if CHAN_TYPE == GL_FLOAT
+      /* float channels: plain sum, no clamping */
+      base[RCOMP] += spec[RCOMP];
+      base[GCOMP] += spec[GCOMP];
+      base[BCOMP] += spec[BCOMP];
+#else
+      /* integer channels: sum in a wider type, then saturate */
+      const GLint sumR = base[RCOMP] + spec[RCOMP];
+      const GLint sumG = base[GCOMP] + spec[GCOMP];
+      const GLint sumB = base[BCOMP] + spec[BCOMP];
+      base[RCOMP] = (GLchan) MIN2(sumR, CHAN_MAX);
+      base[GCOMP] = (GLchan) MIN2(sumG, CHAN_MAX);
+      base[BCOMP] = (GLchan) MIN2(sumB, CHAN_MAX);
+#endif
+   }
+}
+
+
+/**
+ * XXX merge this code into the _swrast_write_rgba_span() routine!
+ *
+ * Draw to more than one RGBA color buffer (or none).
+ * All fragment operations, up to (but not including) blending/logicop,
+ * should have been done first.
+ *
+ * For every color draw buffer the incoming colors are copied to a
+ * temporary array, logic op or blending and then color masking are applied
+ * to the copy, and the copy is written with PutValues (SPAN_XY spans) or
+ * PutRow (horizontal spans).  span->array->rgba itself is left unchanged
+ * so that each buffer starts from the same colors.
+ *
+ * \param ctx   rendering context
+ * \param span  the span to write; span->end may be at most MAX_WIDTH
+ */
+static void
+multi_write_rgba_span( GLcontext *ctx, struct sw_span *span )
+{
+   const GLuint colorMask = *((GLuint *) ctx->Color.ColorMask);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const GLuint output = 0;
+   GLuint i;
+
+   /* rgbaTmp[] below holds MAX_WIDTH fragments, so a span of exactly
+    * MAX_WIDTH is legal here, matching the check in the span writers.
+    */
+   ASSERT(span->end <= MAX_WIDTH);
+   ASSERT(colorMask != 0x0);
+
+   for (i = 0; i < fb->_NumColorDrawBuffers[output]; i++) {
+      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[output][i];
+      GLchan rgbaTmp[MAX_WIDTH][4];
+
+      /* make copy of incoming colors */
+      MEMCPY( rgbaTmp, span->array->rgba, 4 * span->end * sizeof(GLchan) );
+
+      if (ctx->Color._LogicOpEnabled) {
+         _swrast_logicop_rgba_span(ctx, rb, span, rgbaTmp);
+      }
+      else if (ctx->Color.BlendEnabled) {
+         _swrast_blend_span(ctx, rb, span, rgbaTmp);
+      }
+
+      if (colorMask != 0xffffffff) {
+         _swrast_mask_rgba_span(ctx, rb, span, rgbaTmp);
+      }
+
+      if (span->arrayMask & SPAN_XY) {
+         /* array of pixel coords */
+         ASSERT(rb->PutValues);
+         rb->PutValues(ctx, rb, span->end, span->array->x,
+                       span->array->y, rgbaTmp, span->array->mask);
+      }
+      else {
+         /* horizontal run of pixels */
+         ASSERT(rb->PutRow);
+         rb->PutRow(ctx, rb, span->end, span->x, span->y, rgbaTmp,
+                    span->array->mask);
+      }
+   }
+}
+
+
+/**
+ * Apply all the per-fragment operations to a span.
+ * This now includes texturing (_swrast_write_texture_span() is history).
+ *
+ * Stages, in order: mask setup, clipping, polygon stipple, texcoord and
+ * varying interpolation, fragment color computation (done early when the
+ * alpha test, a fragment program or a fragment shader is active, otherwise
+ * deferred until after the Z/stencil tests), alpha test, stencil/depth
+ * test, occlusion query counting, specular add, fog, antialias coverage,
+ * optional clamping, then blend/logicop, color masking and the write.
+ *
+ * This function may modify any of the array values in the span.
+ * span->interpMask and span->arrayMask may be changed but are restored
+ * to their original values on every exit taken after a stage that may
+ * have modified them.
+ *
+ * \param ctx   rendering context
+ * \param span  the span to process
+ */
+void
+_swrast_write_rgba_span( GLcontext *ctx, struct sw_span *span)
+{
+   const GLuint colorMask = *((GLuint *) ctx->Color.ColorMask);
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLbitfield origInterpMask = span->interpMask;
+   const GLbitfield origArrayMask = span->arrayMask;
+   /* color computation can wait until after Z/stencil testing unless a
+    * stage that runs before those tests needs the colors */
+   const GLboolean deferredTexture = !(ctx->Color.AlphaEnabled ||
+                                       ctx->FragmentProgram._Active ||
+                                       ctx->ShaderObjects._FragmentShaderPresent);
+
+   ASSERT(span->primitive == GL_POINT  ||  span->primitive == GL_LINE ||
+          span->primitive == GL_POLYGON  ||  span->primitive == GL_BITMAP);
+   ASSERT(span->end <= MAX_WIDTH);
+   ASSERT((span->interpMask & span->arrayMask) == 0);
+
+   /*
+   printf("%s()  interp 0x%x  array 0x%x\n", __FUNCTION__,
+          span->interpMask, span->arrayMask);
+   */
+
+   if (span->arrayMask & SPAN_MASK) {
+      /* mask was initialized by caller, probably glBitmap */
+      span->writeAll = GL_FALSE;
+   }
+   else {
+      _mesa_memset(span->array->mask, 1, span->end);
+      span->writeAll = GL_TRUE;
+   }
+
+   /* Clip to window/scissor box */
+   if ((swrast->_RasterMask & CLIP_BIT) || (span->primitive != GL_POLYGON)) {
+      if (!clip_span(ctx, span)) {
+         return;
+      }
+   }
+
+#ifdef DEBUG
+   /* Make sure all fragments are within window bounds */
+   if (span->arrayMask & SPAN_XY) {
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         if (span->array->mask[i]) {
+            assert(span->array->x[i] >= ctx->DrawBuffer->_Xmin);
+            assert(span->array->x[i] < ctx->DrawBuffer->_Xmax);
+            assert(span->array->y[i] >= ctx->DrawBuffer->_Ymin);
+            assert(span->array->y[i] < ctx->DrawBuffer->_Ymax);
+         }
+      }
+   }
+#endif
+
+   /* Polygon Stippling */
+   if (ctx->Polygon.StippleFlag && span->primitive == GL_POLYGON) {
+      stipple_polygon_span(ctx, span);
+   }
+
+   /* Interpolate texcoords? */
+   if (ctx->Texture._EnabledCoordUnits
+       && (span->interpMask & SPAN_TEXTURE)
+       && (span->arrayMask & SPAN_TEXTURE) == 0) {
+      interpolate_texcoords(ctx, span);
+   }
+
+   if (ctx->ShaderObjects._FragmentShaderPresent) {
+      interpolate_varying(ctx, span);
+   }
+
+   /* This is the normal place to compute the resulting fragment color/Z.
+    * As an optimization, we try to defer this until after Z/stencil
+    * testing in order to try to avoid computing colors that we won't
+    * actually need.
+    */
+   if (!deferredTexture) {
+      /* Now we need the rgba array, fill it in if needed */
+      if ((span->interpMask & SPAN_RGBA) && (span->arrayMask & SPAN_RGBA) == 0)
+         interpolate_colors(ctx, span);
+
+      if (span->interpMask & SPAN_SPEC)
+         interpolate_specular(ctx, span);
+
+      if (span->interpMask & SPAN_FOG)
+         interpolate_fog(ctx, span);
+
+      /* Compute fragment colors with fragment program or texture lookups */
+#if FEATURE_ARB_fragment_shader
+      if (ctx->ShaderObjects._FragmentShaderPresent) {
+         if (span->interpMask & SPAN_Z)
+            _swrast_span_interpolate_z (ctx, span);
+         _swrast_exec_arbshader (ctx, span);
+      }
+      else
+#endif
+      if (ctx->FragmentProgram._Active) {
+         /* frag prog may need Z values */
+         if (span->interpMask & SPAN_Z)
+            _swrast_span_interpolate_z(ctx, span);
+         _swrast_exec_fragment_program( ctx, span );
+      }
+      else if (ctx->ATIFragmentShader._Enabled)
+         _swrast_exec_fragment_shader( ctx, span );
+      else if (ctx->Texture._EnabledUnits && (span->arrayMask & SPAN_TEXTURE))
+         _swrast_texture_span( ctx, span );
+
+      /* Do the alpha test */
+      if (ctx->Color.AlphaEnabled) {
+         if (!_swrast_alpha_test(ctx, span)) {
+            /* the interpolation stages above may have changed either
+             * mask, so put both back like every later exit does
+             */
+            span->interpMask = origInterpMask;
+            span->arrayMask = origArrayMask;
+            return;
+         }
+      }
+   }
+
+   /* Stencil and Z testing */
+   if (ctx->Stencil.Enabled || ctx->Depth.Test) {
+      if (span->interpMask & SPAN_Z)
+         _swrast_span_interpolate_z(ctx, span);
+
+      if (ctx->Stencil.Enabled && ctx->DrawBuffer->Visual.stencilBits > 0) {
+         /* Combined Z/stencil tests */
+         if (!_swrast_stencil_and_ztest_span(ctx, span)) {
+            span->interpMask = origInterpMask;
+            span->arrayMask = origArrayMask;
+            return;
+         }
+      }
+      else if (ctx->DrawBuffer->Visual.depthBits > 0) {
+         /* Just regular depth testing */
+         ASSERT(ctx->Depth.Test);
+         ASSERT(span->arrayMask & SPAN_Z);
+         if (!_swrast_depth_test_span(ctx, span)) {
+            span->interpMask = origInterpMask;
+            span->arrayMask = origArrayMask;
+            return;
+         }
+      }
+   }
+
+#if FEATURE_ARB_occlusion_query
+   if (ctx->Query.CurrentOcclusionObject) {
+      /* update count of 'passed' fragments */
+      struct gl_query_object *q = ctx->Query.CurrentOcclusionObject;
+      GLuint i;
+      for (i = 0; i < span->end; i++)
+         q->Result += span->array->mask[i];
+   }
+#endif
+
+   /* We had to wait until now to check for glColorMask(0,0,0,0) because of
+    * the occlusion test.
+    */
+   if (colorMask == 0x0) {
+      span->interpMask = origInterpMask;
+      span->arrayMask = origArrayMask;
+      return;
+   }
+
+   /* If we were able to defer fragment color computation to now, there's
+    * a good chance that many fragments will have already been killed by
+    * Z/stencil testing.
+    */
+   if (deferredTexture) {
+      /* Now we need the rgba array, fill it in if needed */
+      if ((span->interpMask & SPAN_RGBA) && (span->arrayMask & SPAN_RGBA) == 0)
+         interpolate_colors(ctx, span);
+
+      if (span->interpMask & SPAN_SPEC)
+         interpolate_specular(ctx, span);
+
+      if (span->interpMask & SPAN_FOG)
+         interpolate_fog(ctx, span);
+
+#if FEATURE_ARB_fragment_shader
+      if (ctx->ShaderObjects._FragmentShaderPresent) {
+         if (span->interpMask & SPAN_Z)
+            _swrast_span_interpolate_z (ctx, span);
+         _swrast_exec_arbshader (ctx, span);
+      }
+      else
+#endif
+      if (ctx->FragmentProgram._Active)
+         _swrast_exec_fragment_program( ctx, span );
+      else if (ctx->ATIFragmentShader._Enabled)
+         _swrast_exec_fragment_shader( ctx, span );
+      else if (ctx->Texture._EnabledUnits && (span->arrayMask & SPAN_TEXTURE))
+         _swrast_texture_span( ctx, span );
+   }
+
+   ASSERT(span->arrayMask & SPAN_RGBA);
+
+   if (!ctx->FragmentProgram._Enabled) {
+      /* Add base and specular colors */
+      if (ctx->Fog.ColorSumEnabled ||
+          (ctx->Light.Enabled &&
+           ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)) {
+         if (span->interpMask & SPAN_SPEC) {
+            interpolate_specular(ctx, span);
+         }
+         if (span->arrayMask & SPAN_SPEC) {
+            add_colors( span->end, span->array->rgba, span->array->spec );
+         }
+         else {
+            /* We probably added the base/specular colors during the
+             * vertex stage!
+             */
+         }
+      }
+   }
+
+   /* Fog */
+   if (swrast->_FogEnabled) {
+      _swrast_fog_rgba_span(ctx, span);
+   }
+
+   /* Antialias coverage application */
+   if (span->arrayMask & SPAN_COVERAGE) {
+      GLchan (*rgba)[4] = span->array->rgba;
+      GLfloat *coverage = span->array->coverage;
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         /* scale alpha by the fragment's coverage value */
+         rgba[i][ACOMP] = (GLchan) (rgba[i][ACOMP] * coverage[i]);
+      }
+   }
+
+   /* Clamp color/alpha values over the range [0.0, 1.0] before storage */
+#if CHAN_TYPE == GL_FLOAT
+   if (ctx->Color.ClampFragmentColor) {
+      GLchan (*rgba)[4] = span->array->rgba;
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         rgba[i][RCOMP] = CLAMP(rgba[i][RCOMP], 0.0, CHAN_MAXF);
+         rgba[i][GCOMP] = CLAMP(rgba[i][GCOMP], 0.0, CHAN_MAXF);
+         rgba[i][BCOMP] = CLAMP(rgba[i][BCOMP], 0.0, CHAN_MAXF);
+         rgba[i][ACOMP] = CLAMP(rgba[i][ACOMP], 0.0, CHAN_MAXF);
+      }
+   }
+#endif
+
+   if (swrast->_RasterMask & MULTI_DRAW_BIT) {
+      /* need to do blend/logicop separately for each color buffer */
+      multi_write_rgba_span(ctx, span);
+   }
+   else {
+      /* normal: write to exactly one buffer */
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];
+
+      if (ctx->Color._LogicOpEnabled) {
+         _swrast_logicop_rgba_span(ctx, rb, span, span->array->rgba);
+      }
+      else if (ctx->Color.BlendEnabled) {
+         _swrast_blend_span(ctx, rb, span, span->array->rgba);
+      }
+
+      /* Color component masking */
+      if (colorMask != 0xffffffff) {
+         _swrast_mask_rgba_span(ctx, rb, span, span->array->rgba);
+      }
+
+      /* Finally, write the pixels to a color buffer */
+      if (span->arrayMask & SPAN_XY) {
+         /* array of pixel coords */
+         ASSERT(rb->PutValues);
+         ASSERT(rb->_BaseFormat == GL_RGB || rb->_BaseFormat == GL_RGBA);
+         /* XXX check datatype */
+         rb->PutValues(ctx, rb, span->end, span->array->x, span->array->y,
+                       span->array->rgba, span->array->mask);
+      }
+      else {
+         /* horizontal run of pixels */
+         ASSERT(rb->PutRow);
+         ASSERT(rb->_BaseFormat == GL_RGB || rb->_BaseFormat == GL_RGBA);
+         /* XXX check datatype */
+         /* a NULL mask is passed when every fragment is to be written */
+         rb->PutRow(ctx, rb, span->end, span->x, span->y, span->array->rgba,
+                    span->writeAll ? NULL : span->array->mask);
+      }
+   }
+
+   span->interpMask = origInterpMask;
+   span->arrayMask = origArrayMask;
+}
+
+
+
+/**
+ * Read a horizontal run of RGBA pixels from a renderbuffer.  The run is
+ * clipped so that nothing outside the buffer's boundaries is read.
+ *
+ * When the run is rejected by the initial bounds test the whole rgba array
+ * is zeroed.  When it is only partly inside, just the entries that map to
+ * pixels inside the buffer are written; the others are left untouched.
+ *
+ * \param ctx   rendering context
+ * \param rb    renderbuffer to read from
+ * \param n     number of pixels requested
+ * \param x, y  position of the first pixel
+ * \param rgba  destination array with room for n colors
+ */
+void
+_swrast_read_rgba_span( GLcontext *ctx, struct gl_renderbuffer *rb,
+                        GLuint n, GLint x, GLint y, GLchan rgba[][4] )
+{
+   const GLint width = (GLint) rb->Width;
+   const GLint height = (GLint) rb->Height;
+   const GLint count = (GLint) n;
+   GLint first = 0;      /* leading pixels skipped by left clipping */
+   GLint run = count;    /* pixels actually fetched */
+
+   if (y < 0 || y >= height || x + count < 0 || x >= width) {
+      /* rejected by the bounds test */
+      /* XXX maybe leave rgba values undefined? */
+      _mesa_bzero(rgba, 4 * n * sizeof(GLchan));
+      return;
+   }
+
+   if (x < 0) {
+      /* left edge clipping */
+      first = -x;
+      run = count - first;
+      if (run < 0) {
+         /* completely left of window */
+         return;
+      }
+      if (run > width) {
+         run = width;
+      }
+   }
+   else if ((GLint) (x + n) > width) {
+      /* right edge clipping */
+      run = width - x;
+      if (run < 0) {
+         /* completely to right of window */
+         return;
+      }
+   }
+
+   ASSERT(rb);
+   ASSERT(rb->GetRow);
+   ASSERT(rb->_BaseFormat == GL_RGB || rb->_BaseFormat == GL_RGBA);
+   ASSERT(rb->DataType == CHAN_TYPE);
+   rb->GetRow(ctx, rb, run, x + first, y, rgba + first);
+}
+
+
+/**
+ * Read a horizontal run of color index pixels from a renderbuffer.  The
+ * run is clipped so that nothing outside the buffer's boundaries is read.
+ *
+ * When the run is rejected by the initial bounds test the whole index
+ * array is zeroed.  Values stored as GLubyte or GLushort are widened to
+ * GLuint; GLuint storage is read directly into the destination.
+ *
+ * \param ctx    rendering context
+ * \param rb     renderbuffer to read from
+ * \param n      number of pixels requested
+ * \param x, y   position of the first pixel
+ * \param index  destination array with room for n indexes
+ */
+void
+_swrast_read_index_span( GLcontext *ctx, struct gl_renderbuffer *rb,
+                         GLuint n, GLint x, GLint y, GLuint index[] )
+{
+   const GLint width = (GLint) rb->Width;
+   const GLint height = (GLint) rb->Height;
+   const GLint count = (GLint) n;
+   GLint first = 0;      /* leading pixels skipped by left clipping */
+   GLint run = count;    /* pixels actually fetched */
+
+   if (y < 0 || y >= height || x + count < 0 || x >= width) {
+      /* rejected by the bounds test */
+      _mesa_bzero(index, n * sizeof(GLuint));
+      return;
+   }
+
+   if (x < 0) {
+      /* left edge clipping */
+      first = -x;
+      run = count - first;
+      if (run < 0) {
+         /* completely left of window */
+         return;
+      }
+      if (run > width) {
+         run = width;
+      }
+   }
+   else if ((GLint) (x + n) > width) {
+      /* right edge clipping */
+      run = width - x;
+      if (run < 0) {
+         /* completely to right of window */
+         return;
+      }
+   }
+
+   ASSERT(rb->GetRow);
+   ASSERT(rb->_BaseFormat == GL_COLOR_INDEX);
+
+   if (rb->DataType == GL_UNSIGNED_BYTE) {
+      /* fetch as bytes, then widen */
+      GLubyte row8[MAX_WIDTH];
+      GLuint *dst = index + first;
+      GLint k;
+      rb->GetRow(ctx, rb, run, x + first, y, row8);
+      for (k = 0; k < run; k++)
+         dst[k] = row8[k];
+   }
+   else if (rb->DataType == GL_UNSIGNED_SHORT) {
+      /* fetch as shorts, then widen */
+      GLushort row16[MAX_WIDTH];
+      GLuint *dst = index + first;
+      GLint k;
+      rb->GetRow(ctx, rb, run, x + first, y, row16);
+      for (k = 0; k < run; k++)
+         dst[k] = row16[k];
+   }
+   else if (rb->DataType == GL_UNSIGNED_INT) {
+      /* storage already matches the destination type */
+      rb->GetRow(ctx, rb, run, x + first, y, index + first);
+   }
+}
+
+
+/**
+ * Wrapper for gl_renderbuffer::GetValues() which does clipping to avoid
+ * reading values outside the buffer bounds.
+ * We can use this for reading any format/type of renderbuffer.
+ *
+ * The coordinate list is split into maximal runs of consecutive in-bounds
+ * positions and GetValues() is called once per run.  Entries of 'values'
+ * that correspond to out-of-bounds positions are not written.
+ *
+ * \param ctx        rendering context
+ * \param rb         renderbuffer to read from
+ * \param count      number of coordinates
+ * \param x, y       pixel coordinates
+ * \param values     destination array
+ * \param valueSize  is the size in bytes of each value put into the
+ *                   values array.
+ */
+void
+_swrast_get_values(GLcontext *ctx, struct gl_renderbuffer *rb,
+                   GLuint count, const GLint x[], const GLint y[],
+                   void *values, GLuint valueSize)
+{
+   GLuint runStart = 0, runLen = 0, k;
+
+   for (k = 0; k < count; k++) {
+      const GLboolean inside = x[k] >= 0 && y[k] >= 0
+         && x[k] < rb->Width && y[k] < rb->Height;
+
+      if (inside) {
+         /* open a new run, or extend the current one */
+         if (runLen == 0)
+            runStart = k;
+         runLen++;
+         continue;
+      }
+
+      if (runLen != 0) {
+         /* an outside position ends the run: read [runStart, k) */
+         rb->GetValues(ctx, rb, runLen, x + runStart, y + runStart,
+                       (GLubyte *) values + runStart * valueSize);
+         runLen = 0;
+      }
+   }
+
+   if (runLen != 0) {
+      /* read the run that reaches the end of the list */
+      rb->GetValues(ctx, rb, runLen, x + runStart, y + runStart,
+                    (GLubyte *) values + runStart * valueSize);
+   }
+}
+
+
+/**
+ * Wrapper for gl_renderbuffer::PutRow() which does clipping.
+ *
+ * Rows entirely outside the renderbuffer are dropped.  Rows that overlap
+ * an edge are shortened, and when pixels are cut off on the left the
+ * source pointer is advanced past the corresponding values.  PutRow() is
+ * called with a NULL mask.
+ *
+ * \param ctx        rendering context
+ * \param rb         renderbuffer to write to
+ * \param count      number of pixels in the row
+ * \param x, y       position of the first pixel; may lie outside the buffer
+ * \param values     source values, 'count' entries of 'valueSize' bytes
+ * \param valueSize  size in bytes of one value
+ */
+void
+_swrast_put_row(GLcontext *ctx, struct gl_renderbuffer *rb,
+                GLuint count, GLint x, GLint y,
+                const GLvoid *values, GLuint valueSize)
+{
+   /* Compare in signed arithmetic, as the span readers in this file do:
+    * if the renderbuffer dimensions are unsigned, a negative x or y would
+    * otherwise be converted to a huge unsigned value and a row crossing
+    * the left edge would be rejected instead of clipped.
+    */
+   const GLint width = (GLint) rb->Width;
+   const GLint height = (GLint) rb->Height;
+   GLint skip = 0;
+
+   if (y < 0 || y >= height)
+      return; /* above or below */
+
+   if (x + (GLint) count <= 0 || x >= width)
+      return; /* entirely left or right */
+
+   if (x + (GLint) count > width) {
+      /* right clip */
+      const GLint clip = x + (GLint) count - width;
+      count -= clip;
+   }
+
+   if (x < 0) {
+      /* left clip: skip the values that fall left of the buffer */
+      skip = -x;
+      x = 0;
+      count -= skip;
+   }
+
+   rb->PutRow(ctx, rb, count, x, y,
+              (const GLubyte *) values + skip * valueSize, NULL);
+}
+
+
+/**
+ * Wrapper for gl_renderbuffer::GetRow() which does clipping.
+ *
+ * Rows entirely outside the renderbuffer are not read at all.  Rows that
+ * overlap an edge are shortened, and when pixels are cut off on the left
+ * the destination pointer is advanced so each value lands in the slot of
+ * the pixel it belongs to.  Slots for clipped pixels are left untouched.
+ *
+ * \param ctx        rendering context
+ * \param rb         renderbuffer to read from
+ * \param count      number of pixels in the row
+ * \param x, y       position of the first pixel; may lie outside the buffer
+ * \param values     destination, 'count' entries of 'valueSize' bytes
+ * \param valueSize  size in bytes of one value
+ */
+void
+_swrast_get_row(GLcontext *ctx, struct gl_renderbuffer *rb,
+                GLuint count, GLint x, GLint y,
+                GLvoid *values, GLuint valueSize)
+{
+   /* Compare in signed arithmetic, as the span readers in this file do:
+    * if the renderbuffer dimensions are unsigned, a negative x or y would
+    * otherwise be converted to a huge unsigned value and a row crossing
+    * the left edge would be rejected instead of clipped.
+    */
+   const GLint width = (GLint) rb->Width;
+   const GLint height = (GLint) rb->Height;
+   GLint skip = 0;
+
+   if (y < 0 || y >= height)
+      return; /* above or below */
+
+   if (x + (GLint) count <= 0 || x >= width)
+      return; /* entirely left or right */
+
+   if (x + (GLint) count > width) {
+      /* right clip */
+      const GLint clip = x + (GLint) count - width;
+      count -= clip;
+   }
+
+   if (x < 0) {
+      /* left clip: skip the slots that fall left of the buffer */
+      skip = -x;
+      x = 0;
+      count -= skip;
+   }
+
+   rb->GetRow(ctx, rb, count, x, y, (GLubyte *) values + skip * valueSize);
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_span.h b/dist/Mesa/src/mesa/swrast/s_span.h
new file mode 100644
index 000000000..40a57e1ae
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_span.h
@@ -0,0 +1,85 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_SPAN_H
+#define S_SPAN_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+
+extern void
+_swrast_span_default_z( GLcontext *ctx, struct sw_span *span );
+
+extern void
+_swrast_span_interpolate_z( const GLcontext *ctx, struct sw_span *span );
+
+extern void
+_swrast_span_default_fog( GLcontext *ctx, struct sw_span *span );
+
+extern void
+_swrast_span_default_color( GLcontext *ctx, struct sw_span *span );
+
+extern void
+_swrast_span_default_texcoords( GLcontext *ctx, struct sw_span *span );
+
+extern GLfloat
+_swrast_compute_lambda(GLfloat dsdx, GLfloat dsdy, GLfloat dtdx, GLfloat dtdy,
+                       GLfloat dqdx, GLfloat dqdy, GLfloat texW, GLfloat texH,
+                       GLfloat s, GLfloat t, GLfloat q, GLfloat invQ);
+
+extern void
+_swrast_write_index_span( GLcontext *ctx, struct sw_span *span);
+
+
+extern void
+_swrast_write_rgba_span( GLcontext *ctx, struct sw_span *span);
+
+
+extern void
+_swrast_read_rgba_span( GLcontext *ctx, struct gl_renderbuffer *rb,
+                        GLuint n, GLint x, GLint y, GLchan rgba[][4] );
+
+extern void
+_swrast_read_index_span( GLcontext *ctx, struct gl_renderbuffer *rb,
+                         GLuint n, GLint x, GLint y, GLuint indx[] );
+
+extern void
+_swrast_get_values(GLcontext *ctx, struct gl_renderbuffer *rb,
+                   GLuint count, const GLint x[], const GLint y[],
+                   void *values, GLuint valueSize);
+
+extern void
+_swrast_put_row(GLcontext *ctx, struct gl_renderbuffer *rb,
+                GLuint count, GLint x, GLint y,
+                const GLvoid *values, GLuint valueSize);
+
+extern void
+_swrast_get_row(GLcontext *ctx, struct gl_renderbuffer *rb,
+                GLuint count, GLint x, GLint y,
+                GLvoid *values, GLuint valueSize);
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_spantemp.h b/dist/Mesa/src/mesa/swrast/s_spantemp.h
new file mode 100644
index 000000000..1eef81eb9
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_spantemp.h
@@ -0,0 +1,235 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Templates for the span/pixel-array write/read functions called via
+ * the gl_renderbuffer's GetRow, GetValues, PutRow, PutMonoRow, PutValues
+ * and PutMonoValues functions.
+ *
+ * Define the following macros before including this file:
+ *   NAME(BASE)  to generate the function name (i.e. add prefix or suffix)
+ *   RB_TYPE  the renderbuffer DataType
+ *   CI_MODE  if set, color index mode, else RGBA
+ *   SPAN_VARS  to declare any local variables
+ *   INIT_PIXEL_PTR(P, X, Y)  to initialize a pointer to a pixel
+ *   INC_PIXEL_PTR(P)  to increment a pixel pointer by one pixel
+ *   STORE_PIXEL(DST, X, Y, VALUE)  to store pixel values in buffer
+ *   FETCH_PIXEL(DST, SRC)  to fetch pixel values from buffer
+ *
+ * Note that in the STORE_PIXEL macros, we also pass in the (X,Y) coordinates
+ * for the pixels to be stored.  This is useful when dithering and probably
+ * ignored otherwise.
+ */
+
+#include "macros.h"
+
+
+#ifdef CI_MODE
+#define RB_COMPONENTS 1
+#elif !defined(RB_COMPONENTS)
+#define RB_COMPONENTS 4
+#endif
+
+
+static void  /* GetRow: fetch 'count' consecutive pixels, starting at (x, y), into values[] */
+NAME(get_row)( GLcontext *ctx, struct gl_renderbuffer *rb,
+               GLuint count, GLint x, GLint y, void *values )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+#ifdef CI_MODE
+   RB_TYPE *dest = (RB_TYPE *) values;  /* color index mode: one value per pixel */
+#else
+   RB_TYPE (*dest)[RB_COMPONENTS] = (RB_TYPE (*)[RB_COMPONENTS]) values;
+#endif
+   GLuint i;
+   INIT_PIXEL_PTR(pixel, x, y);
+   for (i = 0; i < count; i++) {
+      FETCH_PIXEL(dest[i], pixel);
+      INC_PIXEL_PTR(pixel);  /* step to the next pixel of the row */
+   }
+   (void) rb;  /* not referenced directly by this template body */
+}
+
+
+static void  /* GetValues: fetch the pixel at each (x[i], y[i]) into values[i], for i < count */
+NAME(get_values)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                  GLuint count, const GLint x[], const GLint y[], void *values )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+#ifdef CI_MODE
+   RB_TYPE *dest = (RB_TYPE *) values;  /* color index mode: one value per pixel */
+#else
+   RB_TYPE (*dest)[RB_COMPONENTS] = (RB_TYPE (*)[RB_COMPONENTS]) values;
+#endif
+   GLuint i;
+   for (i = 0; i < count; i++) {
+      INIT_PIXEL_PTR(pixel, x[i], y[i]);  /* positions are independent: re-init per element */
+      FETCH_PIXEL(dest[i], pixel);
+   }
+   (void) rb;  /* not referenced directly by this template body */
+}
+
+
+static void  /* PutRow: store values[i] at (x + i, y) for i < count; a NULL mask stores every pixel */
+NAME(put_row)( GLcontext *ctx, struct gl_renderbuffer *rb,
+               GLuint count, GLint x, GLint y,
+               const void *values, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE (*src)[RB_COMPONENTS] = (const RB_TYPE (*)[RB_COMPONENTS]) values;
+   GLuint i;
+   INIT_PIXEL_PTR(pixel, x, y);
+   if (mask) {
+      for (i = 0; i < count; i++) {
+         if (mask[i]) {
+            STORE_PIXEL(pixel, x + i, y, src[i]);
+         }
+         INC_PIXEL_PTR(pixel);  /* advance even when this pixel is masked out */
+      }
+   }
+   else {
+      for (i = 0; i < count; i++) {
+         STORE_PIXEL(pixel, x + i, y, src[i]);
+         INC_PIXEL_PTR(pixel);
+      }
+   }
+   (void) rb;  /* not referenced directly by this template body */
+}
+
+
+#if !defined(CI_MODE)
+static void  /* PutRow variant whose source values[] has 3 components per pixel; NULL mask stores all */
+NAME(put_row_rgb)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                   GLuint count, GLint x, GLint y,
+                   const void *values, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE (*src)[3] = (const RB_TYPE (*)[3]) values;
+   GLuint i;
+   INIT_PIXEL_PTR(pixel, x, y);
+   for (i = 0; i < count; i++) {
+      if (!mask || mask[i]) {
+#ifdef STORE_PIXEL_RGB
+         STORE_PIXEL_RGB(pixel, x + i, y, src[i]);
+#else
+         STORE_PIXEL(pixel, x + i, y, src[i]);  /* used when no RGB-specific store macro is defined */
+#endif
+      }
+      INC_PIXEL_PTR(pixel);  /* advance even when this pixel is masked out */
+   }
+   (void) rb;  /* not referenced directly by this template body */
+}
+#endif
+
+
+static void  /* PutMonoRow: store the single *value at (x + i, y) for i < count; NULL mask stores all */
+NAME(put_mono_row)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                    GLuint count, GLint x, GLint y,
+                    const void *value, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE *src = (const RB_TYPE *) value;  /* same source for every pixel */
+   GLuint i;
+   INIT_PIXEL_PTR(pixel, x, y);
+   if (mask) {
+      for (i = 0; i < count; i++) {
+         if (mask[i]) {
+            STORE_PIXEL(pixel, x + i, y, src);
+         }
+         INC_PIXEL_PTR(pixel);  /* advance even when this pixel is masked out */
+      }
+   }
+   else {
+      for (i = 0; i < count; i++) {
+         STORE_PIXEL(pixel, x + i, y, src);
+         INC_PIXEL_PTR(pixel);
+      }
+   }
+   (void) rb;  /* not referenced directly by this template body */
+}
+
+
+static void  /* PutValues: store values[i] at (x[i], y[i]) wherever mask[i] is set; mask is required */
+NAME(put_values)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                  GLuint count, const GLint x[], const GLint y[],
+                  const void *values, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE (*src)[RB_COMPONENTS] = (const RB_TYPE (*)[RB_COMPONENTS]) values;
+   GLuint i;
+   ASSERT(mask);  /* unlike put_row, there is no NULL-mask path here */
+   for (i = 0; i < count; i++) {
+      if (mask[i]) {
+         INIT_PIXEL_PTR(pixel, x[i], y[i]);
+         STORE_PIXEL(pixel, x[i], y[i], src[i]);
+      }
+   }
+   (void) rb;  /* not referenced directly by this template body */
+}
+
+
+static void  /* PutMonoValues: store the single *value at (x[i], y[i]) wherever mask[i] is set */
+NAME(put_mono_values)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                       GLuint count, const GLint x[], const GLint y[],
+                       const void *value, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE *src = (const RB_TYPE *) value;  /* same source for every pixel */
+   GLuint i;
+   ASSERT(mask);  /* unlike put_mono_row, there is no NULL-mask path here */
+   for (i = 0; i < count; i++) {
+      if (mask[i]) {
+         INIT_PIXEL_PTR(pixel, x[i], y[i]);
+         STORE_PIXEL(pixel, x[i], y[i], src);
+      }
+   }
+   (void) rb;  /* not referenced directly by this template body */
+}
+
+
+#undef NAME
+#undef RB_TYPE
+#undef RB_COMPONENTS
+#undef CI_MODE
+#undef SPAN_VARS
+#undef INIT_PIXEL_PTR
+#undef INC_PIXEL_PTR
+#undef STORE_PIXEL
+#undef STORE_PIXEL_RGB
+#undef FETCH_PIXEL
diff --git a/dist/Mesa/src/mesa/swrast/s_stencil.c b/dist/Mesa/src/mesa/swrast/s_stencil.c
new file mode 100644
index 000000000..1c78f848e
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_stencil.c
@@ -0,0 +1,1260 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "imports.h"
+
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_stencil.h"
+#include "s_span.h"
+
+
+
+/* Stencil Logic:
+IF stencil test fails THEN
+   Apply fail-op to stencil value
+   Don't write the pixel (RGBA,Z)
+ELSE
+   IF doing depth test && depth test fails THEN
+      Apply zfail-op to stencil value
+      Don't write the pixel (RGBA,Z)
+   ELSE
+      Apply zpass-op to stencil value
+      Write RGBA and Z to appropriate buffers
+   ENDIF
+ENDIF
+*/
+
+
+/**
+ * Apply the given stencil operator to the array of stencil values.
+ * Don't touch stencil[i] if mask[i] is zero, nor bits outside WriteMask[face].
+ * Input:  n - size of stencil array
+ *         oper - the stencil buffer operator (GL_KEEP, GL_ZERO, GL_REPLACE, ...)
+ *         face - 0 or 1 for front or back face operation
+ *         stencil - array [n] of stencil values
+ *         mask - array [n] of flag:  1=apply operator, 0=don't apply operator
+ * Output:  stencil - modified values; an unknown oper only reports a problem
+ */
+static void
+apply_stencil_op( const GLcontext *ctx, GLenum oper, GLuint face,
+                  GLuint n, GLstencil stencil[], const GLubyte mask[] )
+{
+   const GLstencil ref = ctx->Stencil.Ref[face];
+   const GLstencil wrtmask = ctx->Stencil.WriteMask[face];
+   const GLstencil invmask = (GLstencil) (~wrtmask);  /* bits that must be preserved */
+   const GLstencil stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;  /* GL_INCR stops here */
+   GLuint i;
+
+   switch (oper) {
+      case GL_KEEP:
+         /* do nothing */
+         break;
+      case GL_ZERO:
+	 if (invmask==0) {  /* every bit is writable: no merge with the old value needed */
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  stencil[i] = 0;
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  stencil[i] = (GLstencil) (stencil[i] & invmask);
+	       }
+	    }
+	 }
+	 break;
+      case GL_REPLACE:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  stencil[i] = ref;
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  GLstencil s = stencil[i];
+		  stencil[i] = (GLstencil) ((invmask & s ) | (wrtmask & ref));
+	       }
+	    }
+	 }
+	 break;
+      case GL_INCR:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  GLstencil s = stencil[i];
+		  if (s < stencilMax) {
+		     stencil[i] = (GLstencil) (s+1);
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  /* VERIFY logic of adding 1 to a write-masked value */
+		  GLstencil s = stencil[i];
+		  if (s < stencilMax) {
+		     stencil[i] = (GLstencil) ((invmask & s) | (wrtmask & (s+1)));
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_DECR:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  GLstencil s = stencil[i];
+		  if (s>0) {
+		     stencil[i] = (GLstencil) (s-1);
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  /* VERIFY logic of subtracting 1 to a write-masked value */
+		  GLstencil s = stencil[i];
+		  if (s>0) {
+		     stencil[i] = (GLstencil) ((invmask & s) | (wrtmask & (s-1)));
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_INCR_WRAP_EXT:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  stencil[i]++;
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil s = stencil[i];
+                  stencil[i] = (GLstencil) ((invmask & s) | (wrtmask & (s+1)));
+	       }
+	    }
+	 }
+	 break;
+      case GL_DECR_WRAP_EXT:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  stencil[i]--;
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil s = stencil[i];
+                  stencil[i] = (GLstencil) ((invmask & s) | (wrtmask & (s-1)));
+	       }
+	    }
+	 }
+	 break;
+      case GL_INVERT:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  GLstencil s = stencil[i];
+		  stencil[i] = (GLstencil) ~s;
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+		  GLstencil s = stencil[i];
+		  stencil[i] = (GLstencil) ((invmask & s) | (wrtmask & ~s));
+	       }
+	    }
+	 }
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad stencil op in apply_stencil_op");
+   }
+}
+
+
+
+
+/**
+ * Apply stencil test to an array of stencil values (before depth buffering).
+ * Input:  face - 0 or 1 for front or back-face polygons
+ *         n - number of pixels in the array
+ *         stencil - array of [n] stencil values
+ *         mask - array [n] of flag:  0=skip the pixel, 1=stencil the pixel
+ * Output:  mask - pixels which fail the stencil test will have their
+ *                 mask flag set to 0.
+ *          stencil - fail-op applied to the values whose test failed
+ * Return:  GL_FALSE = func is GL_NEVER or invalid (nothing can pass), else GL_TRUE.
+ */
+static GLboolean
+do_stencil_test( GLcontext *ctx, GLuint face, GLuint n, GLstencil stencil[],
+                 GLubyte mask[] )
+{
+   GLubyte fail[MAX_WIDTH];
+   GLboolean allfail = GL_FALSE;  /* only the GL_NEVER case sets this */
+   GLuint i;
+   GLstencil r, s;
+   const GLuint valueMask = ctx->Stencil.ValueMask[face];  /* ANDed with both ref and stored value */
+
+   ASSERT(n <= MAX_WIDTH);
+
+   /*
+    * Perform stencil test.  The results of this operation are stored
+    * in the fail[] array:
+    *   IF fail[i] is non-zero THEN
+    *       the stencil fail operator is to be applied
+    *   ELSE
+    *       the stencil fail operator is not to be applied
+    *   ENDIF
+    */
+   switch (ctx->Stencil.Function[face]) {
+      case GL_NEVER:
+         /* never pass; always fail */
+         for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       mask[i] = 0;
+	       fail[i] = 1;
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 allfail = GL_TRUE;
+	 break;
+      case GL_LESS:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       s = (GLstencil) (stencil[i] & valueMask);
+	       if (r < s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_LEQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       s = (GLstencil) (stencil[i] & valueMask);
+	       if (r <= s) {
+		  /* pass */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_GREATER:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       s = (GLstencil) (stencil[i] & valueMask);
+	       if (r > s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_GEQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       s = (GLstencil) (stencil[i] & valueMask);
+	       if (r >= s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_EQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       s = (GLstencil) (stencil[i] & valueMask);
+	       if (r == s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_NOTEQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       s = (GLstencil) (stencil[i] & valueMask);
+	       if (r != s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_ALWAYS:
+	 /* always pass */
+	 for (i=0;i<n;i++) {
+	    fail[i] = 0;
+	 }
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad stencil func in gl_stencil_span");
+         return 0;
+   }
+
+   if (ctx->Stencil.FailFunc[face] != GL_KEEP) {
+      apply_stencil_op( ctx, ctx->Stencil.FailFunc[face], face, n, stencil, fail );
+   }
+
+   return !allfail;
+}
+
+
+
+/**
+ * Apply stencil and depth testing to the span of pixels.
+ * Both software and hardware stencil buffers are acceptable.
+ * Input:  span->end - number of pixels in the span
+ *         span->x, span->y - location of leftmost pixel in span
+ *         span z values (SPAN_Z) - asserted present when ctx->Depth.Test is set
+ *         span->array->mask - flags  (1=test this pixel, 0=skip the pixel)
+ *         face - index into the per-face ctx->Stencil state arrays
+ * Output:  span->array->mask - flags (1=stencil and depth test passed); span->writeAll is cleared
+ * Return: GL_FALSE - do_stencil_test() reported that all fragments failed
+ *         GL_TRUE - otherwise
+ */
+static GLboolean
+stencil_and_ztest_span(GLcontext *ctx, struct sw_span *span, GLuint face)
+{
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_renderbuffer *rb = fb->_StencilBuffer;
+   GLstencil stencilRow[MAX_WIDTH];
+   GLstencil *stencil;
+   const GLuint n = span->end;
+   const GLint x = span->x;
+   const GLint y = span->y;
+   GLubyte *mask = span->array->mask;
+
+   ASSERT((span->arrayMask & SPAN_XY) == 0);
+   ASSERT(ctx->Stencil.Enabled);
+   ASSERT(n <= MAX_WIDTH);
+#ifdef DEBUG
+   if (ctx->Depth.Test) {
+      ASSERT(span->arrayMask & SPAN_Z);
+   }
+#endif
+
+   stencil = (GLstencil *) rb->GetPointer(ctx, rb, x, y);
+   if (!stencil) {
+      rb->GetRow(ctx, rb, n, x, y, stencilRow);  /* no direct pointer: work on a local copy */
+      stencil = stencilRow;
+   }
+
+   /*
+    * Apply the stencil test to the fragments.
+    * mask[i] is cleared for each fragment that fails the stencil test.
+    */
+   if (do_stencil_test( ctx, face, n, stencil, mask ) == GL_FALSE) {
+      /* all fragments failed the stencil test, we're done. */
+      span->writeAll = GL_FALSE;
+      if (!rb->GetPointer(ctx, rb, 0, 0)) {
+         /* put updated stencil values into buffer */
+         rb->PutRow(ctx, rb, n, x, y, stencil, NULL);
+      }
+      return GL_FALSE;
+   }
+
+   /*
+    * Some fragments passed the stencil test, apply depth test to them
+    * and apply Zpass and Zfail stencil ops.
+    */
+   if (ctx->Depth.Test == GL_FALSE) {
+      /*
+       * No depth buffer, just apply zpass stencil function to active pixels.
+       */
+      apply_stencil_op( ctx, ctx->Stencil.ZPassFunc[face], face, n, stencil, mask );
+   }
+   else {
+      /*
+       * Perform depth buffering, then apply zpass or zfail stencil function.
+       */
+      GLubyte passmask[MAX_WIDTH], failmask[MAX_WIDTH], oldmask[MAX_WIDTH];
+      GLuint i;
+
+      /* save the current mask bits */
+      _mesa_memcpy(oldmask, mask, n * sizeof(GLubyte));
+
+      /* apply the depth test */
+      _swrast_depth_test_span(ctx, span);
+
+      /* Set the stencil pass/fail flags according to result of depth testing.
+       * if oldmask[i] == 0 then
+       *    Don't touch the stencil value
+       * else if oldmask[i] and newmask[i] then
+       *    Depth test passed
+       * else
+       *    assert(oldmask[i] && !newmask[i])
+       *    Depth test failed
+       * endif
+       */
+      for (i=0;i<n;i++) {
+         ASSERT(mask[i] == 0 || mask[i] == 1);
+         passmask[i] = oldmask[i] & mask[i];
+         failmask[i] = oldmask[i] & (mask[i] ^ 1);
+      }
+
+      /* apply the pass and fail operations */
+      if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) {
+         apply_stencil_op( ctx, ctx->Stencil.ZFailFunc[face], face,
+                           n, stencil, failmask );
+      }
+      if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) {
+         apply_stencil_op( ctx, ctx->Stencil.ZPassFunc[face], face,
+                           n, stencil, passmask );
+      }
+   }
+
+   /*
+    * Write updated stencil values back into hardware stencil buffer.
+    */
+   if (!rb->GetPointer(ctx, rb, 0, 0)) {
+      rb->PutRow(ctx, rb, n, x, y, stencil, NULL);
+   }
+   
+   span->writeAll = GL_FALSE;
+   
+   return GL_TRUE;  /* one or more fragments passed both tests */
+}
+
+
+
+/*
+ * Stencil value address for window coords (X,Y); needs locals stencilStart and stride:
+ */
+#define STENCIL_ADDRESS(X, Y)  (stencilStart + (Y) * stride + (X))
+
+
+
+/**
+ * Apply the given stencil operator for each pixel in the array whose
+ * mask flag is set.  Values are modified in place through rb->Data.
+ * \note  This is for software stencil buffers only.
+ * Input:  n - number of pixels in the array
+ *         x, y - arrays of [n] pixel window coordinates
+ *         oper - the stencil buffer operator; face - 0 or 1 (front or back)
+ *         mask - array [n] of flag:  1=apply operator, 0=don't apply operator
+ */
+static void
+apply_stencil_op_to_pixels( GLcontext *ctx,
+                            GLuint n, const GLint x[], const GLint y[],
+                            GLenum oper, GLuint face, const GLubyte mask[] )
+{
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_renderbuffer *rb = fb->_StencilBuffer;
+   const GLstencil stencilMax = (1 << fb->Visual.stencilBits) - 1;  /* GL_INCR stops here */
+   const GLstencil ref = ctx->Stencil.Ref[face];
+   const GLstencil wrtmask = ctx->Stencil.WriteMask[face];
+   const GLstencil invmask = (GLstencil) (~wrtmask);  /* bits that must be preserved */
+   GLuint i;
+   GLstencil *stencilStart = (GLubyte *) rb->Data;  /* NOTE(review): stencil_test_pixels casts to GLstencil * */
+   const GLuint stride = rb->Width;  /* stencil values per row, see STENCIL_ADDRESS */
+
+   ASSERT(rb->GetPointer(ctx, rb, 0, 0));
+   ASSERT(sizeof(GLstencil) == 1);
+
+   switch (oper) {
+      case GL_KEEP:
+         /* do nothing */
+         break;
+      case GL_ZERO:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = 0;
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+		  *sptr = (GLstencil) (invmask & *sptr);
+	       }
+	    }
+	 }
+	 break;
+      case GL_REPLACE:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = ref;
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+		  *sptr = (GLstencil) ((invmask & *sptr ) | (wrtmask & ref));
+	       }
+	    }
+	 }
+	 break;
+      case GL_INCR:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+		  if (*sptr < stencilMax) {
+		     *sptr = (GLstencil) (*sptr + 1);
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+		  if (*sptr < stencilMax) {
+		     *sptr = (GLstencil) ((invmask & *sptr) | (wrtmask & (*sptr+1)));
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_DECR:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+		  if (*sptr>0) {
+		     *sptr = (GLstencil) (*sptr - 1);
+		  }
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+		  if (*sptr>0) {
+		     *sptr = (GLstencil) ((invmask & *sptr) | (wrtmask & (*sptr-1)));
+		  }
+	       }
+	    }
+	 }
+	 break;
+      case GL_INCR_WRAP_EXT:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = (GLstencil) (*sptr + 1);
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = (GLstencil) ((invmask & *sptr) | (wrtmask & (*sptr+1)));
+	       }
+	    }
+	 }
+	 break;
+      case GL_DECR_WRAP_EXT:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = (GLstencil) (*sptr - 1);
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = (GLstencil) ((invmask & *sptr) | (wrtmask & (*sptr-1)));
+	       }
+	    }
+	 }
+	 break;
+      case GL_INVERT:
+	 if (invmask==0) {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = (GLstencil) (~*sptr);
+	       }
+	    }
+	 }
+	 else {
+	    for (i=0;i<n;i++) {
+	       if (mask[i]) {
+                  GLstencil *sptr = STENCIL_ADDRESS( x[i], y[i] );
+                  *sptr = (GLstencil) ((invmask & *sptr) | (wrtmask & ~*sptr));
+	       }
+	    }
+	 }
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad stencilop in apply_stencil_op_to_pixels");
+   }
+}
+
+
+
+/**
+ * Apply stencil test to an array of pixels before depth buffering.
+ * The fail-op (if not GL_KEEP) is applied to the pixels that fail the test.
+ * \note Used for software stencil buffer only.
+ * Input:  face - 0 or 1, selects the ctx->Stencil state; n - number of pixels
+ *         x, y - arrays of [n] pixel coordinates to stencil
+ *         mask - array [n] of flag:  0=skip the pixel, 1=stencil the pixel
+ * Output:  mask - pixels which fail the stencil test will have their
+ *                 mask flag set to 0.
+ * \return  GL_FALSE = func is GL_NEVER or invalid (nothing can pass), else GL_TRUE.
+ */
+static GLboolean
+stencil_test_pixels( GLcontext *ctx, GLuint face, GLuint n,
+                     const GLint x[], const GLint y[], GLubyte mask[] )
+{
+   const struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_renderbuffer *rb = fb->_StencilBuffer;
+   GLubyte fail[MAX_WIDTH];  /* NOTE(review): n <= MAX_WIDTH is not asserted here, unlike do_stencil_test */
+   GLstencil r, s;
+   GLuint i;
+   GLboolean allfail = GL_FALSE;  /* only the GL_NEVER case sets this */
+   const GLuint valueMask = ctx->Stencil.ValueMask[face];  /* ANDed with both ref and stored value */
+   const GLstencil *stencilStart = (GLstencil *) rb->Data;
+   const GLuint stride = rb->Width;  /* stencil values per row, see STENCIL_ADDRESS */
+
+   ASSERT(rb->GetPointer(ctx, rb, 0, 0));
+   ASSERT(sizeof(GLstencil) == 1);
+
+   /*
+    * Perform stencil test.  The results of this operation are stored
+    * in the fail[] array:
+    *   IF fail[i] is non-zero THEN
+    *       the stencil fail operator is to be applied
+    *   ELSE
+    *       the stencil fail operator is not to be applied
+    *   ENDIF
+    */
+
+   switch (ctx->Stencil.Function[face]) {
+      case GL_NEVER:
+         /* always fail */
+         for (i=0;i<n;i++) {
+	    if (mask[i]) {
+	       mask[i] = 0;
+	       fail[i] = 1;
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 allfail = GL_TRUE;
+	 break;
+      case GL_LESS:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+               const GLstencil *sptr = STENCIL_ADDRESS(x[i],y[i]);
+	       s = (GLstencil) (*sptr & valueMask);
+	       if (r < s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_LEQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+               const GLstencil *sptr = STENCIL_ADDRESS(x[i],y[i]);
+	       s = (GLstencil) (*sptr & valueMask);
+	       if (r <= s) {
+		  /* pass */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_GREATER:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+               const GLstencil *sptr = STENCIL_ADDRESS(x[i],y[i]);
+	       s = (GLstencil) (*sptr & valueMask);
+	       if (r > s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_GEQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+               const GLstencil *sptr = STENCIL_ADDRESS(x[i],y[i]);
+	       s = (GLstencil) (*sptr & valueMask);
+	       if (r >= s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_EQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+               const GLstencil *sptr = STENCIL_ADDRESS(x[i],y[i]);
+	       s = (GLstencil) (*sptr & valueMask);
+	       if (r == s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_NOTEQUAL:
+	 r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask);
+	 for (i=0;i<n;i++) {
+	    if (mask[i]) {
+               const GLstencil *sptr = STENCIL_ADDRESS(x[i],y[i]);
+	       s = (GLstencil) (*sptr & valueMask);
+	       if (r != s) {
+		  /* passed */
+		  fail[i] = 0;
+	       }
+	       else {
+		  fail[i] = 1;
+		  mask[i] = 0;
+	       }
+	    }
+	    else {
+	       fail[i] = 0;
+	    }
+	 }
+	 break;
+      case GL_ALWAYS:
+	 /* always pass */
+	 for (i=0;i<n;i++) {
+	    fail[i] = 0;
+	 }
+	 break;
+      default:
+         _mesa_problem(ctx, "Bad stencil func in gl_stencil_pixels");
+         return 0;
+   }
+
+   if (ctx->Stencil.FailFunc[face] != GL_KEEP) {
+      apply_stencil_op_to_pixels( ctx, n, x, y, ctx->Stencil.FailFunc[face],
+                                  face, fail );
+   }
+
+   return !allfail;
+}
+
+
+
+
+/**
+ * Apply stencil and depth testing to an array of pixels.
+ * This is used both for software and hardware stencil buffers.
+ *
+ * The code is almost identical to stencil_and_ztest_span().  In both
+ * buffer-access paths the ZFail/ZPass stencil ops are applied only to
+ * fragments still enabled in the mask after the stencil test.
+ *
+ * Input:  span->end - number of pixels in the arrays
+ *         span->array->x, y - arrays of pixel positions
+ *         span->array->mask - flags (1=test this pixel, 0=skip the pixel)
+ *         face - index into the per-face stencil state
+ * Output: span->array->mask - flags (1=stencil and depth test passed)
+ * Return: GL_FALSE - all fragments failed the stencil test (direct path only)
+ *         GL_TRUE - otherwise; the no-direct-access path always returns it
+ */
+static GLboolean
+stencil_and_ztest_pixels( GLcontext *ctx, struct sw_span *span, GLuint face )
+{
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_renderbuffer *rb = fb->_StencilBuffer;
+   const GLuint n = span->end;
+   const GLint *x = span->array->x;
+   const GLint *y = span->array->y;
+   GLubyte *mask = span->array->mask;
+
+   ASSERT(span->arrayMask & SPAN_XY);
+   ASSERT(ctx->Stencil.Enabled);
+   ASSERT(n <= MAX_WIDTH);
+
+   if (!rb->GetPointer(ctx, rb, 0, 0)) {
+      /* No direct access: operate on a local copy of the stencil values */
+      GLstencil stencil[MAX_WIDTH];
+      GLubyte origMask[MAX_WIDTH], stencilPass[MAX_WIDTH];
+
+      ASSERT(rb->DataType == GL_UNSIGNED_BYTE);
+      _swrast_get_values(ctx, rb, n, x, y, stencil, sizeof(GLubyte));
+      /* origMask = pixels enabled on entry; all of them are written back */
+      _mesa_memcpy(origMask, mask, n * sizeof(GLubyte));
+      (void) do_stencil_test(ctx, face, n, stencil, mask);
+
+      if (ctx->Depth.Test == GL_FALSE) {
+         apply_stencil_op(ctx, ctx->Stencil.ZPassFunc[face], face,
+                          n, stencil, mask);
+      }
+      else {
+         /* stencilPass = mask after the stencil test, before depth testing */
+         _mesa_memcpy(stencilPass, mask, n * sizeof(GLubyte));
+         _swrast_depth_test_span(ctx, span);
+         if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) {
+            GLubyte failmask[MAX_WIDTH];
+            GLuint i;
+            for (i = 0; i < n; i++) {
+               ASSERT(mask[i] == 0 || mask[i] == 1);
+               failmask[i] = stencilPass[i] & (mask[i] ^ 1);
+            }
+            apply_stencil_op(ctx, ctx->Stencil.ZFailFunc[face], face,
+                             n, stencil, failmask);
+         }
+         if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) {
+            GLubyte passmask[MAX_WIDTH];
+            GLuint i;
+            for (i = 0; i < n; i++) {
+               ASSERT(mask[i] == 0 || mask[i] == 1);
+               passmask[i] = stencilPass[i] & mask[i];
+            }
+            apply_stencil_op(ctx, ctx->Stencil.ZPassFunc[face], face,
+                             n, stencil, passmask);
+         }
+      }
+
+      /* Write updated stencil values into hardware stencil buffer */
+      rb->PutValues(ctx, rb, n, x, y, stencil, origMask);
+
+      return GL_TRUE;
+   }
+   else {
+      /* Direct access to stencil buffer */
+
+      if (stencil_test_pixels(ctx, face, n, x, y, mask) == GL_FALSE) {
+         /* all fragments failed the stencil test, we're done. */
+         return GL_FALSE;
+      }
+
+      if (ctx->Depth.Test==GL_FALSE) {
+         apply_stencil_op_to_pixels(ctx, n, x, y,
+                                    ctx->Stencil.ZPassFunc[face], face, mask);
+      }
+      else {
+         GLubyte passmask[MAX_WIDTH], failmask[MAX_WIDTH], oldmask[MAX_WIDTH];
+         GLuint i;
+
+         _mesa_memcpy(oldmask, mask, n * sizeof(GLubyte));
+
+         _swrast_depth_test_span(ctx, span);
+
+         for (i=0;i<n;i++) {
+            ASSERT(mask[i] == 0 || mask[i] == 1);
+            passmask[i] = oldmask[i] & mask[i];
+            failmask[i] = oldmask[i] & (mask[i] ^ 1);
+         }
+
+         if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) {
+            apply_stencil_op_to_pixels(ctx, n, x, y,
+                                       ctx->Stencil.ZFailFunc[face],
+                                       face, failmask);
+         }
+         if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) {
+            apply_stencil_op_to_pixels(ctx, n, x, y,
+                                       ctx->Stencil.ZPassFunc[face],
+                                       face, passmask);
+         }
+      }
+
+      return GL_TRUE;  /* one or more fragments passed both tests */
+   }
+}
+
+
+/**
+ * Stencil/depth test a span: SPAN_XY spans (per-pixel x/y arrays) go to
+ * stencil_and_ztest_pixels(), all others to stencil_and_ztest_span().
+ * \return GL_TRUE = one or more fragments passed, GL_FALSE = all failed.
+ */
+GLboolean
+_swrast_stencil_and_ztest_span(GLcontext *ctx, struct sw_span *span)
+{
+   const GLuint face = span->facing;
+   /* a non-zero face is only possible when using two-sided stencil */
+   ASSERT(ctx->Stencil._TestTwoSide || face == 0);
+   return (span->arrayMask & SPAN_XY) ? stencil_and_ztest_pixels(ctx, span, face)
+                                      : stencil_and_ztest_span(ctx, span, face);
+}
+
+
+#if 0  /* compiled out: clips a span's x range against the buffer size */
+GLuint
+clip_span(GLuint bufferWidth, GLuint bufferHeight,
+          GLint x, GLint y, GLuint *count)
+{
+   GLuint n = *count;
+   GLuint skipPixels = 0;
+   /* NOTE(review): signed x/y meet unsigned n and sizes in the tests below */
+   if (y < 0 || y >= bufferHeight || x + n <= 0 || x >= bufferWidth) {
+      /* span lies entirely outside the buffer: nothing remains */
+      n = 0;
+   }
+   else {
+      /* left clip: drop the pixels that fall left of column 0 */
+      if (x < 0) {
+         skipPixels = -x;
+         x = 0;
+         n -= skipPixels;
+      }
+      /* right clip: drop the pixels that fall past the last column */
+      if (x + n > bufferWidth) {
+         GLint dx = x + n - bufferWidth;
+         n -= dx;
+      }
+   }
+   /* hand back the clipped length; the return value is the left-side skip */
+   *count = n;
+
+   return skipPixels;
+}
+#endif
+
+
+/**
+ * Return a span of stencil values from the stencil buffer.
+ * Used for glRead/CopyPixels.  The span is clipped to the renderbuffer.
+ * Input:  n - how many pixels
+ *         x,y - location of first pixel
+ * Output:  stencil - the array of stencil values
+ */
+void
+_swrast_read_stencil_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                          GLint n, GLint x, GLint y, GLstencil stencil[])
+{
+   /* (GLint) casts keep the clip tests signed even if Width/Height are unsigned */
+   const GLint width = (GLint) rb->Width, height = (GLint) rb->Height;
+
+   if (y < 0 || y >= height || x + n <= 0 || x >= width) {
+      return; /* span is completely outside framebuffer; undefined values OK */
+   }
+   if (x < 0) {
+      GLint dx = -x;   /* number of pixels clipped off the left edge */
+      x = 0;
+      n -= dx;
+      stencil += dx;
+   }
+   if (x + n > width) {
+      n = width - x;   /* clip at the right edge */
+   }
+   if (n <= 0) {
+      return;
+   }
+
+   rb->GetRow(ctx, rb, n, x, y, stencil);
+}
+
+
+
+/**
+ * Write a span of stencil values to the stencil buffer.  This function
+ * applies the stencil write mask when needed.
+ * Used for glDraw/CopyPixels.  The span is clipped to the renderbuffer.
+ * Input:  n - how many pixels
+ *         x, y - location of first pixel
+ *         stencil - the array of stencil values
+ */
+void
+_swrast_write_stencil_span(GLcontext *ctx, GLint n, GLint x, GLint y,
+                           const GLstencil stencil[] )
+{
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_renderbuffer *rb = fb->_StencilBuffer;
+   const GLuint stencilMax = (1 << fb->Visual.stencilBits) - 1;
+   const GLuint stencilMask = ctx->Stencil.WriteMask[0];
+   /* (GLint) casts keep the clip tests signed even if Width/Height are unsigned */
+   const GLint width = (GLint) rb->Width, height = (GLint) rb->Height;
+
+   if (y < 0 || y >= height || x + n <= 0 || x >= width) {
+      return; /* span is completely outside framebuffer */
+   }
+   if (x < 0) {
+      GLint dx = -x;   /* number of pixels clipped off the left edge */
+      x = 0;
+      n -= dx;
+      stencil += dx;
+   }
+   if (x + n > width) {
+      n = width - x;   /* clip at the right edge */
+   }
+   if (n <= 0) {
+      return;
+   }
+
+   if ((stencilMask & stencilMax) != stencilMax) {
+      /* need to apply writemask: merge new bits with the existing values */
+      GLstencil destVals[MAX_WIDTH], newVals[MAX_WIDTH];
+      GLint i;
+      rb->GetRow(ctx, rb, n, x, y, destVals);
+      for (i = 0; i < n; i++) {
+         newVals[i]
+            = (stencil[i] & stencilMask) | (destVals[i] & ~stencilMask);
+      }
+      rb->PutRow(ctx, rb, n, x, y, newVals, NULL);
+   }
+   else {
+      rb->PutRow(ctx, rb, n, x, y, stencil, NULL);
+   }
+}
+
+
+
+/**
+ * Clear the stencil buffer.
+ *
+ * The region cleared is the draw buffer's _Xmin.._Xmax by _Ymin.._Ymax
+ * rectangle and the value stored is ctx->Stencil.Clear & WriteMask[0].
+ * When the write mask does not cover every stencil bit, the bits outside
+ * the mask are preserved.  Nothing is done if rb is NULL or the mask is 0.
+ *
+ * Four strategies, chosen by whether rb->GetPointer() yields a pointer
+ * and whether the write mask covers every stencil bit:
+ *   - direct, partial mask:  read-modify-write each element in place
+ *   - direct, full mask:     memset (a single call for full rows of bytes)
+ *   - indirect, partial:     GetRow / modify / PutRow for every row
+ *   - indirect, full mask:   PutMonoRow for every row
+ * Elements are treated as GLubyte or GLushort according to rb->DataType.
+ */
+void
+_swrast_clear_stencil_buffer( GLcontext *ctx, struct gl_renderbuffer *rb )
+{
+   const struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const GLubyte stencilBits = fb->Visual.stencilBits;
+   const GLuint stencilMax = (1 << stencilBits) - 1;
+   const GLuint writeMask = ctx->Stencil.WriteMask[0];
+   const GLuint keepBits = ~writeMask;
+   const GLuint clearVal = (ctx->Stencil.Clear & writeMask);
+   GLboolean partial, bytes;
+   GLint x, y, width, height, row, col;
+
+   if (!rb || writeMask == 0)
+      return;
+
+   ASSERT(rb->DataType == GL_UNSIGNED_BYTE ||
+          rb->DataType == GL_UNSIGNED_SHORT);
+
+   ASSERT(rb->_BaseFormat == GL_STENCIL_INDEX);
+
+   /* compute region to clear */
+   x = fb->_Xmin;
+   y = fb->_Ymin;
+   width  = fb->_Xmax - fb->_Xmin;
+   height = fb->_Ymax - fb->_Ymin;
+
+   /* partial: some stencil bits are write-protected and must be kept */
+   partial = (writeMask & stencilMax) != stencilMax;
+   bytes = rb->DataType == GL_UNSIGNED_BYTE;
+
+   if (rb->GetPointer(ctx, rb, 0, 0)) {
+      /* Direct buffer access */
+      if (partial && bytes) {
+         for (row = 0; row < height; row++) {
+            GLubyte *s = (GLubyte*) rb->GetPointer(ctx, rb, x, y + row);
+            for (col = 0; col < width; col++) {
+               s[col] = (s[col] & keepBits) | clearVal;
+            }
+         }
+      }
+      else if (partial) {
+         for (row = 0; row < height; row++) {
+            GLushort *s = (GLushort*) rb->GetPointer(ctx, rb, x, y + row);
+            for (col = 0; col < width; col++) {
+               s[col] = (s[col] & keepBits) | clearVal;
+            }
+         }
+      }
+      else if (width == rb->Width && bytes) {
+         /* optimized case: full-width rows of bytes, cleared in one call */
+         /* Note: bottom-to-top raster assumed! */
+         GLubyte *s = (GLubyte *) rb->GetPointer(ctx, rb, x, y);
+         GLuint len = width * height * sizeof(GLubyte);
+         _mesa_memset(s, clearVal, len);
+      }
+      else {
+         /* general case: one memset per row */
+         for (row = 0; row < height; row++) {
+            GLvoid *s = rb->GetPointer(ctx, rb, x, y + row);
+            if (bytes) {
+               _mesa_memset(s, clearVal, width);
+            }
+            else {
+               _mesa_memset16((short unsigned int*) s, clearVal, width);
+            }
+         }
+      }
+   }
+   else if (partial) {
+      /* No direct access, masked clear: fetch, patch and store each row */
+      if (bytes) {
+         for (row = 0; row < height; row++) {
+            GLubyte vals[MAX_WIDTH];
+            rb->GetRow(ctx, rb, width, x, y + row, vals);
+            for (col = 0; col < width; col++) {
+               vals[col] = (vals[col] & keepBits) | clearVal;
+            }
+            rb->PutRow(ctx, rb, width, x, y + row, vals, NULL);
+         }
+      }
+      else {
+         for (row = 0; row < height; row++) {
+            GLushort vals[MAX_WIDTH];
+            rb->GetRow(ctx, rb, width, x, y + row, vals);
+            for (col = 0; col < width; col++) {
+               vals[col] = (vals[col] & keepBits) | clearVal;
+            }
+            rb->PutRow(ctx, rb, width, x, y + row, vals, NULL);
+         }
+      }
+   }
+   else {
+      /* No direct access, no bit masking: store one value across each row */
+      const GLubyte clear8 = (GLubyte) clearVal;
+      const GLushort clear16 = (GLushort) clearVal;
+      const void *clear = bytes ? (const void *) &clear8
+                                : (const void *) &clear16;
+
+      for (row = 0; row < height; row++) {
+         rb->PutMonoRow(ctx, rb, width, x, y + row, clear, NULL);
+      }
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_stencil.h b/dist/Mesa/src/mesa/swrast/s_stencil.h
new file mode 100644
index 000000000..fabc25250
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_stencil.h
@@ -0,0 +1,53 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_STENCIL_H
+#define S_STENCIL_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+
+/* Stencil+depth test a span; GL_TRUE if one or more fragments passed. */
+extern GLboolean
+_swrast_stencil_and_ztest_span(GLcontext *ctx, struct sw_span *span);
+
+/* Read n stencil values starting at (x,y); used for glRead/CopyPixels. */
+extern void
+_swrast_read_stencil_span(GLcontext *ctx, struct gl_renderbuffer *rb,
+                          GLint n, GLint x, GLint y, GLstencil stencil[]);
+
+/* Write n stencil values at (x,y), applying the stencil write mask. */
+extern void
+_swrast_write_stencil_span( GLcontext *ctx, GLint n, GLint x, GLint y,
+                          const GLstencil stencil[] );
+
+/* Clear the stencil buffer. */
+extern void
+_swrast_clear_stencil_buffer( GLcontext *ctx, struct gl_renderbuffer *rb );
+
+
+#endif /* S_STENCIL_H */
diff --git a/dist/Mesa/src/mesa/swrast/s_texcombine.c b/dist/Mesa/src/mesa/swrast/s_texcombine.c
new file mode 100644
index 000000000..ac26c9ceb
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_texcombine.c
@@ -0,0 +1,1164 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "imports.h"
+#include "macros.h"
+#include "pixel.h"
+
+#include "s_context.h"
+#include "s_texcombine.h"
+
+
+#define PROD(A,B)   ( (GLuint)(A) * ((GLuint)(B)+1) )  /* unsigned A*(B+1) */
+#define S_PROD(A,B) ( (GLint)(A) * ((GLint)(B)+1) )    /* signed A*(B+1) */
+#if CHAN_BITS == 32  /* ChanTemp: GLfloat for 32-bit channels, else GLuint */
+typedef GLfloat ChanTemp;
+#else
+typedef GLuint ChanTemp;
+#endif /* CHAN_BITS == 32 */
+
+
+/**
+ * Do texture application for GL_ARB/EXT_texture_env_combine.
+ * This function also supports GL_{EXT,ARB}_texture_env_dot3 and
+ * GL_ATI_texture_env_combine3.  Since "classic" texture environments are
+ * implemented using GL_ARB_texture_env_combine-like state, this same function
+ * is used for classic texture environment application as well.
+ *
+ * \param ctx          rendering context
+ * \param unit         index of the texture unit to apply
+ * \param n            number of fragments to process (span width)
+ * \param primary_rgba incoming fragment color array
+ * \param texelBuffer  pointer to texel colors for all texture units
+ * 
+ * \param rgba         incoming colors, which get modified here
+ */
+static void
+texture_combine( const GLcontext *ctx, GLuint unit, GLuint n,
+                 CONST GLchan (*primary_rgba)[4],
+                 CONST GLchan *texelBuffer,
+                 GLchan (*rgba)[4] )
+{
+   const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
+   const GLchan (*argRGB [3])[4];
+   const GLchan (*argA [3])[4];
+   const GLuint RGBshift = textureUnit->_CurrentCombine->ScaleShiftRGB;
+   const GLuint Ashift   = textureUnit->_CurrentCombine->ScaleShiftA;
+#if CHAN_TYPE == GL_FLOAT
+   const GLchan RGBmult = (GLfloat) (1 << RGBshift);
+   const GLchan Amult = (GLfloat) (1 << Ashift);
+#else
+   const GLint half = (CHAN_MAX + 1) / 2;
+#endif
+   static const GLchan one[4] = { CHAN_MAX, CHAN_MAX, CHAN_MAX, CHAN_MAX };
+   static const GLchan zero[4] = { 0, 0, 0, 0 };
+   const GLuint numColorArgs = textureUnit->_CurrentCombine->_NumArgsRGB;
+   const GLuint numAlphaArgs = textureUnit->_CurrentCombine->_NumArgsA;
+   GLchan ccolor[3][MAX_WIDTH][4];
+   GLuint i, j;
+
+   ASSERT(ctx->Extensions.EXT_texture_env_combine ||
+          ctx->Extensions.ARB_texture_env_combine);
+   ASSERT(SWRAST_CONTEXT(ctx)->_AnyTextureCombine);
+
+   /*
+   printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
+          textureUnit->_CurrentCombine->ModeRGB,
+          textureUnit->_CurrentCombine->ModeA,
+          textureUnit->_CurrentCombine->SourceRGB[0],
+          textureUnit->_CurrentCombine->SourceA[0],
+          textureUnit->_CurrentCombine->SourceRGB[1],
+          textureUnit->_CurrentCombine->SourceA[1]);
+   */
+
+   /*
+    * Do operand setup for up to 3 operands.  Loop over the terms.
+    */
+   for (j = 0; j < numColorArgs; j++) {
+      const GLenum srcRGB = textureUnit->_CurrentCombine->SourceRGB[j];
+
+      switch (srcRGB) {
+         case GL_TEXTURE:
+            argRGB[j] = (const GLchan (*)[4])
+               (texelBuffer + unit * (n * 4 * sizeof(GLchan)));
+            break;
+         case GL_PRIMARY_COLOR:
+            argRGB[j] = primary_rgba;
+            break;
+         case GL_PREVIOUS:
+            argRGB[j] = (const GLchan (*)[4]) rgba;
+            break;
+         case GL_CONSTANT:
+            {
+               GLchan (*c)[4] = ccolor[j];
+               GLchan red, green, blue, alpha;
+               UNCLAMPED_FLOAT_TO_CHAN(red,   textureUnit->EnvColor[0]);
+               UNCLAMPED_FLOAT_TO_CHAN(green, textureUnit->EnvColor[1]);
+               UNCLAMPED_FLOAT_TO_CHAN(blue,  textureUnit->EnvColor[2]);
+               UNCLAMPED_FLOAT_TO_CHAN(alpha, textureUnit->EnvColor[3]);
+               for (i = 0; i < n; i++) {
+                  c[i][RCOMP] = red;
+                  c[i][GCOMP] = green;
+                  c[i][BCOMP] = blue;
+                  c[i][ACOMP] = alpha;
+               }
+               argRGB[j] = (const GLchan (*)[4]) ccolor[j];
+            }
+            break;
+	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
+	  */
+	 case GL_ZERO:
+            argRGB[j] = & zero;
+            break;
+	 case GL_ONE:
+            argRGB[j] = & one;
+            break;
+         default:
+            /* ARB_texture_env_crossbar source */
+            {
+               const GLuint srcUnit = srcRGB - GL_TEXTURE0;
+               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
+               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
+                  return;
+               argRGB[j] = (const GLchan (*)[4])
+                  (texelBuffer + srcUnit * (n * 4 * sizeof(GLchan)));
+            }
+      }
+
+      if (textureUnit->_CurrentCombine->OperandRGB[j] != GL_SRC_COLOR) {
+         const GLchan (*src)[4] = argRGB[j];
+         GLchan (*dst)[4] = ccolor[j];
+
+         /* point to new arg[j] storage */
+         argRGB[j] = (const GLchan (*)[4]) ccolor[j];
+
+         if (textureUnit->_CurrentCombine->OperandRGB[j] == GL_ONE_MINUS_SRC_COLOR) {
+            for (i = 0; i < n; i++) {
+               dst[i][RCOMP] = CHAN_MAX - src[i][RCOMP];
+               dst[i][GCOMP] = CHAN_MAX - src[i][GCOMP];
+               dst[i][BCOMP] = CHAN_MAX - src[i][BCOMP];
+            }
+         }
+         else if (textureUnit->_CurrentCombine->OperandRGB[j] == GL_SRC_ALPHA) {
+            for (i = 0; i < n; i++) {
+               dst[i][RCOMP] = src[i][ACOMP];
+               dst[i][GCOMP] = src[i][ACOMP];
+               dst[i][BCOMP] = src[i][ACOMP];
+            }
+         }
+         else {
+            ASSERT(textureUnit->_CurrentCombine->OperandRGB[j] ==GL_ONE_MINUS_SRC_ALPHA);
+            for (i = 0; i < n; i++) {
+               dst[i][RCOMP] = CHAN_MAX - src[i][ACOMP];
+               dst[i][GCOMP] = CHAN_MAX - src[i][ACOMP];
+               dst[i][BCOMP] = CHAN_MAX - src[i][ACOMP];
+            }
+         }
+      }
+   }
+
+   /*
+    * Set up the argA[i] pointers
+    */
+   for (j = 0; j < numAlphaArgs; j++) {
+      const GLenum srcA = textureUnit->_CurrentCombine->SourceA[j];
+
+      switch (srcA) {
+         case GL_TEXTURE:
+            argA[j] = (const GLchan (*)[4])
+               (texelBuffer + unit * (n * 4 * sizeof(GLchan)));
+            break;
+         case GL_PRIMARY_COLOR:
+            argA[j] = primary_rgba;
+            break;
+         case GL_PREVIOUS:
+            argA[j] = (const GLchan (*)[4]) rgba;
+            break;
+         case GL_CONSTANT:
+            {
+               GLchan alpha, (*c)[4] = ccolor[j];
+               UNCLAMPED_FLOAT_TO_CHAN(alpha, textureUnit->EnvColor[3]);
+               for (i = 0; i < n; i++)
+                  c[i][ACOMP] = alpha;
+               argA[j] = (const GLchan (*)[4]) ccolor[j];
+            }
+            break;
+	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
+	  */
+	 case GL_ZERO:
+            argA[j] = & zero;
+            break;
+	 case GL_ONE:
+            argA[j] = & one;
+            break;
+         default:
+            /* ARB_texture_env_crossbar source */
+            {
+               const GLuint srcUnit = srcA - GL_TEXTURE0;
+               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
+               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
+                  return;
+               argA[j] = (const GLchan (*)[4])
+                  (texelBuffer + srcUnit * (n * 4 * sizeof(GLchan)));
+            }
+      }
+
+      if (textureUnit->_CurrentCombine->OperandA[j] == GL_ONE_MINUS_SRC_ALPHA) {
+         const GLchan (*src)[4] = argA[j];
+         GLchan (*dst)[4] = ccolor[j];
+         argA[j] = (const GLchan (*)[4]) ccolor[j];
+         for (i = 0; i < n; i++) {
+            dst[i][ACOMP] = CHAN_MAX - src[i][ACOMP];
+         }
+      }
+   }
+
+   /*
+    * Do the texture combine.
+    */
+   switch (textureUnit->_CurrentCombine->ModeRGB) {
+      case GL_REPLACE:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            if (RGBshift) {
+               for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+                  rgba[i][RCOMP] = arg0[i][RCOMP] * RGBmult;
+                  rgba[i][GCOMP] = arg0[i][GCOMP] * RGBmult;
+                  rgba[i][BCOMP] = arg0[i][BCOMP] * RGBmult;
+#else
+                  GLuint r = (GLuint) arg0[i][RCOMP] << RGBshift;
+                  GLuint g = (GLuint) arg0[i][GCOMP] << RGBshift;
+                  GLuint b = (GLuint) arg0[i][BCOMP] << RGBshift;
+                  rgba[i][RCOMP] = MIN2(r, CHAN_MAX);
+                  rgba[i][GCOMP] = MIN2(g, CHAN_MAX);
+                  rgba[i][BCOMP] = MIN2(b, CHAN_MAX);
+#endif
+               }
+            }
+            else {
+               for (i = 0; i < n; i++) {
+                  rgba[i][RCOMP] = arg0[i][RCOMP];
+                  rgba[i][GCOMP] = arg0[i][GCOMP];
+                  rgba[i][BCOMP] = arg0[i][BCOMP];
+               }
+            }
+         }
+         break;
+      case GL_MODULATE:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - RGBshift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * RGBmult;
+               rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * RGBmult;
+               rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * RGBmult;
+#else
+               GLuint r = PROD(arg0[i][RCOMP], arg1[i][RCOMP]) >> shift;
+               GLuint g = PROD(arg0[i][GCOMP], arg1[i][GCOMP]) >> shift;
+               GLuint b = PROD(arg0[i][BCOMP], arg1[i][BCOMP]) >> shift;
+               rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_ADD:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * RGBmult;
+#else
+               GLint r = ((GLint) arg0[i][RCOMP] + (GLint) arg1[i][RCOMP]) << RGBshift;
+               GLint g = ((GLint) arg0[i][GCOMP] + (GLint) arg1[i][GCOMP]) << RGBshift;
+               GLint b = ((GLint) arg0[i][BCOMP] + (GLint) arg1[i][BCOMP]) << RGBshift;
+               rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_ADD_SIGNED:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5) * RGBmult;
+#else
+               GLint r = (GLint) arg0[i][RCOMP] + (GLint) arg1[i][RCOMP] -half;
+               GLint g = (GLint) arg0[i][GCOMP] + (GLint) arg1[i][GCOMP] -half;
+               GLint b = (GLint) arg0[i][BCOMP] + (GLint) arg1[i][BCOMP] -half;
+               r = (r < 0) ? 0 : r << RGBshift;
+               g = (g < 0) ? 0 : g << RGBshift;
+               b = (b < 0) ? 0 : b << RGBshift;
+               rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_INTERPOLATE:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argRGB[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - RGBshift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
+                      arg1[i][RCOMP] * (CHAN_MAXF - arg2[i][RCOMP])) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
+                      arg1[i][GCOMP] * (CHAN_MAXF - arg2[i][GCOMP])) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
+                      arg1[i][BCOMP] * (CHAN_MAXF - arg2[i][BCOMP])) * RGBmult;
+#else
+               GLuint r = (PROD(arg0[i][RCOMP], arg2[i][RCOMP])
+                           + PROD(arg1[i][RCOMP], CHAN_MAX - arg2[i][RCOMP]))
+                              >> shift;
+               GLuint g = (PROD(arg0[i][GCOMP], arg2[i][GCOMP])
+                           + PROD(arg1[i][GCOMP], CHAN_MAX - arg2[i][GCOMP]))
+                              >> shift;
+               GLuint b = (PROD(arg0[i][BCOMP], arg2[i][BCOMP])
+                           + PROD(arg1[i][BCOMP], CHAN_MAX - arg2[i][BCOMP]))
+                              >> shift;
+               rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_SUBTRACT:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * RGBmult;
+#else
+               GLint r = ((GLint) arg0[i][RCOMP] - (GLint) arg1[i][RCOMP]) << RGBshift;
+               GLint g = ((GLint) arg0[i][GCOMP] - (GLint) arg1[i][GCOMP]) << RGBshift;
+               GLint b = ((GLint) arg0[i][BCOMP] - (GLint) arg1[i][BCOMP]) << RGBshift;
+               rgba[i][RCOMP] = (GLchan) CLAMP(r, 0, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) CLAMP(g, 0, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) CLAMP(b, 0, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_DOT3_RGB_EXT:
+      case GL_DOT3_RGBA_EXT:
+         {
+            /* Do not scale the result by 1 2 or 4 */
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               GLchan dot = ((arg0[i][RCOMP]-0.5F) * (arg1[i][RCOMP]-0.5F) +
+                             (arg0[i][GCOMP]-0.5F) * (arg1[i][GCOMP]-0.5F) +
+                             (arg0[i][BCOMP]-0.5F) * (arg1[i][BCOMP]-0.5F))
+                            * 4.0F;
+               dot = CLAMP(dot, 0.0F, CHAN_MAXF);
+#else
+               GLint dot = (S_PROD((GLint)arg0[i][RCOMP] - half,
+				   (GLint)arg1[i][RCOMP] - half) +
+			    S_PROD((GLint)arg0[i][GCOMP] - half,
+				   (GLint)arg1[i][GCOMP] - half) +
+			    S_PROD((GLint)arg0[i][BCOMP] - half,
+				   (GLint)arg1[i][BCOMP] - half)) >> 6;
+               dot = CLAMP(dot, 0, CHAN_MAX);
+#endif
+               rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = (GLchan) dot;
+            }
+         }
+         break;
+      case GL_DOT3_RGB:
+      case GL_DOT3_RGBA:
+         {
+            /* DO scale the result by 1 2 or 4 */
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               GLchan dot = ((arg0[i][RCOMP]-0.5F) * (arg1[i][RCOMP]-0.5F) +
+                             (arg0[i][GCOMP]-0.5F) * (arg1[i][GCOMP]-0.5F) +
+                             (arg0[i][BCOMP]-0.5F) * (arg1[i][BCOMP]-0.5F))
+                            * 4.0F * RGBmult;
+               dot = CLAMP(dot, 0.0, CHAN_MAXF);
+#else
+               GLint dot = (S_PROD((GLint)arg0[i][RCOMP] - half,
+				   (GLint)arg1[i][RCOMP] - half) +
+			    S_PROD((GLint)arg0[i][GCOMP] - half,
+				   (GLint)arg1[i][GCOMP] - half) +
+			    S_PROD((GLint)arg0[i][BCOMP] - half,
+				   (GLint)arg1[i][BCOMP] - half)) >> 6;
+               dot <<= RGBshift;
+               dot = CLAMP(dot, 0, CHAN_MAX);
+#endif
+               rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = (GLchan) dot;
+            }
+         }
+         break;
+      case GL_MODULATE_ADD_ATI:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argRGB[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - RGBshift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) + arg1[i][RCOMP]) * RGBmult;
+               rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) + arg1[i][GCOMP]) * RGBmult;
+               rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) + arg1[i][BCOMP]) * RGBmult;
+#else
+               GLuint r = (PROD(arg0[i][RCOMP], arg2[i][RCOMP])
+                           + ((GLuint) arg1[i][RCOMP] << CHAN_BITS)) >> shift;
+               GLuint g = (PROD(arg0[i][GCOMP], arg2[i][GCOMP])
+                           + ((GLuint) arg1[i][GCOMP] << CHAN_BITS)) >> shift;
+               GLuint b = (PROD(arg0[i][BCOMP], arg2[i][BCOMP])
+                           + ((GLuint) arg1[i][BCOMP] << CHAN_BITS)) >> shift;
+               rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
+            }
+	 }
+         break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argRGB[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - RGBshift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) + arg1[i][RCOMP] - 0.5) * RGBmult;
+               rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) + arg1[i][GCOMP] - 0.5) * RGBmult;
+               rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) + arg1[i][BCOMP] - 0.5) * RGBmult;
+#else
+               GLint r = (S_PROD(arg0[i][RCOMP], arg2[i][RCOMP])
+			  + (((GLint) arg1[i][RCOMP] - half) << CHAN_BITS))
+		    >> shift;
+               GLint g = (S_PROD(arg0[i][GCOMP], arg2[i][GCOMP])
+			  + (((GLint) arg1[i][GCOMP] - half) << CHAN_BITS))
+		    >> shift;
+               GLint b = (S_PROD(arg0[i][BCOMP], arg2[i][BCOMP])
+			  + (((GLint) arg1[i][BCOMP] - half) << CHAN_BITS))
+		    >> shift;
+               rgba[i][RCOMP] = (GLchan) CLAMP(r, 0, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) CLAMP(g, 0, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) CLAMP(b, 0, CHAN_MAX);
+#endif
+            }
+	 }
+         break;
+      case GL_MODULATE_SUBTRACT_ATI:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argRGB[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - RGBshift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) - arg1[i][RCOMP]) * RGBmult;
+               rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) - arg1[i][GCOMP]) * RGBmult;
+               rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) - arg1[i][BCOMP]) * RGBmult;
+#else
+               GLint r = (S_PROD(arg0[i][RCOMP], arg2[i][RCOMP])
+			  - ((GLint) arg1[i][RCOMP] << CHAN_BITS))
+		    >> shift;
+               GLint g = (S_PROD(arg0[i][GCOMP], arg2[i][GCOMP])
+			  - ((GLint) arg1[i][GCOMP] << CHAN_BITS))
+		    >> shift;
+               GLint b = (S_PROD(arg0[i][BCOMP], arg2[i][BCOMP])
+			  - ((GLint) arg1[i][BCOMP] << CHAN_BITS))
+		    >> shift;
+               rgba[i][RCOMP] = (GLchan) CLAMP(r, 0, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) CLAMP(g, 0, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) CLAMP(b, 0, CHAN_MAX);
+#endif
+            }
+	 }
+         break;
+      default:
+         _mesa_problem(ctx, "invalid combine mode");
+   }
+
+   switch (textureUnit->_CurrentCombine->ModeA) {
+      case GL_REPLACE:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            if (Ashift) {
+               for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+                  GLchan a = arg0[i][ACOMP] * Amult;
+#else
+                  GLuint a = (GLuint) arg0[i][ACOMP] << Ashift;
+#endif
+                  rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+               }
+            }
+            else {
+               for (i = 0; i < n; i++) {
+                  rgba[i][ACOMP] = arg0[i][ACOMP];
+               }
+            }
+         }
+         break;
+      case GL_MODULATE:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - Ashift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * Amult;
+#else
+               GLuint a = (PROD(arg0[i][ACOMP], arg1[i][ACOMP]) >> shift);
+               rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_ADD:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan  (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * Amult;
+#else
+               GLint a = ((GLint) arg0[i][ACOMP] + arg1[i][ACOMP]) << Ashift;
+               rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_ADD_SIGNED:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * Amult;
+#else
+               GLint a = (GLint) arg0[i][ACOMP] + (GLint) arg1[i][ACOMP] -half;
+               a = (a < 0) ? 0 : a << Ashift;
+               rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_INTERPOLATE:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argA[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - Ashift;
+#endif
+            for (i=0; i<n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
+                                 arg1[i][ACOMP] * (CHAN_MAXF - arg2[i][ACOMP]))
+                                * Amult;
+#else
+               GLuint a = (PROD(arg0[i][ACOMP], arg2[i][ACOMP])
+                           + PROD(arg1[i][ACOMP], CHAN_MAX - arg2[i][ACOMP]))
+                              >> shift;
+               rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_SUBTRACT:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * Amult;
+#else
+               GLint a = ((GLint) arg0[i][ACOMP] - (GLint) arg1[i][ACOMP]) << Ashift;
+               rgba[i][ACOMP] = (GLchan) CLAMP(a, 0, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_MODULATE_ADD_ATI:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argA[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - Ashift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) + arg1[i][ACOMP]) * Amult;
+#else
+               GLint a = (PROD(arg0[i][ACOMP], arg2[i][ACOMP])
+			   + ((GLuint) arg1[i][ACOMP] << CHAN_BITS))
+		    >> shift;
+               rgba[i][ACOMP] = (GLchan) CLAMP(a, 0, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argA[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - Ashift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) + arg1[i][ACOMP] - 0.5F) * Amult;
+#else
+               GLint a = (S_PROD(arg0[i][ACOMP], arg2[i][ACOMP])
+			  + (((GLint) arg1[i][ACOMP] - half) << CHAN_BITS))
+		    >> shift;
+               rgba[i][ACOMP] = (GLchan) CLAMP(a, 0, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_MODULATE_SUBTRACT_ATI:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+            const GLchan (*arg2)[4] = (const GLchan (*)[4]) argA[2];
+#if CHAN_TYPE != GL_FLOAT
+            const GLint shift = CHAN_BITS - Ashift;
+#endif
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) - arg1[i][ACOMP]) * Amult;
+#else
+               GLint a = (S_PROD(arg0[i][ACOMP], arg2[i][ACOMP]) 
+			  - ((GLint) arg1[i][ACOMP] << CHAN_BITS))
+		    >> shift;
+               rgba[i][ACOMP] = (GLchan) CLAMP(a, 0, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      default:
+         _mesa_problem(ctx, "invalid combine mode");
+   }
+
+   /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
+    * This is kind of a kludge.  It would have been better if the spec
+    * were written such that the GL_COMBINE_ALPHA value could be set to
+    * GL_DOT3.
+    */
+   if (textureUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT ||
+       textureUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) {
+      for (i = 0; i < n; i++) {
+	 rgba[i][ACOMP] = rgba[i][RCOMP];
+      }
+   }
+}
+#undef PROD
+
+
+/**
+ * Apply a conventional OpenGL texture env mode (REPLACE, MODULATE, DECAL,
+ * BLEND, or ADD) to an array of fragments.
+ * Input:  ctx - GL context, passed to _mesa_problem() on bad state
+ *         texUnit - texture unit supplying EnvMode, EnvColor and _Current
+ *         n - number of fragments
+ *         primary_rgba - primary colors (unused by this function)
+ *         texel - array of texel colors, one per fragment
+ * InOut:  rgba - incoming fragment colors modified by texel colors
+ *                according to the texture environment mode.
+ */
+static void
+texture_apply( const GLcontext *ctx,
+               const struct gl_texture_unit *texUnit,
+               GLuint n,
+               CONST GLchan primary_rgba[][4], CONST GLchan texel[][4],
+               GLchan rgba[][4] )
+{
+   GLint baseLevel;
+   GLuint i;
+   GLchan Rc, Gc, Bc, Ac;  /* env color as GLchan; set and used for GL_BLEND only */
+   GLenum format;  /* effective base format that selects the equations below */
+   (void) primary_rgba;  /* parameter is not read in this function */
+
+   ASSERT(texUnit);
+   ASSERT(texUnit->_Current);
+
+   baseLevel = texUnit->_Current->BaseLevel;
+   ASSERT(texUnit->_Current->Image[0][baseLevel]);
+   /* Start from the _BaseFormat of image [0][baseLevel] of the current texture. */
+   format = texUnit->_Current->Image[0][baseLevel]->_BaseFormat;
+
+   if (format == GL_COLOR_INDEX || format == GL_YCBCR_MESA) {
+      format = GL_RGBA;  /* a bit of a hack: these are applied as RGBA */
+   }
+   else if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL_EXT) {
+      format = texUnit->_Current->DepthMode;  /* depth texels use DepthMode */
+   }
+   /* Notation below: f = fragment (rgba), t = texel, c = env color, v = result. */
+   switch (texUnit->EnvMode) {
+      case GL_REPLACE:
+	 switch (format) {
+	    case GL_ALPHA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf */
+                  /* Av = At */
+                  rgba[i][ACOMP] = texel[i][ACOMP];
+	       }
+	       break;
+	    case GL_LUMINANCE:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Lt */
+                  GLchan Lt = texel[i][RCOMP];
+                  rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = Lt;
+                  /* Av = Af */
+	       }
+	       break;
+	    case GL_LUMINANCE_ALPHA:
+	       for (i=0;i<n;i++) {
+                  GLchan Lt = texel[i][RCOMP];
+		  /* Cv = Lt */
+		  rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = Lt;
+		  /* Av = At */
+		  rgba[i][ACOMP] = texel[i][ACOMP];
+	       }
+	       break;
+	    case GL_INTENSITY:
+	       for (i=0;i<n;i++) {
+		  /* Cv = It */
+                  GLchan It = texel[i][RCOMP];
+                  rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = It;
+                  /* Av = It */
+                  rgba[i][ACOMP] = It;
+	       }
+	       break;
+	    case GL_RGB:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Ct */
+		  rgba[i][RCOMP] = texel[i][RCOMP];
+		  rgba[i][GCOMP] = texel[i][GCOMP];
+		  rgba[i][BCOMP] = texel[i][BCOMP];
+		  /* Av = Af */
+	       }
+	       break;
+	    case GL_RGBA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Ct */
+		  rgba[i][RCOMP] = texel[i][RCOMP];
+		  rgba[i][GCOMP] = texel[i][GCOMP];
+		  rgba[i][BCOMP] = texel[i][BCOMP];
+		  /* Av = At */
+		  rgba[i][ACOMP] = texel[i][ACOMP];
+	       }
+	       break;
+            default:
+               _mesa_problem(ctx, "Bad format (GL_REPLACE) in texture_apply");
+               return;
+	 }
+	 break;
+
+      case GL_MODULATE:
+         switch (format) {
+	    case GL_ALPHA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf */
+		  /* Av = AfAt */
+		  rgba[i][ACOMP] = CHAN_PRODUCT( rgba[i][ACOMP], texel[i][ACOMP] );
+	       }
+	       break;
+	    case GL_LUMINANCE:
+	       for (i=0;i<n;i++) {
+		  /* Cv = LtCf */
+                  GLchan Lt = texel[i][RCOMP];
+		  rgba[i][RCOMP] = CHAN_PRODUCT( rgba[i][RCOMP], Lt );
+		  rgba[i][GCOMP] = CHAN_PRODUCT( rgba[i][GCOMP], Lt );
+		  rgba[i][BCOMP] = CHAN_PRODUCT( rgba[i][BCOMP], Lt );
+		  /* Av = Af */
+	       }
+	       break;
+	    case GL_LUMINANCE_ALPHA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = CfLt */
+                  GLchan Lt = texel[i][RCOMP];
+		  rgba[i][RCOMP] = CHAN_PRODUCT( rgba[i][RCOMP], Lt );
+		  rgba[i][GCOMP] = CHAN_PRODUCT( rgba[i][GCOMP], Lt );
+		  rgba[i][BCOMP] = CHAN_PRODUCT( rgba[i][BCOMP], Lt );
+		  /* Av = AfAt */
+		  rgba[i][ACOMP] = CHAN_PRODUCT( rgba[i][ACOMP], texel[i][ACOMP] );
+	       }
+	       break;
+	    case GL_INTENSITY:
+	       for (i=0;i<n;i++) {
+		  /* Cv = CfIt */
+                  GLchan It = texel[i][RCOMP];
+		  rgba[i][RCOMP] = CHAN_PRODUCT( rgba[i][RCOMP], It );
+		  rgba[i][GCOMP] = CHAN_PRODUCT( rgba[i][GCOMP], It );
+		  rgba[i][BCOMP] = CHAN_PRODUCT( rgba[i][BCOMP], It );
+		  /* Av = AfIt */
+		  rgba[i][ACOMP] = CHAN_PRODUCT( rgba[i][ACOMP], It );
+	       }
+	       break;
+	    case GL_RGB:
+	       for (i=0;i<n;i++) {
+		  /* Cv = CfCt */
+		  rgba[i][RCOMP] = CHAN_PRODUCT( rgba[i][RCOMP], texel[i][RCOMP] );
+		  rgba[i][GCOMP] = CHAN_PRODUCT( rgba[i][GCOMP], texel[i][GCOMP] );
+		  rgba[i][BCOMP] = CHAN_PRODUCT( rgba[i][BCOMP], texel[i][BCOMP] );
+		  /* Av = Af */
+	       }
+	       break;
+	    case GL_RGBA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = CfCt */
+		  rgba[i][RCOMP] = CHAN_PRODUCT( rgba[i][RCOMP], texel[i][RCOMP] );
+		  rgba[i][GCOMP] = CHAN_PRODUCT( rgba[i][GCOMP], texel[i][GCOMP] );
+		  rgba[i][BCOMP] = CHAN_PRODUCT( rgba[i][BCOMP], texel[i][BCOMP] );
+		  /* Av = AfAt */
+		  rgba[i][ACOMP] = CHAN_PRODUCT( rgba[i][ACOMP], texel[i][ACOMP] );
+	       }
+	       break;
+            default:
+               _mesa_problem(ctx, "Bad format (GL_MODULATE) in texture_apply");
+               return;
+	 }
+	 break;
+
+      case GL_DECAL:
+         switch (format) {
+            case GL_ALPHA:
+            case GL_LUMINANCE:
+            case GL_LUMINANCE_ALPHA:
+            case GL_INTENSITY:
+               /* undefined; rgba is left unchanged */
+               break;
+	    case GL_RGB:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Ct */
+		  rgba[i][RCOMP] = texel[i][RCOMP];
+		  rgba[i][GCOMP] = texel[i][GCOMP];
+		  rgba[i][BCOMP] = texel[i][BCOMP];
+		  /* Av = Af */
+	       }
+	       break;
+	    case GL_RGBA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf(1-At) + CtAt */
+		  GLchan t = texel[i][ACOMP], s = CHAN_MAX - t;
+		  rgba[i][RCOMP] = CHAN_PRODUCT(rgba[i][RCOMP], s) + CHAN_PRODUCT(texel[i][RCOMP],t);
+		  rgba[i][GCOMP] = CHAN_PRODUCT(rgba[i][GCOMP], s) + CHAN_PRODUCT(texel[i][GCOMP],t);
+		  rgba[i][BCOMP] = CHAN_PRODUCT(rgba[i][BCOMP], s) + CHAN_PRODUCT(texel[i][BCOMP],t);
+		  /* Av = Af */
+	       }
+	       break;
+            default:
+               _mesa_problem(ctx, "Bad format (GL_DECAL) in texture_apply");
+               return;
+	 }
+	 break;
+
+      case GL_BLEND:  /* Cc/Ac below: texUnit->EnvColor converted to GLchan */
+         UNCLAMPED_FLOAT_TO_CHAN(Rc, texUnit->EnvColor[0]);
+         UNCLAMPED_FLOAT_TO_CHAN(Gc, texUnit->EnvColor[1]);
+         UNCLAMPED_FLOAT_TO_CHAN(Bc, texUnit->EnvColor[2]);
+         UNCLAMPED_FLOAT_TO_CHAN(Ac, texUnit->EnvColor[3]);
+	 switch (format) {
+	    case GL_ALPHA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf */
+		  /* Av = AfAt */
+                  rgba[i][ACOMP] = CHAN_PRODUCT(rgba[i][ACOMP], texel[i][ACOMP]);
+	       }
+	       break;
+            case GL_LUMINANCE:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf(1-Lt) + CcLt */
+		  GLchan Lt = texel[i][RCOMP], s = CHAN_MAX - Lt;
+		  rgba[i][RCOMP] = CHAN_PRODUCT(rgba[i][RCOMP], s) + CHAN_PRODUCT(Rc, Lt);
+		  rgba[i][GCOMP] = CHAN_PRODUCT(rgba[i][GCOMP], s) + CHAN_PRODUCT(Gc, Lt);
+		  rgba[i][BCOMP] = CHAN_PRODUCT(rgba[i][BCOMP], s) + CHAN_PRODUCT(Bc, Lt);
+		  /* Av = Af */
+	       }
+	       break;
+	    case GL_LUMINANCE_ALPHA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf(1-Lt) + CcLt */
+		  GLchan Lt = texel[i][RCOMP], s = CHAN_MAX - Lt;
+		  rgba[i][RCOMP] = CHAN_PRODUCT(rgba[i][RCOMP], s) + CHAN_PRODUCT(Rc, Lt);
+		  rgba[i][GCOMP] = CHAN_PRODUCT(rgba[i][GCOMP], s) + CHAN_PRODUCT(Gc, Lt);
+		  rgba[i][BCOMP] = CHAN_PRODUCT(rgba[i][BCOMP], s) + CHAN_PRODUCT(Bc, Lt);
+		  /* Av = AfAt */
+		  rgba[i][ACOMP] = CHAN_PRODUCT(rgba[i][ACOMP],texel[i][ACOMP]);
+	       }
+	       break;
+            case GL_INTENSITY:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf(1-It) + CcIt */
+		  GLchan It = texel[i][RCOMP], s = CHAN_MAX - It;
+		  rgba[i][RCOMP] = CHAN_PRODUCT(rgba[i][RCOMP], s) + CHAN_PRODUCT(Rc, It);
+		  rgba[i][GCOMP] = CHAN_PRODUCT(rgba[i][GCOMP], s) + CHAN_PRODUCT(Gc, It);
+		  rgba[i][BCOMP] = CHAN_PRODUCT(rgba[i][BCOMP], s) + CHAN_PRODUCT(Bc, It);
+                  /* Av = Af(1-It) + Ac*It */
+                  rgba[i][ACOMP] = CHAN_PRODUCT(rgba[i][ACOMP], s) + CHAN_PRODUCT(Ac, It);
+               }
+               break;
+	    case GL_RGB:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf(1-Ct) + CcCt */
+		  rgba[i][RCOMP] = CHAN_PRODUCT(rgba[i][RCOMP], (CHAN_MAX-texel[i][RCOMP])) + CHAN_PRODUCT(Rc,texel[i][RCOMP]);
+		  rgba[i][GCOMP] = CHAN_PRODUCT(rgba[i][GCOMP], (CHAN_MAX-texel[i][GCOMP])) + CHAN_PRODUCT(Gc,texel[i][GCOMP]);
+		  rgba[i][BCOMP] = CHAN_PRODUCT(rgba[i][BCOMP], (CHAN_MAX-texel[i][BCOMP])) + CHAN_PRODUCT(Bc,texel[i][BCOMP]);
+		  /* Av = Af */
+	       }
+	       break;
+	    case GL_RGBA:
+	       for (i=0;i<n;i++) {
+		  /* Cv = Cf(1-Ct) + CcCt */
+		  rgba[i][RCOMP] = CHAN_PRODUCT(rgba[i][RCOMP], (CHAN_MAX-texel[i][RCOMP])) + CHAN_PRODUCT(Rc,texel[i][RCOMP]);
+		  rgba[i][GCOMP] = CHAN_PRODUCT(rgba[i][GCOMP], (CHAN_MAX-texel[i][GCOMP])) + CHAN_PRODUCT(Gc,texel[i][GCOMP]);
+		  rgba[i][BCOMP] = CHAN_PRODUCT(rgba[i][BCOMP], (CHAN_MAX-texel[i][BCOMP])) + CHAN_PRODUCT(Bc,texel[i][BCOMP]);
+		  /* Av = AfAt */
+		  rgba[i][ACOMP] = CHAN_PRODUCT(rgba[i][ACOMP],texel[i][ACOMP]);
+	       }
+	       break;
+            default:
+               _mesa_problem(ctx, "Bad format (GL_BLEND) in texture_apply");
+               return;
+	 }
+	 break;
+
+     /* XXX don't clamp results if GLchan is float??? */
+
+      case GL_ADD:  /* GL_EXT_texture_add_env */
+         switch (format) {
+            case GL_ALPHA:
+               for (i=0;i<n;i++) {
+                  /* Rv = Rf */
+                  /* Gv = Gf */
+                  /* Bv = Bf, Av = AfAt */
+                  rgba[i][ACOMP] = CHAN_PRODUCT(rgba[i][ACOMP], texel[i][ACOMP]);
+               }
+               break;
+            case GL_LUMINANCE:
+               for (i=0;i<n;i++) {
+                  ChanTemp Lt = texel[i][RCOMP];  /* Cv = min(Cf + Lt, CHAN_MAX) */
+                  ChanTemp r = rgba[i][RCOMP] + Lt;
+                  ChanTemp g = rgba[i][GCOMP] + Lt;
+                  ChanTemp b = rgba[i][BCOMP] + Lt;
+                  rgba[i][RCOMP] = MIN2(r, CHAN_MAX);
+                  rgba[i][GCOMP] = MIN2(g, CHAN_MAX);
+                  rgba[i][BCOMP] = MIN2(b, CHAN_MAX);
+                  /* Av = Af */
+               }
+               break;
+            case GL_LUMINANCE_ALPHA:
+               for (i=0;i<n;i++) {
+                  ChanTemp Lt = texel[i][RCOMP];  /* Cv = min(Cf + Lt, CHAN_MAX), Av = AfAt */
+                  ChanTemp r = rgba[i][RCOMP] + Lt;
+                  ChanTemp g = rgba[i][GCOMP] + Lt;
+                  ChanTemp b = rgba[i][BCOMP] + Lt;
+                  rgba[i][RCOMP] = MIN2(r, CHAN_MAX);
+                  rgba[i][GCOMP] = MIN2(g, CHAN_MAX);
+                  rgba[i][BCOMP] = MIN2(b, CHAN_MAX);
+                  rgba[i][ACOMP] = CHAN_PRODUCT(rgba[i][ACOMP], texel[i][ACOMP]);
+               }
+               break;
+            case GL_INTENSITY:
+               for (i=0;i<n;i++) {
+                  GLchan It = texel[i][RCOMP];  /* Cv = min(Cf + It, CHAN_MAX), Av likewise */
+                  ChanTemp r = rgba[i][RCOMP] + It;
+                  ChanTemp g = rgba[i][GCOMP] + It;
+                  ChanTemp b = rgba[i][BCOMP] + It;
+                  ChanTemp a = rgba[i][ACOMP] + It;
+                  rgba[i][RCOMP] = MIN2(r, CHAN_MAX);
+                  rgba[i][GCOMP] = MIN2(g, CHAN_MAX);
+                  rgba[i][BCOMP] = MIN2(b, CHAN_MAX);
+                  rgba[i][ACOMP] = MIN2(a, CHAN_MAX);
+               }
+               break;
+	    case GL_RGB:
+	       for (i=0;i<n;i++) {  /* Cv = min(Cf + Ct, CHAN_MAX) */
+                  ChanTemp r = rgba[i][RCOMP] + texel[i][RCOMP];
+                  ChanTemp g = rgba[i][GCOMP] + texel[i][GCOMP];
+                  ChanTemp b = rgba[i][BCOMP] + texel[i][BCOMP];
+		  rgba[i][RCOMP] = MIN2(r, CHAN_MAX);
+		  rgba[i][GCOMP] = MIN2(g, CHAN_MAX);
+		  rgba[i][BCOMP] = MIN2(b, CHAN_MAX);
+		  /* Av = Af */
+	       }
+	       break;
+	    case GL_RGBA:
+	       for (i=0;i<n;i++) {  /* Cv = min(Cf + Ct, CHAN_MAX), Av = AfAt */
+                  ChanTemp r = rgba[i][RCOMP] + texel[i][RCOMP];
+                  ChanTemp g = rgba[i][GCOMP] + texel[i][GCOMP];
+                  ChanTemp b = rgba[i][BCOMP] + texel[i][BCOMP];
+		  rgba[i][RCOMP] = MIN2(r, CHAN_MAX);
+		  rgba[i][GCOMP] = MIN2(g, CHAN_MAX);
+		  rgba[i][BCOMP] = MIN2(b, CHAN_MAX);
+                  rgba[i][ACOMP] = CHAN_PRODUCT(rgba[i][ACOMP], texel[i][ACOMP]);
+               }
+               break;
+            default:
+               _mesa_problem(ctx, "Bad format (GL_ADD) in texture_apply");
+               return;
+	 }
+	 break;
+
+      default:
+         _mesa_problem(ctx, "Bad env mode in texture_apply");
+         return;
+   }
+}
+
+
+
+/**
+ * Apply texture mapping to the span->end fragments of a span, in two passes.
+ */
+void
+_swrast_texture_span( GLcontext *ctx, struct sw_span *span )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLchan primary_rgba[MAX_WIDTH][4];
+   GLuint unit;
+
+   ASSERT(span->end < MAX_WIDTH);
+   ASSERT(span->arrayMask & SPAN_TEXTURE);
+
+   /*
+    * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
+    */
+   if (swrast->_AnyTextureCombine)  /* else primary_rgba stays uninitialized */
+      MEMCPY(primary_rgba, span->array->rgba, 4 * span->end * sizeof(GLchan));
+
+   /*
+    * Must do all texture sampling before combining in order to
+    * accommodate GL_ARB_texture_env_crossbar.
+    */
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+         const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+         const struct gl_texture_object *curObj = texUnit->_Current;
+         GLfloat *lambda = span->array->lambda[unit];
+         GLchan (*texels)[4] = (GLchan (*)[4])  /* this unit's part of TexelBuffer */
+            (swrast->TexelBuffer + unit * (span->end * 4 * sizeof(GLchan)));
+
+         /* adjust texture lod (lambda) */
+         if (span->arrayMask & SPAN_LAMBDA) {
+            if (texUnit->LodBias + curObj->LodBias != 0.0F) {
+               /* add the clamped LOD bias; lambda itself is not clamped yet */
+               const GLfloat bias = CLAMP(texUnit->LodBias + curObj->LodBias,
+                                          -ctx->Const.MaxTextureLodBias,
+                                          ctx->Const.MaxTextureLodBias);
+               GLuint i;
+               for (i = 0; i < span->end; i++) {
+                  lambda[i] += bias;
+               }
+            }
+
+            if (curObj->MinLod != -1000.0 || curObj->MaxLod != 1000.0) {
+               /* LOD range is not -1000..1000: clamp lambda to [MinLod, MaxLod] */
+               const GLfloat min = curObj->MinLod;
+               const GLfloat max = curObj->MaxLod;
+               GLuint i;
+               for (i = 0; i < span->end; i++) {
+                  GLfloat l = lambda[i];
+                  lambda[i] = CLAMP(l, min, max);
+               }
+            }
+         }
+
+         /* Sample the texture (span->end = number of fragments) */
+         swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
+                         (const GLfloat (*)[4]) span->array->texcoords[unit],
+                         lambda, texels );
+
+         /* GL_SGI_texture_color_table: pass texels through texUnit->ColorTable */
+         if (texUnit->ColorTableEnabled) {
+            _mesa_lookup_rgba_chan(&texUnit->ColorTable, span->end, texels);
+         }
+      }
+   }
+
+   /*
+    * OK, now apply the texture (aka texture combine/blend).
+    * We modify the span->array->rgba values.
+    */
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+         const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+         if (texUnit->_CurrentCombine != &texUnit->_EnvMode ) {
+            texture_combine( ctx, unit, span->end,
+                             (CONST GLchan (*)[4]) primary_rgba,
+                             swrast->TexelBuffer,
+                             span->array->rgba );
+         }
+         else {
+            /* conventional texture blend */
+            const GLchan (*texels)[4] = (const GLchan (*)[4])
+               (swrast->TexelBuffer + unit *
+                (span->end * 4 * sizeof(GLchan)));
+            texture_apply( ctx, texUnit, span->end,
+                           (CONST GLchan (*)[4]) primary_rgba, texels,
+                           span->array->rgba );
+         }
+      }
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_texcombine.h b/dist/Mesa/src/mesa/swrast/s_texcombine.h
new file mode 100644
index 000000000..eca967c54
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_texcombine.h
@@ -0,0 +1,36 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_TEXCOMBINE_H
+#define S_TEXCOMBINE_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+/* Apply texture mapping to a span of fragments. */
+extern void
+_swrast_texture_span( GLcontext *ctx, struct sw_span *span );
+
+#endif /* S_TEXCOMBINE_H */
diff --git a/dist/Mesa/src/mesa/swrast/s_texfilter.c b/dist/Mesa/src/mesa/swrast/s_texfilter.c
new file mode 100644
index 000000000..629b4ec13
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_texfilter.c
@@ -0,0 +1,2744 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "imports.h"
+#include "texformat.h"
+
+#include "s_context.h"
+#include "s_texfilter.h"
+
+
+/**
+ * Fixed-point constants for integer interpolation: ILERP_SCALE == 2^ILERP_SHIFT.
+ */
+#define ILERP_SCALE 65536.0F   /* a float weight is multiplied by this */
+#define ILERP_SHIFT 16         /* right shift that removes the scale again */
+
+
+/**
+ * LERP: blend from A (T = 0) to B (T = 1).  ILERP: same, but with an integer
+ * weight IT that has already been multiplied by ILERP_SCALE. */
+#define LERP(T, A, B)  ( (A) + (T) * ((B) - (A)) )
+#define ILERP(IT, A, B)  ( (A) + (((IT) * ((B) - (A))) >> ILERP_SHIFT) )
+
+
+/**
+ * Bilinear (2D) interpolation of four float values.
+ * v00 and v10 are blended by \p a, v01 and v11 are blended by \p a as well,
+ * and \p b then blends between those two intermediate results.  The four
+ * values are typically texture samples forming a square/box.
+ * This function should be inlined in optimized builds; if some compiler
+ * fails to do so, convert it to a macro.
+ */
+static INLINE GLfloat
+lerp_2d(GLfloat a, GLfloat b,
+        GLfloat v00, GLfloat v10, GLfloat v01, GLfloat v11)
+{
+   const GLfloat row0 = LERP(a, v00, v10);
+   const GLfloat row1 = LERP(a, v01, v11);
+
+   return LERP(b, row0, row1);
+}
+
+
+/**
+ * Bilinear (2D) interpolation of integer values, fixed-point flavour.
+ * \sa lerp_2d
+ */
+static INLINE GLint
+ilerp_2d(GLint ia, GLint ib,
+         GLint v00, GLint v10, GLint v01, GLint v11)
+{
+   /* ia and ib are fixed point weights in [0, ILERP_SCALE] */
+   const GLint row0 = ILERP(ia, v00, v10);
+   const GLint row1 = ILERP(ia, v01, v11);
+
+   return ILERP(ib, row0, row1);
+}
+
+
+/**
+ * Trilinear (3D) interpolation of eight float values.
+ * \p a blends along the first index of vXYZ, \p b along the second and
+ * \p c along the third.
+ * \sa lerp_2d
+ */
+static INLINE GLfloat
+lerp_3d(GLfloat a, GLfloat b, GLfloat c,
+        GLfloat v000, GLfloat v100, GLfloat v010, GLfloat v110,
+        GLfloat v001, GLfloat v101, GLfloat v011, GLfloat v111)
+{
+   /* collapse the first axis: eight values become four */
+   const GLfloat edge00 = LERP(a, v000, v100);
+   const GLfloat edge10 = LERP(a, v010, v110);
+   const GLfloat edge01 = LERP(a, v001, v101);
+   const GLfloat edge11 = LERP(a, v011, v111);
+   /* collapse the second axis: four values become two */
+   const GLfloat face0 = LERP(b, edge00, edge10);
+   const GLfloat face1 = LERP(b, edge01, edge11);
+
+   /* and finally the third axis */
+   return LERP(c, face0, face1);
+}
+
+
+/**
+ * Trilinear (3D) interpolation of integer values, fixed-point flavour.
+ * \sa lerp_3d
+ */
+static INLINE GLint
+ilerp_3d(GLint ia, GLint ib, GLint ic,
+         GLint v000, GLint v100, GLint v010, GLint v110,
+         GLint v001, GLint v101, GLint v011, GLint v111)
+{
+   /* ia, ib and ic are fixed point weights in [0, ILERP_SCALE] */
+   const GLint edge00 = ILERP(ia, v000, v100);
+   const GLint edge10 = ILERP(ia, v010, v110);
+   const GLint edge01 = ILERP(ia, v001, v101);
+   const GLint edge11 = ILERP(ia, v011, v111);
+   const GLint face0 = ILERP(ib, edge00, edge10);
+   const GLint face1 = ILERP(ib, edge01, edge11);
+
+   return ILERP(ic, face0, face1);
+}
+
+
+/**
+ * Linearly interpolate two colors, channel by channel.
+ * \p t is the blend weight: 0 selects \p a, 1 selects \p b.
+ * The arithmetic used depends on the compile-time channel type.
+ */
+static INLINE void
+lerp_rgba(GLchan result[4], GLfloat t, const GLchan a[4], const GLchan b[4])
+{
+   GLuint k;
+   /* compiler should unroll these short loops */
+#if CHAN_TYPE == GL_FLOAT
+   for (k = 0; k < 4; k++) {
+      result[k] = LERP(t, a[k], b[k]);
+   }
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT
+   for (k = 0; k < 4; k++) {
+      result[k] = (GLchan) (LERP(t, a[k], b[k]) + 0.5);
+   }
+#else
+   /* fixed point interpolants in [0, ILERP_SCALE] */
+   const GLint it = IROUND_POS(t * ILERP_SCALE);
+   ASSERT(CHAN_TYPE == GL_UNSIGNED_BYTE);
+   for (k = 0; k < 4; k++) {
+      result[k] = ILERP(it, a[k], b[k]);
+   }
+#endif
+}
+
+
+/**
+ * Bilinearly interpolate four colors, channel by channel.
+ * \p a blends t00->t10 and t01->t11, \p b blends between those results.
+ * The arithmetic used depends on the compile-time channel type.
+ */
+static INLINE void
+lerp_rgba_2d(GLchan result[4], GLfloat a, GLfloat b,
+             const GLchan t00[4], const GLchan t10[4],
+             const GLchan t01[4], const GLchan t11[4])
+{
+   GLuint k;
+   /* compiler should unroll these short loops */
+#if CHAN_TYPE == GL_FLOAT
+   for (k = 0; k < 4; k++) {
+      result[k] = lerp_2d(a, b, t00[k], t10[k], t01[k], t11[k]);
+   }
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT
+   for (k = 0; k < 4; k++) {
+      result[k] = (GLchan) (lerp_2d(a, b, t00[k], t10[k], t01[k], t11[k]) + 0.5);
+   }
+#else
+   /* fixed point interpolants in [0, ILERP_SCALE] */
+   const GLint ia = IROUND_POS(a * ILERP_SCALE);
+   const GLint ib = IROUND_POS(b * ILERP_SCALE);
+   ASSERT(CHAN_TYPE == GL_UNSIGNED_BYTE);
+   for (k = 0; k < 4; k++) {
+      result[k] = ilerp_2d(ia, ib, t00[k], t10[k], t01[k], t11[k]);
+   }
+#endif
+}
+
+
+/**
+ * Trilinearly interpolate eight colors, channel by channel.
+ * \p a blends along the first index of tXYZ, \p b along the second and
+ * \p c along the third (see lerp_3d).  The arithmetic used depends on the
+ * compile-time channel type.
+ */
+static INLINE void
+lerp_rgba_3d(GLchan result[4], GLfloat a, GLfloat b, GLfloat c,
+             const GLchan t000[4], const GLchan t100[4],
+             const GLchan t010[4], const GLchan t110[4],
+             const GLchan t001[4], const GLchan t101[4],
+             const GLchan t011[4], const GLchan t111[4])
+{
+   GLuint k;
+   /* compiler should unroll these short loops */
+#if CHAN_TYPE == GL_FLOAT
+   for (k = 0; k < 4; k++) {
+      result[k] = lerp_3d(a, b, c, t000[k], t100[k], t010[k], t110[k],
+                                   t001[k], t101[k], t011[k], t111[k]);
+   }
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT
+   for (k = 0; k < 4; k++) {
+      result[k] = (GLchan)(lerp_3d(a, b, c,
+                                   t000[k], t100[k], t010[k], t110[k],
+                                   t001[k], t101[k], t011[k], t111[k]) + 0.5F);
+   }
+#else
+   /* fixed point interpolants in [0, ILERP_SCALE] */
+   const GLint ia = IROUND_POS(a * ILERP_SCALE);
+   const GLint ib = IROUND_POS(b * ILERP_SCALE);
+   const GLint ic = IROUND_POS(c * ILERP_SCALE);
+   /* same sanity check as lerp_rgba() and lerp_rgba_2d(): the integer
+    * path is only meant for 8-bit channels.
+    */
+   ASSERT(CHAN_TYPE == GL_UNSIGNED_BYTE);
+   for (k = 0; k < 4; k++) {
+      result[k] = ilerp_3d(ia, ib, ic, t000[k], t100[k], t010[k], t110[k],
+                                       t001[k], t101[k], t011[k], t111[k]);
+   }
+#endif
+}
+
+
+/**
+ * Remainder of a divided by b, adjusted for negative a so that the
+ * GL_REPEAT wrap mode works right.
+ */
+static INLINE GLint
+repeat_remainder(GLint a, GLint b)
+{
+   if (a < 0) {
+      /* negative values need special care */
+      return (a + 1) % b + b - 1;
+   }
+   return a % b;
+}
+
+
+/**
+ * Compute texel locations for linear sampling along one axis.  The macro
+ * also reads 'img' and 'ctx' from the scope in which it is expanded.
+ *    wrapMode = GL_REPEAT, GL_CLAMP*, GL_MIRRORED_REPEAT, GL_MIRROR_CLAMP*_EXT
+ *    S = texcoord to sample at (input)
+ *    SIZE = width (or height or depth) of texture (input)
+ *    U = texel-space coordinate, already offset by -0.5 (output)
+ *    I0, I1 = the two texel indexes to blend; several modes leave them
+ *             unclamped, so they may lie outside [0, SIZE-1] (output)
+ */
+#define COMPUTE_LINEAR_TEXEL_LOCATIONS(wrapMode, S, U, SIZE, I0, I1)	\
+{									\
+   switch (wrapMode) {							\
+   case GL_REPEAT:							\
+      U = S * SIZE - 0.5F;						\
+      if (img->_IsPowerOfTwo) {		/* wrap with a bit mask */	\
+         I0 = IFLOOR(U) & (SIZE - 1);					\
+         I1 = (I0 + 1) & (SIZE - 1);					\
+      }									\
+      else {								\
+         I0 = repeat_remainder(IFLOOR(U), SIZE);			\
+         I1 = repeat_remainder(I0 + 1, SIZE);				\
+      }									\
+      break;								\
+   case GL_CLAMP_TO_EDGE:		/* indexes clamped to image */	\
+      if (S <= 0.0F)							\
+         U = 0.0F;							\
+      else if (S >= 1.0F)						\
+         U = (GLfloat) SIZE;						\
+      else								\
+         U = S * SIZE;							\
+      U -= 0.5F;							\
+      I0 = IFLOOR(U);							\
+      I1 = I0 + 1;							\
+      if (I0 < 0)							\
+         I0 = 0;							\
+      if (I1 >= (GLint) SIZE)						\
+         I1 = SIZE - 1;							\
+      break;								\
+   case GL_CLAMP_TO_BORDER:		/* indexes left unclamped */	\
+      {									\
+         const GLfloat min = -1.0F / (2.0F * SIZE);			\
+         const GLfloat max = 1.0F - min;				\
+         if (S <= min)							\
+            U = min * SIZE;						\
+         else if (S >= max)						\
+            U = max * SIZE;						\
+         else								\
+            U = S * SIZE;						\
+         U -= 0.5F;							\
+         I0 = IFLOOR(U);						\
+         I1 = I0 + 1;							\
+      }									\
+      break;								\
+   case GL_MIRRORED_REPEAT:		/* indexes clamped to image */	\
+      {									\
+         const GLint flr = IFLOOR(S);					\
+         if (flr & 1)							\
+            U = 1.0F - (S - (GLfloat) flr);	/* flr is odd */	\
+         else								\
+            U = S - (GLfloat) flr;		/* flr is even */	\
+         U = (U * SIZE) - 0.5F;						\
+         I0 = IFLOOR(U);						\
+         I1 = I0 + 1;							\
+         if (I0 < 0)							\
+            I0 = 0;							\
+         if (I1 >= (GLint) SIZE)					\
+            I1 = SIZE - 1;						\
+      }									\
+      break;								\
+   case GL_MIRROR_CLAMP_EXT:		/* indexes left unclamped */	\
+      U = FABSF(S);							\
+      if (U >= 1.0F)							\
+         U = (GLfloat) SIZE;						\
+      else								\
+         U *= SIZE;							\
+      U -= 0.5F;							\
+      I0 = IFLOOR(U);							\
+      I1 = I0 + 1;							\
+      break;								\
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:	/* indexes clamped to image */	\
+      U = FABSF(S);							\
+      if (U >= 1.0F)							\
+         U = (GLfloat) SIZE;						\
+      else								\
+         U *= SIZE;							\
+      U -= 0.5F;							\
+      I0 = IFLOOR(U);							\
+      I1 = I0 + 1;							\
+      if (I0 < 0)							\
+         I0 = 0;							\
+      if (I1 >= (GLint) SIZE)						\
+         I1 = SIZE - 1;							\
+      break;								\
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:	/* indexes left unclamped */	\
+      {									\
+         const GLfloat min = -1.0F / (2.0F * SIZE);			\
+         const GLfloat max = 1.0F - min;				\
+         U = FABSF(S);							\
+         if (U <= min)	/* cannot trigger for SIZE > 0: U >= 0 > min */	\
+            U = min * SIZE;						\
+         else if (U >= max)						\
+            U = max * SIZE;						\
+         else								\
+            U *= SIZE;							\
+         U -= 0.5F;							\
+         I0 = IFLOOR(U);						\
+         I1 = I0 + 1;							\
+      }									\
+      break;								\
+   case GL_CLAMP:			/* indexes left unclamped */	\
+      if (S <= 0.0F)							\
+         U = 0.0F;							\
+      else if (S >= 1.0F)						\
+         U = (GLfloat) SIZE;						\
+      else								\
+         U = S * SIZE;							\
+      U -= 0.5F;							\
+      I0 = IFLOOR(U);							\
+      I1 = I0 + 1;							\
+      break;								\
+   default:				/* U, I0, I1 are not assigned */	\
+      _mesa_problem(ctx, "Bad wrap mode");				\
+   }									\
+}
+
+
+/**
+ * Nearest-sampling texel index I along one axis; reads 'img'/'ctx' from scope.
+ */
+#define COMPUTE_NEAREST_TEXEL_LOCATION(wrapMode, S, SIZE, I)		\
+{									\
+   switch (wrapMode) {							\
+   case GL_REPEAT:							\
+      /* s limited to [0,1) */						\
+      /* i limited to [0,size-1] */					\
+      I = IFLOOR(S * SIZE);						\
+      if (img->_IsPowerOfTwo)						\
+         I &= (SIZE - 1);						\
+      else								\
+         I = repeat_remainder(I, SIZE);					\
+      break;								\
+   case GL_CLAMP_TO_EDGE:						\
+      {									\
+         /* s limited to [min,max] */					\
+         /* i limited to [0, size-1] */					\
+         const GLfloat min = 1.0F / (2.0F * SIZE);			\
+         const GLfloat max = 1.0F - min;				\
+         if (S < min)							\
+            I = 0;							\
+         else if (S > max)						\
+            I = SIZE - 1;						\
+         else								\
+            I = IFLOOR(S * SIZE);					\
+      }									\
+      break;								\
+   case GL_CLAMP_TO_BORDER:						\
+      {									\
+         /* s limited to [min,max] */					\
+         /* i limited to [-1, size] */					\
+         const GLfloat min = -1.0F / (2.0F * SIZE);			\
+         const GLfloat max = 1.0F - min;				\
+         if (S <= min)							\
+            I = -1;							\
+         else if (S >= max)						\
+            I = SIZE;							\
+         else								\
+            I = IFLOOR(S * SIZE);					\
+      }									\
+      break;								\
+   case GL_MIRRORED_REPEAT:						\
+      {									\
+         const GLfloat min = 1.0F / (2.0F * SIZE);			\
+         const GLfloat max = 1.0F - min;				\
+         const GLint flr = IFLOOR(S);					\
+         GLfloat u;							\
+         if (flr & 1)							\
+            u = 1.0F - (S - (GLfloat) flr);	/* flr is odd */	\
+         else								\
+            u = S - (GLfloat) flr;		/* flr is even */	\
+         if (u < min)							\
+            I = 0;							\
+         else if (u > max)						\
+            I = SIZE - 1;						\
+         else								\
+            I = IFLOOR(u * SIZE);					\
+      }									\
+      break;								\
+   case GL_MIRROR_CLAMP_EXT:						\
+      {									\
+         /* s limited to [0,1] */					\
+         /* i limited to [0,size-1] */					\
+         const GLfloat u = FABSF(S);					\
+         if (u <= 0.0F)							\
+            I = 0;							\
+         else if (u >= 1.0F)						\
+            I = SIZE - 1;						\
+         else								\
+            I = IFLOOR(u * SIZE);					\
+      }									\
+      break;								\
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:					\
+      {									\
+         /* s limited to [min,max] */					\
+         /* i limited to [0, size-1] */					\
+         const GLfloat min = 1.0F / (2.0F * SIZE);			\
+         const GLfloat max = 1.0F - min;				\
+         const GLfloat u = FABSF(S);					\
+         if (u < min)							\
+            I = 0;							\
+         else if (u > max)						\
+            I = SIZE - 1;						\
+         else								\
+            I = IFLOOR(u * SIZE);					\
+      }									\
+      break;								\
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:					\
+      {									\
+         /* |s| limited to [0,max]; min is negative for SIZE > 0 */	\
+         /* i limited to [0, size]; the -1 case needs u < min < 0 */	\
+         const GLfloat min = -1.0F / (2.0F * SIZE);			\
+         const GLfloat max = 1.0F - min;				\
+         const GLfloat u = FABSF(S);					\
+         if (u < min)							\
+            I = -1;							\
+         else if (u > max)						\
+            I = SIZE;							\
+         else								\
+            I = IFLOOR(u * SIZE);					\
+      }									\
+      break;								\
+   case GL_CLAMP:							\
+      /* s limited to [0,1] */						\
+      /* i limited to [0,size-1] */					\
+      if (S <= 0.0F)							\
+         I = 0;								\
+      else if (S >= 1.0F)						\
+         I = SIZE - 1;							\
+      else								\
+         I = IFLOOR(S * SIZE);						\
+      break;								\
+   default:				/* I is not assigned */		\
+      _mesa_problem(ctx, "Bad wrap mode");				\
+   }									\
+}
+
+
+/* Linear texel locations with repeat wrapping; SIZE must be a power of two (indexes wrap via bit mask) */
+#define COMPUTE_LINEAR_REPEAT_TEXEL_LOCATION(S, U, SIZE, I0, I1)	\
+{									\
+   U = S * SIZE - 0.5F;							\
+   I0 = IFLOOR(U) & (SIZE - 1);						\
+   I1 = (I0 + 1) & (SIZE - 1);						\
+}
+
+
+/**
+ * For linear interpolation between mipmap levels N and N+1, return N.
+ * Lambda is limited to [0, _MaxLambda] and added to the base level.
+ */
+static INLINE GLint
+linear_mipmap_level(const struct gl_texture_object *tObj, GLfloat lambda)
+{
+   if (lambda < 0.0F) {
+      return tObj->BaseLevel;
+   }
+   if (lambda > tObj->_MaxLambda) {
+      return (GLint) (tObj->BaseLevel + tObj->_MaxLambda);
+   }
+   return (GLint) (tObj->BaseLevel + lambda);
+}
+
+
+/**
+ * Return the mipmap level nearest to lambda, never above _MaxLevel.
+ */
+static INLINE GLint
+nearest_mipmap_level(const struct gl_texture_object *tObj, GLfloat lambda)
+{
+   GLfloat lod = lambda;
+   GLint nearest;
+
+   /* limit the level of detail before rounding it */
+   if (lambda <= 0.5F) {
+      lod = 0.0F;
+   }
+   else if (lambda > tObj->_MaxLambda + 0.4999F) {
+      lod = tObj->_MaxLambda + 0.4999F;
+   }
+
+   /* round to nearest by adding one half and truncating */
+   nearest = (GLint) (tObj->BaseLevel + lod + 0.5F);
+
+   return (nearest > tObj->_MaxLevel) ? tObj->_MaxLevel : nearest;
+}
+
+
+
+/*
+ * FRAC(f) is f minus IFLOOR(f); the linear samplers use it as the blend
+ * weight between texels.  It has to work perfectly, otherwise you'll sometimes
+ * see 1-pixel bands of improperly weighted linear-filtered textures.
+ * The tests/texwrap.c demo is a good test.  Also note that FRAC(x) is not the
+ * true fractional part of x for x < 0: there FRAC(x) = 1 - true_frac(x).
+ */
+#define FRAC(f)  ((f) - IFLOOR(f))
+
+
+
+/*
+ * Bitflags marking texel indexes that fall outside the image, for which the
+ * linear samplers substitute the border color (see useBorderColor). */
+#define I0BIT   1   /* index i0 is out of range */
+#define I1BIT   2   /* index i1 is out of range */
+#define J0BIT   4   /* index j0 is out of range */
+#define J1BIT   8   /* index j1 is out of range */
+#define K0BIT  16   /* presumably index k0 of the 3-D samplers -- TODO confirm */
+#define K1BIT  32   /* presumably index k1 of the 3-D samplers -- TODO confirm */
+
+
+
+/*
+ * Split a span into the part that is minified and the part that is magnified.
+ *
+ * The lambda[] array values are expected to be monotonic, so the whole span
+ * is either minified, magnified, or switches between the two exactly once.
+ * Monotonicity is deliberately not asserted: inaccuracy in the LOG2 function
+ * makes such a check fail even though the resulting texturing is correct.
+ *
+ * tObj      texture object; its min/mag filters select the threshold
+ * n         number of entries in lambda[]
+ * lambda    level-of-detail value for each fragment of the span
+ * minStart, minEnd   out: half-open range [start, end) of minified fragments
+ * magStart, magEnd   out: half-open range [start, end) of magnified fragments
+ *
+ * A range that does not occur is returned as start == end == 0.
+ */
+static INLINE void
+compute_min_mag_ranges(const struct gl_texture_object *tObj,
+                       GLuint n, const GLfloat lambda[],
+                       GLuint *minStart, GLuint *minEnd,
+                       GLuint *magStart, GLuint *magEnd)
+{
+   GLfloat minMagThresh;
+
+   /* we shouldn't be here if minfilter == magfilter */
+   ASSERT(tObj->MinFilter != tObj->MagFilter);
+
+   if (n == 0) {
+      /* Empty span: there is nothing to classify, and since n is unsigned
+       * the lambda[n-1] accesses below would read far out of bounds.
+       */
+      *minStart = *minEnd = 0;
+      *magStart = *magEnd = 0;
+      return;
+   }
+
+   /* This bit comes from the OpenGL spec: */
+   if (tObj->MagFilter == GL_LINEAR
+       && (tObj->MinFilter == GL_NEAREST_MIPMAP_NEAREST ||
+           tObj->MinFilter == GL_NEAREST_MIPMAP_LINEAR)) {
+      minMagThresh = 0.5F;
+   }
+   else {
+      minMagThresh = 0.0F;
+   }
+
+   if (lambda[0] <= minMagThresh && lambda[n-1] <= minMagThresh) {
+      /* magnification for whole span */
+      *magStart = 0;
+      *magEnd = n;
+      *minStart = *minEnd = 0;
+   }
+   else if (lambda[0] > minMagThresh && lambda[n-1] > minMagThresh) {
+      /* minification for whole span */
+      *minStart = 0;
+      *minEnd = n;
+      *magStart = *magEnd = 0;
+   }
+   else {
+      /* a mix of minification and magnification: find the switch-over */
+      GLuint i;
+      if (lambda[0] > minMagThresh) {
+         /* start with minification */
+         for (i = 1; i < n; i++) {
+            if (lambda[i] <= minMagThresh)
+               break;
+         }
+         *minStart = 0;
+         *minEnd = i;
+         *magStart = i;
+         *magEnd = n;
+      }
+      else {
+         /* start with magnification */
+         for (i = 1; i < n; i++) {
+            if (lambda[i] > minMagThresh)
+               break;
+         }
+         *magStart = 0;
+         *magEnd = i;
+         *minStart = i;
+         *minEnd = n;
+      }
+   }
+}
+
+
+/**********************************************************************/
+/*                    1-D Texture Sampling Functions                  */
+/**********************************************************************/
+
+/*
+ * Fetch the texture sample for coordinate (s) from a 1-D image using the
+ * GL_NEAREST filter.  The border color is returned when the selected
+ * texel lies outside the image.
+ */
+static void
+sample_1d_nearest(GLcontext *ctx,
+                  const struct gl_texture_object *tObj,
+                  const struct gl_texture_image *img,
+                  const GLfloat texcoord[4], GLchan rgba[4])
+{
+   const GLint size = img->Width2;  /* width without border */
+   GLint col;
+
+   COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapS, texcoord[0], size, col);
+
+   /* convert to an index into the stored image, border included */
+   col += img->Border;
+
+   if (col >= 0 && col < (GLint) img->Width) {
+      img->FetchTexelc(img, col, 0, 0, rgba);
+   }
+   else {
+      /* outside the image: needed for GL_CLAMP_TO_BORDER mode */
+      COPY_CHAN4(rgba, tObj->_BorderChan);
+   }
+}
+
+
+/*
+ * Fetch the texture sample for coordinate (s) from a 1-D image using the
+ * GL_LINEAR filter: two neighbouring texels are blended, with the border
+ * color standing in for texels that lie outside a borderless image.
+ */
+static void
+sample_1d_linear(GLcontext *ctx,
+                 const struct gl_texture_object *tObj,
+                 const struct gl_texture_image *img,
+                 const GLfloat texcoord[4], GLchan rgba[4])
+{
+   const GLint size = img->Width2;
+   GLbitfield outside = 0x0;
+   GLchan texel0[4], texel1[4];
+   GLint col0, col1;
+   GLfloat pos;
+   GLfloat weight;
+
+   COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapS, texcoord[0], pos, size, col0, col1);
+
+   if (!img->Border) {
+      /* no border texels stored: remember which columns are out of range */
+      if (col0 < 0 || col0 >= size)   outside |= I0BIT;
+      if (col1 < 0 || col1 >= size)   outside |= I1BIT;
+   }
+   else {
+      /* skip over the border */
+      col0 += img->Border;
+      col1 += img->Border;
+   }
+
+   /* first texel */
+   if (outside & I0BIT) {
+      COPY_CHAN4(texel0, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, col0, 0, 0, texel0);
+   }
+
+   /* second texel */
+   if (outside & I1BIT) {
+      COPY_CHAN4(texel1, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, col1, 0, 0, texel1);
+   }
+
+   weight = FRAC(pos);
+   lerp_rgba(rgba, weight, texel0, texel1);
+}
+
+
+/*
+ * GL_NEAREST_MIPMAP_NEAREST sampling of a 1-D texture: for each of the n
+ * fragments pick the nearest mipmap level and take its nearest texel.
+ */
+static void
+sample_1d_nearest_mipmap_nearest(GLcontext *ctx,
+                                 const struct gl_texture_object *tObj,
+                                 GLuint n, const GLfloat texcoord[][4],
+                                 const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint k;
+   ASSERT(lambda != NULL);
+   for (k = 0; k < n; k++) {
+      const GLint level = nearest_mipmap_level(tObj, lambda[k]);
+      const struct gl_texture_image *levelImg = tObj->Image[0][level];
+      sample_1d_nearest(ctx, tObj, levelImg, texcoord[k], rgba[k]);
+   }
+}
+
+
+/*
+ * GL_LINEAR_MIPMAP_NEAREST sampling of a 1-D texture: for each of the n
+ * fragments pick the nearest mipmap level and filter linearly within it.
+ */
+static void
+sample_1d_linear_mipmap_nearest(GLcontext *ctx,
+                                const struct gl_texture_object *tObj,
+                                GLuint n, const GLfloat texcoord[][4],
+                                const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint k;
+   ASSERT(lambda != NULL);
+   for (k = 0; k < n; k++) {
+      const GLint level = nearest_mipmap_level(tObj, lambda[k]);
+      const struct gl_texture_image *levelImg = tObj->Image[0][level];
+      sample_1d_linear(ctx, tObj, levelImg, texcoord[k], rgba[k]);
+   }
+}
+
+
+/*
+ * GL_NEAREST_MIPMAP_LINEAR sampling of a 1-D texture: take the nearest texel
+ * from two adjacent mipmap levels and blend them by the fraction of lambda.
+ * At or beyond the last level only that level is sampled.
+ */
+static void
+sample_1d_nearest_mipmap_linear(GLcontext *ctx,
+                                const struct gl_texture_object *tObj,
+                                GLuint n, const GLfloat texcoord[][4],
+                                const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint k;
+   ASSERT(lambda != NULL);
+   for (k = 0; k < n; k++) {
+      const GLint level = linear_mipmap_level(tObj, lambda[k]);
+      if (level < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];
+         const GLfloat blend = FRAC(lambda[k]);
+         sample_1d_nearest(ctx, tObj, tObj->Image[0][level  ], texcoord[k], lower);
+         sample_1d_nearest(ctx, tObj, tObj->Image[0][level+1], texcoord[k], upper);
+         lerp_rgba(rgba[k], blend, lower, upper);
+      }
+      else {
+         /* no higher level to blend with */
+         sample_1d_nearest(ctx, tObj, tObj->Image[0][tObj->_MaxLevel],
+                           texcoord[k], rgba[k]);
+      }
+   }
+}
+
+
+
+/*
+ * GL_LINEAR_MIPMAP_LINEAR sampling of a 1-D texture: filter linearly within
+ * two adjacent mipmap levels and blend the results by the fraction of lambda.
+ * At or beyond the last level only that level is sampled.
+ */
+static void
+sample_1d_linear_mipmap_linear(GLcontext *ctx,
+                               const struct gl_texture_object *tObj,
+                               GLuint n, const GLfloat texcoord[][4],
+                               const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint k;
+   ASSERT(lambda != NULL);
+   for (k = 0; k < n; k++) {
+      const GLint level = linear_mipmap_level(tObj, lambda[k]);
+      if (level < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];
+         const GLfloat blend = FRAC(lambda[k]);
+         sample_1d_linear(ctx, tObj, tObj->Image[0][level  ], texcoord[k], lower);
+         sample_1d_linear(ctx, tObj, tObj->Image[0][level+1], texcoord[k], upper);
+         lerp_rgba(rgba[k], blend, lower, upper);
+      }
+      else {
+         /* no higher level to blend with */
+         sample_1d_linear(ctx, tObj, tObj->Image[0][tObj->_MaxLevel],
+                          texcoord[k], rgba[k]);
+      }
+   }
+}
+
+
+
+/*
+ * Sample n texels from the base level of a 1-D texture with the GL_NEAREST
+ * filter.  The lambda values are not used.
+ */
+static void
+sample_nearest_1d( GLcontext *ctx,
+                   const struct gl_texture_object *tObj, GLuint n,
+                   const GLfloat texcoords[][4], const GLfloat lambda[],
+                   GLchan rgba[][4] )
+{
+   struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   GLuint k;
+
+   (void) lambda;
+
+   for (k = 0; k < n; k++) {
+      sample_1d_nearest(ctx, tObj, baseImage, texcoords[k], rgba[k]);
+   }
+}
+
+
+
+/*
+ * Sample n texels from the base level of a 1-D texture with the GL_LINEAR
+ * filter.  The lambda values are not used.
+ */
+static void
+sample_linear_1d( GLcontext *ctx,
+                  const struct gl_texture_object *tObj, GLuint n,
+                  const GLfloat texcoords[][4], const GLfloat lambda[],
+                  GLchan rgba[][4] )
+{
+   struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   GLuint k;
+
+   (void) lambda;
+
+   for (k = 0; k < n; k++) {
+      sample_1d_linear(ctx, tObj, baseImage, texcoords[k], rgba[k]);
+   }
+}
+
+
+/*
+ * Sample a 1-D texture for n fragments, given their (s) texture coordinates
+ * and lambda (level of detail) values.  The span is first split into its
+ * minified and magnified parts; each part is then sampled with the
+ * texture object's corresponding filter.
+ */
+static void
+sample_lambda_1d( GLcontext *ctx,
+                  const struct gl_texture_object *tObj, GLuint n,
+                  const GLfloat texcoords[][4],
+                  const GLfloat lambda[], GLchan rgba[][4] )
+{
+   GLuint minStart, minEnd;  /* texels with minification */
+   GLuint magStart, magEnd;  /* texels with magnification */
+
+   ASSERT(lambda != NULL);
+   compute_min_mag_ranges(tObj, n, lambda,
+                          &minStart, &minEnd, &magStart, &magEnd);
+
+   if (minStart < minEnd) {
+      /* do the minified texels */
+      const GLuint count = minEnd - minStart;
+      const GLfloat (*coords)[4] = texcoords + minStart;
+      const GLfloat *lod = lambda + minStart;
+      GLchan (*colors)[4] = rgba + minStart;
+
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         /* base level only */
+         sample_nearest_1d(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_LINEAR:
+         /* base level only */
+         sample_linear_1d(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         sample_1d_nearest_mipmap_nearest(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         sample_1d_linear_mipmap_nearest(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         sample_1d_nearest_mipmap_linear(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         sample_1d_linear_mipmap_linear(ctx, tObj, count, coords, lod, colors);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad min filter in sample_1d_texture");
+         return;
+      }
+   }
+
+   if (magStart < magEnd) {
+      /* do the magnified texels; these always come from the base level */
+      const GLuint count = magEnd - magStart;
+      const GLfloat (*coords)[4] = texcoords + magStart;
+      const GLfloat *lod = lambda + magStart;
+      GLchan (*colors)[4] = rgba + magStart;
+
+      switch (tObj->MagFilter) {
+      case GL_NEAREST:
+         sample_nearest_1d(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_LINEAR:
+         sample_linear_1d(ctx, tObj, count, coords, lod, colors);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad mag filter in sample_1d_texture");
+         return;
+      }
+   }
+}
+
+
+/**********************************************************************/
+/*                    2-D Texture Sampling Functions                  */
+/**********************************************************************/
+
+
+/*
+ * Fetch the texture sample for coordinate (s,t) from a 2-D image using the
+ * GL_NEAREST filter.  The border color is returned when the selected
+ * texel lies outside the image.
+ */
+static INLINE void
+sample_2d_nearest(GLcontext *ctx,
+                  const struct gl_texture_object *tObj,
+                  const struct gl_texture_image *img,
+                  const GLfloat texcoord[4],
+                  GLchan rgba[])
+{
+   const GLint sizeS = img->Width2;    /* width without border */
+   const GLint sizeT = img->Height2;   /* height without border */
+   GLint col, row;
+   (void) ctx;
+
+   COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapS, texcoord[0], sizeS, col);
+   COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapT, texcoord[1], sizeT, row);
+
+   /* convert to indexes into the stored image, border included */
+   col += img->Border;
+   row += img->Border;
+
+   if (col >= 0 && col < (GLint) img->Width &&
+       row >= 0 && row < (GLint) img->Height) {
+      img->FetchTexelc(img, col, row, 0, rgba);
+   }
+   else {
+      /* outside the image: needed for GL_CLAMP_TO_BORDER mode */
+      COPY_CHAN4(rgba, tObj->_BorderChan);
+   }
+}
+
+
+
+/**
+ * Fetch the texture sample for coordinate (s,t) from a 2-D image using the
+ * GL_LINEAR filter: four neighbouring texels are blended bilinearly, with
+ * the border color standing in for texels outside a borderless image.
+ * New sampling code contributed by Lynn Quam <quam@ai.sri.com>.
+ */
+static INLINE void
+sample_2d_linear(GLcontext *ctx,
+                 const struct gl_texture_object *tObj,
+                 const struct gl_texture_image *img,
+                 const GLfloat texcoord[4],
+                 GLchan rgba[])
+{
+   const GLint sizeS = img->Width2;
+   const GLint sizeT = img->Height2;
+   GLbitfield outside = 0x0;
+   GLchan c00[4], c10[4], c01[4], c11[4]; /* the four texel colors */
+   GLint col0, col1, row0, row1;
+   GLfloat posS, posT;
+   GLfloat weightS, weightT;
+
+   COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapS, texcoord[0], posS, sizeS, col0, col1);
+   COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapT, texcoord[1], posT, sizeT, row0, row1);
+
+   if (!img->Border) {
+      /* no border texels stored: remember which indexes are out of range */
+      if (col0 < 0 || col0 >= sizeS)   outside |= I0BIT;
+      if (col1 < 0 || col1 >= sizeS)   outside |= I1BIT;
+      if (row0 < 0 || row0 >= sizeT)   outside |= J0BIT;
+      if (row1 < 0 || row1 >= sizeT)   outside |= J1BIT;
+   }
+   else {
+      /* skip over the border */
+      col0 += img->Border;
+      col1 += img->Border;
+      row0 += img->Border;
+      row1 += img->Border;
+   }
+
+   /* texel (col0, row0) */
+   if (outside & (I0BIT | J0BIT)) {
+      COPY_CHAN4(c00, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, col0, row0, 0, c00);
+   }
+
+   /* texel (col1, row0) */
+   if (outside & (I1BIT | J0BIT)) {
+      COPY_CHAN4(c10, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, col1, row0, 0, c10);
+   }
+
+   /* texel (col0, row1) */
+   if (outside & (I0BIT | J1BIT)) {
+      COPY_CHAN4(c01, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, col0, row1, 0, c01);
+   }
+
+   /* texel (col1, row1) */
+   if (outside & (I1BIT | J1BIT)) {
+      COPY_CHAN4(c11, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, col1, row1, 0, c11);
+   }
+
+   weightS = FRAC(posS);
+   weightT = FRAC(posT);
+   lerp_rgba_2d(rgba, weightS, weightT, c00, c10, c01, c11);
+}
+
+
+/*
+ * Variant of sample_2d_linear() for WRAP_S == WRAP_T == GL_REPEAT.
+ * The asserts below spell out the preconditions (no border, power-of-two
+ * image, not a color-index texture).  No border-color handling is done.
+ */
+static INLINE void
+sample_2d_linear_repeat(GLcontext *ctx,
+                        const struct gl_texture_object *tObj,
+                        const struct gl_texture_image *img,
+                        const GLfloat texcoord[4],
+                        GLchan rgba[])
+{
+   const GLint width = img->Width2;
+   const GLint height = img->Height2;
+   GLchan t00[4], t10[4], t01[4], t11[4]; /* the four footprint texels */
+   GLint i0, i1, j0, j1;
+   GLfloat u, v;
+   GLfloat fracU, fracV;
+
+   (void) ctx;
+
+   ASSERT(tObj->WrapS == GL_REPEAT);
+   ASSERT(tObj->WrapT == GL_REPEAT);
+   ASSERT(img->Border == 0);
+   ASSERT(img->_BaseFormat != GL_COLOR_INDEX);
+   ASSERT(img->_IsPowerOfTwo);
+
+   COMPUTE_LINEAR_REPEAT_TEXEL_LOCATION(texcoord[0], u, width,  i0, i1);
+   COMPUTE_LINEAR_REPEAT_TEXEL_LOCATION(texcoord[1], v, height, j0, j1);
+
+   /* blend weights are the fractional parts of the texel-space coords */
+   fracU = FRAC(u);
+   fracV = FRAC(v);
+
+   img->FetchTexelc(img, i0, j0, 0, t00);
+   img->FetchTexelc(img, i1, j0, 0, t10);
+   img->FetchTexelc(img, i0, j1, 0, t01);
+   img->FetchTexelc(img, i1, j1, 0, t11);
+
+   lerp_rgba_2d(rgba, fracU, fracV, t00, t10, t01, t11);
+}
+
+
+
+/*
+ * GL_NEAREST_MIPMAP_NEAREST for 2-D textures: for each of the n fragments,
+ * select one mipmap level from its lambda with nearest_mipmap_level() and
+ * take a GL_NEAREST sample from that level.
+ */
+static void
+sample_2d_nearest_mipmap_nearest(GLcontext *ctx,
+                                 const struct gl_texture_object *tObj,
+                                 GLuint n, const GLfloat texcoord[][4],
+                                 const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint i;
+   /* lambda[] is read for every fragment, so insist on it just as the
+    * other mipmap samplers in this file do. */
+   ASSERT(lambda != NULL);
+   for (i = 0; i < n; i++) {
+      GLint level = nearest_mipmap_level(tObj, lambda[i]);
+      sample_2d_nearest(ctx, tObj, tObj->Image[0][level], texcoord[i], rgba[i]);
+   }
+}
+
+
+
+/*
+ * GL_LINEAR_MIPMAP_NEAREST for 2-D textures: each fragment selects one
+ * mipmap level via nearest_mipmap_level() and is sampled from that level
+ * alone with sample_2d_linear().
+ */
+static void
+sample_2d_linear_mipmap_nearest(GLcontext *ctx,
+                                const struct gl_texture_object *tObj,
+                                GLuint n, const GLfloat texcoord[][4],
+                                const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag = 0;
+   ASSERT(lambda != NULL);
+   while (frag < n) {
+      const GLint lod = nearest_mipmap_level(tObj, lambda[frag]);
+      sample_2d_linear(ctx, tObj, tObj->Image[0][lod],
+                       texcoord[frag], rgba[frag]);
+      frag++;
+   }
+}
+
+
+
+/*
+ * GL_NEAREST_MIPMAP_LINEAR for 2-D textures: take a GL_NEAREST sample from
+ * two adjacent mipmap levels and blend them by the fractional part of
+ * lambda.  At or beyond _MaxLevel only that last level is sampled.
+ */
+static void
+sample_2d_nearest_mipmap_linear(GLcontext *ctx,
+                                const struct gl_texture_object *tObj,
+                                GLuint n, const GLfloat texcoord[][4],
+                                const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      const GLint lod = linear_mipmap_level(tObj, lambda[frag]);
+      if (lod < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];  /* samples from lod and lod + 1 */
+         const GLfloat weight = FRAC(lambda[frag]);
+         sample_2d_nearest(ctx, tObj, tObj->Image[0][lod],
+                           texcoord[frag], lower);
+         sample_2d_nearest(ctx, tObj, tObj->Image[0][lod + 1],
+                           texcoord[frag], upper);
+         lerp_rgba(rgba[frag], weight, lower, upper);
+      }
+      else {
+         /* no coarser level to blend with */
+         sample_2d_nearest(ctx, tObj, tObj->Image[0][tObj->_MaxLevel],
+                           texcoord[frag], rgba[frag]);
+      }
+   }
+}
+
+
+
+/*
+ * Trilinear filtering (GL_LINEAR_MIPMAP_LINEAR) for 2-D textures: sample
+ * two adjacent mipmap levels with sample_2d_linear() and blend the results
+ * by the fractional part of lambda.  At or beyond _MaxLevel only that last
+ * level is sampled.
+ */
+static void
+sample_2d_linear_mipmap_linear( GLcontext *ctx,
+                                const struct gl_texture_object *tObj,
+                                GLuint n, const GLfloat texcoord[][4],
+                                const GLfloat lambda[], GLchan rgba[][4] )
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      const GLint lod = linear_mipmap_level(tObj, lambda[frag]);
+      if (lod < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];  /* samples from lod and lod + 1 */
+         const GLfloat weight = FRAC(lambda[frag]);
+         sample_2d_linear(ctx, tObj, tObj->Image[0][lod],
+                          texcoord[frag], lower);
+         sample_2d_linear(ctx, tObj, tObj->Image[0][lod + 1],
+                          texcoord[frag], upper);
+         lerp_rgba(rgba[frag], weight, lower, upper);
+      }
+      else {
+         /* no coarser level to blend with */
+         sample_2d_linear(ctx, tObj, tObj->Image[0][tObj->_MaxLevel],
+                          texcoord[frag], rgba[frag]);
+      }
+   }
+}
+
+
+/*
+ * Trilinear filtering for 2-D textures whose S and T wrap modes are both
+ * GL_REPEAT: same level selection and blending as
+ * sample_2d_linear_mipmap_linear(), but each level is sampled with
+ * sample_2d_linear_repeat().
+ */
+static void
+sample_2d_linear_mipmap_linear_repeat( GLcontext *ctx,
+                                       const struct gl_texture_object *tObj,
+                                       GLuint n, const GLfloat texcoord[][4],
+                                       const GLfloat lambda[], GLchan rgba[][4] )
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   ASSERT(tObj->WrapS == GL_REPEAT);
+   ASSERT(tObj->WrapT == GL_REPEAT);
+   for (frag = 0; frag < n; frag++) {
+      const GLint lod = linear_mipmap_level(tObj, lambda[frag]);
+      if (lod < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];  /* samples from lod and lod + 1 */
+         const GLfloat weight = FRAC(lambda[frag]);
+         sample_2d_linear_repeat(ctx, tObj, tObj->Image[0][lod],
+                                 texcoord[frag], lower);
+         sample_2d_linear_repeat(ctx, tObj, tObj->Image[0][lod + 1],
+                                 texcoord[frag], upper);
+         lerp_rgba(rgba[frag], weight, lower, upper);
+      }
+      else {
+         /* no coarser level to blend with */
+         sample_2d_linear_repeat(ctx, tObj, tObj->Image[0][tObj->_MaxLevel],
+                                 texcoord[frag], rgba[frag]);
+      }
+   }
+}
+
+
+/*
+ * GL_NEAREST sampling of the base level image for n fragments.
+ * lambda is ignored.
+ */
+static void
+sample_nearest_2d( GLcontext *ctx,
+                   const struct gl_texture_object *tObj, GLuint n,
+                   const GLfloat texcoords[][4],
+                   const GLfloat lambda[], GLchan rgba[][4] )
+{
+   const struct gl_texture_image *base = tObj->Image[0][tObj->BaseLevel];
+   GLuint frag = 0;
+   (void) lambda;
+   while (frag < n) {
+      sample_2d_nearest(ctx, tObj, base, texcoords[frag], rgba[frag]);
+      frag++;
+   }
+}
+
+
+
+/*
+ * GL_LINEAR sampling of the base level image for n fragments.
+ * lambda is ignored.
+ *
+ * The GL_REPEAT fast path is taken only when the image also meets the
+ * preconditions that sample_2d_linear_repeat() asserts (no border,
+ * power-of-two size, not a color-index texture); any other image goes
+ * through the general sample_2d_linear().
+ */
+static void
+sample_linear_2d( GLcontext *ctx,
+                  const struct gl_texture_object *tObj, GLuint n,
+                  const GLfloat texcoords[][4],
+                  const GLfloat lambda[], GLchan rgba[][4] )
+{
+   GLuint i;
+   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   (void) lambda;
+   if (tObj->WrapS == GL_REPEAT &&
+       tObj->WrapT == GL_REPEAT &&
+       image->Border == 0 &&
+       image->_BaseFormat != GL_COLOR_INDEX &&
+       image->_IsPowerOfTwo) {
+      for (i=0;i<n;i++) {
+         sample_2d_linear_repeat(ctx, tObj, image, texcoords[i], rgba[i]);
+      }
+   }
+   else {
+      for (i=0;i<n;i++) {
+         sample_2d_linear(ctx, tObj, image, texcoords[i], rgba[i]);
+      }
+   }
+}
+
+
+/*
+ * Optimized 2-D texture sampling:
+ *    S and T wrap mode == GL_REPEAT
+ *    GL_NEAREST min/mag filter
+ *    No border,
+ *    RowStride == Width,
+ *    Format = GL_RGB
+ *
+ * Reads three GLchans per texel straight from img->Data for each of the
+ * n fragments; ctx and lambda are ignored.  The image holds no alpha, so
+ * alpha is written as CHAN_MAX (opaque) rather than being left at
+ * whatever the caller's rgba array held before.
+ */
+static void
+opt_sample_rgb_2d( GLcontext *ctx,
+                   const struct gl_texture_object *tObj,
+                   GLuint n, const GLfloat texcoords[][4],
+                   const GLfloat lambda[], GLchan rgba[][4] )
+{
+   const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel];
+   const GLfloat width = (GLfloat) img->Width;
+   const GLfloat height = (GLfloat) img->Height;
+   const GLint colMask = img->Width - 1;
+   const GLint rowMask = img->Height - 1;
+   const GLint shift = img->WidthLog2;
+   GLuint k;
+   (void) ctx;
+   (void) lambda;
+   ASSERT(tObj->WrapS==GL_REPEAT);
+   ASSERT(tObj->WrapT==GL_REPEAT);
+   ASSERT(img->Border==0);
+   ASSERT(img->_BaseFormat==GL_RGB);
+   ASSERT(img->_IsPowerOfTwo);
+
+   for (k=0; k<n; k++) {
+      /* masking with size-1 implements GL_REPEAT for power-of-two sizes */
+      GLint i = IFLOOR(texcoords[k][0] * width) & colMask;
+      GLint j = IFLOOR(texcoords[k][1] * height) & rowMask;
+      /* row * width + column, with width == 1 << shift */
+      GLint pos = (j << shift) | i;
+      GLchan *texel = ((GLchan *) img->Data) + 3*pos;
+      rgba[k][RCOMP] = texel[0];
+      rgba[k][GCOMP] = texel[1];
+      rgba[k][BCOMP] = texel[2];
+      rgba[k][ACOMP] = CHAN_MAX;
+   }
+}
+
+
+/*
+ * Fast path for 2-D texture sampling.  Preconditions:
+ *    GL_REPEAT wrap mode for both S and T
+ *    GL_NEAREST min/mag filter
+ *    image has no border
+ *    RowStride == Width
+ *    base format is GL_RGBA
+ * Four GLchans per texel are copied straight out of img->Data for each of
+ * the n fragments; ctx and lambda are ignored.
+ */
+static void
+opt_sample_rgba_2d( GLcontext *ctx,
+                    const struct gl_texture_object *tObj,
+                    GLuint n, const GLfloat texcoords[][4],
+                    const GLfloat lambda[], GLchan rgba[][4] )
+{
+   const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel];
+   const GLchan *texels = (GLchan *) img->Data;
+   const GLfloat texW = (GLfloat) img->Width;
+   const GLfloat texH = (GLfloat) img->Height;
+   const GLint wrapCol = img->Width - 1;
+   const GLint wrapRow = img->Height - 1;
+   const GLint rowShift = img->WidthLog2;
+   GLuint frag;
+   (void) ctx;
+   (void) lambda;
+   ASSERT(tObj->WrapS==GL_REPEAT);
+   ASSERT(tObj->WrapT==GL_REPEAT);
+   ASSERT(img->Border==0);
+   ASSERT(img->_BaseFormat==GL_RGBA);
+   ASSERT(img->_IsPowerOfTwo);
+
+   for (frag = 0; frag < n; frag++) {
+      /* masking with size-1 wraps the index for power-of-two sizes */
+      const GLint x = IFLOOR(texcoords[frag][0] * texW) & wrapCol;
+      const GLint y = IFLOOR(texcoords[frag][1] * texH) & wrapRow;
+      const GLint index = (y << rowShift) | x;
+      const GLchan *src = texels + 4 * index;
+      COPY_CHAN4(rgba[frag], src);
+   }
+}
+
+
+/*
+ * GL_NEAREST sampling of the base level for a run of n fragments.
+ * When 'fastPath' is set and the image's MesaFormat is one of the listed
+ * RGB/RGBA formats, the optimized samplers are used; everything else goes
+ * through sample_nearest_2d().
+ *
+ * NOTE(review): opt_sample_rgb_2d()/opt_sample_rgba_2d() copy GLchans in
+ * memory order; confirm that MESA_FORMAT_RGB888, MESA_FORMAT_RGBA8888 and
+ * MESA_FORMAT_ARGB8888 really store their components in that order.
+ */
+static void
+sample_nearest_2d_run( GLcontext *ctx,
+                       const struct gl_texture_object *tObj,
+                       const struct gl_texture_image *tImg,
+                       GLboolean fastPath, GLuint n,
+                       const GLfloat texcoords[][4], GLchan rgba[][4] )
+{
+   if (fastPath) {
+      switch (tImg->TexFormat->MesaFormat) {
+      case MESA_FORMAT_RGB:
+      case MESA_FORMAT_RGB888:
+      /*case MESA_FORMAT_BGR888:*/
+         opt_sample_rgb_2d(ctx, tObj, n, texcoords, NULL, rgba);
+         return;
+      case MESA_FORMAT_RGBA:
+      case MESA_FORMAT_RGBA8888:
+      case MESA_FORMAT_ARGB8888:
+      /*case MESA_FORMAT_ABGR8888:*/
+      /*case MESA_FORMAT_BGRA8888:*/
+         opt_sample_rgba_2d(ctx, tObj, n, texcoords, NULL, rgba);
+         return;
+      default:
+         break;  /* no optimized sampler for this format */
+      }
+   }
+   sample_nearest_2d(ctx, tObj, n, texcoords, NULL, rgba);
+}
+
+
+/*
+ * Given an array of texture coordinate and lambda (level of detail)
+ * values, return an array of texture sample.
+ *
+ * compute_min_mag_ranges() splits the n fragments into a minified range
+ * and a magnified range; each range is then sampled with the texture
+ * object's MinFilter or MagFilter respectively.  An unrecognized min
+ * filter is reported and ends the function before the magnified range is
+ * handled.
+ */
+static void
+sample_lambda_2d( GLcontext *ctx,
+                  const struct gl_texture_object *tObj,
+                  GLuint n, const GLfloat texcoords[][4],
+                  const GLfloat lambda[], GLchan rgba[][4] )
+{
+   const struct gl_texture_image *tImg = tObj->Image[0][tObj->BaseLevel];
+   GLuint minStart, minEnd;  /* texels with minification */
+   GLuint magStart, magEnd;  /* texels with magnification */
+
+   /* base-level image qualifies for the GL_REPEAT fast paths */
+   const GLboolean repeatNoBorderPOT = (tObj->WrapS == GL_REPEAT)
+      && (tObj->WrapT == GL_REPEAT)
+      && (tImg->Border == 0 && (tImg->Width == tImg->RowStride))
+      && (tImg->_BaseFormat != GL_COLOR_INDEX)
+      && tImg->_IsPowerOfTwo;
+
+   ASSERT(lambda != NULL);
+   compute_min_mag_ranges(tObj, n, lambda,
+                          &minStart, &minEnd, &magStart, &magEnd);
+
+   if (minStart < minEnd) {
+      /* do the minified texels */
+      const GLuint m = minEnd - minStart;
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         sample_nearest_2d_run(ctx, tObj, tImg, repeatNoBorderPOT, m,
+                               texcoords + minStart, rgba + minStart);
+         break;
+      case GL_LINEAR:
+         sample_linear_2d(ctx, tObj, m, texcoords + minStart,
+                          NULL, rgba + minStart);
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         sample_2d_nearest_mipmap_nearest(ctx, tObj, m,
+                                          texcoords + minStart,
+                                          lambda + minStart, rgba + minStart);
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         sample_2d_linear_mipmap_nearest(ctx, tObj, m, texcoords + minStart,
+                                         lambda + minStart, rgba + minStart);
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         sample_2d_nearest_mipmap_linear(ctx, tObj, m, texcoords + minStart,
+                                         lambda + minStart, rgba + minStart);
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         if (repeatNoBorderPOT)
+            sample_2d_linear_mipmap_linear_repeat(ctx, tObj, m,
+                  texcoords + minStart, lambda + minStart, rgba + minStart);
+         else
+            sample_2d_linear_mipmap_linear(ctx, tObj, m, texcoords + minStart,
+                                        lambda + minStart, rgba + minStart);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad min filter in sample_lambda_2d");
+         return;
+      }
+   }
+
+   if (magStart < magEnd) {
+      /* do the magnified texels */
+      const GLuint m = magEnd - magStart;
+
+      switch (tObj->MagFilter) {
+      case GL_NEAREST:
+         sample_nearest_2d_run(ctx, tObj, tImg, repeatNoBorderPOT, m,
+                               texcoords + magStart, rgba + magStart);
+         break;
+      case GL_LINEAR:
+         sample_linear_2d(ctx, tObj, m, texcoords + magStart,
+                          NULL, rgba + magStart);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad mag filter in sample_lambda_2d");
+      }
+   }
+}
+
+
+
+/**********************************************************************/
+/*                    3-D Texture Sampling Functions                  */
+/**********************************************************************/
+
+/*
+ * Sample a single 3-D image at (s,t,r) with the GL_NEAREST filter.
+ * If any of the three texel indices falls outside the image, the texture
+ * object's border color is returned instead of a texel.
+ *
+ * NOTE(review): unlike sample_2d_nearest(), the indices are not offset by
+ * img->Border here -- confirm whether bordered 3-D images are expected.
+ */
+static void
+sample_3d_nearest(GLcontext *ctx,
+                  const struct gl_texture_object *tObj,
+                  const struct gl_texture_image *img,
+                  const GLfloat texcoord[4],
+                  GLchan rgba[4])
+{
+   const GLint width = img->Width2;     /* without border, power of two */
+   const GLint height = img->Height2;   /* without border, power of two */
+   const GLint depth = img->Depth2;     /* without border, power of two */
+   GLint col, row, slice;
+   (void) ctx;
+
+   COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapS, texcoord[0], width,  col);
+   COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapT, texcoord[1], height, row);
+   COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapR, texcoord[2], depth,  slice);
+
+   if (col >= 0 && col < (GLint) img->Width &&
+       row >= 0 && row < (GLint) img->Height &&
+       slice >= 0 && slice < (GLint) img->Depth) {
+      img->FetchTexelc(img, col, row, slice, rgba);
+   }
+   else {
+      /* out of range: needed for GL_CLAMP_TO_BORDER mode */
+      COPY_CHAN4(rgba, tObj->_BorderChan);
+   }
+}
+
+
+
+/*
+ * Sample a single 3-D image at (s,t,r) with the GL_LINEAR filter: the
+ * 2x2x2 texel footprint is fetched and blended trilinearly into rgba.
+ * For an image without a border, any footprint texel with an index
+ * outside the image is replaced by the texture object's border color.
+ */
+static void
+sample_3d_linear(GLcontext *ctx,
+                 const struct gl_texture_object *tObj,
+                 const struct gl_texture_image *img,
+                 const GLfloat texcoord[4],
+                 GLchan rgba[4])
+{
+   const GLint width = img->Width2;
+   const GLint height = img->Height2;
+   const GLint depth = img->Depth2;
+   /* footprint texels, named t<i><j><k> with 0/1 = lower/upper index */
+   GLchan t000[4], t100[4], t010[4], t110[4];
+   GLchan t001[4], t101[4], t011[4], t111[4];
+   GLbitfield outside = 0x0;  /* I/J/K 0/1 BITs: index out of range */
+   GLint i0, i1, j0, j1, k0, k1;
+   GLfloat u, v, w;
+   GLfloat wx, wy, wz;        /* blend weights along S, T and R */
+
+   COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapS, texcoord[0], u, width,  i0, i1);
+   COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapT, texcoord[1], v, height, j0, j1);
+   COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapR, texcoord[2], w, depth,  k0, k1);
+
+   if (!img->Border) {
+      /* no border texels stored: flag every index that left the image */
+      if (i0 < 0 || i0 >= width)   outside |= I0BIT;
+      if (i1 < 0 || i1 >= width)   outside |= I1BIT;
+      if (j0 < 0 || j0 >= height)  outside |= J0BIT;
+      if (j1 < 0 || j1 >= height)  outside |= J1BIT;
+      if (k0 < 0 || k0 >= depth)   outside |= K0BIT;
+      if (k1 < 0 || k1 >= depth)   outside |= K1BIT;
+   }
+   else {
+      /* shift all indices past the stored border */
+      i0 += img->Border;
+      i1 += img->Border;
+      j0 += img->Border;
+      j1 += img->Border;
+      k0 += img->Border;
+      k1 += img->Border;
+   }
+
+   /* the four texels of slice k0 */
+   if (outside & (I0BIT | J0BIT | K0BIT)) {
+      COPY_CHAN4(t000, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i0, j0, k0, t000);
+   }
+   if (outside & (I1BIT | J0BIT | K0BIT)) {
+      COPY_CHAN4(t100, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i1, j0, k0, t100);
+   }
+   if (outside & (I0BIT | J1BIT | K0BIT)) {
+      COPY_CHAN4(t010, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i0, j1, k0, t010);
+   }
+   if (outside & (I1BIT | J1BIT | K0BIT)) {
+      COPY_CHAN4(t110, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i1, j1, k0, t110);
+   }
+
+   /* the four texels of slice k1 */
+   if (outside & (I0BIT | J0BIT | K1BIT)) {
+      COPY_CHAN4(t001, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i0, j0, k1, t001);
+   }
+   if (outside & (I1BIT | J0BIT | K1BIT)) {
+      COPY_CHAN4(t101, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i1, j0, k1, t101);
+   }
+   if (outside & (I0BIT | J1BIT | K1BIT)) {
+      COPY_CHAN4(t011, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i0, j1, k1, t011);
+   }
+   if (outside & (I1BIT | J1BIT | K1BIT)) {
+      COPY_CHAN4(t111, tObj->_BorderChan);
+   }
+   else {
+      img->FetchTexelc(img, i1, j1, k1, t111);
+   }
+
+   /* the fractional parts of u, v, w are the interpolation weights */
+   wx = FRAC(u);
+   wy = FRAC(v);
+   wz = FRAC(w);
+   lerp_rgba_3d(rgba, wx, wy, wz,
+                t000, t100, t010, t110, t001, t101, t011, t111);
+}
+
+
+
+/*
+ * GL_NEAREST_MIPMAP_NEAREST for 3-D textures: for each of the n fragments,
+ * select one mipmap level from its lambda with nearest_mipmap_level() and
+ * take a GL_NEAREST sample from that level.
+ */
+static void
+sample_3d_nearest_mipmap_nearest(GLcontext *ctx,
+                                 const struct gl_texture_object *tObj,
+                                 GLuint n, const GLfloat texcoord[][4],
+                                 const GLfloat lambda[], GLchan rgba[][4] )
+{
+   GLuint i;
+   /* lambda[] is read for every fragment, so insist on it just as the
+    * other mipmap samplers in this file do. */
+   ASSERT(lambda != NULL);
+   for (i = 0; i < n; i++) {
+      GLint level = nearest_mipmap_level(tObj, lambda[i]);
+      sample_3d_nearest(ctx, tObj, tObj->Image[0][level], texcoord[i], rgba[i]);
+   }
+}
+
+
+/*
+ * GL_LINEAR_MIPMAP_NEAREST for 3-D textures: each fragment selects one
+ * mipmap level via nearest_mipmap_level() and is sampled from that level
+ * alone with sample_3d_linear().
+ */
+static void
+sample_3d_linear_mipmap_nearest(GLcontext *ctx,
+                                const struct gl_texture_object *tObj,
+                                GLuint n, const GLfloat texcoord[][4],
+                                const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag = 0;
+   ASSERT(lambda != NULL);
+   while (frag < n) {
+      const GLint lod = nearest_mipmap_level(tObj, lambda[frag]);
+      sample_3d_linear(ctx, tObj, tObj->Image[0][lod],
+                       texcoord[frag], rgba[frag]);
+      frag++;
+   }
+}
+
+
+/*
+ * GL_NEAREST_MIPMAP_LINEAR for 3-D textures: take a GL_NEAREST sample from
+ * two adjacent mipmap levels and blend them by the fractional part of
+ * lambda.  At or beyond _MaxLevel only that last level is sampled.
+ */
+static void
+sample_3d_nearest_mipmap_linear(GLcontext *ctx,
+                                const struct gl_texture_object *tObj,
+                                GLuint n, const GLfloat texcoord[][4],
+                                const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      const GLint lod = linear_mipmap_level(tObj, lambda[frag]);
+      if (lod < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];  /* samples from lod and lod + 1 */
+         const GLfloat weight = FRAC(lambda[frag]);
+         sample_3d_nearest(ctx, tObj, tObj->Image[0][lod],
+                           texcoord[frag], lower);
+         sample_3d_nearest(ctx, tObj, tObj->Image[0][lod + 1],
+                           texcoord[frag], upper);
+         lerp_rgba(rgba[frag], weight, lower, upper);
+      }
+      else {
+         /* no coarser level to blend with */
+         sample_3d_nearest(ctx, tObj, tObj->Image[0][tObj->_MaxLevel],
+                           texcoord[frag], rgba[frag]);
+      }
+   }
+}
+
+
+/*
+ * GL_LINEAR_MIPMAP_LINEAR for 3-D textures: sample two adjacent mipmap
+ * levels with sample_3d_linear() and blend the results by the fractional
+ * part of lambda.  At or beyond _MaxLevel only that last level is sampled.
+ */
+static void
+sample_3d_linear_mipmap_linear(GLcontext *ctx,
+                               const struct gl_texture_object *tObj,
+                               GLuint n, const GLfloat texcoord[][4],
+                               const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      const GLint lod = linear_mipmap_level(tObj, lambda[frag]);
+      if (lod < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];  /* samples from lod and lod + 1 */
+         const GLfloat weight = FRAC(lambda[frag]);
+         sample_3d_linear(ctx, tObj, tObj->Image[0][lod],
+                          texcoord[frag], lower);
+         sample_3d_linear(ctx, tObj, tObj->Image[0][lod + 1],
+                          texcoord[frag], upper);
+         lerp_rgba(rgba[frag], weight, lower, upper);
+      }
+      else {
+         /* no coarser level to blend with */
+         sample_3d_linear(ctx, tObj, tObj->Image[0][tObj->_MaxLevel],
+                          texcoord[frag], rgba[frag]);
+      }
+   }
+}
+
+
+/*
+ * GL_NEAREST sampling of the base level 3-D image for n fragments.
+ * lambda is ignored.
+ */
+static void
+sample_nearest_3d(GLcontext *ctx,
+                  const struct gl_texture_object *tObj, GLuint n,
+                  const GLfloat texcoords[][4], const GLfloat lambda[],
+                  GLchan rgba[][4])
+{
+   const struct gl_texture_image *base = tObj->Image[0][tObj->BaseLevel];
+   GLuint frag = 0;
+   (void) lambda;
+   while (frag < n) {
+      sample_3d_nearest(ctx, tObj, base, texcoords[frag], rgba[frag]);
+      frag++;
+   }
+}
+
+
+
+/*
+ * GL_LINEAR sampling of the base level 3-D image for n fragments.
+ * lambda is ignored.
+ */
+static void
+sample_linear_3d( GLcontext *ctx,
+                  const struct gl_texture_object *tObj, GLuint n,
+                  const GLfloat texcoords[][4],
+                  const GLfloat lambda[], GLchan rgba[][4] )
+{
+   const struct gl_texture_image *base = tObj->Image[0][tObj->BaseLevel];
+   GLuint frag = 0;
+   (void) lambda;
+   while (frag < n) {
+      sample_3d_linear(ctx, tObj, base, texcoords[frag], rgba[frag]);
+      frag++;
+   }
+}
+
+
+/*
+ * Given arrays of (s,t,r) texture coordinates and lambda (level of detail)
+ * values for n fragments, return an array of texture samples.
+ *
+ * compute_min_mag_ranges() splits the fragments into a minified range and
+ * a magnified range; each range is then sampled with the texture object's
+ * MinFilter or MagFilter respectively.  An unrecognized filter is reported
+ * through _mesa_problem() and ends the function.
+ */
+static void
+sample_lambda_3d( GLcontext *ctx,
+                  const struct gl_texture_object *tObj, GLuint n,
+                  const GLfloat texcoords[][4], const GLfloat lambda[],
+                  GLchan rgba[][4] )
+{
+   GLuint minStart, minEnd;  /* texels with minification */
+   GLuint magStart, magEnd;  /* texels with magnification */
+
+   ASSERT(lambda != NULL);
+   compute_min_mag_ranges(tObj, n, lambda,
+                          &minStart, &minEnd, &magStart, &magEnd);
+
+   if (minStart < minEnd) {
+      /* do the minified texels */
+      const GLuint m = minEnd - minStart;
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         /* base level only; the helper ignores lambda */
+         sample_nearest_3d(ctx, tObj, m, texcoords + minStart,
+                           NULL, rgba + minStart);
+         break;
+      case GL_LINEAR:
+         sample_linear_3d(ctx, tObj, m, texcoords + minStart,
+                          NULL, rgba + minStart);
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         sample_3d_nearest_mipmap_nearest(ctx, tObj, m, texcoords + minStart,
+                                          lambda + minStart, rgba + minStart);
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         sample_3d_linear_mipmap_nearest(ctx, tObj, m, texcoords + minStart,
+                                         lambda + minStart, rgba + minStart);
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         sample_3d_nearest_mipmap_linear(ctx, tObj, m, texcoords + minStart,
+                                         lambda + minStart, rgba + minStart);
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         sample_3d_linear_mipmap_linear(ctx, tObj, m, texcoords + minStart,
+                                        lambda + minStart, rgba + minStart);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad min filter in sample_lambda_3d");
+         return;
+      }
+   }
+
+   if (magStart < magEnd) {
+      /* do the magnified texels */
+      const GLuint m = magEnd - magStart;
+      switch (tObj->MagFilter) {
+      case GL_NEAREST:
+         sample_nearest_3d(ctx, tObj, m, texcoords + magStart,
+                           NULL, rgba + magStart);
+         break;
+      case GL_LINEAR:
+         sample_linear_3d(ctx, tObj, m, texcoords + magStart,
+                          NULL, rgba + magStart);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad mag filter in sample_lambda_3d");
+         return;
+      }
+   }
+}
+
+
+/**********************************************************************/
+/*                Texture Cube Map Sampling Functions                 */
+/**********************************************************************/
+
+/**
+ * Choose one of six sides of a texture cube map given the texture
+ * coord (rx,ry,rz).  Return pointer to corresponding array of texture
+ * images.
+ *
+ * \param texObj    the cube map texture object
+ * \param texcoord  direction vector; elements 0..2 are rx, ry, rz
+ * \param newCoord  receives the 2-D face coordinate in elements 0 and 1;
+ *                  elements 2 and 3 are not written
+ * \return texObj->Image[face], the mipmap image array of the chosen face
+ */
+static const struct gl_texture_image **
+choose_cube_face(const struct gl_texture_object *texObj,
+                 const GLfloat texcoord[4], GLfloat newCoord[4])
+{
+   /*
+      major axis
+      direction     target                             sc     tc    ma
+      ----------    -------------------------------    ---    ---   ---
+       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
+       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
+       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
+       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
+       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
+       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
+   */
+   const GLfloat rx = texcoord[0];
+   const GLfloat ry = texcoord[1];
+   const GLfloat rz = texcoord[2];
+   const GLfloat arx = FABSF(rx), ary = FABSF(ry), arz = FABSF(rz);
+   GLuint face;
+   GLfloat sc, tc, ma;
+
+   /* Use >= so that a tie between the two largest magnitudes still picks
+    * one of them as the major axis.  With strict > a tie such as
+    * |rx| == |ry| > |rz| fell through to the Z faces, where dividing by
+    * the smaller |rz| pushed the face coordinate outside [0,1].
+    */
+   if (arx >= ary && arx >= arz) {
+      if (rx >= 0.0F) {
+         face = FACE_POS_X;
+         sc = -rz;
+         tc = -ry;
+         ma = arx;
+      }
+      else {
+         face = FACE_NEG_X;
+         sc = rz;
+         tc = -ry;
+         ma = arx;
+      }
+   }
+   else if (ary >= arx && ary >= arz) {
+      if (ry >= 0.0F) {
+         face = FACE_POS_Y;
+         sc = rx;
+         tc = rz;
+         ma = ary;
+      }
+      else {
+         face = FACE_NEG_Y;
+         sc = rx;
+         tc = -rz;
+         ma = ary;
+      }
+   }
+   else {
+      if (rz > 0.0F) {
+         face = FACE_POS_Z;
+         sc = rx;
+         tc = -ry;
+         ma = arz;
+      }
+      else {
+         face = FACE_NEG_Z;
+         sc = -rx;
+         tc = -ry;
+         ma = arz;
+      }
+   }
+
+   if (ma == 0.0F) {
+      /* zero-length direction: there is nothing to project, so avoid the
+       * 0/0 division and use the center of the face */
+      newCoord[0] = 0.5F;
+      newCoord[1] = 0.5F;
+   }
+   else {
+      /* map sc/ma and tc/ma from [-1,1] to [0,1] */
+      newCoord[0] = ( sc / ma + 1.0F ) * 0.5F;
+      newCoord[1] = ( tc / ma + 1.0F ) * 0.5F;
+   }
+   return (const struct gl_texture_image **) texObj->Image[face];
+}
+
+
+/*
+ * GL_NEAREST sampling of a cube map for n fragments: choose the face for
+ * each direction vector, then sample that face's base level image.
+ * lambda is ignored.
+ */
+static void
+sample_nearest_cube(GLcontext *ctx,
+                    const struct gl_texture_object *tObj, GLuint n,
+                    const GLfloat texcoords[][4], const GLfloat lambda[],
+                    GLchan rgba[][4])
+{
+   GLuint frag = 0;
+   (void) lambda;
+   while (frag < n) {
+      GLfloat faceCoord[4];
+      const struct gl_texture_image **faceLevels =
+         choose_cube_face(tObj, texcoords[frag], faceCoord);
+      sample_2d_nearest(ctx, tObj, faceLevels[tObj->BaseLevel],
+                        faceCoord, rgba[frag]);
+      frag++;
+   }
+}
+
+
+/*
+ * GL_LINEAR sampling of a cube map for n fragments: choose the face for
+ * each direction vector, then sample that face's base level image with
+ * sample_2d_linear().  lambda is ignored.
+ */
+static void
+sample_linear_cube(GLcontext *ctx,
+                   const struct gl_texture_object *tObj, GLuint n,
+                   const GLfloat texcoords[][4],
+                   const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag = 0;
+   (void) lambda;
+   while (frag < n) {
+      GLfloat faceCoord[4];
+      const struct gl_texture_image **faceLevels =
+         choose_cube_face(tObj, texcoords[frag], faceCoord);
+      sample_2d_linear(ctx, tObj, faceLevels[tObj->BaseLevel],
+                       faceCoord, rgba[frag]);
+      frag++;
+   }
+}
+
+
+/*
+ * GL_NEAREST_MIPMAP_NEAREST for cube maps: per fragment, pick one mipmap
+ * level with nearest_mipmap_level(), choose the cube face, and take a
+ * GL_NEAREST sample from that face's image at the chosen level.
+ */
+static void
+sample_cube_nearest_mipmap_nearest(GLcontext *ctx,
+                                   const struct gl_texture_object *tObj,
+                                   GLuint n, const GLfloat texcoord[][4],
+                                   const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      GLfloat faceCoord[4];
+      const GLint lod = nearest_mipmap_level(tObj, lambda[frag]);
+      const struct gl_texture_image **faceLevels =
+         choose_cube_face(tObj, texcoord[frag], faceCoord);
+      sample_2d_nearest(ctx, tObj, faceLevels[lod], faceCoord, rgba[frag]);
+   }
+}
+
+
+/*
+ * GL_LINEAR_MIPMAP_NEAREST for cube maps: per fragment, pick one mipmap
+ * level with nearest_mipmap_level(), choose the cube face, and sample that
+ * face's image at the chosen level with sample_2d_linear().
+ */
+static void
+sample_cube_linear_mipmap_nearest(GLcontext *ctx,
+                                  const struct gl_texture_object *tObj,
+                                  GLuint n, const GLfloat texcoord[][4],
+                                  const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      GLfloat faceCoord[4];
+      const GLint lod = nearest_mipmap_level(tObj, lambda[frag]);
+      const struct gl_texture_image **faceLevels =
+         choose_cube_face(tObj, texcoord[frag], faceCoord);
+      sample_2d_linear(ctx, tObj, faceLevels[lod], faceCoord, rgba[frag]);
+   }
+}
+
+
+/*
+ * GL_NEAREST_MIPMAP_LINEAR for cube maps: choose the cube face, take a
+ * GL_NEAREST sample from two adjacent mipmap levels of that face and
+ * blend them by the fractional part of lambda.  At or beyond _MaxLevel
+ * only that last level is sampled.
+ */
+static void
+sample_cube_nearest_mipmap_linear(GLcontext *ctx,
+                                  const struct gl_texture_object *tObj,
+                                  GLuint n, const GLfloat texcoord[][4],
+                                  const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      GLfloat faceCoord[4];
+      const GLint lod = linear_mipmap_level(tObj, lambda[frag]);
+      const struct gl_texture_image **faceLevels =
+         choose_cube_face(tObj, texcoord[frag], faceCoord);
+      if (lod < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];  /* samples from lod and lod + 1 */
+         const GLfloat weight = FRAC(lambda[frag]);
+         sample_2d_nearest(ctx, tObj, faceLevels[lod], faceCoord, lower);
+         sample_2d_nearest(ctx, tObj, faceLevels[lod + 1], faceCoord, upper);
+         lerp_rgba(rgba[frag], weight, lower, upper);
+      }
+      else {
+         /* no coarser level to blend with */
+         sample_2d_nearest(ctx, tObj, faceLevels[tObj->_MaxLevel],
+                           faceCoord, rgba[frag]);
+      }
+   }
+}
+
+
+/*
+ * GL_LINEAR_MIPMAP_LINEAR for cube maps: choose the cube face, sample two
+ * adjacent mipmap levels of that face with sample_2d_linear() and blend
+ * them by the fractional part of lambda.  At or beyond _MaxLevel only
+ * that last level is sampled.
+ */
+static void
+sample_cube_linear_mipmap_linear(GLcontext *ctx,
+                                 const struct gl_texture_object *tObj,
+                                 GLuint n, const GLfloat texcoord[][4],
+                                 const GLfloat lambda[], GLchan rgba[][4])
+{
+   GLuint frag;
+   ASSERT(lambda != NULL);
+   for (frag = 0; frag < n; frag++) {
+      GLfloat faceCoord[4];
+      const GLint lod = linear_mipmap_level(tObj, lambda[frag]);
+      const struct gl_texture_image **faceLevels =
+         choose_cube_face(tObj, texcoord[frag], faceCoord);
+      if (lod < tObj->_MaxLevel) {
+         GLchan lower[4], upper[4];  /* samples from lod and lod + 1 */
+         const GLfloat weight = FRAC(lambda[frag]);
+         sample_2d_linear(ctx, tObj, faceLevels[lod], faceCoord, lower);
+         sample_2d_linear(ctx, tObj, faceLevels[lod + 1], faceCoord, upper);
+         lerp_rgba(rgba[frag], weight, lower, upper);
+      }
+      else {
+         /* no coarser level to blend with */
+         sample_2d_linear(ctx, tObj, faceLevels[tObj->_MaxLevel],
+                          faceCoord, rgba[frag]);
+      }
+   }
+}
+
+
+/**
+ * Cube-map sampling when the minification and magnification filters may
+ * differ.  compute_min_mag_ranges() splits the fragment array into a
+ * minified range and a magnified range; each non-empty range is handed to
+ * the sampler matching tObj->MinFilter or tObj->MagFilter respectively.
+ * An unrecognized filter value is reported through _mesa_problem() and the
+ * corresponding range is left unsampled.
+ *
+ * \param ctx        rendering context
+ * \param tObj       the cube-map texture object
+ * \param n          number of fragments
+ * \param texcoords  per-fragment texture coordinates
+ * \param lambda     per-fragment level-of-detail values (must not be NULL)
+ * \param rgba       receives one color per fragment
+ */
+static void
+sample_lambda_cube( GLcontext *ctx,
+                    const struct gl_texture_object *tObj, GLuint n,
+                    const GLfloat texcoords[][4], const GLfloat lambda[],
+                    GLchan rgba[][4])
+{
+   GLuint minStart, minEnd;  /* range of minified fragments */
+   GLuint magStart, magEnd;  /* range of magnified fragments */
+
+   ASSERT(lambda != NULL);
+   compute_min_mag_ranges(tObj, n, lambda,
+                          &minStart, &minEnd, &magStart, &magEnd);
+
+   if (minStart < minEnd) {
+      /* minified fragments: dispatch on the minification filter */
+      const GLuint count = minEnd - minStart;
+      const GLfloat (*coords)[4] = texcoords + minStart;
+      const GLfloat *lod = lambda + minStart;
+      GLchan (*colors)[4] = rgba + minStart;
+
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         sample_nearest_cube(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_LINEAR:
+         sample_linear_cube(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         sample_cube_nearest_mipmap_nearest(ctx, tObj, count,
+                                            coords, lod, colors);
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         sample_cube_linear_mipmap_nearest(ctx, tObj, count,
+                                           coords, lod, colors);
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         sample_cube_nearest_mipmap_linear(ctx, tObj, count,
+                                           coords, lod, colors);
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         sample_cube_linear_mipmap_linear(ctx, tObj, count,
+                                          coords, lod, colors);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad min filter in sample_lambda_cube");
+      }
+   }
+
+   if (magStart < magEnd) {
+      /* magnified fragments: only the two non-mipmap filters are valid */
+      const GLuint count = magEnd - magStart;
+      const GLfloat (*coords)[4] = texcoords + magStart;
+      const GLfloat *lod = lambda + magStart;
+      GLchan (*colors)[4] = rgba + magStart;
+
+      switch (tObj->MagFilter) {
+      case GL_NEAREST:
+         sample_nearest_cube(ctx, tObj, count, coords, lod, colors);
+         break;
+      case GL_LINEAR:
+         sample_linear_cube(ctx, tObj, count, coords, lod, colors);
+         break;
+      default:
+         _mesa_problem(ctx, "Bad mag filter in sample_lambda_cube");
+      }
+   }
+}
+
+
+/**********************************************************************/
+/*               Texture Rectangle Sampling Functions                 */
+/**********************************************************************/
+
+/**
+ * GL_NEAREST sampling of a rectangle texture (image tObj->Image[0][0]).
+ *
+ * Texture coordinates are used as texel-space values, not [0, 1]
+ * normalized ones.  Each coordinate is clamped to a range selected by the
+ * wrap mode and floored to a column/row; a column/row outside the image
+ * yields tObj->_BorderChan, otherwise the texel is fetched.
+ *
+ * \param ctx        unused
+ * \param tObj       the rectangle texture object
+ * \param n          number of fragments
+ * \param texcoords  per-fragment texel-space coordinates (s in [0], t in [1])
+ * \param lambda     unused
+ * \param rgba       receives one color per fragment
+ */
+static void
+sample_nearest_rect(GLcontext *ctx,
+                    const struct gl_texture_object *tObj, GLuint n,
+                    const GLfloat texcoords[][4], const GLfloat lambda[],
+                    GLchan rgba[][4])
+{
+   const struct gl_texture_image *img = tObj->Image[0][0];
+   const GLfloat width = (GLfloat) img->Width;
+   const GLfloat height = (GLfloat) img->Height;
+   const GLint maxCol = img->Width - 1;
+   const GLint maxRow = img->Height - 1;
+   GLfloat sMin, sMax;   /* clamp range for the s coordinate */
+   GLfloat tMin, tMax;   /* clamp range for the t coordinate */
+   GLuint k;
+
+   (void) ctx;
+   (void) lambda;
+
+   ASSERT(tObj->WrapS == GL_CLAMP ||
+          tObj->WrapS == GL_CLAMP_TO_EDGE ||
+          tObj->WrapS == GL_CLAMP_TO_BORDER);
+   ASSERT(tObj->WrapT == GL_CLAMP ||
+          tObj->WrapT == GL_CLAMP_TO_EDGE ||
+          tObj->WrapT == GL_CLAMP_TO_BORDER);
+   ASSERT(img->_BaseFormat != GL_COLOR_INDEX);
+
+   /* The wrap modes do not change per fragment, so pick the clamp ranges
+    * once.  Any mode other than GL_CLAMP / GL_CLAMP_TO_EDGE is handled as
+    * GL_CLAMP_TO_BORDER.
+    */
+   switch (tObj->WrapS) {
+   case GL_CLAMP:
+      sMin = 0.0F;
+      sMax = width - 1;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      sMin = 0.5F;
+      sMax = width - 0.5F;
+      break;
+   default:
+      sMin = -0.5F;
+      sMax = width + 0.5F;
+      break;
+   }
+   switch (tObj->WrapT) {
+   case GL_CLAMP:
+      tMin = 0.0F;
+      tMax = height - 1;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      tMin = 0.5F;
+      tMax = height - 0.5F;
+      break;
+   default:
+      tMin = -0.5F;
+      tMax = height + 0.5F;
+      break;
+   }
+
+   for (k = 0; k < n; k++) {
+      /* NOTE: we DO NOT use [0, 1] texture coordinates! */
+      const GLint col = IFLOOR( CLAMP(texcoords[k][0], sMin, sMax) );
+      const GLint row = IFLOOR( CLAMP(texcoords[k][1], tMin, tMax) );
+
+      if (col >= 0 && col <= maxCol && row >= 0 && row <= maxRow)
+         img->FetchTexelc(img, col, row, 0, rgba[k]);
+      else
+         COPY_CHAN4(rgba[k], tObj->_BorderChan);
+   }
+}
+
+
+/**
+ * GL_LINEAR sampling of a rectangle texture (image tObj->Image[0][0]).
+ *
+ * Texture coordinates are used as texel-space values, not [0, 1]
+ * normalized ones.  For each fragment the two neighbouring columns (i0, i1)
+ * and rows (j0, j1) are derived from the wrap-mode-clamped coordinate, the
+ * four texels are fetched (tObj->_BorderChan is substituted for any texel
+ * whose column or row lies outside the image) and blended with
+ * lerp_rgba_2d() using the fractional parts of the column/row position.
+ *
+ * \param ctx        unused
+ * \param tObj       the rectangle texture object
+ * \param n          number of fragments
+ * \param texcoords  per-fragment texel-space coordinates (s in [0], t in [1])
+ * \param lambda     unused
+ * \param rgba       receives one color per fragment
+ */
+static void
+sample_linear_rect(GLcontext *ctx,
+                   const struct gl_texture_object *tObj, GLuint n,
+                   const GLfloat texcoords[][4],
+                   const GLfloat lambda[], GLchan rgba[][4])
+{
+   const struct gl_texture_image *img = tObj->Image[0][0];
+   const GLfloat width = (GLfloat) img->Width;
+   const GLfloat height = (GLfloat) img->Height;
+   const GLint width_minus_1 = img->Width - 1;
+   const GLint height_minus_1 = img->Height - 1;
+   GLuint i;
+
+   (void) ctx;
+   (void) lambda;
+
+   ASSERT(tObj->WrapS == GL_CLAMP ||
+          tObj->WrapS == GL_CLAMP_TO_EDGE ||
+          tObj->WrapS == GL_CLAMP_TO_BORDER);
+   ASSERT(tObj->WrapT == GL_CLAMP ||
+          tObj->WrapT == GL_CLAMP_TO_EDGE ||
+          tObj->WrapT == GL_CLAMP_TO_BORDER);
+   ASSERT(img->_BaseFormat != GL_COLOR_INDEX);
+
+   /* XXX lots of opportunity for optimization in this loop */
+   for (i = 0; i < n; i++) {
+      GLfloat frow, fcol;
+      GLint i0, j0, i1, j1;
+      GLchan t00[4], t01[4], t10[4], t11[4];
+      GLfloat a, b;
+      GLbitfield useBorderColor = 0x0;
+
+      /* NOTE: we DO NOT use [0, 1] texture coordinates! */
+
+      /* columns from the s coordinate */
+      if (tObj->WrapS == GL_CLAMP) {
+         /* Not exactly what the spec says, but it matches NVIDIA output */
+         fcol = CLAMP(texcoords[i][0] - 0.5F, 0.0, width_minus_1);
+         i0 = IFLOOR(fcol);
+         i1 = i0 + 1;
+      }
+      else if (tObj->WrapS == GL_CLAMP_TO_EDGE) {
+         fcol = CLAMP(texcoords[i][0], 0.5F, width - 0.5F);
+         fcol -= 0.5F;
+         i0 = IFLOOR(fcol);
+         i1 = i0 + 1;
+         if (i1 > width_minus_1)
+            i1 = width_minus_1;
+      }
+      else {
+         ASSERT(tObj->WrapS == GL_CLAMP_TO_BORDER);
+         fcol = CLAMP(texcoords[i][0], -0.5F, width + 0.5F);
+         fcol -= 0.5F;
+         i0 = IFLOOR(fcol);
+         i1 = i0 + 1;
+      }
+
+      /* rows from the t coordinate */
+      if (tObj->WrapT == GL_CLAMP) {
+         /* Not exactly what the spec says, but it matches NVIDIA output.
+          * The upper bound must be the last row (height - 1); clamping to
+          * the last column instead gives wrong rows for non-square images.
+          */
+         frow = CLAMP(texcoords[i][1] - 0.5F, 0.0, height_minus_1);
+         j0 = IFLOOR(frow);
+         j1 = j0 + 1;
+      }
+      else if (tObj->WrapT == GL_CLAMP_TO_EDGE) {
+         frow = CLAMP(texcoords[i][1], 0.5F, height - 0.5F);
+         frow -= 0.5F;
+         j0 = IFLOOR(frow);
+         j1 = j0 + 1;
+         if (j1 > height_minus_1)
+            j1 = height_minus_1;
+      }
+      else {
+         ASSERT(tObj->WrapT == GL_CLAMP_TO_BORDER);
+         frow = CLAMP(texcoords[i][1], -0.5F, height + 0.5F);
+         frow -= 0.5F;
+         j0 = IFLOOR(frow);
+         j1 = j0 + 1;
+      }
+
+      /* flag every column/row that falls outside the image */
+      if (i0 < 0 || i0 > width_minus_1)   useBorderColor |= I0BIT;
+      if (i1 < 0 || i1 > width_minus_1)   useBorderColor |= I1BIT;
+      if (j0 < 0 || j0 > height_minus_1)  useBorderColor |= J0BIT;
+      if (j1 < 0 || j1 > height_minus_1)  useBorderColor |= J1BIT;
+
+      /* get four texel samples */
+      if (useBorderColor & (I0BIT | J0BIT))
+         COPY_CHAN4(t00, tObj->_BorderChan);
+      else
+         img->FetchTexelc(img, i0, j0, 0, t00);
+
+      if (useBorderColor & (I1BIT | J0BIT))
+         COPY_CHAN4(t10, tObj->_BorderChan);
+      else
+         img->FetchTexelc(img, i1, j0, 0, t10);
+
+      if (useBorderColor & (I0BIT | J1BIT))
+         COPY_CHAN4(t01, tObj->_BorderChan);
+      else
+         img->FetchTexelc(img, i0, j1, 0, t01);
+
+      if (useBorderColor & (I1BIT | J1BIT))
+         COPY_CHAN4(t11, tObj->_BorderChan);
+      else
+         img->FetchTexelc(img, i1, j1, 0, t11);
+
+      /* compute interpolants */
+      a = FRAC(fcol);
+      b = FRAC(frow);
+
+      lerp_rgba_2d(rgba[i], a, b, t00, t10, t01, t11);
+   }
+}
+
+
+/**
+ * Rectangle-texture sampling when the minification and magnification
+ * filters differ.  lambda is used only by compute_min_mag_ranges() to split
+ * the fragments into a minified and a magnified range; there is no
+ * mipmapping with rectangular textures, so the per-range samplers are
+ * called with a NULL lambda.  Any filter other than GL_NEAREST is sampled
+ * with sample_linear_rect().
+ *
+ * \param ctx        rendering context
+ * \param tObj       the rectangle texture object
+ * \param n          number of fragments
+ * \param texcoords  per-fragment texel-space coordinates
+ * \param lambda     per-fragment level-of-detail values
+ * \param rgba       receives one color per fragment
+ */
+static void
+sample_lambda_rect( GLcontext *ctx,
+                    const struct gl_texture_object *tObj, GLuint n,
+                    const GLfloat texcoords[][4], const GLfloat lambda[],
+                    GLchan rgba[][4])
+{
+   GLuint minStart, minEnd, magStart, magEnd;
+
+   compute_min_mag_ranges(tObj, n, lambda,
+                          &minStart, &minEnd, &magStart, &magEnd);
+
+   if (minStart < minEnd) {
+      /* minified fragments */
+      const GLuint count = minEnd - minStart;
+      const GLfloat (*coords)[4] = texcoords + minStart;
+      GLchan (*colors)[4] = rgba + minStart;
+
+      if (tObj->MinFilter != GL_NEAREST)
+         sample_linear_rect(ctx, tObj, count, coords, NULL, colors);
+      else
+         sample_nearest_rect(ctx, tObj, count, coords, NULL, colors);
+   }
+
+   if (magStart < magEnd) {
+      /* magnified fragments */
+      const GLuint count = magEnd - magStart;
+      const GLfloat (*coords)[4] = texcoords + magStart;
+      GLchan (*colors)[4] = rgba + magStart;
+
+      if (tObj->MagFilter != GL_NEAREST)
+         sample_linear_rect(ctx, tObj, count, coords, NULL, colors);
+      else
+         sample_nearest_rect(ctx, tObj, count, coords, NULL, colors);
+   }
+}
+
+
+
+/**
+ * Sample a shadow/depth texture.
+ *
+ * Depth values are read from the base-level image (tObj->Image[0][BaseLevel]).
+ * The compare function is GL_LEQUAL/GL_GEQUAL when tObj->CompareFlag is set
+ * (GL_SGIX_shadow), tObj->CompareFunc when tObj->CompareMode is
+ * GL_COMPARE_R_TO_TEXTURE_ARB (GL_ARB_shadow), and GL_NONE otherwise, in
+ * which case the depth value itself is returned as a channel value.
+ *
+ * With a compare function, a sample "passes" when (r OP depth) holds, where
+ * r is texcoords[i][2].  A passing sample is fully lit (CHAN_MAX) and a
+ * failing one gets the shadow ambient value.  When tObj->MagFilter is
+ * GL_NEAREST one sample is tested; otherwise four neighbouring samples are
+ * tested and each failure darkens the result by a quarter of the
+ * CHAN_MAX-to-ambient range.
+ *
+ * The result is written to texel[i] according to tObj->DepthMode.
+ *
+ * \param ctx        context, used for error reporting
+ * \param tObj       the depth texture object
+ * \param n          number of fragments
+ * \param texcoords  per-fragment coordinates; [2] is the reference value r
+ * \param lambda     unused (the min/mag distinction is ignored, see below)
+ * \param texel      receives one color per fragment
+ */
+static void
+sample_depth_texture( GLcontext *ctx,
+                      const struct gl_texture_object *tObj, GLuint n,
+                      const GLfloat texcoords[][4], const GLfloat lambda[],
+                      GLchan texel[][4] )
+{
+   const GLint baseLevel = tObj->BaseLevel;
+   const struct gl_texture_image *img = tObj->Image[0][baseLevel];
+   const GLint width = img->Width;
+   const GLint height = img->Height;
+   GLchan ambient;
+   GLenum function;
+   GLchan result;
+
+   (void) lambda;
+
+   ASSERT(tObj->Image[0][tObj->BaseLevel]->_BaseFormat == GL_DEPTH_COMPONENT ||
+          tObj->Image[0][tObj->BaseLevel]->_BaseFormat == GL_DEPTH_STENCIL_EXT);
+
+   ASSERT(tObj->Target == GL_TEXTURE_1D ||
+          tObj->Target == GL_TEXTURE_2D ||
+          tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+
+   UNCLAMPED_FLOAT_TO_CHAN(ambient, tObj->ShadowAmbient);
+
+   /* XXXX if tObj->MinFilter != tObj->MagFilter, we're ignoring lambda */
+
+   /* XXX this could be precomputed and saved in the texture object */
+   if (tObj->CompareFlag) {
+      /* GL_SGIX_shadow */
+      if (tObj->CompareOperator == GL_TEXTURE_LEQUAL_R_SGIX) {
+         function = GL_LEQUAL;
+      }
+      else {
+         ASSERT(tObj->CompareOperator == GL_TEXTURE_GEQUAL_R_SGIX);
+         function = GL_GEQUAL;
+      }
+   }
+   else if (tObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+      /* GL_ARB_shadow */
+      function = tObj->CompareFunc;
+   }
+   else {
+      function = GL_NONE;  /* pass depth through as grayscale */
+   }
+
+   if (tObj->MagFilter == GL_NEAREST) {
+      GLuint i;
+      for (i = 0; i < n; i++) {
+         GLfloat depthSample;
+         GLint col, row;
+         /* XXX fix for texture rectangle! */
+         COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapS, texcoords[i][0], width, col);
+         COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapT, texcoords[i][1], height, row);
+         if (col >= 0 && row >= 0 && col < width && row < height) {
+            img->FetchTexelf(img, col, row, 0, &depthSample);
+         }
+         else {
+            /* outside the image: use the border depth */
+            depthSample = tObj->BorderColor[0];
+         }
+
+         /* single comparison: lit on pass, ambient on fail */
+         switch (function) {
+         case GL_LEQUAL:
+            result = (texcoords[i][2] <= depthSample) ? CHAN_MAX : ambient;
+            break;
+         case GL_GEQUAL:
+            result = (texcoords[i][2] >= depthSample) ? CHAN_MAX : ambient;
+            break;
+         case GL_LESS:
+            result = (texcoords[i][2] < depthSample) ? CHAN_MAX : ambient;
+            break;
+         case GL_GREATER:
+            result = (texcoords[i][2] > depthSample) ? CHAN_MAX : ambient;
+            break;
+         case GL_EQUAL:
+            result = (texcoords[i][2] == depthSample) ? CHAN_MAX : ambient;
+            break;
+         case GL_NOTEQUAL:
+            result = (texcoords[i][2] != depthSample) ? CHAN_MAX : ambient;
+            break;
+         case GL_ALWAYS:
+            result = CHAN_MAX;
+            break;
+         case GL_NEVER:
+            result = ambient;
+            break;
+         case GL_NONE:
+            CLAMPED_FLOAT_TO_CHAN(result, depthSample);
+            break;
+         default:
+            _mesa_problem(ctx, "Bad compare func in sample_depth_texture");
+            return;
+         }
+
+         switch (tObj->DepthMode) {
+         case GL_LUMINANCE:
+            texel[i][RCOMP] = result;
+            texel[i][GCOMP] = result;
+            texel[i][BCOMP] = result;
+            texel[i][ACOMP] = CHAN_MAX;
+            break;
+         case GL_INTENSITY:
+            texel[i][RCOMP] = result;
+            texel[i][GCOMP] = result;
+            texel[i][BCOMP] = result;
+            texel[i][ACOMP] = result;
+            break;
+         case GL_ALPHA:
+            texel[i][RCOMP] = 0;
+            texel[i][GCOMP] = 0;
+            texel[i][BCOMP] = 0;
+            texel[i][ACOMP] = result;
+            break;
+         default:
+            _mesa_problem(ctx, "Bad depth texture mode");
+         }
+      }
+   }
+   else {
+      GLuint i;
+      ASSERT(tObj->MagFilter == GL_LINEAR);
+      for (i = 0; i < n; i++) {
+         /* amount by which one failed comparison darkens the result */
+         const GLfloat d = (CHAN_MAXF - (GLfloat) ambient) * 0.25F;
+         const GLfloat r = texcoords[i][2];
+         GLfloat luminance = CHAN_MAXF;
+         GLfloat depth00, depth01, depth10, depth11;
+         GLint i0, i1, j0, j1;
+         GLfloat u, v;
+         GLuint useBorderTexel;
+
+         /* XXX fix for texture rectangle! */
+         COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapS, texcoords[i][0], u, width, i0, i1);
+         COMPUTE_LINEAR_TEXEL_LOCATIONS(tObj->WrapT, texcoords[i][1], v, height,j0, j1);
+
+         useBorderTexel = 0;
+         if (img->Border) {
+            /* image carries its own border texels: just offset into it */
+            i0 += img->Border;
+            i1 += img->Border;
+            j0 += img->Border;
+            j1 += img->Border;
+         }
+         else {
+            if (i0 < 0 || i0 >= (GLint) width)   useBorderTexel |= I0BIT;
+            if (i1 < 0 || i1 >= (GLint) width)   useBorderTexel |= I1BIT;
+            if (j0 < 0 || j0 >= (GLint) height)  useBorderTexel |= J0BIT;
+            if (j1 < 0 || j1 >= (GLint) height)  useBorderTexel |= J1BIT;
+         }
+
+         /* get four depth samples from the texture */
+         if (useBorderTexel & (I0BIT | J0BIT)) {
+            depth00 = tObj->BorderColor[0];
+         }
+         else {
+            img->FetchTexelf(img, i0, j0, 0, &depth00);
+         }
+         if (useBorderTexel & (I1BIT | J0BIT)) {
+            depth10 = tObj->BorderColor[0];
+         }
+         else {
+            img->FetchTexelf(img, i1, j0, 0, &depth10);
+         }
+         if (useBorderTexel & (I0BIT | J1BIT)) {
+            depth01 = tObj->BorderColor[0];
+         }
+         else {
+            img->FetchTexelf(img, i0, j1, 0, &depth01);
+         }
+         if (useBorderTexel & (I1BIT | J1BIT)) {
+            depth11 = tObj->BorderColor[0];
+         }
+         else {
+            img->FetchTexelf(img, i1, j1, 0, &depth11);
+         }
+
+         /* Do four depth/R comparisons and compute a weighted result.
+          * If this touches on somebody's I.P., I'll remove this code
+          * upon request.
+          *
+          * Each sample uses the same predicate as the GL_NEAREST path
+          * (r OP depth); a sample that FAILS the test subtracts d, so four
+          * passes give CHAN_MAX and four failures give the ambient value.
+          */
+         switch (function) {
+         case GL_LEQUAL:
+            if (!(r <= depth00))  luminance -= d;
+            if (!(r <= depth01))  luminance -= d;
+            if (!(r <= depth10))  luminance -= d;
+            if (!(r <= depth11))  luminance -= d;
+            result = (GLchan) luminance;
+            break;
+         case GL_GEQUAL:
+            if (!(r >= depth00))  luminance -= d;
+            if (!(r >= depth01))  luminance -= d;
+            if (!(r >= depth10))  luminance -= d;
+            if (!(r >= depth11))  luminance -= d;
+            result = (GLchan) luminance;
+            break;
+         case GL_LESS:
+            if (!(r < depth00))  luminance -= d;
+            if (!(r < depth01))  luminance -= d;
+            if (!(r < depth10))  luminance -= d;
+            if (!(r < depth11))  luminance -= d;
+            result = (GLchan) luminance;
+            break;
+         case GL_GREATER:
+            if (!(r > depth00))  luminance -= d;
+            if (!(r > depth01))  luminance -= d;
+            if (!(r > depth10))  luminance -= d;
+            if (!(r > depth11))  luminance -= d;
+            result = (GLchan) luminance;
+            break;
+         case GL_EQUAL:
+            if (!(r == depth00))  luminance -= d;
+            if (!(r == depth01))  luminance -= d;
+            if (!(r == depth10))  luminance -= d;
+            if (!(r == depth11))  luminance -= d;
+            result = (GLchan) luminance;
+            break;
+         case GL_NOTEQUAL:
+            if (!(r != depth00))  luminance -= d;
+            if (!(r != depth01))  luminance -= d;
+            if (!(r != depth10))  luminance -= d;
+            if (!(r != depth11))  luminance -= d;
+            result = (GLchan) luminance;
+            break;
+         case GL_ALWAYS:
+            /* every sample passes: fully lit */
+            result = CHAN_MAX;
+            break;
+         case GL_NEVER:
+            /* every sample fails: fully in shadow */
+            result = ambient;
+            break;
+         case GL_NONE:
+            /* ordinary bilinear filtering */
+            {
+               const GLfloat a = FRAC(u + 1.0F);
+               const GLfloat b = FRAC(v + 1.0F);
+               const GLfloat depthSample
+                  = lerp_2d(a, b, depth00, depth10, depth01, depth11);
+               CLAMPED_FLOAT_TO_CHAN(result, depthSample);
+            }
+            break;
+         default:
+            _mesa_problem(ctx, "Bad compare func in sample_depth_texture");
+            return;
+         }
+
+         switch (tObj->DepthMode) {
+         case GL_LUMINANCE:
+            texel[i][RCOMP] = result;
+            texel[i][GCOMP] = result;
+            texel[i][BCOMP] = result;
+            texel[i][ACOMP] = CHAN_MAX;
+            break;
+         case GL_INTENSITY:
+            texel[i][RCOMP] = result;
+            texel[i][GCOMP] = result;
+            texel[i][BCOMP] = result;
+            texel[i][ACOMP] = result;
+            break;
+         case GL_ALPHA:
+            texel[i][RCOMP] = 0;
+            texel[i][GCOMP] = 0;
+            texel[i][BCOMP] = 0;
+            texel[i][ACOMP] = result;
+            break;
+         default:
+            _mesa_problem(ctx, "Bad depth texture mode");
+         }
+      }  /* for */
+   }  /* if filter */
+}
+
+
+#if 0
+/*
+ * Experimental depth texture sampling function.
+ *
+ * This whole function is compiled out by the surrounding #if 0.
+ *
+ * It samples the base-level image of texUnit->_Current.  For each fragment
+ * it locates the nearest texel, examines the window of texels within K = 3
+ * columns/rows of it (clipped to the image), counts how many depth samples
+ * satisfy the LEQUAL/GEQUAL test selected by CompareOperator, and darkens
+ * the result from CHAN_MAXF toward the ambient value in proportion to that
+ * count.  The value is written to R, G and B; alpha is set to CHAN_MAX.
+ *
+ * NOTE(review): the body refers to 'tObj' and 'r', neither of which is
+ * declared in this function (the texture object is named 'texObj' and there
+ * is no 'r' parameter), so it would not compile as-is if enabled.
+ */
+static void
+sample_depth_texture2(const GLcontext *ctx,
+                     const struct gl_texture_unit *texUnit,
+                     GLuint n, const GLfloat texcoords[][4],
+                     GLchan texel[][4])
+{
+   const struct gl_texture_object *texObj = texUnit->_Current;
+   const GLint baseLevel = texObj->BaseLevel;
+   const struct gl_texture_image *texImage = texObj->Image[0][baseLevel];
+   const GLuint width = texImage->Width;
+   const GLuint height = texImage->Height;
+   GLchan ambient;
+   GLboolean lequal, gequal;
+
+   /* Bail out (with a diagnostic) on the cases this routine does not handle */
+   if (texObj->Target != GL_TEXTURE_2D) {
+      _mesa_problem(ctx, "only 2-D depth textures supported at this time");
+      return;
+   }
+
+   if (texObj->MinFilter != texObj->MagFilter) {
+      _mesa_problem(ctx, "mipmapped depth textures not supported at this time");
+      return;
+   }
+
+   /* XXX the GL_SGIX_shadow extension spec doesn't say what to do if
+    * GL_TEXTURE_COMPARE_SGIX == GL_TRUE but the current texture object
+    * isn't a depth texture.
+    */
+   if (texImage->_BaseFormat != GL_DEPTH_COMPONENT) {
+      _mesa_problem(ctx,"GL_TEXTURE_COMPARE_SGIX enabled with non-depth texture");
+      return;
+   }
+
+   /* NOTE(review): 'tObj' is undeclared here; presumably 'texObj' was meant */
+   UNCLAMPED_FLOAT_TO_CHAN(ambient, tObj->ShadowAmbient);
+
+   /* Exactly one of lequal/gequal is set, selecting the comparison below */
+   if (texObj->CompareOperator == GL_TEXTURE_LEQUAL_R_SGIX) {
+      lequal = GL_TRUE;
+      gequal = GL_FALSE;
+   }
+   else {
+      lequal = GL_FALSE;
+      gequal = GL_TRUE;
+   }
+
+   {
+      GLuint i;
+      for (i = 0; i < n; i++) {
+         const GLint K = 3;   /* half-size of the sampling window, in texels */
+         GLint col, row, ii, jj, imin, imax, jmin, jmax, samples, count;
+         GLfloat w;
+         GLchan lum;
+         COMPUTE_NEAREST_TEXEL_LOCATION(texObj->WrapS, texcoords[i][0],
+					width, col);
+         COMPUTE_NEAREST_TEXEL_LOCATION(texObj->WrapT, texcoords[i][1],
+					height, row);
+
+         /* window of +/- K texels around the nearest texel ... */
+         imin = col - K;
+         imax = col + K;
+         jmin = row - K;
+         jmax = row + K;
+
+         /* ... clipped to the image bounds */
+         if (imin < 0)  imin = 0;
+         if (imax >= width)  imax = width - 1;
+         if (jmin < 0)  jmin = 0;
+         if (jmax >= height) jmax = height - 1;
+
+         /* count the samples in the window that satisfy the comparison */
+         samples = (imax - imin + 1) * (jmax - jmin + 1);
+         count = 0;
+         for (jj = jmin; jj <= jmax; jj++) {
+            for (ii = imin; ii <= imax; ii++) {
+               GLfloat depthSample;
+               texImage->FetchTexelf(texImage, ii, jj, 0, &depthSample);
+               /* NOTE(review): 'r' is undeclared in this function */
+               if ((depthSample <= r[i] && lequal) ||
+                   (depthSample >= r[i] && gequal)) {
+                  count++;
+               }
+            }
+         }
+
+         /* w is the fraction of counted samples; scale between CHAN_MAXF
+          * (w == 0) and the ambient value (w == 1).
+          */
+         w = (GLfloat) count / (GLfloat) samples;
+         w = CHAN_MAXF - w * (CHAN_MAXF - (GLfloat) ambient);
+         lum = (GLint) w;
+
+         texel[i][RCOMP] = lum;
+         texel[i][GCOMP] = lum;
+         texel[i][BCOMP] = lum;
+         texel[i][ACOMP] = CHAN_MAX;
+      }
+   }
+}
+#endif
+
+
+/**
+ * Sampler used for texture objects that are missing or "incomplete".
+ * A fragment program that samples an incomplete texture must get black
+ * (see issue 23 in the GL_ARB_fragment_program spec); fragment programs
+ * don't observe the texture enable/disable flags, so a sampler is still
+ * needed in that case.
+ *
+ * Every one of the n output colors is set to (0, 0, 0, CHAN_MAX).  All
+ * other arguments are ignored.
+ */
+static void
+null_sample_func( GLcontext *ctx,
+                  const struct gl_texture_object *tObj, GLuint n,
+                  const GLfloat texcoords[][4], const GLfloat lambda[],
+                  GLchan rgba[][4])
+{
+   GLuint k;
+
+   (void) ctx;
+   (void) tObj;
+   (void) texcoords;
+   (void) lambda;
+
+   for (k = 0; k < n; k++) {
+      GLchan *color = rgba[k];
+      color[RCOMP] = 0;
+      color[GCOMP] = 0;
+      color[BCOMP] = 0;
+      color[ACOMP] = CHAN_MAX;   /* opaque black */
+   }
+}
+
+
+/**
+ * Choose the texture sampling function for the given texture object.
+ *
+ * A NULL or incomplete texture object gets null_sample_func.  Otherwise the
+ * choice depends on the target, on whether the base-level image holds depth
+ * data (1D/2D targets only), on whether the min and mag filters differ (a
+ * "lambda" sampler is then needed) and, for nearest-filtered 2D textures,
+ * on whether one of the optimized RGB/RGBA repeat-mode samplers applies.
+ *
+ * \param ctx  context, used for error reporting
+ * \param t    the texture object, may be NULL
+ * \return the sampling function to use; never NULL
+ */
+texture_sample_func
+_swrast_choose_texture_sample_func( GLcontext *ctx,
+                                    const struct gl_texture_object *t )
+{
+   GLboolean needLambda;
+   GLboolean isDepth;
+   GLenum format;
+
+   if (!t || !t->Complete)
+      return &null_sample_func;
+
+   needLambda = (GLboolean) (t->MinFilter != t->MagFilter);
+   format = t->Image[0][t->BaseLevel]->_BaseFormat;
+   isDepth = (GLboolean) (format == GL_DEPTH_COMPONENT ||
+                          format == GL_DEPTH_STENCIL_EXT);
+
+   switch (t->Target) {
+   case GL_TEXTURE_1D:
+      if (isDepth)
+         return &sample_depth_texture;
+      if (needLambda)
+         return &sample_lambda_1d;
+      if (t->MinFilter == GL_LINEAR)
+         return &sample_linear_1d;
+      ASSERT(t->MinFilter == GL_NEAREST);
+      return &sample_nearest_1d;
+
+   case GL_TEXTURE_2D:
+      if (isDepth)
+         return &sample_depth_texture;
+      if (needLambda)
+         return &sample_lambda_2d;
+      if (t->MinFilter == GL_LINEAR)
+         return &sample_linear_2d;
+      {
+         /* check for a few optimized cases */
+         const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+         ASSERT(t->MinFilter == GL_NEAREST);
+         if (t->WrapS == GL_REPEAT &&
+             t->WrapT == GL_REPEAT &&
+             img->_IsPowerOfTwo &&
+             img->Border == 0) {
+            /* repeating, borderless, power-of-two image */
+            if (img->TexFormat->MesaFormat == MESA_FORMAT_RGB)
+               return &opt_sample_rgb_2d;
+            if (img->TexFormat->MesaFormat == MESA_FORMAT_RGBA)
+               return &opt_sample_rgba_2d;
+         }
+         return &sample_nearest_2d;
+      }
+
+   case GL_TEXTURE_3D:
+      if (needLambda)
+         return &sample_lambda_3d;
+      if (t->MinFilter == GL_LINEAR)
+         return &sample_linear_3d;
+      ASSERT(t->MinFilter == GL_NEAREST);
+      return &sample_nearest_3d;
+
+   case GL_TEXTURE_CUBE_MAP:
+      if (needLambda)
+         return &sample_lambda_cube;
+      if (t->MinFilter == GL_LINEAR)
+         return &sample_linear_cube;
+      ASSERT(t->MinFilter == GL_NEAREST);
+      return &sample_nearest_cube;
+
+   case GL_TEXTURE_RECTANGLE_NV:
+      if (needLambda)
+         return &sample_lambda_rect;
+      if (t->MinFilter == GL_LINEAR)
+         return &sample_linear_rect;
+      ASSERT(t->MinFilter == GL_NEAREST);
+      return &sample_nearest_rect;
+
+   default:
+      _mesa_problem(ctx,
+                    "invalid target in _swrast_choose_texture_sample_func");
+      return &null_sample_func;
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_texfilter.h b/dist/Mesa/src/mesa/swrast/s_texfilter.h
new file mode 100644
index 000000000..e4445e79a
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_texfilter.h
@@ -0,0 +1,39 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_TEXFILTER_H
+#define S_TEXFILTER_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+
+extern texture_sample_func
+_swrast_choose_texture_sample_func( GLcontext *ctx,
+				    const struct gl_texture_object *tObj );
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/s_texstore.c b/dist/Mesa/src/mesa/swrast/s_texstore.c
new file mode 100644
index 000000000..e9f4faeed
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_texstore.c
@@ -0,0 +1,591 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Brian Paul
+ */
+
+
+/*
+ * The functions in this file are mostly related to software texture fallbacks.
+ * This includes texture image transfer/packing and texel fetching.
+ * Hardware drivers will likely override most of this.
+ */
+
+
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "context.h"
+#include "convolve.h"
+#include "image.h"
+#include "macros.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texstore.h"
+
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_span.h"
+
+/**
+ * Read a rectangle of RGBA pixels from the framebuffer's color read buffer.
+ * This is used by the glCopyTex[Sub]Image fallbacks in this file.
+ *
+ * \param ctx  the GL context
+ * \param x, y  lower left corner of the region
+ * \param width, height  size of the region to read
+ * \return pointer to a _mesa_malloc'ed block of width * height GL_RGBA
+ *         GLchan texels (rows stored consecutively), or NULL if the
+ *         allocation failed.  The callers in this file release it with
+ *         _mesa_free().
+ */
+static GLchan *
+read_color_image( GLcontext *ctx, GLint x, GLint y,
+                  GLsizei width, GLsizei height )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLint chansPerRow = 4 * width;
+   GLchan *pixels;
+   GLint row;
+
+   pixels = (GLchan *) _mesa_malloc(width * height * 4 * sizeof(GLchan));
+   if (pixels == NULL) {
+      return NULL;
+   }
+
+   RENDER_START(swrast, ctx);
+
+   /* fetch the region one span (row) at a time */
+   for (row = 0; row < height; row++) {
+      GLchan *rowStart = pixels + row * chansPerRow;
+      _swrast_read_rgba_span(ctx, ctx->ReadBuffer->_ColorReadBuffer,
+                             width, x, y + row, (GLchan (*)[4]) rowStart);
+   }
+
+   RENDER_FINISH(swrast, ctx);
+
+   return pixels;
+}
+
+
+/**
+ * Read a rectangle of depth values from the read framebuffer's depth
+ * buffer.  Takes the same arguments as read_color_image().
+ *
+ * \return pointer to a _mesa_malloc'ed block of width * height GLuint
+ *         depth values (rows stored consecutively), or NULL if the
+ *         allocation failed.
+ * \sa read_color_image
+ */
+static GLuint *
+read_depth_image( GLcontext *ctx, GLint x, GLint y,
+                  GLsizei width, GLsizei height )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_renderbuffer *depthRb = ctx->ReadBuffer->_DepthBuffer;
+   GLuint *zValues;
+   GLint row;
+
+   zValues = (GLuint *) _mesa_malloc(width * height * sizeof(GLuint));
+   if (zValues == NULL) {
+      return NULL;
+   }
+
+   RENDER_START(swrast, ctx);
+
+   /* fetch the region one span (row) at a time */
+   for (row = 0; row < height; row++) {
+      _swrast_read_depth_span_uint(ctx, depthRb, width, x, y + row,
+                                   zValues + row * width);
+   }
+
+   RENDER_FINISH(swrast, ctx);
+
+   return zValues;
+}
+
+
+/**
+ * Read a rectangle from both the depth and stencil buffers of the read
+ * framebuffer and interleave the values, one GLuint per pixel: depth in
+ * bits 0xffffff00 and stencil in bits 0x000000ff.  The callers in this
+ * file hand the result on as GL_DEPTH_STENCIL_EXT /
+ * GL_UNSIGNED_INT_24_8_EXT data.
+ *
+ * \param ctx  the GL context
+ * \param x, y  lower left corner of the region
+ * \param width, height  size of the region to read
+ * \return pointer to a _mesa_malloc'ed block of width * height GLuints,
+ *         or NULL if the allocation failed.
+ *
+ * NOTE(review): the per-row temporaries hold MAX_WIDTH entries; this
+ * presumably relies on callers never passing width > MAX_WIDTH -- confirm.
+ */
+static GLuint *
+read_depth_stencil_image(GLcontext *ctx, GLint x, GLint y,
+                         GLsizei width, GLsizei height)
+{
+   struct gl_renderbuffer *depthRb = ctx->ReadBuffer->_DepthBuffer;
+   struct gl_renderbuffer *stencilRb = ctx->ReadBuffer->_StencilBuffer;
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint *image, *dst;
+   GLint i;
+
+   ASSERT(depthRb);
+   ASSERT(stencilRb);
+
+   image = (GLuint *) _mesa_malloc(width * height * sizeof(GLuint));
+   if (!image)
+      return NULL;
+
+   RENDER_START(swrast, ctx);
+
+   /* read from depth buffer, widening to one GLuint per pixel */
+   dst = image;
+   if (depthRb->DataType == GL_UNSIGNED_INT) {
+      for (i = 0; i < height; i++) {
+         _swrast_get_row(ctx, depthRb, width, x, y + i, dst, sizeof(GLuint));
+         dst += width;
+      }
+   }
+   else {
+      GLushort z16[MAX_WIDTH];
+      ASSERT(depthRb->DataType == GL_UNSIGNED_SHORT);
+      for (i = 0; i < height; i++) {
+         GLint j;
+         _swrast_get_row(ctx, depthRb, width, x, y + i, z16, sizeof(GLushort));
+         /* convert GLushorts to GLuints */
+         for (j = 0; j < width; j++) {
+            dst[j] = z16[j];
+         }
+         dst += width;
+      }
+   }
+
+   /* put depth values into bits 0xffffff00 */
+   if (ctx->ReadBuffer->Visual.depthBits == 24) {
+      GLint j;
+      for (j = 0; j < width * height; j++) {
+         image[j] <<= 8;
+      }
+   }
+   else if (ctx->ReadBuffer->Visual.depthBits == 16) {
+      /* Z16 goes into the top 16 bits; its high byte is repeated in
+       * bits 8..15 to fill out the 24-bit depth field.
+       */
+      GLint j;
+      for (j = 0; j < width * height; j++) {
+         image[j] = (image[j] << 16) | (image[j] & 0xff00);
+      }
+   }
+   else {
+      /* this handles arbitrary depthBits >= 12 */
+      const GLint rShift = ctx->ReadBuffer->Visual.depthBits;
+      const GLint lShift = 32 - rShift;
+      /* Shifting a 32-bit GLuint by 32 or more is undefined in C, so
+       * only rescale when both shift counts are in 1..31.  With
+       * depthBits == 32 the values already span all 32 bits and are
+       * left untouched.
+       */
+      if (lShift > 0 && lShift < 32) {
+         GLint j;
+         for (j = 0; j < width * height; j++) {
+            GLuint z = (image[j] << lShift);
+            image[j] = z | (z >> rShift);
+         }
+      }
+   }
+
+   /* read stencil values and interleave into image array */
+   dst = image;
+   for (i = 0; i < height; i++) {
+      GLstencil stencil[MAX_WIDTH];
+      GLint j;
+      ASSERT(8 * sizeof(GLstencil) == stencilRb->StencilBits);
+      _swrast_get_row(ctx, stencilRb, width, x, y + i,
+                      stencil, sizeof(GLstencil));
+      for (j = 0; j < width; j++) {
+         /* keep the 24 depth bits, replace the low byte with stencil */
+         dst[j] = (dst[j] & 0xffffff00) | (stencil[j] & 0xff);
+      }
+      dst += width;
+   }
+
+   RENDER_FINISH(swrast, ctx);
+
+   return image;
+}
+
+
+/**
+ * Test whether the given format enum is one of the depth-component
+ * formats (GL_DEPTH_COMPONENT or one of its sized SGIX variants).
+ * \return GL_TRUE if so, GL_FALSE otherwise.
+ */
+static GLboolean
+is_depth_format(GLenum format)
+{
+   if (format == GL_DEPTH_COMPONENT ||
+       format == GL_DEPTH_COMPONENT16_SGIX ||
+       format == GL_DEPTH_COMPONENT24_SGIX ||
+       format == GL_DEPTH_COMPONENT32_SGIX) {
+      return GL_TRUE;
+   }
+   return GL_FALSE;
+}
+
+
+/**
+ * Test whether the given format enum is a combined depth/stencil format
+ * (GL_DEPTH_STENCIL_EXT or GL_DEPTH24_STENCIL8_EXT).
+ * \return GL_TRUE if so, GL_FALSE otherwise.
+ */
+static GLboolean
+is_depth_stencil_format(GLenum format)
+{
+   if (format == GL_DEPTH_STENCIL_EXT ||
+       format == GL_DEPTH24_STENCIL8_EXT) {
+      return GL_TRUE;
+   }
+   return GL_FALSE;
+}
+
+
+/**
+ * Fallback for Driver.CopyTexImage1D().
+ *
+ * Reads one row of width pixels from the framebuffer -- depth,
+ * depth/stencil or RGBA data, depending on internalFormat -- and passes
+ * it to ctx->Driver.TexImage1D() to (re)define the texture image.
+ * Records GL_OUT_OF_MEMORY and returns if the temporary image cannot be
+ * allocated.
+ */
+void
+_swrast_copy_teximage1d( GLcontext *ctx, GLenum target, GLint level,
+                         GLenum internalFormat,
+                         GLint x, GLint y, GLsizei width, GLint border )
+{
+   struct gl_texture_unit *texUnit;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   GLenum srcFormat, srcType;
+   void *pixels;
+
+   texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   texObj = _mesa_select_tex_object(ctx, texUnit, target);
+   ASSERT(texObj);
+   texImage = _mesa_select_tex_image(ctx, texUnit, target, level);
+   ASSERT(texImage);
+
+   ASSERT(ctx->Driver.TexImage1D);
+
+   /* read the source row from the framebuffer in a matching layout */
+   if (is_depth_format(internalFormat)) {
+      pixels = read_depth_image(ctx, x, y, width, 1);
+      srcFormat = GL_DEPTH_COMPONENT;
+      srcType = GL_UNSIGNED_INT;
+   }
+   else if (is_depth_stencil_format(internalFormat)) {
+      pixels = read_depth_stencil_image(ctx, x, y, width, 1);
+      srcFormat = GL_DEPTH_STENCIL_EXT;
+      srcType = GL_UNSIGNED_INT_24_8_EXT;
+   }
+   else {
+      pixels = read_color_image(ctx, x, y, width, 1);
+      srcFormat = GL_RGBA;
+      srcType = CHAN_TYPE;
+   }
+
+   if (!pixels) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage1D");
+      return;
+   }
+
+   /* call glTexImage1D to redefine the texture */
+   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                          width, border,
+                          srcFormat, srcType, pixels,
+                          &ctx->DefaultPacking, texObj, texImage);
+   _mesa_free(pixels);
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+   }
+}
+
+
+/**
+ * Fallback for Driver.CopyTexImage2D().
+ *
+ * CopyTexImage is implemented by reading the region from the framebuffer
+ * (depth, depth/stencil or RGBA data, depending on internalFormat) and
+ * passing it to the ctx->Driver.TexImage2D() function.  Records
+ * GL_OUT_OF_MEMORY and returns if the temporary image cannot be
+ * allocated.
+ *
+ * Device drivers should try to implement direct framebuffer->texture copies.
+ */
+void
+_swrast_copy_teximage2d( GLcontext *ctx, GLenum target, GLint level,
+                         GLenum internalFormat,
+                         GLint x, GLint y, GLsizei width, GLsizei height,
+                         GLint border )
+{
+   struct gl_texture_unit *texUnit;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   GLenum srcFormat, srcType;
+   void *pixels;
+
+   texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   texObj = _mesa_select_tex_object(ctx, texUnit, target);
+   ASSERT(texObj);
+   texImage = _mesa_select_tex_image(ctx, texUnit, target, level);
+   ASSERT(texImage);
+
+   ASSERT(ctx->Driver.TexImage2D);
+
+   /* read the source region from the framebuffer in a matching layout */
+   if (is_depth_format(internalFormat)) {
+      pixels = read_depth_image(ctx, x, y, width, height);
+      srcFormat = GL_DEPTH_COMPONENT;
+      srcType = GL_UNSIGNED_INT;
+   }
+   else if (is_depth_stencil_format(internalFormat)) {
+      pixels = read_depth_stencil_image(ctx, x, y, width, height);
+      srcFormat = GL_DEPTH_STENCIL_EXT;
+      srcType = GL_UNSIGNED_INT_24_8_EXT;
+   }
+   else {
+      pixels = read_color_image(ctx, x, y, width, height);
+      srcFormat = GL_RGBA;
+      srcType = CHAN_TYPE;
+   }
+
+   if (!pixels) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage2D");
+      return;
+   }
+
+   /* call glTexImage2D to redefine the texture */
+   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                          width, height, border,
+                          srcFormat, srcType, pixels,
+                          &ctx->DefaultPacking, texObj, texImage);
+   _mesa_free(pixels);
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+   }
+}
+
+
+/**
+ * Fallback for Driver.CopyTexSubImage1D().
+ *
+ * Reads one row of width pixels from the framebuffer -- depth,
+ * depth/stencil or RGBA data, depending on the base format of the
+ * existing texture image -- and hands it to ctx->Driver.TexSubImage1D()
+ * to replace texels starting at xoffset.  Records GL_OUT_OF_MEMORY and
+ * returns if the temporary image cannot be allocated.
+ */
+void
+_swrast_copy_texsubimage1d( GLcontext *ctx, GLenum target, GLint level,
+                            GLint xoffset, GLint x, GLint y, GLsizei width )
+{
+   struct gl_texture_unit *texUnit;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+
+   texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   texObj = _mesa_select_tex_object(ctx, texUnit, target);
+   ASSERT(texObj);
+   texImage = _mesa_select_tex_image(ctx, texUnit, target, level);
+   ASSERT(texImage);
+
+   /* check the driver hook that is actually called below */
+   ASSERT(ctx->Driver.TexSubImage1D);
+
+   if (texImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+      /* read depth image from framebuffer */
+      GLuint *image = read_depth_image(ctx, x, y, width, 1);
+      if (!image) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage1D");
+         return;
+      }
+
+      /* call glTexSubImage1D to update part of the texture */
+      ctx->Driver.TexSubImage1D(ctx, target, level, xoffset, width,
+                                GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image,
+                                &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+   else if (texImage->_BaseFormat == GL_DEPTH_STENCIL_EXT) {
+      /* read depth/stencil image from framebuffer */
+      GLuint *image = read_depth_stencil_image(ctx, x, y, width, 1);
+      if (!image) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage1D");
+         return;
+      }
+      /* call glTexSubImage1D to update part of the texture */
+      ctx->Driver.TexSubImage1D(ctx, target, level, xoffset, width,
+                                GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT,
+                                image, &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+   else {
+      /* read RGBA image from framebuffer */
+      GLchan *image = read_color_image(ctx, x, y, width, 1);
+      if (!image) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage1D" );
+         return;
+      }
+      /* now call glTexSubImage1D to do the real work */
+      ctx->Driver.TexSubImage1D(ctx, target, level, xoffset, width,
+                                GL_RGBA, CHAN_TYPE, image,
+                                &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+   }
+}
+
+
+/**
+ * Fallback for Driver.CopyTexSubImage2D().
+ *
+ * Read the region from the framebuffer -- depth, depth/stencil or RGBA
+ * data, depending on the base format of the existing texture image --
+ * then hand it off to ctx->Driver.TexSubImage2D() to replace the texels
+ * at (xoffset, yoffset).  Records GL_OUT_OF_MEMORY and returns if the
+ * temporary image cannot be allocated.
+ */
+void
+_swrast_copy_texsubimage2d( GLcontext *ctx,
+                            GLenum target, GLint level,
+                            GLint xoffset, GLint yoffset,
+                            GLint x, GLint y, GLsizei width, GLsizei height )
+{
+   struct gl_texture_unit *texUnit;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+
+   texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   texObj = _mesa_select_tex_object(ctx, texUnit, target);
+   ASSERT(texObj);
+   texImage = _mesa_select_tex_image(ctx, texUnit, target, level);
+   ASSERT(texImage);
+
+   /* check the driver hook that is actually called below */
+   ASSERT(ctx->Driver.TexSubImage2D);
+
+   if (texImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+      /* read depth image from framebuffer */
+      GLuint *image = read_depth_image(ctx, x, y, width, height);
+      if (!image) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage2D");
+         return;
+      }
+      /* call glTexSubImage2D to update part of the texture */
+      ctx->Driver.TexSubImage2D(ctx, target, level,
+                                xoffset, yoffset, width, height,
+                                GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image,
+                                &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+   else if (texImage->_BaseFormat == GL_DEPTH_STENCIL_EXT) {
+      /* read depth/stencil image from framebuffer */
+      GLuint *image = read_depth_stencil_image(ctx, x, y, width, height);
+      if (!image) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage2D");
+         return;
+      }
+      /* call glTexSubImage2D to update part of the texture */
+      ctx->Driver.TexSubImage2D(ctx, target, level,
+                                xoffset, yoffset, width, height,
+                                GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT,
+                                image, &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+   else {
+      /* read RGBA image from framebuffer */
+      GLchan *image = read_color_image(ctx, x, y, width, height);
+      if (!image) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage2D" );
+         return;
+      }
+      /* now call glTexSubImage2D to do the real work */
+      ctx->Driver.TexSubImage2D(ctx, target, level,
+                                xoffset, yoffset, width, height,
+                                GL_RGBA, CHAN_TYPE, image,
+                                &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+   }
+}
+
+
+/**
+ * Fallback for Driver.CopyTexSubImage3D().
+ *
+ * Reads a width x height region from the framebuffer -- depth,
+ * depth/stencil or RGBA data, depending on the base format of the
+ * existing texture image -- and hands it to ctx->Driver.TexSubImage3D()
+ * as a single slice (depth 1) at (xoffset, yoffset, zoffset).  Records
+ * GL_OUT_OF_MEMORY and returns if the temporary image cannot be
+ * allocated.
+ */
+void
+_swrast_copy_texsubimage3d( GLcontext *ctx,
+                            GLenum target, GLint level,
+                            GLint xoffset, GLint yoffset, GLint zoffset,
+                            GLint x, GLint y, GLsizei width, GLsizei height )
+{
+   struct gl_texture_unit *texUnit;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+
+   texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   texObj = _mesa_select_tex_object(ctx, texUnit, target);
+   ASSERT(texObj);
+   texImage = _mesa_select_tex_image(ctx, texUnit, target, level);
+   ASSERT(texImage);
+
+   /* check the driver hook that is actually called below */
+   ASSERT(ctx->Driver.TexSubImage3D);
+
+   if (texImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+      /* read depth image from framebuffer */
+      GLuint *image = read_depth_image(ctx, x, y, width, height);
+      if (!image) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage3D");
+         return;
+      }
+      /* call glTexSubImage3D to update one slice of the texture */
+      ctx->Driver.TexSubImage3D(ctx, target, level,
+                                xoffset, yoffset, zoffset, width, height, 1,
+                                GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image,
+                                &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+   else if (texImage->_BaseFormat == GL_DEPTH_STENCIL_EXT) {
+      /* read depth/stencil image from framebuffer */
+      GLuint *image = read_depth_stencil_image(ctx, x, y, width, height);
+      if (!image) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage3D");
+         return;
+      }
+      /* call glTexSubImage3D to update one slice of the texture */
+      ctx->Driver.TexSubImage3D(ctx, target, level,
+                                xoffset, yoffset, zoffset, width, height, 1,
+                                GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT,
+                                image, &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+   else {
+      /* read RGBA image from framebuffer */
+      GLchan *image = read_color_image(ctx, x, y, width, height);
+      if (!image) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage3D" );
+         return;
+      }
+      /* now call glTexSubImage3D to do the real work */
+      ctx->Driver.TexSubImage3D(ctx, target, level,
+                                xoffset, yoffset, zoffset, width, height, 1,
+                                GL_RGBA, CHAN_TYPE, image,
+                                &ctx->DefaultPacking, texObj, texImage);
+      _mesa_free(image);
+   }
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_triangle.c b/dist/Mesa/src/mesa/swrast/s_triangle.c
new file mode 100644
index 000000000..a1cbd881a
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_triangle.c
@@ -0,0 +1,1176 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * When the device driver doesn't implement triangle rasterization it
+ * can hook in _swrast_Triangle, which eventually calls one of these
+ * functions to draw triangles.
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "imports.h"
+#include "macros.h"
+#include "texformat.h"
+
+#include "s_aatriangle.h"
+#include "s_context.h"
+#include "s_feedback.h"
+#include "s_span.h"
+#include "s_triangle.h"
+
+
+/**
+ * Orientation test for a triangle; just used for feedback mode.
+ *
+ * Forms the two window-space edge vectors v0->v1 and v0->v2, takes the
+ * z component of their cross product and multiplies it by the swrast
+ * context's _BackfaceSign.
+ *
+ * \return 0 if that product is greater than zero, 1 otherwise.
+ *         NOTE(review): presumably a nonzero result means "keep the
+ *         triangle" -- confirm against the callers.
+ */
+GLboolean
+_swrast_culltriangle( GLcontext *ctx,
+                      const SWvertex *v0,
+                      const SWvertex *v1,
+                      const SWvertex *v2 )
+{
+   /* edge v0 -> v1 */
+   const GLfloat ex = v1->win[0] - v0->win[0];
+   const GLfloat ey = v1->win[1] - v0->win[1];
+   /* edge v0 -> v2 */
+   const GLfloat fx = v2->win[0] - v0->win[0];
+   const GLfloat fy = v2->win[1] - v0->win[1];
+   const GLfloat cross = ex*fy-ey*fx;
+
+   return (cross * SWRAST_CONTEXT(ctx)->_BackfaceSign > 0) ? 0 : 1;
+}
+
+
+
+/*
+ * Render a flat-shaded color index triangle.
+ *
+ * The macros below parameterize s_tritemp.h, which is included once per
+ * triangle function (the template itself is not shown here; presumably
+ * it emits a function named NAME).  Z and fog interpolation are
+ * requested.  SETUP_CODE marks the span as carrying a color index, takes
+ * that index from vertex v2 converted to fixed point, and uses a zero
+ * step so the index is constant across the span.  Each span is written
+ * with _swrast_write_index_span().
+ */
+#define NAME flat_ci_triangle
+#define INTERP_Z 1
+#define INTERP_FOG 1
+#define SETUP_CODE			\
+   span.interpMask |= SPAN_INDEX;	\
+   span.index = FloatToFixed(v2->index);\
+   span.indexStep = 0;
+#define RENDER_SPAN( span )  _swrast_write_index_span(ctx, &span);
+#include "s_tritemp.h"
+
+
+
+/*
+ * Render a smooth-shaded color index triangle.
+ *
+ * Same template instantiation as the flat-shaded variant, except that
+ * INTERP_INDEX is defined instead of a SETUP_CODE that fixes the index
+ * (presumably making s_tritemp.h interpolate the color index across the
+ * triangle).  Spans are written with _swrast_write_index_span().
+ */
+#define NAME smooth_ci_triangle
+#define INTERP_Z 1
+#define INTERP_FOG 1
+#define INTERP_INDEX 1
+#define RENDER_SPAN( span )  _swrast_write_index_span(ctx, &span);
+#include "s_tritemp.h"
+
+
+
+/*
+ * Render a flat-shaded RGBA triangle.
+ *
+ * Z and fog interpolation are requested from the s_tritemp.h template.
+ * SETUP_CODE asserts that no texture coordinate units are enabled and
+ * that the shade model is GL_FLAT, then loads the span color from
+ * vertex v2 (converted to fixed point) with all four color steps set to
+ * zero, so the color is constant across each span.  Spans are written
+ * with _swrast_write_rgba_span().
+ */
+#define NAME flat_rgba_triangle
+#define INTERP_Z 1
+#define INTERP_FOG 1
+#define SETUP_CODE				\
+   ASSERT(ctx->Texture._EnabledCoordUnits == 0);\
+   ASSERT(ctx->Light.ShadeModel==GL_FLAT);	\
+   span.interpMask |= SPAN_RGBA;		\
+   span.red = ChanToFixed(v2->color[0]);	\
+   span.green = ChanToFixed(v2->color[1]);	\
+   span.blue = ChanToFixed(v2->color[2]);	\
+   span.alpha = ChanToFixed(v2->color[3]);	\
+   span.redStep = 0;				\
+   span.greenStep = 0;				\
+   span.blueStep = 0;				\
+   span.alphaStep = 0;
+#define RENDER_SPAN( span )  _swrast_write_rgba_span(ctx, &span);
+#include "s_tritemp.h"
+
+
+
+/*
+ * Render a smooth-shaded RGBA triangle.
+ *
+ * Z, fog, RGB and alpha interpolation are requested from the
+ * s_tritemp.h template.  SETUP_CODE only contains sanity checks: no
+ * texture coordinate units may be enabled and the shade model must be
+ * GL_SMOOTH.  Spans are written with _swrast_write_rgba_span().
+ */
+#define NAME smooth_rgba_triangle
+#define INTERP_Z 1
+#define INTERP_FOG 1
+#define INTERP_RGB 1
+#define INTERP_ALPHA 1
+#define SETUP_CODE				\
+   {						\
+      /* texturing must be off */		\
+      ASSERT(ctx->Texture._EnabledCoordUnits == 0);	\
+      ASSERT(ctx->Light.ShadeModel==GL_SMOOTH);	\
+   }
+#define RENDER_SPAN( span )  _swrast_write_rgba_span(ctx, &span);
+#include "s_tritemp.h"
+
+
+
+/*
+ * Render an RGB, GL_DECAL, textured triangle.
+ * Interpolate S,T only w/out mipmapping or perspective correction.
+ *
+ * No fog.
+ *
+ * SETUP_CODE looks at the base-level image of the 2D texture object
+ * currently bound to texture unit 0 and returns early if that image has
+ * no data.  S_SCALE / T_SCALE are the image width and height as floats.
+ *
+ * RENDER_SPAN fetches one texel per fragment: the integer parts of the
+ * fixed-point S,T coordinates are wrapped by masking with (Width - 1)
+ * and (Height - 1) -- which presumably requires power-of-two image
+ * sizes -- and the texel index (t << WidthLog2) + s is multiplied by 3
+ * because the texture is read as three GLchans per texel.  The RGB row
+ * is then written straight to the first color draw buffer with
+ * PutRowRGB() and no write mask.
+ */
+#define NAME simple_textured_triangle
+#define INTERP_INT_TEX 1
+#define S_SCALE twidth
+#define T_SCALE theight
+
+#define SETUP_CODE							\
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];\
+   struct gl_texture_object *obj = ctx->Texture.Unit[0].Current2D;	\
+   const GLint b = obj->BaseLevel;					\
+   const GLfloat twidth = (GLfloat) obj->Image[0][b]->Width;		\
+   const GLfloat theight = (GLfloat) obj->Image[0][b]->Height;		\
+   const GLint twidth_log2 = obj->Image[0][b]->WidthLog2;		\
+   const GLchan *texture = (const GLchan *) obj->Image[0][b]->Data;	\
+   const GLint smask = obj->Image[0][b]->Width - 1;			\
+   const GLint tmask = obj->Image[0][b]->Height - 1;			\
+   if (!texture) {							\
+      /* this shouldn't happen */					\
+      return;								\
+   }
+
+#define RENDER_SPAN( span )						\
+   GLuint i;								\
+   span.intTex[0] -= FIXED_HALF; /* off-by-one error? */		\
+   span.intTex[1] -= FIXED_HALF;					\
+   for (i = 0; i < span.end; i++) {					\
+      GLint s = FixedToInt(span.intTex[0]) & smask;			\
+      GLint t = FixedToInt(span.intTex[1]) & tmask;			\
+      GLint pos = (t << twidth_log2) + s;				\
+      pos = pos + pos + pos;  /* multiply by 3 */			\
+      span.array->rgb[i][RCOMP] = texture[pos];				\
+      span.array->rgb[i][GCOMP] = texture[pos+1];			\
+      span.array->rgb[i][BCOMP] = texture[pos+2];			\
+      span.intTex[0] += span.intTexStep[0];				\
+      span.intTex[1] += span.intTexStep[1];				\
+   }									\
+   rb->PutRowRGB(ctx, rb, span.end, span.x, span.y, span.array->rgb, NULL);
+
+#include "s_tritemp.h"
+
+
+
+/*
+ * Render an RGB, GL_DECAL, textured triangle.
+ * Interpolate S,T, GL_LESS depth test, w/out mipmapping or
+ * perspective correction.
+ * The depth buffer is accessed as DEFAULT_SOFTWARE_DEPTH_TYPE (see
+ * DEPTH_TYPE below), so depth values must fit in that type.
+ *
+ * No fog.
+ *
+ * SETUP_CODE is the same as for simple_textured_triangle: it reads the
+ * base-level image of texture unit 0's current 2D texture object and
+ * returns early if the image has no data.
+ *
+ * RENDER_SPAN performs the depth test itself: for each fragment the
+ * interpolated Z is compared against zRow[i] (zRow is not declared here;
+ * presumably s_tritemp.h provides it as the depth buffer row).  Passing
+ * fragments get a texel (three GLchans per texel, coordinates wrapped by
+ * masking), update zRow[i] and set mask[i] to 1; failing fragments set
+ * mask[i] to 0.  The row is then written with PutRowRGB() using that
+ * mask.
+ */
+#define NAME simple_z_textured_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_INT_TEX 1
+#define S_SCALE twidth
+#define T_SCALE theight
+
+#define SETUP_CODE							\
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0][0];\
+   struct gl_texture_object *obj = ctx->Texture.Unit[0].Current2D;	\
+   const GLint b = obj->BaseLevel;					\
+   const GLfloat twidth = (GLfloat) obj->Image[0][b]->Width;		\
+   const GLfloat theight = (GLfloat) obj->Image[0][b]->Height;		\
+   const GLint twidth_log2 = obj->Image[0][b]->WidthLog2;		\
+   const GLchan *texture = (const GLchan *) obj->Image[0][b]->Data;	\
+   const GLint smask = obj->Image[0][b]->Width - 1;			\
+   const GLint tmask = obj->Image[0][b]->Height - 1;			\
+   if (!texture) {							\
+      /* this shouldn't happen */					\
+      return;								\
+   }
+
+#define RENDER_SPAN( span )						\
+   GLuint i;				    				\
+   span.intTex[0] -= FIXED_HALF; /* off-by-one error? */		\
+   span.intTex[1] -= FIXED_HALF;					\
+   for (i = 0; i < span.end; i++) {					\
+      const GLuint z = FixedToDepth(span.z);				\
+      if (z < zRow[i]) {						\
+         GLint s = FixedToInt(span.intTex[0]) & smask;			\
+         GLint t = FixedToInt(span.intTex[1]) & tmask;			\
+         GLint pos = (t << twidth_log2) + s;				\
+         pos = pos + pos + pos;  /* multiply by 3 */			\
+         span.array->rgb[i][RCOMP] = texture[pos];			\
+         span.array->rgb[i][GCOMP] = texture[pos+1];			\
+         span.array->rgb[i][BCOMP] = texture[pos+2];			\
+         zRow[i] = z;							\
+         span.array->mask[i] = 1;					\
+      }									\
+      else {								\
+         span.array->mask[i] = 0;					\
+      }									\
+      span.intTex[0] += span.intTexStep[0];				\
+      span.intTex[1] += span.intTexStep[1];				\
+      span.z += span.zStep;						\
+   }									\
+   rb->PutRowRGB(ctx, rb, span.end, span.x, span.y,			\
+                 span.array->rgb, span.array->mask);
+
+#include "s_tritemp.h"
+
+
+
+#if CHAN_TYPE != GL_FLOAT
+
+/*
+ * Per-triangle texture state handed to affine_span().
+ */
+struct affine_info
+{
+   /* NOTE(review): presumably the texture filter (GL_NEAREST/GL_LINEAR),
+    * the texture format (RGB/RGBA) and the texture env mode that
+    * affine_span() dispatches on -- confirm where the struct is filled in.
+    */
+   GLenum filter;
+   GLenum format;
+   GLenum envmode;
+   /* masks ANDed with the integer S and T texel coordinates */
+   GLint smask, tmask;
+   /* the T coordinate is shifted left by this to form the row offset */
+   GLint twidth_log2;
+   /* base of the texel data, indexed by (components * texel position) */
+   const GLchan *texture;
+   /* fixed-point color; er/eg/eb are the constant term of the BLEND
+    * formula in affine_span() (presumably the texture env color)
+    */
+   GLfixed er, eg, eb, ea;
+   /* NOTE(review): presumably bytes per texture row and total texture
+    * size, used for neighbor lookups when filtering -- confirm in the
+    * code that uses them.
+    */
+   GLint tbytesline, tsize;
+};
+
+
+/*
+ * Integer linear interpolation between a and b.
+ * t is a fixed-point weight with FIXED_SHIFT fractional bits: the result
+ * is a plus (t * (b - a)) shifted right by FIXED_SHIFT.
+ */
+static INLINE GLint
+ilerp(GLint t, GLint a, GLint b)
+{
+   const GLint span = b - a;
+   const GLint offset = (t * span) >> FIXED_SHIFT;
+   return a + offset;
+}
+
+/*
+ * Integer bilinear interpolation of four values.
+ * ia blends v00 with v10 and v01 with v11; ib then blends those two
+ * intermediate results.  Both weights are fixed-point, as for ilerp().
+ */
+static INLINE GLint
+ilerp_2d(GLint ia, GLint ib, GLint v00, GLint v10, GLint v01, GLint v11)
+{
+   return ilerp(ib,
+                ilerp(ia, v00, v10),
+                ilerp(ia, v01, v11));
+}
+
+
+/* This function can handle GL_NEAREST or GL_LINEAR sampling of 2D RGB or RGBA
+ * textures with GL_REPLACE, GL_MODULATE, GL_BLEND, GL_DECAL or GL_ADD
+ * texture env modes.
+ */
+static INLINE void
+affine_span(GLcontext *ctx, struct sw_span *span,
+            struct affine_info *info)
+{
+   GLchan sample[4];  /* the filtered texture sample */
+
+   /* Instead of defining a function for each mode, a test is done
+    * between the outer and inner loops. This is to reduce code size
+    * and complexity. Observe that an optimizing compiler kills
+    * unused variables (for instance tf,sf,ti,si in case of GL_NEAREST).
+    */
+
+#define NEAREST_RGB			\
+   sample[RCOMP] = tex00[RCOMP];	\
+   sample[GCOMP] = tex00[GCOMP];	\
+   sample[BCOMP] = tex00[BCOMP];	\
+   sample[ACOMP] = CHAN_MAX
+
+#define LINEAR_RGB							\
+   sample[RCOMP] = ilerp_2d(sf, tf, tex00[0], tex01[0], tex10[0], tex11[0]);\
+   sample[GCOMP] = ilerp_2d(sf, tf, tex00[1], tex01[1], tex10[1], tex11[1]);\
+   sample[BCOMP] = ilerp_2d(sf, tf, tex00[2], tex01[2], tex10[2], tex11[2]);\
+   sample[ACOMP] = CHAN_MAX;
+
+#define NEAREST_RGBA  COPY_CHAN4(sample, tex00)
+
+#define LINEAR_RGBA							\
+   sample[RCOMP] = ilerp_2d(sf, tf, tex00[0], tex01[0], tex10[0], tex11[0]);\
+   sample[GCOMP] = ilerp_2d(sf, tf, tex00[1], tex01[1], tex10[1], tex11[1]);\
+   sample[BCOMP] = ilerp_2d(sf, tf, tex00[2], tex01[2], tex10[2], tex11[2]);\
+   sample[ACOMP] = ilerp_2d(sf, tf, tex00[3], tex01[3], tex10[3], tex11[3])
+
+#define MODULATE							  \
+   dest[RCOMP] = span->red   * (sample[RCOMP] + 1u) >> (FIXED_SHIFT + 8); \
+   dest[GCOMP] = span->green * (sample[GCOMP] + 1u) >> (FIXED_SHIFT + 8); \
+   dest[BCOMP] = span->blue  * (sample[BCOMP] + 1u) >> (FIXED_SHIFT + 8); \
+   dest[ACOMP] = span->alpha * (sample[ACOMP] + 1u) >> (FIXED_SHIFT + 8)
+
+#define DECAL								\
+   dest[RCOMP] = ((CHAN_MAX - sample[ACOMP]) * span->red +		\
+               ((sample[ACOMP] + 1) * sample[RCOMP] << FIXED_SHIFT))	\
+               >> (FIXED_SHIFT + 8);					\
+   dest[GCOMP] = ((CHAN_MAX - sample[ACOMP]) * span->green +		\
+               ((sample[ACOMP] + 1) * sample[GCOMP] << FIXED_SHIFT))	\
+               >> (FIXED_SHIFT + 8);					\
+   dest[BCOMP] = ((CHAN_MAX - sample[ACOMP]) * span->blue +		\
+               ((sample[ACOMP] + 1) * sample[BCOMP] << FIXED_SHIFT))	\
+               >> (FIXED_SHIFT + 8);					\
+   dest[ACOMP] = FixedToInt(span->alpha)
+
+#define BLEND								\
+   dest[RCOMP] = ((CHAN_MAX - sample[RCOMP]) * span->red		\
+               + (sample[RCOMP] + 1) * info->er) >> (FIXED_SHIFT + 8);	\
+   dest[GCOMP] = ((CHAN_MAX - sample[GCOMP]) * span->green		\
+               + (sample[GCOMP] + 1) * info->eg) >> (FIXED_SHIFT + 8);	\
+   dest[BCOMP] = ((CHAN_MAX - sample[BCOMP]) * span->blue		\
+               + (sample[BCOMP] + 1) * info->eb) >> (FIXED_SHIFT + 8);	\
+   dest[ACOMP] = span->alpha * (sample[ACOMP] + 1) >> (FIXED_SHIFT + 8)
+
+#define REPLACE  COPY_CHAN4(dest, sample)
+
+#define ADD								\
+   {									\
+      GLint rSum = FixedToInt(span->red)   + (GLint) sample[RCOMP];	\
+      GLint gSum = FixedToInt(span->green) + (GLint) sample[GCOMP];	\
+      GLint bSum = FixedToInt(span->blue)  + (GLint) sample[BCOMP];	\
+      dest[RCOMP] = MIN2(rSum, CHAN_MAX);				\
+      dest[GCOMP] = MIN2(gSum, CHAN_MAX);				\
+      dest[BCOMP] = MIN2(bSum, CHAN_MAX);				\
+      dest[ACOMP] = span->alpha * (sample[ACOMP] + 1) >> (FIXED_SHIFT + 8); \
+  }
+
+/* shortcuts */
+
+#define NEAREST_RGB_REPLACE		\
+   NEAREST_RGB;				\
+   dest[0] = sample[0];			\
+   dest[1] = sample[1];			\
+   dest[2] = sample[2];			\
+   dest[3] = FixedToInt(span->alpha);
+
+#define NEAREST_RGBA_REPLACE  COPY_CHAN4(dest, tex00)
+
+#define SPAN_NEAREST(DO_TEX, COMPS)					\
+	for (i = 0; i < span->end; i++) {				\
+           /* Isn't it necessary to use FixedFloor below?? */		\
+           GLint s = FixedToInt(span->intTex[0]) & info->smask;		\
+           GLint t = FixedToInt(span->intTex[1]) & info->tmask;		\
+           GLint pos = (t << info->twidth_log2) + s;			\
+           const GLchan *tex00 = info->texture + COMPS * pos;		\
+           DO_TEX;							\
+           span->red += span->redStep;					\
+	   span->green += span->greenStep;				\
+           span->blue += span->blueStep;				\
+	   span->alpha += span->alphaStep;				\
+	   span->intTex[0] += span->intTexStep[0];			\
+	   span->intTex[1] += span->intTexStep[1];			\
+           dest += 4;							\
+	}
+
+#define SPAN_LINEAR(DO_TEX, COMPS)					\
+	for (i = 0; i < span->end; i++) {				\
+           /* Isn't it necessary to use FixedFloor below?? */		\
+           const GLint s = FixedToInt(span->intTex[0]) & info->smask;	\
+           const GLint t = FixedToInt(span->intTex[1]) & info->tmask;	\
+           const GLfixed sf = span->intTex[0] & FIXED_FRAC_MASK;	\
+           const GLfixed tf = span->intTex[1] & FIXED_FRAC_MASK;	\
+           const GLint pos = (t << info->twidth_log2) + s;		\
+           const GLchan *tex00 = info->texture + COMPS * pos;		\
+           const GLchan *tex10 = tex00 + info->tbytesline;		\
+           const GLchan *tex01 = tex00 + COMPS;				\
+           const GLchan *tex11 = tex10 + COMPS;				\
+           if (t == info->tmask) {					\
+              tex10 -= info->tsize;					\
+              tex11 -= info->tsize;					\
+           }								\
+           if (s == info->smask) {					\
+              tex01 -= info->tbytesline;				\
+              tex11 -= info->tbytesline;				\
+           }								\
+           DO_TEX;							\
+           span->red += span->redStep;					\
+	   span->green += span->greenStep;				\
+           span->blue += span->blueStep;				\
+	   span->alpha += span->alphaStep;				\
+	   span->intTex[0] += span->intTexStep[0];			\
+	   span->intTex[1] += span->intTexStep[1];			\
+           dest += 4;							\
+	}
+
+
+   GLuint i;
+   GLchan *dest = span->array->rgba[0];
+
+   span->intTex[0] -= FIXED_HALF;
+   span->intTex[1] -= FIXED_HALF;
+   switch (info->filter) {
+   case GL_NEAREST:
+      switch (info->format) {
+      case GL_RGB:
+         switch (info->envmode) {
+         case GL_MODULATE:
+            SPAN_NEAREST(NEAREST_RGB;MODULATE,3);
+            break;
+         case GL_DECAL:
+         case GL_REPLACE:
+            SPAN_NEAREST(NEAREST_RGB_REPLACE,3);
+            break;
+         case GL_BLEND:
+            SPAN_NEAREST(NEAREST_RGB;BLEND,3);
+            break;
+         case GL_ADD:
+            SPAN_NEAREST(NEAREST_RGB;ADD,3);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode in SPAN_LINEAR");
+            return;
+         }
+         break;
+      case GL_RGBA:
+         switch(info->envmode) {
+         case GL_MODULATE:
+            SPAN_NEAREST(NEAREST_RGBA;MODULATE,4);
+            break;
+         case GL_DECAL:
+            SPAN_NEAREST(NEAREST_RGBA;DECAL,4);
+            break;
+         case GL_BLEND:
+            SPAN_NEAREST(NEAREST_RGBA;BLEND,4);
+            break;
+         case GL_ADD:
+            SPAN_NEAREST(NEAREST_RGBA;ADD,4);
+            break;
+         case GL_REPLACE:
+            SPAN_NEAREST(NEAREST_RGBA_REPLACE,4);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode (2) in SPAN_LINEAR");
+            return;
+         }
+         break;
+      }
+      break;
+
+   case GL_LINEAR:
+      span->intTex[0] -= FIXED_HALF;
+      span->intTex[1] -= FIXED_HALF;
+      switch (info->format) {
+      case GL_RGB:
+         switch (info->envmode) {
+         case GL_MODULATE:
+            SPAN_LINEAR(LINEAR_RGB;MODULATE,3);
+            break;
+         case GL_DECAL:
+         case GL_REPLACE:
+            SPAN_LINEAR(LINEAR_RGB;REPLACE,3);
+            break;
+         case GL_BLEND:
+            SPAN_LINEAR(LINEAR_RGB;BLEND,3);
+            break;
+         case GL_ADD:
+            SPAN_LINEAR(LINEAR_RGB;ADD,3);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode (3) in SPAN_LINEAR");
+            return;
+         }
+         break;
+      case GL_RGBA:
+         switch (info->envmode) {
+         case GL_MODULATE:
+            SPAN_LINEAR(LINEAR_RGBA;MODULATE,4);
+            break;
+         case GL_DECAL:
+            SPAN_LINEAR(LINEAR_RGBA;DECAL,4);
+            break;
+         case GL_BLEND:
+            SPAN_LINEAR(LINEAR_RGBA;BLEND,4);
+            break;
+         case GL_ADD:
+            SPAN_LINEAR(LINEAR_RGBA;ADD,4);
+            break;
+         case GL_REPLACE:
+            SPAN_LINEAR(LINEAR_RGBA;REPLACE,4);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode (4) in SPAN_LINEAR");
+            return;
+         }
+         break;
+      }
+      break;
+   }
+   span->interpMask &= ~SPAN_RGBA;
+   ASSERT(span->arrayMask & SPAN_RGBA);
+   _swrast_write_rgba_span(ctx, span);
+
+#undef SPAN_NEAREST
+#undef SPAN_LINEAR
+}
+
+
+
+/* Render an RGB/RGBA textured triangle without perspective correction.
+ * The function is generated by #including s_tritemp.h with the macros below;
+ * each span is handed to affine_span() together with the local "info". */
+#define NAME affine_textured_triangle
+#define INTERP_Z 1
+#define INTERP_FOG 1
+#define INTERP_RGB 1
+#define INTERP_ALPHA 1
+#define INTERP_INT_TEX 1
+#define S_SCALE twidth
+#define T_SCALE theight
+/* SETUP_CODE: fill "info" from the base level of texture unit 0's 2-D texture. */
+#define SETUP_CODE							\
+   struct affine_info info;						\
+   struct gl_texture_unit *unit = ctx->Texture.Unit+0;			\
+   struct gl_texture_object *obj = unit->Current2D;			\
+   const GLint b = obj->BaseLevel;					\
+   const GLfloat twidth = (GLfloat) obj->Image[0][b]->Width;		\
+   const GLfloat theight = (GLfloat) obj->Image[0][b]->Height;		\
+   info.texture = (const GLchan *) obj->Image[0][b]->Data;		\
+   info.twidth_log2 = obj->Image[0][b]->WidthLog2;			\
+   info.smask = obj->Image[0][b]->Width - 1;				\
+   info.tmask = obj->Image[0][b]->Height - 1;				\
+   info.format = obj->Image[0][b]->_BaseFormat;				\
+   info.filter = obj->MinFilter;					\
+   info.envmode = unit->EnvMode;					\
+   span.arrayMask |= SPAN_RGBA;						\
+									\
+   if (info.envmode == GL_BLEND) {					\
+      /* potential off-by-one error here? (1.0f -> 2048 -> 0) */	\
+      info.er = FloatToFixed(unit->EnvColor[RCOMP] * CHAN_MAXF);	\
+      info.eg = FloatToFixed(unit->EnvColor[GCOMP] * CHAN_MAXF);	\
+      info.eb = FloatToFixed(unit->EnvColor[BCOMP] * CHAN_MAXF);	\
+      info.ea = FloatToFixed(unit->EnvColor[ACOMP] * CHAN_MAXF);	\
+   }									\
+   if (!info.texture) {							\
+      /* this shouldn't happen */					\
+      return;								\
+   }									\
+									\
+   switch (info.format) {						\
+   case GL_ALPHA:							\
+   case GL_LUMINANCE:							\
+   case GL_INTENSITY:							\
+      info.tbytesline = obj->Image[0][b]->Width;			\
+      break;								\
+   case GL_LUMINANCE_ALPHA:						\
+      info.tbytesline = obj->Image[0][b]->Width * 2;			\
+      break;								\
+   case GL_RGB:								\
+      info.tbytesline = obj->Image[0][b]->Width * 3;			\
+      break;								\
+   case GL_RGBA:							\
+      info.tbytesline = obj->Image[0][b]->Width * 4;			\
+      break;								\
+   default:								\
+      _mesa_problem(NULL, "Bad texture format in affine_texture_triangle");\
+      return;								\
+   }									\
+   info.tsize = obj->Image[0][b]->Height * info.tbytesline;
+
+#define RENDER_SPAN( span )   affine_span(ctx, &span, &info);
+
+#include "s_tritemp.h"
+
+
+
+struct persp_info   /* texture parameters handed to fast_persp_span() */
+{
+   GLenum filter;            /* obj->MinFilter; GL_NEAREST and GL_LINEAR are handled */
+   GLenum format;            /* _BaseFormat of the base-level image */
+   GLenum envmode;           /* EnvMode of texture unit 0 */
+   GLint smask, tmask;       /* image Width-1 / Height-1, ANDed onto texel coords */
+   GLint twidth_log2;        /* image WidthLog2; the row index is shifted by this */
+   const GLchan *texture;    /* Data pointer of the base-level image */
+   GLfixed er, eg, eb, ea;   /* texture env color; only set when envmode is GL_BLEND */
+   GLint tbytesline, tsize;  /* row stride and whole-image size, in GLchan units */
+};
+
+
+static INLINE void
+fast_persp_span(GLcontext *ctx, struct sw_span *span,
+		struct persp_info *info)
+{
+   GLchan sample[4];  /* the filtered texture sample */
+   /* Textures one span with a perspective divide per pixel and stores the
+   * combined colors in span->array->rgba before writing the span.
+   * Instead of defining a function for each mode, a test is done
+   * between the outer and inner loops. This is to reduce code size
+   * and complexity. Observe that an optimizing compiler kills
+   * unused variables (for instance tf,sf,ti,si in case of GL_NEAREST). */
+#define SPAN_NEAREST(DO_TEX,COMP)					\
+	for (i = 0; i < span->end; i++) {				\
+           GLdouble invQ = tex_coord[2] ?				\
+                                 (1.0 / tex_coord[2]) : 1.0;            \
+           GLfloat s_tmp = (GLfloat) (tex_coord[0] * invQ);		\
+           GLfloat t_tmp = (GLfloat) (tex_coord[1] * invQ);		\
+           GLint s = IFLOOR(s_tmp) & info->smask;	        	\
+           GLint t = IFLOOR(t_tmp) & info->tmask;	        	\
+           GLint pos = (t << info->twidth_log2) + s;			\
+           const GLchan *tex00 = info->texture + COMP * pos;		\
+           DO_TEX;							\
+           span->red += span->redStep;					\
+	   span->green += span->greenStep;				\
+           span->blue += span->blueStep;				\
+	   span->alpha += span->alphaStep;				\
+	   tex_coord[0] += tex_step[0];					\
+	   tex_coord[1] += tex_step[1];					\
+	   tex_coord[2] += tex_step[2];					\
+           dest += 4;							\
+	}
+
+#define SPAN_LINEAR(DO_TEX,COMP)					\
+	for (i = 0; i < span->end; i++) {				\
+           GLdouble invQ = tex_coord[2] ?				\
+                                 (1.0 / tex_coord[2]) : 1.0;            \
+           const GLfloat s_tmp = (GLfloat) (tex_coord[0] * invQ);	\
+           const GLfloat t_tmp = (GLfloat) (tex_coord[1] * invQ);	\
+           const GLfixed s_fix = FloatToFixed(s_tmp) - FIXED_HALF;	\
+           const GLfixed t_fix = FloatToFixed(t_tmp) - FIXED_HALF;      \
+           const GLint s = FixedToInt(FixedFloor(s_fix)) & info->smask;	\
+           const GLint t = FixedToInt(FixedFloor(t_fix)) & info->tmask;	\
+           const GLfixed sf = s_fix & FIXED_FRAC_MASK;			\
+           const GLfixed tf = t_fix & FIXED_FRAC_MASK;			\
+           const GLint pos = (t << info->twidth_log2) + s;		\
+           const GLchan *tex00 = info->texture + COMP * pos;		\
+           const GLchan *tex10 = tex00 + info->tbytesline;		\
+           const GLchan *tex01 = tex00 + COMP;				\
+           const GLchan *tex11 = tex10 + COMP;				\
+           if (t == info->tmask) {					\
+              tex10 -= info->tsize;					\
+              tex11 -= info->tsize;					\
+           }								\
+           if (s == info->smask) {					\
+              tex01 -= info->tbytesline;				\
+              tex11 -= info->tbytesline;				\
+           }								\
+           DO_TEX;							\
+           span->red   += span->redStep;				\
+	   span->green += span->greenStep;				\
+           span->blue  += span->blueStep;				\
+	   span->alpha += span->alphaStep;				\
+	   tex_coord[0] += tex_step[0];					\
+	   tex_coord[1] += tex_step[1];					\
+	   tex_coord[2] += tex_step[2];					\
+           dest += 4;							\
+	}
+
+   GLuint i;
+   GLfloat tex_coord[3], tex_step[3];
+   GLchan *dest = span->array->rgba[0];
+
+   /* Saved here, but cleared only around the span write at the end of the
+    * function, so the early "bad tex env mode" returns cannot leave it zeroed. */
+   const GLuint savedTexEnable = ctx->Texture._EnabledUnits;
+   tex_coord[0] = span->tex[0][0]  * (info->smask + 1);
+   tex_step[0] = span->texStepX[0][0] * (info->smask + 1);
+   tex_coord[1] = span->tex[0][1] * (info->tmask + 1);
+   tex_step[1] = span->texStepX[0][1] * (info->tmask + 1);
+   /* span->tex[0][2] only if 3D-texturing, here only 2D */
+   tex_coord[2] = span->tex[0][3];
+   tex_step[2] = span->texStepX[0][3];
+
+   switch (info->filter) {
+   case GL_NEAREST:
+      switch (info->format) {
+      case GL_RGB:
+         switch (info->envmode) {
+         case GL_MODULATE:
+            SPAN_NEAREST(NEAREST_RGB;MODULATE,3);
+            break;
+         case GL_DECAL:
+         case GL_REPLACE:
+            SPAN_NEAREST(NEAREST_RGB_REPLACE,3);
+            break;
+         case GL_BLEND:
+            SPAN_NEAREST(NEAREST_RGB;BLEND,3);
+            break;
+         case GL_ADD:
+            SPAN_NEAREST(NEAREST_RGB;ADD,3);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode (5) in SPAN_LINEAR");
+            return;
+         }
+         break;
+      case GL_RGBA:
+         switch(info->envmode) {
+         case GL_MODULATE:
+            SPAN_NEAREST(NEAREST_RGBA;MODULATE,4);
+            break;
+         case GL_DECAL:
+            SPAN_NEAREST(NEAREST_RGBA;DECAL,4);
+            break;
+         case GL_BLEND:
+            SPAN_NEAREST(NEAREST_RGBA;BLEND,4);
+            break;
+         case GL_ADD:
+            SPAN_NEAREST(NEAREST_RGBA;ADD,4);
+            break;
+         case GL_REPLACE:
+            SPAN_NEAREST(NEAREST_RGBA_REPLACE,4);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode (6) in SPAN_LINEAR");
+            return;
+         }
+         break;
+      }
+      break;
+
+   case GL_LINEAR:
+      switch (info->format) {
+      case GL_RGB:
+         switch (info->envmode) {
+         case GL_MODULATE:
+            SPAN_LINEAR(LINEAR_RGB;MODULATE,3);
+            break;
+         case GL_DECAL:
+         case GL_REPLACE:
+            SPAN_LINEAR(LINEAR_RGB;REPLACE,3);
+            break;
+         case GL_BLEND:
+            SPAN_LINEAR(LINEAR_RGB;BLEND,3);
+            break;
+         case GL_ADD:
+            SPAN_LINEAR(LINEAR_RGB;ADD,3);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode (7) in SPAN_LINEAR");
+            return;
+         }
+         break;
+      case GL_RGBA:
+         switch (info->envmode) {
+         case GL_MODULATE:
+            SPAN_LINEAR(LINEAR_RGBA;MODULATE,4);
+            break;
+         case GL_DECAL:
+            SPAN_LINEAR(LINEAR_RGBA;DECAL,4);
+            break;
+         case GL_BLEND:
+            SPAN_LINEAR(LINEAR_RGBA;BLEND,4);
+            break;
+         case GL_ADD:
+            SPAN_LINEAR(LINEAR_RGBA;ADD,4);
+            break;
+         case GL_REPLACE:
+            SPAN_LINEAR(LINEAR_RGBA;REPLACE,4);
+            break;
+         default:
+            _mesa_problem(ctx, "bad tex env mode (8) in SPAN_LINEAR");
+            return;
+         }
+         break;
+      }
+      break;
+   }
+   /* Disable the texture units only for the duration of the span write
+    * (presumably so the already-textured colors are not textured again). */
+   ASSERT(span->arrayMask & SPAN_RGBA);
+   ctx->Texture._EnabledUnits = 0;
+   _swrast_write_rgba_span(ctx, span);
+   ctx->Texture._EnabledUnits = savedTexEnable;   /* restore state */
+
+#undef SPAN_NEAREST
+#undef SPAN_LINEAR
+}
+
+
+/*
+ * Render a perspective-corrected RGB/RGBA textured triangle.
+ * The Q (aka V in Mesa) coordinate must be zero such that the divide
+ * by interpolated Q/W comes out right.
+ * Generated from s_tritemp.h; each span goes to fast_persp_span().
+ */
+#define NAME persp_textured_triangle
+#define INTERP_Z 1
+#define INTERP_W 1
+#define INTERP_FOG 1
+#define INTERP_RGB 1
+#define INTERP_ALPHA 1
+#define INTERP_TEX 1
+/* SETUP_CODE: fill "info" from the base level of texture unit 0's 2-D texture. */
+#define SETUP_CODE							\
+   struct persp_info info;						\
+   const struct gl_texture_unit *unit = ctx->Texture.Unit+0;		\
+   const struct gl_texture_object *obj = unit->Current2D;		\
+   const GLint b = obj->BaseLevel;					\
+   info.texture = (const GLchan *) obj->Image[0][b]->Data;		\
+   info.twidth_log2 = obj->Image[0][b]->WidthLog2;			\
+   info.smask = obj->Image[0][b]->Width - 1;				\
+   info.tmask = obj->Image[0][b]->Height - 1;				\
+   info.format = obj->Image[0][b]->_BaseFormat;				\
+   info.filter = obj->MinFilter;					\
+   info.envmode = unit->EnvMode;					\
+									\
+   if (info.envmode == GL_BLEND) {					\
+      /* potential off-by-one error here? (1.0f -> 2048 -> 0) */	\
+      info.er = FloatToFixed(unit->EnvColor[RCOMP] * CHAN_MAXF);	\
+      info.eg = FloatToFixed(unit->EnvColor[GCOMP] * CHAN_MAXF);	\
+      info.eb = FloatToFixed(unit->EnvColor[BCOMP] * CHAN_MAXF);	\
+      info.ea = FloatToFixed(unit->EnvColor[ACOMP] * CHAN_MAXF);	\
+   }									\
+   if (!info.texture) {							\
+      /* this shouldn't happen */					\
+      return;								\
+   }									\
+									\
+   switch (info.format) {						\
+   case GL_ALPHA:							\
+   case GL_LUMINANCE:							\
+   case GL_INTENSITY:							\
+      info.tbytesline = obj->Image[0][b]->Width;			\
+      break;								\
+   case GL_LUMINANCE_ALPHA:						\
+      info.tbytesline = obj->Image[0][b]->Width * 2;			\
+      break;								\
+   case GL_RGB:								\
+      info.tbytesline = obj->Image[0][b]->Width * 3;			\
+      break;								\
+   case GL_RGBA:							\
+      info.tbytesline = obj->Image[0][b]->Width * 4;			\
+      break;								\
+   default:								\
+      _mesa_problem(NULL, "Bad texture format in persp_textured_triangle");\
+      return;								\
+   }									\
+   info.tsize = obj->Image[0][b]->Height * info.tbytesline;
+/* Clear SPAN_RGBA in interpMask, set it in arrayMask, then texture the span. */
+#define RENDER_SPAN( span )			\
+   span.interpMask &= ~SPAN_RGBA;		\
+   span.arrayMask |= SPAN_RGBA;			\
+   fast_persp_span(ctx, &span, &info);
+
+#include "s_tritemp.h"
+
+
+#endif /* CHAN_BITS != GL_FLOAT */
+
+                
+
+
+/*
+ * Render a smooth-shaded, textured, RGBA triangle.
+ * Interpolate S,T,R with perspective correction, w/out mipmapping.
+ * Spans go straight to _swrast_write_rgba_span(); no SETUP_CODE is defined. */
+#define NAME general_textured_triangle
+#define INTERP_Z 1
+#define INTERP_W 1
+#define INTERP_FOG 1
+#define INTERP_RGB 1
+#define INTERP_SPEC 1
+#define INTERP_ALPHA 1
+#define INTERP_TEX 1
+#define RENDER_SPAN( span )   _swrast_write_rgba_span(ctx, &span);
+#include "s_tritemp.h"
+
+
+
+/*
+ * This is the big one!  Interpolate Z, W, RGB, alpha, specular, fog, N sets
+ * of texture coordinates (INTERP_MULTITEX) and varying floats (INTERP_VARYING).
+ * Yup, it's slow.  Spans are written with _swrast_write_rgba_span().
+ */
+#define NAME multitextured_triangle
+#define INTERP_Z 1
+#define INTERP_W 1
+#define INTERP_FOG 1
+#define INTERP_RGB 1
+#define INTERP_ALPHA 1
+#define INTERP_SPEC 1
+#define INTERP_MULTITEX 1
+#define INTERP_VARYING 1
+#define RENDER_SPAN( span )   _swrast_write_rgba_span(ctx, &span);
+#include "s_tritemp.h"
+
+
+
+/* Special tri function for occlusion testing: writes no pixels, it only
+ * bumps q->Result for each span pixel whose interpolated Z is less than the
+ * stored depth value.  Returns early when no occlusion query is current. */
+#define NAME occlusion_zless_triangle
+#define INTERP_Z 1
+#define SETUP_CODE							\
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_DepthBuffer;		\
+   struct gl_query_object *q = ctx->Query.CurrentOcclusionObject;	\
+   ASSERT(ctx->Depth.Test);						\
+   ASSERT(!ctx->Depth.Mask);						\
+   ASSERT(ctx->Depth.Func == GL_LESS);					\
+   if (!q) {								\
+      return;								\
+   }
+#define RENDER_SPAN( span )						\
+   if (rb->DepthBits <= 16) {						\
+      GLuint i;								\
+      const GLushort *zRow = (const GLushort *)				\
+         rb->GetPointer(ctx, rb, span.x, span.y);			\
+      for (i = 0; i < span.end; i++) {					\
+         GLuint z = FixedToDepth(span.z);				\
+         if (z < zRow[i]) {						\
+            q->Result++;						\
+         }								\
+         span.z += span.zStep;						\
+      }									\
+   }									\
+   else {								\
+      GLuint i;								\
+      const GLuint *zRow = (const GLuint *)				\
+         rb->GetPointer(ctx, rb, span.x, span.y);			\
+      for (i = 0; i < span.end; i++) {					\
+         if ((GLuint)span.z < zRow[i]) {				\
+            q->Result++;						\
+         }								\
+         span.z += span.zStep;						\
+      }									\
+   }
+#include "s_tritemp.h"
+
+
+
+static void
+nodraw_triangle(GLcontext *ctx,
+                const SWvertex *v0,
+                const SWvertex *v1,
+                const SWvertex *v2)
+{
+   (void) ctx; (void) v0; (void) v1; (void) v2;  /* intentionally draws nothing */
+}
+
+
+/*
+ * This is used when separate specular color is enabled, but not
+ * texturing.  We add the specular color to the primary color,
+ * draw the triangle, then restore the original primary color.
+ * Inefficient, but seldom needed.
+ */
+void _swrast_add_spec_terms_triangle( GLcontext *ctx,
+				      const SWvertex *v0,
+				      const SWvertex *v1,
+				      const SWvertex *v2 )
+{
+   SWvertex *ncv0 = (SWvertex *)v0; /* drop const qualifier: colors are modified in place below */
+   SWvertex *ncv1 = (SWvertex *)v1;
+   SWvertex *ncv2 = (SWvertex *)v2;
+#if CHAN_TYPE == GL_FLOAT
+   GLfloat rSum, gSum, bSum;
+#else
+   GLint rSum, gSum, bSum;
+#endif
+   GLchan c[3][4];
+   /* save original colors of the three vertices */
+   COPY_CHAN4( c[0], ncv0->color );
+   COPY_CHAN4( c[1], ncv1->color );
+   COPY_CHAN4( c[2], ncv2->color );
+   /* sum v0: RGB = color + specular, clamped to CHAN_MAX; alpha is left as is */
+   rSum = ncv0->color[0] + ncv0->specular[0];
+   gSum = ncv0->color[1] + ncv0->specular[1];
+   bSum = ncv0->color[2] + ncv0->specular[2];
+   ncv0->color[0] = MIN2(rSum, CHAN_MAX);
+   ncv0->color[1] = MIN2(gSum, CHAN_MAX);
+   ncv0->color[2] = MIN2(bSum, CHAN_MAX);
+   /* sum v1 (same as v0) */
+   rSum = ncv1->color[0] + ncv1->specular[0];
+   gSum = ncv1->color[1] + ncv1->specular[1];
+   bSum = ncv1->color[2] + ncv1->specular[2];
+   ncv1->color[0] = MIN2(rSum, CHAN_MAX);
+   ncv1->color[1] = MIN2(gSum, CHAN_MAX);
+   ncv1->color[2] = MIN2(bSum, CHAN_MAX);
+   /* sum v2 (same as v0) */
+   rSum = ncv2->color[0] + ncv2->specular[0];
+   gSum = ncv2->color[1] + ncv2->specular[1];
+   bSum = ncv2->color[2] + ncv2->specular[2];
+   ncv2->color[0] = MIN2(rSum, CHAN_MAX);
+   ncv2->color[1] = MIN2(gSum, CHAN_MAX);
+   ncv2->color[2] = MIN2(bSum, CHAN_MAX);
+   /* draw through the SpecTriangle function pointer of the swrast context */
+   SWRAST_CONTEXT(ctx)->SpecTriangle( ctx, ncv0, ncv1, ncv2 );
+   /* restore original colors */
+   COPY_CHAN4( ncv0->color, c[0] );
+   COPY_CHAN4( ncv1->color, c[1] );
+   COPY_CHAN4( ncv2->color, c[2] );
+}
+
+
+
+#ifdef DEBUG
+/* USE(f): install f as swrast->Triangle; DEBUG builds also record its name. */
+/* record the current triangle function name */
+const char *_mesa_triFuncName = NULL;
+
+#define USE(triFunc)				\
+do {						\
+    _mesa_triFuncName = #triFunc;		\
+    /*printf("%s\n", _mesa_triFuncName);*/	\
+    swrast->Triangle = triFunc;			\
+} while (0)
+
+#else
+/* Same statement shape as the DEBUG variant: the caller supplies the ';'. */
+#define USE(triFunc)  do { swrast->Triangle = triFunc; } while (0)
+
+#endif /* DEBUG */
+
+
+
+
+/*
+ * Determine which triangle rendering function to use given the current
+ * rendering context, and store it in swrast->Triangle.
+ *
+ * Please update the summary flag _SWRAST_NEW_TRIANGLE if you add or
+ * remove tests to this code.
+ */
+void
+_swrast_choose_triangle( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const GLboolean rgbmode = ctx->Visual.rgbMode;
+   /* Culling both front and back faces: nothing can be drawn. */
+   if (ctx->Polygon.CullFlag &&
+       ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) {
+      USE(nodraw_triangle);
+      return;
+   }
+
+   if (ctx->RenderMode==GL_RENDER) {
+      /* Smooth polygons are delegated to _swrast_set_aa_triangle_function(). */
+      if (ctx->Polygon.SmoothFlag) {
+         _swrast_set_aa_triangle_function(ctx);
+         ASSERT(swrast->Triangle);
+         return;
+      }
+
+      /* special case for occlusion testing: GL_LESS depth test, no Z/stencil/color writes */
+      if (ctx->Query.CurrentOcclusionObject &&
+          ctx->Depth.Test &&
+          ctx->Depth.Mask == GL_FALSE &&
+          ctx->Depth.Func == GL_LESS &&
+          !ctx->Stencil.Enabled) {
+         if ((rgbmode &&
+              ctx->Color.ColorMask[0] == 0 &&
+              ctx->Color.ColorMask[1] == 0 &&
+              ctx->Color.ColorMask[2] == 0 &&
+              ctx->Color.ColorMask[3] == 0)
+             ||
+             (!rgbmode && ctx->Color.IndexMask == 0)) {
+            USE(occlusion_zless_triangle);
+            return;
+         }
+      }
+
+      if (ctx->Texture._EnabledCoordUnits || ctx->FragmentProgram._Active ||
+          ctx->ATIFragmentShader._Enabled || ctx->ShaderObjects._FragmentShaderPresent) {
+         /* Ugh, we do a _lot_ of tests to pick the best textured tri func */
+         const struct gl_texture_object *texObj2D;
+         const struct gl_texture_image *texImg;
+         GLenum minFilter, magFilter, envMode;
+         GLint format;
+         texObj2D = ctx->Texture.Unit[0].Current2D;
+         texImg = texObj2D ? texObj2D->Image[0][texObj2D->BaseLevel] : NULL;
+         format = texImg ? texImg->TexFormat->MesaFormat : -1;
+         minFilter = texObj2D ? texObj2D->MinFilter : (GLenum) 0;
+         magFilter = texObj2D ? texObj2D->MagFilter : (GLenum) 0;
+         envMode = ctx->Texture.Unit[0].EnvMode;
+
+         /* First see if we can use an optimized 2-D texture function */
+         if (ctx->Texture._EnabledCoordUnits == 0x1
+             && !ctx->FragmentProgram._Active
+             && !ctx->ATIFragmentShader._Enabled
+             && !ctx->ShaderObjects._FragmentShaderPresent
+             && ctx->Texture.Unit[0]._ReallyEnabled == TEXTURE_2D_BIT
+             && texObj2D->WrapS == GL_REPEAT
+             && texObj2D->WrapT == GL_REPEAT
+             && texImg->_IsPowerOfTwo
+             && texImg->Border == 0
+             && texImg->Width == texImg->RowStride
+             && (format == MESA_FORMAT_RGB || format == MESA_FORMAT_RGBA)
+             && minFilter == magFilter
+             && ctx->Light.Model.ColorControl == GL_SINGLE_COLOR
+             && ctx->Texture.Unit[0].EnvMode != GL_COMBINE_EXT) {
+	    if (ctx->Hint.PerspectiveCorrection==GL_FASTEST) {
+	       if (minFilter == GL_NEAREST
+		   && format == MESA_FORMAT_RGB
+		   && (envMode == GL_REPLACE || envMode == GL_DECAL)
+		   && ((swrast->_RasterMask == (DEPTH_BIT | TEXTURE_BIT)
+			&& ctx->Depth.Func == GL_LESS
+			&& ctx->Depth.Mask == GL_TRUE)
+		       || swrast->_RasterMask == TEXTURE_BIT)
+		   && ctx->Polygon.StippleFlag == GL_FALSE
+                   && ctx->DrawBuffer->Visual.depthBits <= 16) {
+		  if (swrast->_RasterMask == (DEPTH_BIT | TEXTURE_BIT)) {
+		     USE(simple_z_textured_triangle);
+		  }
+		  else {
+		     USE(simple_textured_triangle);
+		  }
+	       }
+	       else {
+#if (CHAN_BITS == 16 || CHAN_BITS == 32)
+                  USE(general_textured_triangle);
+#else
+                  USE(affine_textured_triangle);
+#endif
+	       }
+	    }
+	    else {
+#if (CHAN_BITS == 16 || CHAN_BITS == 32)
+               USE(general_textured_triangle);
+#else
+               USE(persp_textured_triangle);
+#endif
+	    }
+	 }
+         else {
+            /* general case textured triangles */
+            if (ctx->Texture._EnabledCoordUnits > 1) {
+               USE(multitextured_triangle);
+            }
+            else {
+               USE(general_textured_triangle);
+            }
+         }
+      }
+      else {
+         ASSERT(!ctx->Texture._EnabledCoordUnits);
+	 if (ctx->Light.ShadeModel==GL_SMOOTH) {
+	    /* smooth shaded, no texturing, stippled or some raster ops */
+            if (rgbmode) {
+	       USE(smooth_rgba_triangle);
+            }
+            else {
+               USE(smooth_ci_triangle);
+            }
+	 }
+	 else {
+	    /* flat shaded, no texturing, stippled or some raster ops */
+            if (rgbmode) {
+	       USE(flat_rgba_triangle);
+            }
+            else {
+               USE(flat_ci_triangle);
+            }
+	 }
+      }
+   }
+   else if (ctx->RenderMode==GL_FEEDBACK) {
+      USE(_swrast_feedback_triangle);
+   }
+   else {
+      /* GL_SELECT mode */
+      USE(_swrast_select_triangle);
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_triangle.h b/dist/Mesa/src/mesa/swrast/s_triangle.h
new file mode 100644
index 000000000..0de812500
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_triangle.h
@@ -0,0 +1,51 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  5.1
+ *
+ * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_TRIANGLES_H
+#define S_TRIANGLES_H
+
+
+#include "mtypes.h"
+#include "swrast.h"
+
+
+extern GLboolean
+_swrast_culltriangle( GLcontext *ctx,
+                     const SWvertex *v0,
+                     const SWvertex *v1,
+                     const SWvertex *v2);
+/* Pick the triangle function for the current context state. */
+extern void
+_swrast_choose_triangle( GLcontext *ctx );
+/* Draw with specular temporarily added to the vertex colors (restored after). */
+extern void
+_swrast_add_spec_terms_triangle( GLcontext *ctx,
+				 const SWvertex *v0,
+				 const SWvertex *v1,
+				 const SWvertex *v2 );
+
+
+#endif /* S_TRIANGLES_H */
diff --git a/dist/Mesa/src/mesa/swrast/s_trispan.h b/dist/Mesa/src/mesa/swrast/s_trispan.h
new file mode 100644
index 000000000..15207e863
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_trispan.h
@@ -0,0 +1,31 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef S_TRISPAN_H
+#define S_TRISPAN_H
+
+
+#endif /* S_TRISPAN_H */
diff --git a/dist/Mesa/src/mesa/swrast/s_tritemp.h b/dist/Mesa/src/mesa/swrast/s_tritemp.h
new file mode 100644
index 000000000..29a7a94da
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_tritemp.h
@@ -0,0 +1,1377 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Triangle Rasterizer Template
+ *
+ * This file is #include'd to generate custom triangle rasterizers.
+ *
+ * The following macros may be defined to indicate what auxiliary information
+ * must be interpolated across the triangle:
+ *    INTERP_Z        - if defined, interpolate vertex Z values
+ *    INTERP_W        - if defined, interpolate vertex W values
+ *    INTERP_FOG      - if defined, interpolate fog values
+ *    INTERP_RGB      - if defined, interpolate RGB values
+ *    INTERP_ALPHA    - if defined, interpolate Alpha values (req's INTERP_RGB)
+ *    INTERP_SPEC     - if defined, interpolate specular RGB values
+ *    INTERP_INDEX    - if defined, interpolate color index values
+ *    INTERP_INT_TEX  - if defined, interpolate integer ST texcoords
+ *                         (fast, simple 2-D texture mapping)
+ *    INTERP_TEX      - if defined, interpolate set 0 float STRQ texcoords
+ *                         NOTE:  OpenGL STRQ = Mesa STUV (R was taken for red)
+ *    INTERP_MULTITEX - if defined, interpolate N units of STRQ texcoords
+ *    INTERP_VARYING  - if defined, interpolate M floats of GLSL varyings
+ *
+ * When one can directly address pixels in the color buffer the following
+ * macros can be defined and used to compute pixel addresses during
+ * rasterization (see pRow):
+ *    PIXEL_TYPE          - the datatype of a pixel (GLubyte, GLushort, GLuint)
+ *    BYTES_PER_ROW       - number of bytes per row in the color buffer
+ *    PIXEL_ADDRESS(X,Y)  - returns the address of pixel at (X,Y) where
+ *                          Y==0 at bottom of screen and increases upward.
+ *
+ * Similarly, for direct depth buffer access, this type is used for depth
+ * buffer addressing:
+ *    DEPTH_TYPE          - either GLushort or GLuint
+ *
+ * Optionally, one may provide one-time setup code per triangle:
+ *    SETUP_CODE    - code which is to be executed once per triangle
+ *    CLEANUP_CODE    - code to execute at end of triangle
+ *
+ * The following macro MUST be defined:
+ *    RENDER_SPAN(span) - code to write a span of pixels.
+ *
+ * This code was designed for the origin to be in the lower-left corner.
+ *
+ * Inspired by triangle rasterizer code written by Allen Akin.  Thanks Allen!
+ *
+ *
+ * Some notes on rasterization accuracy:
+ *
+ * This code uses fixed point arithmetic (the GLfixed type) to iterate
+ * over the triangle edges and interpolate ancillary data (such as Z,
+ * color, secondary color, etc).  The number of fractional bits in
+ * GLfixed and the value of SUB_PIXEL_BITS have a direct bearing on the
+ * accuracy of rasterization.
+ *
+ * If SUB_PIXEL_BITS=4 then we'll snap the vertices to the nearest
+ * 1/16 of a pixel.  If we're walking up a long, nearly vertical edge
+ * (dx=1/16, dy=1024) we'll need 4 + 10 = 14 fractional bits in
+ * GLfixed to walk the edge without error.  If the maximum viewport
+ * height is 4K pixels, then we'll need 4 + 12 = 16 fractional bits.
+ *
+ * Historically, Mesa has used 11 fractional bits in GLfixed, snapped
+ * vertices to 1/16 pixel and allowed a maximum viewport height of 2K
+ * pixels.  11 fractional bits is actually insufficient for accurately
+ * rasterizing some triangles.  More recently, the maximum viewport
+ * height was increased to 4K pixels.  Thus, Mesa should be using 16
+ * fractional bits in GLfixed.  Unfortunately, there may be some issues
+ * with setting FIXED_FRAC_BITS=16, such as multiplication overflow.
+ * This will have to be examined in some detail...
+ *
+ * For now, if you find rasterization errors, particularly with tall,
+ * sliver triangles, try increasing FIXED_FRAC_BITS and/or decreasing
+ * SUB_PIXEL_BITS.
+ */
+
+/*
+ * ColorTemp is the type used for intermediate color values; it follows CHAN_TYPE.
+ */
+#if CHAN_TYPE == GL_FLOAT
+#define ColorTemp GLfloat  /* float color channels */
+#else
+#define ColorTemp GLint  /* same as GLfixed */
+#endif
+
+
+/*
+ * Walk triangle edges with GLfixed or GLdouble, chosen by TRIANGLE_WALK_DOUBLE.
+ */
+#if TRIANGLE_WALK_DOUBLE
+#define GLinterp        GLdouble  /* type of edge coordinates and error terms */
+#define InterpToInt(X)  ((GLint) (X))  /* plain C cast: drops the fraction */
+#define INTERP_ONE      1.0  /* the value one, in GLinterp units */
+#else
+#define GLinterp        GLfixed  /* type of edge coordinates and error terms */
+#define InterpToInt(X)  FixedToInt(X)
+#define INTERP_ONE      FIXED_ONE  /* the value one, in GLinterp units */
+#endif
+
+/*
+ * TEX_UNIT_LOOP(CODE) expands CODE with `u` naming a texture unit: each unit below
+ * MaxTextureUnits whose bit is set in _EnabledCoordUnits (INTERP_MULTITEX), or just unit zero.
+ */
+#ifdef INTERP_MULTITEX
+#define TEX_UNIT_LOOP(CODE)					\
+   {								\
+      GLuint u;							\
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {	\
+         if (ctx->Texture._EnabledCoordUnits & (1 << u)) {	\
+            CODE						\
+         }							\
+      }								\
+   }
+#define INTERP_TEX  /* multitexture also enables every #ifdef INTERP_TEX section */
+#elif defined(INTERP_TEX)
+#define TEX_UNIT_LOOP(CODE)					\
+   {								\
+      const GLuint u = 0;					\
+      CODE							\
+   }
+#endif
+
+
+/* VARYING_LOOP(CODE): expand CODE once per (iv, ic), iv < MAX_VARYING_VECTORS, ic < VARYINGS_PER_VECTOR. */
+#ifdef INTERP_VARYING
+#define VARYING_LOOP(CODE)\
+   {\
+      GLuint iv, ic;\
+      for (iv = 0; iv < MAX_VARYING_VECTORS; iv++) {\
+         for (ic = 0; ic < VARYINGS_PER_VECTOR; ic++) {\
+            CODE\
+         }\
+      }\
+   }
+#endif
+
+
+/*
+ * Some code we unfortunately need to prevent negative interpolated colors.
+ * Acts on the `span` in scope: if CHANNEL would be < 0 after LEN steps, raise it by that deficit; then floor it at 0.
+ */
+#ifndef CLAMP_INTERPOLANT  /* skipped when a definition already exists */
+#define CLAMP_INTERPOLANT(CHANNEL, CHANNELSTEP, LEN)		\
+do {								\
+   GLfixed endVal = span.CHANNEL + (LEN) * span.CHANNELSTEP;	\
+   if (endVal < 0) {						\
+      span.CHANNEL -= endVal;					\
+   }								\
+   if (span.CHANNEL < 0) {					\
+      span.CHANNEL = 0;						\
+   }								\
+} while (0)
+#endif
+
+
+static void NAME(GLcontext *ctx, const SWvertex *v0,
+                                 const SWvertex *v1,
+                                 const SWvertex *v2 )
+{
+   typedef struct {
+      const SWvertex *v0, *v1;   /* Y(v0) < Y(v1) */
+#if TRIANGLE_WALK_DOUBLE
+      GLdouble dx;	/* X(v1) - X(v0) */
+      GLdouble dy;	/* Y(v1) - Y(v0) */
+      GLdouble dxdy;	/* dx/dy */
+      GLdouble adjy;	/* adjust from v[0]->fy to fsy, scaled */
+      GLdouble fsx;	/* first sample point x coord */
+      GLdouble fsy;
+      GLdouble fx0;	/*X of lower endpoint */
+#else
+      GLfloat dx;	/* X(v1) - X(v0) */
+      GLfloat dy;	/* Y(v1) - Y(v0) */
+      GLfloat dxdy;	/* dx/dy */
+      GLfixed fdxdy;	/* dx/dy in fixed-point */
+      GLfloat adjy;	/* adjust from v[0]->fy to fsy, scaled */
+      GLfixed fsx;	/* first sample point x coord */
+      GLfixed fsy;
+      GLfixed fx0;	/* fixed pt X of lower endpoint */
+#endif
+      GLint lines;	/* number of lines to be sampled on this edge */
+   } EdgeT;
+
+#ifdef INTERP_Z
+   const GLint depthBits = ctx->DrawBuffer->Visual.depthBits;
+   const GLint fixedToDepthShift = depthBits <= 16 ? FIXED_SHIFT : 0;
+   const GLfloat maxDepth = ctx->DrawBuffer->_DepthMaxF;
+#define FixedToDepth(F)  ((F) >> fixedToDepthShift)
+#endif
+   EdgeT eMaj, eTop, eBot;
+   GLfloat oneOverArea;
+   const SWvertex *vMin, *vMid, *vMax;  /* Y(vMin)<=Y(vMid)<=Y(vMax) */
+   GLfloat bf = SWRAST_CONTEXT(ctx)->_BackfaceSign;
+#if !TRIANGLE_WALK_DOUBLE
+   const GLint snapMask = ~((FIXED_ONE / (1 << SUB_PIXEL_BITS)) - 1); /* for x/y coord snapping */
+#endif
+   GLinterp vMin_fx, vMin_fy, vMid_fx, vMid_fy, vMax_fx, vMax_fy;
+
+   struct sw_span span;
+
+   INIT_SPAN(span, GL_POLYGON, 0, 0, 0);
+
+#ifdef INTERP_Z
+   (void) fixedToDepthShift;
+#endif
+
+   /*
+   printf("%s()\n", __FUNCTION__);
+   printf("  %g, %g, %g\n", v0->win[0], v0->win[1], v0->win[2]);
+   printf("  %g, %g, %g\n", v1->win[0], v1->win[1], v1->win[2]);
+   printf("  %g, %g, %g\n", v2->win[0], v2->win[1], v2->win[2]);
+   */
+   /*
+   ASSERT(v0->win[2] >= 0.0);
+   ASSERT(v1->win[2] >= 0.0);
+   ASSERT(v2->win[2] >= 0.0);
+   */
+   /* Compute fixed point x,y coords w/ half-pixel offsets and snapping.
+    * And find the order of the 3 vertices along the Y axis.
+    */
+   {
+#if TRIANGLE_WALK_DOUBLE
+      const GLdouble fy0 = v0->win[1] - 0.5;
+      const GLdouble fy1 = v1->win[1] - 0.5;
+      const GLdouble fy2 = v2->win[1] - 0.5;
+#else
+      const GLfixed fy0 = FloatToFixed(v0->win[1] - 0.5F) & snapMask;
+      const GLfixed fy1 = FloatToFixed(v1->win[1] - 0.5F) & snapMask;
+      const GLfixed fy2 = FloatToFixed(v2->win[1] - 0.5F) & snapMask;
+#endif
+      if (fy0 <= fy1) {
+         if (fy1 <= fy2) {
+            /* y0 <= y1 <= y2 */
+            vMin = v0;   vMid = v1;   vMax = v2;
+            vMin_fy = fy0;  vMid_fy = fy1;  vMax_fy = fy2;
+         }
+         else if (fy2 <= fy0) {
+            /* y2 <= y0 <= y1 */
+            vMin = v2;   vMid = v0;   vMax = v1;
+            vMin_fy = fy2;  vMid_fy = fy0;  vMax_fy = fy1;
+         }
+         else {
+            /* y0 <= y2 <= y1 */
+            vMin = v0;   vMid = v2;   vMax = v1;
+            vMin_fy = fy0;  vMid_fy = fy2;  vMax_fy = fy1;
+            bf = -bf;
+         }
+      }
+      else {
+         if (fy0 <= fy2) {
+            /* y1 <= y0 <= y2 */
+            vMin = v1;   vMid = v0;   vMax = v2;
+            vMin_fy = fy1;  vMid_fy = fy0;  vMax_fy = fy2;
+            bf = -bf;
+         }
+         else if (fy2 <= fy1) {
+            /* y2 <= y1 <= y0 */
+            vMin = v2;   vMid = v1;   vMax = v0;
+            vMin_fy = fy2;  vMid_fy = fy1;  vMax_fy = fy0;
+            bf = -bf;
+         }
+         else {
+            /* y1 <= y2 <= y0 */
+            vMin = v1;   vMid = v2;   vMax = v0;
+            vMin_fy = fy1;  vMid_fy = fy2;  vMax_fy = fy0;
+         }
+      }
+
+      /* fixed point X coords */
+#if TRIANGLE_WALK_DOUBLE
+      vMin_fx = vMin->win[0] + 0.5;
+      vMid_fx = vMid->win[0] + 0.5;
+      vMax_fx = vMax->win[0] + 0.5;
+#else
+      vMin_fx = FloatToFixed(vMin->win[0] + 0.5F) & snapMask;
+      vMid_fx = FloatToFixed(vMid->win[0] + 0.5F) & snapMask;
+      vMax_fx = FloatToFixed(vMax->win[0] + 0.5F) & snapMask;
+#endif
+   }
+
+   /* vertex/edge relationship */
+   eMaj.v0 = vMin;   eMaj.v1 = vMax;   /*TODO: .v1's not needed */
+   eTop.v0 = vMid;   eTop.v1 = vMax;
+   eBot.v0 = vMin;   eBot.v1 = vMid;
+
+   /* compute deltas for each edge:  vertex[upper] - vertex[lower] */
+#if TRIANGLE_WALK_DOUBLE
+   eMaj.dx = vMax_fx - vMin_fx;
+   eMaj.dy = vMax_fy - vMin_fy;
+   eTop.dx = vMax_fx - vMid_fx;
+   eTop.dy = vMax_fy - vMid_fy;
+   eBot.dx = vMid_fx - vMin_fx;
+   eBot.dy = vMid_fy - vMin_fy;
+#else
+   eMaj.dx = FixedToFloat(vMax_fx - vMin_fx);
+   eMaj.dy = FixedToFloat(vMax_fy - vMin_fy);
+   eTop.dx = FixedToFloat(vMax_fx - vMid_fx);
+   eTop.dy = FixedToFloat(vMax_fy - vMid_fy);
+   eBot.dx = FixedToFloat(vMid_fx - vMin_fx);
+   eBot.dy = FixedToFloat(vMid_fy - vMin_fy);
+#endif
+
+   /* compute area, oneOverArea and perform backface culling */
+   {
+#if TRIANGLE_WALK_DOUBLE
+      const GLdouble area = eMaj.dx * eBot.dy - eBot.dx * eMaj.dy;
+#else
+      const GLfloat area = eMaj.dx * eBot.dy - eBot.dx * eMaj.dy;
+#endif
+      /* Do backface culling */
+      if (area * bf < 0.0)
+         return;
+
+      if (IS_INF_OR_NAN(area) || area == 0.0F)
+         return;
+
+      oneOverArea = 1.0F / area;
+   }
+
+
+   span.facing = ctx->_Facing; /* for 2-sided stencil test */
+
+   /* Edge setup.  For a triangle strip these could be reused... */
+   {
+#if TRIANGLE_WALK_DOUBLE
+      eMaj.fsy = CEILF(vMin_fy);
+      eMaj.lines = (GLint) CEILF(vMax_fy - eMaj.fsy);
+#else
+      eMaj.fsy = FixedCeil(vMin_fy);
+      eMaj.lines = FixedToInt(FixedCeil(vMax_fy - eMaj.fsy));
+#endif
+      if (eMaj.lines > 0) {
+         eMaj.dxdy = eMaj.dx / eMaj.dy;
+#if TRIANGLE_WALK_DOUBLE
+         eMaj.adjy = (eMaj.fsy - vMin_fy) * FIXED_SCALE;  /* SCALED! */
+         eMaj.fx0 = vMin_fx;
+         eMaj.fsx = eMaj.fx0 + (eMaj.adjy * eMaj.dxdy) / (GLdouble) FIXED_SCALE;
+#else
+         eMaj.fdxdy = SignedFloatToFixed(eMaj.dxdy);
+         eMaj.adjy = (GLfloat) (eMaj.fsy - vMin_fy);  /* SCALED! */
+         eMaj.fx0 = vMin_fx;
+         eMaj.fsx = eMaj.fx0 + (GLfixed) (eMaj.adjy * eMaj.dxdy);
+#endif
+      }
+      else {
+         return;  /*CULLED*/
+      }
+
+#if TRIANGLE_WALK_DOUBLE
+      eTop.fsy = CEILF(vMid_fy);
+      eTop.lines = (GLint) CEILF(vMax_fy - eTop.fsy);
+#else
+      eTop.fsy = FixedCeil(vMid_fy);
+      eTop.lines = FixedToInt(FixedCeil(vMax_fy - eTop.fsy));
+#endif
+      if (eTop.lines > 0) {
+         eTop.dxdy = eTop.dx / eTop.dy;
+#if TRIANGLE_WALK_DOUBLE
+         eTop.adjy = (eTop.fsy - vMid_fy) * FIXED_SCALE; /* SCALED! */
+         eTop.fx0 = vMid_fx;
+         eTop.fsx = eTop.fx0 + (eTop.adjy * eTop.dxdy) / (GLdouble) FIXED_SCALE;
+#else
+         eTop.fdxdy = SignedFloatToFixed(eTop.dxdy);
+         eTop.adjy = (GLfloat) (eTop.fsy - vMid_fy); /* SCALED! */
+         eTop.fx0 = vMid_fx;
+         eTop.fsx = eTop.fx0 + (GLfixed) (eTop.adjy * eTop.dxdy);
+#endif
+      }
+
+#if TRIANGLE_WALK_DOUBLE
+      eBot.fsy = CEILF(vMin_fy);
+      eBot.lines = (GLint) CEILF(vMid_fy - eBot.fsy);
+#else
+      eBot.fsy = FixedCeil(vMin_fy);
+      eBot.lines = FixedToInt(FixedCeil(vMid_fy - eBot.fsy));
+#endif
+      if (eBot.lines > 0) {
+         eBot.dxdy = eBot.dx / eBot.dy;
+#if TRIANGLE_WALK_DOUBLE
+         eBot.adjy = (eBot.fsy - vMin_fy) * FIXED_SCALE;  /* SCALED! */
+         eBot.fx0 = vMin_fx;
+         eBot.fsx = eBot.fx0 + (eBot.adjy * eBot.dxdy) / (GLdouble) FIXED_SCALE;
+#else
+         eBot.fdxdy = SignedFloatToFixed(eBot.dxdy);
+         eBot.adjy = (GLfloat) (eBot.fsy - vMin_fy);  /* SCALED! */
+         eBot.fx0 = vMin_fx;
+         eBot.fsx = eBot.fx0 + (GLfixed) (eBot.adjy * eBot.dxdy);
+#endif
+      }
+   }
+
+   /*
+    * Conceptually, we view a triangle as two subtriangles
+    * separated by a perfectly horizontal line.  The edge that is
+    * intersected by this line is one with maximal absolute dy; we
+    * call it a ``major'' edge.  The other two edges are the
+    * ``top'' edge (for the upper subtriangle) and the ``bottom''
+    * edge (for the lower subtriangle).  If either of these two
+    * edges is horizontal or very close to horizontal, the
+    * corresponding subtriangle might cover zero sample points;
+    * we take care to handle such cases, for performance as well
+    * as correctness.
+    *
+    * By stepping rasterization parameters along the major edge,
+    * we can avoid recomputing them at the discontinuity where
+    * the top and bottom edges meet.  However, this forces us to
+    * be able to scan both left-to-right and right-to-left.
+    * Also, we must determine whether the major edge is at the
+    * left or right side of the triangle.  We do this by
+    * computing the magnitude of the cross-product of the major
+    * and top edges.  Since this magnitude depends on the sine of
+    * the angle between the two edges, its sign tells us whether
+    * we turn to the left or to the right when travelling along
+    * the major edge to the top edge, and from this we infer
+    * whether the major edge is on the left or the right.
+    *
+    * Serendipitously, this cross-product magnitude is also a
+    * value we need to compute the iteration parameter
+    * derivatives for the triangle, and it can be used to perform
+    * backface culling because its sign tells us whether the
+    * triangle is clockwise or counterclockwise.  In this code we
+    * refer to it as ``area'' because it's also proportional to
+    * the pixel area of the triangle.
+    */
+
+   {
+      GLint scan_from_left_to_right;  /* true if scanning left-to-right */
+#ifdef INTERP_INDEX
+      GLfloat didx, didy;
+#endif
+
+      /*
+       * Execute user-supplied setup code
+       */
+#ifdef SETUP_CODE
+      SETUP_CODE
+#endif
+
+      scan_from_left_to_right = (oneOverArea < 0.0F);
+
+
+      /* compute d?/dx and d?/dy derivatives */
+#ifdef INTERP_Z
+      span.interpMask |= SPAN_Z;
+      {
+         GLfloat eMaj_dz = vMax->win[2] - vMin->win[2];
+         GLfloat eBot_dz = vMid->win[2] - vMin->win[2];
+         span.dzdx = oneOverArea * (eMaj_dz * eBot.dy - eMaj.dy * eBot_dz);
+         if (span.dzdx > maxDepth || span.dzdx < -maxDepth) {
+            /* probably a sliver triangle */
+            span.dzdx = 0.0;
+            span.dzdy = 0.0;
+         }
+         else {
+            span.dzdy = oneOverArea * (eMaj.dx * eBot_dz - eMaj_dz * eBot.dx);
+         }
+         if (depthBits <= 16)
+            span.zStep = SignedFloatToFixed(span.dzdx);
+         else
+            span.zStep = (GLint) span.dzdx;
+      }
+#endif
+#ifdef INTERP_W
+      span.interpMask |= SPAN_W;
+      {
+         const GLfloat eMaj_dw = vMax->win[3] - vMin->win[3];
+         const GLfloat eBot_dw = vMid->win[3] - vMin->win[3];
+         span.dwdx = oneOverArea * (eMaj_dw * eBot.dy - eMaj.dy * eBot_dw);
+         span.dwdy = oneOverArea * (eMaj.dx * eBot_dw - eMaj_dw * eBot.dx);
+      }
+#endif
+#ifdef INTERP_FOG
+      span.interpMask |= SPAN_FOG;
+      {
+#  ifdef INTERP_W
+         const GLfloat wMax = vMax->win[3], wMin = vMin->win[3], wMid = vMid->win[3];
+         const GLfloat eMaj_dfog = vMax->fog * wMax - vMin->fog * wMin;
+         const GLfloat eBot_dfog = vMid->fog * wMid - vMin->fog * wMin;
+#  else
+         const GLfloat eMaj_dfog = vMax->fog - vMin->fog;
+         const GLfloat eBot_dfog = vMid->fog - vMin->fog;
+#  endif
+         span.dfogdx = oneOverArea * (eMaj_dfog * eBot.dy - eMaj.dy * eBot_dfog);
+         span.dfogdy = oneOverArea * (eMaj.dx * eBot_dfog - eMaj_dfog * eBot.dx);
+         span.fogStep = span.dfogdx;
+      }
+#endif
+#ifdef INTERP_RGB
+      span.interpMask |= SPAN_RGBA;
+      if (ctx->Light.ShadeModel == GL_SMOOTH) {
+         GLfloat eMaj_dr = (GLfloat) ((ColorTemp) vMax->color[RCOMP] - (ColorTemp) vMin->color[RCOMP]);
+         GLfloat eBot_dr = (GLfloat) ((ColorTemp) vMid->color[RCOMP] - (ColorTemp) vMin->color[RCOMP]);
+         GLfloat eMaj_dg = (GLfloat) ((ColorTemp) vMax->color[GCOMP] - (ColorTemp) vMin->color[GCOMP]);
+         GLfloat eBot_dg = (GLfloat) ((ColorTemp) vMid->color[GCOMP] - (ColorTemp) vMin->color[GCOMP]);
+         GLfloat eMaj_db = (GLfloat) ((ColorTemp) vMax->color[BCOMP] - (ColorTemp) vMin->color[BCOMP]);
+         GLfloat eBot_db = (GLfloat) ((ColorTemp) vMid->color[BCOMP] - (ColorTemp) vMin->color[BCOMP]);
+#  ifdef INTERP_ALPHA
+         GLfloat eMaj_da = (GLfloat) ((ColorTemp) vMax->color[ACOMP] - (ColorTemp) vMin->color[ACOMP]);
+         GLfloat eBot_da = (GLfloat) ((ColorTemp) vMid->color[ACOMP] - (ColorTemp) vMin->color[ACOMP]);
+#  endif
+         span.drdx = oneOverArea * (eMaj_dr * eBot.dy - eMaj.dy * eBot_dr);
+         span.drdy = oneOverArea * (eMaj.dx * eBot_dr - eMaj_dr * eBot.dx);
+         span.dgdx = oneOverArea * (eMaj_dg * eBot.dy - eMaj.dy * eBot_dg);
+         span.dgdy = oneOverArea * (eMaj.dx * eBot_dg - eMaj_dg * eBot.dx);
+         span.dbdx = oneOverArea * (eMaj_db * eBot.dy - eMaj.dy * eBot_db);
+         span.dbdy = oneOverArea * (eMaj.dx * eBot_db - eMaj_db * eBot.dx);
+#  if CHAN_TYPE == GL_FLOAT
+         span.redStep   = span.drdx;
+         span.greenStep = span.dgdx;
+         span.blueStep  = span.dbdx;
+#  else
+         span.redStep   = SignedFloatToFixed(span.drdx);
+         span.greenStep = SignedFloatToFixed(span.dgdx);
+         span.blueStep  = SignedFloatToFixed(span.dbdx);
+#  endif /* GL_FLOAT */
+#  ifdef INTERP_ALPHA
+         span.dadx = oneOverArea * (eMaj_da * eBot.dy - eMaj.dy * eBot_da);
+         span.dady = oneOverArea * (eMaj.dx * eBot_da - eMaj_da * eBot.dx);
+#    if CHAN_TYPE == GL_FLOAT
+         span.alphaStep = span.dadx;
+#    else
+         span.alphaStep = SignedFloatToFixed(span.dadx);
+#    endif /* GL_FLOAT */
+#  endif /* INTERP_ALPHA */
+      }
+      else {
+         ASSERT(ctx->Light.ShadeModel == GL_FLAT);
+         span.interpMask |= SPAN_FLAT;
+         span.drdx = span.drdy = 0.0F;
+         span.dgdx = span.dgdy = 0.0F;
+         span.dbdx = span.dbdy = 0.0F;
+#    if CHAN_TYPE == GL_FLOAT
+	 span.redStep   = 0.0F;
+	 span.greenStep = 0.0F;
+	 span.blueStep  = 0.0F;
+#    else
+	 span.redStep   = 0;
+	 span.greenStep = 0;
+	 span.blueStep  = 0;
+#    endif /* GL_FLOAT */
+#  ifdef INTERP_ALPHA
+         span.dadx = span.dady = 0.0F;
+#    if CHAN_TYPE == GL_FLOAT
+	 span.alphaStep = 0.0F;
+#    else
+	 span.alphaStep = 0;
+#    endif /* GL_FLOAT */
+#  endif
+      }
+#endif /* INTERP_RGB */
+#ifdef INTERP_SPEC
+      span.interpMask |= SPAN_SPEC;
+      if (ctx->Light.ShadeModel == GL_SMOOTH) {
+         GLfloat eMaj_dsr = (GLfloat) ((ColorTemp) vMax->specular[RCOMP] - (ColorTemp) vMin->specular[RCOMP]);
+         GLfloat eBot_dsr = (GLfloat) ((ColorTemp) vMid->specular[RCOMP] - (ColorTemp) vMin->specular[RCOMP]);
+         GLfloat eMaj_dsg = (GLfloat) ((ColorTemp) vMax->specular[GCOMP] - (ColorTemp) vMin->specular[GCOMP]);
+         GLfloat eBot_dsg = (GLfloat) ((ColorTemp) vMid->specular[GCOMP] - (ColorTemp) vMin->specular[GCOMP]);
+         GLfloat eMaj_dsb = (GLfloat) ((ColorTemp) vMax->specular[BCOMP] - (ColorTemp) vMin->specular[BCOMP]);
+         GLfloat eBot_dsb = (GLfloat) ((ColorTemp) vMid->specular[BCOMP] - (ColorTemp) vMin->specular[BCOMP]);
+         span.dsrdx = oneOverArea * (eMaj_dsr * eBot.dy - eMaj.dy * eBot_dsr);
+         span.dsrdy = oneOverArea * (eMaj.dx * eBot_dsr - eMaj_dsr * eBot.dx);
+         span.dsgdx = oneOverArea * (eMaj_dsg * eBot.dy - eMaj.dy * eBot_dsg);
+         span.dsgdy = oneOverArea * (eMaj.dx * eBot_dsg - eMaj_dsg * eBot.dx);
+         span.dsbdx = oneOverArea * (eMaj_dsb * eBot.dy - eMaj.dy * eBot_dsb);
+         span.dsbdy = oneOverArea * (eMaj.dx * eBot_dsb - eMaj_dsb * eBot.dx);
+#  if CHAN_TYPE == GL_FLOAT
+         span.specRedStep   = span.dsrdx;
+         span.specGreenStep = span.dsgdx;
+         span.specBlueStep  = span.dsbdx;
+#  else
+         span.specRedStep   = SignedFloatToFixed(span.dsrdx);
+         span.specGreenStep = SignedFloatToFixed(span.dsgdx);
+         span.specBlueStep  = SignedFloatToFixed(span.dsbdx);
+#  endif
+      }
+      else {
+         span.dsrdx = span.dsrdy = 0.0F;
+         span.dsgdx = span.dsgdy = 0.0F;
+         span.dsbdx = span.dsbdy = 0.0F;
+#  if CHAN_TYPE == GL_FLOAT
+	 span.specRedStep   = 0.0F;
+	 span.specGreenStep = 0.0F;
+	 span.specBlueStep  = 0.0F;
+#  else
+	 span.specRedStep   = 0;
+	 span.specGreenStep = 0;
+	 span.specBlueStep  = 0;
+#  endif
+      }
+#endif /* INTERP_SPEC */
+#ifdef INTERP_INDEX
+      span.interpMask |= SPAN_INDEX;
+      if (ctx->Light.ShadeModel == GL_SMOOTH) {
+         GLfloat eMaj_di = vMax->index - vMin->index;
+         GLfloat eBot_di = vMid->index - vMin->index;
+         didx = oneOverArea * (eMaj_di * eBot.dy - eMaj.dy * eBot_di);
+         didy = oneOverArea * (eMaj.dx * eBot_di - eMaj_di * eBot.dx);
+         span.indexStep = SignedFloatToFixed(didx);
+      }
+      else {
+         span.interpMask |= SPAN_FLAT;
+         didx = didy = 0.0F;
+         span.indexStep = 0;
+      }
+#endif
+#ifdef INTERP_INT_TEX
+      span.interpMask |= SPAN_INT_TEXTURE;
+      {
+         GLfloat eMaj_ds = (vMax->texcoord[0][0] - vMin->texcoord[0][0]) * S_SCALE;
+         GLfloat eBot_ds = (vMid->texcoord[0][0] - vMin->texcoord[0][0]) * S_SCALE;
+         GLfloat eMaj_dt = (vMax->texcoord[0][1] - vMin->texcoord[0][1]) * T_SCALE;
+         GLfloat eBot_dt = (vMid->texcoord[0][1] - vMin->texcoord[0][1]) * T_SCALE;
+         span.texStepX[0][0] = oneOverArea * (eMaj_ds * eBot.dy - eMaj.dy * eBot_ds);
+         span.texStepY[0][0] = oneOverArea * (eMaj.dx * eBot_ds - eMaj_ds * eBot.dx);
+         span.texStepX[0][1] = oneOverArea * (eMaj_dt * eBot.dy - eMaj.dy * eBot_dt);
+         span.texStepY[0][1] = oneOverArea * (eMaj.dx * eBot_dt - eMaj_dt * eBot.dx);
+         span.intTexStep[0] = SignedFloatToFixed(span.texStepX[0][0]);
+         span.intTexStep[1] = SignedFloatToFixed(span.texStepX[0][1]);
+      }
+#endif
+#ifdef INTERP_TEX
+      span.interpMask |= SPAN_TEXTURE;
+      {
+         /* win[3] is 1/W */
+         const GLfloat wMax = vMax->win[3], wMin = vMin->win[3], wMid = vMid->win[3];
+         TEX_UNIT_LOOP(
+            GLfloat eMaj_ds = vMax->texcoord[u][0] * wMax - vMin->texcoord[u][0] * wMin;
+            GLfloat eBot_ds = vMid->texcoord[u][0] * wMid - vMin->texcoord[u][0] * wMin;
+            GLfloat eMaj_dt = vMax->texcoord[u][1] * wMax - vMin->texcoord[u][1] * wMin;
+            GLfloat eBot_dt = vMid->texcoord[u][1] * wMid - vMin->texcoord[u][1] * wMin;
+            GLfloat eMaj_du = vMax->texcoord[u][2] * wMax - vMin->texcoord[u][2] * wMin;
+            GLfloat eBot_du = vMid->texcoord[u][2] * wMid - vMin->texcoord[u][2] * wMin;
+            GLfloat eMaj_dv = vMax->texcoord[u][3] * wMax - vMin->texcoord[u][3] * wMin;
+            GLfloat eBot_dv = vMid->texcoord[u][3] * wMid - vMin->texcoord[u][3] * wMin;
+            span.texStepX[u][0] = oneOverArea * (eMaj_ds * eBot.dy - eMaj.dy * eBot_ds);
+            span.texStepY[u][0] = oneOverArea * (eMaj.dx * eBot_ds - eMaj_ds * eBot.dx);
+            span.texStepX[u][1] = oneOverArea * (eMaj_dt * eBot.dy - eMaj.dy * eBot_dt);
+            span.texStepY[u][1] = oneOverArea * (eMaj.dx * eBot_dt - eMaj_dt * eBot.dx);
+            span.texStepX[u][2] = oneOverArea * (eMaj_du * eBot.dy - eMaj.dy * eBot_du);
+            span.texStepY[u][2] = oneOverArea * (eMaj.dx * eBot_du - eMaj_du * eBot.dx);
+            span.texStepX[u][3] = oneOverArea * (eMaj_dv * eBot.dy - eMaj.dy * eBot_dv);
+            span.texStepY[u][3] = oneOverArea * (eMaj.dx * eBot_dv - eMaj_dv * eBot.dx);
+         )
+      }
+#endif
+#ifdef INTERP_VARYING
+      span.interpMask |= SPAN_VARYING;
+      {
+         /* win[3] is 1/W */
+         const GLfloat wMax = vMax->win[3], wMin = vMin->win[3], wMid = vMid->win[3];
+         VARYING_LOOP(
+            GLfloat eMaj_dvar = vMax->attribute[iv][ic] * wMax - vMin->attribute[iv][ic] * wMin;
+            GLfloat eBot_dvar = vMid->attribute[iv][ic] * wMid - vMin->attribute[iv][ic] * wMin;
+            span.varStepX[iv][ic] = oneOverArea * (eMaj_dvar * eBot.dy - eMaj.dy * eBot_dvar);
+            span.varStepY[iv][ic] = oneOverArea * (eMaj.dx * eBot_dvar - eMaj_dvar * eBot.dx);
+         )
+      }
+#endif
+
+      /*
+       * We always sample at pixel centers.  However, we avoid
+       * explicit half-pixel offsets in this code by incorporating
+       * the proper offset in each of x and y during the
+       * transformation to window coordinates.
+       *
+       * We also apply the usual rasterization rules to prevent
+       * cracks and overlaps.  A pixel is considered inside a
+       * subtriangle if it meets all of four conditions: it is on or
+       * to the right of the left edge, strictly to the left of the
+       * right edge, on or below the top edge, and strictly above
+       * the bottom edge.  (Some edges may be degenerate.)
+       *
+       * The following discussion assumes left-to-right scanning
+       * (that is, the major edge is on the left); the right-to-left
+       * case is a straightforward variation.
+       *
+       * We start by finding the half-integral y coordinate that is
+       * at or below the top of the triangle.  This gives us the
+       * first scan line that could possibly contain pixels that are
+       * inside the triangle.
+       *
+       * Next we creep down the major edge until we reach that y,
+       * and compute the corresponding x coordinate on the edge.
+       * Then we find the half-integral x that lies on or just
+       * inside the edge.  This is the first pixel that might lie in
+       * the interior of the triangle.  (We won't know for sure
+       * until we check the other edges.)
+       *
+       * As we rasterize the triangle, we'll step down the major
+       * edge.  For each step in y, we'll move an integer number
+       * of steps in x.  There are two possible x step sizes, which
+       * we'll call the ``inner'' step (guaranteed to land on the
+       * edge or inside it) and the ``outer'' step (guaranteed to
+       * land on the edge or outside it).  The inner and outer steps
+       * differ by one.  During rasterization we maintain an error
+       * term that indicates our distance from the true edge, and
+       * select either the inner step or the outer step, whichever
+       * gets us to the first pixel that falls inside the triangle.
+       *
+       * All parameters (z, red, etc.) as well as the buffer
+       * addresses for color and z have inner and outer step values,
+       * so that we can increment them appropriately.  This method
+       * eliminates the need to adjust parameters by creeping a
+       * sub-pixel amount into the triangle at each scanline.
+       */
+
+      {
+         GLint subTriangle;
+         GLinterp fxLeftEdge = 0, fxRightEdge = 0;
+         GLinterp fdxLeftEdge = 0, fdxRightEdge = 0;
+         GLinterp fError = 0, fdError = 0;
+#ifdef PIXEL_ADDRESS
+         PIXEL_TYPE *pRow = NULL;
+         GLint dPRowOuter = 0, dPRowInner;  /* offset in bytes */
+#endif
+#ifdef INTERP_Z
+#  ifdef DEPTH_TYPE
+         struct gl_renderbuffer *zrb
+            = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+         DEPTH_TYPE *zRow = NULL;
+         GLint dZRowOuter = 0, dZRowInner;  /* offset in bytes */
+#  endif
+         GLuint zLeft = 0;
+         GLfixed fdzOuter = 0, fdzInner;
+#endif
+#ifdef INTERP_W
+         GLfloat wLeft = 0, dwOuter = 0, dwInner;
+#endif
+#ifdef INTERP_FOG
+         GLfloat fogLeft = 0, dfogOuter = 0, dfogInner;
+#endif
+#ifdef INTERP_RGB
+         ColorTemp rLeft = 0, fdrOuter = 0, fdrInner;
+         ColorTemp gLeft = 0, fdgOuter = 0, fdgInner;
+         ColorTemp bLeft = 0, fdbOuter = 0, fdbInner;
+#endif
+#ifdef INTERP_ALPHA
+         ColorTemp aLeft = 0, fdaOuter = 0, fdaInner;
+#endif
+#ifdef INTERP_SPEC
+         ColorTemp srLeft=0, dsrOuter=0, dsrInner;
+         ColorTemp sgLeft=0, dsgOuter=0, dsgInner;
+         ColorTemp sbLeft=0, dsbOuter=0, dsbInner;
+#endif
+#ifdef INTERP_INDEX
+         GLfixed iLeft=0, diOuter=0, diInner;
+#endif
+#ifdef INTERP_INT_TEX
+         GLfixed sLeft=0, dsOuter=0, dsInner;
+         GLfixed tLeft=0, dtOuter=0, dtInner;
+#endif
+#ifdef INTERP_TEX
+         GLfloat sLeft[MAX_TEXTURE_COORD_UNITS];
+         GLfloat tLeft[MAX_TEXTURE_COORD_UNITS];
+         GLfloat uLeft[MAX_TEXTURE_COORD_UNITS];
+         GLfloat vLeft[MAX_TEXTURE_COORD_UNITS];
+         GLfloat dsOuter[MAX_TEXTURE_COORD_UNITS], dsInner[MAX_TEXTURE_COORD_UNITS];
+         GLfloat dtOuter[MAX_TEXTURE_COORD_UNITS], dtInner[MAX_TEXTURE_COORD_UNITS];
+         GLfloat duOuter[MAX_TEXTURE_COORD_UNITS], duInner[MAX_TEXTURE_COORD_UNITS];
+         GLfloat dvOuter[MAX_TEXTURE_COORD_UNITS], dvInner[MAX_TEXTURE_COORD_UNITS];
+#endif
+#ifdef INTERP_VARYING
+         GLfloat varLeft[MAX_VARYING_VECTORS][VARYINGS_PER_VECTOR];
+         GLfloat dvarOuter[MAX_VARYING_VECTORS][VARYINGS_PER_VECTOR];
+         GLfloat dvarInner[MAX_VARYING_VECTORS][VARYINGS_PER_VECTOR];
+#endif
+
+         for (subTriangle=0; subTriangle<=1; subTriangle++) {
+            EdgeT *eLeft, *eRight;
+            int setupLeft, setupRight;
+            int lines;
+
+            if (subTriangle==0) {
+               /* bottom half */
+               if (scan_from_left_to_right) {
+                  eLeft = &eMaj;
+                  eRight = &eBot;
+                  lines = eRight->lines;
+                  setupLeft = 1;
+                  setupRight = 1;
+               }
+               else {
+                  eLeft = &eBot;
+                  eRight = &eMaj;
+                  lines = eLeft->lines;
+                  setupLeft = 1;
+                  setupRight = 1;
+               }
+            }
+            else {
+               /* top half */
+               if (scan_from_left_to_right) {
+                  eLeft = &eMaj;
+                  eRight = &eTop;
+                  lines = eRight->lines;
+                  setupLeft = 0;
+                  setupRight = 1;
+               }
+               else {
+                  eLeft = &eTop;
+                  eRight = &eMaj;
+                  lines = eLeft->lines;
+                  setupLeft = 1;
+                  setupRight = 0;
+               }
+               if (lines == 0)
+                  return;
+            }
+
+            if (setupLeft && eLeft->lines > 0) {
+               const SWvertex *vLower = eLeft->v0;
+#if TRIANGLE_WALK_DOUBLE
+               const GLdouble fsy = eLeft->fsy;
+               const GLdouble fsx = eLeft->fsx;
+               const GLdouble fx = CEILF(fsx);
+               const GLdouble adjx = (fx - eLeft->fx0) * FIXED_SCALE;  /* SCALED! */
+#else
+               const GLfixed fsy = eLeft->fsy;
+               const GLfixed fsx = eLeft->fsx;  /* no fractional part */
+               const GLfixed fx = FixedCeil(fsx);  /* no fractional part */
+               const GLfixed adjx = (GLinterp) (fx - eLeft->fx0); /* SCALED! */
+#endif
+               const GLinterp adjy = (GLinterp) eLeft->adjy;      /* SCALED! */
+               GLint idxOuter;
+#if TRIANGLE_WALK_DOUBLE
+               GLdouble dxOuter;
+
+               fError = fx - fsx - 1.0;
+               fxLeftEdge = fsx;
+               fdxLeftEdge = eLeft->dxdy;
+               dxOuter = FLOORF(fdxLeftEdge);
+               fdError = dxOuter - fdxLeftEdge + 1.0;
+               idxOuter = (GLint) dxOuter;
+               span.y = (GLint) fsy;
+#else
+               GLfloat dxOuter;
+               GLfixed fdxOuter;
+
+               fError = fx - fsx - FIXED_ONE;
+               fxLeftEdge = fsx - FIXED_EPSILON;
+               fdxLeftEdge = eLeft->fdxdy;
+               fdxOuter = FixedFloor(fdxLeftEdge - FIXED_EPSILON);
+               fdError = fdxOuter - fdxLeftEdge + FIXED_ONE;
+               idxOuter = FixedToInt(fdxOuter);
+               dxOuter = (GLfloat) idxOuter;
+               span.y = FixedToInt(fsy);
+#endif
+
+               /* silence warnings on some compilers */
+               (void) dxOuter;
+               (void) adjx;
+               (void) adjy;
+               (void) vLower;
+
+#ifdef PIXEL_ADDRESS
+               {
+                  pRow = (PIXEL_TYPE *) PIXEL_ADDRESS(InterpToInt(fxLeftEdge), span.y);
+                  dPRowOuter = -((int)BYTES_PER_ROW) + idxOuter * sizeof(PIXEL_TYPE);
+                  /* negative because Y=0 at bottom and increases upward */
+               }
+#endif
+               /*
+                * Now we need the set of parameter (z, color, etc.) values at
+                * the point (fx, fsy).  This gives us properly-sampled parameter
+                * values that we can step from pixel to pixel.  Furthermore,
+                * although we might have intermediate results that overflow
+                * the normal parameter range when we step temporarily outside
+                * the triangle, we shouldn't overflow or underflow for any
+                * pixel that's actually inside the triangle.
+                */
+
+#ifdef INTERP_Z
+               {
+                  GLfloat z0 = vLower->win[2];
+                  if (depthBits <= 16) {
+                     /* interpolate fixed-pt values */
+                     GLfloat tmp = (z0 * FIXED_SCALE + span.dzdx * adjx
+                                    + span.dzdy * adjy) + FIXED_HALF;
+                     if (tmp < MAX_GLUINT / 2)
+                        zLeft = (GLfixed) tmp;
+                     else
+                        zLeft = MAX_GLUINT / 2;
+                     fdzOuter = SignedFloatToFixed(span.dzdy + dxOuter * span.dzdx);
+                  }
+                  else {
+                     /* interpolate depth values w/out scaling */
+                     zLeft = (GLuint) (z0 + span.dzdx * FixedToFloat(adjx)
+                                       + span.dzdy * FixedToFloat(adjy));
+                     fdzOuter = (GLint) (span.dzdy + dxOuter * span.dzdx);
+                  }
+#  ifdef DEPTH_TYPE
+                  zRow = (DEPTH_TYPE *)
+                    zrb->GetPointer(ctx, zrb, InterpToInt(fxLeftEdge), span.y);
+                  dZRowOuter = (ctx->DrawBuffer->Width + idxOuter) * sizeof(DEPTH_TYPE);
+#  endif
+               }
+#endif
+#ifdef INTERP_W
+               wLeft = vLower->win[3] + (span.dwdx * adjx + span.dwdy * adjy) * (1.0F/FIXED_SCALE);
+               dwOuter = span.dwdy + dxOuter * span.dwdx;
+#endif
+#ifdef INTERP_FOG
+#  ifdef INTERP_W
+               fogLeft = vLower->fog * vLower->win[3] + (span.dfogdx * adjx + span.dfogdy * adjy) * (1.0F/FIXED_SCALE);
+#  else
+               fogLeft = vLower->fog + (span.dfogdx * adjx + span.dfogdy * adjy) * (1.0F/FIXED_SCALE);
+#  endif
+               dfogOuter = span.dfogdy + dxOuter * span.dfogdx;
+#endif
+#ifdef INTERP_RGB
+               if (ctx->Light.ShadeModel == GL_SMOOTH) {
+#  if CHAN_TYPE == GL_FLOAT
+                  rLeft = vLower->color[RCOMP] + (span.drdx * adjx + span.drdy * adjy) * (1.0F / FIXED_SCALE);
+                  gLeft = vLower->color[GCOMP] + (span.dgdx * adjx + span.dgdy * adjy) * (1.0F / FIXED_SCALE);
+                  bLeft = vLower->color[BCOMP] + (span.dbdx * adjx + span.dbdy * adjy) * (1.0F / FIXED_SCALE);
+                  fdrOuter = span.drdy + dxOuter * span.drdx;
+                  fdgOuter = span.dgdy + dxOuter * span.dgdx;
+                  fdbOuter = span.dbdy + dxOuter * span.dbdx;
+#  else
+                  rLeft = (GLint)(ChanToFixed(vLower->color[RCOMP]) + span.drdx * adjx + span.drdy * adjy) + FIXED_HALF;
+                  gLeft = (GLint)(ChanToFixed(vLower->color[GCOMP]) + span.dgdx * adjx + span.dgdy * adjy) + FIXED_HALF;
+                  bLeft = (GLint)(ChanToFixed(vLower->color[BCOMP]) + span.dbdx * adjx + span.dbdy * adjy) + FIXED_HALF;
+                  fdrOuter = SignedFloatToFixed(span.drdy + dxOuter * span.drdx);
+                  fdgOuter = SignedFloatToFixed(span.dgdy + dxOuter * span.dgdx);
+                  fdbOuter = SignedFloatToFixed(span.dbdy + dxOuter * span.dbdx);
+#  endif
+#  ifdef INTERP_ALPHA
+#    if CHAN_TYPE == GL_FLOAT
+                  aLeft = vLower->color[ACOMP] + (span.dadx * adjx + span.dady * adjy) * (1.0F / FIXED_SCALE);
+                  fdaOuter = span.dady + dxOuter * span.dadx;
+#    else
+                  aLeft = (GLint)(ChanToFixed(vLower->color[ACOMP]) + span.dadx * adjx + span.dady * adjy) + FIXED_HALF;
+                  fdaOuter = SignedFloatToFixed(span.dady + dxOuter * span.dadx);
+#    endif
+#  endif
+               }
+               else {
+                  ASSERT(ctx->Light.ShadeModel == GL_FLAT);
+#  if CHAN_TYPE == GL_FLOAT
+                  rLeft = v2->color[RCOMP];
+                  gLeft = v2->color[GCOMP];
+                  bLeft = v2->color[BCOMP];
+                  fdrOuter = fdgOuter = fdbOuter = 0.0F;
+#  else
+                  rLeft = ChanToFixed(v2->color[RCOMP]);
+                  gLeft = ChanToFixed(v2->color[GCOMP]);
+                  bLeft = ChanToFixed(v2->color[BCOMP]);
+                  fdrOuter = fdgOuter = fdbOuter = 0;
+#  endif
+#  ifdef INTERP_ALPHA
+#    if CHAN_TYPE == GL_FLOAT
+                  aLeft = v2->color[ACOMP];
+                  fdaOuter = 0.0F;
+#    else
+                  aLeft = ChanToFixed(v2->color[ACOMP]);
+                  fdaOuter = 0;
+#    endif
+#  endif
+               }
+#endif /* INTERP_RGB */
+
+
+#ifdef INTERP_SPEC
+               if (ctx->Light.ShadeModel == GL_SMOOTH) {
+#  if CHAN_TYPE == GL_FLOAT
+                  srLeft = vLower->specular[RCOMP] + (span.dsrdx * adjx + span.dsrdy * adjy) * (1.0F / FIXED_SCALE);
+                  sgLeft = vLower->specular[GCOMP] + (span.dsgdx * adjx + span.dsgdy * adjy) * (1.0F / FIXED_SCALE);
+                  sbLeft = vLower->specular[BCOMP] + (span.dsbdx * adjx + span.dsbdy * adjy) * (1.0F / FIXED_SCALE);
+                  dsrOuter = span.dsrdy + dxOuter * span.dsrdx;
+                  dsgOuter = span.dsgdy + dxOuter * span.dsgdx;
+                  dsbOuter = span.dsbdy + dxOuter * span.dsbdx;
+#  else
+                  srLeft = (GLfixed) (ChanToFixed(vLower->specular[RCOMP]) + span.dsrdx * adjx + span.dsrdy * adjy) + FIXED_HALF;
+                  sgLeft = (GLfixed) (ChanToFixed(vLower->specular[GCOMP]) + span.dsgdx * adjx + span.dsgdy * adjy) + FIXED_HALF;
+                  sbLeft = (GLfixed) (ChanToFixed(vLower->specular[BCOMP]) + span.dsbdx * adjx + span.dsbdy * adjy) + FIXED_HALF;
+                  dsrOuter = SignedFloatToFixed(span.dsrdy + dxOuter * span.dsrdx);
+                  dsgOuter = SignedFloatToFixed(span.dsgdy + dxOuter * span.dsgdx);
+                  dsbOuter = SignedFloatToFixed(span.dsbdy + dxOuter * span.dsbdx);
+#  endif
+               }
+               else {
+                  ASSERT(ctx->Light.ShadeModel == GL_FLAT);
+#if  CHAN_TYPE == GL_FLOAT
+                  srLeft = v2->specular[RCOMP];
+                  sgLeft = v2->specular[GCOMP];
+                  sbLeft = v2->specular[BCOMP];
+                  dsrOuter = dsgOuter = dsbOuter = 0.0F;
+#  else
+                  srLeft = ChanToFixed(v2->specular[RCOMP]);
+                  sgLeft = ChanToFixed(v2->specular[GCOMP]);
+                  sbLeft = ChanToFixed(v2->specular[BCOMP]);
+                  dsrOuter = dsgOuter = dsbOuter = 0;
+#  endif
+               }
+#endif
+
+#ifdef INTERP_INDEX
+               if (ctx->Light.ShadeModel == GL_SMOOTH) {
+                  iLeft = (GLfixed)(vLower->index * FIXED_SCALE
+                                 + didx * adjx + didy * adjy) + FIXED_HALF;
+                  diOuter = SignedFloatToFixed(didy + dxOuter * didx);
+               }
+               else {
+                  ASSERT(ctx->Light.ShadeModel == GL_FLAT);
+                  iLeft = FloatToFixed(v2->index);
+                  diOuter = 0;
+               }
+#endif
+#ifdef INTERP_INT_TEX
+               {
+                  GLfloat s0, t0;
+                  s0 = vLower->texcoord[0][0] * S_SCALE;
+                  sLeft = (GLfixed)(s0 * FIXED_SCALE + span.texStepX[0][0] * adjx
+                                 + span.texStepY[0][0] * adjy) + FIXED_HALF;
+                  dsOuter = SignedFloatToFixed(span.texStepY[0][0] + dxOuter * span.texStepX[0][0]);
+
+                  t0 = vLower->texcoord[0][1] * T_SCALE;
+                  tLeft = (GLfixed)(t0 * FIXED_SCALE + span.texStepX[0][1] * adjx
+                                 + span.texStepY[0][1] * adjy) + FIXED_HALF;
+                  dtOuter = SignedFloatToFixed(span.texStepY[0][1] + dxOuter * span.texStepX[0][1]);
+               }
+#endif
+#ifdef INTERP_TEX
+               TEX_UNIT_LOOP(
+                  const GLfloat invW = vLower->win[3];
+                  const GLfloat s0 = vLower->texcoord[u][0] * invW;
+                  const GLfloat t0 = vLower->texcoord[u][1] * invW;
+                  const GLfloat u0 = vLower->texcoord[u][2] * invW;
+                  const GLfloat v0 = vLower->texcoord[u][3] * invW;
+                  sLeft[u] = s0 + (span.texStepX[u][0] * adjx + span.texStepY[u][0] * adjy) * (1.0F/FIXED_SCALE);
+                  tLeft[u] = t0 + (span.texStepX[u][1] * adjx + span.texStepY[u][1] * adjy) * (1.0F/FIXED_SCALE);
+                  uLeft[u] = u0 + (span.texStepX[u][2] * adjx + span.texStepY[u][2] * adjy) * (1.0F/FIXED_SCALE);
+                  vLeft[u] = v0 + (span.texStepX[u][3] * adjx + span.texStepY[u][3] * adjy) * (1.0F/FIXED_SCALE);
+                  dsOuter[u] = span.texStepY[u][0] + dxOuter * span.texStepX[u][0];
+                  dtOuter[u] = span.texStepY[u][1] + dxOuter * span.texStepX[u][1];
+                  duOuter[u] = span.texStepY[u][2] + dxOuter * span.texStepX[u][2];
+                  dvOuter[u] = span.texStepY[u][3] + dxOuter * span.texStepX[u][3];
+               )
+#endif
+#ifdef INTERP_VARYING
+               VARYING_LOOP(
+                  const GLfloat invW = vLower->win[3];
+                  const GLfloat var0 = vLower->attribute[iv][ic] * invW;
+                  varLeft[iv][ic] = var0 + (span.varStepX[iv][ic] * adjx +
+                     span.varStepY[iv][ic] * adjy) * (1.0f / FIXED_SCALE);
+                  dvarOuter[iv][ic] = span.varStepY[iv][ic] + dxOuter * span.varStepX[iv][ic];
+               )
+#endif
+            } /*if setupLeft*/
+
+
+            if (setupRight && eRight->lines>0) {
+#if TRIANGLE_WALK_DOUBLE
+               fxRightEdge = eRight->fsx;
+               fdxRightEdge = eRight->dxdy;
+#else
+               fxRightEdge = eRight->fsx - FIXED_EPSILON;
+               fdxRightEdge = eRight->fdxdy;
+#endif
+            }
+
+            if (lines==0) {
+               continue;
+            }
+
+
+            /* Rasterize setup */
+#ifdef PIXEL_ADDRESS
+            dPRowInner = dPRowOuter + sizeof(PIXEL_TYPE);
+#endif
+#ifdef INTERP_Z
+#  ifdef DEPTH_TYPE
+            dZRowInner = dZRowOuter + sizeof(DEPTH_TYPE);
+#  endif
+            fdzInner = fdzOuter + span.zStep;
+#endif
+#ifdef INTERP_W
+            dwInner = dwOuter + span.dwdx;
+#endif
+#ifdef INTERP_FOG
+            dfogInner = dfogOuter + span.dfogdx;
+#endif
+#ifdef INTERP_RGB
+            fdrInner = fdrOuter + span.redStep;
+            fdgInner = fdgOuter + span.greenStep;
+            fdbInner = fdbOuter + span.blueStep;
+#endif
+#ifdef INTERP_ALPHA
+            fdaInner = fdaOuter + span.alphaStep;
+#endif
+#ifdef INTERP_SPEC
+            dsrInner = dsrOuter + span.specRedStep;
+            dsgInner = dsgOuter + span.specGreenStep;
+            dsbInner = dsbOuter + span.specBlueStep;
+#endif
+#ifdef INTERP_INDEX
+            diInner = diOuter + span.indexStep;
+#endif
+#ifdef INTERP_INT_TEX
+            dsInner = dsOuter + span.intTexStep[0];
+            dtInner = dtOuter + span.intTexStep[1];
+#endif
+#ifdef INTERP_TEX
+            TEX_UNIT_LOOP(
+               dsInner[u] = dsOuter[u] + span.texStepX[u][0];
+               dtInner[u] = dtOuter[u] + span.texStepX[u][1];
+               duInner[u] = duOuter[u] + span.texStepX[u][2];
+               dvInner[u] = dvOuter[u] + span.texStepX[u][3];
+            )
+#endif
+#ifdef INTERP_VARYING
+            VARYING_LOOP(
+               dvarInner[iv][ic] = dvarOuter[iv][ic] + span.varStepX[iv][ic];
+            )
+#endif
+
+            while (lines > 0) {
+               /* initialize the span interpolants to the leftmost value */
+               /* ff = fixed-pt fragment */
+               const GLint right = InterpToInt(fxRightEdge);
+               span.x = InterpToInt(fxLeftEdge);
+               if (right <= span.x)
+                  span.end = 0;
+               else
+                  span.end = right - span.x;
+
+#ifdef INTERP_Z
+               span.z = zLeft;
+#endif
+#ifdef INTERP_W
+               span.w = wLeft;
+#endif
+#ifdef INTERP_FOG
+               span.fog = fogLeft;
+#endif
+#ifdef INTERP_RGB
+               span.red = rLeft;
+               span.green = gLeft;
+               span.blue = bLeft;
+#endif
+#ifdef INTERP_ALPHA
+               span.alpha = aLeft;
+#endif
+#ifdef INTERP_SPEC
+               span.specRed = srLeft;
+               span.specGreen = sgLeft;
+               span.specBlue = sbLeft;
+#endif
+#ifdef INTERP_INDEX
+               span.index = iLeft;
+#endif
+#ifdef INTERP_INT_TEX
+               span.intTex[0] = sLeft;
+               span.intTex[1] = tLeft;
+#endif
+
+#ifdef INTERP_TEX
+               TEX_UNIT_LOOP(
+                  span.tex[u][0] = sLeft[u];
+                  span.tex[u][1] = tLeft[u];
+                  span.tex[u][2] = uLeft[u];
+                  span.tex[u][3] = vLeft[u];
+               )
+#endif
+#ifdef INTERP_VARYING
+               VARYING_LOOP(
+                  span.var[iv][ic] = varLeft[iv][ic];
+               )
+#endif
+
+               /* This is where we actually generate fragments */
+               /* XXX the test for span.y > 0 _shouldn't_ be needed but
+                * it fixes a problem on 64-bit Opterons (bug 4842).
+                */
+               if (span.end > 0 && span.y >= 0) {
+                  const GLint len = span.end - 1;
+                  (void) len;
+#ifdef INTERP_RGB
+                  CLAMP_INTERPOLANT(red, redStep, len);
+                  CLAMP_INTERPOLANT(green, greenStep, len);
+                  CLAMP_INTERPOLANT(blue, blueStep, len);
+#endif
+#ifdef INTERP_ALPHA
+                  CLAMP_INTERPOLANT(alpha, alphaStep, len);
+#endif
+#ifdef INTERP_SPEC
+                  CLAMP_INTERPOLANT(specRed, specRedStep, len);
+                  CLAMP_INTERPOLANT(specGreen, specGreenStep, len);
+                  CLAMP_INTERPOLANT(specBlue, specBlueStep, len);
+#endif
+#ifdef INTERP_INDEX
+                  CLAMP_INTERPOLANT(index, indexStep, len);
+#endif
+                  {
+                     RENDER_SPAN( span );
+                  }
+               }
+
+               /*
+                * Advance to the next scan line.  Compute the
+                * new edge coordinates, and adjust the
+                * pixel-center x coordinate so that it stays
+                * on or inside the major edge.
+                */
+               span.y++;
+               lines--;
+
+               fxLeftEdge += fdxLeftEdge;
+               fxRightEdge += fdxRightEdge;
+
+               fError += fdError;
+               if (fError >= 0) {
+                  fError -= INTERP_ONE;
+
+#ifdef PIXEL_ADDRESS
+                  pRow = (PIXEL_TYPE *) ((GLubyte *) pRow + dPRowOuter);
+#endif
+#ifdef INTERP_Z
+#  ifdef DEPTH_TYPE
+                  zRow = (DEPTH_TYPE *) ((GLubyte *) zRow + dZRowOuter);
+#  endif
+                  zLeft += fdzOuter;
+#endif
+#ifdef INTERP_W
+                  wLeft += dwOuter;
+#endif
+#ifdef INTERP_FOG
+                  fogLeft += dfogOuter;
+#endif
+#ifdef INTERP_RGB
+                  rLeft += fdrOuter;
+                  gLeft += fdgOuter;
+                  bLeft += fdbOuter;
+#endif
+#ifdef INTERP_ALPHA
+                  aLeft += fdaOuter;
+#endif
+#ifdef INTERP_SPEC
+                  srLeft += dsrOuter;
+                  sgLeft += dsgOuter;
+                  sbLeft += dsbOuter;
+#endif
+#ifdef INTERP_INDEX
+                  iLeft += diOuter;
+#endif
+#ifdef INTERP_INT_TEX
+                  sLeft += dsOuter;
+                  tLeft += dtOuter;
+#endif
+#ifdef INTERP_TEX
+                  TEX_UNIT_LOOP(
+                     sLeft[u] += dsOuter[u];
+                     tLeft[u] += dtOuter[u];
+                     uLeft[u] += duOuter[u];
+                     vLeft[u] += dvOuter[u];
+                  )
+#endif
+#ifdef INTERP_VARYING
+                  VARYING_LOOP(
+                     varLeft[iv][ic] += dvarOuter[iv][ic];
+                  )
+#endif
+               }
+               else {
+#ifdef PIXEL_ADDRESS
+                  pRow = (PIXEL_TYPE *) ((GLubyte *) pRow + dPRowInner);
+#endif
+#ifdef INTERP_Z
+#  ifdef DEPTH_TYPE
+                  zRow = (DEPTH_TYPE *) ((GLubyte *) zRow + dZRowInner);
+#  endif
+                  zLeft += fdzInner;
+#endif
+#ifdef INTERP_W
+                  wLeft += dwInner;
+#endif
+#ifdef INTERP_FOG
+                  fogLeft += dfogInner;
+#endif
+#ifdef INTERP_RGB
+                  rLeft += fdrInner;
+                  gLeft += fdgInner;
+                  bLeft += fdbInner;
+#endif
+#ifdef INTERP_ALPHA
+                  aLeft += fdaInner;
+#endif
+#ifdef INTERP_SPEC
+                  srLeft += dsrInner;
+                  sgLeft += dsgInner;
+                  sbLeft += dsbInner;
+#endif
+#ifdef INTERP_INDEX
+                  iLeft += diInner;
+#endif
+#ifdef INTERP_INT_TEX
+                  sLeft += dsInner;
+                  tLeft += dtInner;
+#endif
+#ifdef INTERP_TEX
+                  TEX_UNIT_LOOP(
+                     sLeft[u] += dsInner[u];
+                     tLeft[u] += dtInner[u];
+                     uLeft[u] += duInner[u];
+                     vLeft[u] += dvInner[u];
+                  )
+#endif
+#ifdef INTERP_VARYING
+                  VARYING_LOOP(
+                     varLeft[iv][ic] += dvarInner[iv][ic];
+                  )
+#endif
+               }
+            } /*while lines>0*/
+
+         } /* for subTriangle */
+
+      }
+#ifdef CLEANUP_CODE
+      CLEANUP_CODE
+#endif
+   }
+}
+
+#undef SETUP_CODE      /* Everything below drops the macros this file relied on; presumably so it can be included again with other settings -- TODO confirm */
+#undef CLEANUP_CODE    /* expanded after the rasterization code above, when defined */
+#undef RENDER_SPAN     /* expanded once per scanline to emit the span */
+/* Optional direct buffer addressing: color rows (pRow) and depth rows (zRow). */
+#undef PIXEL_TYPE
+#undef BYTES_PER_ROW
+#undef PIXEL_ADDRESS
+#undef DEPTH_TYPE
+/* Switches selecting which attributes the code above sets up and steps. */
+#undef INTERP_Z
+#undef INTERP_W
+#undef INTERP_FOG
+#undef INTERP_RGB
+#undef INTERP_ALPHA
+#undef INTERP_SPEC
+#undef INTERP_INDEX
+#undef INTERP_INT_TEX
+#undef INTERP_TEX
+#undef INTERP_MULTITEX
+#undef INTERP_VARYING
+#undef TEX_UNIT_LOOP   /* wraps the per-texture-unit statements (index u) */
+#undef VARYING_LOOP    /* wraps the per-varying statements (indices iv, ic) */
+/* Scale factors applied to texcoords in the INTERP_INT_TEX setup. */
+#undef S_SCALE
+#undef T_SCALE
+/* Type and conversion helper names used by the code above. */
+#undef FixedToDepth
+#undef ColorTemp
+#undef GLinterp
+#undef InterpToInt
+#undef INTERP_ONE
+/* NOTE(review): NAME is presumably the name of the generated function -- confirm. */
+#undef NAME
diff --git a/dist/Mesa/src/mesa/swrast/s_zoom.c b/dist/Mesa/src/mesa/swrast/s_zoom.c
new file mode 100644
index 000000000..3fd914083
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_zoom.c
@@ -0,0 +1,398 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "imports.h"
+#include "colormac.h"
+
+#include "s_context.h"
+#include "s_span.h"
+#include "s_stencil.h"
+#include "s_zoom.h"
+
+
+/**
+ * Map one horizontal span of an image through the current pixel zoom and
+ * clamp the result to the draw buffer's _Xmin/_Xmax and _Ymin/_Ymax limits.
+ * Both returned ranges are half-open and stored smaller value first, even
+ * when a zoom factor is negative.
+ * \param imageX, imageY  position of the overall image being drawn
+ * \param spanX, spanY  position of the span being drawn
+ * \param width  number of pixels in the unzoomed span
+ * \param x0, x1  returned X bounds of zoomed region [x0, x1)
+ * \param y0, y1  returned Y bounds of zoomed region [y0, y1)
+ * \return GL_TRUE if any zoomed pixels remain, GL_FALSE if the region is
+ *         empty after clamping (the outputs are not written in that case)
+ */
+static GLboolean
+compute_zoomed_bounds(GLcontext *ctx, GLint imageX, GLint imageY,
+                      GLint spanX, GLint spanY, GLint width,
+                      GLint *x0, GLint *x1, GLint *y0, GLint *y1)
+{
+   const struct gl_framebuffer *drawBuf = ctx->DrawBuffer;
+   GLint colA, colB, colLo, colHi;
+   GLint rowA, rowB, rowLo, rowHi;
+
+   ASSERT(spanX >= imageX);
+   ASSERT(spanY >= imageY);
+
+   /* Zoom both ends of the span in X; a negative factor reverses them. */
+   colA = imageX + (GLint) ((spanX - imageX) * ctx->Pixel.ZoomX);
+   colB = imageX + (GLint) ((spanX + width - imageX) * ctx->Pixel.ZoomX);
+   colLo = (colB < colA) ? colB : colA;
+   colHi = (colB < colA) ? colA : colB;
+   colLo = CLAMP(colLo, drawBuf->_Xmin, drawBuf->_Xmax);
+   colHi = CLAMP(colHi, drawBuf->_Xmin, drawBuf->_Xmax);
+   if (colLo == colHi)
+      return GL_FALSE; /* no width */
+
+   /* The span is one source row tall: zoom rows spanY and spanY + 1. */
+   rowA = imageY + (GLint) ((spanY - imageY) * ctx->Pixel.ZoomY);
+   rowB = imageY + (GLint) ((spanY + 1 - imageY) * ctx->Pixel.ZoomY);
+   rowLo = (rowB < rowA) ? rowB : rowA;
+   rowHi = (rowB < rowA) ? rowA : rowB;
+   rowLo = CLAMP(rowLo, drawBuf->_Ymin, drawBuf->_Ymax);
+   rowHi = CLAMP(rowHi, drawBuf->_Ymin, drawBuf->_Ymax);
+   if (rowLo == rowHi)
+      return GL_FALSE; /* no height */
+
+   *x0 = colLo;
+   *x1 = colHi;
+   *y0 = rowLo;
+   *y1 = rowHi;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Convert a zoomed coordinate back to the coordinate of the source pixel
+ * it was generated from.  Can use this for unzooming X or Y values.
+ * \param zoomX  zoom factor; used as a divisor, so it must be non-zero.
+ *               May be negative (mirrored image).
+ * \param imageX  position of the image origin, which zooming leaves fixed
+ * \param zx  zoomed coordinate of a destination pixel
+ * \return the corresponding coordinate in the original, unzoomed image
+ */
+static INLINE GLint
+unzoom_x(GLfloat zoomX, GLint imageX, GLint zx)
+{
+   /*
+   zx = imageX + (x - imageX) * zoomX;
+   zx - imageX = (x - imageX) * zoomX;
+   (zx - imageX) / zoomX = x - imageX;
+   */
+   GLint x;
+   if (zoomX < 0.0F) {
+      /* With a negative zoom the destination range is stored swapped, so
+       * destination pixel zx is the one whose *upper* edge maps back to the
+       * source pixel.  Example: zoomX = -1, imageX = 10 and a 4-pixel span
+       * give columns [6, 10); without this bias column 6 would unzoom to
+       * source offset 4 (one past the end) instead of 3.
+       */
+      zx++;
+   }
+   x = imageX + (GLint) ((zx - imageX) / zoomX);
+   return x;
+}
+
+
+
+/**
+ * Helper function called from _swrast_write_zoomed_rgba/rgb/
+ * index/depth_span().
+ * Builds a zoomed copy of \p span: each destination column in [x0, x1)
+ * takes the source element selected by unzoom_x(), and the resulting span
+ * is then written once for every destination row in [y0, y1).
+ * \param imgX, imgY  position of the overall image being drawn
+ * \param span  the unzoomed span; asserted to be a GL_BITMAP span without
+ *              SPAN_XY arrays
+ * \param src  source values, interpreted according to \p format:
+ *             GLchan[4] per pixel for GL_RGBA, GLchan[3] for GL_RGB,
+ *             GLuint for GL_COLOR_INDEX and GL_DEPTH_COMPONENT
+ * \param format  GL_RGBA, GL_RGB, GL_COLOR_INDEX or GL_DEPTH_COMPONENT;
+ *                any other value is reported with _mesa_problem() and
+ *                nothing is drawn
+ */
+static void
+zoom_span( GLcontext *ctx, GLint imgX, GLint imgY, const struct sw_span *span,
+           const GLvoid *src, GLenum format )
+{
+   struct sw_span zoomed;
+   struct span_arrays zoomed_arrays;  /* this is big! */
+   GLint x0, x1, y0, y1;  /* zoomed destination region [x0, x1) x [y0, y1) */
+   GLint zoomedWidth;
+
+   if (!compute_zoomed_bounds(ctx, imgX, imgY, span->x, span->y, span->end,
+                              &x0, &x1, &y0, &y1)) {
+      return;  /* totally clipped */
+   }
+
+   zoomedWidth = x1 - x0;
+   ASSERT(zoomedWidth > 0);
+   ASSERT(zoomedWidth <= MAX_WIDTH);
+
+   /* no pixel arrays! must be horizontal spans. */
+   ASSERT((span->arrayMask & SPAN_XY) == 0);
+   ASSERT(span->primitive == GL_BITMAP);
+
+   INIT_SPAN(zoomed, GL_BITMAP, 0, 0, 0);
+   zoomed.x = x0;
+   zoomed.end = zoomedWidth;
+   zoomed.array = &zoomed_arrays;  /* zoomed per-pixel values live on the stack */
+
+   /* copy fog interp info */
+   zoomed.fog = span->fog;
+   zoomed.fogStep = span->fogStep;
+   /* XXX copy texcoord info? */
+
+   /* Per format: copy the attributes that stay interpolated, and switch the
+    * zoomed attribute from interpolation (interpMask) to arrays (arrayMask).
+    */
+   if (format == GL_RGBA || format == GL_RGB) {
+      /* copy Z info */
+      zoomed.z = span->z;
+      zoomed.zStep = span->zStep;
+      /* we'll generate an array of colors */
+      zoomed.interpMask = span->interpMask & ~SPAN_RGBA;
+      zoomed.arrayMask |= SPAN_RGBA;
+      ASSERT(span->arrayMask & SPAN_RGBA);
+   }
+   else if (format == GL_COLOR_INDEX) {
+      /* copy Z info */
+      zoomed.z = span->z;
+      zoomed.zStep = span->zStep;
+      /* we'll generate an array of color indexes */
+      zoomed.interpMask = span->interpMask & ~SPAN_INDEX;
+      zoomed.arrayMask |= SPAN_INDEX;
+      ASSERT(span->arrayMask & SPAN_INDEX);
+   }
+   else if (format == GL_DEPTH_COMPONENT) {
+      /* Copy color info */
+      zoomed.red = span->red;
+      zoomed.green = span->green;
+      zoomed.blue = span->blue;
+      zoomed.alpha = span->alpha;
+      zoomed.redStep = span->redStep;
+      zoomed.greenStep = span->greenStep;
+      zoomed.blueStep = span->blueStep;
+      zoomed.alphaStep = span->alphaStep;
+      /* we'll generate an array of depth values */
+      zoomed.interpMask = span->interpMask & ~SPAN_Z;
+      zoomed.arrayMask |= SPAN_Z;
+      ASSERT(span->arrayMask & SPAN_Z);
+   }
+   else {
+      _mesa_problem(ctx, "Bad format in zoom_span");
+      return;
+   }
+
+   /* zoom the span horizontally: destination pixel i takes source pixel j */
+   if (format == GL_RGBA) {
+      const GLchan (*rgba)[4] = (const GLchan (*)[4]) src;
+      GLint i;
+      for (i = 0; i < zoomedWidth; i++) {
+         GLint j = unzoom_x(ctx->Pixel.ZoomX, imgX, x0 + i) - span->x;
+         ASSERT(j >= 0);
+         ASSERT(j < span->end);
+         COPY_CHAN4(zoomed.array->rgba[i], rgba[j]);
+      }
+   }
+   else if (format == GL_RGB) {
+      const GLchan (*rgb)[3] = (const GLchan (*)[3]) src;
+      GLint i;
+      for (i = 0; i < zoomedWidth; i++) {
+         GLint j = unzoom_x(ctx->Pixel.ZoomX, imgX, x0 + i) - span->x;
+         ASSERT(j >= 0);
+         ASSERT(j < span->end);
+         zoomed.array->rgba[i][0] = rgb[j][0];
+         zoomed.array->rgba[i][1] = rgb[j][1];
+         zoomed.array->rgba[i][2] = rgb[j][2];
+         zoomed.array->rgba[i][3] = CHAN_MAX;  /* RGB source has no alpha */
+      }
+   }
+   else if (format == GL_COLOR_INDEX) {
+      const GLuint *indexes = (const GLuint *) src;
+      GLint i;
+      for (i = 0; i < zoomedWidth; i++) {
+         GLint j = unzoom_x(ctx->Pixel.ZoomX, imgX, x0 + i) - span->x;
+         ASSERT(j >= 0);
+         ASSERT(j < span->end);
+         zoomed.array->index[i] = indexes[j];
+      }
+   }
+   else if (format == GL_DEPTH_COMPONENT) {
+      const GLuint *zValues = (const GLuint *) src;
+      GLint i;
+      for (i = 0; i < zoomedWidth; i++) {
+         GLint j = unzoom_x(ctx->Pixel.ZoomX, imgX, x0 + i) - span->x;
+         ASSERT(j >= 0);
+         ASSERT(j < span->end);
+         zoomed.array->z[i] = zValues[j];
+      }
+      /* Now, fall into either the RGB or COLOR_INDEX path below */
+      format = ctx->Visual.rgbMode ? GL_RGBA : GL_COLOR_INDEX;
+   }
+
+   /* write the span in rows [y0, y1) */
+   if (format == GL_RGBA || format == GL_RGB) {
+      /* Writing the span may modify the colors, so make a backup now if we're
+       * going to call _swrast_write_rgba_span() more than once.
+       * Also, clipping may change the span end value, so store it as well.
+       */
+      GLchan rgbaSave[MAX_WIDTH][4];
+      const GLint end = zoomed.end; /* save */
+      if (y1 - y0 > 1) {
+         MEMCPY(rgbaSave, zoomed.array->rgba, zoomed.end * 4 * sizeof(GLchan));
+      }
+      for (zoomed.y = y0; zoomed.y < y1; zoomed.y++) {
+         _swrast_write_rgba_span(ctx, &zoomed);
+         zoomed.end = end;  /* restore */
+         if (y1 - y0 > 1) {
+            /* restore the colors */
+            MEMCPY(zoomed.array->rgba, rgbaSave, zoomed.end*4 * sizeof(GLchan));
+         }
+      }
+   }
+   else if (format == GL_COLOR_INDEX) {
+      /* Same save/restore scheme as the RGBA path, applied to the indexes. */
+      GLuint indexSave[MAX_WIDTH];
+      const GLint end = zoomed.end; /* save */
+      if (y1 - y0 > 1) {
+         MEMCPY(indexSave, zoomed.array->index, zoomed.end * sizeof(GLuint));
+      }
+      for (zoomed.y = y0; zoomed.y < y1; zoomed.y++) {
+         _swrast_write_index_span(ctx, &zoomed);
+         zoomed.end = end;  /* restore */
+         if (y1 - y0 > 1) {
+            /* restore the color indexes */
+            MEMCPY(zoomed.array->index, indexSave, zoomed.end * sizeof(GLuint));
+         }
+      }
+   }
+}
+
+
+/**
+ * Zoom and write a span of RGBA colors.
+ * \param imgX, imgY  position of the overall image being drawn
+ * \param span  the unzoomed span
+ * \param rgba  source colors, four GLchan components per pixel
+ */
+void
+_swrast_write_zoomed_rgba_span( GLcontext *ctx, GLint imgX, GLint imgY,
+                               const struct sw_span *span,
+                               CONST GLchan rgba[][4])
+{
+   const GLvoid *colors = (const GLvoid *) rgba;
+
+   zoom_span(ctx, imgX, imgY, span, colors, GL_RGBA);
+}
+
+
+/**
+ * Zoom and write a span of RGB colors.
+ * \param imgX, imgY  position of the overall image being drawn
+ * \param span  the unzoomed span
+ * \param rgb  source colors, three GLchan components per pixel
+ */
+void
+_swrast_write_zoomed_rgb_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                              const struct sw_span *span,
+                              CONST GLchan rgb[][3])
+{
+   const GLvoid *colors = (const GLvoid *) rgb;
+
+   zoom_span(ctx, imgX, imgY, span, colors, GL_RGB);
+}
+
+
+/**
+ * Zoom and write a span of color indexes; the source values are taken
+ * from span->array->index.
+ * \param imgX, imgY  position of the overall image being drawn
+ * \param span  the unzoomed span
+ */
+void
+_swrast_write_zoomed_index_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                                const struct sw_span *span)
+{
+   const GLvoid *indexes = (const GLvoid *) span->array->index;
+
+   zoom_span(ctx, imgX, imgY, span, indexes, GL_COLOR_INDEX);
+}
+
+
+/**
+ * Zoom and write a span of depth values; the source values are taken
+ * from span->array->z.
+ * \param imgX, imgY  position of the overall image being drawn
+ * \param span  the unzoomed span
+ */
+void
+_swrast_write_zoomed_depth_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                                const struct sw_span *span)
+{
+   const GLvoid *depths = (const GLvoid *) span->array->z;
+
+   zoom_span(ctx, imgX, imgY, span, depths, GL_DEPTH_COMPONENT);
+}
+
+
+/**
+ * Zoom/write stencil values.
+ * No per-fragment operations are applied.
+ * \param imgX, imgY  position of the overall image being drawn
+ * \param width  number of values in \p stencil
+ * \param spanX, spanY  position of the unzoomed span
+ * \param stencil  the unzoomed stencil values
+ */
+void
+_swrast_write_zoomed_stencil_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                                  GLint width, GLint spanX, GLint spanY,
+                                  const GLstencil stencil[])
+{
+   GLstencil zoomedVals[MAX_WIDTH];
+   GLint colStart, colEnd, rowStart, rowEnd;
+   GLint zoomedWidth, col, row;
+
+   if (!compute_zoomed_bounds(ctx, imgX, imgY, spanX, spanY, width,
+                              &colStart, &colEnd, &rowStart, &rowEnd)) {
+      /* totally clipped */
+      return;
+   }
+
+   zoomedWidth = colEnd - colStart;
+   ASSERT(zoomedWidth > 0);
+   ASSERT(zoomedWidth <= MAX_WIDTH);
+
+   /* horizontal zoom: fetch the source value behind each zoomed column */
+   for (col = 0; col < zoomedWidth; col++) {
+      const GLint srcIndex
+         = unzoom_x(ctx->Pixel.ZoomX, imgX, colStart + col) - spanX;
+      ASSERT(srcIndex >= 0);
+      ASSERT(srcIndex < width);
+      zoomedVals[col] = stencil[srcIndex];
+   }
+
+   /* vertical zoom: the same zoomed row goes to every destination row */
+   row = rowStart;
+   while (row < rowEnd) {
+      _swrast_write_stencil_span(ctx, zoomedWidth, colStart, row, zoomedVals);
+      row++;
+   }
+}
+
+
+/**
+ * Zoom/write z values (16 or 32-bit).
+ * No per-fragment operations are applied.
+ * The values go to ctx->DrawBuffer->_DepthBuffer through its PutRow()
+ * function, one call per zoomed row.
+ * \param imgX, imgY  position of the overall image being drawn
+ * \param width  number of values in \p z
+ * \param spanX, spanY  position of the unzoomed span
+ * \param z  the unzoomed depth values; read as GLushort when the depth
+ *           buffer's DataType is GL_UNSIGNED_SHORT, as GLuint otherwise
+ */
+void
+_swrast_write_zoomed_z_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                            GLint width, GLint spanX, GLint spanY,
+                            const GLvoid *z)
+{
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_DepthBuffer;
+   GLushort zoomedVals16[MAX_WIDTH];
+   GLuint zoomedVals32[MAX_WIDTH];
+   const GLvoid *zoomedVals;  /* whichever of the two arrays got filled */
+   GLint x0, x1, y0, y1, y;
+   GLint i, zoomedWidth;
+
+   if (!compute_zoomed_bounds(ctx, imgX, imgY, spanX, spanY, width,
+                              &x0, &x1, &y0, &y1)) {
+      return;  /* totally clipped */
+   }
+
+   zoomedWidth = x1 - x0;
+   ASSERT(zoomedWidth > 0);
+   ASSERT(zoomedWidth <= MAX_WIDTH);
+
+   /* zoom the span horizontally; the source is only read, so keep it const */
+   if (rb->DataType == GL_UNSIGNED_SHORT) {
+      const GLushort *src16 = (const GLushort *) z;
+      for (i = 0; i < zoomedWidth; i++) {
+         GLint j = unzoom_x(ctx->Pixel.ZoomX, imgX, x0 + i) - spanX;
+         ASSERT(j >= 0);
+         ASSERT(j < width);
+         zoomedVals16[i] = src16[j];
+      }
+      zoomedVals = zoomedVals16;
+   }
+   else {
+      const GLuint *src32 = (const GLuint *) z;
+      ASSERT(rb->DataType == GL_UNSIGNED_INT);
+      for (i = 0; i < zoomedWidth; i++) {
+         GLint j = unzoom_x(ctx->Pixel.ZoomX, imgX, x0 + i) - spanX;
+         ASSERT(j >= 0);
+         ASSERT(j < width);
+         zoomedVals32[i] = src32[j];
+      }
+      zoomedVals = zoomedVals32;
+   }
+
+   /* write the zoomed spans */
+   for (y = y0; y < y1; y++) {
+      rb->PutRow(ctx, rb, zoomedWidth, x0, y, zoomedVals, NULL);
+   }
+}
diff --git a/dist/Mesa/src/mesa/swrast/s_zoom.h b/dist/Mesa/src/mesa/swrast/s_zoom.h
new file mode 100644
index 000000000..d10c27024
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/s_zoom.h
@@ -0,0 +1,62 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef S_ZOOM_H
+#define S_ZOOM_H
+
+#include "mtypes.h"
+#include "swrast.h"
+
+/* Color span: rgb[] holds 4 GLchan per pixel despite its name. */
+extern void
+_swrast_write_zoomed_rgba_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                               const struct sw_span *span,
+                               CONST GLchan rgb[][4]);
+/* Color span: rgb[] holds 3 GLchan per pixel. */
+extern void
+_swrast_write_zoomed_rgb_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                              const struct sw_span *span,
+                              CONST GLchan rgb[][3]);
+/* NOTE(review): presumably writes the span's color indexes -- confirm. */
+extern void
+_swrast_write_zoomed_index_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                                const struct sw_span *span);
+/* NOTE(review): presumably writes the span's depth values -- confirm. */
+extern void
+_swrast_write_zoomed_depth_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                                const struct sw_span *span);
+
+/* stencil[j], 0 <= j < width, is the value at unzoomed x = spanX + j. */
+extern void
+_swrast_write_zoomed_stencil_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                                  GLint width, GLint spanX, GLint spanY,
+                                  const GLstencil stencil[]);
+/* z points to GLushort or GLuint values, per the depth buffer's DataType. */
+extern void
+_swrast_write_zoomed_z_span(GLcontext *ctx, GLint imgX, GLint imgY,
+                            GLint width, GLint spanX, GLint spanY,
+                            const GLvoid *z);
+
+
+#endif
diff --git a/dist/Mesa/src/mesa/swrast/swrast.h b/dist/Mesa/src/mesa/swrast/swrast.h
new file mode 100644
index 000000000..2a2123425
--- /dev/null
+++ b/dist/Mesa/src/mesa/swrast/swrast.h
@@ -0,0 +1,280 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file swrast/swrast.h
+ * \brief Public interface to the software rasterization functions.
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SWRAST_H
+#define SWRAST_H
+
+#include "mtypes.h"
+
+/**
+ * \struct SWvertex
+ * \brief Data structure used for vertices in the software rasterizer.
+ *
+ * The software rasterizer uses this format for vertices.  Thus a
+ * 'RasterSetup' stage or other translation is required between the
+ * tnl module and the swrast rasterization functions.  This serves to
+ * isolate the swrast module from the internals of the tnl module, and
+ * improves its usefulness as a fallback mechanism for hardware
+ * drivers.
+ *
+ * Full software drivers:
+ *   - Register the rastersetup and triangle functions from
+ *     utils/software_helper.
+ *   - On state change, update the rasterization pointers in that module.
+ *
+ * Rasterization hardware drivers:
+ *   - Keep native rastersetup.
+ *   - Implement native twoside, offset and unfilled triangle setup.
+ *   - Implement a translator from native vertices to swrast vertices.
+ *   - On partial fallback (mix of accelerated and unaccelerated
+ *     prims), call a pass-through function which translates native
+ *     vertices to SWvertices and calls the appropriate swrast function.
+ *   - On total fallback (vertex format insufficient for state or all
+ *     primitives unaccelerated), hook in swrast_setup instead.
+ */
+typedef struct {
+   /** win[0], win[1] are the screen-coords of SWvertex.
+    * win[2] is the z-buffer coord (if 16-bit Z buffer, in range [0,65535]).
+    * win[3] is 1/w where w is the clip-space W coord.  This is the value
+    * that clip{XYZ} were multiplied by to get ndc{XYZ}.
+    */
+   GLfloat win[4];
+   GLfloat texcoord[MAX_TEXTURE_COORD_UNITS][4]; /**< 4 floats per coord unit */
+   GLchan color[4];      /**< 4 channels; presumably RGBA -- TODO confirm */
+   GLchan specular[4];   /**< 4 channels; presumably RGBA -- TODO confirm */
+   GLfloat fog;
+   GLfloat index;        /**< presumably the color index -- TODO confirm */
+   GLfloat pointSize;
+   GLfloat attribute[MAX_VERTEX_ATTRIBS][4];  /**< 4 floats per vertex attrib */
+} SWvertex;
+
+
+struct swrast_device_driver;
+
+/*
+ * Public entry points exported from swrast.
+ */
+extern GLboolean
+_swrast_CreateContext(GLcontext *ctx);
+
+extern void
+_swrast_DestroyContext(GLcontext *ctx);
+
+/*
+ * Returns a writable (non-const) pointer to swrast's device driver struct.
+ */
+extern struct swrast_device_driver *
+_swrast_GetDeviceDriverReference(GLcontext *ctx);
+
+/*
+ * Pixel-rectangle and buffer operations.  The names mirror the GL calls
+ * Bitmap, CopyPixels, DrawPixels, ReadPixels, BlitFramebuffer, Clear and
+ * Accum; NOTE(review): presumably their software paths -- confirm.
+ */
+extern void
+_swrast_Bitmap(GLcontext *ctx, GLint px, GLint py,
+               GLsizei width, GLsizei height,
+               const struct gl_pixelstore_attrib *unpack,
+               const GLubyte *bitmap);
+
+extern void
+_swrast_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
+                   GLint destx, GLint desty,
+                   GLsizei width, GLsizei height, GLenum type);
+
+extern void
+_swrast_DrawPixels(GLcontext *ctx, GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   const GLvoid *pixels);
+
+extern void
+_swrast_ReadPixels(GLcontext *ctx, GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   GLvoid *pixels);
+
+extern void
+_swrast_BlitFramebuffer(GLcontext *ctx,
+                        GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+                        GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+                        GLbitfield mask, GLenum filter);
+
+extern void
+_swrast_Clear(GLcontext *ctx, GLbitfield mask, GLboolean all,
+              GLint x, GLint y, GLint width, GLint height);
+
+extern void
+_swrast_Accum(GLcontext *ctx, GLenum op, GLfloat value,
+              GLint xpos, GLint ypos, GLint width, GLint height);
+
+
+
+/*
+ * Reset the line stipple counter.
+ */
+extern void
+_swrast_ResetLineStipple(GLcontext *ctx);
+
+/*
+ * Primitive entry points.  These always render the correct
+ * point/line/triangle for the current state.  For flat-shaded
+ * primitives, the provoking vertex is the final one.
+ */
+extern void
+_swrast_Point(GLcontext *ctx, const SWvertex *v);
+
+extern void
+_swrast_Line(GLcontext *ctx, const SWvertex *v0, const SWvertex *v1);
+
+extern void
+_swrast_Triangle(GLcontext *ctx, const SWvertex *v0,
+                 const SWvertex *v1, const SWvertex *v2);
+
+extern void
+_swrast_Quad(GLcontext *ctx, const SWvertex *v0, const SWvertex *v1,
+             const SWvertex *v2, const SWvertex *v3);
+
+/*
+ * NOTE(review): flush and the render_* entry points are undescribed here.
+ */
+extern void
+_swrast_flush(GLcontext *ctx);
+
+extern void
+_swrast_render_primitive(GLcontext *ctx, GLenum mode);
+
+extern void
+_swrast_render_start(GLcontext *ctx);
+
+extern void
+_swrast_render_finish(GLcontext *ctx);
+
+/* Tell the software rasterizer about core state changes. */
+extern void
+_swrast_InvalidateState(GLcontext *ctx, GLbitfield new_state);
+
+/* Configure swrast to match hardware rasterizer characteristics: */
+extern void
+_swrast_allow_vertex_fog(GLcontext *ctx, GLboolean value);
+
+extern void
+_swrast_allow_pixel_fog(GLcontext *ctx, GLboolean value);
+
+/* Debug: */
+extern void
+_swrast_print_vertex(GLcontext *ctx, const SWvertex *v);
+
+
+/* Imaging fallbacks.  A better solution should be found, perhaps moving
+ * all the imaging fallback code to a new module.
+ */
+extern void
+_swrast_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target,
+                                GLenum internalFormat, GLint x, GLint y,
+                                GLsizei width, GLsizei height);
+
+extern void
+_swrast_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target,
+                                GLenum internalFormat, GLint x, GLint y,
+                                GLsizei width);
+
+extern void
+_swrast_CopyColorSubTable(GLcontext *ctx, GLenum target, GLsizei start,
+                          GLint x, GLint y, GLsizei width);
+
+extern void
+_swrast_CopyColorTable(GLcontext *ctx, GLenum target, GLenum internalformat,
+                       GLint x, GLint y, GLsizei width);
+
+
+/*
+ * Texture fallbacks.  Could also live in a new module with the rest of
+ * the texture store fallbacks?
+ */
+extern void
+_swrast_copy_teximage1d(GLcontext *ctx, GLenum target, GLint level,
+                        GLenum internalFormat, GLint x, GLint y,
+                        GLsizei width, GLint border);
+
+extern void
+_swrast_copy_teximage2d(GLcontext *ctx, GLenum target, GLint level,
+                        GLenum internalFormat, GLint x, GLint y,
+                        GLsizei width, GLsizei height, GLint border);
+
+extern void
+_swrast_copy_texsubimage1d(GLcontext *ctx, GLenum target, GLint level,
+                           GLint xoffset,
+                           GLint x, GLint y, GLsizei width);
+
+extern void
+_swrast_copy_texsubimage2d(GLcontext *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLint yoffset,
+                           GLint x, GLint y, GLsizei width, GLsizei height);
+
+extern void
+_swrast_copy_texsubimage3d(GLcontext *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLint yoffset, GLint zoffset,
+                           GLint x, GLint y, GLsizei width, GLsizei height);
+
+/*
+ * NOTE(review): no description is given here for the call below; check
+ * the swrast texture code for what "ejecting" texture images entails.
+ */
+extern void
+_swrast_eject_texture_images(GLcontext *ctx);
+
+
+/**
+ * The driver interface for the software rasterizer.
+ * XXX this may go away: these hooks may move to ctx->Driver.RenderStart
+ * and ctx->Driver.RenderEnd.
+ */
+struct swrast_device_driver {
+   /*
+    * These are called before and after accessing renderbuffers during
+    * software rasterization.
+    *
+    * They are a suitable place for grabbing/releasing hardware locks.
+    *
+    * NOTE: The swrast triangle/line/point routines *DO NOT* call
+    * these functions.  Locking in that case must be organized by the
+    * driver by other mechanisms.
+    */
+   void (*SpanRenderStart)(GLcontext *ctx);    /**< before renderbuffer access */
+   void (*SpanRenderFinish)(GLcontext *ctx);   /**< after renderbuffer access */
+};
+
+
+
+#endif
author	Matthieu Herrb <matthieu@cvs.openbsd.org>	2006-11-25 18:55:19 +0000
committer	Matthieu Herrb <matthieu@cvs.openbsd.org>	2006-11-25 18:55:19 +0000
commit	0641eddccd060a4ae333378927ffb3d9c34ddb98 (patch)
tree	4a34bd4f0351f8b1cb676d91595b5b8e14b3c563 /dist/Mesa/src/mesa/swrast
parent	d2ecd06125d3ab42e53ffdd86d7d3103a57e810b (diff)