diff options
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | src/sna/Makefile.am | 4 | ||||
-rw-r--r-- | src/sna/brw/Makefile.am | 42 | ||||
-rw-r--r-- | src/sna/brw/brw_disasm.c | 1101 | ||||
-rw-r--r-- | src/sna/brw/brw_eu.c | 150 | ||||
-rw-r--r-- | src/sna/brw/brw_eu.h | 2266 | ||||
-rw-r--r-- | src/sna/brw/brw_eu_debug.c | 95 | ||||
-rw-r--r-- | src/sna/brw/brw_eu_emit.c | 2002 | ||||
-rw-r--r-- | src/sna/brw/brw_eu_util.c | 126 |
9 files changed, 5785 insertions, 2 deletions
diff --git a/configure.ac b/configure.ac index 9945d5b8..2a8d08b5 100644 --- a/configure.ac +++ b/configure.ac @@ -391,6 +391,7 @@ AC_CONFIG_FILES([ src/legacy/i810/Makefile src/legacy/i810/xvmc/Makefile src/sna/Makefile + src/sna/brw/Makefile src/sna/fb/Makefile man/Makefile src/render_program/Makefile diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am index 8463a80b..306996b5 100644 --- a/src/sna/Makefile.am +++ b/src/sna/Makefile.am @@ -18,7 +18,7 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -SUBDIRS = fb +SUBDIRS = brw fb AM_CFLAGS = \ @CWARNFLAGS@ \ @@ -34,7 +34,7 @@ AM_CFLAGS += @VALGRIND_CFLAGS@ endif noinst_LTLIBRARIES = libsna.la -libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ fb/libfb.la +libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la fb/libfb.la libsna_la_SOURCES = \ blt.c \ diff --git a/src/sna/brw/Makefile.am b/src/sna/brw/Makefile.am new file mode 100644 index 00000000..edb3db4f --- /dev/null +++ b/src/sna/brw/Makefile.am @@ -0,0 +1,42 @@ + +# Copyright 2005 Adam Jackson. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +noinst_LTLIBRARIES = libbrw.la + +AM_CFLAGS = \ + @CWARNFLAGS@ \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/render_program \ + @XORG_CFLAGS@ \ + @UDEV_CFLAGS@ \ + @DRM_CFLAGS@ \ + $(NULL) + +if DEBUG +AM_CFLAGS += @VALGRIND_CFLAGS@ +endif + +libbrw_la_SOURCES = \ + brw_disasm.c \ + brw_eu.h \ + brw_eu.c \ + brw_eu_emit.c \ + $(NULL) diff --git a/src/sna/brw/brw_disasm.c b/src/sna/brw/brw_disasm.c new file mode 100644 index 00000000..106eed33 --- /dev/null +++ b/src/sna/brw/brw_disasm.c @@ -0,0 +1,1101 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +#include "brw_eu.h" + +static const struct { + const char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +static const char *conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + [BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +static const char *negate[2] = { + [0] = "", + [1] = "-", +}; + +static const char *_abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +static const char *vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +static const char *width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +static const char *horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +static const char *chan_sel[4] = { + [0] = "x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +#if 0 +static const char *dest_condmod[16] = { +}; + +static const char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [6] = "V", + [7] = "F" +}; +#endif + +static const char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +static const char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +static const char *accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + +static const char *wectrl[2] = { + [0] = "WE_normal", + [1] = "WE_all" +}; + +static const char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +static const char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +static const char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +static const char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +static const char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +static const char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", + [3] = "compr4", +}; + +static const char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +static const char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +static const char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +static const char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +static const int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + +static const char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +static const char *writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +static const char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +static const char *target_function[16] = { + [BRW_SFID_NULL] = "null", + [BRW_SFID_MATH] = "math", + [BRW_SFID_SAMPLER] = "sampler", + [BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_DATAPORT_READ] = "read", + [BRW_SFID_DATAPORT_WRITE] = "write", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner" +}; + +static const char *target_function_gen6[16] = { + [BRW_SFID_NULL] = "null", + [BRW_SFID_MATH] = "math", + [BRW_SFID_SAMPLER] = "sampler", + [BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner", + [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", + [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", + [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", + [GEN7_SFID_DATAPORT_DATA_CACHE] = "data" +}; + +static const char *dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + +static const char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", +}; + +static const char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +static const char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +static const char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +static const char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +static const char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + +static const char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +static const char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +static const char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +static const char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +static const char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + +static int column; + +static int string(FILE *file, const char *str) +{ + fputs(str, file); + column += strlen(str); + return 0; +} + +static int format(FILE *f, const char *fmt, ...) +{ + char buf[1024]; + va_list args; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf) - 1, fmt, args); + va_end(args); + + string(f, buf); + return 0; +} + +static void newline(FILE *f) +{ + putc('\n', f); + column = 0; +} + +static void pad(FILE *f, int c) +{ + do + string(f, " "); + while (column < c); +} + +static void control(FILE *file, const char *name, const char *ctrl[], unsigned id, int *space) +{ + if (!ctrl[id]) { + fprintf(file, "*** invalid %s value %d ", + name, id); + assert(0); + } + if (ctrl[id][0]) { + if (space && *space) + string(file, " "); + string(file, ctrl[id]); + if (space) + *space = 1; + } +} + +static void print_opcode(FILE *file, int id) +{ + if (!opcode[id].name) { + format(file, "*** invalid opcode value %d ", id); + assert(0); + } + string(file, opcode[id].name); +} + +static int reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) +{ + /* Clear the Compr4 instruction compression bit. */ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string(file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format(file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format(file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_FLAG: + format(file, "f%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format(file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format(file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format(file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format(file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format(file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string(file, "ip"); + return -1; + default: + format(file, "ARF%d", _reg_nr); + break; + } + } else { + control(file, "src reg file", reg_file, _reg_file, NULL); + format(file, "%d", _reg_nr); + } + return 0; +} + +static void dest(FILE *file, const struct brw_instruction *inst) +{ + if (inst->header.access_mode == BRW_ALIGN_1) { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) { + if (reg(file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr)) + return; + + if (inst->bits1.da1.dest_subreg_nr) + format(file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); + format(file, "<%d>", inst->bits1.da1.dest_horiz_stride); + control(file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } else { + string(file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format(file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); + if (inst->bits1.ia1.dest_indirect_offset) + format(file, " %d", inst->bits1.ia1.dest_indirect_offset); + string(file, "]"); + format(file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + control(file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } else { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) { + if (reg(file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr)) + return; + + if (inst->bits1.da16.dest_subreg_nr) + format(file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); + string(file, "<1>"); + control(file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + control(file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } else { + string(file, "Indirect align16 address mode not supported"); + } + } +} + +static void src_align1_region(FILE *file, + unsigned _vert_stride, unsigned _width, unsigned _horiz_stride) +{ + string(file, "<"); + control(file, "vert stride", vert_stride, _vert_stride, NULL); + string(file, ","); + control(file, "width", width, _width, NULL); + string(file, ","); + control(file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string(file, ">"); +} + +static void src_da1(FILE *file, unsigned type, unsigned _reg_file, + unsigned _vert_stride, unsigned _width, unsigned _horiz_stride, + unsigned reg_num, unsigned sub_reg_num, unsigned __abs, unsigned _negate) +{ + control(file, "negate", negate, _negate, NULL); + control(file, "abs", _abs, __abs, NULL); + + if (reg(file, _reg_file, reg_num)) + return; + + if (sub_reg_num) + format(file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ + src_align1_region(file, _vert_stride, _width, _horiz_stride); + control(file, "src reg encoding", reg_encoding, type, NULL); +} + +static void src_ia1(FILE *file, + unsigned type, + unsigned _reg_file, + int _addr_imm, + unsigned _addr_subreg_nr, + unsigned _negate, + unsigned __abs, + unsigned _addr_mode, + unsigned _horiz_stride, + unsigned _width, + unsigned _vert_stride) +{ + control(file, "negate", negate, _negate, NULL); + control(file, "abs", _abs, __abs, NULL); + + string(file, "g[a0"); + if (_addr_subreg_nr) + format(file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format(file, " %d", _addr_imm); + string(file, "]"); + src_align1_region(file, _vert_stride, _width, _horiz_stride); + control(file, "src reg encoding", reg_encoding, type, NULL); +} + +static void src_da16(FILE *file, + unsigned _reg_type, + unsigned _reg_file, + unsigned _vert_stride, + unsigned _reg_nr, + unsigned _subreg_nr, + unsigned __abs, + unsigned _negate, + unsigned swz_x, + unsigned swz_y, + unsigned swz_z, + unsigned swz_w) +{ + control(file, "negate", negate, _negate, NULL); + control(file, "abs", _abs, __abs, NULL); + + if (reg(file, _reg_file, _reg_nr)) + return; + + if (_subreg_nr) + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. */ + format(file, ".%d", 16 / reg_type_size[_reg_type]); + string(file, "<"); + control(file, "vert stride", vert_stride, _vert_stride, NULL); + string(file, ",4,1>"); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string(file, "."); + control(file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string(file, "."); + control(file, "channel select", chan_sel, swz_x, NULL); + control(file, "channel select", chan_sel, swz_y, NULL); + control(file, "channel select", chan_sel, swz_z, NULL); + control(file, "channel select", chan_sel, swz_w, NULL); + } + control(file, "src da16 reg type", reg_encoding, _reg_type, NULL); +} + +static void imm(FILE *file, unsigned type, const struct brw_instruction *inst) +{ + switch (type) { + case BRW_REGISTER_TYPE_UD: + format(file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format(file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format(file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format(file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format(file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format(file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format(file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format(file, "%-gF", inst->bits3.f); + } +} + +static void src0(FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + imm(file, inst->bits1.da1.src0_reg_type, inst); + else if (inst->header.access_mode == BRW_ALIGN_1) { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) { + src_da1(file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } else { + src_ia1(file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } else { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) { + src_da16(file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } else { + string(file, "Indirect align16 address mode not supported"); + } + } +} + +static void src1(FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + imm(file, inst->bits1.da1.src1_reg_type, inst); + else if (inst->header.access_mode == BRW_ALIGN_1) { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) { + src_da1(file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } else { + src_ia1(file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } else { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) { + src_da16(file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } else { + string(file, "Indirect align16 address mode not supported"); + } + } +} + +static const int esize[6] = { + [0] = 1, + [1] = 2, + [2] = 4, + [3] = 8, + [4] = 16, + [5] = 32, +}; + +static int qtr_ctrl(FILE *file, const struct brw_instruction *inst) +{ + int qtr_ctl = inst->header.compression_control; + int size = esize[inst->header.execution_size]; + + if (size == 8) { + switch (qtr_ctl) { + case 0: + string(file, " 1Q"); + break; + case 1: + string(file, " 2Q"); + break; + case 2: + string(file, " 3Q"); + break; + case 3: + string(file, " 4Q"); + break; + } + } else if (size == 16){ + if (qtr_ctl < 2) + string(file, " 1H"); + else + string(file, " 2H"); + } + return 0; +} + +void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) +{ + int space = 0; + + format(file, "%08x %08x %08x %08x\n", + ((uint32_t*)inst)[0], + ((uint32_t*)inst)[1], + ((uint32_t*)inst)[2], + ((uint32_t*)inst)[3]); + + if (inst->header.predicate_control) { + string(file, "("); + control(file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string(file, "f0"); + if (inst->bits2.da1.flag_subreg_nr) + format(file, ".%d", inst->bits2.da1.flag_subreg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + control(file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + control(file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string(file, ") "); + } + + print_opcode(file, inst->header.opcode); + control(file, "saturate", saturate, inst->header.saturate, NULL); + control(file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode == BRW_OPCODE_MATH) { + string(file, " "); + control(file, "function", math_function, + inst->header.destreg__conditionalmod, NULL); + } else if (inst->header.opcode != BRW_OPCODE_SEND && + inst->header.opcode != BRW_OPCODE_SENDC) + control(file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string(file, "("); + control(file, "execution size", exec_size, inst->header.execution_size, NULL); + string(file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND && gen < 60) + format(file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad(file, 16); + dest(file, inst); + } else if (gen >= 60 && (inst->header.opcode == BRW_OPCODE_IF || + inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format(file, " %d", inst->bits1.branch_gen6.jump_count); + } + + if (opcode[inst->header.opcode].nsrc > 0) { + pad(file, 32); + src0(file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad(file, 48); + src1(file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) { + enum brw_message_target target; + + if (gen >= 60) + target = inst->header.destreg__conditionalmod; + else if (gen >= 50) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + + newline (file); + pad (file, 16); + space = 0; + + if (gen >= 60) { + control (file, "target function", target_function_gen6, + target, &space); + } else { + control (file, "target function", target_function, + target, &space); + } + + switch (target) { + case BRW_SFID_MATH: + control (file, "math function", math_function, + inst->bits3.math.function, &space); + control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_SFID_SAMPLER: + if (gen >= 70) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen7.binding_table_index, + inst->bits3.sampler_gen7.sampler, + inst->bits3.sampler_gen7.msg_type, + inst->bits3.sampler_gen7.simd_mode); + } else if (gen >= 50) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (gen >= 45) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + control (file, "sampler target format", + sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_SFID_DATAPORT_READ: + if (gen >= 60) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else if (gen >= 45) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } + break; + + case BRW_SFID_DATAPORT_WRITE: + if (gen >= 70) { + format (file, " ("); + + control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen7_dp.msg_type, &space); + + format (file, ", %d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + } else if (gen >= 60) { + format (file, " ("); + + control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.last_render_target << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } + break; + + case BRW_SFID_URB: + if (gen >= 50) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + + space = 1; + if (gen >= 50) { + control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } + control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_SFID_THREAD_SPAWNER: + break; + case GEN7_SFID_DATAPORT_DATA_CACHE: + format (file, " (%d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + break; + + + default: + format (file, "unsupported target %d", target); + break; + } + if (space) + string (file, " "); + if (gen >= 50) { + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + } + pad(file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string(file, "{"); + space = 1; + control(file, "access mode", access_mode, inst->header.access_mode, &space); + if (gen >= 60) + control(file, "write enable control", wectrl, inst->header.mask_control, &space); + else + control(file, "mask control", mask_ctrl, inst->header.mask_control, &space); + control(file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + + if (gen >= 60) + qtr_ctrl(file, inst); + else { + if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED && + opcode[inst->header.opcode].ndst > 0 && + inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE && + inst->bits1.da1.dest_reg_nr & (1 << 7)) { + format(file, " compr4"); + } else { + control(file, "compression control", compr_ctrl, + inst->header.compression_control, &space); + } + } + + control(file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (gen >= 60) + control(file, "acc write control", accwr, inst->header.acc_wr_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) + control(file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string(file, " "); + string(file, "}"); + } + string(file, ";"); + newline(file); +} diff --git a/src/sna/brw/brw_eu.c b/src/sna/brw/brw_eu.c new file mode 100644 index 00000000..7c32ea19 --- /dev/null +++ b/src/sna/brw/brw_eu.c @@ -0,0 +1,150 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_eu.h" + +#include <string.h> +#include <stdlib.h> + +/* Returns the corresponding conditional mod for swapping src0 and + * src1 in e.g. CMP. + */ +uint32_t +brw_swap_cmod(uint32_t cmod) +{ + switch (cmod) { + case BRW_CONDITIONAL_Z: + case BRW_CONDITIONAL_NZ: + return cmod; + case BRW_CONDITIONAL_G: + return BRW_CONDITIONAL_LE; + case BRW_CONDITIONAL_GE: + return BRW_CONDITIONAL_L; + case BRW_CONDITIONAL_L: + return BRW_CONDITIONAL_GE; + case BRW_CONDITIONAL_LE: + return BRW_CONDITIONAL_G; + default: + return ~0; + } +} + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_compression_control(struct brw_compile *p, + enum brw_compression compression_control) +{ + p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); + + if (p->gen >= 60) { + /* Since we don't use the 32-wide support in gen6, we translate + * the pre-gen6 compression control here. + */ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1Q; + break; + case BRW_COMPRESSION_2NDHALF: + /* For 8-wide, this is "use the second set of 8 bits." */ + p->current->header.compression_control = GEN6_COMPRESSION_2Q; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For 16-wide instruction compression, use the first set of 16 bits + * since we don't do 32-wide dispatch. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + default: + assert(!"not reached"); + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + } + } else { + p->current->header.compression_control = compression_control; + } +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->compressed_stack[p->current - p->stack] = p->compressed; + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; + p->compressed = p->compressed_stack[p->current - p->stack]; +} + +void brw_compile_init(struct brw_compile *p, int gen, void *store) +{ + assert(gen); + + p->gen = gen; + p->store = store; + + p->nr_insn = 0; + p->current = p->stack; + p->compressed = false; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); + + p->if_stack_depth = 0; + p->if_stack_array_size = 0; + p->if_stack = NULL; +} diff --git a/src/sna/brw/brw_eu.h b/src/sna/brw/brw_eu.h new file mode 100644 index 00000000..65e66d5e --- /dev/null +++ b/src/sna/brw/brw_eu.h @@ -0,0 +1,2266 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + +#define WRITEMASK_X 0x1 +#define WRITEMASK_Y 0x2 +#define WRITEMASK_Z 0x4 +#define WRITEMASK_W 0x8 + +#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y) +#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z) +#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W) + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +enum brw_compression { + BRW_COMPRESSION_NONE, + BRW_COMPRESSION_2NDHALF, + BRW_COMPRESSION_COMPRESSED, +}; + +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_R 7 +#define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +/** @{ + * + * Gen6 has replaced "mask enable/disable" with WECtrl, which is + * effectively the same but much simpler to think about. Now, there + * are two contributors ANDed together to whether channels are + * executed: The predication on the instruction, and the channel write + * enable. + */ +/** + * This is the default value. It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. + */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXF, + FS_OPCODE_TXL, + FS_OPCODE_TXS, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, + VS_OPCODE_PULL_CONSTANT_LOAD, +}; + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_MRF_COMPR4 (1 << 7) + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */ +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4 +#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5 +#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN5_SAMPLER_MESSAGE_SAMPLE 0 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 + +/* for GEN5 only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +/* This one stays the same across generations. */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +/** + * Message target: Shared Function ID for where to SEND a message. + * + * These are enumerated in the ISA reference under "send - Send Message". + * In particular, see the following tables: + * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" + * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" + * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) / + * Overview / GPE Function IDs + */ +enum brw_message_target { + BRW_SFID_NULL = 0, + BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */ + BRW_SFID_SAMPLER = 2, + BRW_SFID_MESSAGE_GATEWAY = 3, + BRW_SFID_DATAPORT_READ = 4, + BRW_SFID_DATAPORT_WRITE = 5, + BRW_SFID_URB = 6, + BRW_SFID_THREAD_SPAWNER = 7, + + GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, + GEN6_SFID_DATAPORT_RENDER_CACHE = 5, + GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, + + GEN7_SFID_DATAPORT_DATA_CACHE = 10, +}; + +#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +/* GEN6 */ +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 +#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 +#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + +#define REG_SIZE (8*4) + +struct brw_instruction { + struct { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; /* gen6: quater control */ + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + /** + * Conditional Modifier for most instructions. On Gen6+, this is also + * used for the SEND instruction's Message Target/SFID. + */ + unsigned destreg__conditionalmod:4; + unsigned acc_wr_control:1; + unsigned cmpt_control:1; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; /* 0x00000c00 */ + unsigned src1_reg_type:3; /* 0x00007000 */ + unsigned pad:1; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da16; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia16; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + + int jump_count:16; + } branch_gen6; + + struct { + unsigned dest_reg_file:1; + unsigned flag_subreg_num:1; + unsigned pad0:2; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src2_abs:1; + unsigned src2_negate:1; + unsigned pad1:7; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:3; + unsigned dest_reg_nr:8; + } da3src; + } bits1; + + + union { + struct { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } da1; + + struct { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } ia1; + + struct { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } da16; + + struct { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } ia16; + + /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. + * + * Does not apply to Gen6+. The SFID/message target moved to bits + * 27:24 of the header (destreg__conditionalmod); EOT is in bits3. + */ + struct { + unsigned pad:26; + unsigned end_of_thread:1; + unsigned pad1:1; + unsigned sfid:4; + } send_gen5; /* for Ironlake only */ + + struct { + unsigned src0_rep_ctrl:1; + unsigned src0_swizzle:8; + unsigned src0_subreg_nr:3; + unsigned src0_reg_nr:8; + unsigned pad0:1; + unsigned src1_rep_ctrl:1; + unsigned src1_swizzle:8; + unsigned src1_subreg_nr_low:2; + } da3src; + } bits2; + + union { + struct { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } ia1; + + struct { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad2:5; + } ia16; + + struct { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + /* This is also used for gen7 IF/ELSE instructions */ + struct { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + + /** + * \defgroup SEND instructions / Message Descriptors + * + * @{ + */ + + /** + * Generic Message Descriptor for Gen4 SEND instructions. The structs + * below expand function_control to something specific for their + * message. Due to struct packing issues, they duplicate these bits. + * + * See the G45 PRM, Volume 4, Table 14-15. + */ + struct { + unsigned function_control:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + /** + * Generic Message Descriptor for Gen5-7 SEND instructions. + * + * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most + * of the information on the SEND instruction is missing from the public + * Ironlake PRM.) + * + * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. + * According to the SEND instruction description: + * "The MSb of the message description, the EOT field, always comes from + * bit 127 of the instruction word"...which is bit 31 of this field. + */ + struct { + unsigned function_control:19; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } generic_gen5; + + /** G45 PRM, Volume 4, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned snapshot:1; + unsigned pad0:10; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } math_gen5; + + /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned simd_mode:2; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen5; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:5; + unsigned simd_mode:2; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen7; + + struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } urb; + + struct { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen5; + + struct { + unsigned opcode:3; + unsigned offset:11; + unsigned swizzle_control:1; + unsigned complete:1; + unsigned per_slot_offset:1; + unsigned pad0:2; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen7; + + /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2 */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned msg_type:3; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned msg_type:3; + unsigned target_cache:2; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_read_gen5; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_write_gen5; + + /** + * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. + * + * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. + **/ + struct { + unsigned binding_table_index:8; + unsigned msg_control:5; + unsigned msg_type:3; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp_sampler_const_cache; + + /** + * Message for the Sandybridge Render Cache Data Port. + * + * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, + * Section 3.9.2.1.1: Message Descriptor. + * + * "Slot Group Select" and "Last Render Target" are part of the + * 5-bit message control for Render Target Write messages. See + * Section 3.9.9.2.1 of the same volume. + */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned slot_group_select:1; + unsigned last_render_target:1; + unsigned msg_type:4; + unsigned send_commit_msg:1; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp; + + /** + * Message for any of the Gen7 Data Port caches. + * + * Most fields are defined in BSpec volume 5c.2 Data Port / Messages / + * Data Port Messages / Message Descriptor. Once again, "Slot Group + * Select" and "Last Render Target" are part of the 6-bit message + * control for Render Target Writes. + */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned slot_group_select:1; + unsigned last_render_target:1; + unsigned msg_control_pad:1; + unsigned msg_type:4; + unsigned pad1:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad2:2; + unsigned end_of_thread:1; + } gen7_dp; + /** @} */ + + struct { + unsigned src1_subreg_nr_high:1; + unsigned src1_reg_nr:8; + unsigned pad0:1; + unsigned src2_rep_ctrl:1; + unsigned src2_swizzle:8; + unsigned src2_subreg_nr:3; + unsigned src2_reg_nr:8; + unsigned pad1:2; + } da3src; + + int d; + unsigned ud; + float f; + } bits3; +}; + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg { + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! */ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 10000 + +struct brw_compile { + struct brw_instruction *store; + unsigned nr_insn; + + int gen; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + unsigned flag_value; + bool single_program_flow; + bool compressed; + + /* Control flow stacks: + * - if_stack contains IF and ELSE instructions which must be patched + * (and popped) once the matching ENDIF instruction is encountered. + */ + struct brw_instruction **if_stack; + int if_stack_depth; + int if_stack_array_size; +}; + +static inline int type_sz(unsigned type) +{ + switch (type) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. + * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask WRITEMASK_X/Y/Z/W bitfield + */ +static inline struct brw_reg brw_reg(unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + struct brw_reg reg; + if (file == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? + */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static inline struct brw_reg brw_vec16_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static inline struct brw_reg brw_vec8_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static inline struct brw_reg brw_vec4_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static inline struct brw_reg brw_vec2_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); +} + +/** Construct float[1] register */ +static inline struct brw_reg brw_vec1_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + + +static inline struct brw_reg __retype(struct brw_reg reg, + unsigned type) +{ + reg.type = type; + return reg; +} + +static inline struct brw_reg __retype_d(struct brw_reg reg) +{ + return __retype(reg, BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg __retype_ud(struct brw_reg reg) +{ + return __retype(reg, BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg __retype_uw(struct brw_reg reg) +{ + return __retype(reg, BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg __sechalf(struct brw_reg reg) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + +static inline struct brw_reg __suboffset(struct brw_reg reg, + unsigned delta) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + +static inline struct brw_reg __offset(struct brw_reg reg, + unsigned delta) +{ + reg.nr += delta; + return reg; +} + +static inline struct brw_reg byte_offset(struct brw_reg reg, + unsigned bytes) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static inline struct brw_reg brw_uw16_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static inline struct brw_reg brw_uw8_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static inline struct brw_reg brw_uw1_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static inline struct brw_reg brw_imm_reg(unsigned type) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static inline struct brw_reg brw_imm_f(float f) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static inline struct brw_reg brw_imm_d(int d) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static inline struct brw_reg brw_imm_ud(unsigned ud) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static inline struct brw_reg brw_imm_uw(uint16_t uw) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static inline struct brw_reg brw_imm_w(int16_t w) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static inline struct brw_reg brw_imm_v(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static inline struct brw_reg brw_imm_vf(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static inline struct brw_reg brw_imm_vf4(unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + +static inline struct brw_reg brw_address(struct brw_reg reg) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct null register (usually used for setting condition codes) */ +static inline struct brw_reg brw_null_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static inline struct brw_reg brw_address_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static inline struct brw_reg brw_ip_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + WRITEMASK_XYZW); /* NOTE! */ +} + +static inline struct brw_reg brw_acc_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + +static inline struct brw_reg brw_notification_1_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NOTIFICATION_COUNT, + 1, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + +static inline struct brw_reg brw_flag_reg(void) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + +static inline struct brw_reg brw_mask_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static inline struct brw_reg brw_message_reg(unsigned nr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); +} + +static inline struct brw_reg brw_message4_reg(unsigned nr, + int subnr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr); +} + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static inline unsigned cvt(unsigned val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static inline struct brw_reg __stride(struct brw_reg reg, + unsigned vstride, + unsigned width, + unsigned hstride) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static inline struct brw_reg vec16(struct brw_reg reg) +{ + return __stride(reg, 16,16,1); +} + +static inline struct brw_reg vec8(struct brw_reg reg) +{ + return __stride(reg, 8,8,1); +} + +static inline struct brw_reg vec4(struct brw_reg reg) +{ + return __stride(reg, 4,4,1); +} + +static inline struct brw_reg vec2(struct brw_reg reg) +{ + return __stride(reg, 2,2,1); +} + +static inline struct brw_reg vec1(struct brw_reg reg) +{ + return __stride(reg, 0,1,0); +} + +static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt) +{ + return vec1(__suboffset(reg, elt)); +} + +static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt) +{ + return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + +static inline struct brw_reg brw_swizzle(struct brw_reg reg, + unsigned x, + unsigned y, + unsigned z, + unsigned w) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + +static inline struct brw_reg brw_swizzle1(struct brw_reg reg, + unsigned x) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static inline struct brw_reg brw_writemask(struct brw_reg reg, + unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask &= mask; + return reg; +} + +static inline struct brw_reg brw_set_writemask(struct brw_reg reg, + unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask = mask; + return reg; +} + +static inline struct brw_reg brw_negate(struct brw_reg reg) +{ + reg.negate ^= 1; + return reg; +} + +static inline struct brw_reg brw_abs(struct brw_reg reg) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** +*/ +static inline struct brw_reg brw_vec4_indirect(unsigned subnr, + int offset) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg brw_vec1_indirect(unsigned subnr, + int offset) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ + return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset) +{ + ptr.addr_offset += offset; + return ptr; +} + +static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? */ +static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +static inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) +{ + p->current->header.predicate_inverse = predicate_inverse; +} + +static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditionalmod = conditional; +} + +static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode) +{ + p->current->header.access_mode = access_mode; +} + +static inline void brw_set_mask_control(struct brw_compile *p, unsigned value) +{ + p->current->header.mask_control = value; +} + +static inline void brw_set_saturate(struct brw_compile *p, unsigned value) +{ + p->current->header.saturate = value; +} + +static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value) +{ + if (p->gen >= 60) + p->current->header.acc_wr_control = value; +} + +void brw_pop_insn_state(struct brw_compile *p); +void brw_push_insn_state(struct brw_compile *p); +void brw_set_compression_control(struct brw_compile *p, enum brw_compression control); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); + +void brw_compile_init(struct brw_compile *p, int gen, void *store); + +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); +void brw_set_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg); + +void gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + unsigned msg_reg_nr); + +static inline struct brw_instruction * +brw_next_insn(struct brw_compile *p, unsigned opcode) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + *insn = *p->current; + + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + +/* Helpers for regular instructions: */ +#define ALU1(OP) \ +static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + +/* Rounding operations (other than RNDD) require two instructions - the first + * stores a rounded value (possibly the wrong way) in the dest register, but + * also sets a per-channel "increment bit" in the flag register. A predicated + * add of 1.0 fixes dest to contain the desired result. + * + * Sandybridge and later appear to round correctly without an ADD. + */ +#define ROUND(OP) \ +static inline void brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src) \ +{ \ + struct brw_instruction *rnd, *add; \ + rnd = brw_next_insn(p, BRW_OPCODE_##OP); \ + brw_set_dest(p, rnd, dest); \ + brw_set_src0(p, rnd, src); \ + if (p->gen < 60) { \ + /* turn on round-increments */ \ + rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ + add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ + add->header.predicate_control = BRW_PREDICATE_NORMAL; \ + } \ +} + +static inline struct brw_instruction *brw_alu1(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src) +{ + struct brw_instruction *insn = brw_next_insn(p, opcode); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + return insn; +} + +static inline struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = brw_next_insn(p, opcode); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + return insn; +} + +static inline struct brw_instruction *brw_ADD(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.2.2: add */ + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); +} + +static inline struct brw_instruction *brw_MUL(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.32.38: mul */ + if (src0.type == BRW_REGISTER_TYPE_D || + src0.type == BRW_REGISTER_TYPE_UD || + src1.type == BRW_REGISTER_TYPE_D || + src1.type == BRW_REGISTER_TYPE_UD) { + assert(dest.type != BRW_REGISTER_TYPE_F); + } + + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_ACCUMULATOR); + assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || + src1.nr != BRW_ARF_ACCUMULATOR); + + return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); +} + +static inline struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +ALU1(MOV); +ALU2(SEL); +ALU1(NOT); +ALU2(AND); +ALU2(OR); +ALU2(XOR); +ALU2(SHR); +ALU2(SHL); +ALU2(RSR); +ALU2(RSL); +ALU2(ASR); +ALU1(FRC); +ALU1(RNDD); +ALU2(MAC); +ALU2(MACH); +ALU1(LZD); +ALU2(DP4); +ALU2(DPH); +ALU2(DP3); +ALU2(DP2); +ALU2(LINE); +ALU2(PLN); + +ROUND(RNDZ); +ROUND(RNDE); + +#undef ALU1 +#undef ALU2 +#undef ROUND + +/* Helpers for SEND instruction */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + bool header_present, + bool last_render_target, + unsigned response_length, + bool end_of_thread, + bool send_commit_msg); + +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool eot, + bool writes_complete, + unsigned offset, + unsigned swizzle); + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + unsigned response_length, + bool eot); + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool header_present); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode); + +void brw_math_16(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision); + +void brw_math(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision); + +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + struct brw_reg src0, + struct brw_reg src1); + +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index); + +void brw_oword_block_read_scratch(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + unsigned offset); + +void brw_oword_block_write_scratch(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + unsigned offset); + +void brw_dword_scattered_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index); + +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + unsigned location, + unsigned bind_table_index); + +void brw_dp_READ_4_vs_relative(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg addrReg, + unsigned offset, + unsigned bind_table_index); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + unsigned execute_size); +struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional, + struct brw_reg src0, struct brw_reg src1); + +void brw_ELSE(struct brw_compile *p); +void brw_ENDIF(struct brw_compile *p); + +/* DO/WHILE loops: +*/ +struct brw_instruction *brw_DO(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); +struct brw_instruction *gen6_CONT(struct brw_compile *p, + struct brw_instruction *do_insn); +/* Forward jumps: +*/ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + +void brw_NOP(struct brw_compile *p); + +void brw_WAIT(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg(struct brw_reg reg); + +static inline void brw_math_invert(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR); +} + +void brw_set_uip_jip(struct brw_compile *p); + +uint32_t brw_swap_cmod(uint32_t cmod); + +void brw_disasm(FILE *file, + const struct brw_instruction *inst, + int gen); + +#endif diff --git a/src/sna/brw/brw_eu_debug.c b/src/sna/brw/brw_eu_debug.c new file mode 100644 index 00000000..99453afd --- /dev/null +++ b/src/sna/brw/brw_eu_debug.c @@ -0,0 +1,95 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "main/mtypes.h" +#include "main/imports.h" +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ + printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ + printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + printf("imm %f", hwreg.dw1.f); + } + else { + printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c new file mode 100644 index 00000000..3f01ae7b --- /dev/null +++ b/src/sna/brw/brw_eu_emit.c @@ -0,0 +1,2002 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_eu.h" + +#include <string.h> +#include <stdlib.h> + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) +{ + if (reg.width == BRW_WIDTH_8 && p->compressed) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; +} + + +/** + * Prior to Sandybridge, the SEND instruction accepted non-MRF source + * registers, implicitly moving the operand to a message register. + * + * On Sandybridge, this is no longer the case. This function performs the + * explicit move; it should be called before emitting a SEND instruction. + */ +void +gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + unsigned msg_reg_nr) +{ + if (p->gen < 60) + return; + + if (src->file == BRW_MESSAGE_REGISTER_FILE) + return; + + if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src)); + brw_pop_insn_state(p); + } + *src = brw_message_reg(msg_reg_nr); +} + +static void +gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) +{ + /* From the BSpec / ISA Reference / send - [DevIVB+]: + * "The send with EOT should use register space R112-R127 for <src>. This is + * to enable loading of a new thread into the same slot while the message + * with EOT for current thread is pending dispatch." + * + * Since we're pretending to have 16 MRFs anyway, we may as well use the + * registers required for messages with EOT. + */ + if (p->gen >= 70 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + reg->file = BRW_GENERAL_REGISTER_FILE; + reg->nr += 111; + } +} + +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) +{ + if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && + dest.file != BRW_MESSAGE_REGISTER_FILE) + assert(dest.nr < 128); + + gen7_convert_mrf_to_grf(p, &dest); + + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.da16.dest_horiz_stride = 1; + } + } else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.ia16.dest_horiz_stride = 1; + } + } + + guess_execution_size(p, insn, dest); +} + +static const int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + +static void +validate_reg(struct brw_instruction *insn, struct brw_reg reg) +{ + int hstride_for_reg[] = {0, 1, 2, 4}; + int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; + int width_for_reg[] = {1, 2, 4, 8, 16}; + int execsize_for_reg[] = {1, 2, 4, 8, 16}; + int width, hstride, vstride, execsize; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* 3.3.6: Region Parameters. Restriction: Immediate vectors + * mean the destination has to be 128-bit aligned and the + * destination horiz stride has to be a word. + */ + if (reg.type == BRW_REGISTER_TYPE_V) { + assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * + reg_type_size[insn->bits1.da1.dest_reg_type] == 2); + } + + return; + } + + if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.file == BRW_ARF_NULL) + return; + + hstride = hstride_for_reg[reg.hstride]; + + if (reg.vstride == 0xf) { + vstride = -1; + } else { + vstride = vstride_for_reg[reg.vstride]; + } + + width = width_for_reg[reg.width]; + + execsize = execsize_for_reg[insn->header.execution_size]; + + /* Restrictions from 3.3.10: Register Region Restrictions. */ + /* 3. */ + assert(execsize >= width); + + /* 4. */ + if (execsize == width && hstride != 0) { + assert(vstride == -1 || vstride == width * hstride); + } + + /* 5. */ + if (execsize == width && hstride == 0) { + /* no restriction on vstride. */ + } + + /* 6. */ + if (width == 1) { + assert(hstride == 0); + } + + /* 7. */ + if (execsize == 1 && width == 1) { + assert(hstride == 0); + assert(vstride == 0); + } + + /* 8. */ + if (vstride == 0 && hstride == 0) { + assert(width == 1); + } + + /* 10. Check destination issues. */ +} + +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) +{ + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + + gen7_convert_mrf_to_grf(p, ®); + + validate_reg(insn, reg); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } else { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + +void brw_set_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + assert(reg.nr < 128); + + gen7_convert_mrf_to_grf(p, ®); + + validate_reg(insn, reg); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + +/** + * Set the Message Descriptor and Extended Message Descriptor fields + * for SEND messages. + * + * \note This zeroes out the Function Control bits, so it must be called + * \b before filling out any message-specific data. Callers can + * choose not to fill in irrelevant bits; they will be zero. + */ +static void +brw_set_message_descriptor(struct brw_compile *p, + struct brw_instruction *inst, + enum brw_message_target sfid, + unsigned msg_length, + unsigned response_length, + bool header_present, + bool end_of_thread) +{ + brw_set_src1(p, inst, brw_imm_d(0)); + + if (p->gen >= 50) { + inst->bits3.generic_gen5.header_present = header_present; + inst->bits3.generic_gen5.response_length = response_length; + inst->bits3.generic_gen5.msg_length = msg_length; + inst->bits3.generic_gen5.end_of_thread = end_of_thread; + + if (p->gen >= 60) { + /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ + inst->header.destreg__conditionalmod = sfid; + } else { + /* Set Extended Message Descriptor (ex_desc) */ + inst->bits2.send_gen5.sfid = sfid; + inst->bits2.send_gen5.end_of_thread = end_of_thread; + } + } else { + inst->bits3.generic.response_length = response_length; + inst->bits3.generic.msg_length = msg_length; + inst->bits3.generic.msg_target = sfid; + inst->bits3.generic.end_of_thread = end_of_thread; + } +} + + +static void brw_set_math_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned function, + unsigned integer_type, + bool low_precision, + bool saturate, + unsigned dataType) +{ + unsigned msg_length; + unsigned response_length; + + /* Infer message length from the function */ + switch (function) { + case BRW_MATH_FUNCTION_POW: + case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: + case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: + case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: + msg_length = 2; + break; + default: + msg_length = 1; + break; + } + + /* Infer response length from the function */ + switch (function) { + case BRW_MATH_FUNCTION_SINCOS: + case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: + response_length = 2; + break; + default: + response_length = 1; + break; + } + + brw_set_message_descriptor(p, insn, BRW_SFID_MATH, + msg_length, response_length, + false, false); + if (p->gen == 50) { + insn->bits3.math_gen5.function = function; + insn->bits3.math_gen5.int_type = integer_type; + insn->bits3.math_gen5.precision = low_precision; + insn->bits3.math_gen5.saturate = saturate; + insn->bits3.math_gen5.data_type = dataType; + insn->bits3.math_gen5.snapshot = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + } +} + +static void brw_set_ff_sync_message(struct brw_compile *p, + struct brw_instruction *insn, + bool allocate, + unsigned response_length, + bool end_of_thread) +{ + brw_set_message_descriptor(p, insn, BRW_SFID_URB, + 1, response_length, + true, end_of_thread); + insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ + insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ +} + +static void brw_set_urb_message(struct brw_compile *p, + struct brw_instruction *insn, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool end_of_thread, + bool complete, + unsigned offset, + unsigned swizzle_control) +{ + brw_set_message_descriptor(p, insn, BRW_SFID_URB, + msg_length, response_length, true, end_of_thread); + if (p->gen >= 70) { + insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ + insn->bits3.urb_gen7.offset = offset; + assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); + insn->bits3.urb_gen7.swizzle_control = swizzle_control; + /* per_slot_offset = 0 makes it ignore offsets in message header */ + insn->bits3.urb_gen7.per_slot_offset = 0; + insn->bits3.urb_gen7.complete = complete; + } else if (p->gen >= 50) { + insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; /* ? */ + insn->bits3.urb_gen5.complete = complete; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + } +} + +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + bool header_present, + bool last_render_target, + unsigned response_length, + bool end_of_thread, + bool send_commit_msg) +{ + unsigned sfid; + + if (p->gen >= 70) { + /* Use the Render Cache for RT writes; otherwise use the Data Cache */ + if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) + sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + else + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + } else if (p->gen >= 60) { + /* Use the render cache for all write messages. */ + sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + } else { + sfid = BRW_SFID_DATAPORT_WRITE; + } + + brw_set_message_descriptor(p, insn, sfid, + msg_length, response_length, + header_present, end_of_thread); + + if (p->gen >= 70) { + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.last_render_target = last_render_target; + insn->bits3.gen7_dp.msg_type = msg_type; + } else if (p->gen >= 60) { + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.last_render_target = last_render_target; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; + } else if (p->gen >= 50) { + insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_write_gen5.msg_control = msg_control; + insn->bits3.dp_write_gen5.last_render_target = last_render_target; + insn->bits3.dp_write_gen5.msg_type = msg_type; + insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.last_render_target = last_render_target; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = send_commit_msg; + } +} + +void +brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length) +{ + unsigned sfid; + + if (p->gen >= 70) { + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + } else if (p->gen >= 60) { + if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) + sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + else + sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; + } else { + sfid = BRW_SFID_DATAPORT_READ; + } + + brw_set_message_descriptor(p, insn, sfid, + msg_length, response_length, + true, false); + + if (p->gen >= 70) { + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.last_render_target = 0; + insn->bits3.gen7_dp.msg_type = msg_type; + } else if (p->gen >= 60) { + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.last_render_target = 0; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = 0; + } else if (p->gen >= 50) { + insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_read_gen5.msg_control = msg_control; + insn->bits3.dp_read_gen5.msg_type = msg_type; + insn->bits3.dp_read_gen5.target_cache = target_cache; + } else if (p->gen >= 45) { + insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ + insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ + insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + } +} + +static void brw_set_sampler_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned sampler, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode) +{ + brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, + msg_length, response_length, + header_present, false); + + if (p->gen >= 70) { + insn->bits3.sampler_gen7.binding_table_index = binding_table_index; + insn->bits3.sampler_gen7.sampler = sampler; + insn->bits3.sampler_gen7.msg_type = msg_type; + insn->bits3.sampler_gen7.simd_mode = simd_mode; + } else if (p->gen >= 50) { + insn->bits3.sampler_gen5.binding_table_index = binding_table_index; + insn->bits3.sampler_gen5.sampler = sampler; + insn->bits3.sampler_gen5.msg_type = msg_type; + insn->bits3.sampler_gen5.simd_mode = simd_mode; + } else if (p->gen >= 45) { + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + } +} + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src1(p, insn, brw_imm_ud(0x0)); +} + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +static void +push_if_stack(struct brw_compile *p, struct brw_instruction *inst) +{ + p->if_stack[p->if_stack_depth] = inst; + + p->if_stack_depth++; + if (p->if_stack_array_size <= p->if_stack_depth) { + p->if_stack_array_size *= 2; + p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size); + } +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + */ +struct brw_instruction * +brw_IF(struct brw_compile *p, unsigned execute_size) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_IF); + + /* Override the defaults for this instruction: */ + if (p->gen < 60) { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (p->gen < 70) { + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = 0; + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + } else { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = 0; + insn->bits3.break_cont.uip = 0; + } + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + push_if_stack(p, insn); + return insn; +} + +/* This function is only used for gen6-style IF instructions with an + * embedded comparison (conditional modifier). It is not used on gen7. + */ +struct brw_instruction * +gen6_IF(struct brw_compile *p, uint32_t conditional, + struct brw_reg src0, struct brw_reg src1) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_IF); + + brw_set_dest(p, insn, brw_imm_w(0)); + if (p->compressed) { + insn->header.execution_size = BRW_EXECUTE_16; + } else { + insn->header.execution_size = BRW_EXECUTE_8; + } + insn->bits1.branch_gen6.jump_count = 0; + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + + assert(insn->header.compression_control == BRW_COMPRESSION_NONE); + assert(insn->header.predicate_control == BRW_PREDICATE_NONE); + insn->header.destreg__conditionalmod = conditional; + + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + push_if_stack(p, insn); + return insn; +} + +/** + * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. + */ +static void +convert_IF_ELSE_to_ADD(struct brw_compile *p, + struct brw_instruction *if_inst, + struct brw_instruction *else_inst) +{ + /* The next instruction (where the ENDIF would be, if it existed) */ + struct brw_instruction *next_inst = &p->store[p->nr_insn]; + + assert(p->single_program_flow); + assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); + assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); + assert(if_inst->header.execution_size == BRW_EXECUTE_1); + + /* Convert IF to an ADD instruction that moves the instruction pointer + * to the first instruction of the ELSE block. If there is no ELSE + * block, point to where ENDIF would be. Reverse the predicate. + * + * There's no need to execute an ENDIF since we don't need to do any + * stack operations, and if we're currently executing, we just want to + * continue normally. + */ + if_inst->header.opcode = BRW_OPCODE_ADD; + if_inst->header.predicate_inverse = 1; + + if (else_inst != NULL) { + /* Convert ELSE to an ADD instruction that points where the ENDIF + * would be. + */ + else_inst->header.opcode = BRW_OPCODE_ADD; + + if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; + else_inst->bits3.ud = (next_inst - else_inst) * 16; + } else { + if_inst->bits3.ud = (next_inst - if_inst) * 16; + } +} + +/** + * Patch IF and ELSE instructions with appropriate jump targets. + */ +static void +patch_IF_ELSE(struct brw_compile *p, + struct brw_instruction *if_inst, + struct brw_instruction *else_inst, + struct brw_instruction *endif_inst) +{ + unsigned br = 1; + + assert(!p->single_program_flow); + assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); + assert(endif_inst != NULL); + assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); + + /* Jump count is for 64bit data chunk each, so one 128bit instruction + * requires 2 chunks. + */ + if (p->gen >= 50) + br = 2; + + assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); + endif_inst->header.execution_size = if_inst->header.execution_size; + + if (else_inst == NULL) { + /* Patch IF -> ENDIF */ + if (p->gen < 60) { + /* Turn it into an IFF, which means no mask stack operations for + * all-false and jumping past the ENDIF. + */ + if_inst->header.opcode = BRW_OPCODE_IFF; + if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (p->gen < 70) { + /* As of gen6, there is no IFF and IF must point to the ENDIF. */ + if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); + } else { + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); + } + } else { + else_inst->header.execution_size = if_inst->header.execution_size; + + /* Patch IF -> ELSE */ + if (p->gen < 60) { + if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (p->gen <= 70) { + if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); + } + + /* Patch ELSE -> ENDIF */ + if (p->gen < 60) { + /* BRW_OPCODE_ELSE pre-gen6 should point just past the + * matching ENDIF. + */ + else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); + else_inst->bits3.if_else.pop_count = 1; + else_inst->bits3.if_else.pad0 = 0; + } else if (p->gen < 70) { + /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ + else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); + } else { + /* The IF instruction's JIP should point just past the ELSE */ + if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); + /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); + } + } +} + +void +brw_ELSE(struct brw_compile *p) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_ELSE); + + if (p->gen < 60) { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (p->gen < 70) { + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = 0; + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + } else { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = 0; + insn->bits3.break_cont.uip = 0; + } + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + push_if_stack(p, insn); +} + +void +brw_ENDIF(struct brw_compile *p) +{ + struct brw_instruction *insn; + struct brw_instruction *else_inst = NULL; + struct brw_instruction *if_inst = NULL; + + /* Pop the IF and (optional) ELSE instructions from the stack */ + p->if_stack_depth--; + if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { + else_inst = p->if_stack[p->if_stack_depth]; + p->if_stack_depth--; + } + if_inst = p->if_stack[p->if_stack_depth]; + + if (p->single_program_flow) { + /* ENDIF is useless; don't bother emitting it. */ + convert_IF_ELSE_to_ADD(p, if_inst, else_inst); + return; + } + + insn = brw_next_insn(p, BRW_OPCODE_ENDIF); + + if (p->gen < 60) { + brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (p->gen < 70) { + brw_set_dest(p, insn, brw_imm_w(0)); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + } else { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + } + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Also pop item off the stack in the endif instruction: */ + if (p->gen < 60) { + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } else if (p->gen < 70) { + insn->bits1.branch_gen6.jump_count = 2; + } else { + insn->bits3.break_cont.jip = 2; + } + patch_IF_ELSE(p, if_inst, else_inst, insn); +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_BREAK); + if (p->gen >= 60) { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *gen6_CONT(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) +{ + struct brw_instruction *insn; + insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + return insn; +} + +/* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. + */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ + if (p->gen >= 60 || p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(p, insn, brw_null_reg()); + brw_set_src0(p, insn, brw_null_reg()); + brw_set_src1(p, insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + unsigned br = 1; + + if (p->gen >= 50) + br = 2; + + if (p->gen >= 70) { + insn = brw_next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = br * (do_insn - insn); + + insn->header.execution_size = BRW_EXECUTE_8; + } else if (p->gen >= 60) { + insn = brw_next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + + insn->header.execution_size = BRW_EXECUTE_8; + } else { + if (p->single_program_flow) { + insn = brw_next_insn(p, BRW_OPCODE_ADD); + + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = brw_next_insn(p, BRW_OPCODE_WHILE); + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0)); + + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + unsigned jmpi = 1; + + if (p->gen >= 50) + jmpi = 2; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditionalmod = conditional; + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + +/* Issue 'wait' instruction for n1, host could program MMIO + to wake up thread. */ +void brw_WAIT(struct brw_compile *p) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT); + struct brw_reg src = brw_notification_1_reg(); + + brw_set_dest(p, insn, src); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + insn->header.execution_size = 0; /* must */ + insn->header.predicate_control = 0; + insn->header.compression_control = 0; +} + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/** Extended math function, float[8]. + */ +void brw_math(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision) +{ + if (p->gen >= 60) { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src.type == BRW_REGISTER_TYPE_F); + } + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + } else { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, function, + src.type == BRW_REGISTER_TYPE_D, + precision, + saturate, + data_type); + } +} + +/** Extended math function, float[8]. + */ +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src0.file == BRW_GENERAL_REGISTER_FILE); + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src0.type == BRW_REGISTER_TYPE_F); + assert(src1.type == BRW_REGISTER_TYPE_F); + } + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); +} + +/** + * Extended math function, float[16]. + * Use 2 send instructions. + */ +void brw_math_16(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision) +{ + struct brw_instruction *insn; + + if (p->gen >= 60) { + insn = brw_next_insn(p, BRW_OPCODE_MATH); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + return; + } + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditionalmod = msg_reg_nr+1; + + brw_set_dest(p, insn, __offset(dest,1)); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + +/** + * Write a block of OWORDs (half a GRF each) from the scratch buffer, + * using a constant offset per channel. + * + * The offset must be aligned to oword size (16 bytes). Used for + * register spilling. + */ +void brw_oword_block_write_scratch(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + unsigned offset) +{ + uint32_t msg_control, msg_type; + int mlen; + + if (p->gen >= 60) + offset /= 16; + + mrf = __retype_ud(mrf); + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + mlen = 2; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + mlen = 3; + } + + /* Set up the message header. This is g0, with g0.2 filled with + * the offset. We don't want to leave our offset around in g0 or + * it'll screw up texture samples, so set it up inside the message + * reg. + */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), + brw_imm_ud(offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_reg dest; + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + int send_commit_msg; + struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0)); + + if (insn->header.compression_control != BRW_COMPRESSION_NONE) { + insn->header.compression_control = BRW_COMPRESSION_NONE; + src_header = vec16(src_header); + } + assert(insn->header.predicate_control == BRW_PREDICATE_NONE); + insn->header.destreg__conditionalmod = mrf.nr; + + /* Until gen6, writes followed by reads from the same location + * are not guaranteed to be ordered unless write_commit is set. + * If set, then a no-op write is issued to the destination + * register to set a dependency, and a read from the destination + * can be used to ensure the ordering. + * + * For gen6, only writes between different threads need ordering + * protection. Our use of DP writes is all about register + * spilling within a thread. + */ + if (p->gen >= 60) { + dest = __retype_uw(vec16(brw_null_reg())); + send_commit_msg = 0; + } else { + dest = src_header; + send_commit_msg = 1; + } + + brw_set_dest(p, insn, dest); + if (p->gen >= 60) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + if (p->gen >= 60) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + + brw_set_dp_write_message(p, + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + msg_type, + mlen, + true, /* header_present */ + 0, /* pixel scoreboard */ + send_commit_msg, /* response_length */ + 0, /* eot */ + send_commit_msg); + } +} + + +/** + * Read a block of owords (half a GRF each) from the scratch buffer + * using a constant index per channel. + * + * Offset must be aligned to oword size (16 bytes). Used for register + * spilling. + */ +void +brw_oword_block_read_scratch(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + unsigned offset) +{ + uint32_t msg_control; + int rlen; + + if (p->gen >= 60) + offset /= 16; + + mrf = __retype_ud(mrf); + dest = __retype_uw(dest); + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + rlen = 1; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + rlen = 2; + } + + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), + brw_imm_ud(offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + + assert(insn->header.predicate_control == 0); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = mrf.nr; + + brw_set_dest(p, insn, dest); /* UW? */ + if (p->gen >= 60) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 1, /* msg_length */ + rlen); + } +} + +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + */ +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index) +{ + struct brw_instruction *insn; + + /* On newer hardware, offset is in units of owords. */ + if (p->gen >= 60) + offset /= 16; + + mrf = __retype_ud(mrf); + + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), + brw_imm_ud(offset)); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = mrf.nr; + + /* cast dest to a uword[8] vector */ + dest = __retype_uw(vec8(dest)); + + brw_set_dest(p, insn, dest); + if (p->gen >= 60) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + 1); /* response_length (1 reg, 2 owords!) */ + + brw_pop_insn_state(p); +} + +/** + * Read a set of dwords from the data port Data Cache (const buffer). + * + * Location (in buffer) appears as UD offsets in the register after + * the provided mrf header reg. + */ +void brw_dword_scattered_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index) +{ + struct brw_instruction *insn; + + mrf = __retype_ud(mrf); + + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + brw_pop_insn_state(p); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = mrf.nr; + + /* cast dest to a uword[8] vector */ + dest = __retype_uw(vec8(dest)); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, brw_null_reg()); + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, + BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ +} + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + unsigned location, + unsigned bind_table_index) +{ + struct brw_instruction *insn; + unsigned msg_reg_nr = 1; + + if (p->gen >= 60) + location /= 16; + + /* Setup MRF[1] with location/offset into const buffer */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)), + brw_imm_ud(location)); + brw_pop_insn_state(p); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + brw_set_dest(p, insn, dest); + if (p->gen >= 60) { + brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + bind_table_index, + 0, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + 1); /* response_length (1 Oword) */ +} + +/** + * Read a float[4] constant per vertex from VS constant buffer, with + * relative addressing. + */ +void brw_dp_READ_4_vs_relative(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg addr_reg, + unsigned offset, + unsigned bind_table_index) +{ + struct brw_reg src = brw_vec8_grf(0, 0); + struct brw_instruction *insn; + int msg_type; + + /* Setup MRF[1] with offset into const buffer */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* M1.0 is block offset 0, M1.4 is block offset 1, all other + * fields ignored. + */ + brw_ADD(p, __retype_d(brw_message_reg(1)), + addr_reg, brw_imm_d(offset)); + brw_pop_insn_state(p); + + gen6_resolve_implied_move(p, &src, 0); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = 0; + insn->header.mask_control = BRW_MASK_DISABLE; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + + if (p->gen >= 60) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (p->gen >= 45) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ +} + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool header_present) +{ + struct brw_instruction *insn; + unsigned msg_type; + struct brw_reg dest; + + if (dispatch_width == 16) + dest = __retype_uw(vec16(brw_null_reg())); + else + dest = __retype_uw(vec8(brw_null_reg())); + + if (p->gen >= 60 && binding_table_index == 0) { + insn = brw_next_insn(p, BRW_OPCODE_SENDC); + } else { + insn = brw_next_insn(p, BRW_OPCODE_SEND); + } + /* The execution mask is ignored for render target writes. */ + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->gen >= 60) { + /* headerless version, just submit color payload */ + src0 = brw_message_reg(msg_reg_nr); + + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } else { + insn->header.destreg__conditionalmod = msg_reg_nr; + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, + insn, + binding_table_index, + msg_control, + msg_type, + msg_length, + header_present, + eot, + response_length, + eot, + 0 /* send_commit_msg */); +} + +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. + */ +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode) +{ + assert(writemask); + + if (p->gen < 50 || writemask != WRITEMASK_XYZW) { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + writemask = ~writemask & WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0))); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12)); + + brw_pop_insn_state(p); + + src0 = __retype_uw(brw_null_reg()); + } + + { + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + if (p->gen < 60) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_sampler_message(p, insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + header_present, + simd_mode); + } +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool eot, + bool writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + if (p->gen >= 70) { + /* Enable Channel Masks in the URB_WRITE_HWORD message header */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)), + __retype_ud(brw_vec1_grf(0, 5)), + brw_imm_ud(0xff00)); + brw_pop_insn_state(p); + } + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < BRW_MAX_MRF); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (p->gen <= 60) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_urb_message(p, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + int jip = p->gen <= 70 ? insn->bits1.branch_gen6.jump_count + : insn->bits3.break_cont.jip; + if (ip + jip / br <= start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + int ip; + int br = 2; + + if (p->gen <= 60) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + /* Gen7 UIP points to WHILE; Gen6 points just after it */ + insn->bits3.break_cont.uip = + br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 70 ? 1 : 0)); + break; + case BRW_OPCODE_CONTINUE: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); + + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + unsigned response_length, + bool eot) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (p->gen < 60) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p, + insn, + allocate, + response_length, + eot); +} diff --git a/src/sna/brw/brw_eu_util.c b/src/sna/brw/brw_eu_util.c new file mode 100644 index 00000000..5405cf17 --- /dev/null +++ b/src/sna/brw/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count) +{ + GLuint i; + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + |