summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/tgsi')
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c143
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h5
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c163
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c1575
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h155
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c536
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c345
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c12
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h3
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c6
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c26
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c848
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h56
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c43
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h4
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c225
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h127
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c484
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h73
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c98
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h18
21 files changed, 3875 insertions, 1070 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c
index fdb7febf7..d525c8ff3 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -110,6 +110,8 @@ tgsi_default_declaration( void )
declaration.Invariant = 0;
declaration.Local = 0;
declaration.Array = 0;
+ declaration.Atomic = 0;
+ declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL;
declaration.Padding = 0;
return declaration;
@@ -125,6 +127,8 @@ tgsi_build_declaration(
unsigned invariant,
unsigned local,
unsigned array,
+ unsigned atomic,
+ unsigned mem_type,
struct tgsi_header *header )
{
struct tgsi_declaration declaration;
@@ -141,6 +145,8 @@ tgsi_build_declaration(
declaration.Invariant = invariant;
declaration.Local = local;
declaration.Array = array;
+ declaration.Atomic = atomic;
+ declaration.MemType = mem_type;
header_bodysize_grow( header );
return declaration;
@@ -259,36 +265,39 @@ tgsi_build_declaration_semantic(
return ds;
}
-static struct tgsi_declaration_resource
-tgsi_default_declaration_resource(void)
+static struct tgsi_declaration_image
+tgsi_default_declaration_image(void)
{
- struct tgsi_declaration_resource dr;
+ struct tgsi_declaration_image di;
- dr.Resource = TGSI_TEXTURE_BUFFER;
- dr.Raw = 0;
- dr.Writable = 0;
- dr.Padding = 0;
+ di.Resource = TGSI_TEXTURE_BUFFER;
+ di.Raw = 0;
+ di.Writable = 0;
+ di.Format = 0;
+ di.Padding = 0;
- return dr;
+ return di;
}
-static struct tgsi_declaration_resource
-tgsi_build_declaration_resource(unsigned texture,
- unsigned raw,
- unsigned writable,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header)
+static struct tgsi_declaration_image
+tgsi_build_declaration_image(unsigned texture,
+ unsigned format,
+ unsigned raw,
+ unsigned writable,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header)
{
- struct tgsi_declaration_resource dr;
+ struct tgsi_declaration_image di;
- dr = tgsi_default_declaration_resource();
- dr.Resource = texture;
- dr.Raw = raw;
- dr.Writable = writable;
+ di = tgsi_default_declaration_image();
+ di.Resource = texture;
+ di.Format = format;
+ di.Raw = raw;
+ di.Writable = writable;
declaration_grow(declaration, header);
- return dr;
+ return di;
}
static struct tgsi_declaration_sampler_view
@@ -364,7 +373,7 @@ tgsi_default_full_declaration( void )
full_declaration.Range = tgsi_default_declaration_range();
full_declaration.Semantic = tgsi_default_declaration_semantic();
full_declaration.Interp = tgsi_default_declaration_interp();
- full_declaration.Resource = tgsi_default_declaration_resource();
+ full_declaration.Image = tgsi_default_declaration_image();
full_declaration.SamplerView = tgsi_default_declaration_sampler_view();
full_declaration.Array = tgsi_default_declaration_array();
@@ -396,6 +405,8 @@ tgsi_build_full_declaration(
full_decl->Declaration.Invariant,
full_decl->Declaration.Local,
full_decl->Declaration.Array,
+ full_decl->Declaration.Atomic,
+ full_decl->Declaration.MemType,
header );
if (maxsize <= size)
@@ -454,20 +465,21 @@ tgsi_build_full_declaration(
header );
}
- if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) {
- struct tgsi_declaration_resource *dr;
+ if (full_decl->Declaration.File == TGSI_FILE_IMAGE) {
+ struct tgsi_declaration_image *di;
if (maxsize <= size) {
return 0;
}
- dr = (struct tgsi_declaration_resource *)&tokens[size];
+ di = (struct tgsi_declaration_image *)&tokens[size];
size++;
- *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource,
- full_decl->Resource.Raw,
- full_decl->Resource.Writable,
- declaration,
- header);
+ *di = tgsi_build_declaration_image(full_decl->Image.Resource,
+ full_decl->Image.Format,
+ full_decl->Image.Raw,
+ full_decl->Image.Writable,
+ declaration,
+ header);
}
if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
@@ -616,7 +628,8 @@ tgsi_default_instruction( void )
instruction.NumSrcRegs = 1;
instruction.Label = 0;
instruction.Texture = 0;
- instruction.Padding = 0;
+ instruction.Memory = 0;
+ instruction.Padding = 0;
return instruction;
}
@@ -762,6 +775,40 @@ tgsi_build_instruction_texture(
return instruction_texture;
}
+static struct tgsi_instruction_memory
+tgsi_default_instruction_memory( void )
+{
+ struct tgsi_instruction_memory instruction_memory;
+
+ instruction_memory.Qualifier = 0;
+ instruction_memory.Texture = 0;
+ instruction_memory.Format = 0;
+ instruction_memory.Padding = 0;
+
+ return instruction_memory;
+}
+
+static struct tgsi_instruction_memory
+tgsi_build_instruction_memory(
+ unsigned qualifier,
+ unsigned texture,
+ unsigned format,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_instruction_memory instruction_memory;
+
+ instruction_memory.Qualifier = qualifier;
+ instruction_memory.Texture = texture;
+ instruction_memory.Format = format;
+ instruction_memory.Padding = 0;
+ instruction->Memory = 1;
+
+ instruction_grow( instruction, header );
+
+ return instruction_memory;
+}
static struct tgsi_texture_offset
tgsi_default_texture_offset( void )
@@ -1008,6 +1055,7 @@ tgsi_default_full_instruction( void )
full_instruction.Predicate = tgsi_default_instruction_predicate();
full_instruction.Label = tgsi_default_instruction_label();
full_instruction.Texture = tgsi_default_instruction_texture();
+ full_instruction.Memory = tgsi_default_instruction_memory();
for( i = 0; i < TGSI_FULL_MAX_TEX_OFFSETS; i++ ) {
full_instruction.TexOffsets[i] = tgsi_default_texture_offset();
}
@@ -1119,6 +1167,26 @@ tgsi_build_full_instruction(
prev_token = (struct tgsi_token *) texture_offset;
}
}
+
+ if (full_inst->Instruction.Memory) {
+ struct tgsi_instruction_memory *instruction_memory;
+
+ if( maxsize <= size )
+ return 0;
+ instruction_memory =
+ (struct tgsi_instruction_memory *) &tokens[size];
+ size++;
+
+ *instruction_memory = tgsi_build_instruction_memory(
+ full_inst->Memory.Qualifier,
+ full_inst->Memory.Texture,
+ full_inst->Memory.Format,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) instruction_memory;
+ }
+
for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
const struct tgsi_full_dst_register *reg = &full_inst->Dst[i];
struct tgsi_dst_register *dst_register;
@@ -1371,3 +1439,18 @@ tgsi_build_full_property(
return size;
}
+
+struct tgsi_full_src_register
+tgsi_full_src_register_from_dst(const struct tgsi_full_dst_register *dst)
+{
+ struct tgsi_full_src_register src;
+ src.Register = tgsi_default_src_register();
+ src.Register.File = dst->Register.File;
+ src.Register.Indirect = dst->Register.Indirect;
+ src.Register.Dimension = dst->Register.Dimension;
+ src.Register.Index = dst->Register.Index;
+ src.Indirect = dst->Indirect;
+ src.Dimension = dst->Dimension;
+ src.DimIndirect = dst->DimIndirect;
+ return src;
+}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h
index c5127e185..34d181ab2 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -30,6 +30,8 @@
struct tgsi_token;
+struct tgsi_full_dst_register;
+struct tgsi_full_src_register;
#if defined __cplusplus
@@ -111,6 +113,9 @@ tgsi_build_full_instruction(
struct tgsi_instruction_predicate
tgsi_default_instruction_predicate(void);
+struct tgsi_full_src_register
+tgsi_full_src_register_from_dst(const struct tgsi_full_dst_register *dst);
+
#if defined __cplusplus
}
#endif
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 8ceb5b475..405114d09 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -29,6 +29,7 @@
#include "util/u_string.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "tgsi_dump.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
@@ -43,6 +44,8 @@ struct dump_ctx
{
struct tgsi_iterate_context iter;
+ boolean dump_float_as_hex;
+
uint instno;
uint immno;
int indent;
@@ -88,6 +91,7 @@ dump_enum(
#define SID(I) ctx->dump_printf( ctx, "%d", I )
#define FLT(F) ctx->dump_printf( ctx, "%10.4f", F )
#define DBL(D) ctx->dump_printf( ctx, "%10.8f", D )
+#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) )
#define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
const char *
@@ -250,8 +254,25 @@ dump_imm_data(struct tgsi_iterate_context *iter,
i++;
break;
}
+ case TGSI_IMM_INT64: {
+ union di d;
+ d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
+ UID( d.i );
+ i++;
+ break;
+ }
+ case TGSI_IMM_UINT64: {
+ union di d;
+ d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
+ UID( d.ui );
+ i++;
+ break;
+ }
case TGSI_IMM_FLOAT32:
- FLT( data[i].Float );
+ if (ctx->dump_float_as_hex)
+ HFLT( data[i].Float );
+ else
+ FLT( data[i].Float );
break;
case TGSI_IMM_UINT32:
UID(data[i].Uint);
@@ -288,17 +309,17 @@ iter_declaration(
* two dimensional
*/
if (decl->Declaration.File == TGSI_FILE_INPUT &&
- (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY ||
+ (iter->processor.Processor == PIPE_SHADER_GEOMETRY ||
(!patch &&
- (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL ||
- iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL)))) {
+ (iter->processor.Processor == PIPE_SHADER_TESS_CTRL ||
+ iter->processor.Processor == PIPE_SHADER_TESS_EVAL)))) {
TXT("[]");
}
/* all non-patch tess ctrl shader outputs are two dimensional */
if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
!patch &&
- iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL) {
+ iter->processor.Processor == PIPE_SHADER_TESS_CTRL) {
TXT("[]");
}
@@ -341,15 +362,32 @@ iter_declaration(
}
}
- if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
+ if (decl->Declaration.File == TGSI_FILE_IMAGE) {
TXT(", ");
- ENM(decl->Resource.Resource, tgsi_texture_names);
- if (decl->Resource.Writable)
+ ENM(decl->Image.Resource, tgsi_texture_names);
+ TXT(", ");
+ TXT(util_format_name(decl->Image.Format));
+ if (decl->Image.Writable)
TXT(", WR");
- if (decl->Resource.Raw)
+ if (decl->Image.Raw)
TXT(", RAW");
}
+ if (decl->Declaration.File == TGSI_FILE_BUFFER) {
+ if (decl->Declaration.Atomic)
+ TXT(", ATOMIC");
+ }
+
+ if (decl->Declaration.File == TGSI_FILE_MEMORY) {
+ switch (decl->Declaration.MemType) {
+ /* Note: ,GLOBAL is optional / the default */
+ case TGSI_MEMORY_TYPE_GLOBAL: TXT(", GLOBAL"); break;
+ case TGSI_MEMORY_TYPE_SHARED: TXT(", SHARED"); break;
+ case TGSI_MEMORY_TYPE_PRIVATE: TXT(", PRIVATE"); break;
+ case TGSI_MEMORY_TYPE_INPUT: TXT(", INPUT"); break;
+ }
+ }
+
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
TXT(", ");
ENM(decl->SamplerView.Resource, tgsi_texture_names);
@@ -370,7 +408,7 @@ iter_declaration(
}
if (decl->Declaration.Interpolate) {
- if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT &&
+ if (iter->processor.Processor == PIPE_SHADER_FRAGMENT &&
decl->Declaration.File == TGSI_FILE_INPUT)
{
TXT( ", " );
@@ -413,6 +451,7 @@ tgsi_dump_declaration(
const struct tgsi_full_declaration *decl )
{
struct dump_ctx ctx;
+ memset(&ctx, 0, sizeof(ctx));
ctx.dump_printf = dump_ctx_printf;
@@ -445,6 +484,9 @@ iter_property(
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
ENM(prop->u[i].Data, tgsi_fs_coord_pixel_center_names);
break;
+ case TGSI_PROPERTY_NEXT_SHADER:
+ ENM(prop->u[i].Data, tgsi_processor_type_names);
+ break;
default:
SID( prop->u[i].Data );
break;
@@ -461,6 +503,7 @@ void tgsi_dump_property(
const struct tgsi_full_property *prop )
{
struct dump_ctx ctx;
+ memset(&ctx, 0, sizeof(ctx));
ctx.dump_printf = dump_ctx_printf;
@@ -492,6 +535,7 @@ tgsi_dump_immediate(
const struct tgsi_full_immediate *imm )
{
struct dump_ctx ctx;
+ memset(&ctx, 0, sizeof(ctx));
ctx.dump_printf = dump_ctx_printf;
@@ -610,17 +654,37 @@ iter_instruction(
}
}
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_IF:
- case TGSI_OPCODE_UIF:
- case TGSI_OPCODE_ELSE:
- case TGSI_OPCODE_BGNLOOP:
- case TGSI_OPCODE_ENDLOOP:
- case TGSI_OPCODE_CAL:
- case TGSI_OPCODE_BGNSUB:
- TXT( " :" );
- UID( inst->Label.Label );
- break;
+ if (inst->Instruction.Memory) {
+ uint32_t qualifier = inst->Memory.Qualifier;
+ while (qualifier) {
+ int bit = ffs(qualifier) - 1;
+ qualifier &= ~(1U << bit);
+ TXT(", ");
+ ENM(bit, tgsi_memory_names);
+ }
+ if (inst->Memory.Texture) {
+ TXT( ", " );
+ ENM( inst->Memory.Texture, tgsi_texture_names );
+ }
+ if (inst->Memory.Format) {
+ TXT( ", " );
+ TXT( util_format_name(inst->Memory.Format) );
+ }
+ }
+
+ if (inst->Instruction.Label) {
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
+ case TGSI_OPCODE_ELSE:
+ case TGSI_OPCODE_BGNLOOP:
+ case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_CAL:
+ case TGSI_OPCODE_BGNSUB:
+ TXT( " :" );
+ UID( inst->Label.Label );
+ break;
+ }
}
/* update indentation */
@@ -642,12 +706,14 @@ tgsi_dump_instruction(
uint instno )
{
struct dump_ctx ctx;
+ memset(&ctx, 0, sizeof(ctx));
ctx.instno = instno;
ctx.immno = instno;
ctx.indent = 0;
ctx.dump_printf = dump_ctx_printf;
ctx.indentation = 0;
+ ctx.file = NULL;
iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst );
}
@@ -662,23 +728,30 @@ prolog(
return TRUE;
}
+static void
+init_dump_ctx(struct dump_ctx *ctx, uint flags)
+{
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->iter.prolog = prolog;
+ ctx->iter.iterate_instruction = iter_instruction;
+ ctx->iter.iterate_declaration = iter_declaration;
+ ctx->iter.iterate_immediate = iter_immediate;
+ ctx->iter.iterate_property = iter_property;
+
+ if (flags & TGSI_DUMP_FLOAT_AS_HEX)
+ ctx->dump_float_as_hex = TRUE;
+}
+
void
tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
{
struct dump_ctx ctx;
+ memset(&ctx, 0, sizeof(ctx));
- ctx.iter.prolog = prolog;
- ctx.iter.iterate_instruction = iter_instruction;
- ctx.iter.iterate_declaration = iter_declaration;
- ctx.iter.iterate_immediate = iter_immediate;
- ctx.iter.iterate_property = iter_property;
- ctx.iter.epilog = NULL;
+ init_dump_ctx(&ctx, flags);
- ctx.instno = 0;
- ctx.immno = 0;
- ctx.indent = 0;
ctx.dump_printf = dump_ctx_printf;
- ctx.indentation = 0;
ctx.file = file;
tgsi_iterate_shader( tokens, &ctx.iter );
@@ -696,6 +769,7 @@ struct str_dump_ctx
char *str;
char *ptr;
int left;
+ bool nospace;
};
static void
@@ -703,7 +777,7 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
{
struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx;
- if(sctx->left > 1) {
+ if (!sctx->nospace) {
int written;
va_list ap;
va_start(ap, format);
@@ -714,14 +788,17 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
* vsnprintf:
*/
if (written > 0) {
- written = MIN2(sctx->left, written);
+ if (written >= sctx->left) {
+ sctx->nospace = true;
+ written = sctx->left;
+ }
sctx->ptr += written;
sctx->left -= written;
}
}
}
-void
+bool
tgsi_dump_str(
const struct tgsi_token *tokens,
uint flags,
@@ -729,27 +806,21 @@ tgsi_dump_str(
size_t size)
{
struct str_dump_ctx ctx;
+ memset(&ctx, 0, sizeof(ctx));
- ctx.base.iter.prolog = prolog;
- ctx.base.iter.iterate_instruction = iter_instruction;
- ctx.base.iter.iterate_declaration = iter_declaration;
- ctx.base.iter.iterate_immediate = iter_immediate;
- ctx.base.iter.iterate_property = iter_property;
- ctx.base.iter.epilog = NULL;
+ init_dump_ctx(&ctx.base, flags);
- ctx.base.instno = 0;
- ctx.base.immno = 0;
- ctx.base.indent = 0;
ctx.base.dump_printf = &str_dump_ctx_printf;
- ctx.base.indentation = 0;
- ctx.base.file = NULL;
ctx.str = str;
ctx.str[0] = 0;
ctx.ptr = str;
ctx.left = (int)size;
+ ctx.nospace = false;
tgsi_iterate_shader( tokens, &ctx.base.iter );
+
+ return !ctx.nospace;
}
void
@@ -760,6 +831,7 @@ tgsi_dump_instruction_str(
size_t size)
{
struct str_dump_ctx ctx;
+ memset(&ctx, 0, sizeof(ctx));
ctx.base.instno = instno;
ctx.base.immno = instno;
@@ -772,6 +844,7 @@ tgsi_dump_instruction_str(
ctx.str[0] = 0;
ctx.ptr = str;
ctx.left = (int)size;
+ ctx.nospace = false;
iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst );
}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 75cd0d53c..7b5c56d9d 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -58,8 +58,10 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
+#include "util/u_half.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/rounding.h"
#define DEBUG_EXECUTION 0
@@ -75,6 +77,8 @@
union tgsi_double_channel {
double d[TGSI_QUAD_SIZE];
unsigned u[TGSI_QUAD_SIZE][2];
+ uint64_t u64[TGSI_QUAD_SIZE];
+ int64_t i64[TGSI_QUAD_SIZE];
};
struct tgsi_double_vector {
@@ -542,10 +546,10 @@ static void
micro_rnd(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
- dst->f[0] = floorf(src->f[0] + 0.5f);
- dst->f[1] = floorf(src->f[1] + 0.5f);
- dst->f[2] = floorf(src->f[2] + 0.5f);
- dst->f[3] = floorf(src->f[3] + 0.5f);
+ dst->f[0] = _mesa_roundevenf(src->f[0]);
+ dst->f[1] = _mesa_roundevenf(src->f[1]);
+ dst->f[2] = _mesa_roundevenf(src->f[2]);
+ dst->f[3] = _mesa_roundevenf(src->f[3]);
}
static void
@@ -674,10 +678,10 @@ static void
micro_trunc(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
- dst->f[0] = (float)(int)src->f[0];
- dst->f[1] = (float)(int)src->f[1];
- dst->f[2] = (float)(int)src->f[2];
- dst->f[3] = (float)(int)src->f[3];
+ dst->f[0] = truncf(src->f[0]);
+ dst->f[1] = truncf(src->f[1]);
+ dst->f[2] = truncf(src->f[2]);
+ dst->f[3] = truncf(src->f[3]);
}
static void
@@ -690,11 +694,251 @@ micro_u2d(union tgsi_double_channel *dst,
dst->d[3] = (double)src->u[3];
}
+static void
+micro_i64abs(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
+ dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
+ dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
+ dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
+}
+
+static void
+micro_i64sgn(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
+ dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
+ dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
+ dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
+}
+
+static void
+micro_i64neg(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = -src->i64[0];
+ dst->i64[1] = -src->i64[1];
+ dst->i64[2] = -src->i64[2];
+ dst->i64[3] = -src->i64[3];
+}
+
+static void
+micro_u64seq(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sne(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64sge(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sge(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64max(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64max(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64min(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64min(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64add(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] + src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] + src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] + src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] + src[1].u64[3];
+}
+
+static void
+micro_u64mul(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] * src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] * src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] * src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] * src[1].u64[3];
+}
+
+static void
+micro_u64div(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] / src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] / src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] / src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] / src[1].u64[3];
+}
+
+static void
+micro_i64div(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] / src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] / src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] / src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] / src[1].i64[3];
+}
+
+static void
+micro_u64mod(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] % src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] % src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] % src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] % src[1].u64[3];
+}
+
+static void
+micro_i64mod(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] % src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] % src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] % src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] % src[1].i64[3];
+}
+
+static void
+micro_u64shl(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1)
+{
+ unsigned masked_count;
+ masked_count = src1->u[0] & 0x3f;
+ dst->u64[0] = src0->u64[0] << masked_count;
+ masked_count = src1->u[1] & 0x3f;
+ dst->u64[1] = src0->u64[1] << masked_count;
+ masked_count = src1->u[2] & 0x3f;
+ dst->u64[2] = src0->u64[2] << masked_count;
+ masked_count = src1->u[3] & 0x3f;
+ dst->u64[3] = src0->u64[3] << masked_count;
+}
+
+static void
+micro_i64shr(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1)
+{
+ unsigned masked_count;
+ masked_count = src1->u[0] & 0x3f;
+ dst->i64[0] = src0->i64[0] >> masked_count;
+ masked_count = src1->u[1] & 0x3f;
+ dst->i64[1] = src0->i64[1] >> masked_count;
+ masked_count = src1->u[2] & 0x3f;
+ dst->i64[2] = src0->i64[2] >> masked_count;
+ masked_count = src1->u[3] & 0x3f;
+ dst->i64[3] = src0->i64[3] >> masked_count;
+}
+
+static void
+micro_u64shr(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1)
+{
+ unsigned masked_count;
+ masked_count = src1->u[0] & 0x3f;
+ dst->u64[0] = src0->u64[0] >> masked_count;
+ masked_count = src1->u[1] & 0x3f;
+ dst->u64[1] = src0->u64[1] >> masked_count;
+ masked_count = src1->u[2] & 0x3f;
+ dst->u64[2] = src0->u64[2] >> masked_count;
+ masked_count = src1->u[3] & 0x3f;
+ dst->u64[3] = src0->u64[3] >> masked_count;
+}
+
enum tgsi_exec_datatype {
TGSI_EXEC_DATA_FLOAT,
TGSI_EXEC_DATA_INT,
TGSI_EXEC_DATA_UINT,
- TGSI_EXEC_DATA_DOUBLE
+ TGSI_EXEC_DATA_DOUBLE,
+ TGSI_EXEC_DATA_INT64,
+ TGSI_EXEC_DATA_UINT64,
};
/*
@@ -852,7 +1096,9 @@ void
tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
- struct tgsi_sampler *sampler)
+ struct tgsi_sampler *sampler,
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer)
{
uint k;
struct tgsi_parse_context parse;
@@ -870,6 +1116,8 @@ tgsi_exec_machine_bind_shader(
mach->Tokens = tokens;
mach->Sampler = sampler;
+ mach->Image = image;
+ mach->Buffer = buffer;
if (!tokens) {
/* unbind and free all */
@@ -890,11 +1138,13 @@ tgsi_exec_machine_bind_shader(
return;
}
- mach->Processor = parse.FullHeader.Processor.Processor;
mach->ImmLimit = 0;
mach->NumOutputs = 0;
- if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
+ for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
+ mach->SysSemanticToIndex[k] = -1;
+
+ if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
!mach->UsedGeometryShader) {
struct tgsi_exec_vector *inputs;
struct tgsi_exec_vector *outputs;
@@ -960,6 +1210,11 @@ tgsi_exec_machine_bind_shader(
++mach->NumOutputs;
}
}
+ else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+ const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+ mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
+ }
+
memcpy(declarations + numDeclarations,
&parse.FullToken.FullDeclaration,
sizeof(declarations[0]));
@@ -1000,7 +1255,7 @@ tgsi_exec_machine_bind_shader(
break;
case TGSI_TOKEN_TYPE_PROPERTY:
- if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) {
+ if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
}
@@ -1024,7 +1279,7 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *
-tgsi_exec_machine_create( void )
+tgsi_exec_machine_create(enum pipe_shader_type shader_type)
{
struct tgsi_exec_machine *mach;
uint i;
@@ -1035,14 +1290,17 @@ tgsi_exec_machine_create( void )
memset(mach, 0, sizeof(*mach));
+ mach->ShaderType = shader_type;
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
- mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
- mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
- if (!mach->Inputs || !mach->Outputs)
- goto fail;
+ if (shader_type != PIPE_SHADER_COMPUTE) {
+ mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
+ mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
+ if (!mach->Inputs || !mach->Outputs)
+ goto fail;
+ }
/* Setup constants needed by the SSE2 executor. */
for( i = 0; i < 4; i++ ) {
@@ -1255,7 +1513,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
case TGSI_FILE_INPUT:
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
/*
- if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+ if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
index2D->i[i], index->i[i]);
@@ -1272,7 +1530,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
* gl_FragCoord, for example, in a sys value register.
*/
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
- chan->u[i] = mach->SystemValue[index->i[i]].u[i];
+ chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
}
break;
@@ -1649,7 +1907,7 @@ store_dest_dstret(struct tgsi_exec_machine *mach,
debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
reg->Register.Index);
- if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+ if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
for (i = 0; i < TGSI_QUAD_SIZE; i++)
if (execmask & (1 << i))
@@ -1883,7 +2141,7 @@ emit_primitive(struct tgsi_exec_machine *mach)
static void
conditional_emit_primitive(struct tgsi_exec_machine *mach)
{
- if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+ if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
int emitted_verts =
mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
if (emitted_verts) {
@@ -1993,12 +2251,12 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
uint sampler)
{
- uint unit;
-
+ uint unit = 0;
+ int i;
if (inst->Src[sampler].Register.Indirect) {
const struct tgsi_full_src_register *reg = &inst->Src[sampler];
union tgsi_exec_channel indir_index, index2;
-
+ const uint execmask = mach->ExecMask;
index2.i[0] =
index2.i[1] =
index2.i[2] =
@@ -2011,7 +2269,13 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
&index2,
&ZeroVec,
&indir_index);
- unit = inst->Src[sampler].Register.Index + indir_index.i[0];
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ if (execmask & (1 << i)) {
+ unit = inst->Src[sampler].Register.Index + indir_index.i[i];
+ break;
+ }
+ }
+
} else {
unit = inst->Src[sampler].Register.Index;
}
@@ -2021,7 +2285,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
/*
* execute a texture instruction.
*
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
* instruction variants like proj, lod, and texture with lod bias.
* sampler indicates which src register the sampler is contained in.
*/
@@ -2032,7 +2296,7 @@ exec_tex(struct tgsi_exec_machine *mach,
{
const union tgsi_exec_channel *args[5], *proj = NULL;
union tgsi_exec_channel r[5];
- enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+ enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
uint chan;
uint unit;
int8_t offsets[3];
@@ -2045,15 +2309,16 @@ exec_tex(struct tgsi_exec_machine *mach,
assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
- dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref);
+ dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
+ shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
assert(dim <= 4);
if (shadow_ref >= 0)
- assert(shadow_ref >= dim && shadow_ref < Elements(args));
+ assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args));
/* fetch modifier to the last argument */
if (modifier != TEX_MODIFIER_NONE) {
- const int last = Elements(args) - 1;
+ const int last = ARRAY_SIZE(args) - 1;
/* fetch modifier from src0.w or src1.x */
if (sampler == 1) {
@@ -2078,14 +2343,14 @@ exec_tex(struct tgsi_exec_machine *mach,
args[i] = &ZeroVec;
if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
- control = tgsi_sampler_lod_explicit;
+ control = TGSI_SAMPLER_LOD_EXPLICIT;
else if (modifier == TEX_MODIFIER_LOD_BIAS)
- control = tgsi_sampler_lod_bias;
+ control = TGSI_SAMPLER_LOD_BIAS;
else if (modifier == TEX_MODIFIER_GATHER)
- control = tgsi_sampler_gather;
+ control = TGSI_SAMPLER_GATHER;
}
else {
- for (i = dim; i < Elements(args); i++)
+ for (i = dim; i < ARRAY_SIZE(args); i++)
args[i] = &ZeroVec;
}
@@ -2132,6 +2397,46 @@ exec_tex(struct tgsi_exec_machine *mach,
}
}
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ uint unit;
+ int dim;
+ int i;
+ union tgsi_exec_channel coords[4];
+ const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
+ union tgsi_exec_channel r[2];
+
+ unit = fetch_sampler_unit(mach, inst, 1);
+ dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
+ assert(dim <= ARRAY_SIZE(coords));
+ /* fetch coordinates */
+ for (i = 0; i < dim; i++) {
+ FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+ args[i] = &coords[i];
+ }
+ for (i = dim; i < ARRAY_SIZE(coords); i++) {
+ args[i] = &ZeroVec;
+ }
+ mach->Sampler->query_lod(mach->Sampler, unit, unit,
+ args[0]->f,
+ args[1]->f,
+ args[2]->f,
+ args[3]->f,
+ TGSI_SAMPLER_LOD_NONE,
+ r[0].f,
+ r[1].f);
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+ TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+ TGSI_EXEC_DATA_FLOAT);
+ }
+}
static void
exec_txd(struct tgsi_exec_machine *mach,
@@ -2155,7 +2460,7 @@ exec_txd(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, unit, unit,
&r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
@@ -2171,7 +2476,7 @@ exec_txd(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
@@ -2185,7 +2490,7 @@ exec_txd(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
@@ -2205,7 +2510,7 @@ exec_txd(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
@@ -2225,7 +2530,7 @@ exec_txd(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
@@ -2259,7 +2564,8 @@ exec_txf(struct tgsi_exec_machine *mach,
IFETCH(&r[3], 0, TGSI_CHAN_W);
- if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
+ inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
target = mach->SamplerViews[unit].Resource;
}
else {
@@ -2301,7 +2607,8 @@ exec_txf(struct tgsi_exec_machine *mach,
r[3].f[j] = rgba[3][j];
}
- if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
+ inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
unsigned char swizzles[4];
swizzles[0] = inst->Src[1].Register.SwizzleX;
swizzles[1] = inst->Src[1].Register.SwizzleY;
@@ -2364,7 +2671,7 @@ exec_sample(struct tgsi_exec_machine *mach,
const uint sampler_unit = inst->Src[2].Register.Index;
union tgsi_exec_channel r[5], c1;
const union tgsi_exec_channel *lod = &ZeroVec;
- enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+ enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
uint chan;
unsigned char swizzles[4];
int8_t offsets[3];
@@ -2378,16 +2685,16 @@ exec_sample(struct tgsi_exec_machine *mach,
if (modifier == TEX_MODIFIER_LOD_BIAS) {
FETCH(&c1, 3, TGSI_CHAN_X);
lod = &c1;
- control = tgsi_sampler_lod_bias;
+ control = TGSI_SAMPLER_LOD_BIAS;
}
else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
FETCH(&c1, 3, TGSI_CHAN_X);
lod = &c1;
- control = tgsi_sampler_lod_explicit;
+ control = TGSI_SAMPLER_LOD_EXPLICIT;
}
else {
assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
- control = tgsi_sampler_lod_zero;
+ control = TGSI_SAMPLER_LOD_ZERO;
}
}
@@ -2513,7 +2820,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
@@ -2529,7 +2836,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
@@ -2547,7 +2854,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
&r[0], &r[1], &r[2], &r[3], &ZeroVec,
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]);
break;
@@ -2645,7 +2952,7 @@ exec_declaration(struct tgsi_exec_machine *mach,
return;
}
- if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+ if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
if (decl->Declaration.File == TGSI_FILE_INPUT) {
uint first, last, mask;
@@ -2723,9 +3030,6 @@ exec_declaration(struct tgsi_exec_machine *mach,
}
}
- if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
- mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First;
- }
}
typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,
@@ -3018,6 +3322,45 @@ exec_dp2(struct tgsi_exec_machine *mach,
}
static void
+exec_pk2h(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned chan;
+ union tgsi_exec_channel arg[2], dst;
+
+ fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+ dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
+ (util_float_to_half(arg[1].f[chan]) << 16);
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
+ }
+ }
+}
+
+static void
+exec_up2h(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned chan;
+ union tgsi_exec_channel arg, dst[2];
+
+ fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
+ for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+ dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
+ dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
exec_scs(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
@@ -3295,6 +3638,16 @@ exec_endswitch(struct tgsi_exec_machine *mach)
typedef void (* micro_dop)(union tgsi_double_channel *dst,
const union tgsi_double_channel *src);
+typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1);
+
+typedef void (* micro_dop_s)(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src);
+
+typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,
+ const union tgsi_double_channel *src);
+
static void
fetch_double_channel(struct tgsi_exec_machine *mach,
union tgsi_double_channel *chan,
@@ -3447,177 +3800,805 @@ exec_double_trinary(struct tgsi_exec_machine *mach,
}
static void
-exec_f2d(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_dldexp(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
- union tgsi_exec_channel src;
+ union tgsi_double_channel src0;
+ union tgsi_exec_channel src1;
union tgsi_double_channel dst;
+ int wmask;
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- micro_f2d(&dst, &src);
+ wmask = inst->Dst[0].Register.WriteMask;
+ if (wmask & TGSI_WRITEMASK_XY) {
+ fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
+ micro_dldexp(&dst, &src0, &src1);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
}
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- micro_f2d(&dst, &src);
+
+ if (wmask & TGSI_WRITEMASK_ZW) {
+ fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+ fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
+ micro_dldexp(&dst, &src0, &src1);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
}
}
static void
-exec_d2f(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_dfracexp(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
union tgsi_double_channel src;
- union tgsi_exec_channel dst;
- int wm = inst->Dst[0].Register.WriteMask;
- int i;
- int bit;
- for (i = 0; i < 2; i++) {
- bit = ffs(wm);
- if (bit) {
- wm &= ~(1 << (bit - 1));
- if (i == 0)
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- else
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_d2f(&dst, &src);
- store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_FLOAT);
- }
+ union tgsi_double_channel dst;
+ union tgsi_exec_channel dst_exp;
+
+ if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ micro_dfracexp(&dst, &dst_exp, &src);
+ store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+ store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
+ }
+ if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+ micro_dfracexp(&dst, &dst_exp, &src);
+ store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+ store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
}
}
static void
-exec_i2d(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_dop_sop op)
{
- union tgsi_exec_channel src;
+ union tgsi_double_channel src0;
+ union tgsi_exec_channel src1;
union tgsi_double_channel dst;
+ int wmask;
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
- micro_i2d(&dst, &src);
+ wmask = inst->Dst[0].Register.WriteMask;
+ if (wmask & TGSI_WRITEMASK_XY) {
+ fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
+ op(&dst, &src0, &src1);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
}
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_INT);
- micro_i2d(&dst, &src);
+
+ if (wmask & TGSI_WRITEMASK_ZW) {
+ fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+ fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
+ op(&dst, &src0, &src1);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
}
}
+static int
+get_image_coord_dim(unsigned tgsi_tex)
+{
+ int dim;
+ switch (tgsi_tex) {
+ case TGSI_TEXTURE_BUFFER:
+ case TGSI_TEXTURE_1D:
+ dim = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_2D_MSAA:
+ dim = 2;
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ case TGSI_TEXTURE_CUBE_ARRAY:
+ dim = 3;
+ break;
+ default:
+ assert(!"unknown texture target");
+ dim = 0;
+ break;
+ }
+
+ return dim;
+}
+
+static int
+get_image_coord_sample(unsigned tgsi_tex)
+{
+ int sample = 0;
+ switch (tgsi_tex) {
+ case TGSI_TEXTURE_2D_MSAA:
+ sample = 3;
+ break;
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ sample = 4;
+ break;
+ default:
+ break;
+ }
+ return sample;
+}
+
static void
-exec_d2i(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_load_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
- union tgsi_double_channel src;
- union tgsi_exec_channel dst;
- int wm = inst->Dst[0].Register.WriteMask;
- int i;
- int bit;
- for (i = 0; i < 2; i++) {
- bit = ffs(wm);
- if (bit) {
- wm &= ~(1 << (bit - 1));
- if (i == 0)
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- else
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_d2i(&dst, &src);
- store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_INT);
+ union tgsi_exec_channel r[4], sample_r;
+ uint unit;
+ int sample;
+ int i, j;
+ int dim;
+ uint chan;
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_image_params params;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = fetch_sampler_unit(mach, inst, 0);
+ dim = get_image_coord_dim(inst->Memory.Texture);
+ sample = get_image_coord_sample(inst->Memory.Texture);
+ assert(dim <= 3);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.tgsi_tex_instr = inst->Memory.Texture;
+ params.format = inst->Memory.Format;
+
+ for (i = 0; i < dim; i++) {
+ IFETCH(&r[i], 1, TGSI_CHAN_X + i);
+ }
+
+ if (sample)
+ IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
+
+ mach->Image->load(mach->Image, &params,
+ r[0].i, r[1].i, r[2].i, sample_r.i,
+ rgba);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ r[0].f[j] = rgba[0][j];
+ r[1].f[j] = rgba[1][j];
+ r[2].f[j] = rgba[2][j];
+ r[3].f[j] = rgba[3][j];
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
}
}
}
+
static void
-exec_u2d(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_load_buf(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
- union tgsi_exec_channel src;
- union tgsi_double_channel dst;
+ union tgsi_exec_channel r[4];
+ uint unit;
+ int j;
+ uint chan;
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_buffer_params params;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
- micro_u2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+ unit = fetch_sampler_unit(mach, inst, 0);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ IFETCH(&r[0], 1, TGSI_CHAN_X);
+
+ mach->Buffer->load(mach->Buffer, &params,
+ r[0].i, rgba);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ r[0].f[j] = rgba[0][j];
+ r[1].f[j] = rgba[1][j];
+ r[2].f[j] = rgba[2][j];
+ r[3].f[j] = rgba[3][j];
}
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_UINT);
- micro_u2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
}
}
static void
-exec_d2u(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
+exec_load_mem(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
{
- union tgsi_double_channel src;
- union tgsi_exec_channel dst;
- int wm = inst->Dst[0].Register.WriteMask;
- int i;
- int bit;
- for (i = 0; i < 2; i++) {
- bit = ffs(wm);
- if (bit) {
- wm &= ~(1 << (bit - 1));
- if (i == 0)
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- else
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_d2u(&dst, &src);
- store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_UINT);
+ union tgsi_exec_channel r[4];
+ uint chan;
+ char *ptr = mach->LocalMem;
+ uint32_t offset;
+ int j;
+
+ IFETCH(&r[0], 1, TGSI_CHAN_X);
+ if (r[0].u[0] >= mach->LocalMemSize)
+ return;
+
+ offset = r[0].u[0];
+ ptr += offset;
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ memcpy(&r[chan].u[j], ptr + (4 * chan), 4);
+ }
+ }
+ }
+
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
}
}
}
static void
-exec_dldexp(struct tgsi_exec_machine *mach,
+exec_load(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_load_img(mach, inst);
+ else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
+ exec_load_buf(mach, inst);
+ else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+ exec_load_mem(mach, inst);
+}
+
+static void
+exec_store_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3], sample_r;
+ union tgsi_exec_channel value[4];
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_image_params params;
+ int dim;
+ int sample;
+ int i, j;
+ uint unit;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+ unit = inst->Dst[0].Register.Index;
+ dim = get_image_coord_dim(inst->Memory.Texture);
+ sample = get_image_coord_sample(inst->Memory.Texture);
+ assert(dim <= 3);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.tgsi_tex_instr = inst->Memory.Texture;
+ params.format = inst->Memory.Format;
+
+ for (i = 0; i < dim; i++) {
+ IFETCH(&r[i], 0, TGSI_CHAN_X + i);
+ }
+
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 1, TGSI_CHAN_X + i);
+ }
+ if (sample)
+ IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba[0][j] = value[0].f[j];
+ rgba[1][j] = value[1].f[j];
+ rgba[2][j] = value[2].f[j];
+ rgba[3][j] = value[3].f[j];
+ }
+
+ mach->Image->store(mach->Image, &params,
+ r[0].i, r[1].i, r[2].i, sample_r.i,
+ rgba);
+}
+
+static void
+exec_store_buf(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+ union tgsi_exec_channel value[4];
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_buffer_params params;
+ int i, j;
+ uint unit;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = inst->Dst[0].Register.Index;
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.writemask = inst->Dst[0].Register.WriteMask;
+
+ IFETCH(&r[0], 0, TGSI_CHAN_X);
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 1, TGSI_CHAN_X + i);
+ }
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba[0][j] = value[0].f[j];
+ rgba[1][j] = value[1].f[j];
+ rgba[2][j] = value[2].f[j];
+ rgba[3][j] = value[3].f[j];
+ }
+
+ mach->Buffer->store(mach->Buffer, &params,
+ r[0].i,
+ rgba);
+}
+
+static void
+exec_store_mem(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+ union tgsi_exec_channel value[4];
+ uint i, chan;
+ char *ptr = mach->LocalMem;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+ int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+
+ IFETCH(&r[0], 0, TGSI_CHAN_X);
+
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 1, TGSI_CHAN_X + i);
+ }
+
+ if (r[0].u[0] >= mach->LocalMemSize)
+ return;
+ ptr += r[0].u[0];
+
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ if (execmask & (1 << i)) {
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
+ }
+ }
+ }
+ }
+}
+
+static void
+exec_store(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
+ exec_store_img(mach, inst);
+ else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
+ exec_store_buf(mach, inst);
+ else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
+ exec_store_mem(mach, inst);
+}
+
+static void
+exec_atomop_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[4], sample_r;
+ union tgsi_exec_channel value[4], value2[4];
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_image_params params;
+ int dim;
+ int sample;
+ int i, j;
+ uint unit, chan;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+ unit = fetch_sampler_unit(mach, inst, 0);
+ dim = get_image_coord_dim(inst->Memory.Texture);
+ sample = get_image_coord_sample(inst->Memory.Texture);
+ assert(dim <= 3);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.tgsi_tex_instr = inst->Memory.Texture;
+ params.format = inst->Memory.Format;
+
+ for (i = 0; i < dim; i++) {
+ IFETCH(&r[i], 1, TGSI_CHAN_X + i);
+ }
+
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 2, TGSI_CHAN_X + i);
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+ FETCH(&value2[i], 3, TGSI_CHAN_X + i);
+ }
+ if (sample)
+ IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba[0][j] = value[0].f[j];
+ rgba[1][j] = value[1].f[j];
+ rgba[2][j] = value[2].f[j];
+ rgba[3][j] = value[3].f[j];
+ }
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba2[0][j] = value2[0].f[j];
+ rgba2[1][j] = value2[1].f[j];
+ rgba2[2][j] = value2[2].f[j];
+ rgba2[3][j] = value2[3].f[j];
+ }
+ }
+
+ mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
+ r[0].i, r[1].i, r[2].i, sample_r.i,
+ rgba, rgba2);
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ r[0].f[j] = rgba[0][j];
+ r[1].f[j] = rgba[1][j];
+ r[2].f[j] = rgba[2][j];
+ r[3].f[j] = rgba[3][j];
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_atomop_buf(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[4];
+ union tgsi_exec_channel value[4], value2[4];
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
+ struct tgsi_buffer_params params;
+ int i, j;
+ uint unit, chan;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = fetch_sampler_unit(mach, inst, 0);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.writemask = inst->Dst[0].Register.WriteMask;
+
+ IFETCH(&r[0], 1, TGSI_CHAN_X);
+
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 2, TGSI_CHAN_X + i);
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+ FETCH(&value2[i], 3, TGSI_CHAN_X + i);
+ }
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba[0][j] = value[0].f[j];
+ rgba[1][j] = value[1].f[j];
+ rgba[2][j] = value[2].f[j];
+ rgba[3][j] = value[3].f[j];
+ }
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ rgba2[0][j] = value2[0].f[j];
+ rgba2[1][j] = value2[1].f[j];
+ rgba2[2][j] = value2[2].f[j];
+ rgba2[3][j] = value2[3].f[j];
+ }
+ }
+
+ mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
+ r[0].i,
+ rgba, rgba2);
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ r[0].f[j] = rgba[0][j];
+ r[1].f[j] = rgba[1][j];
+ r[2].f[j] = rgba[2][j];
+ r[3].f[j] = rgba[3][j];
+ }
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_atomop_mem(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[4];
+ union tgsi_exec_channel value[4], value2[4];
+ char *ptr = mach->LocalMem;
+ uint32_t val;
+ uint chan, i;
+ uint32_t offset;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+ int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ IFETCH(&r[0], 1, TGSI_CHAN_X);
+
+ if (r[0].u[0] >= mach->LocalMemSize)
+ return;
+
+ offset = r[0].u[0];
+ ptr += offset;
+ for (i = 0; i < 4; i++) {
+ FETCH(&value[i], 2, TGSI_CHAN_X + i);
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+ FETCH(&value2[i], 3, TGSI_CHAN_X + i);
+ }
+
+ memcpy(&r[0].u[0], ptr, 4);
+ val = r[0].u[0];
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ATOMUADD:
+ val += value[0].u[0];
+ break;
+ case TGSI_OPCODE_ATOMXOR:
+ val ^= value[0].u[0];
+ break;
+ case TGSI_OPCODE_ATOMOR:
+ val |= value[0].u[0];
+ break;
+ case TGSI_OPCODE_ATOMAND:
+ val &= value[0].u[0];
+ break;
+ case TGSI_OPCODE_ATOMUMIN:
+ val = MIN2(val, value[0].u[0]);
+ break;
+ case TGSI_OPCODE_ATOMUMAX:
+ val = MAX2(val, value[0].u[0]);
+ break;
+ case TGSI_OPCODE_ATOMIMIN:
+ val = MIN2(r[0].i[0], value[0].i[0]);
+ break;
+ case TGSI_OPCODE_ATOMIMAX:
+ val = MAX2(r[0].i[0], value[0].i[0]);
+ break;
+ case TGSI_OPCODE_ATOMXCHG:
+ val = value[0].i[0];
+ break;
+ case TGSI_OPCODE_ATOMCAS:
+ if (val == value[0].u[0])
+ val = value2[0].u[0];
+ break;
+ default:
+ break;
+ }
+ for (i = 0; i < TGSI_QUAD_SIZE; i++)
+ if (execmask & (1 << i))
+ memcpy(ptr, &val, 4);
+
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_atomop(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
- union tgsi_double_channel src0;
- union tgsi_exec_channel src1;
- union tgsi_double_channel dst;
- int wmask;
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_atomop_img(mach, inst);
+ else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
+ exec_atomop_buf(mach, inst);
+ else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+ exec_atomop_mem(mach, inst);
+}
- wmask = inst->Dst[0].Register.WriteMask;
- if (wmask & TGSI_WRITEMASK_XY) {
- fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
- micro_dldexp(&dst, &src0, &src1);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+static void
+exec_resq_img(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ int result[4];
+ union tgsi_exec_channel r[4];
+ uint unit;
+ int i, chan, j;
+ struct tgsi_image_params params;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = fetch_sampler_unit(mach, inst, 0);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+ params.tgsi_tex_instr = inst->Memory.Texture;
+ params.format = inst->Memory.Format;
+
+ mach->Image->get_dims(mach->Image, &params, result);
+
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ for (j = 0; j < 4; j++) {
+ r[j].i[i] = result[j];
+ }
}
- if (wmask & TGSI_WRITEMASK_ZW) {
- fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
- micro_dldexp(&dst, &src0, &src1);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
+ TGSI_EXEC_DATA_INT);
+ }
}
}
static void
-exec_dfracexp(struct tgsi_exec_machine *mach,
+exec_resq_buf(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
- union tgsi_double_channel src;
+ int result;
+ union tgsi_exec_channel r[4];
+ uint unit;
+ int i, chan;
+ struct tgsi_buffer_params params;
+ int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+
+ unit = fetch_sampler_unit(mach, inst, 0);
+
+ params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
+ params.unit = unit;
+
+ mach->Buffer->get_dims(mach->Buffer, &params, &result);
+
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ r[0].i[i] = result;
+ }
+
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
+ TGSI_EXEC_DATA_INT);
+ }
+ }
+}
+
+static void
+exec_resq(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+ exec_resq_img(mach, inst);
+ else
+ exec_resq_buf(mach, inst);
+}
+
+static void
+micro_f2u64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u64[0] = (uint64_t)src->f[0];
+ dst->u64[1] = (uint64_t)src->f[1];
+ dst->u64[2] = (uint64_t)src->f[2];
+ dst->u64[3] = (uint64_t)src->f[3];
+}
+
+static void
+micro_f2i64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i64[0] = (int64_t)src->f[0];
+ dst->i64[1] = (int64_t)src->f[1];
+ dst->i64[2] = (int64_t)src->f[2];
+ dst->i64[3] = (int64_t)src->f[3];
+}
+
+static void
+micro_u2i64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u64[0] = (uint64_t)src->u[0];
+ dst->u64[1] = (uint64_t)src->u[1];
+ dst->u64[2] = (uint64_t)src->u[2];
+ dst->u64[3] = (uint64_t)src->u[3];
+}
+
+static void
+micro_i2i64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i64[0] = (int64_t)src->i[0];
+ dst->i64[1] = (int64_t)src->i[1];
+ dst->i64[2] = (int64_t)src->i[2];
+ dst->i64[3] = (int64_t)src->i[3];
+}
+
+static void
+micro_d2u64(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = (uint64_t)src->d[0];
+ dst->u64[1] = (uint64_t)src->d[1];
+ dst->u64[2] = (uint64_t)src->d[2];
+ dst->u64[3] = (uint64_t)src->d[3];
+}
+
+static void
+micro_d2i64(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = (int64_t)src->d[0];
+ dst->i64[1] = (int64_t)src->d[1];
+ dst->i64[2] = (int64_t)src->d[2];
+ dst->i64[3] = (int64_t)src->d[3];
+}
+
+static void
+micro_u642d(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->d[0] = (double)src->u64[0];
+ dst->d[1] = (double)src->u64[1];
+ dst->d[2] = (double)src->u64[2];
+ dst->d[3] = (double)src->u64[3];
+}
+
+static void
+micro_i642d(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->d[0] = (double)src->i64[0];
+ dst->d[1] = (double)src->i64[1];
+ dst->d[2] = (double)src->i64[2];
+ dst->d[3] = (double)src->i64[3];
+}
+
+static void
+micro_u642f(union tgsi_exec_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->f[0] = (float)src->u64[0];
+ dst->f[1] = (float)src->u64[1];
+ dst->f[2] = (float)src->u64[2];
+ dst->f[3] = (float)src->u64[3];
+}
+
+static void
+micro_i642f(union tgsi_exec_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->f[0] = (float)src->i64[0];
+ dst->f[1] = (float)src->i64[1];
+ dst->f[2] = (float)src->i64[2];
+ dst->f[3] = (float)src->i64[3];
+}
+
+static void
+exec_t_2_64(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_dop_s op,
+ enum tgsi_exec_datatype src_datatype)
+{
+ union tgsi_exec_channel src;
union tgsi_double_channel dst;
- union tgsi_exec_channel dst_exp;
- if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- micro_dfracexp(&dst, &dst_exp, &src);
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
+ fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
+ op(&dst, &src);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
- store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
}
- if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_dfracexp(&dst, &dst_exp, &src);
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
+ fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
+ op(&dst, &src);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
- store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
}
}
+static void
+exec_64_2_t(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_sop_d op,
+ enum tgsi_exec_datatype dst_datatype)
+{
+ union tgsi_double_channel src;
+ union tgsi_exec_channel dst;
+ int wm = inst->Dst[0].Register.WriteMask;
+ int i;
+ int bit;
+ for (i = 0; i < 2; i++) {
+ bit = ffs(wm);
+ if (bit) {
+ wm &= ~(1 << (bit - 1));
+ if (i == 0)
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ else
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+ op(&dst, &src);
+ store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
+ }
+ }
+}
static void
micro_i2f(union tgsi_exec_channel *dst,
@@ -4143,7 +5124,12 @@ micro_umsb(union tgsi_exec_channel *dst,
dst->i[3] = util_last_bit(src->u[3]) - 1;
}
-static void
+/**
+ * Execute a TGSI instruction.
+ * Returns TRUE if a barrier instruction is hit,
+ * otherwise FALSE.
+ */
+static boolean
exec_instruction(
struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
@@ -4299,7 +5285,7 @@ exec_instruction(
break;
case TGSI_OPCODE_PK2H:
- assert (0);
+ exec_pk2h(mach, inst);
break;
case TGSI_OPCODE_PK2US:
@@ -4378,8 +5364,14 @@ exec_instruction(
exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
break;
+ case TGSI_OPCODE_LODQ:
+ /* src[0] = texcoord */
+ /* src[1] = sampler unit */
+ exec_lodq(mach, inst);
+ break;
+
case TGSI_OPCODE_UP2H:
- assert (0);
+ exec_up2h(mach, inst);
break;
case TGSI_OPCODE_UP2US:
@@ -4455,8 +5447,12 @@ exec_instruction(
/* returning from main() */
mach->CondStackTop = 0;
mach->LoopStackTop = 0;
+ mach->ContStackTop = 0;
+ mach->LoopLabelStackTop = 0;
+ mach->SwitchStackTop = 0;
+ mach->BreakStackTop = 0;
*pc = -1;
- return;
+ return FALSE;
}
assert(mach->CallStackTop > 0);
@@ -4881,7 +5877,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SAMPLE_I_MS:
- assert(0);
+ exec_txf(mach, inst);
break;
case TGSI_OPCODE_SAMPLE:
@@ -4988,11 +5984,11 @@ exec_instruction(
break;
case TGSI_OPCODE_F2D:
- exec_f2d(mach, inst);
+ exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_D2F:
- exec_d2f(mach, inst);
+ exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DABS:
@@ -5064,47 +6060,184 @@ exec_instruction(
break;
case TGSI_OPCODE_I2D:
- exec_i2d(mach, inst);
+ exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_D2I:
- exec_d2i(mach, inst);
+ exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_U2D:
- exec_u2d(mach, inst);
+ exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_D2U:
- exec_d2u(mach, inst);
+ exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_LOAD:
+ exec_load(mach, inst);
+ break;
+
+ case TGSI_OPCODE_STORE:
+ exec_store(mach, inst);
+ break;
+
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMIMAX:
+ exec_atomop(mach, inst);
+ break;
+
+ case TGSI_OPCODE_RESQ:
+ exec_resq(mach, inst);
+ break;
+ case TGSI_OPCODE_BARRIER:
+ case TGSI_OPCODE_MEMBAR:
+ return TRUE;
+ break;
+
+ case TGSI_OPCODE_I64ABS:
+ exec_double_unary(mach, inst, micro_i64abs);
+ break;
+
+ case TGSI_OPCODE_I64SSG:
+ exec_double_unary(mach, inst, micro_i64sgn);
+ break;
+
+ case TGSI_OPCODE_I64NEG:
+ exec_double_unary(mach, inst, micro_i64neg);
+ break;
+
+ case TGSI_OPCODE_U64SEQ:
+ exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_U64SNE:
+ exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_I64SLT:
+ exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
+ break;
+ case TGSI_OPCODE_U64SLT:
+ exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_I64SGE:
+ exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
+ break;
+ case TGSI_OPCODE_U64SGE:
+ exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_I64MIN:
+ exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
+ break;
+ case TGSI_OPCODE_U64MIN:
+ exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_I64MAX:
+ exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
+ break;
+ case TGSI_OPCODE_U64MAX:
+ exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_U64ADD:
+ exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_U64MUL:
+ exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_U64SHL:
+ exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
+ break;
+ case TGSI_OPCODE_I64SHR:
+ exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
+ break;
+ case TGSI_OPCODE_U64SHR:
+ exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
+ break;
+ case TGSI_OPCODE_U64DIV:
+ exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_I64DIV:
+ exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
+ break;
+ case TGSI_OPCODE_U64MOD:
+ exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_I64MOD:
+ exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
+ break;
+
+ case TGSI_OPCODE_F2U64:
+ exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_F2I64:
+ exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_U2I64:
+ exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
+ break;
+ case TGSI_OPCODE_I2I64:
+ exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_D2U64:
+ exec_double_unary(mach, inst, micro_d2u64);
+ break;
+
+ case TGSI_OPCODE_D2I64:
+ exec_double_unary(mach, inst, micro_d2i64);
+ break;
+
+ case TGSI_OPCODE_U642F:
+ exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
break;
+ case TGSI_OPCODE_I642F:
+ exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_U642D:
+ exec_double_unary(mach, inst, micro_u642d);
+ break;
+ case TGSI_OPCODE_I642D:
+ exec_double_unary(mach, inst, micro_i642d);
+ break;
+
default:
assert( 0 );
}
+ return FALSE;
}
-
-/**
- * Run TGSI interpreter.
- * \return bitmask of "alive" quad components
- */
-uint
-tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
+static void
+tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
{
- uint i;
- int pc = 0;
uint default_mask = 0xf;
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
- if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
+ if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
mach->Primitives[0] = 0;
/* GS runs on a single primitive for now */
default_mask = 0x1;
}
+ if (mach->NonHelperMask == 0)
+ mach->NonHelperMask = default_mask;
mach->CondMask = default_mask;
mach->LoopMask = default_mask;
mach->ContMask = default_mask;
@@ -5119,11 +6252,26 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
assert(mach->SwitchStackTop == 0);
assert(mach->BreakStackTop == 0);
assert(mach->CallStackTop == 0);
+}
+
+/**
+ * Run TGSI interpreter.
+ * \return bitmask of "alive" quad components
+ */
+uint
+tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
+{
+ uint i;
+ mach->pc = start_pc;
- /* execute declarations (interpolants) */
- for (i = 0; i < mach->NumDeclarations; i++) {
- exec_declaration( mach, mach->Declarations+i );
+ if (!start_pc) {
+ tgsi_exec_machine_setup_masks(mach);
+
+ /* execute declarations (interpolants) */
+ for (i = 0; i < mach->NumDeclarations; i++) {
+ exec_declaration( mach, mach->Declarations+i );
+ }
}
{
@@ -5132,23 +6280,30 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
uint inst = 1;
- memset(mach->Temps, 0, sizeof(temps));
- memset(mach->Outputs, 0, sizeof(outputs));
- memset(temps, 0, sizeof(temps));
- memset(outputs, 0, sizeof(outputs));
+ if (!start_pc) {
+ memset(mach->Temps, 0, sizeof(temps));
+ if (mach->Outputs)
+ memset(mach->Outputs, 0, sizeof(outputs));
+ memset(temps, 0, sizeof(temps));
+ memset(outputs, 0, sizeof(outputs));
+ }
#endif
/* execute instructions, until pc is set to -1 */
- while (pc != -1) {
-
+ while (mach->pc != -1) {
+ boolean barrier_hit;
#if DEBUG_EXECUTION
uint i;
- tgsi_dump_instruction(&mach->Instructions[pc], inst++);
+ tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
#endif
- assert(pc < (int) mach->NumInstructions);
- exec_instruction(mach, mach->Instructions + pc, &pc);
+ assert(mach->pc < (int) mach->NumInstructions);
+ barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
+
+ /* for compute shaders if we hit a barrier return now for later rescheduling */
+ if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
+ return 0;
#if DEBUG_EXECUTION
for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
@@ -5169,21 +6324,23 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
}
}
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
- if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
- uint j;
-
- memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
- debug_printf("OUT[%2u] = ", i);
- for (j = 0; j < 4; j++) {
- if (j > 0) {
- debug_printf(" ");
+ if (mach->Outputs) {
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
+ uint j;
+
+ memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
+ debug_printf("OUT[%2u] = ", i);
+ for (j = 0; j < 4; j++) {
+ if (j > 0) {
+ debug_printf(" ");
+ }
+ debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
+ outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
+ outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
+ outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
+ outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
}
- debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
- outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
- outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
- outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
- outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
}
}
}
@@ -5193,7 +6350,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
#if 0
/* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
- if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+ if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
/*
* Scale back depth component.
*/
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h
index db5c56b6b..9343d788d 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -88,13 +88,84 @@ struct tgsi_interp_coef
float dady[TGSI_NUM_CHANNELS];
};
-enum tgsi_sampler_control {
- tgsi_sampler_lod_none,
- tgsi_sampler_lod_bias,
- tgsi_sampler_lod_explicit,
- tgsi_sampler_lod_zero,
- tgsi_sampler_derivs_explicit,
- tgsi_sampler_gather,
+enum tgsi_sampler_control
+{
+ TGSI_SAMPLER_LOD_NONE,
+ TGSI_SAMPLER_LOD_BIAS,
+ TGSI_SAMPLER_LOD_EXPLICIT,
+ TGSI_SAMPLER_LOD_ZERO,
+ TGSI_SAMPLER_DERIVS_EXPLICIT,
+ TGSI_SAMPLER_GATHER,
+};
+
+struct tgsi_image_params {
+ unsigned unit;
+ unsigned tgsi_tex_instr;
+ enum pipe_format format;
+ unsigned execmask;
+};
+
+struct tgsi_image {
+ /* image interfaces */
+ void (*load)(const struct tgsi_image *image,
+ const struct tgsi_image_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ const int t[TGSI_QUAD_SIZE],
+ const int r[TGSI_QUAD_SIZE],
+ const int sample[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*store)(const struct tgsi_image *image,
+ const struct tgsi_image_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ const int t[TGSI_QUAD_SIZE],
+ const int r[TGSI_QUAD_SIZE],
+ const int sample[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*op)(const struct tgsi_image *image,
+ const struct tgsi_image_params *params,
+ unsigned opcode,
+ const int s[TGSI_QUAD_SIZE],
+ const int t[TGSI_QUAD_SIZE],
+ const int r[TGSI_QUAD_SIZE],
+ const int sample[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*get_dims)(const struct tgsi_image *image,
+ const struct tgsi_image_params *params,
+ int dims[4]);
+};
+
+struct tgsi_buffer_params {
+ unsigned unit;
+ unsigned execmask;
+ unsigned writemask;
+};
+
+struct tgsi_buffer {
+ /* buffer interfaces */
+ void (*load)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*store)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*op)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ unsigned opcode,
+ const int s[TGSI_QUAD_SIZE],
+ float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+ float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+
+ void (*get_dims)(const struct tgsi_buffer *buffer,
+ const struct tgsi_buffer_params *params,
+ int *dim);
};
/**
@@ -138,6 +209,16 @@ struct tgsi_sampler
const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+ void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
+ const unsigned sview_index,
+ const unsigned sampler_index,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ const enum tgsi_sampler_control control,
+ float mipmap[TGSI_QUAD_SIZE],
+ float lod[TGSI_QUAD_SIZE]);
};
#define TGSI_EXEC_NUM_TEMPS 4096
@@ -185,21 +266,18 @@ struct tgsi_sampler
#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_TEMP_HALF_C 0
-/* execution mask, each value is either 0 or ~0 */
-#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_MASK_C 1
-
/* 4 register buffer for various purposes */
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
#define TGSI_EXEC_NUM_TEMP_R 4
#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
+#define TGSI_EXEC_NUM_ADDRS 3
/* predicate register */
-#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9)
+#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 11)
#define TGSI_EXEC_NUM_PREDS 1
-#define TGSI_EXEC_NUM_TEMP_EXTRAS 10
+#define TGSI_EXEC_NUM_TEMP_EXTRAS 12
@@ -278,20 +356,22 @@ struct tgsi_exec_machine
/* System values */
unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT];
- union tgsi_exec_channel SystemValue[TGSI_MAX_MISC_INPUTS];
+ struct tgsi_exec_vector SystemValue[TGSI_MAX_MISC_INPUTS];
struct tgsi_exec_vector *Addrs;
struct tgsi_exec_vector *Predicates;
struct tgsi_sampler *Sampler;
+ struct tgsi_image *Image;
+ struct tgsi_buffer *Buffer;
unsigned ImmLimit;
const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS];
const struct tgsi_token *Tokens; /**< Declarations, instructions */
- unsigned Processor; /**< TGSI_PROCESSOR_x */
+ enum pipe_shader_type ShaderType; /**< PIPE_SHADER_x */
/* GEOMETRY processor only. */
unsigned *Primitives;
@@ -304,6 +384,13 @@ struct tgsi_exec_machine
struct tgsi_exec_vector QuadPos;
float Face; /**< +1 if front facing, -1 if back facing */
bool flatshade_color;
+
+ /* Compute Only */
+ void *LocalMem;
+ unsigned LocalMemSize;
+
+ /* See GLSL 4.50 specification for definition of helper invocations */
+ uint NonHelperMask; /**< non-helpers */
/* Conditional execution masks */
uint CondMask; /**< For IF/ELSE/ENDIF */
uint LoopMask; /**< For BGNLOOP/ENDLOOP */
@@ -358,10 +445,12 @@ struct tgsi_exec_machine
SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS];
boolean UsedGeometryShader;
+
+ int pc;
};
struct tgsi_exec_machine *
-tgsi_exec_machine_create( void );
+tgsi_exec_machine_create(enum pipe_shader_type shader_type);
void
tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach);
@@ -371,11 +460,13 @@ void
tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
- struct tgsi_sampler *sampler);
+ struct tgsi_sampler *sampler,
+ struct tgsi_image *image,
+ struct tgsi_buffer *buffer);
uint
tgsi_exec_machine_run(
- struct tgsi_exec_machine *mach );
+ struct tgsi_exec_machine *mach, int start_pc );
void
@@ -386,27 +477,6 @@ boolean
tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
-static inline void
-tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
-{
- mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
- mask;
-}
-
-
-/** Set execution mask values prior to executing the shader */
-static inline void
-tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
- boolean ch0, boolean ch1, boolean ch2, boolean ch3)
-{
- int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i;
- mask[0] = ch0 ? ~0 : 0;
- mask[1] = ch1 ? ~0 : 0;
- mask[2] = ch2 ? ~0 : 0;
- mask[3] = ch3 ? ~0 : 0;
-}
-
-
extern void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
unsigned num_bufs,
@@ -454,6 +524,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
return PIPE_MAX_SHADER_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_DOUBLES:
@@ -463,6 +535,11 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return PIPE_MAX_SHADER_BUFFERS;
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ return PIPE_MAX_SHADER_IMAGES;
+
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c
index fb29ea0d5..37549aae7 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -37,231 +37,256 @@
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{
- { 1, 1, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL },
- { 1, 1, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV },
- { 1, 1, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT },
- { 1, 1, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP },
- { 1, 1, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ },
- { 1, 1, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP },
- { 1, 1, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG },
- { 1, 2, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL },
- { 1, 2, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD },
- { 1, 2, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 },
- { 1, 2, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 },
- { 1, 2, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
- { 1, 2, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
- { 1, 2, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
- { 1, 2, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
- { 1, 2, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
- { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
- { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
- { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
- { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
- { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
- { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
- { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
- { 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
- { 1, 1, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
- { 1, 3, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
- { 1, 1, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
- { 1, 1, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
- { 1, 1, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
- { 1, 1, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
- { 1, 2, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
- { 1, 2, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
- { 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */
- { 1, 1, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
- { 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */
- { 1, 2, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
- { 1, 1, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
- { 1, 1, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
- { 1, 1, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
- { 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
- { 1, 1, 0, 0, 0, 0, COMP, "PK2H", TGSI_OPCODE_PK2H },
- { 1, 1, 0, 0, 0, 0, COMP, "PK2US", TGSI_OPCODE_PK2US },
- { 1, 1, 0, 0, 0, 0, COMP, "PK4B", TGSI_OPCODE_PK4B },
- { 1, 1, 0, 0, 0, 0, COMP, "PK4UB", TGSI_OPCODE_PK4UB },
- { 0, 1, 0, 0, 0, 1, NONE, "", 44 }, /* removed */
- { 1, 2, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
- { 0, 1, 0, 0, 0, 1, NONE, "", 46 }, /* removed */
- { 1, 2, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
- { 1, 1, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
- { 1, 2, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
- { 1, 2, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
- { 0, 1, 0, 0, 0, 1, NONE, "", 51 }, /* removed */
- { 1, 2, 1, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
- { 1, 4, 1, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
- { 1, 2, 1, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
- { 1, 1, 0, 0, 0, 0, COMP, "UP2H", TGSI_OPCODE_UP2H },
- { 1, 1, 0, 0, 0, 0, COMP, "UP2US", TGSI_OPCODE_UP2US },
- { 1, 1, 0, 0, 0, 0, COMP, "UP4B", TGSI_OPCODE_UP4B },
- { 1, 1, 0, 0, 0, 0, COMP, "UP4UB", TGSI_OPCODE_UP4UB },
- { 0, 1, 0, 0, 0, 1, NONE, "", 59 }, /* removed */
- { 0, 1, 0, 0, 0, 1, NONE, "", 60 }, /* removed */
- { 1, 1, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
- { 0, 1, 0, 0, 0, 1, NONE, "", 62 }, /* removed */
- { 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
- { 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
- { 1, 1, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
- { 1, 3, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
- { 1, 1, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
- { 1, 2, 1, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
- { 0, 1, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
- { 1, 2, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
- { 1, 2, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
- { 1, 2, 1, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
- { 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
- { 0, 1, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
- { 0, 1, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
- { 0, 1, 0, 0, 0, 1, NONE, "", 76 }, /* removed */
- { 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
- { 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
- { 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
- { 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
- { 0, 1, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
- { 1, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
- { 1, 1, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
- { 1, 1, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
- { 1, 1, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
- { 1, 1, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
- { 1, 2, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
- { 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */
- { 1, 2, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
- { 1, 2, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
- { 1, 2, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
- { 1, 2, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
- { 1, 3, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
- { 1, 2, 1, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
- { 1, 2, 1, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
- { 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
- { 0, 1, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
- { 0, 1, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
- { 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
- { 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
- { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
- { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
- { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
- { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */
- { 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
- { 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
- { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
- { 1, 2, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
- { 1, 2, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
- { 1, 2, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
- { 1, 2, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
- { 0, 1, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
- { 0, 1, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
- { 0, 1, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
- { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
- { 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
- { 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
- { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
- { 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
- { 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
- { 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
- { 1, 2, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN },
- { 1, 1, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG },
- { 1, 2, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE },
- { 1, 2, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR },
- { 1, 2, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT },
- { 1, 1, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U },
- { 1, 1, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F },
- { 1, 2, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD },
- { 1, 2, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV },
- { 1, 3, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD },
- { 1, 2, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX },
- { 1, 2, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN },
- { 1, 2, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD },
- { 1, 2, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL },
- { 1, 2, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ },
- { 1, 2, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE },
- { 1, 2, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR },
- { 1, 2, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT },
- { 1, 2, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE },
- { 0, 1, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH },
- { 0, 1, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE },
- { 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
- { 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD },
+ { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 },
+ { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 },
+ { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
+ { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
+ { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
+ { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
+ { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
+ { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
+ { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
+ { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
+ { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
+ { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
+ { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
+ { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
+ { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
+ { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
+ { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
+ { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
+ { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
+ { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
+ { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
+ { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
+ { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
+ { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
+ { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */
+ { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
+ { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
+ { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
+ { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
+ { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
+ { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
+ { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
+ { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
+ { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
+ { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
+ { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
+ { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
+ { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH },
+ { 0, 1, 0, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
+ { 0, 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
- { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
- { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
- { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
- { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
- { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
- { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
- { 1, 5, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
- { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
- { 1, 3, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 },
- { 1, 2, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO },
- { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
- { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
- { 1, 1, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
- { 1, 3, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
- { 1, 1, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
- { 1, 1, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
- { 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
- { 1, 2, 0, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
- { 1, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
- { 1, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
- { 1, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
- { 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
+ { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
+ { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
+ { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
+ { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
+ { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
+ { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
+ { 1, 5, 0, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
+ { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
+ { 1, 3, 0, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 },
+ { 1, 2, 0, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO },
+ { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
+ { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
+ { 1, 2, 0, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
+ { 1, 2, 0, 1, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
+ { 1, 0, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
+ { 1, 0, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
+ { 1, 0, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
+ { 0, 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
- { 1, 4, 0, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
- { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX },
- { 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
- { 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
- { 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
- { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
- { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
- { 1, 3, 1, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 },
- { 1, 2, 1, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ },
- { 1, 3, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE },
- { 1, 3, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE },
- { 1, 4, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI },
- { 1, 1, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV },
- { 1, 1, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC },
- { 1, 1, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
- { 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
- { 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
- { 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
- { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
- { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
- { 1, 1, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
- { 1, 1, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
- { 1, 1, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
- { 1, 1, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
- { 1, 2, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
- { 1, 2, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
- { 1, 2, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
- { 1, 2, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
- { 1, 2, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
- { 1, 2, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
- { 1, 2, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
- { 1, 2, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
- { 1, 1, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
- { 1, 1, 0, 0 ,0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
- { 1, 3, 0, 0 ,0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
- { 1, 1, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
- { 1, 2, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
- { 2, 1, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
- { 1, 1, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
- { 1, 1, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
- { 1, 1, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
- { 1, 1, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
- { 1, 1, 0, 0 ,0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
- { 1, 1, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
- { 1, 1, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
- { 1, 1, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
- { 1, 1, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
- { 1, 1, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
+ { 1, 4, 0, 1, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
+ { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX },
+ { 1, 3, 1, 0, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
+ { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
+ { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
+ { 1, 3, 1, 0, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 },
+ { 1, 2, 1, 0, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE },
+ { 1, 4, 0, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
+ { 1, 1, 0, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
+ { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
+ { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
+ { 1, 3, 0, 0, 0, 0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
+ { 2, 1, 0, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD },
};
const struct tgsi_opcode_info *
@@ -272,7 +297,7 @@ tgsi_get_opcode_info( uint opcode )
if (firsttime) {
unsigned i;
firsttime = 0;
- for (i = 0; i < Elements(opcode_info); i++)
+ for (i = 0; i < ARRAY_SIZE(opcode_info); i++)
assert(opcode_info[i].opcode == i);
}
@@ -296,15 +321,15 @@ const char *
tgsi_get_processor_name( uint processor )
{
switch (processor) {
- case TGSI_PROCESSOR_VERTEX:
+ case PIPE_SHADER_VERTEX:
return "vertex shader";
- case TGSI_PROCESSOR_FRAGMENT:
+ case PIPE_SHADER_FRAGMENT:
return "fragment shader";
- case TGSI_PROCESSOR_GEOMETRY:
+ case PIPE_SHADER_GEOMETRY:
return "geometry shader";
- case TGSI_PROCESSOR_TESS_CTRL:
+ case PIPE_SHADER_TESS_CTRL:
return "tessellation control shader";
- case TGSI_PROCESSOR_TESS_EVAL:
+ case PIPE_SHADER_TESS_EVAL:
return "tessellation evaluation shader";
default:
return "unknown shader type!";
@@ -331,6 +356,7 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_TXQ_LZ:
+ case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_F2U:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_UMAD:
@@ -378,6 +404,12 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
return TGSI_TYPE_SIGNED;
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DABS:
@@ -401,7 +433,33 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_F2D:
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_U642D:
+ case TGSI_OPCODE_I642D:
return TGSI_TYPE_DOUBLE;
+ case TGSI_OPCODE_U64MAX:
+ case TGSI_OPCODE_U64MIN:
+ case TGSI_OPCODE_U64ADD:
+ case TGSI_OPCODE_U64MUL:
+ case TGSI_OPCODE_U64DIV:
+ case TGSI_OPCODE_U64MOD:
+ case TGSI_OPCODE_U64SHL:
+ case TGSI_OPCODE_U64SHR:
+ case TGSI_OPCODE_F2U64:
+ case TGSI_OPCODE_D2U64:
+ return TGSI_TYPE_UNSIGNED64;
+ case TGSI_OPCODE_I64MAX:
+ case TGSI_OPCODE_I64MIN:
+ case TGSI_OPCODE_I64ABS:
+ case TGSI_OPCODE_I64SSG:
+ case TGSI_OPCODE_I64NEG:
+ case TGSI_OPCODE_I64SHR:
+ case TGSI_OPCODE_I64DIV:
+ case TGSI_OPCODE_I64MOD:
+ case TGSI_OPCODE_F2I64:
+ case TGSI_OPCODE_U2I64:
+ case TGSI_OPCODE_I2I64:
+ case TGSI_OPCODE_D2I64:
+ return TGSI_TYPE_SIGNED64;
default:
return TGSI_TYPE_FLOAT;
}
@@ -425,10 +483,14 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_SAMPLE_I:
case TGSI_OPCODE_SAMPLE_I_MS:
case TGSI_OPCODE_UMUL_HI:
+ case TGSI_OPCODE_UP2H:
+ case TGSI_OPCODE_U2I64:
+ case TGSI_OPCODE_MEMBAR:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_I2I64:
return TGSI_TYPE_SIGNED;
case TGSI_OPCODE_ARL:
case TGSI_OPCODE_ARR:
@@ -441,6 +503,8 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_FSLT:
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_UCMP:
+ case TGSI_OPCODE_F2U64:
+ case TGSI_OPCODE_F2I64:
return TGSI_TYPE_FLOAT;
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_D2U:
@@ -449,7 +513,21 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_D2U64:
+ case TGSI_OPCODE_D2I64:
return TGSI_TYPE_DOUBLE;
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_U642F:
+ case TGSI_OPCODE_U642D:
+ return TGSI_TYPE_UNSIGNED64;
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_I642D:
+ return TGSI_TYPE_SIGNED64;
default:
return tgsi_opcode_infer_type(opcode);
}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index a3b90bdb5..b0a28f271 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -264,14 +264,13 @@ transform_dst(struct tgsi_transform_context *tctx,
* dst.z = src0.x \times src1.y - src1.x \times src0.y
* dst.w = 1.0
*
- * ; needs: 2 tmp, imm{1.0}
- * MUL tmpA.xyz, src0.yzx, src1.zxy
- * MUL tmpB.xyz, src1.yzx, src0.zxy
- * SUB dst.xyz, tmpA.xyz, tmpB.xyz
+ * ; needs: 1 tmp, imm{1.0}
+ * MUL tmpA.xyz, src1.yzx, src0.zxy
+ * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
* MOV dst.w, imm{1.0}
*/
-#define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
-#define XPD_TMP 2
+#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
+#define XPD_TMP 1
static void
transform_xpd(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction *inst)
@@ -283,34 +282,26 @@ transform_xpd(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction new_inst;
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
- /* MUL tmpA.xyz, src0.yzx, src1.zxy */
+ /* MUL tmpA.xyz, src1.yzx, src0.zxy */
new_inst = tgsi_default_full_instruction();
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
new_inst.Instruction.NumDstRegs = 1;
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
- reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
- tctx->emit_instruction(tctx, &new_inst);
-
- /* MUL tmpB.xyz, src1.yzx, src0.zxy */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
- new_inst.Instruction.NumSrcRegs = 2;
reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
tctx->emit_instruction(tctx, &new_inst);
- /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
+ /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
new_inst.Instruction.NumDstRegs = 1;
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
- reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, _));
+ new_inst.Instruction.NumSrcRegs = 3;
+ reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
+ reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
+ reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
+ new_inst.Src[2].Register.Negate = true;
tctx->emit_instruction(tctx, &new_inst);
}
@@ -397,14 +388,15 @@ transform_scs(struct tgsi_transform_context *tctx,
* dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
* dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
*
- * ; needs: 2 tmp, imm{1.0}
- * MUL tmpA, src0, src1
- * SUB tmpB, imm{1.0}, src0
- * MUL tmpB, tmpB, src2
- * ADD dst, tmpA, tmpB
+ * This becomes: src0 \times src1 + src2 - src0 \times src2, which
+ * can then become: src0 \times src1 - (src0 \times src2 - src2)
+ *
+ * ; needs: 1 tmp
+ * MAD tmpA, src0, src2, -src2
+ * MAD dst, src0, src1, -tmpA
*/
-#define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
-#define LRP_TMP 2
+#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
+#define LRP_TMP 1
static void
transform_lrp(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction *inst)
@@ -417,44 +409,28 @@ transform_lrp(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction new_inst;
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
- /* MUL tmpA, src0, src1 */
+ /* MAD tmpA, src0, src2, -src2 */
new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
new_inst.Instruction.NumDstRegs = 1;
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
- new_inst.Instruction.NumSrcRegs = 2;
+ new_inst.Instruction.NumSrcRegs = 3;
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
- reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
- tctx->emit_instruction(tctx, &new_inst);
-
- /* SUB tmpB, imm{1.0}, src0 */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y));
- reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W));
- tctx->emit_instruction(tctx, &new_inst);
-
- /* MUL tmpB, tmpB, src2 */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
+ reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
+ new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
tctx->emit_instruction(tctx, &new_inst);
- /* ADD dst, tmpA, tmpB */
+ /* MAD dst, src0, src1, -tmpA */
new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
new_inst.Instruction.NumDstRegs = 1;
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
- reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
+ new_inst.Instruction.NumSrcRegs = 3;
+ reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+ reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
+ reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+ new_inst.Src[2].Register.Negate = true;
tctx->emit_instruction(tctx, &new_inst);
}
}
@@ -676,14 +652,19 @@ transform_lit(struct tgsi_transform_context *tctx,
* dst.w = 1.0
*
* ; needs: 1 tmp, imm{1.0}
- * FLR tmpA.x, src.x
+ * if (lowering FLR) {
+ * FRC tmpA.x, src.x
+ * SUB tmpA.x, src.x, tmpA.x
+ * } else {
+ * FLR tmpA.x, src.x
+ * }
* EX2 tmpA.y, src.x
* SUB dst.y, src.x, tmpA.x
* EX2 dst.x, tmpA.x
* MOV dst.z, tmpA.y
* MOV dst.w, imm{1.0}
*/
-#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
+#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
NINST(1)+ NINST(1) - OINST(1))
#define EXP_TMP 1
static void
@@ -696,14 +677,35 @@ transform_exp(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction new_inst;
if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
- /* FLR tmpA.x, src.x */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
- new_inst.Instruction.NumSrcRegs = 1;
- reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
- tctx->emit_instruction(tctx, &new_inst);
+ if (ctx->config->lower_FLR) {
+ /* FRC tmpA.x, src.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+
+ /* SUB tmpA.x, src.x, tmpA.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+ new_inst.Instruction.NumSrcRegs = 2;
+ reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
+ reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+ } else {
+ /* FLR tmpA.x, src.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+ }
}
if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
@@ -771,14 +773,19 @@ transform_exp(struct tgsi_transform_context *tctx,
*
* ; needs: 1 tmp, imm{1.0}
* LG2 tmpA.x, |src.x|
- * FLR tmpA.y, tmpA.x
+ * if (lowering FLR) {
+ * FRC tmpA.y, tmpA.x
+ * SUB tmpA.y, tmpA.x, tmpA.y
+ * } else {
+ * FLR tmpA.y, tmpA.x
+ * }
* EX2 tmpA.z, tmpA.y
* RCP tmpA.z, tmpA.z
* MUL dst.y, |src.x|, tmpA.z
* MOV dst.xz, tmpA.yx
* MOV dst.w, imm{1.0}
*/
-#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
+#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
NINST(2) + NINST(1) + NINST(1) - OINST(1))
#define LOG_TMP 1
static void
@@ -803,14 +810,35 @@ transform_log(struct tgsi_transform_context *tctx,
}
if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
- /* FLR tmpA.y, tmpA.x */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
- new_inst.Instruction.NumSrcRegs = 1;
- reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
- tctx->emit_instruction(tctx, &new_inst);
+ if (ctx->config->lower_FLR) {
+ /* FRC tmpA.y, tmpA.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+
+ /* SUB tmpA.y, tmpA.x, tmpA.y */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
+ new_inst.Instruction.NumSrcRegs = 2;
+ reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
+ reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+ } else {
+ /* FLR tmpA.y, tmpA.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+ }
}
if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
@@ -1005,6 +1033,131 @@ transform_dotp(struct tgsi_transform_context *tctx,
}
}
+/* FLR - floor, CEIL - ceil
+ * ; needs: 1 tmp
+ * if (CEIL) {
+ * FRC tmpA, -src
+ * ADD dst, src, tmpA
+ * } else {
+ * FRC tmpA, src
+ * SUB dst, src, tmpA
+ * }
+ */
+#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
+#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
+#define FLR_TMP 1
+#define CEIL_TMP 1
+static void
+transform_flr_ceil(struct tgsi_transform_context *tctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
+ struct tgsi_full_dst_register *dst = &inst->Dst[0];
+ struct tgsi_full_src_register *src0 = &inst->Src[0];
+ struct tgsi_full_instruction new_inst;
+ unsigned opcode = inst->Instruction.Opcode;
+
+ if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
+ /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+
+ if (opcode == TGSI_OPCODE_CEIL)
+ new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
+ tctx->emit_instruction(tctx, &new_inst);
+
+ /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
+ new_inst = tgsi_default_full_instruction();
+ if (opcode == TGSI_OPCODE_CEIL)
+ new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+ else
+ new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
+ new_inst.Instruction.NumSrcRegs = 2;
+ reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+ reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+ tctx->emit_instruction(tctx, &new_inst);
+ }
+}
+
+/* TRUNC - truncate off fractional part
+ * dst.x = trunc(src.x)
+ * dst.y = trunc(src.y)
+ * dst.z = trunc(src.z)
+ * dst.w = trunc(src.w)
+ *
+ * ; needs: 1 tmp
+ * if (lower FLR) {
+ * FRC tmpA, |src|
+ * SUB tmpA, |src|, tmpA
+ * } else {
+ * FLR tmpA, |src|
+ * }
+ * CMP dst, src, -tmpA, tmpA
+ */
+#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
+#define TRUNC_TMP 1
+static void
+transform_trunc(struct tgsi_transform_context *tctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
+ struct tgsi_full_dst_register *dst = &inst->Dst[0];
+ struct tgsi_full_src_register *src0 = &inst->Src[0];
+ struct tgsi_full_instruction new_inst;
+
+ if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
+ if (ctx->config->lower_FLR) {
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+ new_inst.Src[0].Register.Absolute = true;
+ new_inst.Src[0].Register.Negate = false;
+ tctx->emit_instruction(tctx, &new_inst);
+
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
+ new_inst.Instruction.NumSrcRegs = 2;
+ reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+ new_inst.Src[0].Register.Absolute = true;
+ new_inst.Src[0].Register.Negate = false;
+ reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+ tctx->emit_instruction(tctx, &new_inst);
+ } else {
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+ new_inst.Src[0].Register.Absolute = true;
+ new_inst.Src[0].Register.Negate = false;
+ tctx->emit_instruction(tctx, &new_inst);
+ }
+
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
+ new_inst.Instruction.NumSrcRegs = 3;
+ reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+ reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+ new_inst.Src[1].Register.Negate = true;
+ reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+ tctx->emit_instruction(tctx, &new_inst);
+ }
+}
+
/* Inserts a MOV_SAT for the needed components of tex coord. Note that
* in the case of TXP, the clamping must happen *after* projection, so
* we need to lower TXP to TEX.
@@ -1401,6 +1554,21 @@ transform_instr(struct tgsi_transform_context *tctx,
goto skip;
transform_dotp(tctx, inst);
break;
+ case TGSI_OPCODE_FLR:
+ if (!ctx->config->lower_FLR)
+ goto skip;
+ transform_flr_ceil(tctx, inst);
+ break;
+ case TGSI_OPCODE_CEIL:
+ if (!ctx->config->lower_CEIL)
+ goto skip;
+ transform_flr_ceil(tctx, inst);
+ break;
+ case TGSI_OPCODE_TRUNC:
+ if (!ctx->config->lower_TRUNC)
+ goto skip;
+ transform_trunc(tctx, inst);
+ break;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_TXB:
@@ -1430,7 +1598,11 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
int newlen, numtmp;
/* sanity check in case limit is ever increased: */
- assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
+ STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
+
+ /* sanity check the lowering */
+ assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
+ assert(!(config->lower_FRC && config->lower_TRUNC));
memset(&ctx, 0, sizeof(ctx));
ctx.base.transform_instruction = transform_instr;
@@ -1443,7 +1615,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
* color, then figure out the number of additional inputs we need
* to create for BCOLOR's..
*/
- if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
+ if ((info->processor == PIPE_SHADER_FRAGMENT) &&
config->color_two_side) {
int i;
ctx.face_idx = -1;
@@ -1473,6 +1645,9 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
OPCS(DPH) ||
OPCS(DP2) ||
OPCS(DP2A) ||
+ OPCS(FLR) ||
+ OPCS(CEIL) ||
+ OPCS(TRUNC) ||
OPCS(TXP) ||
ctx.two_side_colors ||
ctx.saturate))
@@ -1541,6 +1716,18 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
newlen += DP2A_GROW * OPCS(DP2A);
numtmp = MAX2(numtmp, DOTP_TMP);
}
+ if (OPCS(FLR)) {
+ newlen += FLR_GROW * OPCS(FLR);
+ numtmp = MAX2(numtmp, FLR_TMP);
+ }
+ if (OPCS(CEIL)) {
+ newlen += CEIL_GROW * OPCS(CEIL);
+ numtmp = MAX2(numtmp, CEIL_TMP);
+ }
+ if (OPCS(TRUNC)) {
+ newlen += TRUNC_GROW * OPCS(TRUNC);
+ numtmp = MAX2(numtmp, TRUNC_TMP);
+ }
if (ctx.saturate || config->lower_TXP) {
int n = 0;
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 0729b5d24..940af7d30 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -121,8 +121,8 @@ tgsi_parse_token(
next_token( ctx, &decl->Semantic );
}
- if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
- next_token(ctx, &decl->Resource);
+ if (decl->Declaration.File == TGSI_FILE_IMAGE) {
+ next_token(ctx, &decl->Image);
}
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
@@ -155,12 +155,14 @@ tgsi_parse_token(
break;
case TGSI_IMM_UINT32:
+ case TGSI_IMM_UINT64:
for (i = 0; i < imm_count; i++) {
next_token(ctx, &imm->u[i].Uint);
}
break;
case TGSI_IMM_INT32:
+ case TGSI_IMM_INT64:
for (i = 0; i < imm_count; i++) {
next_token(ctx, &imm->u[i].Int);
}
@@ -195,6 +197,10 @@ tgsi_parse_token(
}
}
+ if (inst->Instruction.Memory) {
+ next_token(ctx, &inst->Memory);
+ }
+
assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
@@ -309,7 +315,7 @@ tgsi_dump_tokens(const struct tgsi_token *tokens)
int nr = tgsi_num_tokens(tokens);
int i;
- assert(sizeof(*tokens) == sizeof(unsigned));
+ STATIC_ASSERT(sizeof(*tokens) == sizeof(unsigned));
debug_printf("const unsigned tokens[%d] = {\n", nr);
for (i = 0; i < nr; i++)
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 35e1c7cfd..4689fb797 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -64,7 +64,7 @@ struct tgsi_full_declaration
struct tgsi_declaration_dimension Dim;
struct tgsi_declaration_interp Interp;
struct tgsi_declaration_semantic Semantic;
- struct tgsi_declaration_resource Resource;
+ struct tgsi_declaration_image Image;
struct tgsi_declaration_sampler_view SamplerView;
struct tgsi_declaration_array Array;
};
@@ -91,6 +91,7 @@ struct tgsi_full_instruction
struct tgsi_instruction_predicate Predicate;
struct tgsi_instruction_label Label;
struct tgsi_instruction_texture Texture;
+ struct tgsi_instruction_memory Memory;
struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS];
struct tgsi_full_src_register Src[TGSI_FULL_MAX_SRC_REGISTERS];
struct tgsi_texture_offset TexOffsets[TGSI_FULL_MAX_TEX_OFFSETS];
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
index cb8dbcb29..713bd609d 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
@@ -96,7 +96,7 @@ struct psprite_transform_context
unsigned stream_out_point_pos:1; // set if to stream out original point pos
unsigned aa_point:1; // set if doing aa point
unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS];
- int max_generic;
+ int max_generic; // max generic semantic index
};
static inline struct psprite_transform_context *
@@ -133,7 +133,7 @@ psprite_decl(struct tgsi_transform_context *ctx,
else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
decl->Semantic.Index < 32) {
ts->point_coord_decl |= 1 << decl->Semantic.Index;
- ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
+ ts->max_generic = MAX2(ts->max_generic, (int)decl->Semantic.Index);
}
ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
}
@@ -216,7 +216,7 @@ psprite_prolog(struct tgsi_transform_context *ctx)
if (en & 0x1) {
tgsi_transform_output_decl(ctx, ts->num_out++,
TGSI_SEMANTIC_GENERIC, i, 0);
- ts->max_generic = MAX2(ts->max_generic, i);
+ ts->max_generic = MAX2(ts->max_generic, (int)i);
}
}
}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index d14372feb..239a2c938 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -321,7 +321,7 @@ iter_instruction(
}
info = tgsi_get_opcode_info( inst->Instruction.Opcode );
- if (info == NULL) {
+ if (!info) {
report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode );
return TRUE;
}
@@ -414,9 +414,9 @@ iter_declaration(
decl->Semantic.Name == TGSI_SEMANTIC_TESSOUTER ||
decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER;
if (file == TGSI_FILE_INPUT && !patch && (
- processor == TGSI_PROCESSOR_GEOMETRY ||
- processor == TGSI_PROCESSOR_TESS_CTRL ||
- processor == TGSI_PROCESSOR_TESS_EVAL)) {
+ processor == PIPE_SHADER_GEOMETRY ||
+ processor == PIPE_SHADER_TESS_CTRL ||
+ processor == PIPE_SHADER_TESS_EVAL)) {
uint vert;
for (vert = 0; vert < ctx->implied_array_size; ++vert) {
scan_register *reg = MALLOC(sizeof(scan_register));
@@ -424,7 +424,7 @@ iter_declaration(
check_and_declare(ctx, reg);
}
} else if (file == TGSI_FILE_OUTPUT && !patch &&
- processor == TGSI_PROCESSOR_TESS_CTRL) {
+ processor == PIPE_SHADER_TESS_CTRL) {
uint vert;
for (vert = 0; vert < ctx->implied_out_array_size; ++vert) {
scan_register *reg = MALLOC(sizeof(scan_register));
@@ -485,11 +485,11 @@ iter_property(
{
struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
- if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY &&
+ if (iter->processor.Processor == PIPE_SHADER_GEOMETRY &&
prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) {
ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data);
}
- if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL &&
+ if (iter->processor.Processor == PIPE_SHADER_TESS_CTRL &&
prop->Property.PropertyName == TGSI_PROPERTY_TCS_VERTICES_OUT)
ctx->implied_out_array_size = prop->u[0].Data;
return TRUE;
@@ -499,8 +499,8 @@ static boolean
prolog(struct tgsi_iterate_context *iter)
{
struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
- if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL ||
- iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL)
+ if (iter->processor.Processor == PIPE_SHADER_TESS_CTRL ||
+ iter->processor.Processor == PIPE_SHADER_TESS_EVAL)
ctx->implied_array_size = 32;
return TRUE;
}
@@ -559,6 +559,7 @@ tgsi_sanity_check(
const struct tgsi_token *tokens )
{
struct sanity_check_ctx ctx;
+ boolean retval;
ctx.iter.prolog = prolog;
ctx.iter.iterate_instruction = iter_instruction;
@@ -580,11 +581,12 @@ tgsi_sanity_check(
ctx.implied_array_size = 0;
ctx.print = debug_get_option_print_sanity();
- if (!tgsi_iterate_shader( tokens, &ctx.iter ))
- return FALSE;
-
+ retval = tgsi_iterate_shader( tokens, &ctx.iter );
regs_hash_destroy(ctx.regs_decl);
regs_hash_destroy(ctx.regs_used);
regs_hash_destroy(ctx.regs_ind_used);
+ if (retval == FALSE)
+ return FALSE;
+
return ctx.errors == 0;
}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 7523baf4c..b86207883 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -38,11 +38,521 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
+static bool
+is_memory_file(unsigned file)
+{
+ return file == TGSI_FILE_SAMPLER ||
+ file == TGSI_FILE_SAMPLER_VIEW ||
+ file == TGSI_FILE_IMAGE ||
+ file == TGSI_FILE_BUFFER;
+}
+
+
+/**
+ * Is the opcode a "true" texture instruction which samples from a
+ * texture map?
+ */
+static bool
+is_texture_inst(unsigned opcode)
+{
+ return (opcode != TGSI_OPCODE_TXQ &&
+ opcode != TGSI_OPCODE_TXQS &&
+ opcode != TGSI_OPCODE_TXQ_LZ &&
+ opcode != TGSI_OPCODE_LODQ &&
+ tgsi_get_opcode_info(opcode)->is_tex);
+}
+
+
+/**
+ * Is the opcode an instruction which computes a derivative explicitly or
+ * implicitly?
+ */
+static bool
+computes_derivative(unsigned opcode)
+{
+ if (tgsi_get_opcode_info(opcode)->is_tex) {
+ return opcode != TGSI_OPCODE_TG4 &&
+ opcode != TGSI_OPCODE_TXD &&
+ opcode != TGSI_OPCODE_TXF &&
+ opcode != TGSI_OPCODE_TXL &&
+ opcode != TGSI_OPCODE_TXL2 &&
+ opcode != TGSI_OPCODE_TXQ &&
+ opcode != TGSI_OPCODE_TXQ_LZ &&
+ opcode != TGSI_OPCODE_TXQS;
+ }
+
+ return opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE ||
+ opcode == TGSI_OPCODE_DDY || opcode == TGSI_OPCODE_DDY_FINE ||
+ opcode == TGSI_OPCODE_SAMPLE ||
+ opcode == TGSI_OPCODE_SAMPLE_B ||
+ opcode == TGSI_OPCODE_SAMPLE_C;
+}
+
+
+static void
+scan_instruction(struct tgsi_shader_info *info,
+ const struct tgsi_full_instruction *fullinst,
+ unsigned *current_depth)
+{
+ unsigned i;
+ bool is_mem_inst = false;
+ bool is_interp_instruction = false;
+
+ assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
+ info->opcode_count[fullinst->Instruction.Opcode]++;
+
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
+ case TGSI_OPCODE_BGNLOOP:
+ (*current_depth)++;
+ info->max_depth = MAX2(info->max_depth, *current_depth);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ case TGSI_OPCODE_ENDLOOP:
+ (*current_depth)--;
+ break;
+ default:
+ break;
+ }
+
+ if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
+ unsigned input;
+
+ is_interp_instruction = true;
+
+ if (src0->Register.Indirect && src0->Indirect.ArrayID)
+ input = info->input_array_first[src0->Indirect.ArrayID];
+ else
+ input = src0->Register.Index;
+
+ /* For the INTERP opcodes, the interpolation is always
+ * PERSPECTIVE unless LINEAR is specified.
+ */
+ switch (info->input_interpolate[input]) {
+ case TGSI_INTERPOLATE_COLOR:
+ case TGSI_INTERPOLATE_CONSTANT:
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_INTERP_CENTROID:
+ info->uses_persp_opcode_interp_centroid = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_OFFSET:
+ info->uses_persp_opcode_interp_offset = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ info->uses_persp_opcode_interp_sample = TRUE;
+ break;
+ }
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_INTERP_CENTROID:
+ info->uses_linear_opcode_interp_centroid = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_OFFSET:
+ info->uses_linear_opcode_interp_offset = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ info->uses_linear_opcode_interp_sample = TRUE;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+ fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+ info->uses_doubles = TRUE;
+
+ for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *src = &fullinst->Src[i];
+ int ind = src->Register.Index;
+
+ /* Mark which inputs are effectively used */
+ if (src->Register.File == TGSI_FILE_INPUT) {
+ unsigned usage_mask;
+ usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
+ if (src->Register.Indirect) {
+ for (ind = 0; ind < info->num_inputs; ++ind) {
+ info->input_usage_mask[ind] |= usage_mask;
+ }
+ } else {
+ assert(ind >= 0);
+ assert(ind < PIPE_MAX_SHADER_INPUTS);
+ info->input_usage_mask[ind] |= usage_mask;
+ }
+
+ if (info->processor == PIPE_SHADER_FRAGMENT) {
+ unsigned name, index, input;
+
+ if (src->Register.Indirect && src->Indirect.ArrayID)
+ input = info->input_array_first[src->Indirect.ArrayID];
+ else
+ input = src->Register.Index;
+
+ name = info->input_semantic_name[input];
+ index = info->input_semantic_index[input];
+
+ if (name == TGSI_SEMANTIC_POSITION &&
+ (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
+ src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
+ src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
+ src->Register.SwizzleW == TGSI_SWIZZLE_Z))
+ info->reads_z = TRUE;
+
+ if (name == TGSI_SEMANTIC_COLOR) {
+ unsigned mask =
+ (1 << src->Register.SwizzleX) |
+ (1 << src->Register.SwizzleY) |
+ (1 << src->Register.SwizzleZ) |
+ (1 << src->Register.SwizzleW);
+
+ info->colors_read |= mask << (index * 4);
+ }
+
+ /* Process only interpolated varyings. Don't include POSITION.
+ * Don't include integer varyings, because they are not
+ * interpolated. Don't process inputs interpolated by INTERP
+ * opcodes. Those are tracked separately.
+ */
+ if ((!is_interp_instruction || i != 0) &&
+ (name == TGSI_SEMANTIC_GENERIC ||
+ name == TGSI_SEMANTIC_TEXCOORD ||
+ name == TGSI_SEMANTIC_COLOR ||
+ name == TGSI_SEMANTIC_BCOLOR ||
+ name == TGSI_SEMANTIC_FOG ||
+ name == TGSI_SEMANTIC_CLIPDIST)) {
+ switch (info->input_interpolate[input]) {
+ case TGSI_INTERPOLATE_COLOR:
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ switch (info->input_interpolate_loc[input]) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ info->uses_persp_center = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ info->uses_persp_centroid = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ info->uses_persp_sample = TRUE;
+ break;
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (info->input_interpolate_loc[input]) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ info->uses_linear_center = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ info->uses_linear_centroid = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ info->uses_linear_sample = TRUE;
+ break;
+ }
+ break;
+ /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
+ }
+ }
+ }
+ }
+
+ /* check for indirect register reads */
+ if (src->Register.Indirect) {
+ info->indirect_files |= (1 << src->Register.File);
+ info->indirect_files_read |= (1 << src->Register.File);
+ }
+
+ /* Texture samplers */
+ if (src->Register.File == TGSI_FILE_SAMPLER) {
+ const unsigned index = src->Register.Index;
+
+ assert(fullinst->Instruction.Texture);
+ assert(index < ARRAY_SIZE(info->is_msaa_sampler));
+ assert(index < PIPE_MAX_SAMPLERS);
+
+ if (is_texture_inst(fullinst->Instruction.Opcode)) {
+ const unsigned target = fullinst->Texture.Texture;
+ assert(target < TGSI_TEXTURE_UNKNOWN);
+ /* for texture instructions, check that the texture instruction
+ * target matches the previous sampler view declaration (if there
+ * was one.)
+ */
+ if (info->sampler_targets[index] == TGSI_TEXTURE_UNKNOWN) {
+ /* probably no sampler view declaration */
+ info->sampler_targets[index] = target;
+ } else {
+ /* Make sure the texture instruction's sampler/target info
+ * agrees with the sampler view declaration.
+ */
+ assert(info->sampler_targets[index] == target);
+ }
+ /* MSAA samplers */
+ if (target == TGSI_TEXTURE_2D_MSAA ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ info->is_msaa_sampler[src->Register.Index] = TRUE;
+ }
+ }
+ }
+
+ if (is_memory_file(src->Register.File)) {
+ is_mem_inst = true;
+
+ if (tgsi_get_opcode_info(fullinst->Instruction.Opcode)->is_store) {
+ info->writes_memory = TRUE;
+
+ if (src->Register.File == TGSI_FILE_IMAGE &&
+ !src->Register.Indirect)
+ info->images_writemask |= 1 << src->Register.Index;
+ }
+ }
+ }
+
+ /* check for indirect register writes */
+ for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
+ const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
+ if (dst->Register.Indirect) {
+ info->indirect_files |= (1 << dst->Register.File);
+ info->indirect_files_written |= (1 << dst->Register.File);
+ }
+
+ if (is_memory_file(dst->Register.File)) {
+ assert(fullinst->Instruction.Opcode == TGSI_OPCODE_STORE);
+
+ is_mem_inst = true;
+ info->writes_memory = TRUE;
+
+ if (dst->Register.File == TGSI_FILE_IMAGE &&
+ !dst->Register.Indirect)
+ info->images_writemask |= 1 << dst->Register.Index;
+ }
+ }
+
+ if (is_mem_inst)
+ info->num_memory_instructions++;
+
+ if (computes_derivative(fullinst->Instruction.Opcode))
+ info->uses_derivatives = true;
+
+ info->num_instructions++;
+}
+
+
+static void
+scan_declaration(struct tgsi_shader_info *info,
+ const struct tgsi_full_declaration *fulldecl)
+{
+ const uint file = fulldecl->Declaration.File;
+ const unsigned procType = info->processor;
+ uint reg;
+
+ if (fulldecl->Declaration.Array) {
+ unsigned array_id = fulldecl->Array.ArrayID;
+
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ assert(array_id < ARRAY_SIZE(info->input_array_first));
+ info->input_array_first[array_id] = fulldecl->Range.First;
+ info->input_array_last[array_id] = fulldecl->Range.Last;
+ break;
+ case TGSI_FILE_OUTPUT:
+ assert(array_id < ARRAY_SIZE(info->output_array_first));
+ info->output_array_first[array_id] = fulldecl->Range.First;
+ info->output_array_last[array_id] = fulldecl->Range.Last;
+ break;
+ }
+ info->array_max[file] = MAX2(info->array_max[file], array_id);
+ }
+
+ for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) {
+ unsigned semName = fulldecl->Semantic.Name;
+ unsigned semIndex = fulldecl->Semantic.Index +
+ (reg - fulldecl->Range.First);
+
+ /* only first 32 regs will appear in this bitfield */
+ info->file_mask[file] |= (1 << reg);
+ info->file_count[file]++;
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+
+ if (file == TGSI_FILE_CONSTANT) {
+ int buffer = 0;
+
+ if (fulldecl->Declaration.Dimension)
+ buffer = fulldecl->Dim.Index2D;
+
+ info->const_file_max[buffer] =
+ MAX2(info->const_file_max[buffer], (int)reg);
+ }
+ else if (file == TGSI_FILE_INPUT) {
+ info->input_semantic_name[reg] = (ubyte) semName;
+ info->input_semantic_index[reg] = (ubyte) semIndex;
+ info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
+ info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
+ info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
+
+ /* Vertex shaders can have inputs with holes between them. */
+ info->num_inputs = MAX2(info->num_inputs, reg + 1);
+
+ if (semName == TGSI_SEMANTIC_PRIMID)
+ info->uses_primid = TRUE;
+ else if (procType == PIPE_SHADER_FRAGMENT) {
+ if (semName == TGSI_SEMANTIC_POSITION)
+ info->reads_position = TRUE;
+ else if (semName == TGSI_SEMANTIC_FACE)
+ info->uses_frontface = TRUE;
+ }
+ }
+ else if (file == TGSI_FILE_SYSTEM_VALUE) {
+ unsigned index = fulldecl->Range.First;
+
+ info->system_value_semantic_name[index] = semName;
+ info->num_system_values = MAX2(info->num_system_values, index + 1);
+
+ switch (semName) {
+ case TGSI_SEMANTIC_INSTANCEID:
+ info->uses_instanceid = TRUE;
+ break;
+ case TGSI_SEMANTIC_VERTEXID:
+ info->uses_vertexid = TRUE;
+ break;
+ case TGSI_SEMANTIC_VERTEXID_NOBASE:
+ info->uses_vertexid_nobase = TRUE;
+ break;
+ case TGSI_SEMANTIC_BASEVERTEX:
+ info->uses_basevertex = TRUE;
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ info->uses_primid = TRUE;
+ break;
+ case TGSI_SEMANTIC_INVOCATIONID:
+ info->uses_invocationid = TRUE;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ info->reads_position = TRUE;
+ break;
+ case TGSI_SEMANTIC_FACE:
+ info->uses_frontface = TRUE;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->reads_samplemask = TRUE;
+ break;
+ }
+ }
+ else if (file == TGSI_FILE_OUTPUT) {
+ info->output_semantic_name[reg] = (ubyte) semName;
+ info->output_semantic_index[reg] = (ubyte) semIndex;
+ info->num_outputs = MAX2(info->num_outputs, reg + 1);
+
+ if (semName == TGSI_SEMANTIC_COLOR)
+ info->colors_written |= 1 << semIndex;
+
+ if (procType == PIPE_SHADER_VERTEX ||
+ procType == PIPE_SHADER_GEOMETRY ||
+ procType == PIPE_SHADER_TESS_CTRL ||
+ procType == PIPE_SHADER_TESS_EVAL) {
+ switch (semName) {
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ info->writes_viewport_index = TRUE;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ info->writes_layer = TRUE;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ info->writes_psize = TRUE;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ info->writes_clipvertex = TRUE;
+ break;
+ }
+ }
+
+ if (procType == PIPE_SHADER_FRAGMENT) {
+ switch (semName) {
+ case TGSI_SEMANTIC_POSITION:
+ info->writes_z = TRUE;
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ info->writes_stencil = TRUE;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->writes_samplemask = TRUE;
+ break;
+ }
+ }
+
+ if (procType == PIPE_SHADER_VERTEX) {
+ if (semName == TGSI_SEMANTIC_EDGEFLAG) {
+ info->writes_edgeflag = TRUE;
+ }
+ }
+ } else if (file == TGSI_FILE_SAMPLER) {
+ STATIC_ASSERT(sizeof(info->samplers_declared) * 8 >= PIPE_MAX_SAMPLERS);
+ info->samplers_declared |= 1u << reg;
+ } else if (file == TGSI_FILE_SAMPLER_VIEW) {
+ unsigned target = fulldecl->SamplerView.Resource;
+ unsigned type = fulldecl->SamplerView.ReturnTypeX;
+
+ assert(target < TGSI_TEXTURE_UNKNOWN);
+ if (info->sampler_targets[reg] == TGSI_TEXTURE_UNKNOWN) {
+ /* Save sampler target for this sampler index */
+ info->sampler_targets[reg] = target;
+ info->sampler_type[reg] = type;
+ } else {
+ /* if previously declared, make sure targets agree */
+ assert(info->sampler_targets[reg] == target);
+ assert(info->sampler_type[reg] == type);
+ }
+ } else if (file == TGSI_FILE_IMAGE) {
+ if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER)
+ info->images_buffers |= 1 << reg;
+ }
+ }
+}
+
+
+static void
+scan_immediate(struct tgsi_shader_info *info)
+{
+ uint reg = info->immediate_count++;
+ uint file = TGSI_FILE_IMMEDIATE;
+
+ info->file_mask[file] |= (1 << reg);
+ info->file_count[file]++;
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+}
+
+
+static void
+scan_property(struct tgsi_shader_info *info,
+ const struct tgsi_full_property *fullprop)
+{
+ unsigned name = fullprop->Property.PropertyName;
+ unsigned value = fullprop->u[0].Data;
+
+ assert(name < ARRAY_SIZE(info->properties));
+ info->properties[name] = value;
+
+ switch (name) {
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ info->num_written_clipdistance = value;
+ info->clipdist_writemask |= (1 << value) - 1;
+ break;
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ info->num_written_culldistance = value;
+ info->culldist_writemask |= (1 << value) - 1;
+ break;
+ }
+}
/**
@@ -56,13 +566,16 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
{
uint procType, i;
struct tgsi_parse_context parse;
+ unsigned current_depth = 0;
memset(info, 0, sizeof(*info));
for (i = 0; i < TGSI_FILE_COUNT; i++)
info->file_max[i] = -1;
- for (i = 0; i < Elements(info->const_file_max); i++)
+ for (i = 0; i < ARRAY_SIZE(info->const_file_max); i++)
info->const_file_max[i] = -1;
info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = 1;
+ for (i = 0; i < ARRAY_SIZE(info->sampler_targets); i++)
+ info->sampler_targets[i] = TGSI_TEXTURE_UNKNOWN;
/**
** Setup to begin parsing input shader
@@ -72,265 +585,38 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
return;
}
procType = parse.FullHeader.Processor.Processor;
- assert(procType == TGSI_PROCESSOR_FRAGMENT ||
- procType == TGSI_PROCESSOR_VERTEX ||
- procType == TGSI_PROCESSOR_GEOMETRY ||
- procType == TGSI_PROCESSOR_TESS_CTRL ||
- procType == TGSI_PROCESSOR_TESS_EVAL ||
- procType == TGSI_PROCESSOR_COMPUTE);
+ assert(procType == PIPE_SHADER_FRAGMENT ||
+ procType == PIPE_SHADER_VERTEX ||
+ procType == PIPE_SHADER_GEOMETRY ||
+ procType == PIPE_SHADER_TESS_CTRL ||
+ procType == PIPE_SHADER_TESS_EVAL ||
+ procType == PIPE_SHADER_COMPUTE);
info->processor = procType;
-
/**
** Loop over incoming program tokens/instructions
*/
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
+ while (!tgsi_parse_end_of_tokens(&parse)) {
info->num_tokens++;
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
- {
- const struct tgsi_full_instruction *fullinst
- = &parse.FullToken.FullInstruction;
- uint i;
-
- assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
- info->opcode_count[fullinst->Instruction.Opcode]++;
-
- for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *src =
- &fullinst->Src[i];
- int ind = src->Register.Index;
-
- /* Mark which inputs are effectively used */
- if (src->Register.File == TGSI_FILE_INPUT) {
- unsigned usage_mask;
- usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
- if (src->Register.Indirect) {
- for (ind = 0; ind < info->num_inputs; ++ind) {
- info->input_usage_mask[ind] |= usage_mask;
- }
- } else {
- assert(ind >= 0);
- assert(ind < PIPE_MAX_SHADER_INPUTS);
- info->input_usage_mask[ind] |= usage_mask;
- }
-
- if (procType == TGSI_PROCESSOR_FRAGMENT &&
- info->reads_position &&
- src->Register.Index == 0 &&
- (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleW == TGSI_SWIZZLE_Z)) {
- info->reads_z = TRUE;
- }
- }
-
- /* check for indirect register reads */
- if (src->Register.Indirect) {
- info->indirect_files |= (1 << src->Register.File);
- info->indirect_files_read |= (1 << src->Register.File);
- }
-
- /* MSAA samplers */
- if (src->Register.File == TGSI_FILE_SAMPLER) {
- assert(fullinst->Instruction.Texture);
- assert(src->Register.Index < Elements(info->is_msaa_sampler));
-
- if (fullinst->Instruction.Texture &&
- (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
- fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
- info->is_msaa_sampler[src->Register.Index] = TRUE;
- }
- }
- }
-
- /* check for indirect register writes */
- for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
- const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
- if (dst->Register.Indirect) {
- info->indirect_files |= (1 << dst->Register.File);
- info->indirect_files_written |= (1 << dst->Register.File);
- }
- }
-
- info->num_instructions++;
- }
+ scan_instruction(info, &parse.FullToken.FullInstruction,
+ &current_depth);
break;
-
case TGSI_TOKEN_TYPE_DECLARATION:
- {
- const struct tgsi_full_declaration *fulldecl
- = &parse.FullToken.FullDeclaration;
- const uint file = fulldecl->Declaration.File;
- uint reg;
-
- if (fulldecl->Declaration.Array) {
- unsigned array_id = fulldecl->Array.ArrayID;
-
- switch (file) {
- case TGSI_FILE_INPUT:
- assert(array_id < ARRAY_SIZE(info->input_array_first));
- info->input_array_first[array_id] = fulldecl->Range.First;
- info->input_array_last[array_id] = fulldecl->Range.Last;
- break;
- case TGSI_FILE_OUTPUT:
- assert(array_id < ARRAY_SIZE(info->output_array_first));
- info->output_array_first[array_id] = fulldecl->Range.First;
- info->output_array_last[array_id] = fulldecl->Range.Last;
- break;
- }
- info->array_max[file] = MAX2(info->array_max[file], array_id);
- }
-
- for (reg = fulldecl->Range.First;
- reg <= fulldecl->Range.Last;
- reg++) {
- unsigned semName = fulldecl->Semantic.Name;
- unsigned semIndex =
- fulldecl->Semantic.Index + (reg - fulldecl->Range.First);
-
- /* only first 32 regs will appear in this bitfield */
- info->file_mask[file] |= (1 << reg);
- info->file_count[file]++;
- info->file_max[file] = MAX2(info->file_max[file], (int)reg);
-
- if (file == TGSI_FILE_CONSTANT) {
- int buffer = 0;
-
- if (fulldecl->Declaration.Dimension)
- buffer = fulldecl->Dim.Index2D;
-
- info->const_file_max[buffer] =
- MAX2(info->const_file_max[buffer], (int)reg);
- }
- else if (file == TGSI_FILE_INPUT) {
- info->input_semantic_name[reg] = (ubyte) semName;
- info->input_semantic_index[reg] = (ubyte) semIndex;
- info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
- info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
- info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
- info->num_inputs++;
-
- if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
- info->uses_centroid = TRUE;
-
- if (semName == TGSI_SEMANTIC_PRIMID)
- info->uses_primid = TRUE;
- else if (procType == TGSI_PROCESSOR_FRAGMENT) {
- if (semName == TGSI_SEMANTIC_POSITION)
- info->reads_position = TRUE;
- else if (semName == TGSI_SEMANTIC_FACE)
- info->uses_frontface = TRUE;
- }
- }
- else if (file == TGSI_FILE_SYSTEM_VALUE) {
- unsigned index = fulldecl->Range.First;
-
- info->system_value_semantic_name[index] = semName;
- info->num_system_values = MAX2(info->num_system_values,
- index + 1);
-
- if (semName == TGSI_SEMANTIC_INSTANCEID) {
- info->uses_instanceid = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_VERTEXID) {
- info->uses_vertexid = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) {
- info->uses_vertexid_nobase = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_BASEVERTEX) {
- info->uses_basevertex = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_PRIMID) {
- info->uses_primid = TRUE;
- } else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
- info->uses_invocationid = TRUE;
- }
- }
- else if (file == TGSI_FILE_OUTPUT) {
- info->output_semantic_name[reg] = (ubyte) semName;
- info->output_semantic_index[reg] = (ubyte) semIndex;
- info->num_outputs++;
-
- if (procType == TGSI_PROCESSOR_VERTEX ||
- procType == TGSI_PROCESSOR_GEOMETRY ||
- procType == TGSI_PROCESSOR_TESS_CTRL ||
- procType == TGSI_PROCESSOR_TESS_EVAL) {
- if (semName == TGSI_SEMANTIC_CLIPDIST) {
- info->num_written_clipdistance +=
- util_bitcount(fulldecl->Declaration.UsageMask);
- info->clipdist_writemask |=
- fulldecl->Declaration.UsageMask << (semIndex*4);
- }
- else if (semName == TGSI_SEMANTIC_CULLDIST) {
- info->num_written_culldistance +=
- util_bitcount(fulldecl->Declaration.UsageMask);
- info->culldist_writemask |=
- fulldecl->Declaration.UsageMask << (semIndex*4);
- }
- else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
- info->writes_viewport_index = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_LAYER) {
- info->writes_layer = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_PSIZE) {
- info->writes_psize = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_CLIPVERTEX) {
- info->writes_clipvertex = TRUE;
- }
- }
-
- if (procType == TGSI_PROCESSOR_FRAGMENT) {
- if (semName == TGSI_SEMANTIC_POSITION) {
- info->writes_z = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_STENCIL) {
- info->writes_stencil = TRUE;
- }
- }
-
- if (procType == TGSI_PROCESSOR_VERTEX) {
- if (semName == TGSI_SEMANTIC_EDGEFLAG) {
- info->writes_edgeflag = TRUE;
- }
- }
- }
- }
- }
+ scan_declaration(info, &parse.FullToken.FullDeclaration);
break;
-
case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- uint reg = info->immediate_count++;
- uint file = TGSI_FILE_IMMEDIATE;
-
- info->file_mask[file] |= (1 << reg);
- info->file_count[file]++;
- info->file_max[file] = MAX2(info->file_max[file], (int)reg);
- }
+ scan_immediate(info);
break;
-
case TGSI_TOKEN_TYPE_PROPERTY:
- {
- const struct tgsi_full_property *fullprop
- = &parse.FullToken.FullProperty;
- unsigned name = fullprop->Property.PropertyName;
-
- assert(name < Elements(info->properties));
- info->properties[name] = fullprop->u[0].Data;
- }
+ scan_property(info, &parse.FullToken.FullProperty);
break;
-
default:
- assert( 0 );
+ assert(!"Unexpected TGSI token type");
}
}
@@ -340,7 +626,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
/* The dimensions of the IN decleration in geometry shader have
* to be deduced from the type of the input primitive.
*/
- if (procType == TGSI_PROCESSOR_GEOMETRY) {
+ if (procType == PIPE_SHADER_GEOMETRY) {
unsigned input_primitive =
info->properties[TGSI_PROPERTY_GS_INPUT_PRIM];
int num_verts = u_vertices_per_prim(input_primitive);
@@ -353,9 +639,85 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}
}
- tgsi_parse_free (&parse);
+ tgsi_parse_free(&parse);
}
+/**
+ * Collect information about the arrays of a given register file.
+ *
+ * @param tokens TGSI shader
+ * @param file the register file to scan through
+ * @param max_array_id number of entries in @p arrays; should be equal to the
+ * highest array id, i.e. tgsi_shader_info::array_max[file].
+ * @param arrays info for array of each ID will be written to arrays[ID - 1].
+ */
+void
+tgsi_scan_arrays(const struct tgsi_token *tokens,
+ unsigned file,
+ unsigned max_array_id,
+ struct tgsi_array_info *arrays)
+{
+ struct tgsi_parse_context parse;
+
+ if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
+ debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n");
+ return;
+ }
+
+ memset(arrays, 0, sizeof(arrays[0]) * max_array_id);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ struct tgsi_full_instruction *inst;
+
+ tgsi_parse_token(&parse);
+
+ if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
+ struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+
+ if (decl->Declaration.Array && decl->Declaration.File == file &&
+ decl->Array.ArrayID > 0 && decl->Array.ArrayID <= max_array_id) {
+ struct tgsi_array_info *array = &arrays[decl->Array.ArrayID - 1];
+ assert(!array->declared);
+ array->declared = true;
+ array->range = decl->Range;
+ }
+ }
+
+ if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+ continue;
+
+ inst = &parse.FullToken.FullInstruction;
+ for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ const struct tgsi_full_dst_register *dst = &inst->Dst[i];
+ if (dst->Register.File != file)
+ continue;
+
+ if (dst->Register.Indirect) {
+ if (dst->Indirect.ArrayID > 0 &&
+ dst->Indirect.ArrayID <= max_array_id) {
+ arrays[dst->Indirect.ArrayID - 1].writemask |= dst->Register.WriteMask;
+ } else {
+ /* Indirect writes without an ArrayID can write anywhere. */
+ for (unsigned j = 0; j < max_array_id; ++j)
+ arrays[j].writemask |= dst->Register.WriteMask;
+ }
+ } else {
+ /* Check whether the write falls into any of the arrays anyway. */
+ for (unsigned j = 0; j < max_array_id; ++j) {
+ struct tgsi_array_info *array = &arrays[j];
+ if (array->declared &&
+ dst->Register.Index >= array->range.First &&
+ dst->Register.Index <= array->range.Last)
+ array->writemask |= dst->Register.WriteMask;
+ }
+ }
+ }
+ }
+
+ tgsi_parse_free(&parse);
+
+ return;
+}
/**
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h
index b81bdd71f..0c5f2ba06 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -64,6 +64,9 @@ struct tgsi_shader_info
uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */
int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */
int const_file_max[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned samplers_declared; /**< bitmask of declared samplers */
+ ubyte sampler_targets[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_TEXTURE_x values */
+ ubyte sampler_type[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_RETURN_TYPE_x */
ubyte input_array_first[PIPE_MAX_SHADER_INPUTS];
ubyte input_array_last[PIPE_MAX_SHADER_INPUTS];
@@ -73,16 +76,32 @@ struct tgsi_shader_info
uint immediate_count; /**< number of immediates declared */
uint num_instructions;
+ uint num_memory_instructions; /**< sampler, buffer, and image instructions */
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
+ ubyte colors_read; /**< which color components are read by the FS */
+ ubyte colors_written;
boolean reads_position; /**< does fragment shader read position? */
boolean reads_z; /**< does fragment shader read depth? */
+ boolean reads_samplemask; /**< does fragment shader read sample mask? */
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_stencil; /**< does fragment shader write stencil value? */
+ boolean writes_samplemask; /**< does fragment shader write sample mask? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KILL or KILL_IF instruction used? */
- boolean uses_centroid;
+ boolean uses_persp_center;
+ boolean uses_persp_centroid;
+ boolean uses_persp_sample;
+ boolean uses_linear_center;
+ boolean uses_linear_centroid;
+ boolean uses_linear_sample;
+ boolean uses_persp_opcode_interp_centroid;
+ boolean uses_persp_opcode_interp_offset;
+ boolean uses_persp_opcode_interp_sample;
+ boolean uses_linear_opcode_interp_centroid;
+ boolean uses_linear_opcode_interp_offset;
+ boolean uses_linear_opcode_interp_sample;
boolean uses_instanceid;
boolean uses_vertexid;
boolean uses_vertexid_nobase;
@@ -94,13 +113,24 @@ struct tgsi_shader_info
boolean writes_clipvertex;
boolean writes_viewport_index;
boolean writes_layer;
+ boolean writes_memory; /**< contains stores or atomics to buffers or images */
boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
-
+ boolean uses_doubles; /**< uses any of the double instructions */
+ boolean uses_derivatives;
unsigned clipdist_writemask;
unsigned culldist_writemask;
unsigned num_written_culldistance;
unsigned num_written_clipdistance;
/**
+ * Bitmask indicating which images are written to (STORE / ATOM*).
+ * Indirect image accesses are not reflected in this mask.
+ */
+ unsigned images_writemask;
+ /**
+ * Bitmask indicating which declared image is a buffer.
+ */
+ unsigned images_buffers;
+ /**
* Bitmask indicating which register files are accessed with
* indirect addressing. The bits are (1 << TGSI_FILE_x), etc.
*/
@@ -113,12 +143,34 @@ struct tgsi_shader_info
unsigned indirect_files_written;
unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */
+
+ /**
+ * Max nesting limit of loops/if's
+ */
+ unsigned max_depth;
+};
+
+struct tgsi_array_info
+{
+ /** Whether an array with this ID was declared. */
+ bool declared;
+
+ /** The OR of all writemasks used to write to this array. */
+ ubyte writemask;
+
+ /** The range with which the array was declared. */
+ struct tgsi_declaration_range range;
};
extern void
tgsi_scan_shader(const struct tgsi_token *tokens,
struct tgsi_shader_info *info);
+void
+tgsi_scan_arrays(const struct tgsi_token *tokens,
+ unsigned file,
+ unsigned max_array_id,
+ struct tgsi_array_info *arrays);
extern boolean
tgsi_is_passthrough_shader(const struct tgsi_token *tokens);
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 8271ea081..536a4c8f3 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -34,8 +34,8 @@
const char *tgsi_processor_type_names[6] =
{
- "FRAG",
"VERT",
+ "FRAG",
"GEOM",
"TESS_CTRL",
"TESS_EVAL",
@@ -54,8 +54,10 @@ static const char *tgsi_file_names[] =
"IMM",
"PRED",
"SV",
- "RES",
- "SVIEW"
+ "IMAGE",
+ "SVIEW",
+ "BUFFER",
+ "MEMORY",
};
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
@@ -83,7 +85,6 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
"PCOORD",
"VIEWPORT_INDEX",
"LAYER",
- "CULLDIST",
"SAMPLEID",
"SAMPLEPOS",
"SAMPLEMASK",
@@ -95,6 +96,10 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
"TESSOUTER",
"TESSINNER",
"VERTICESIN",
+ "HELPER_INVOCATION",
+ "BASEINSTANCE",
+ "DRAWID",
+ "WORK_DIM",
};
const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
@@ -137,6 +142,13 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
"TES_SPACING",
"TES_VERTEX_ORDER_CW",
"TES_POINT_MODE",
+ "NUM_CLIPDIST_ENABLED",
+ "NUM_CULLDIST_ENABLED",
+ "FS_EARLY_DEPTH_STENCIL",
+ "NEXT_SHADER",
+ "CS_FIXED_BLOCK_WIDTH",
+ "CS_FIXED_BLOCK_HEIGHT",
+ "CS_FIXED_BLOCK_DEPTH"
};
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =
@@ -202,16 +214,23 @@ const char *tgsi_immediate_type_names[4] =
"FLT64"
};
+const char *tgsi_memory_names[3] =
+{
+ "COHERENT",
+ "RESTRICT",
+ "VOLATILE",
+};
+
static inline void
tgsi_strings_check(void)
{
- STATIC_ASSERT(Elements(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT);
- STATIC_ASSERT(Elements(tgsi_texture_names) == TGSI_TEXTURE_COUNT);
- STATIC_ASSERT(Elements(tgsi_property_names) == TGSI_PROPERTY_COUNT);
- STATIC_ASSERT(Elements(tgsi_primitive_names) == PIPE_PRIM_MAX);
- STATIC_ASSERT(Elements(tgsi_interpolate_names) == TGSI_INTERPOLATE_COUNT);
- STATIC_ASSERT(Elements(tgsi_return_type_names) == TGSI_RETURN_TYPE_COUNT);
+ STATIC_ASSERT(ARRAY_SIZE(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT);
+ STATIC_ASSERT(ARRAY_SIZE(tgsi_texture_names) == TGSI_TEXTURE_COUNT);
+ STATIC_ASSERT(ARRAY_SIZE(tgsi_property_names) == TGSI_PROPERTY_COUNT);
+ STATIC_ASSERT(ARRAY_SIZE(tgsi_primitive_names) == PIPE_PRIM_MAX);
+ STATIC_ASSERT(ARRAY_SIZE(tgsi_interpolate_names) == TGSI_INTERPOLATE_COUNT);
+ STATIC_ASSERT(ARRAY_SIZE(tgsi_return_type_names) == TGSI_RETURN_TYPE_COUNT);
(void) tgsi_processor_type_names;
(void) tgsi_return_type_names;
(void) tgsi_immediate_type_names;
@@ -223,8 +242,8 @@ tgsi_strings_check(void)
const char *
tgsi_file_name(unsigned file)
{
- STATIC_ASSERT(Elements(tgsi_file_names) == TGSI_FILE_COUNT);
- if (file < Elements(tgsi_file_names))
+ STATIC_ASSERT(ARRAY_SIZE(tgsi_file_names) == TGSI_FILE_COUNT);
+ if (file < ARRAY_SIZE(tgsi_file_names))
return tgsi_file_names[file];
else
return "invalid file";
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h
index 71e74372f..9a9362e91 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h
@@ -38,7 +38,7 @@ extern "C" {
#endif
-extern const char *tgsi_processor_type_names[6];
+extern const char *tgsi_processor_type_names[PIPE_SHADER_TYPES];
extern const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT];
@@ -60,6 +60,8 @@ extern const char *tgsi_fs_coord_pixel_center_names[2];
extern const char *tgsi_immediate_type_names[4];
+extern const char *tgsi_memory_names[3];
+
const char *
tgsi_file_name(unsigned file);
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c
index 3e3ed5b19..be8084251 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -119,6 +119,42 @@ static boolean str_match_nocase_whole( const char **pcur, const char *str )
return FALSE;
}
+/* Return the array index that matches starting at *pcur, where the string at
+ * *pcur is terminated by a non-digit non-letter non-underscore.
+ * Returns -1 if no match is found.
+ *
+ * On success, the pointer to the first string is moved to the end of the read
+ * word.
+ */
+static int str_match_name_from_array(const char **pcur,
+ const char * const *array,
+ unsigned array_size)
+{
+ for (unsigned j = 0; j < array_size; ++j) {
+ if (str_match_nocase_whole(pcur, array[j]))
+ return j;
+ }
+ return -1;
+}
+
+/* Return the format corresponding to the name at *pcur.
+ * Returns -1 if there is no format name.
+ *
+ * On success, the pointer to the string is moved to the end of the read format
+ * name.
+ */
+static int str_match_format(const char **pcur)
+{
+ for (unsigned i = 0; i < PIPE_FORMAT_COUNT; i++) {
+ const struct util_format_description *desc =
+ util_format_description(i);
+ if (desc && str_match_nocase_whole(pcur, desc->name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
/* Eat zero or more whitespaces.
*/
static void eat_opt_white( const char **pcur )
@@ -195,8 +231,15 @@ static boolean parse_float( const char **pcur, float *val )
boolean integral_part = FALSE;
boolean fractional_part = FALSE;
- *val = (float) atof( cur );
+ if (*cur == '0' && *(cur + 1) == 'x') {
+ union fi fi;
+ fi.ui = strtoul(cur, NULL, 16);
+ *val = fi.f;
+ cur += 10;
+ goto out;
+ }
+ *val = (float) atof( cur );
if (*cur == '-' || *cur == '+')
cur++;
if (is_digit( cur )) {
@@ -228,6 +271,8 @@ static boolean parse_float( const char **pcur, float *val )
else
return FALSE;
}
+
+out:
*pcur = cur;
return TRUE;
}
@@ -250,6 +295,42 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1)
return TRUE;
}
+static boolean parse_int64( const char **pcur, uint32_t *val0, uint32_t *val1)
+{
+ const char *cur = *pcur;
+ union {
+ int64_t i64val;
+ uint32_t uval[2];
+ } v;
+
+ v.i64val = strtoll(cur, (char**)pcur, 0);
+ if (*pcur == cur)
+ return FALSE;
+
+ *val0 = v.uval[0];
+ *val1 = v.uval[1];
+
+ return TRUE;
+}
+
+static boolean parse_uint64( const char **pcur, uint32_t *val0, uint32_t *val1)
+{
+ const char *cur = *pcur;
+ union {
+ uint64_t u64val;
+ uint32_t uval[2];
+ } v;
+
+ v.u64val = strtoull(cur, (char**)pcur, 0);
+ if (*pcur == cur)
+ return FALSE;
+
+ *val0 = v.uval[0];
+ *val1 = v.uval[1];
+
+ return TRUE;
+}
+
struct translate_ctx
{
const char *text;
@@ -292,17 +373,17 @@ static boolean parse_header( struct translate_ctx *ctx )
uint processor;
if (str_match_nocase_whole( &ctx->cur, "FRAG" ))
- processor = TGSI_PROCESSOR_FRAGMENT;
+ processor = PIPE_SHADER_FRAGMENT;
else if (str_match_nocase_whole( &ctx->cur, "VERT" ))
- processor = TGSI_PROCESSOR_VERTEX;
+ processor = PIPE_SHADER_VERTEX;
else if (str_match_nocase_whole( &ctx->cur, "GEOM" ))
- processor = TGSI_PROCESSOR_GEOMETRY;
+ processor = PIPE_SHADER_GEOMETRY;
else if (str_match_nocase_whole( &ctx->cur, "TESS_CTRL" ))
- processor = TGSI_PROCESSOR_TESS_CTRL;
+ processor = PIPE_SHADER_TESS_CTRL;
else if (str_match_nocase_whole( &ctx->cur, "TESS_EVAL" ))
- processor = TGSI_PROCESSOR_TESS_EVAL;
+ processor = PIPE_SHADER_TESS_EVAL;
else if (str_match_nocase_whole( &ctx->cur, "COMP" ))
- processor = TGSI_PROCESSOR_COMPUTE;
+ processor = PIPE_SHADER_COMPUTE;
else {
report_error( ctx, "Unknown header" );
return FALSE;
@@ -689,9 +770,9 @@ parse_register_dcl(
* the second bracket */
/* tessellation has similar constraints to geometry shader */
- if ((ctx->processor == TGSI_PROCESSOR_GEOMETRY && is_in) ||
- (ctx->processor == TGSI_PROCESSOR_TESS_EVAL && is_in) ||
- (ctx->processor == TGSI_PROCESSOR_TESS_CTRL && (is_in || is_out))) {
+ if ((ctx->processor == PIPE_SHADER_GEOMETRY && is_in) ||
+ (ctx->processor == PIPE_SHADER_TESS_EVAL && is_in) ||
+ (ctx->processor == PIPE_SHADER_TESS_CTRL && (is_in || is_out))) {
brackets[0] = brackets[1];
*num_brackets = 1;
} else {
@@ -1030,6 +1111,15 @@ parse_instruction(
inst.Texture.Texture = TGSI_TEXTURE_UNKNOWN;
}
+ if ((i >= TGSI_OPCODE_LOAD && i <= TGSI_OPCODE_ATOMIMAX) ||
+ i == TGSI_OPCODE_RESQ) {
+ inst.Instruction.Memory = 1;
+ inst.Memory.Qualifier = 0;
+ }
+
+ assume(info->num_dst <= TGSI_FULL_MAX_DST_REGISTERS);
+ assume(info->num_src <= TGSI_FULL_MAX_SRC_REGISTERS);
+
/* Parse instruction operands.
*/
for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) {
@@ -1082,6 +1172,41 @@ parse_instruction(
inst.Texture.NumOffsets = i;
cur = ctx->cur;
+ eat_opt_white(&cur);
+
+ for (; inst.Instruction.Memory && *cur == ',';
+ ctx->cur = cur, eat_opt_white(&cur)) {
+ int j;
+
+ cur++;
+ eat_opt_white(&cur);
+
+ j = str_match_name_from_array(&cur, tgsi_memory_names,
+ ARRAY_SIZE(tgsi_memory_names));
+ if (j >= 0) {
+ inst.Memory.Qualifier |= 1U << j;
+ continue;
+ }
+
+ j = str_match_name_from_array(&cur, tgsi_texture_names,
+ ARRAY_SIZE(tgsi_texture_names));
+ if (j >= 0) {
+ inst.Memory.Texture = j;
+ continue;
+ }
+
+ j = str_match_format(&cur);
+ if (j >= 0) {
+ inst.Memory.Format = j;
+ continue;
+ }
+
+ ctx->cur = cur;
+ report_error(ctx, "Expected memory qualifier, texture target, or format\n");
+ return FALSE;
+ }
+
+ cur = ctx->cur;
eat_opt_white( &cur );
if (info->is_branch && *cur == ':') {
uint target;
@@ -1139,6 +1264,14 @@ static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type,
ret = parse_double(&ctx->cur, &values[i].Uint, &values[i+1].Uint);
i++;
break;
+ case TGSI_IMM_INT64:
+ ret = parse_int64(&ctx->cur, &values[i].Uint, &values[i+1].Uint);
+ i++;
+ break;
+ case TGSI_IMM_UINT64:
+ ret = parse_uint64(&ctx->cur, &values[i].Uint, &values[i+1].Uint);
+ i++;
+ break;
case TGSI_IMM_FLOAT32:
ret = parse_float(&ctx->cur, &values[i].Float);
break;
@@ -1205,7 +1338,7 @@ static boolean parse_declaration( struct translate_ctx *ctx )
}
is_vs_input = (file == TGSI_FILE_INPUT &&
- ctx->processor == TGSI_PROCESSOR_VERTEX);
+ ctx->processor == PIPE_SHADER_VERTEX);
cur = ctx->cur;
eat_opt_white( &cur );
@@ -1242,10 +1375,10 @@ static boolean parse_declaration( struct translate_ctx *ctx )
cur++;
eat_opt_white( &cur );
- if (file == TGSI_FILE_RESOURCE) {
+ if (file == TGSI_FILE_IMAGE) {
for (i = 0; i < TGSI_TEXTURE_COUNT; i++) {
if (str_match_nocase_whole(&cur, tgsi_texture_names[i])) {
- decl.Resource.Resource = i;
+ decl.Image.Resource = i;
break;
}
}
@@ -1260,13 +1393,17 @@ static boolean parse_declaration( struct translate_ctx *ctx )
cur2++;
eat_opt_white(&cur2);
if (str_match_nocase_whole(&cur2, "RAW")) {
- decl.Resource.Raw = 1;
+ decl.Image.Raw = 1;
} else if (str_match_nocase_whole(&cur2, "WR")) {
- decl.Resource.Writable = 1;
+ decl.Image.Writable = 1;
} else {
- break;
+ int format = str_match_format(&cur2);
+ if (format < 0)
+ break;
+
+ decl.Image.Format = format;
}
cur = cur2;
eat_opt_white(&cur2);
@@ -1339,6 +1476,26 @@ static boolean parse_declaration( struct translate_ctx *ctx )
decl.SamplerView.ReturnTypeX;
}
ctx->cur = cur;
+ } else if (file == TGSI_FILE_BUFFER) {
+ if (str_match_nocase_whole(&cur, "ATOMIC")) {
+ decl.Declaration.Atomic = 1;
+ ctx->cur = cur;
+ }
+ } else if (file == TGSI_FILE_MEMORY) {
+ if (str_match_nocase_whole(&cur, "GLOBAL")) {
+ /* Note this is a no-op global is the default */
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL;
+ ctx->cur = cur;
+ } else if (str_match_nocase_whole(&cur, "SHARED")) {
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_SHARED;
+ ctx->cur = cur;
+ } else if (str_match_nocase_whole(&cur, "PRIVATE")) {
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_PRIVATE;
+ ctx->cur = cur;
+ } else if (str_match_nocase_whole(&cur, "INPUT")) {
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_INPUT;
+ ctx->cur = cur;
+ }
} else {
if (str_match_nocase_whole(&cur, "LOCAL")) {
decl.Declaration.Local = 1;
@@ -1474,11 +1631,11 @@ static boolean parse_immediate( struct translate_ctx *ctx )
report_error( ctx, "Syntax error" );
return FALSE;
}
- for (type = 0; type < Elements(tgsi_immediate_type_names); ++type) {
+ for (type = 0; type < ARRAY_SIZE(tgsi_immediate_type_names); ++type) {
if (str_match_nocase_whole(&ctx->cur, tgsi_immediate_type_names[type]))
break;
}
- if (type == Elements(tgsi_immediate_type_names)) {
+ if (type == ARRAY_SIZE(tgsi_immediate_type_names)) {
report_error( ctx, "Expected immediate type" );
return FALSE;
}
@@ -1524,7 +1681,7 @@ parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin )
{
uint i;
- for (i = 0; i < Elements(tgsi_fs_coord_origin_names); i++) {
+ for (i = 0; i < ARRAY_SIZE(tgsi_fs_coord_origin_names); i++) {
const char *cur = *pcur;
if (str_match_nocase_whole( &cur, tgsi_fs_coord_origin_names[i])) {
@@ -1541,7 +1698,7 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center )
{
uint i;
- for (i = 0; i < Elements(tgsi_fs_coord_pixel_center_names); i++) {
+ for (i = 0; i < ARRAY_SIZE(tgsi_fs_coord_pixel_center_names); i++) {
const char *cur = *pcur;
if (str_match_nocase_whole( &cur, tgsi_fs_coord_pixel_center_names[i])) {
@@ -1553,6 +1710,22 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center )
return FALSE;
}
+static boolean
+parse_property_next_shader( const char **pcur, uint *next_shader )
+{
+ uint i;
+
+ for (i = 0; i < ARRAY_SIZE(tgsi_processor_type_names); i++) {
+ const char *cur = *pcur;
+
+ if (str_match_nocase_whole( &cur, tgsi_processor_type_names[i])) {
+ *next_shader = i;
+ *pcur = cur;
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
static boolean parse_property( struct translate_ctx *ctx )
{
@@ -1590,7 +1763,7 @@ static boolean parse_property( struct translate_ctx *ctx )
return FALSE;
}
if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM &&
- ctx->processor == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->processor == PIPE_SHADER_GEOMETRY) {
ctx->implied_array_size = u_vertices_per_prim(values[0]);
}
break;
@@ -1606,6 +1779,12 @@ static boolean parse_property( struct translate_ctx *ctx )
return FALSE;
}
break;
+ case TGSI_PROPERTY_NEXT_SHADER:
+ if (!parse_property_next_shader(&ctx->cur, &values[0] )) {
+ report_error( ctx, "Unknown next shader property value." );
+ return FALSE;
+ }
+ break;
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
default:
if (!parse_uint(&ctx->cur, &values[0] )) {
@@ -1638,8 +1817,8 @@ static boolean translate( struct translate_ctx *ctx )
if (!parse_header( ctx ))
return FALSE;
- if (ctx->processor == TGSI_PROCESSOR_TESS_CTRL ||
- ctx->processor == TGSI_PROCESSOR_TESS_EVAL)
+ if (ctx->processor == PIPE_SHADER_TESS_CTRL ||
+ ctx->processor == PIPE_SHADER_TESS_EVAL)
ctx->implied_array_size = 32;
while (*ctx->cur != '\0') {
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h
index ceb7c2e0f..c21ff959c 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -95,20 +95,39 @@ struct tgsi_transform_context
* Helper for emitting temporary register declarations.
*/
static inline void
-tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
- unsigned index)
+tgsi_transform_temps_decl(struct tgsi_transform_context *ctx,
+ unsigned firstIdx, unsigned lastIdx)
{
struct tgsi_full_declaration decl;
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.Range.First =
- decl.Range.Last = index;
+ decl.Range.First = firstIdx;
+ decl.Range.Last = lastIdx;
ctx->emit_declaration(ctx, &decl);
}
+static inline void
+tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
+ unsigned index)
+{
+ tgsi_transform_temps_decl(ctx, index, index);
+}
static inline void
+tgsi_transform_const_decl(struct tgsi_transform_context *ctx,
+ unsigned firstIdx, unsigned lastIdx)
+{
+ struct tgsi_full_declaration decl;
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_CONSTANT;
+ decl.Range.First = firstIdx;
+ decl.Range.Last = lastIdx;
+ ctx->emit_declaration(ctx, &decl);
+}
+
+static inline void
tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
unsigned index,
unsigned sem_name, unsigned sem_index,
@@ -129,6 +148,26 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
ctx->emit_declaration(ctx, &decl);
}
+static inline void
+tgsi_transform_output_decl(struct tgsi_transform_context *ctx,
+ unsigned index,
+ unsigned sem_name, unsigned sem_index,
+ unsigned interp)
+{
+ struct tgsi_full_declaration decl;
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_OUTPUT;
+ decl.Declaration.Interpolate = 1;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.Name = sem_name;
+ decl.Semantic.Index = sem_index;
+ decl.Range.First =
+ decl.Range.Last = index;
+ decl.Interp.Interpolate = interp;
+
+ ctx->emit_declaration(ctx, &decl);
+}
static inline void
tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
@@ -153,7 +192,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW;
- decl.Declaration.UsageMask = 0xf;
+ decl.Declaration.UsageMask = TGSI_WRITEMASK_XYZW;
decl.Range.First =
decl.Range.Last = index;
decl.SamplerView.Resource = target;
@@ -182,6 +221,28 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
ctx->emit_immediate(ctx, &immed);
}
+static inline void
+tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg,
+ unsigned file, unsigned index, unsigned writemask)
+{
+ reg->Register.File = file;
+ reg->Register.Index = index;
+ reg->Register.WriteMask = writemask;
+}
+
+static inline void
+tgsi_transform_src_reg(struct tgsi_full_src_register *reg,
+ unsigned file, unsigned index,
+ unsigned swizzleX, unsigned swizzleY,
+ unsigned swizzleZ, unsigned swizzleW)
+{
+ reg->Register.File = file;
+ reg->Register.Index = index;
+ reg->Register.SwizzleX = swizzleX;
+ reg->Register.SwizzleY = swizzleY;
+ reg->Register.SwizzleZ = swizzleZ;
+ reg->Register.SwizzleW = swizzleW;
+}
/**
* Helper for emitting 1-operand instructions.
@@ -241,6 +302,40 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
static inline void
+tgsi_transform_op3_inst(struct tgsi_transform_context *ctx,
+ unsigned opcode,
+ unsigned dst_file,
+ unsigned dst_index,
+ unsigned dst_writemask,
+ unsigned src0_file,
+ unsigned src0_index,
+ unsigned src1_file,
+ unsigned src1_index,
+ unsigned src2_file,
+ unsigned src2_index)
+{
+ struct tgsi_full_instruction inst;
+
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = opcode;
+ inst.Instruction.NumDstRegs = 1;
+ inst.Dst[0].Register.File = dst_file,
+ inst.Dst[0].Register.Index = dst_index;
+ inst.Dst[0].Register.WriteMask = dst_writemask;
+ inst.Instruction.NumSrcRegs = 3;
+ inst.Src[0].Register.File = src0_file;
+ inst.Src[0].Register.Index = src0_index;
+ inst.Src[1].Register.File = src1_file;
+ inst.Src[1].Register.Index = src1_index;
+ inst.Src[2].Register.File = src2_file;
+ inst.Src[2].Register.Index = src2_index;
+
+ ctx->emit_instruction(ctx, &inst);
+}
+
+
+
+static inline void
tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
unsigned opcode,
unsigned dst_file,
@@ -399,7 +494,8 @@ static inline void
tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
unsigned src_file,
unsigned src_index,
- unsigned src_swizzle)
+ unsigned src_swizzle,
+ boolean negate)
{
struct tgsi_full_instruction inst;
@@ -413,22 +509,25 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
inst.Src[0].Register.SwizzleY =
inst.Src[0].Register.SwizzleZ =
inst.Src[0].Register.SwizzleW = src_swizzle;
- inst.Src[0].Register.Negate = 1;
+ inst.Src[0].Register.Negate = negate;
ctx->emit_instruction(ctx, &inst);
}
static inline void
-tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
- unsigned dst_file,
- unsigned dst_index,
- unsigned src_file,
- unsigned src_index,
- unsigned sampler_index)
+tgsi_transform_tex_inst(struct tgsi_transform_context *ctx,
+ unsigned dst_file,
+ unsigned dst_index,
+ unsigned src_file,
+ unsigned src_index,
+ unsigned tex_target,
+ unsigned sampler_index)
{
struct tgsi_full_instruction inst;
+ assert(tex_target < TGSI_TEXTURE_COUNT);
+
inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = TGSI_OPCODE_TEX;
inst.Instruction.NumDstRegs = 1;
@@ -436,7 +535,7 @@ tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
inst.Dst[0].Register.Index = dst_index;
inst.Instruction.NumSrcRegs = 2;
inst.Instruction.Texture = TRUE;
- inst.Texture.Texture = TGSI_TEXTURE_2D;
+ inst.Texture.Texture = tex_target;
inst.Src[0].Register.File = src_file;
inst.Src[0].Register.Index = src_index;
inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 3d2131950..7bcd24297 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -35,6 +35,7 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_sanity.h"
#include "util/u_debug.h"
+#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_bitmask.h"
@@ -49,6 +50,7 @@ union tgsi_any_token {
struct tgsi_declaration_range decl_range;
struct tgsi_declaration_dimension decl_dim;
struct tgsi_declaration_interp decl_interp;
+ struct tgsi_declaration_image decl_image;
struct tgsi_declaration_semantic decl_semantic;
struct tgsi_declaration_sampler_view decl_sampler_view;
struct tgsi_declaration_array array;
@@ -58,6 +60,7 @@ union tgsi_any_token {
struct tgsi_instruction_predicate insn_predicate;
struct tgsi_instruction_label insn_label;
struct tgsi_instruction_texture insn_texture;
+ struct tgsi_instruction_memory insn_memory;
struct tgsi_texture_offset insn_texture_offset;
struct tgsi_src_register src;
struct tgsi_ind_register ind;
@@ -74,9 +77,9 @@ struct ureg_tokens {
unsigned count;
};
-#define UREG_MAX_INPUT PIPE_MAX_SHADER_INPUTS
+#define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS)
#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
-#define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS
+#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
#define UREG_MAX_CONSTANT_RANGE 32
#define UREG_MAX_IMMEDIATE 4096
#define UREG_MAX_ADDR 3
@@ -98,12 +101,14 @@ struct ureg_program
{
unsigned processor;
bool supports_any_inout_decl_range;
+ int next_shader_processor;
struct {
unsigned semantic_name;
unsigned semantic_index;
unsigned interp;
unsigned char cylindrical_wrap;
+ unsigned char usage_mask;
unsigned interp_location;
unsigned first;
unsigned last;
@@ -114,7 +119,6 @@ struct ureg_program
unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];
struct {
- unsigned index;
unsigned semantic_name;
unsigned semantic_index;
} system_value[UREG_MAX_SYSTEM_VALUE];
@@ -154,6 +158,21 @@ struct ureg_program
} sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned nr_sampler_views;
+ struct {
+ unsigned index;
+ unsigned target;
+ unsigned format;
+ boolean wr;
+ boolean raw;
+ } image[PIPE_MAX_SHADER_IMAGES];
+ unsigned nr_images;
+
+ struct {
+ unsigned index;
+ bool atomic;
+ } buffer[PIPE_MAX_SHADER_BUFFERS];
+ unsigned nr_buffers;
+
struct util_bitmask *free_temps;
struct util_bitmask *local_temps;
struct util_bitmask *decl_temps;
@@ -172,6 +191,8 @@ struct ureg_program
unsigned nr_instructions;
struct ureg_tokens domain[2];
+
+ bool use_memory[TGSI_MEMORY_TYPE_COUNT];
};
static union tgsi_any_token error_tokens[32];
@@ -182,7 +203,7 @@ static void tokens_error( struct ureg_tokens *tokens )
FREE(tokens->tokens);
tokens->tokens = error_tokens;
- tokens->size = Elements(error_tokens);
+ tokens->size = ARRAY_SIZE(error_tokens);
tokens->count = 0;
}
@@ -244,30 +265,38 @@ static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
void
ureg_property(struct ureg_program *ureg, unsigned name, unsigned value)
{
- assert(name < Elements(ureg->properties));
+ assert(name < ARRAY_SIZE(ureg->properties));
ureg->properties[name] = value;
}
struct ureg_src
-ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
+ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
unsigned cylindrical_wrap,
unsigned interp_location,
+ unsigned index,
+ unsigned usage_mask,
unsigned array_id,
unsigned array_size)
{
unsigned i;
+ assert(usage_mask != 0);
+ assert(usage_mask <= TGSI_WRITEMASK_XYZW);
+
for (i = 0; i < ureg->nr_inputs; i++) {
if (ureg->input[i].semantic_name == semantic_name &&
ureg->input[i].semantic_index == semantic_index) {
assert(ureg->input[i].interp == interp_mode);
assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap);
assert(ureg->input[i].interp_location == interp_location);
- assert(ureg->input[i].array_id == array_id);
- goto out;
+ if (ureg->input[i].array_id == array_id) {
+ ureg->input[i].usage_mask |= usage_mask;
+ goto out;
+ }
+ assert((ureg->input[i].usage_mask & usage_mask) == 0);
}
}
@@ -278,10 +307,11 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
ureg->input[i].interp = interp_mode;
ureg->input[i].cylindrical_wrap = cylindrical_wrap;
ureg->input[i].interp_location = interp_location;
- ureg->input[i].first = ureg->nr_input_regs;
- ureg->input[i].last = ureg->nr_input_regs + array_size - 1;
+ ureg->input[i].first = index;
+ ureg->input[i].last = index + array_size - 1;
ureg->input[i].array_id = array_id;
- ureg->nr_input_regs += array_size;
+ ureg->input[i].usage_mask = usage_mask;
+ ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size);
ureg->nr_inputs++;
} else {
set_bad(ureg);
@@ -292,12 +322,27 @@ out:
array_id);
}
+struct ureg_src
+ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned interp_mode,
+ unsigned cylindrical_wrap,
+ unsigned interp_location,
+ unsigned array_id,
+ unsigned array_size)
+{
+ return ureg_DECL_fs_input_cyl_centroid_layout(ureg,
+ semantic_name, semantic_index, interp_mode, cylindrical_wrap, interp_location,
+ ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size);
+}
+
struct ureg_src
ureg_DECL_vs_input( struct ureg_program *ureg,
unsigned index )
{
- assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
+ assert(ureg->processor == PIPE_SHADER_VERTEX);
assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs));
ureg->vs_inputs[index/32] |= 1 << (index % 32);
@@ -306,6 +351,21 @@ ureg_DECL_vs_input( struct ureg_program *ureg,
struct ureg_src
+ureg_DECL_input_layout(struct ureg_program *ureg,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned index,
+ unsigned usage_mask,
+ unsigned array_id,
+ unsigned array_size)
+{
+ return ureg_DECL_fs_input_cyl_centroid_layout(ureg,
+ semantic_name, semantic_index, 0, 0, 0,
+ index, usage_mask, array_id, array_size);
+}
+
+
+struct ureg_src
ureg_DECL_input(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
@@ -319,26 +379,36 @@ ureg_DECL_input(struct ureg_program *ureg,
struct ureg_src
ureg_DECL_system_value(struct ureg_program *ureg,
- unsigned index,
unsigned semantic_name,
unsigned semantic_index)
{
+ unsigned i;
+
+ for (i = 0; i < ureg->nr_system_values; i++) {
+ if (ureg->system_value[i].semantic_name == semantic_name &&
+ ureg->system_value[i].semantic_index == semantic_index) {
+ goto out;
+ }
+ }
+
if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
- ureg->system_value[ureg->nr_system_values].index = index;
ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
+ i = ureg->nr_system_values;
ureg->nr_system_values++;
} else {
set_bad(ureg);
}
- return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index);
+out:
+ return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
}
-struct ureg_dst
-ureg_DECL_output_masked(struct ureg_program *ureg,
- unsigned name,
+struct ureg_dst
+ureg_DECL_output_layout(struct ureg_program *ureg,
+ unsigned semantic_name,
+ unsigned semantic_index,
unsigned index,
unsigned usage_mask,
unsigned array_id,
@@ -349,22 +419,24 @@ ureg_DECL_output_masked(struct ureg_program *ureg,
assert(usage_mask != 0);
for (i = 0; i < ureg->nr_outputs; i++) {
- if (ureg->output[i].semantic_name == name &&
- ureg->output[i].semantic_index == index) {
- assert(ureg->output[i].array_id == array_id);
- ureg->output[i].usage_mask |= usage_mask;
- goto out;
+ if (ureg->output[i].semantic_name == semantic_name &&
+ ureg->output[i].semantic_index == semantic_index) {
+ if (ureg->output[i].array_id == array_id) {
+ ureg->output[i].usage_mask |= usage_mask;
+ goto out;
+ }
+ assert((ureg->output[i].usage_mask & usage_mask) == 0);
}
}
if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
- ureg->output[i].semantic_name = name;
- ureg->output[i].semantic_index = index;
+ ureg->output[i].semantic_name = semantic_name;
+ ureg->output[i].semantic_index = semantic_index;
ureg->output[i].usage_mask = usage_mask;
- ureg->output[i].first = ureg->nr_output_regs;
- ureg->output[i].last = ureg->nr_output_regs + array_size - 1;
+ ureg->output[i].first = index;
+ ureg->output[i].last = index + array_size - 1;
ureg->output[i].array_id = array_id;
- ureg->nr_output_regs += array_size;
+ ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size);
ureg->nr_outputs++;
}
else {
@@ -377,6 +449,19 @@ out:
}
+struct ureg_dst
+ureg_DECL_output_masked(struct ureg_program *ureg,
+ unsigned name,
+ unsigned index,
+ unsigned usage_mask,
+ unsigned array_id,
+ unsigned array_size)
+{
+ return ureg_DECL_output_layout(ureg, name, index,
+ ureg->nr_output_regs, usage_mask, array_id, array_size);
+}
+
+
struct ureg_dst
ureg_DECL_output(struct ureg_program *ureg,
unsigned name,
@@ -428,7 +513,7 @@ ureg_DECL_constant2D(struct ureg_program *ureg,
}
-/* A one-dimensional, depricated version of ureg_DECL_constant2D().
+/* A one-dimensional, deprecated version of ureg_DECL_constant2D().
*
* Constant operands declared with this function must be addressed
* with a one-dimensional index.
@@ -647,6 +732,71 @@ ureg_DECL_sampler_view(struct ureg_program *ureg,
return reg;
}
+/* Allocate a new image.
+ */
+struct ureg_src
+ureg_DECL_image(struct ureg_program *ureg,
+ unsigned index,
+ unsigned target,
+ unsigned format,
+ boolean wr,
+ boolean raw)
+{
+ struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
+ unsigned i;
+
+ for (i = 0; i < ureg->nr_images; i++)
+ if (ureg->image[i].index == index)
+ return reg;
+
+ if (i < PIPE_MAX_SHADER_IMAGES) {
+ ureg->image[i].index = index;
+ ureg->image[i].target = target;
+ ureg->image[i].wr = wr;
+ ureg->image[i].raw = raw;
+ ureg->image[i].format = format;
+ ureg->nr_images++;
+ return reg;
+ }
+
+ assert(0);
+ return reg;
+}
+
+/* Allocate a new buffer.
+ */
+struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
+ bool atomic)
+{
+ struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
+ unsigned i;
+
+ for (i = 0; i < ureg->nr_buffers; i++)
+ if (ureg->buffer[i].index == nr)
+ return reg;
+
+ if (i < PIPE_MAX_SHADER_BUFFERS) {
+ ureg->buffer[i].index = nr;
+ ureg->buffer[i].atomic = atomic;
+ ureg->nr_buffers++;
+ return reg;
+ }
+
+ assert(0);
+ return reg;
+}
+
+/* Allocate a memory area.
+ */
+struct ureg_src ureg_DECL_memory(struct ureg_program *ureg,
+ unsigned memory_type)
+{
+ struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type);
+
+ ureg->use_memory[memory_type] = true;
+ return reg;
+}
+
static int
match_or_expand_immediate64( const unsigned *v,
int type,
@@ -698,7 +848,9 @@ match_or_expand_immediate( const unsigned *v,
unsigned nr2 = *pnr2;
unsigned i, j;
- if (type == TGSI_IMM_FLOAT64)
+ if (type == TGSI_IMM_FLOAT64 ||
+ type == TGSI_IMM_UINT64 ||
+ type == TGSI_IMM_INT64)
return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle);
*swizzle = 0;
@@ -777,7 +929,9 @@ out:
/* Make sure that all referenced elements are from this immediate.
* Has the effect of making size-one immediates into scalars.
*/
- if (type == TGSI_IMM_FLOAT64) {
+ if (type == TGSI_IMM_FLOAT64 ||
+ type == TGSI_IMM_UINT64 ||
+ type == TGSI_IMM_INT64) {
for (j = nr; j < 4; j+=2) {
swizzle |= (swizzle & 0xf) << (j * 2);
}
@@ -877,6 +1031,43 @@ ureg_DECL_immediate_int( struct ureg_program *ureg,
return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
}
+struct ureg_src
+ureg_DECL_immediate_uint64( struct ureg_program *ureg,
+ const uint64_t *v,
+ unsigned nr )
+{
+ union {
+ unsigned u[4];
+ uint64_t u64[2];
+ } fu;
+ unsigned int i;
+
+ assert((nr / 2) < 3);
+ for (i = 0; i < nr / 2; i++) {
+ fu.u64[i] = v[i];
+ }
+
+ return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64);
+}
+
+struct ureg_src
+ureg_DECL_immediate_int64( struct ureg_program *ureg,
+ const int64_t *v,
+ unsigned nr )
+{
+ union {
+ unsigned u[4];
+ int64_t i64[2];
+ } fu;
+ unsigned int i;
+
+ assert((nr / 2) < 3);
+ for (i = 0; i < nr / 2; i++) {
+ fu.i64[i] = v[i];
+ }
+
+ return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64);
+}
void
ureg_emit_src( struct ureg_program *ureg,
@@ -1017,7 +1208,7 @@ static void validate( unsigned opcode,
#ifdef DEBUG
const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
assert(info);
- if(info) {
+ if (info) {
assert(nr_dst == info->num_dst);
assert(nr_src == info->num_src);
}
@@ -1082,7 +1273,7 @@ ureg_emit_label(struct ureg_program *ureg,
{
union tgsi_any_token *out, *insn;
- if(!label_token)
+ if (!label_token)
return;
out = get_tokens( ureg, DOMAIN_INSN, 1 );
@@ -1147,6 +1338,25 @@ ureg_emit_texture_offset(struct ureg_program *ureg,
}
+void
+ureg_emit_memory(struct ureg_program *ureg,
+ unsigned extended_token,
+ unsigned qualifier,
+ unsigned texture,
+ unsigned format)
+{
+ union tgsi_any_token *out, *insn;
+
+ out = get_tokens( ureg, DOMAIN_INSN, 1 );
+ insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
+
+ insn->insn.Memory = 1;
+
+ out[0].value = 0;
+ out[0].insn_memory.Qualifier = qualifier;
+ out[0].insn_memory.Texture = texture;
+ out[0].insn_memory.Format = format;
+}
void
ureg_fixup_insn_size(struct ureg_program *ureg,
@@ -1299,6 +1509,44 @@ ureg_label_insn(struct ureg_program *ureg,
}
+void
+ureg_memory_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ const struct ureg_dst *dst,
+ unsigned nr_dst,
+ const struct ureg_src *src,
+ unsigned nr_src,
+ unsigned qualifier,
+ unsigned texture,
+ unsigned format)
+{
+ struct ureg_emit_insn_result insn;
+ unsigned i;
+
+ insn = ureg_emit_insn(ureg,
+ opcode,
+ FALSE,
+ FALSE,
+ FALSE,
+ TGSI_SWIZZLE_X,
+ TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Z,
+ TGSI_SWIZZLE_W,
+ nr_dst,
+ nr_src);
+
+ ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format);
+
+ for (i = 0; i < nr_dst; i++)
+ ureg_emit_dst(ureg, dst[i]);
+
+ for (i = 0; i < nr_src; i++)
+ ureg_emit_src(ureg, src[i]);
+
+ ureg_fixup_insn_size(ureg, insn.insn_token);
+}
+
+
static void
emit_decl_semantic(struct ureg_program *ureg,
unsigned file,
@@ -1344,7 +1592,8 @@ emit_decl_fs(struct ureg_program *ureg,
unsigned interpolate,
unsigned cylindrical_wrap,
unsigned interpolate_location,
- unsigned array_id)
+ unsigned array_id,
+ unsigned usage_mask)
{
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL,
array_id ? 5 : 4);
@@ -1353,7 +1602,7 @@ emit_decl_fs(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 4;
out[0].decl.File = file;
- out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
+ out[0].decl.UsageMask = usage_mask;
out[0].decl.Interpolate = 1;
out[0].decl.Semantic = 1;
out[0].decl.Array = array_id != 0;
@@ -1462,7 +1711,7 @@ emit_decl_sampler_view(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 3;
out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
- out[0].decl.UsageMask = 0xf;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[1].value = 0;
out[1].decl_range.First = index;
@@ -1477,6 +1726,69 @@ emit_decl_sampler_view(struct ureg_program *ureg,
}
static void
+emit_decl_image(struct ureg_program *ureg,
+ unsigned index,
+ unsigned target,
+ unsigned format,
+ boolean wr,
+ boolean raw)
+{
+ union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+
+ out[0].value = 0;
+ out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+ out[0].decl.NrTokens = 3;
+ out[0].decl.File = TGSI_FILE_IMAGE;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+
+ out[1].value = 0;
+ out[1].decl_range.First = index;
+ out[1].decl_range.Last = index;
+
+ out[2].value = 0;
+ out[2].decl_image.Resource = target;
+ out[2].decl_image.Writable = wr;
+ out[2].decl_image.Raw = raw;
+ out[2].decl_image.Format = format;
+}
+
+static void
+emit_decl_buffer(struct ureg_program *ureg,
+ unsigned index,
+ bool atomic)
+{
+ union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
+
+ out[0].value = 0;
+ out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+ out[0].decl.NrTokens = 2;
+ out[0].decl.File = TGSI_FILE_BUFFER;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+ out[0].decl.Atomic = atomic;
+
+ out[1].value = 0;
+ out[1].decl_range.First = index;
+ out[1].decl_range.Last = index;
+}
+
+static void
+emit_decl_memory(struct ureg_program *ureg, unsigned memory_type)
+{
+ union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
+
+ out[0].value = 0;
+ out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+ out[0].decl.NrTokens = 2;
+ out[0].decl.File = TGSI_FILE_MEMORY;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+ out[0].decl.MemType = memory_type;
+
+ out[1].value = 0;
+ out[1].decl_range.First = memory_type;
+ out[1].decl_range.Last = memory_type;
+}
+
+static void
emit_immediate( struct ureg_program *ureg,
const unsigned *v,
unsigned type )
@@ -1515,17 +1827,17 @@ static void emit_decls( struct ureg_program *ureg )
{
unsigned i,j;
- for (i = 0; i < Elements(ureg->properties); i++)
+ for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
if (ureg->properties[i] != ~0)
emit_property(ureg, i, ureg->properties[i]);
- if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
+ if (ureg->processor == PIPE_SHADER_VERTEX) {
for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
- if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
+ if (ureg->vs_inputs[i/32] & (1u << (i%32))) {
emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
}
}
- } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
+ } else if (ureg->processor == PIPE_SHADER_FRAGMENT) {
if (ureg->supports_any_inout_decl_range) {
for (i = 0; i < ureg->nr_inputs; i++) {
emit_decl_fs(ureg,
@@ -1537,7 +1849,8 @@ static void emit_decls( struct ureg_program *ureg )
ureg->input[i].interp,
ureg->input[i].cylindrical_wrap,
ureg->input[i].interp_location,
- ureg->input[i].array_id);
+ ureg->input[i].array_id,
+ ureg->input[i].usage_mask);
}
}
else {
@@ -1551,7 +1864,8 @@ static void emit_decls( struct ureg_program *ureg )
(j - ureg->input[i].first),
ureg->input[i].interp,
ureg->input[i].cylindrical_wrap,
- ureg->input[i].interp_location, 0);
+ ureg->input[i].interp_location, 0,
+ ureg->input[i].usage_mask);
}
}
}
@@ -1586,8 +1900,8 @@ static void emit_decls( struct ureg_program *ureg )
for (i = 0; i < ureg->nr_system_values; i++) {
emit_decl_semantic(ureg,
TGSI_FILE_SYSTEM_VALUE,
- ureg->system_value[i].index,
- ureg->system_value[i].index,
+ i,
+ i,
ureg->system_value[i].semantic_name,
ureg->system_value[i].semantic_index,
TGSI_WRITEMASK_XYZW, 0);
@@ -1635,6 +1949,24 @@ static void emit_decls( struct ureg_program *ureg )
ureg->sampler_view[i].return_type_w);
}
+ for (i = 0; i < ureg->nr_images; i++) {
+ emit_decl_image(ureg,
+ ureg->image[i].index,
+ ureg->image[i].target,
+ ureg->image[i].format,
+ ureg->image[i].wr,
+ ureg->image[i].raw);
+ }
+
+ for (i = 0; i < ureg->nr_buffers; i++) {
+ emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
+ }
+
+ for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) {
+ if (ureg->use_memory[i])
+ emit_decl_memory(ureg, i);
+ }
+
if (ureg->const_decls.nr_constant_ranges) {
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
emit_decl_range(ureg,
@@ -1738,6 +2070,16 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
{
const struct tgsi_token *tokens;
+ switch (ureg->processor) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_TESS_EVAL:
+ ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER,
+ ureg->next_shader_processor == -1 ?
+ PIPE_SHADER_FRAGMENT :
+ ureg->next_shader_processor);
+ break;
+ }
+
emit_header( ureg );
emit_decls( ureg );
copy_instructions( ureg );
@@ -1777,25 +2119,23 @@ void *ureg_create_shader( struct ureg_program *ureg,
{
struct pipe_shader_state state;
- state.tokens = ureg_finalize(ureg);
+ pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg));
if(!state.tokens)
return NULL;
if (so)
state.stream_output = *so;
- else
- memset(&state.stream_output, 0, sizeof(state.stream_output));
switch (ureg->processor) {
- case TGSI_PROCESSOR_VERTEX:
+ case PIPE_SHADER_VERTEX:
return pipe->create_vs_state(pipe, &state);
- case TGSI_PROCESSOR_TESS_CTRL:
+ case PIPE_SHADER_TESS_CTRL:
return pipe->create_tcs_state(pipe, &state);
- case TGSI_PROCESSOR_TESS_EVAL:
+ case PIPE_SHADER_TESS_EVAL:
return pipe->create_tes_state(pipe, &state);
- case TGSI_PROCESSOR_GEOMETRY:
+ case PIPE_SHADER_GEOMETRY:
return pipe->create_gs_state(pipe, &state);
- case TGSI_PROCESSOR_FRAGMENT:
+ case PIPE_SHADER_FRAGMENT:
return pipe->create_fs_state(pipe, &state);
default:
return NULL;
@@ -1830,29 +2170,6 @@ void ureg_free_tokens( const struct tgsi_token *tokens )
}
-static inline unsigned
-pipe_shader_from_tgsi_processor(unsigned processor)
-{
- switch (processor) {
- case TGSI_PROCESSOR_VERTEX:
- return PIPE_SHADER_VERTEX;
- case TGSI_PROCESSOR_TESS_CTRL:
- return PIPE_SHADER_TESS_CTRL;
- case TGSI_PROCESSOR_TESS_EVAL:
- return PIPE_SHADER_TESS_EVAL;
- case TGSI_PROCESSOR_GEOMETRY:
- return PIPE_SHADER_GEOMETRY;
- case TGSI_PROCESSOR_FRAGMENT:
- return PIPE_SHADER_FRAGMENT;
- case TGSI_PROCESSOR_COMPUTE:
- return PIPE_SHADER_COMPUTE;
- default:
- assert(0);
- return PIPE_SHADER_VERTEX;
- }
-}
-
-
struct ureg_program *
ureg_create(unsigned processor)
{
@@ -1865,17 +2182,17 @@ ureg_create_with_screen(unsigned processor, struct pipe_screen *screen)
{
int i;
struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
- if (ureg == NULL)
+ if (!ureg)
goto no_ureg;
ureg->processor = processor;
ureg->supports_any_inout_decl_range =
screen &&
- screen->get_shader_param(screen,
- pipe_shader_from_tgsi_processor(processor),
+ screen->get_shader_param(screen, processor,
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
+ ureg->next_shader_processor = -1;
- for (i = 0; i < Elements(ureg->properties); i++)
+ for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
ureg->properties[i] = ~0;
ureg->free_temps = util_bitmask_create();
@@ -1903,6 +2220,13 @@ no_ureg:
}
+void
+ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor)
+{
+ ureg->next_shader_processor = processor;
+}
+
+
unsigned
ureg_get_nr_outputs( const struct ureg_program *ureg )
{
@@ -1916,7 +2240,7 @@ void ureg_destroy( struct ureg_program *ureg )
{
unsigned i;
- for (i = 0; i < Elements(ureg->domain); i++) {
+ for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) {
if (ureg->domain[i].tokens &&
ureg->domain[i].tokens != error_tokens)
FREE(ureg->domain[i].tokens);
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 0aae550d6..d3c28b33e 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -114,6 +114,8 @@ ureg_create_shader( struct ureg_program *,
struct pipe_context *pipe,
const struct pipe_stream_output_info *so );
+void
+ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor);
/* Alternately, return the built token stream and hand ownership of
* that memory to the caller:
@@ -171,6 +173,18 @@ ureg_property(struct ureg_program *ureg, unsigned name, unsigned value);
*/
struct ureg_src
+ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned interp_mode,
+ unsigned cylindrical_wrap,
+ unsigned interp_location,
+ unsigned index,
+ unsigned usage_mask,
+ unsigned array_id,
+ unsigned array_size);
+
+struct ureg_src
ureg_DECL_fs_input_cyl_centroid(struct ureg_program *,
unsigned semantic_name,
unsigned semantic_index,
@@ -213,6 +227,15 @@ ureg_DECL_vs_input( struct ureg_program *,
unsigned index );
struct ureg_src
+ureg_DECL_input_layout(struct ureg_program *,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned index,
+ unsigned usage_mask,
+ unsigned array_id,
+ unsigned array_size);
+
+struct ureg_src
ureg_DECL_input(struct ureg_program *,
unsigned semantic_name,
unsigned semantic_index,
@@ -221,11 +244,19 @@ ureg_DECL_input(struct ureg_program *,
struct ureg_src
ureg_DECL_system_value(struct ureg_program *,
- unsigned index,
unsigned semantic_name,
unsigned semantic_index);
struct ureg_dst
+ureg_DECL_output_layout(struct ureg_program *,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned index,
+ unsigned usage_mask,
+ unsigned array_id,
+ unsigned array_size);
+
+struct ureg_dst
ureg_DECL_output_masked(struct ureg_program *,
unsigned semantic_name,
unsigned semantic_index,
@@ -270,6 +301,16 @@ ureg_DECL_immediate_int( struct ureg_program *,
const int *v,
unsigned nr );
+struct ureg_src
+ureg_DECL_immediate_uint64( struct ureg_program *,
+ const uint64_t *v,
+ unsigned nr );
+
+struct ureg_src
+ureg_DECL_immediate_int64( struct ureg_program *,
+ const int64_t *v,
+ unsigned nr );
+
void
ureg_DECL_constant2D(struct ureg_program *ureg,
unsigned first,
@@ -327,6 +368,19 @@ ureg_DECL_sampler_view(struct ureg_program *,
unsigned return_type_z,
unsigned return_type_w );
+struct ureg_src
+ureg_DECL_image(struct ureg_program *ureg,
+ unsigned index,
+ unsigned target,
+ unsigned format,
+ boolean wr,
+ boolean raw);
+
+struct ureg_src
+ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
+
+struct ureg_src
+ureg_DECL_memory(struct ureg_program *ureg, unsigned memory_type);
static inline struct ureg_src
ureg_imm4f( struct ureg_program *ureg,
@@ -522,6 +576,16 @@ ureg_label_insn(struct ureg_program *ureg,
unsigned nr_src,
unsigned *label);
+void
+ureg_memory_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ const struct ureg_dst *dst,
+ unsigned nr_dst,
+ const struct ureg_src *src,
+ unsigned nr_src,
+ unsigned qualifier,
+ unsigned texture,
+ unsigned format);
/***********************************************************************
* Internal instruction helpers, don't call these directly:
@@ -559,6 +623,13 @@ void
ureg_emit_texture_offset(struct ureg_program *ureg,
const struct tgsi_texture_offset *offset);
+void
+ureg_emit_memory(struct ureg_program *ureg,
+ unsigned insn_token,
+ unsigned qualifier,
+ unsigned texture,
+ unsigned format);
+
void
ureg_emit_dst( struct ureg_program *ureg,
struct ureg_dst dst );
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c
index e5b8427a0..fbe29626a 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -29,6 +29,7 @@
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_util.h"
+#include "tgsi_exec.h"
union pointer_hack
{
@@ -53,17 +54,17 @@ tgsi_util_get_src_register_swizzle(
const struct tgsi_src_register *reg,
unsigned component )
{
- switch( component ) {
- case 0:
+ switch (component) {
+ case TGSI_CHAN_X:
return reg->SwizzleX;
- case 1:
+ case TGSI_CHAN_Y:
return reg->SwizzleY;
- case 2:
+ case TGSI_CHAN_Z:
return reg->SwizzleZ;
- case 3:
+ case TGSI_CHAN_W:
return reg->SwizzleW;
default:
- assert( 0 );
+ assert(0);
}
return 0;
}
@@ -374,10 +375,8 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg)
* sample index.
*/
int
-tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
+tgsi_util_get_texture_coord_dim(unsigned tgsi_tex)
{
- int dim;
-
/*
* Depending on the texture target, (src0.xyzw, src1.x) is interpreted
* differently:
@@ -406,8 +405,7 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
case TGSI_TEXTURE_BUFFER:
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
- dim = 1;
- break;
+ return 1;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
case TGSI_TEXTURE_1D_ARRAY:
@@ -415,50 +413,64 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
case TGSI_TEXTURE_2D_MSAA:
- dim = 2;
- break;
+ return 2;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_2D_ARRAY_MSAA:
- dim = 3;
- break;
+ return 3;
case TGSI_TEXTURE_CUBE_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
- dim = 4;
- break;
+ return 4;
default:
assert(!"unknown texture target");
- dim = 0;
- break;
+ return 0;
}
+}
- if (shadow_or_sample) {
- switch (tgsi_tex) {
- case TGSI_TEXTURE_SHADOW1D:
- /* there is a gap */
- *shadow_or_sample = 2;
- break;
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
- *shadow_or_sample = dim;
- break;
- case TGSI_TEXTURE_2D_MSAA:
- case TGSI_TEXTURE_2D_ARRAY_MSAA:
- *shadow_or_sample = 3;
- break;
- default:
- /* no shadow nor sample */
- *shadow_or_sample = -1;
- break;
- }
+
+/**
+ * Given a TGSI_TEXTURE_x target, return the src register index for the
+ * shadow reference coordinate.
+ */
+int
+tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex)
+{
+ switch (tgsi_tex) {
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ return 2;
+ case TGSI_TEXTURE_SHADOWCUBE:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ case TGSI_TEXTURE_2D_MSAA:
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ return 3;
+ case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ return 4;
+ default:
+ /* no shadow nor sample */
+ return -1;
}
+}
+
- return dim;
+boolean
+tgsi_is_shadow_target(unsigned target)
+{
+ switch (target) {
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ case TGSI_TEXTURE_SHADOWCUBE:
+ case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ return TRUE;
+ default:
+ return FALSE;
+ }
}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h
index deb1ecc66..83a930b69 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -32,6 +32,8 @@
extern "C" {
#endif
+#include "pipe/p_shader_tokens.h"
+
struct tgsi_src_register;
struct tgsi_full_src_register;
struct tgsi_full_instruction;
@@ -80,7 +82,21 @@ struct tgsi_src_register
tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg);
int
-tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample);
+tgsi_util_get_texture_coord_dim(unsigned tgsi_tex);
+
+int
+tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex);
+
+boolean
+tgsi_is_shadow_target(unsigned target);
+
+
+static inline boolean
+tgsi_is_msaa_target(unsigned target)
+{
+ return (target == TGSI_TEXTURE_2D_MSAA ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA);
+}
#if defined __cplusplus
}