diff options
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/tgsi')
21 files changed, 3875 insertions, 1070 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c index fdb7febf7..d525c8ff3 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -110,6 +110,8 @@ tgsi_default_declaration( void ) declaration.Invariant = 0; declaration.Local = 0; declaration.Array = 0; + declaration.Atomic = 0; + declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL; declaration.Padding = 0; return declaration; @@ -125,6 +127,8 @@ tgsi_build_declaration( unsigned invariant, unsigned local, unsigned array, + unsigned atomic, + unsigned mem_type, struct tgsi_header *header ) { struct tgsi_declaration declaration; @@ -141,6 +145,8 @@ tgsi_build_declaration( declaration.Invariant = invariant; declaration.Local = local; declaration.Array = array; + declaration.Atomic = atomic; + declaration.MemType = mem_type; header_bodysize_grow( header ); return declaration; @@ -259,36 +265,39 @@ tgsi_build_declaration_semantic( return ds; } -static struct tgsi_declaration_resource -tgsi_default_declaration_resource(void) +static struct tgsi_declaration_image +tgsi_default_declaration_image(void) { - struct tgsi_declaration_resource dr; + struct tgsi_declaration_image di; - dr.Resource = TGSI_TEXTURE_BUFFER; - dr.Raw = 0; - dr.Writable = 0; - dr.Padding = 0; + di.Resource = TGSI_TEXTURE_BUFFER; + di.Raw = 0; + di.Writable = 0; + di.Format = 0; + di.Padding = 0; - return dr; + return di; } -static struct tgsi_declaration_resource -tgsi_build_declaration_resource(unsigned texture, - unsigned raw, - unsigned writable, - struct tgsi_declaration *declaration, - struct tgsi_header *header) +static struct tgsi_declaration_image +tgsi_build_declaration_image(unsigned texture, + unsigned format, + unsigned raw, + unsigned writable, + struct tgsi_declaration *declaration, + struct tgsi_header *header) { - struct tgsi_declaration_resource dr; + struct tgsi_declaration_image di; - dr = tgsi_default_declaration_resource(); - dr.Resource = texture; - dr.Raw = raw; - dr.Writable = writable; + di = tgsi_default_declaration_image(); + di.Resource = texture; + di.Format = format; + di.Raw = raw; + di.Writable = writable; declaration_grow(declaration, header); - return dr; + return di; } static struct tgsi_declaration_sampler_view @@ -364,7 +373,7 @@ tgsi_default_full_declaration( void ) full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); full_declaration.Interp = tgsi_default_declaration_interp(); - full_declaration.Resource = tgsi_default_declaration_resource(); + full_declaration.Image = tgsi_default_declaration_image(); full_declaration.SamplerView = tgsi_default_declaration_sampler_view(); full_declaration.Array = tgsi_default_declaration_array(); @@ -396,6 +405,8 @@ tgsi_build_full_declaration( full_decl->Declaration.Invariant, full_decl->Declaration.Local, full_decl->Declaration.Array, + full_decl->Declaration.Atomic, + full_decl->Declaration.MemType, header ); if (maxsize <= size) @@ -454,20 +465,21 @@ tgsi_build_full_declaration( header ); } - if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) { - struct tgsi_declaration_resource *dr; + if (full_decl->Declaration.File == TGSI_FILE_IMAGE) { + struct tgsi_declaration_image *di; if (maxsize <= size) { return 0; } - dr = (struct tgsi_declaration_resource *)&tokens[size]; + di = (struct tgsi_declaration_image *)&tokens[size]; size++; - *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource, - full_decl->Resource.Raw, - full_decl->Resource.Writable, - declaration, - header); + *di = tgsi_build_declaration_image(full_decl->Image.Resource, + full_decl->Image.Format, + full_decl->Image.Raw, + full_decl->Image.Writable, + declaration, + header); } if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { @@ -616,7 +628,8 @@ tgsi_default_instruction( void ) instruction.NumSrcRegs = 1; instruction.Label = 0; instruction.Texture = 0; - instruction.Padding = 0; + instruction.Memory = 0; + instruction.Padding = 0; return instruction; } @@ -762,6 +775,40 @@ tgsi_build_instruction_texture( return instruction_texture; } +static struct tgsi_instruction_memory +tgsi_default_instruction_memory( void ) +{ + struct tgsi_instruction_memory instruction_memory; + + instruction_memory.Qualifier = 0; + instruction_memory.Texture = 0; + instruction_memory.Format = 0; + instruction_memory.Padding = 0; + + return instruction_memory; +} + +static struct tgsi_instruction_memory +tgsi_build_instruction_memory( + unsigned qualifier, + unsigned texture, + unsigned format, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_memory instruction_memory; + + instruction_memory.Qualifier = qualifier; + instruction_memory.Texture = texture; + instruction_memory.Format = format; + instruction_memory.Padding = 0; + instruction->Memory = 1; + + instruction_grow( instruction, header ); + + return instruction_memory; +} static struct tgsi_texture_offset tgsi_default_texture_offset( void ) @@ -1008,6 +1055,7 @@ tgsi_default_full_instruction( void ) full_instruction.Predicate = tgsi_default_instruction_predicate(); full_instruction.Label = tgsi_default_instruction_label(); full_instruction.Texture = tgsi_default_instruction_texture(); + full_instruction.Memory = tgsi_default_instruction_memory(); for( i = 0; i < TGSI_FULL_MAX_TEX_OFFSETS; i++ ) { full_instruction.TexOffsets[i] = tgsi_default_texture_offset(); } @@ -1119,6 +1167,26 @@ tgsi_build_full_instruction( prev_token = (struct tgsi_token *) texture_offset; } } + + if (full_inst->Instruction.Memory) { + struct tgsi_instruction_memory *instruction_memory; + + if( maxsize <= size ) + return 0; + instruction_memory = + (struct tgsi_instruction_memory *) &tokens[size]; + size++; + + *instruction_memory = tgsi_build_instruction_memory( + full_inst->Memory.Qualifier, + full_inst->Memory.Texture, + full_inst->Memory.Format, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_memory; + } + for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { const struct tgsi_full_dst_register *reg = &full_inst->Dst[i]; struct tgsi_dst_register *dst_register; @@ -1371,3 +1439,18 @@ tgsi_build_full_property( return size; } + +struct tgsi_full_src_register +tgsi_full_src_register_from_dst(const struct tgsi_full_dst_register *dst) +{ + struct tgsi_full_src_register src; + src.Register = tgsi_default_src_register(); + src.Register.File = dst->Register.File; + src.Register.Indirect = dst->Register.Indirect; + src.Register.Dimension = dst->Register.Dimension; + src.Register.Index = dst->Register.Index; + src.Indirect = dst->Indirect; + src.Dimension = dst->Dimension; + src.DimIndirect = dst->DimIndirect; + return src; +} diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h index c5127e185..34d181ab2 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -30,6 +30,8 @@ struct tgsi_token; +struct tgsi_full_dst_register; +struct tgsi_full_src_register; #if defined __cplusplus @@ -111,6 +113,9 @@ tgsi_build_full_instruction( struct tgsi_instruction_predicate tgsi_default_instruction_predicate(void); +struct tgsi_full_src_register +tgsi_full_src_register_from_dst(const struct tgsi_full_dst_register *dst); + #if defined __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c index 8ceb5b475..405114d09 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -29,6 +29,7 @@ #include "util/u_string.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "tgsi_dump.h" #include "tgsi_info.h" #include "tgsi_iterate.h" @@ -43,6 +44,8 @@ struct dump_ctx { struct tgsi_iterate_context iter; + boolean dump_float_as_hex; + uint instno; uint immno; int indent; @@ -88,6 +91,7 @@ dump_enum( #define SID(I) ctx->dump_printf( ctx, "%d", I ) #define FLT(F) ctx->dump_printf( ctx, "%10.4f", F ) #define DBL(D) ctx->dump_printf( ctx, "%10.8f", D ) +#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) ) #define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) const char * @@ -250,8 +254,25 @@ dump_imm_data(struct tgsi_iterate_context *iter, i++; break; } + case TGSI_IMM_INT64: { + union di d; + d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32; + UID( d.i ); + i++; + break; + } + case TGSI_IMM_UINT64: { + union di d; + d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32; + UID( d.ui ); + i++; + break; + } case TGSI_IMM_FLOAT32: - FLT( data[i].Float ); + if (ctx->dump_float_as_hex) + HFLT( data[i].Float ); + else + FLT( data[i].Float ); break; case TGSI_IMM_UINT32: UID(data[i].Uint); @@ -288,17 +309,17 @@ iter_declaration( * two dimensional */ if (decl->Declaration.File == TGSI_FILE_INPUT && - (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY || + (iter->processor.Processor == PIPE_SHADER_GEOMETRY || (!patch && - (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL || - iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL)))) { + (iter->processor.Processor == PIPE_SHADER_TESS_CTRL || + iter->processor.Processor == PIPE_SHADER_TESS_EVAL)))) { TXT("[]"); } /* all non-patch tess ctrl shader outputs are two dimensional */ if (decl->Declaration.File == TGSI_FILE_OUTPUT && !patch && - iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL) { + iter->processor.Processor == PIPE_SHADER_TESS_CTRL) { TXT("[]"); } @@ -341,15 +362,32 @@ iter_declaration( } } - if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + if (decl->Declaration.File == TGSI_FILE_IMAGE) { TXT(", "); - ENM(decl->Resource.Resource, tgsi_texture_names); - if (decl->Resource.Writable) + ENM(decl->Image.Resource, tgsi_texture_names); + TXT(", "); + TXT(util_format_name(decl->Image.Format)); + if (decl->Image.Writable) TXT(", WR"); - if (decl->Resource.Raw) + if (decl->Image.Raw) TXT(", RAW"); } + if (decl->Declaration.File == TGSI_FILE_BUFFER) { + if (decl->Declaration.Atomic) + TXT(", ATOMIC"); + } + + if (decl->Declaration.File == TGSI_FILE_MEMORY) { + switch (decl->Declaration.MemType) { + /* Note: ,GLOBAL is optional / the default */ + case TGSI_MEMORY_TYPE_GLOBAL: TXT(", GLOBAL"); break; + case TGSI_MEMORY_TYPE_SHARED: TXT(", SHARED"); break; + case TGSI_MEMORY_TYPE_PRIVATE: TXT(", PRIVATE"); break; + case TGSI_MEMORY_TYPE_INPUT: TXT(", INPUT"); break; + } + } + if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { TXT(", "); ENM(decl->SamplerView.Resource, tgsi_texture_names); @@ -370,7 +408,7 @@ iter_declaration( } if (decl->Declaration.Interpolate) { - if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && + if (iter->processor.Processor == PIPE_SHADER_FRAGMENT && decl->Declaration.File == TGSI_FILE_INPUT) { TXT( ", " ); @@ -413,6 +451,7 @@ tgsi_dump_declaration( const struct tgsi_full_declaration *decl ) { struct dump_ctx ctx; + memset(&ctx, 0, sizeof(ctx)); ctx.dump_printf = dump_ctx_printf; @@ -445,6 +484,9 @@ iter_property( case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: ENM(prop->u[i].Data, tgsi_fs_coord_pixel_center_names); break; + case TGSI_PROPERTY_NEXT_SHADER: + ENM(prop->u[i].Data, tgsi_processor_type_names); + break; default: SID( prop->u[i].Data ); break; @@ -461,6 +503,7 @@ void tgsi_dump_property( const struct tgsi_full_property *prop ) { struct dump_ctx ctx; + memset(&ctx, 0, sizeof(ctx)); ctx.dump_printf = dump_ctx_printf; @@ -492,6 +535,7 @@ tgsi_dump_immediate( const struct tgsi_full_immediate *imm ) { struct dump_ctx ctx; + memset(&ctx, 0, sizeof(ctx)); ctx.dump_printf = dump_ctx_printf; @@ -610,17 +654,37 @@ iter_instruction( } } - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: - case TGSI_OPCODE_ELSE: - case TGSI_OPCODE_BGNLOOP: - case TGSI_OPCODE_ENDLOOP: - case TGSI_OPCODE_CAL: - case TGSI_OPCODE_BGNSUB: - TXT( " :" ); - UID( inst->Label.Label ); - break; + if (inst->Instruction.Memory) { + uint32_t qualifier = inst->Memory.Qualifier; + while (qualifier) { + int bit = ffs(qualifier) - 1; + qualifier &= ~(1U << bit); + TXT(", "); + ENM(bit, tgsi_memory_names); + } + if (inst->Memory.Texture) { + TXT( ", " ); + ENM( inst->Memory.Texture, tgsi_texture_names ); + } + if (inst->Memory.Format) { + TXT( ", " ); + TXT( util_format_name(inst->Memory.Format) ); + } + } + + if (inst->Instruction.Label) { + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BGNSUB: + TXT( " :" ); + UID( inst->Label.Label ); + break; + } } /* update indentation */ @@ -642,12 +706,14 @@ tgsi_dump_instruction( uint instno ) { struct dump_ctx ctx; + memset(&ctx, 0, sizeof(ctx)); ctx.instno = instno; ctx.immno = instno; ctx.indent = 0; ctx.dump_printf = dump_ctx_printf; ctx.indentation = 0; + ctx.file = NULL; iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst ); } @@ -662,23 +728,30 @@ prolog( return TRUE; } +static void +init_dump_ctx(struct dump_ctx *ctx, uint flags) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->iter.prolog = prolog; + ctx->iter.iterate_instruction = iter_instruction; + ctx->iter.iterate_declaration = iter_declaration; + ctx->iter.iterate_immediate = iter_immediate; + ctx->iter.iterate_property = iter_property; + + if (flags & TGSI_DUMP_FLOAT_AS_HEX) + ctx->dump_float_as_hex = TRUE; +} + void tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file) { struct dump_ctx ctx; + memset(&ctx, 0, sizeof(ctx)); - ctx.iter.prolog = prolog; - ctx.iter.iterate_instruction = iter_instruction; - ctx.iter.iterate_declaration = iter_declaration; - ctx.iter.iterate_immediate = iter_immediate; - ctx.iter.iterate_property = iter_property; - ctx.iter.epilog = NULL; + init_dump_ctx(&ctx, flags); - ctx.instno = 0; - ctx.immno = 0; - ctx.indent = 0; ctx.dump_printf = dump_ctx_printf; - ctx.indentation = 0; ctx.file = file; tgsi_iterate_shader( tokens, &ctx.iter ); @@ -696,6 +769,7 @@ struct str_dump_ctx char *str; char *ptr; int left; + bool nospace; }; static void @@ -703,7 +777,7 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) { struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx; - if(sctx->left > 1) { + if (!sctx->nospace) { int written; va_list ap; va_start(ap, format); @@ -714,14 +788,17 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) * vsnprintf: */ if (written > 0) { - written = MIN2(sctx->left, written); + if (written >= sctx->left) { + sctx->nospace = true; + written = sctx->left; + } sctx->ptr += written; sctx->left -= written; } } } -void +bool tgsi_dump_str( const struct tgsi_token *tokens, uint flags, @@ -729,27 +806,21 @@ tgsi_dump_str( size_t size) { struct str_dump_ctx ctx; + memset(&ctx, 0, sizeof(ctx)); - ctx.base.iter.prolog = prolog; - ctx.base.iter.iterate_instruction = iter_instruction; - ctx.base.iter.iterate_declaration = iter_declaration; - ctx.base.iter.iterate_immediate = iter_immediate; - ctx.base.iter.iterate_property = iter_property; - ctx.base.iter.epilog = NULL; + init_dump_ctx(&ctx.base, flags); - ctx.base.instno = 0; - ctx.base.immno = 0; - ctx.base.indent = 0; ctx.base.dump_printf = &str_dump_ctx_printf; - ctx.base.indentation = 0; - ctx.base.file = NULL; ctx.str = str; ctx.str[0] = 0; ctx.ptr = str; ctx.left = (int)size; + ctx.nospace = false; tgsi_iterate_shader( tokens, &ctx.base.iter ); + + return !ctx.nospace; } void @@ -760,6 +831,7 @@ tgsi_dump_instruction_str( size_t size) { struct str_dump_ctx ctx; + memset(&ctx, 0, sizeof(ctx)); ctx.base.instno = instno; ctx.base.immno = instno; @@ -772,6 +844,7 @@ tgsi_dump_instruction_str( ctx.str[0] = 0; ctx.ptr = str; ctx.left = (int)size; + ctx.nospace = false; iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst ); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c index 75cd0d53c..7b5c56d9d 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -58,8 +58,10 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_half.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/rounding.h" #define DEBUG_EXECUTION 0 @@ -75,6 +77,8 @@ union tgsi_double_channel { double d[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE][2]; + uint64_t u64[TGSI_QUAD_SIZE]; + int64_t i64[TGSI_QUAD_SIZE]; }; struct tgsi_double_vector { @@ -542,10 +546,10 @@ static void micro_rnd(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { - dst->f[0] = floorf(src->f[0] + 0.5f); - dst->f[1] = floorf(src->f[1] + 0.5f); - dst->f[2] = floorf(src->f[2] + 0.5f); - dst->f[3] = floorf(src->f[3] + 0.5f); + dst->f[0] = _mesa_roundevenf(src->f[0]); + dst->f[1] = _mesa_roundevenf(src->f[1]); + dst->f[2] = _mesa_roundevenf(src->f[2]); + dst->f[3] = _mesa_roundevenf(src->f[3]); } static void @@ -674,10 +678,10 @@ static void micro_trunc(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { - dst->f[0] = (float)(int)src->f[0]; - dst->f[1] = (float)(int)src->f[1]; - dst->f[2] = (float)(int)src->f[2]; - dst->f[3] = (float)(int)src->f[3]; + dst->f[0] = truncf(src->f[0]); + dst->f[1] = truncf(src->f[1]); + dst->f[2] = truncf(src->f[2]); + dst->f[3] = truncf(src->f[3]); } static void @@ -690,11 +694,251 @@ micro_u2d(union tgsi_double_channel *dst, dst->d[3] = (double)src->u[3]; } +static void +micro_i64abs(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; + dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; + dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; + dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; +} + +static void +micro_i64sgn(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; + dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; + dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; + dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; +} + +static void +micro_i64neg(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = -src->i64[0]; + dst->i64[1] = -src->i64[1]; + dst->i64[2] = -src->i64[2]; + dst->i64[3] = -src->i64[3]; +} + +static void +micro_u64seq(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_u64sne(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_i64slt(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; +} + +static void +micro_u64slt(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_i64sge(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; +} + +static void +micro_u64sge(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_u64max(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; + dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; + dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; + dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; +} + +static void +micro_i64max(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; + dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; + dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; + dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; +} + +static void +micro_u64min(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; + dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; + dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; + dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; +} + +static void +micro_i64min(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; + dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; + dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; + dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; +} + +static void +micro_u64add(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] + src[1].u64[0]; + dst->u64[1] = src[0].u64[1] + src[1].u64[1]; + dst->u64[2] = src[0].u64[2] + src[1].u64[2]; + dst->u64[3] = src[0].u64[3] + src[1].u64[3]; +} + +static void +micro_u64mul(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] * src[1].u64[0]; + dst->u64[1] = src[0].u64[1] * src[1].u64[1]; + dst->u64[2] = src[0].u64[2] * src[1].u64[2]; + dst->u64[3] = src[0].u64[3] * src[1].u64[3]; +} + +static void +micro_u64div(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] / src[1].u64[0]; + dst->u64[1] = src[0].u64[1] / src[1].u64[1]; + dst->u64[2] = src[0].u64[2] / src[1].u64[2]; + dst->u64[3] = src[0].u64[3] / src[1].u64[3]; +} + +static void +micro_i64div(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] / src[1].i64[0]; + dst->i64[1] = src[0].i64[1] / src[1].i64[1]; + dst->i64[2] = src[0].i64[2] / src[1].i64[2]; + dst->i64[3] = src[0].i64[3] / src[1].i64[3]; +} + +static void +micro_u64mod(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] % src[1].u64[0]; + dst->u64[1] = src[0].u64[1] % src[1].u64[1]; + dst->u64[2] = src[0].u64[2] % src[1].u64[2]; + dst->u64[3] = src[0].u64[3] % src[1].u64[3]; +} + +static void +micro_i64mod(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] % src[1].i64[0]; + dst->i64[1] = src[0].i64[1] % src[1].i64[1]; + dst->i64[2] = src[0].i64[2] % src[1].i64[2]; + dst->i64[3] = src[0].i64[3] % src[1].i64[3]; +} + +static void +micro_u64shl(union tgsi_double_channel *dst, + const union tgsi_double_channel *src0, + union tgsi_exec_channel *src1) +{ + unsigned masked_count; + masked_count = src1->u[0] & 0x3f; + dst->u64[0] = src0->u64[0] << masked_count; + masked_count = src1->u[1] & 0x3f; + dst->u64[1] = src0->u64[1] << masked_count; + masked_count = src1->u[2] & 0x3f; + dst->u64[2] = src0->u64[2] << masked_count; + masked_count = src1->u[3] & 0x3f; + dst->u64[3] = src0->u64[3] << masked_count; +} + +static void +micro_i64shr(union tgsi_double_channel *dst, + const union tgsi_double_channel *src0, + union tgsi_exec_channel *src1) +{ + unsigned masked_count; + masked_count = src1->u[0] & 0x3f; + dst->i64[0] = src0->i64[0] >> masked_count; + masked_count = src1->u[1] & 0x3f; + dst->i64[1] = src0->i64[1] >> masked_count; + masked_count = src1->u[2] & 0x3f; + dst->i64[2] = src0->i64[2] >> masked_count; + masked_count = src1->u[3] & 0x3f; + dst->i64[3] = src0->i64[3] >> masked_count; +} + +static void +micro_u64shr(union tgsi_double_channel *dst, + const union tgsi_double_channel *src0, + union tgsi_exec_channel *src1) +{ + unsigned masked_count; + masked_count = src1->u[0] & 0x3f; + dst->u64[0] = src0->u64[0] >> masked_count; + masked_count = src1->u[1] & 0x3f; + dst->u64[1] = src0->u64[1] >> masked_count; + masked_count = src1->u[2] & 0x3f; + dst->u64[2] = src0->u64[2] >> masked_count; + masked_count = src1->u[3] & 0x3f; + dst->u64[3] = src0->u64[3] >> masked_count; +} + enum tgsi_exec_datatype { TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT, - TGSI_EXEC_DATA_DOUBLE + TGSI_EXEC_DATA_DOUBLE, + TGSI_EXEC_DATA_INT64, + TGSI_EXEC_DATA_UINT64, }; /* @@ -852,7 +1096,9 @@ void tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, - struct tgsi_sampler *sampler) + struct tgsi_sampler *sampler, + struct tgsi_image *image, + struct tgsi_buffer *buffer) { uint k; struct tgsi_parse_context parse; @@ -870,6 +1116,8 @@ tgsi_exec_machine_bind_shader( mach->Tokens = tokens; mach->Sampler = sampler; + mach->Image = image; + mach->Buffer = buffer; if (!tokens) { /* unbind and free all */ @@ -890,11 +1138,13 @@ tgsi_exec_machine_bind_shader( return; } - mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; mach->NumOutputs = 0; - if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && + for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) + mach->SysSemanticToIndex[k] = -1; + + if (mach->ShaderType == PIPE_SHADER_GEOMETRY && !mach->UsedGeometryShader) { struct tgsi_exec_vector *inputs; struct tgsi_exec_vector *outputs; @@ -960,6 +1210,11 @@ tgsi_exec_machine_bind_shader( ++mach->NumOutputs; } } + else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; + } + memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -1000,7 +1255,7 @@ tgsi_exec_machine_bind_shader( break; case TGSI_TOKEN_TYPE_PROPERTY: - if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) { + if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; } @@ -1024,7 +1279,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine * -tgsi_exec_machine_create( void ) +tgsi_exec_machine_create(enum pipe_shader_type shader_type) { struct tgsi_exec_machine *mach; uint i; @@ -1035,14 +1290,17 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); + mach->ShaderType = shader_type; mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; - mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); - mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); - if (!mach->Inputs || !mach->Outputs) - goto fail; + if (shader_type != PIPE_SHADER_COMPUTE) { + mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); + mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); + if (!mach->Inputs || !mach->Outputs) + goto fail; + } /* Setup constants needed by the SSE2 executor. */ for( i = 0; i < 4; i++ ) { @@ -1255,7 +1513,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, case TGSI_FILE_INPUT: for (i = 0; i < TGSI_QUAD_SIZE; i++) { /* - if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], index2D->i[i], index->i[i]); @@ -1272,7 +1530,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, * gl_FragCoord, for example, in a sys value register. */ for (i = 0; i < TGSI_QUAD_SIZE; i++) { - chan->u[i] = mach->SystemValue[index->i[i]].u[i]; + chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; } break; @@ -1649,7 +1907,7 @@ store_dest_dstret(struct tgsi_exec_machine *mach, debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], reg->Register.Index); - if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) @@ -1883,7 +2141,7 @@ emit_primitive(struct tgsi_exec_machine *mach) static void conditional_emit_primitive(struct tgsi_exec_machine *mach) { - if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { int emitted_verts = mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; if (emitted_verts) { @@ -1993,12 +2251,12 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, uint sampler) { - uint unit; - + uint unit = 0; + int i; if (inst->Src[sampler].Register.Indirect) { const struct tgsi_full_src_register *reg = &inst->Src[sampler]; union tgsi_exec_channel indir_index, index2; - + const uint execmask = mach->ExecMask; index2.i[0] = index2.i[1] = index2.i[2] = @@ -2011,7 +2269,13 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach, &index2, &ZeroVec, &indir_index); - unit = inst->Src[sampler].Register.Index + indir_index.i[0]; + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + if (execmask & (1 << i)) { + unit = inst->Src[sampler].Register.Index + indir_index.i[i]; + break; + } + } + } else { unit = inst->Src[sampler].Register.Index; } @@ -2021,7 +2285,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach, /* * execute a texture instruction. * - * modifier is used to control the channel routing for the\ + * modifier is used to control the channel routing for the * instruction variants like proj, lod, and texture with lod bias. * sampler indicates which src register the sampler is contained in. */ @@ -2032,7 +2296,7 @@ exec_tex(struct tgsi_exec_machine *mach, { const union tgsi_exec_channel *args[5], *proj = NULL; union tgsi_exec_channel r[5]; - enum tgsi_sampler_control control = tgsi_sampler_lod_none; + enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; uint chan; uint unit; int8_t offsets[3]; @@ -2045,15 +2309,16 @@ exec_tex(struct tgsi_exec_machine *mach, assert(modifier != TEX_MODIFIER_LEVEL_ZERO); assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); - dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref); + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); assert(dim <= 4); if (shadow_ref >= 0) - assert(shadow_ref >= dim && shadow_ref < Elements(args)); + assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args)); /* fetch modifier to the last argument */ if (modifier != TEX_MODIFIER_NONE) { - const int last = Elements(args) - 1; + const int last = ARRAY_SIZE(args) - 1; /* fetch modifier from src0.w or src1.x */ if (sampler == 1) { @@ -2078,14 +2343,14 @@ exec_tex(struct tgsi_exec_machine *mach, args[i] = &ZeroVec; if (modifier == TEX_MODIFIER_EXPLICIT_LOD) - control = tgsi_sampler_lod_explicit; + control = TGSI_SAMPLER_LOD_EXPLICIT; else if (modifier == TEX_MODIFIER_LOD_BIAS) - control = tgsi_sampler_lod_bias; + control = TGSI_SAMPLER_LOD_BIAS; else if (modifier == TEX_MODIFIER_GATHER) - control = tgsi_sampler_gather; + control = TGSI_SAMPLER_GATHER; } else { - for (i = dim; i < Elements(args); i++) + for (i = dim; i < ARRAY_SIZE(args); i++) args[i] = &ZeroVec; } @@ -2132,6 +2397,46 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_lodq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint unit; + int dim; + int i; + union tgsi_exec_channel coords[4]; + const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; + union tgsi_exec_channel r[2]; + + unit = fetch_sampler_unit(mach, inst, 1); + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + assert(dim <= ARRAY_SIZE(coords)); + /* fetch coordinates */ + for (i = 0; i < dim; i++) { + FETCH(&coords[i], 0, TGSI_CHAN_X + i); + args[i] = &coords[i]; + } + for (i = dim; i < ARRAY_SIZE(coords); i++) { + args[i] = &ZeroVec; + } + mach->Sampler->query_lod(mach->Sampler, unit, unit, + args[0]->f, + args[1]->f, + args[2]->f, + args[3]->f, + TGSI_SAMPLER_LOD_NONE, + r[0].f, + r[1].f); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, + TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, + TGSI_EXEC_DATA_FLOAT); + } +} static void exec_txd(struct tgsi_exec_machine *mach, @@ -2155,7 +2460,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2171,7 +2476,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2185,7 +2490,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2205,7 +2510,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2225,7 +2530,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2259,7 +2564,8 @@ exec_txf(struct tgsi_exec_machine *mach, IFETCH(&r[3], 0, TGSI_CHAN_W); - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || + inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { target = mach->SamplerViews[unit].Resource; } else { @@ -2301,7 +2607,8 @@ exec_txf(struct tgsi_exec_machine *mach, r[3].f[j] = rgba[3][j]; } - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || + inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { unsigned char swizzles[4]; swizzles[0] = inst->Src[1].Register.SwizzleX; swizzles[1] = inst->Src[1].Register.SwizzleY; @@ -2364,7 +2671,7 @@ exec_sample(struct tgsi_exec_machine *mach, const uint sampler_unit = inst->Src[2].Register.Index; union tgsi_exec_channel r[5], c1; const union tgsi_exec_channel *lod = &ZeroVec; - enum tgsi_sampler_control control = tgsi_sampler_lod_none; + enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; uint chan; unsigned char swizzles[4]; int8_t offsets[3]; @@ -2378,16 +2685,16 @@ exec_sample(struct tgsi_exec_machine *mach, if (modifier == TEX_MODIFIER_LOD_BIAS) { FETCH(&c1, 3, TGSI_CHAN_X); lod = &c1; - control = tgsi_sampler_lod_bias; + control = TGSI_SAMPLER_LOD_BIAS; } else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { FETCH(&c1, 3, TGSI_CHAN_X); lod = &c1; - control = tgsi_sampler_lod_explicit; + control = TGSI_SAMPLER_LOD_EXPLICIT; } else { assert(modifier == TEX_MODIFIER_LEVEL_ZERO); - control = tgsi_sampler_lod_zero; + control = TGSI_SAMPLER_LOD_ZERO; } } @@ -2513,7 +2820,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2529,7 +2836,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2547,7 +2854,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); break; @@ -2645,7 +2952,7 @@ exec_declaration(struct tgsi_exec_machine *mach, return; } - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { if (decl->Declaration.File == TGSI_FILE_INPUT) { uint first, last, mask; @@ -2723,9 +3030,6 @@ exec_declaration(struct tgsi_exec_machine *mach, } } - if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { - mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; - } } typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, @@ -3018,6 +3322,45 @@ exec_dp2(struct tgsi_exec_machine *mach, } static void +exec_pk2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned chan; + union tgsi_exec_channel arg[2], dst; + + fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst.u[chan] = util_float_to_half(arg[0].f[chan]) | + (util_float_to_half(arg[1].f[chan]) << 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); + } + } +} + +static void +exec_up2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned chan; + union tgsi_exec_channel arg, dst[2]; + + fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); + dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void exec_scs(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -3295,6 +3638,16 @@ exec_endswitch(struct tgsi_exec_machine *mach) typedef void (* micro_dop)(union tgsi_double_channel *dst, const union tgsi_double_channel *src); +typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, + const union tgsi_double_channel *src0, + union tgsi_exec_channel *src1); + +typedef void (* micro_dop_s)(union tgsi_double_channel *dst, + const union tgsi_exec_channel *src); + +typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, + const union tgsi_double_channel *src); + static void fetch_double_channel(struct tgsi_exec_machine *mach, union tgsi_double_channel *chan, @@ -3447,177 +3800,805 @@ exec_double_trinary(struct tgsi_exec_machine *mach, } static void -exec_f2d(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_dldexp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_exec_channel src; + union tgsi_double_channel src0; + union tgsi_exec_channel src1; union tgsi_double_channel dst; + int wmask; - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_f2d(&dst, &src); + wmask = inst->Dst[0].Register.WriteMask; + if (wmask & TGSI_WRITEMASK_XY) { + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); + micro_dldexp(&dst, &src0, &src1); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); } - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_f2d(&dst, &src); + + if (wmask & TGSI_WRITEMASK_ZW) { + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); + micro_dldexp(&dst, &src0, &src1); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); } } static void -exec_d2f(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_dfracexp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { union tgsi_double_channel src; - union tgsi_exec_channel dst; - int wm = inst->Dst[0].Register.WriteMask; - int i; - int bit; - for (i = 0; i < 2; i++) { - bit = ffs(wm); - if (bit) { - wm &= ~(1 << (bit - 1)); - if (i == 0) - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - else - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - micro_d2f(&dst, &src); - store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_FLOAT); - } + union tgsi_double_channel dst; + union tgsi_exec_channel dst_exp; + + if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) { + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + micro_dfracexp(&dst, &dst_exp, &src); + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); + store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); + } + if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) { + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + micro_dfracexp(&dst, &dst_exp, &src); + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); + store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); } } static void -exec_i2d(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_dop_sop op) { - union tgsi_exec_channel src; + union tgsi_double_channel src0; + union tgsi_exec_channel src1; union tgsi_double_channel dst; + int wmask; - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); - micro_i2d(&dst, &src); + wmask = inst->Dst[0].Register.WriteMask; + if (wmask & TGSI_WRITEMASK_XY) { + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); + op(&dst, &src0, &src1); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); } - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_INT); - micro_i2d(&dst, &src); + + if (wmask & TGSI_WRITEMASK_ZW) { + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); + op(&dst, &src0, &src1); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); } } +static int +get_image_coord_dim(unsigned tgsi_tex) +{ + int dim; + switch (tgsi_tex) { + case TGSI_TEXTURE_BUFFER: + case TGSI_TEXTURE_1D: + dim = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_2D_MSAA: + dim = 2; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + case TGSI_TEXTURE_CUBE_ARRAY: + dim = 3; + break; + default: + assert(!"unknown texture target"); + dim = 0; + break; + } + + return dim; +} + +static int +get_image_coord_sample(unsigned tgsi_tex) +{ + int sample = 0; + switch (tgsi_tex) { + case TGSI_TEXTURE_2D_MSAA: + sample = 3; + break; + case TGSI_TEXTURE_2D_ARRAY_MSAA: + sample = 4; + break; + default: + break; + } + return sample; +} + static void -exec_d2i(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_load_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_double_channel src; - union tgsi_exec_channel dst; - int wm = inst->Dst[0].Register.WriteMask; - int i; - int bit; - for (i = 0; i < 2; i++) { - bit = ffs(wm); - if (bit) { - wm &= ~(1 << (bit - 1)); - if (i == 0) - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - else - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - micro_d2i(&dst, &src); - store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_INT); + union tgsi_exec_channel r[4], sample_r; + uint unit; + int sample; + int i, j; + int dim; + uint chan; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_image_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + dim = get_image_coord_dim(inst->Memory.Texture); + sample = get_image_coord_sample(inst->Memory.Texture); + assert(dim <= 3); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + for (i = 0; i < dim; i++) { + IFETCH(&r[i], 1, TGSI_CHAN_X + i); + } + + if (sample) + IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); + + mach->Image->load(mach->Image, ¶ms, + r[0].i, r[1].i, r[2].i, sample_r.i, + rgba); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); } } } + static void -exec_u2d(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_load_buf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_exec_channel src; - union tgsi_double_channel dst; + union tgsi_exec_channel r[4]; + uint unit; + int j; + uint chan; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_buffer_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); - micro_u2d(&dst, &src); - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + IFETCH(&r[0], 1, TGSI_CHAN_X); + + mach->Buffer->load(mach->Buffer, ¶ms, + r[0].i, rgba); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; } - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_UINT); - micro_u2d(&dst, &src); - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } } } static void -exec_d2u(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_load_mem(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_double_channel src; - union tgsi_exec_channel dst; - int wm = inst->Dst[0].Register.WriteMask; - int i; - int bit; - for (i = 0; i < 2; i++) { - bit = ffs(wm); - if (bit) { - wm &= ~(1 << (bit - 1)); - if (i == 0) - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - else - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - micro_d2u(&dst, &src); - store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_UINT); + union tgsi_exec_channel r[4]; + uint chan; + char *ptr = mach->LocalMem; + uint32_t offset; + int j; + + IFETCH(&r[0], 1, TGSI_CHAN_X); + if (r[0].u[0] >= mach->LocalMemSize) + return; + + offset = r[0].u[0]; + ptr += offset; + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + memcpy(&r[chan].u[j], ptr + (4 * chan), 4); + } + } + } + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); } } } static void -exec_dldexp(struct tgsi_exec_machine *mach, +exec_load(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) + exec_load_img(mach, inst); + else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) + exec_load_buf(mach, inst); + else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) + exec_load_mem(mach, inst); +} + +static void +exec_store_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3], sample_r; + union tgsi_exec_channel value[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_image_params params; + int dim; + int sample; + int i, j; + uint unit; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + unit = inst->Dst[0].Register.Index; + dim = get_image_coord_dim(inst->Memory.Texture); + sample = get_image_coord_sample(inst->Memory.Texture); + assert(dim <= 3); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + for (i = 0; i < dim; i++) { + IFETCH(&r[i], 0, TGSI_CHAN_X + i); + } + + for (i = 0; i < 4; i++) { + FETCH(&value[i], 1, TGSI_CHAN_X + i); + } + if (sample) + IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + + mach->Image->store(mach->Image, ¶ms, + r[0].i, r[1].i, r[2].i, sample_r.i, + rgba); +} + +static void +exec_store_buf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + union tgsi_exec_channel value[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_buffer_params params; + int i, j; + uint unit; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = inst->Dst[0].Register.Index; + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.writemask = inst->Dst[0].Register.WriteMask; + + IFETCH(&r[0], 0, TGSI_CHAN_X); + for (i = 0; i < 4; i++) { + FETCH(&value[i], 1, TGSI_CHAN_X + i); + } + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + + mach->Buffer->store(mach->Buffer, ¶ms, + r[0].i, + rgba); +} + +static void +exec_store_mem(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + union tgsi_exec_channel value[4]; + uint i, chan; + char *ptr = mach->LocalMem; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + + IFETCH(&r[0], 0, TGSI_CHAN_X); + + for (i = 0; i < 4; i++) { + FETCH(&value[i], 1, TGSI_CHAN_X + i); + } + + if (r[0].u[0] >= mach->LocalMemSize) + return; + ptr += r[0].u[0]; + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + if (execmask & (1 << i)) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + memcpy(ptr + (chan * 4), &value[chan].u[0], 4); + } + } + } + } +} + +static void +exec_store(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) + exec_store_img(mach, inst); + else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) + exec_store_buf(mach, inst); + else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) + exec_store_mem(mach, inst); +} + +static void +exec_atomop_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4], sample_r; + union tgsi_exec_channel value[4], value2[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_image_params params; + int dim; + int sample; + int i, j; + uint unit, chan; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + unit = fetch_sampler_unit(mach, inst, 0); + dim = get_image_coord_dim(inst->Memory.Texture); + sample = get_image_coord_sample(inst->Memory.Texture); + assert(dim <= 3); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + for (i = 0; i < dim; i++) { + IFETCH(&r[i], 1, TGSI_CHAN_X + i); + } + + for (i = 0; i < 4; i++) { + FETCH(&value[i], 2, TGSI_CHAN_X + i); + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + FETCH(&value2[i], 3, TGSI_CHAN_X + i); + } + if (sample) + IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba2[0][j] = value2[0].f[j]; + rgba2[1][j] = value2[1].f[j]; + rgba2[2][j] = value2[2].f[j]; + rgba2[3][j] = value2[3].f[j]; + } + } + + mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, + r[0].i, r[1].i, r[2].i, sample_r.i, + rgba, rgba2); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_atomop_buf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4]; + union tgsi_exec_channel value[4], value2[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_buffer_params params; + int i, j; + uint unit, chan; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.writemask = inst->Dst[0].Register.WriteMask; + + IFETCH(&r[0], 1, TGSI_CHAN_X); + + for (i = 0; i < 4; i++) { + FETCH(&value[i], 2, TGSI_CHAN_X + i); + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + FETCH(&value2[i], 3, TGSI_CHAN_X + i); + } + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba2[0][j] = value2[0].f[j]; + rgba2[1][j] = value2[1].f[j]; + rgba2[2][j] = value2[2].f[j]; + rgba2[3][j] = value2[3].f[j]; + } + } + + mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode, + r[0].i, + rgba, rgba2); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_atomop_mem(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4]; + union tgsi_exec_channel value[4], value2[4]; + char *ptr = mach->LocalMem; + uint32_t val; + uint chan, i; + uint32_t offset; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + IFETCH(&r[0], 1, TGSI_CHAN_X); + + if (r[0].u[0] >= mach->LocalMemSize) + return; + + offset = r[0].u[0]; + ptr += offset; + for (i = 0; i < 4; i++) { + FETCH(&value[i], 2, TGSI_CHAN_X + i); + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + FETCH(&value2[i], 3, TGSI_CHAN_X + i); + } + + memcpy(&r[0].u[0], ptr, 4); + val = r[0].u[0]; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ATOMUADD: + val += value[0].u[0]; + break; + case TGSI_OPCODE_ATOMXOR: + val ^= value[0].u[0]; + break; + case TGSI_OPCODE_ATOMOR: + val |= value[0].u[0]; + break; + case TGSI_OPCODE_ATOMAND: + val &= value[0].u[0]; + break; + case TGSI_OPCODE_ATOMUMIN: + val = MIN2(val, value[0].u[0]); + break; + case TGSI_OPCODE_ATOMUMAX: + val = MAX2(val, value[0].u[0]); + break; + case TGSI_OPCODE_ATOMIMIN: + val = MIN2(r[0].i[0], value[0].i[0]); + break; + case TGSI_OPCODE_ATOMIMAX: + val = MAX2(r[0].i[0], value[0].i[0]); + break; + case TGSI_OPCODE_ATOMXCHG: + val = value[0].i[0]; + break; + case TGSI_OPCODE_ATOMCAS: + if (val == value[0].u[0]) + val = value2[0].u[0]; + break; + default: + break; + } + for (i = 0; i < TGSI_QUAD_SIZE; i++) + if (execmask & (1 << i)) + memcpy(ptr, &val, 4); + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_atomop(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - union tgsi_double_channel src0; - union tgsi_exec_channel src1; - union tgsi_double_channel dst; - int wmask; + if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) + exec_atomop_img(mach, inst); + else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) + exec_atomop_buf(mach, inst); + else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) + exec_atomop_mem(mach, inst); +} - wmask = inst->Dst[0].Register.WriteMask; - if (wmask & TGSI_WRITEMASK_XY) { - fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); - micro_dldexp(&dst, &src0, &src1); - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); +static void +exec_resq_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + int result[4]; + union tgsi_exec_channel r[4]; + uint unit; + int i, chan, j; + struct tgsi_image_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + mach->Image->get_dims(mach->Image, ¶ms, result); + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + for (j = 0; j < 4; j++) { + r[j].i[i] = result[j]; + } } - if (wmask & TGSI_WRITEMASK_ZW) { - fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); - micro_dldexp(&dst, &src0, &src1); - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, + TGSI_EXEC_DATA_INT); + } } } static void -exec_dfracexp(struct tgsi_exec_machine *mach, +exec_resq_buf(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - union tgsi_double_channel src; + int result; + union tgsi_exec_channel r[4]; + uint unit; + int i, chan; + struct tgsi_buffer_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + + mach->Buffer->get_dims(mach->Buffer, ¶ms, &result); + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + r[0].i[i] = result; + } + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, + TGSI_EXEC_DATA_INT); + } + } +} + +static void +exec_resq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) + exec_resq_img(mach, inst); + else + exec_resq_buf(mach, inst); +} + +static void +micro_f2u64(union tgsi_double_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u64[0] = (uint64_t)src->f[0]; + dst->u64[1] = (uint64_t)src->f[1]; + dst->u64[2] = (uint64_t)src->f[2]; + dst->u64[3] = (uint64_t)src->f[3]; +} + +static void +micro_f2i64(union tgsi_double_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i64[0] = (int64_t)src->f[0]; + dst->i64[1] = (int64_t)src->f[1]; + dst->i64[2] = (int64_t)src->f[2]; + dst->i64[3] = (int64_t)src->f[3]; +} + +static void +micro_u2i64(union tgsi_double_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u64[0] = (uint64_t)src->u[0]; + dst->u64[1] = (uint64_t)src->u[1]; + dst->u64[2] = (uint64_t)src->u[2]; + dst->u64[3] = (uint64_t)src->u[3]; +} + +static void +micro_i2i64(union tgsi_double_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i64[0] = (int64_t)src->i[0]; + dst->i64[1] = (int64_t)src->i[1]; + dst->i64[2] = (int64_t)src->i[2]; + dst->i64[3] = (int64_t)src->i[3]; +} + +static void +micro_d2u64(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = (uint64_t)src->d[0]; + dst->u64[1] = (uint64_t)src->d[1]; + dst->u64[2] = (uint64_t)src->d[2]; + dst->u64[3] = (uint64_t)src->d[3]; +} + +static void +micro_d2i64(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = (int64_t)src->d[0]; + dst->i64[1] = (int64_t)src->d[1]; + dst->i64[2] = (int64_t)src->d[2]; + dst->i64[3] = (int64_t)src->d[3]; +} + +static void +micro_u642d(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->d[0] = (double)src->u64[0]; + dst->d[1] = (double)src->u64[1]; + dst->d[2] = (double)src->u64[2]; + dst->d[3] = (double)src->u64[3]; +} + +static void +micro_i642d(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->d[0] = (double)src->i64[0]; + dst->d[1] = (double)src->i64[1]; + dst->d[2] = (double)src->i64[2]; + dst->d[3] = (double)src->i64[3]; +} + +static void +micro_u642f(union tgsi_exec_channel *dst, + const union tgsi_double_channel *src) +{ + dst->f[0] = (float)src->u64[0]; + dst->f[1] = (float)src->u64[1]; + dst->f[2] = (float)src->u64[2]; + dst->f[3] = (float)src->u64[3]; +} + +static void +micro_i642f(union tgsi_exec_channel *dst, + const union tgsi_double_channel *src) +{ + dst->f[0] = (float)src->i64[0]; + dst->f[1] = (float)src->i64[1]; + dst->f[2] = (float)src->i64[2]; + dst->f[3] = (float)src->i64[3]; +} + +static void +exec_t_2_64(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_dop_s op, + enum tgsi_exec_datatype src_datatype) +{ + union tgsi_exec_channel src; union tgsi_double_channel dst; - union tgsi_exec_channel dst_exp; - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - micro_dfracexp(&dst, &dst_exp, &src); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); + op(&dst, &src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); - store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); } - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - micro_dfracexp(&dst, &dst_exp, &src); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); + op(&dst, &src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); - store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); } } +static void +exec_64_2_t(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_sop_d op, + enum tgsi_exec_datatype dst_datatype) +{ + union tgsi_double_channel src; + union tgsi_exec_channel dst; + int wm = inst->Dst[0].Register.WriteMask; + int i; + int bit; + for (i = 0; i < 2; i++) { + bit = ffs(wm); + if (bit) { + wm &= ~(1 << (bit - 1)); + if (i == 0) + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + else + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + op(&dst, &src); + store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype); + } + } +} static void micro_i2f(union tgsi_exec_channel *dst, @@ -4143,7 +5124,12 @@ micro_umsb(union tgsi_exec_channel *dst, dst->i[3] = util_last_bit(src->u[3]) - 1; } -static void +/** + * Execute a TGSI instruction. + * Returns TRUE if a barrier instruction is hit, + * otherwise FALSE. + */ +static boolean exec_instruction( struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, @@ -4299,7 +5285,7 @@ exec_instruction( break; case TGSI_OPCODE_PK2H: - assert (0); + exec_pk2h(mach, inst); break; case TGSI_OPCODE_PK2US: @@ -4378,8 +5364,14 @@ exec_instruction( exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); break; + case TGSI_OPCODE_LODQ: + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_lodq(mach, inst); + break; + case TGSI_OPCODE_UP2H: - assert (0); + exec_up2h(mach, inst); break; case TGSI_OPCODE_UP2US: @@ -4455,8 +5447,12 @@ exec_instruction( /* returning from main() */ mach->CondStackTop = 0; mach->LoopStackTop = 0; + mach->ContStackTop = 0; + mach->LoopLabelStackTop = 0; + mach->SwitchStackTop = 0; + mach->BreakStackTop = 0; *pc = -1; - return; + return FALSE; } assert(mach->CallStackTop > 0); @@ -4881,7 +5877,7 @@ exec_instruction( break; case TGSI_OPCODE_SAMPLE_I_MS: - assert(0); + exec_txf(mach, inst); break; case TGSI_OPCODE_SAMPLE: @@ -4988,11 +5984,11 @@ exec_instruction( break; case TGSI_OPCODE_F2D: - exec_f2d(mach, inst); + exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_D2F: - exec_d2f(mach, inst); + exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DABS: @@ -5064,47 +6060,184 @@ exec_instruction( break; case TGSI_OPCODE_I2D: - exec_i2d(mach, inst); + exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_D2I: - exec_d2i(mach, inst); + exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_U2D: - exec_u2d(mach, inst); + exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_D2U: - exec_d2u(mach, inst); + exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_LOAD: + exec_load(mach, inst); + break; + + case TGSI_OPCODE_STORE: + exec_store(mach, inst); + break; + + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + exec_atomop(mach, inst); + break; + + case TGSI_OPCODE_RESQ: + exec_resq(mach, inst); + break; + case TGSI_OPCODE_BARRIER: + case TGSI_OPCODE_MEMBAR: + return TRUE; + break; + + case TGSI_OPCODE_I64ABS: + exec_double_unary(mach, inst, micro_i64abs); + break; + + case TGSI_OPCODE_I64SSG: + exec_double_unary(mach, inst, micro_i64sgn); + break; + + case TGSI_OPCODE_I64NEG: + exec_double_unary(mach, inst, micro_i64neg); + break; + + case TGSI_OPCODE_U64SEQ: + exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_U64SNE: + exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_I64SLT: + exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_U64SLT: + exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_I64SGE: + exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_U64SGE: + exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_I64MIN: + exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); + break; + case TGSI_OPCODE_U64MIN: + exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); + break; + case TGSI_OPCODE_I64MAX: + exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); + break; + case TGSI_OPCODE_U64MAX: + exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); + break; + case TGSI_OPCODE_U64ADD: + exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); + break; + case TGSI_OPCODE_U64MUL: + exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); + break; + case TGSI_OPCODE_U64SHL: + exec_arg0_64_arg1_32(mach, inst, micro_u64shl); + break; + case TGSI_OPCODE_I64SHR: + exec_arg0_64_arg1_32(mach, inst, micro_i64shr); + break; + case TGSI_OPCODE_U64SHR: + exec_arg0_64_arg1_32(mach, inst, micro_u64shr); + break; + case TGSI_OPCODE_U64DIV: + exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); + break; + case TGSI_OPCODE_I64DIV: + exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); + break; + case TGSI_OPCODE_U64MOD: + exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); + break; + case TGSI_OPCODE_I64MOD: + exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); + break; + + case TGSI_OPCODE_F2U64: + exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_F2I64: + exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2I64: + exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); + break; + case TGSI_OPCODE_I2I64: + exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_D2U64: + exec_double_unary(mach, inst, micro_d2u64); + break; + + case TGSI_OPCODE_D2I64: + exec_double_unary(mach, inst, micro_d2i64); + break; + + case TGSI_OPCODE_U642F: + exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT); break; + case TGSI_OPCODE_I642F: + exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U642D: + exec_double_unary(mach, inst, micro_u642d); + break; + case TGSI_OPCODE_I642D: + exec_double_unary(mach, inst, micro_i642d); + break; + default: assert( 0 ); } + return FALSE; } - -/** - * Run TGSI interpreter. - * \return bitmask of "alive" quad components - */ -uint -tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) +static void +tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) { - uint i; - int pc = 0; uint default_mask = 0xf; mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; mach->Primitives[0] = 0; /* GS runs on a single primitive for now */ default_mask = 0x1; } + if (mach->NonHelperMask == 0) + mach->NonHelperMask = default_mask; mach->CondMask = default_mask; mach->LoopMask = default_mask; mach->ContMask = default_mask; @@ -5119,11 +6252,26 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) assert(mach->SwitchStackTop == 0); assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); +} + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) +{ + uint i; + mach->pc = start_pc; - /* execute declarations (interpolants) */ - for (i = 0; i < mach->NumDeclarations; i++) { - exec_declaration( mach, mach->Declarations+i ); + if (!start_pc) { + tgsi_exec_machine_setup_masks(mach); + + /* execute declarations (interpolants) */ + for (i = 0; i < mach->NumDeclarations; i++) { + exec_declaration( mach, mach->Declarations+i ); + } } { @@ -5132,23 +6280,30 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; uint inst = 1; - memset(mach->Temps, 0, sizeof(temps)); - memset(mach->Outputs, 0, sizeof(outputs)); - memset(temps, 0, sizeof(temps)); - memset(outputs, 0, sizeof(outputs)); + if (!start_pc) { + memset(mach->Temps, 0, sizeof(temps)); + if (mach->Outputs) + memset(mach->Outputs, 0, sizeof(outputs)); + memset(temps, 0, sizeof(temps)); + memset(outputs, 0, sizeof(outputs)); + } #endif /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - + while (mach->pc != -1) { + boolean barrier_hit; #if DEBUG_EXECUTION uint i; - tgsi_dump_instruction(&mach->Instructions[pc], inst++); + tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); #endif - assert(pc < (int) mach->NumInstructions); - exec_instruction(mach, mach->Instructions + pc, &pc); + assert(mach->pc < (int) mach->NumInstructions); + barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); + + /* for compute shaders if we hit a barrier return now for later rescheduling */ + if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) + return 0; #if DEBUG_EXECUTION for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { @@ -5169,21 +6324,23 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) } } } - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { - uint j; - - memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); - debug_printf("OUT[%2u] = ", i); - for (j = 0; j < 4; j++) { - if (j > 0) { - debug_printf(" "); + if (mach->Outputs) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { + uint j; + + memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); + debug_printf("OUT[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } - debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", - outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], - outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], - outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], - outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -5193,7 +6350,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) #if 0 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { /* * Scale back depth component. */ diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h index db5c56b6b..9343d788d 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -88,13 +88,84 @@ struct tgsi_interp_coef float dady[TGSI_NUM_CHANNELS]; }; -enum tgsi_sampler_control { - tgsi_sampler_lod_none, - tgsi_sampler_lod_bias, - tgsi_sampler_lod_explicit, - tgsi_sampler_lod_zero, - tgsi_sampler_derivs_explicit, - tgsi_sampler_gather, +enum tgsi_sampler_control +{ + TGSI_SAMPLER_LOD_NONE, + TGSI_SAMPLER_LOD_BIAS, + TGSI_SAMPLER_LOD_EXPLICIT, + TGSI_SAMPLER_LOD_ZERO, + TGSI_SAMPLER_DERIVS_EXPLICIT, + TGSI_SAMPLER_GATHER, +}; + +struct tgsi_image_params { + unsigned unit; + unsigned tgsi_tex_instr; + enum pipe_format format; + unsigned execmask; +}; + +struct tgsi_image { + /* image interfaces */ + void (*load)(const struct tgsi_image *image, + const struct tgsi_image_params *params, + const int s[TGSI_QUAD_SIZE], + const int t[TGSI_QUAD_SIZE], + const int r[TGSI_QUAD_SIZE], + const int sample[TGSI_QUAD_SIZE], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + + void (*store)(const struct tgsi_image *image, + const struct tgsi_image_params *params, + const int s[TGSI_QUAD_SIZE], + const int t[TGSI_QUAD_SIZE], + const int r[TGSI_QUAD_SIZE], + const int sample[TGSI_QUAD_SIZE], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + + void (*op)(const struct tgsi_image *image, + const struct tgsi_image_params *params, + unsigned opcode, + const int s[TGSI_QUAD_SIZE], + const int t[TGSI_QUAD_SIZE], + const int r[TGSI_QUAD_SIZE], + const int sample[TGSI_QUAD_SIZE], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], + float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + + void (*get_dims)(const struct tgsi_image *image, + const struct tgsi_image_params *params, + int dims[4]); +}; + +struct tgsi_buffer_params { + unsigned unit; + unsigned execmask; + unsigned writemask; +}; + +struct tgsi_buffer { + /* buffer interfaces */ + void (*load)(const struct tgsi_buffer *buffer, + const struct tgsi_buffer_params *params, + const int s[TGSI_QUAD_SIZE], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + + void (*store)(const struct tgsi_buffer *buffer, + const struct tgsi_buffer_params *params, + const int s[TGSI_QUAD_SIZE], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + + void (*op)(const struct tgsi_buffer *buffer, + const struct tgsi_buffer_params *params, + unsigned opcode, + const int s[TGSI_QUAD_SIZE], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], + float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + + void (*get_dims)(const struct tgsi_buffer *buffer, + const struct tgsi_buffer_params *params, + int *dim); }; /** @@ -138,6 +209,16 @@ struct tgsi_sampler const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE], const int lod[TGSI_QUAD_SIZE], const int8_t offset[3], float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + void (*query_lod)(const struct tgsi_sampler *tgsi_sampler, + const unsigned sview_index, + const unsigned sampler_index, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + const enum tgsi_sampler_control control, + float mipmap[TGSI_QUAD_SIZE], + float lod[TGSI_QUAD_SIZE]); }; #define TGSI_EXEC_NUM_TEMPS 4096 @@ -185,21 +266,18 @@ struct tgsi_sampler #define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) #define TGSI_EXEC_TEMP_HALF_C 0 -/* execution mask, each value is either 0 or ~0 */ -#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_MASK_C 1 - /* 4 register buffer for various purposes */ #define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) #define TGSI_EXEC_NUM_TEMP_R 4 #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8) +#define TGSI_EXEC_NUM_ADDRS 3 /* predicate register */ -#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9) +#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 11) #define TGSI_EXEC_NUM_PREDS 1 -#define TGSI_EXEC_NUM_TEMP_EXTRAS 10 +#define TGSI_EXEC_NUM_TEMP_EXTRAS 12 @@ -278,20 +356,22 @@ struct tgsi_exec_machine /* System values */ unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT]; - union tgsi_exec_channel SystemValue[TGSI_MAX_MISC_INPUTS]; + struct tgsi_exec_vector SystemValue[TGSI_MAX_MISC_INPUTS]; struct tgsi_exec_vector *Addrs; struct tgsi_exec_vector *Predicates; struct tgsi_sampler *Sampler; + struct tgsi_image *Image; + struct tgsi_buffer *Buffer; unsigned ImmLimit; const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS]; const struct tgsi_token *Tokens; /**< Declarations, instructions */ - unsigned Processor; /**< TGSI_PROCESSOR_x */ + enum pipe_shader_type ShaderType; /**< PIPE_SHADER_x */ /* GEOMETRY processor only. */ unsigned *Primitives; @@ -304,6 +384,13 @@ struct tgsi_exec_machine struct tgsi_exec_vector QuadPos; float Face; /**< +1 if front facing, -1 if back facing */ bool flatshade_color; + + /* Compute Only */ + void *LocalMem; + unsigned LocalMemSize; + + /* See GLSL 4.50 specification for definition of helper invocations */ + uint NonHelperMask; /**< non-helpers */ /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ uint LoopMask; /**< For BGNLOOP/ENDLOOP */ @@ -358,10 +445,12 @@ struct tgsi_exec_machine SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS]; boolean UsedGeometryShader; + + int pc; }; struct tgsi_exec_machine * -tgsi_exec_machine_create( void ); +tgsi_exec_machine_create(enum pipe_shader_type shader_type); void tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach); @@ -371,11 +460,13 @@ void tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, - struct tgsi_sampler *sampler); + struct tgsi_sampler *sampler, + struct tgsi_image *image, + struct tgsi_buffer *buffer); uint tgsi_exec_machine_run( - struct tgsi_exec_machine *mach ); + struct tgsi_exec_machine *mach, int start_pc ); void @@ -386,27 +477,6 @@ boolean tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst); -static inline void -tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask) -{ - mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = - mask; -} - - -/** Set execution mask values prior to executing the shader */ -static inline void -tgsi_set_exec_mask(struct tgsi_exec_machine *mach, - boolean ch0, boolean ch1, boolean ch2, boolean ch3) -{ - int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i; - mask[0] = ch0 ? ~0 : 0; - mask[1] = ch1 ? ~0 : 0; - mask[2] = ch2 ? ~0 : 0; - mask[3] = ch3 ? ~0 : 0; -} - - extern void tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, unsigned num_bufs, @@ -454,6 +524,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return PIPE_MAX_SHADER_SAMPLER_VIEWS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; case PIPE_SHADER_CAP_DOUBLES: @@ -463,6 +535,11 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return PIPE_MAX_SHADER_BUFFERS; + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return PIPE_MAX_SHADER_IMAGES; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c index fb29ea0d5..37549aae7 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -37,231 +37,256 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, - { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, - { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, - { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ - { 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */ - { 1, 1, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, - { 1, 3, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, - { 1, 1, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, - { 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */ - { 1, 1, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS }, - { 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */ - { 1, 2, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, - { 1, 1, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, - { 1, 1, 0, 0, 0, 0, COMP, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, 0, 0, COMP, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, 0, 0, COMP, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, 0, 0, COMP, "PK4UB", TGSI_OPCODE_PK4UB }, - { 0, 1, 0, 0, 0, 1, NONE, "", 44 }, /* removed */ - { 1, 2, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 0, 1, 0, 0, 0, 1, NONE, "", 46 }, /* removed */ - { 1, 2, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 0, 1, 0, 0, 0, 1, NONE, "", 51 }, /* removed */ - { 1, 2, 1, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, 0, 0, COMP, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 0, 0, COMP, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, 0, 0, COMP, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, 0, 0, COMP, "UP4UB", TGSI_OPCODE_UP4UB }, - { 0, 1, 0, 0, 0, 1, NONE, "", 59 }, /* removed */ - { 0, 1, 0, 0, 0, 1, NONE, "", 60 }, /* removed */ - { 1, 1, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, 0, 1, NONE, "", 62 }, /* removed */ - { 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, - { 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, - { 1, 1, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, - { 1, 3, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, - { 1, 1, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, - { 1, 2, 1, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, - { 0, 1, 0, 0, 0, 1, NONE, "", 69 }, /* removed */ - { 1, 2, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, - { 1, 2, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, - { 1, 2, 1, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, - { 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, - { 0, 1, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, - { 0, 1, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF }, - { 0, 1, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ - { 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, - { 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, - { 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, - { 0, 1, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, - { 1, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, - { 1, 1, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, - { 1, 1, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, - { 1, 1, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, - { 1, 1, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, - { 1, 2, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, - { 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */ - { 1, 2, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, - { 1, 2, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, - { 1, 2, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, - { 1, 2, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, - { 1, 3, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, - { 1, 2, 1, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, - { 1, 2, 1, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, - { 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, - { 0, 1, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 1, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, - { 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, - { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, - { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */ - { 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */ - { 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */ - { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, - { 1, 2, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ }, - { 1, 2, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, - { 1, 2, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, - { 1, 2, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, - { 0, 1, 0, 0, 0, 1, NONE, "", 112 }, /* removed */ - { 0, 1, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, - { 0, 1, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ - { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, - { 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, - { 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, - { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, - { 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, - { 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, - { 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, - { 1, 2, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN }, - { 1, 1, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG }, - { 1, 2, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE }, - { 1, 2, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR }, - { 1, 2, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT }, - { 1, 1, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U }, - { 1, 1, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F }, - { 1, 2, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD }, - { 1, 2, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV }, - { 1, 3, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD }, - { 1, 2, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX }, - { 1, 2, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN }, - { 1, 2, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD }, - { 1, 2, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL }, - { 1, 2, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ }, - { 1, 2, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE }, - { 1, 2, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR }, - { 1, 2, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT }, - { 1, 2, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE }, - { 0, 1, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH }, - { 0, 1, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE }, - { 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT }, - { 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, + { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, + { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, + { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, + { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, + { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, + { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, + { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D }, + { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, + { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, + { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F }, + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D }, + { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, + { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, + { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, + { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */ + { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, + { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, + { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, + { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, + { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF }, + { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ + { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, + { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, + { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */ + { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, + { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, + { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, + { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, + { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, + { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, + { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS }, + { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */ + { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, + { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ + { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, - { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE }, - { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I }, - { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS }, - { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, - { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, - { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, - { 1, 5, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, - { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, - { 1, 3, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 }, - { 1, 2, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO }, - { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, - { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, - { 1, 1, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL }, - { 1, 3, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP }, - { 1, 1, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS }, - { 1, 1, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG }, - { 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD }, - { 1, 2, 0, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE }, - { 1, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE }, - { 1, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE }, - { 1, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE }, - { 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER }, + { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE }, + { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I }, + { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS }, + { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, + { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, + { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, + { 1, 5, 0, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, + { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, + { 1, 3, 0, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 }, + { 1, 2, 0, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO }, + { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, + { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG }, + { 1, 2, 0, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD }, + { 1, 2, 0, 1, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE }, + { 1, 0, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE }, + { 1, 0, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE }, + { 1, 0, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE }, + { 0, 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG }, - { 1, 4, 0, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN }, - { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX }, - { 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 }, - { 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 }, - { 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, - { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, - { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, - { 1, 3, 1, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 }, - { 1, 2, 1, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ }, - { 1, 3, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE }, - { 1, 3, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE }, - { 1, 4, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI }, - { 1, 1, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV }, - { 1, 1, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC }, - { 1, 1, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB }, - { 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB }, - { 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB }, - { 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID }, - { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE }, - { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET }, - { 1, 1, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D }, - { 1, 1, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F }, - { 1, 1, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS }, - { 1, 1, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG }, - { 1, 2, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD }, - { 1, 2, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL }, - { 1, 2, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX }, - { 1, 2, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN }, - { 1, 2, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT }, - { 1, 2, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE }, - { 1, 2, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ }, - { 1, 2, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE }, - { 1, 1, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP }, - { 1, 1, 0, 0 ,0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT }, - { 1, 3, 0, 0 ,0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD }, - { 1, 1, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC}, - { 1, 2, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP}, - { 2, 1, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP}, - { 1, 1, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I }, - { 1, 1, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D }, - { 1, 1, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U }, - { 1, 1, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D }, - { 1, 1, 0, 0 ,0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ }, - { 1, 1, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC }, - { 1, 1, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL }, - { 1, 1, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR }, - { 1, 1, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND }, - { 1, 1, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG }, + { 1, 4, 0, 1, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN }, + { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX }, + { 1, 3, 1, 0, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 }, + { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 }, + { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, + { 1, 3, 1, 0, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 }, + { 1, 2, 1, 0, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE }, + { 1, 4, 0, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB }, + { 1, 1, 0, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID }, + { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE }, + { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT }, + { 1, 3, 0, 0, 0, 0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC}, + { 1, 2, 0, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP}, + { 2, 1, 0, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP}, + { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD }, }; const struct tgsi_opcode_info * @@ -272,7 +297,7 @@ tgsi_get_opcode_info( uint opcode ) if (firsttime) { unsigned i; firsttime = 0; - for (i = 0; i < Elements(opcode_info); i++) + for (i = 0; i < ARRAY_SIZE(opcode_info); i++) assert(opcode_info[i].opcode == i); } @@ -296,15 +321,15 @@ const char * tgsi_get_processor_name( uint processor ) { switch (processor) { - case TGSI_PROCESSOR_VERTEX: + case PIPE_SHADER_VERTEX: return "vertex shader"; - case TGSI_PROCESSOR_FRAGMENT: + case PIPE_SHADER_FRAGMENT: return "fragment shader"; - case TGSI_PROCESSOR_GEOMETRY: + case PIPE_SHADER_GEOMETRY: return "geometry shader"; - case TGSI_PROCESSOR_TESS_CTRL: + case PIPE_SHADER_TESS_CTRL: return "tessellation control shader"; - case TGSI_PROCESSOR_TESS_EVAL: + case PIPE_SHADER_TESS_EVAL: return "tessellation evaluation shader"; default: return "unknown shader type!"; @@ -331,6 +356,7 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */ case TGSI_OPCODE_TXQ: case TGSI_OPCODE_TXQ_LZ: + case TGSI_OPCODE_TXQS: case TGSI_OPCODE_F2U: case TGSI_OPCODE_UDIV: case TGSI_OPCODE_UMAD: @@ -378,6 +404,12 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: return TGSI_TYPE_SIGNED; case TGSI_OPCODE_DADD: case TGSI_OPCODE_DABS: @@ -401,7 +433,33 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_F2D: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: + case TGSI_OPCODE_U642D: + case TGSI_OPCODE_I642D: return TGSI_TYPE_DOUBLE; + case TGSI_OPCODE_U64MAX: + case TGSI_OPCODE_U64MIN: + case TGSI_OPCODE_U64ADD: + case TGSI_OPCODE_U64MUL: + case TGSI_OPCODE_U64DIV: + case TGSI_OPCODE_U64MOD: + case TGSI_OPCODE_U64SHL: + case TGSI_OPCODE_U64SHR: + case TGSI_OPCODE_F2U64: + case TGSI_OPCODE_D2U64: + return TGSI_TYPE_UNSIGNED64; + case TGSI_OPCODE_I64MAX: + case TGSI_OPCODE_I64MIN: + case TGSI_OPCODE_I64ABS: + case TGSI_OPCODE_I64SSG: + case TGSI_OPCODE_I64NEG: + case TGSI_OPCODE_I64SHR: + case TGSI_OPCODE_I64DIV: + case TGSI_OPCODE_I64MOD: + case TGSI_OPCODE_F2I64: + case TGSI_OPCODE_U2I64: + case TGSI_OPCODE_I2I64: + case TGSI_OPCODE_D2I64: + return TGSI_TYPE_SIGNED64; default: return TGSI_TYPE_FLOAT; } @@ -425,10 +483,14 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_SAMPLE_I: case TGSI_OPCODE_SAMPLE_I_MS: case TGSI_OPCODE_UMUL_HI: + case TGSI_OPCODE_UP2H: + case TGSI_OPCODE_U2I64: + case TGSI_OPCODE_MEMBAR: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_I2F: case TGSI_OPCODE_I2D: + case TGSI_OPCODE_I2I64: return TGSI_TYPE_SIGNED; case TGSI_OPCODE_ARL: case TGSI_OPCODE_ARR: @@ -441,6 +503,8 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_FSLT: case TGSI_OPCODE_FSNE: case TGSI_OPCODE_UCMP: + case TGSI_OPCODE_F2U64: + case TGSI_OPCODE_F2I64: return TGSI_TYPE_FLOAT; case TGSI_OPCODE_D2F: case TGSI_OPCODE_D2U: @@ -449,7 +513,21 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_D2U64: + case TGSI_OPCODE_D2I64: return TGSI_TYPE_DOUBLE; + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_U642F: + case TGSI_OPCODE_U642D: + return TGSI_TYPE_UNSIGNED64; + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_I642D: + return TGSI_TYPE_SIGNED64; default: return tgsi_opcode_infer_type(opcode); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c index a3b90bdb5..b0a28f271 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -264,14 +264,13 @@ transform_dst(struct tgsi_transform_context *tctx, * dst.z = src0.x \times src1.y - src1.x \times src0.y * dst.w = 1.0 * - * ; needs: 2 tmp, imm{1.0} - * MUL tmpA.xyz, src0.yzx, src1.zxy - * MUL tmpB.xyz, src1.yzx, src0.zxy - * SUB dst.xyz, tmpA.xyz, tmpB.xyz + * ; needs: 1 tmp, imm{1.0} + * MUL tmpA.xyz, src1.yzx, src0.zxy + * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz * MOV dst.w, imm{1.0} */ -#define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2)) -#define XPD_TMP 2 +#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2)) +#define XPD_TMP 1 static void transform_xpd(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) @@ -283,34 +282,26 @@ transform_xpd(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { - /* MUL tmpA.xyz, src0.yzx, src1.zxy */ + /* MUL tmpA.xyz, src1.yzx, src0.zxy */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _)); - reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _)); - tctx->emit_instruction(tctx, &new_inst); - - /* MUL tmpB.xyz, src1.yzx, src0.zxy */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ); - new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _)); reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _)); tctx->emit_instruction(tctx, &new_inst); - /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */ + /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); - reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, _)); + new_inst.Instruction.NumSrcRegs = 3; + reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _)); + reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _)); + reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); + new_inst.Src[2].Register.Negate = true; tctx->emit_instruction(tctx, &new_inst); } @@ -397,14 +388,15 @@ transform_scs(struct tgsi_transform_context *tctx, * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w * - * ; needs: 2 tmp, imm{1.0} - * MUL tmpA, src0, src1 - * SUB tmpB, imm{1.0}, src0 - * MUL tmpB, tmpB, src2 - * ADD dst, tmpA, tmpB + * This becomes: src0 \times src1 + src2 - src0 \times src2, which + * can then become: src0 \times src1 - (src0 \times src2 - src2) + * + * ; needs: 1 tmp + * MAD tmpA, src0, src2, -src2 + * MAD dst, src0, src1, -tmpA */ -#define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3)) -#define LRP_TMP 2 +#define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) +#define LRP_TMP 1 static void transform_lrp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) @@ -417,44 +409,28 @@ transform_lrp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { - /* MUL tmpA, src0, src1 */ + /* MAD tmpA, src0, src2, -src2 */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; + new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 2; + new_inst.Instruction.NumSrcRegs = 3; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &new_inst); - - /* SUB tmpB, imm{1.0}, src0 */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y)); - reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &new_inst); - - /* MUL tmpB, tmpB, src2 */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); + new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; tctx->emit_instruction(tctx, &new_inst); - /* ADD dst, tmpA, tmpB */ + /* MAD dst, src0, src1, -tmpA */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; + new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W)); + new_inst.Instruction.NumSrcRegs = 3; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[2].Register.Negate = true; tctx->emit_instruction(tctx, &new_inst); } } @@ -676,14 +652,19 @@ transform_lit(struct tgsi_transform_context *tctx, * dst.w = 1.0 * * ; needs: 1 tmp, imm{1.0} - * FLR tmpA.x, src.x + * if (lowering FLR) { + * FRC tmpA.x, src.x + * SUB tmpA.x, src.x, tmpA.x + * } else { + * FLR tmpA.x, src.x + * } * EX2 tmpA.y, src.x * SUB dst.y, src.x, tmpA.x * EX2 dst.x, tmpA.x * MOV dst.z, tmpA.y * MOV dst.w, imm{1.0} */ -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ NINST(1)+ NINST(1) - OINST(1)) #define EXP_TMP 1 static void @@ -696,14 +677,35 @@ transform_exp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - /* FLR tmpA.x, src.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - tctx->emit_instruction(tctx, &new_inst); + if (ctx->config->lower_FLR) { + /* FRC tmpA.x, src.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + + /* SUB tmpA.x, src.x, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } else { + /* FLR tmpA.x, src.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } } if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { @@ -771,14 +773,19 @@ transform_exp(struct tgsi_transform_context *tctx, * * ; needs: 1 tmp, imm{1.0} * LG2 tmpA.x, |src.x| - * FLR tmpA.y, tmpA.x + * if (lowering FLR) { + * FRC tmpA.y, tmpA.x + * SUB tmpA.y, tmpA.x, tmpA.y + * } else { + * FLR tmpA.y, tmpA.x + * } * EX2 tmpA.z, tmpA.y * RCP tmpA.z, tmpA.z * MUL dst.y, |src.x|, tmpA.z * MOV dst.xz, tmpA.yx * MOV dst.w, imm{1.0} */ -#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \ +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ NINST(2) + NINST(1) + NINST(1) - OINST(1)) #define LOG_TMP 1 static void @@ -803,14 +810,35 @@ transform_log(struct tgsi_transform_context *tctx, } if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - /* FLR tmpA.y, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); - tctx->emit_instruction(tctx, &new_inst); + if (ctx->config->lower_FLR) { + /* FRC tmpA.y, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + tctx->emit_instruction(tctx, &new_inst); + + /* SUB tmpA.y, tmpA.x, tmpA.y */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } else { + /* FLR tmpA.y, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } } if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { @@ -1005,6 +1033,131 @@ transform_dotp(struct tgsi_transform_context *tctx, } } +/* FLR - floor, CEIL - ceil + * ; needs: 1 tmp + * if (CEIL) { + * FRC tmpA, -src + * ADD dst, src, tmpA + * } else { + * FRC tmpA, src + * SUB dst, src, tmpA + * } + */ +#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) +#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) +#define FLR_TMP 1 +#define CEIL_TMP 1 +static void +transform_flr_ceil(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); + struct tgsi_full_dst_register *dst = &inst->Dst[0]; + struct tgsi_full_src_register *src0 = &inst->Src[0]; + struct tgsi_full_instruction new_inst; + unsigned opcode = inst->Instruction.Opcode; + + if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { + /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + + if (opcode == TGSI_OPCODE_CEIL) + new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; + tctx->emit_instruction(tctx, &new_inst); + + /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ + new_inst = tgsi_default_full_instruction(); + if (opcode == TGSI_OPCODE_CEIL) + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; + else + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &new_inst); + } +} + +/* TRUNC - truncate off fractional part + * dst.x = trunc(src.x) + * dst.y = trunc(src.y) + * dst.z = trunc(src.z) + * dst.w = trunc(src.w) + * + * ; needs: 1 tmp + * if (lower FLR) { + * FRC tmpA, |src| + * SUB tmpA, |src|, tmpA + * } else { + * FLR tmpA, |src| + * } + * CMP dst, src, -tmpA, tmpA + */ +#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) +#define TRUNC_TMP 1 +static void +transform_trunc(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); + struct tgsi_full_dst_register *dst = &inst->Dst[0]; + struct tgsi_full_src_register *src0 = &inst->Src[0]; + struct tgsi_full_instruction new_inst; + + if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { + if (ctx->config->lower_FLR) { + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + new_inst.Src[0].Register.Absolute = true; + new_inst.Src[0].Register.Negate = false; + tctx->emit_instruction(tctx, &new_inst); + + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + new_inst.Src[0].Register.Absolute = true; + new_inst.Src[0].Register.Negate = false; + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &new_inst); + } else { + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + new_inst.Src[0].Register.Absolute = true; + new_inst.Src[0].Register.Negate = false; + tctx->emit_instruction(tctx, &new_inst); + } + + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 3; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[1].Register.Negate = true; + reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &new_inst); + } +} + /* Inserts a MOV_SAT for the needed components of tex coord. Note that * in the case of TXP, the clamping must happen *after* projection, so * we need to lower TXP to TEX. @@ -1401,6 +1554,21 @@ transform_instr(struct tgsi_transform_context *tctx, goto skip; transform_dotp(tctx, inst); break; + case TGSI_OPCODE_FLR: + if (!ctx->config->lower_FLR) + goto skip; + transform_flr_ceil(tctx, inst); + break; + case TGSI_OPCODE_CEIL: + if (!ctx->config->lower_CEIL) + goto skip; + transform_flr_ceil(tctx, inst); + break; + case TGSI_OPCODE_TRUNC: + if (!ctx->config->lower_TRUNC) + goto skip; + transform_trunc(tctx, inst); + break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: case TGSI_OPCODE_TXB: @@ -1430,7 +1598,11 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, int newlen, numtmp; /* sanity check in case limit is ever increased: */ - assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); + STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); + + /* sanity check the lowering */ + assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); + assert(!(config->lower_FRC && config->lower_TRUNC)); memset(&ctx, 0, sizeof(ctx)); ctx.base.transform_instruction = transform_instr; @@ -1443,7 +1615,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, * color, then figure out the number of additional inputs we need * to create for BCOLOR's.. */ - if ((info->processor == TGSI_PROCESSOR_FRAGMENT) && + if ((info->processor == PIPE_SHADER_FRAGMENT) && config->color_two_side) { int i; ctx.face_idx = -1; @@ -1473,6 +1645,9 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, OPCS(DPH) || OPCS(DP2) || OPCS(DP2A) || + OPCS(FLR) || + OPCS(CEIL) || + OPCS(TRUNC) || OPCS(TXP) || ctx.two_side_colors || ctx.saturate)) @@ -1541,6 +1716,18 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += DP2A_GROW * OPCS(DP2A); numtmp = MAX2(numtmp, DOTP_TMP); } + if (OPCS(FLR)) { + newlen += FLR_GROW * OPCS(FLR); + numtmp = MAX2(numtmp, FLR_TMP); + } + if (OPCS(CEIL)) { + newlen += CEIL_GROW * OPCS(CEIL); + numtmp = MAX2(numtmp, CEIL_TMP); + } + if (OPCS(TRUNC)) { + newlen += TRUNC_GROW * OPCS(TRUNC); + numtmp = MAX2(numtmp, TRUNC_TMP); + } if (ctx.saturate || config->lower_TXP) { int n = 0; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c index 0729b5d24..940af7d30 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -121,8 +121,8 @@ tgsi_parse_token( next_token( ctx, &decl->Semantic ); } - if (decl->Declaration.File == TGSI_FILE_RESOURCE) { - next_token(ctx, &decl->Resource); + if (decl->Declaration.File == TGSI_FILE_IMAGE) { + next_token(ctx, &decl->Image); } if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { @@ -155,12 +155,14 @@ tgsi_parse_token( break; case TGSI_IMM_UINT32: + case TGSI_IMM_UINT64: for (i = 0; i < imm_count; i++) { next_token(ctx, &imm->u[i].Uint); } break; case TGSI_IMM_INT32: + case TGSI_IMM_INT64: for (i = 0; i < imm_count; i++) { next_token(ctx, &imm->u[i].Int); } @@ -195,6 +197,10 @@ tgsi_parse_token( } } + if (inst->Instruction.Memory) { + next_token(ctx, &inst->Memory); + } + assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); for (i = 0; i < inst->Instruction.NumDstRegs; i++) { @@ -309,7 +315,7 @@ tgsi_dump_tokens(const struct tgsi_token *tokens) int nr = tgsi_num_tokens(tokens); int i; - assert(sizeof(*tokens) == sizeof(unsigned)); + STATIC_ASSERT(sizeof(*tokens) == sizeof(unsigned)); debug_printf("const unsigned tokens[%d] = {\n", nr); for (i = 0; i < nr; i++) diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h index 35e1c7cfd..4689fb797 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -64,7 +64,7 @@ struct tgsi_full_declaration struct tgsi_declaration_dimension Dim; struct tgsi_declaration_interp Interp; struct tgsi_declaration_semantic Semantic; - struct tgsi_declaration_resource Resource; + struct tgsi_declaration_image Image; struct tgsi_declaration_sampler_view SamplerView; struct tgsi_declaration_array Array; }; @@ -91,6 +91,7 @@ struct tgsi_full_instruction struct tgsi_instruction_predicate Predicate; struct tgsi_instruction_label Label; struct tgsi_instruction_texture Texture; + struct tgsi_instruction_memory Memory; struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS]; struct tgsi_full_src_register Src[TGSI_FULL_MAX_SRC_REGISTERS]; struct tgsi_texture_offset TexOffsets[TGSI_FULL_MAX_TEX_OFFSETS]; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c index cb8dbcb29..713bd609d 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c @@ -96,7 +96,7 @@ struct psprite_transform_context unsigned stream_out_point_pos:1; // set if to stream out original point pos unsigned aa_point:1; // set if doing aa point unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS]; - int max_generic; + int max_generic; // max generic semantic index }; static inline struct psprite_transform_context * @@ -133,7 +133,7 @@ psprite_decl(struct tgsi_transform_context *ctx, else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && decl->Semantic.Index < 32) { ts->point_coord_decl |= 1 << decl->Semantic.Index; - ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index); + ts->max_generic = MAX2(ts->max_generic, (int)decl->Semantic.Index); } ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1); } @@ -216,7 +216,7 @@ psprite_prolog(struct tgsi_transform_context *ctx) if (en & 0x1) { tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC, i, 0); - ts->max_generic = MAX2(ts->max_generic, i); + ts->max_generic = MAX2(ts->max_generic, (int)i); } } } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c index d14372feb..239a2c938 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -321,7 +321,7 @@ iter_instruction( } info = tgsi_get_opcode_info( inst->Instruction.Opcode ); - if (info == NULL) { + if (!info) { report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode ); return TRUE; } @@ -414,9 +414,9 @@ iter_declaration( decl->Semantic.Name == TGSI_SEMANTIC_TESSOUTER || decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER; if (file == TGSI_FILE_INPUT && !patch && ( - processor == TGSI_PROCESSOR_GEOMETRY || - processor == TGSI_PROCESSOR_TESS_CTRL || - processor == TGSI_PROCESSOR_TESS_EVAL)) { + processor == PIPE_SHADER_GEOMETRY || + processor == PIPE_SHADER_TESS_CTRL || + processor == PIPE_SHADER_TESS_EVAL)) { uint vert; for (vert = 0; vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); @@ -424,7 +424,7 @@ iter_declaration( check_and_declare(ctx, reg); } } else if (file == TGSI_FILE_OUTPUT && !patch && - processor == TGSI_PROCESSOR_TESS_CTRL) { + processor == PIPE_SHADER_TESS_CTRL) { uint vert; for (vert = 0; vert < ctx->implied_out_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); @@ -485,11 +485,11 @@ iter_property( { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY && + if (iter->processor.Processor == PIPE_SHADER_GEOMETRY && prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); } - if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL && + if (iter->processor.Processor == PIPE_SHADER_TESS_CTRL && prop->Property.PropertyName == TGSI_PROPERTY_TCS_VERTICES_OUT) ctx->implied_out_array_size = prop->u[0].Data; return TRUE; @@ -499,8 +499,8 @@ static boolean prolog(struct tgsi_iterate_context *iter) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL || - iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL) + if (iter->processor.Processor == PIPE_SHADER_TESS_CTRL || + iter->processor.Processor == PIPE_SHADER_TESS_EVAL) ctx->implied_array_size = 32; return TRUE; } @@ -559,6 +559,7 @@ tgsi_sanity_check( const struct tgsi_token *tokens ) { struct sanity_check_ctx ctx; + boolean retval; ctx.iter.prolog = prolog; ctx.iter.iterate_instruction = iter_instruction; @@ -580,11 +581,12 @@ tgsi_sanity_check( ctx.implied_array_size = 0; ctx.print = debug_get_option_print_sanity(); - if (!tgsi_iterate_shader( tokens, &ctx.iter )) - return FALSE; - + retval = tgsi_iterate_shader( tokens, &ctx.iter ); regs_hash_destroy(ctx.regs_decl); regs_hash_destroy(ctx.regs_used); regs_hash_destroy(ctx.regs_ind_used); + if (retval == FALSE) + return FALSE; + return ctx.errors == 0; } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c index 7523baf4c..b86207883 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -38,11 +38,521 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_prim.h" +#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_scan.h" +static bool +is_memory_file(unsigned file) +{ + return file == TGSI_FILE_SAMPLER || + file == TGSI_FILE_SAMPLER_VIEW || + file == TGSI_FILE_IMAGE || + file == TGSI_FILE_BUFFER; +} + + +/** + * Is the opcode a "true" texture instruction which samples from a + * texture map? + */ +static bool +is_texture_inst(unsigned opcode) +{ + return (opcode != TGSI_OPCODE_TXQ && + opcode != TGSI_OPCODE_TXQS && + opcode != TGSI_OPCODE_TXQ_LZ && + opcode != TGSI_OPCODE_LODQ && + tgsi_get_opcode_info(opcode)->is_tex); +} + + +/** + * Is the opcode an instruction which computes a derivative explicitly or + * implicitly? + */ +static bool +computes_derivative(unsigned opcode) +{ + if (tgsi_get_opcode_info(opcode)->is_tex) { + return opcode != TGSI_OPCODE_TG4 && + opcode != TGSI_OPCODE_TXD && + opcode != TGSI_OPCODE_TXF && + opcode != TGSI_OPCODE_TXL && + opcode != TGSI_OPCODE_TXL2 && + opcode != TGSI_OPCODE_TXQ && + opcode != TGSI_OPCODE_TXQ_LZ && + opcode != TGSI_OPCODE_TXQS; + } + + return opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE || + opcode == TGSI_OPCODE_DDY || opcode == TGSI_OPCODE_DDY_FINE || + opcode == TGSI_OPCODE_SAMPLE || + opcode == TGSI_OPCODE_SAMPLE_B || + opcode == TGSI_OPCODE_SAMPLE_C; +} + + +static void +scan_instruction(struct tgsi_shader_info *info, + const struct tgsi_full_instruction *fullinst, + unsigned *current_depth) +{ + unsigned i; + bool is_mem_inst = false; + bool is_interp_instruction = false; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_BGNLOOP: + (*current_depth)++; + info->max_depth = MAX2(info->max_depth, *current_depth); + break; + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ENDLOOP: + (*current_depth)--; + break; + default: + break; + } + + if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID || + fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || + fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { + const struct tgsi_full_src_register *src0 = &fullinst->Src[0]; + unsigned input; + + is_interp_instruction = true; + + if (src0->Register.Indirect && src0->Indirect.ArrayID) + input = info->input_array_first[src0->Indirect.ArrayID]; + else + input = src0->Register.Index; + + /* For the INTERP opcodes, the interpolation is always + * PERSPECTIVE unless LINEAR is specified. + */ + switch (info->input_interpolate[input]) { + case TGSI_INTERPOLATE_COLOR: + case TGSI_INTERPOLATE_CONSTANT: + case TGSI_INTERPOLATE_PERSPECTIVE: + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_INTERP_CENTROID: + info->uses_persp_opcode_interp_centroid = TRUE; + break; + case TGSI_OPCODE_INTERP_OFFSET: + info->uses_persp_opcode_interp_offset = TRUE; + break; + case TGSI_OPCODE_INTERP_SAMPLE: + info->uses_persp_opcode_interp_sample = TRUE; + break; + } + break; + + case TGSI_INTERPOLATE_LINEAR: + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_INTERP_CENTROID: + info->uses_linear_opcode_interp_centroid = TRUE; + break; + case TGSI_OPCODE_INTERP_OFFSET: + info->uses_linear_opcode_interp_offset = TRUE; + break; + case TGSI_OPCODE_INTERP_SAMPLE: + info->uses_linear_opcode_interp_sample = TRUE; + break; + } + break; + } + } + + if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && + fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) + info->uses_doubles = TRUE; + + for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = &fullinst->Src[i]; + int ind = src->Register.Index; + + /* Mark which inputs are effectively used */ + if (src->Register.File == TGSI_FILE_INPUT) { + unsigned usage_mask; + usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i); + if (src->Register.Indirect) { + for (ind = 0; ind < info->num_inputs; ++ind) { + info->input_usage_mask[ind] |= usage_mask; + } + } else { + assert(ind >= 0); + assert(ind < PIPE_MAX_SHADER_INPUTS); + info->input_usage_mask[ind] |= usage_mask; + } + + if (info->processor == PIPE_SHADER_FRAGMENT) { + unsigned name, index, input; + + if (src->Register.Indirect && src->Indirect.ArrayID) + input = info->input_array_first[src->Indirect.ArrayID]; + else + input = src->Register.Index; + + name = info->input_semantic_name[input]; + index = info->input_semantic_index[input]; + + if (name == TGSI_SEMANTIC_POSITION && + (src->Register.SwizzleX == TGSI_SWIZZLE_Z || + src->Register.SwizzleY == TGSI_SWIZZLE_Z || + src->Register.SwizzleZ == TGSI_SWIZZLE_Z || + src->Register.SwizzleW == TGSI_SWIZZLE_Z)) + info->reads_z = TRUE; + + if (name == TGSI_SEMANTIC_COLOR) { + unsigned mask = + (1 << src->Register.SwizzleX) | + (1 << src->Register.SwizzleY) | + (1 << src->Register.SwizzleZ) | + (1 << src->Register.SwizzleW); + + info->colors_read |= mask << (index * 4); + } + + /* Process only interpolated varyings. Don't include POSITION. + * Don't include integer varyings, because they are not + * interpolated. Don't process inputs interpolated by INTERP + * opcodes. Those are tracked separately. + */ + if ((!is_interp_instruction || i != 0) && + (name == TGSI_SEMANTIC_GENERIC || + name == TGSI_SEMANTIC_TEXCOORD || + name == TGSI_SEMANTIC_COLOR || + name == TGSI_SEMANTIC_BCOLOR || + name == TGSI_SEMANTIC_FOG || + name == TGSI_SEMANTIC_CLIPDIST)) { + switch (info->input_interpolate[input]) { + case TGSI_INTERPOLATE_COLOR: + case TGSI_INTERPOLATE_PERSPECTIVE: + switch (info->input_interpolate_loc[input]) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_persp_center = TRUE; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_persp_centroid = TRUE; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_persp_sample = TRUE; + break; + } + break; + case TGSI_INTERPOLATE_LINEAR: + switch (info->input_interpolate_loc[input]) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_linear_center = TRUE; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_linear_centroid = TRUE; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_linear_sample = TRUE; + break; + } + break; + /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */ + } + } + } + } + + /* check for indirect register reads */ + if (src->Register.Indirect) { + info->indirect_files |= (1 << src->Register.File); + info->indirect_files_read |= (1 << src->Register.File); + } + + /* Texture samplers */ + if (src->Register.File == TGSI_FILE_SAMPLER) { + const unsigned index = src->Register.Index; + + assert(fullinst->Instruction.Texture); + assert(index < ARRAY_SIZE(info->is_msaa_sampler)); + assert(index < PIPE_MAX_SAMPLERS); + + if (is_texture_inst(fullinst->Instruction.Opcode)) { + const unsigned target = fullinst->Texture.Texture; + assert(target < TGSI_TEXTURE_UNKNOWN); + /* for texture instructions, check that the texture instruction + * target matches the previous sampler view declaration (if there + * was one.) + */ + if (info->sampler_targets[index] == TGSI_TEXTURE_UNKNOWN) { + /* probably no sampler view declaration */ + info->sampler_targets[index] = target; + } else { + /* Make sure the texture instruction's sampler/target info + * agrees with the sampler view declaration. + */ + assert(info->sampler_targets[index] == target); + } + /* MSAA samplers */ + if (target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA) { + info->is_msaa_sampler[src->Register.Index] = TRUE; + } + } + } + + if (is_memory_file(src->Register.File)) { + is_mem_inst = true; + + if (tgsi_get_opcode_info(fullinst->Instruction.Opcode)->is_store) { + info->writes_memory = TRUE; + + if (src->Register.File == TGSI_FILE_IMAGE && + !src->Register.Indirect) + info->images_writemask |= 1 << src->Register.Index; + } + } + } + + /* check for indirect register writes */ + for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; + if (dst->Register.Indirect) { + info->indirect_files |= (1 << dst->Register.File); + info->indirect_files_written |= (1 << dst->Register.File); + } + + if (is_memory_file(dst->Register.File)) { + assert(fullinst->Instruction.Opcode == TGSI_OPCODE_STORE); + + is_mem_inst = true; + info->writes_memory = TRUE; + + if (dst->Register.File == TGSI_FILE_IMAGE && + !dst->Register.Indirect) + info->images_writemask |= 1 << dst->Register.Index; + } + } + + if (is_mem_inst) + info->num_memory_instructions++; + + if (computes_derivative(fullinst->Instruction.Opcode)) + info->uses_derivatives = true; + + info->num_instructions++; +} + + +static void +scan_declaration(struct tgsi_shader_info *info, + const struct tgsi_full_declaration *fulldecl) +{ + const uint file = fulldecl->Declaration.File; + const unsigned procType = info->processor; + uint reg; + + if (fulldecl->Declaration.Array) { + unsigned array_id = fulldecl->Array.ArrayID; + + switch (file) { + case TGSI_FILE_INPUT: + assert(array_id < ARRAY_SIZE(info->input_array_first)); + info->input_array_first[array_id] = fulldecl->Range.First; + info->input_array_last[array_id] = fulldecl->Range.Last; + break; + case TGSI_FILE_OUTPUT: + assert(array_id < ARRAY_SIZE(info->output_array_first)); + info->output_array_first[array_id] = fulldecl->Range.First; + info->output_array_last[array_id] = fulldecl->Range.Last; + break; + } + info->array_max[file] = MAX2(info->array_max[file], array_id); + } + + for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) { + unsigned semName = fulldecl->Semantic.Name; + unsigned semIndex = fulldecl->Semantic.Index + + (reg - fulldecl->Range.First); + + /* only first 32 regs will appear in this bitfield */ + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); + + if (file == TGSI_FILE_CONSTANT) { + int buffer = 0; + + if (fulldecl->Declaration.Dimension) + buffer = fulldecl->Dim.Index2D; + + info->const_file_max[buffer] = + MAX2(info->const_file_max[buffer], (int)reg); + } + else if (file == TGSI_FILE_INPUT) { + info->input_semantic_name[reg] = (ubyte) semName; + info->input_semantic_index[reg] = (ubyte) semIndex; + info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; + info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; + info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; + + /* Vertex shaders can have inputs with holes between them. */ + info->num_inputs = MAX2(info->num_inputs, reg + 1); + + if (semName == TGSI_SEMANTIC_PRIMID) + info->uses_primid = TRUE; + else if (procType == PIPE_SHADER_FRAGMENT) { + if (semName == TGSI_SEMANTIC_POSITION) + info->reads_position = TRUE; + else if (semName == TGSI_SEMANTIC_FACE) + info->uses_frontface = TRUE; + } + } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + unsigned index = fulldecl->Range.First; + + info->system_value_semantic_name[index] = semName; + info->num_system_values = MAX2(info->num_system_values, index + 1); + + switch (semName) { + case TGSI_SEMANTIC_INSTANCEID: + info->uses_instanceid = TRUE; + break; + case TGSI_SEMANTIC_VERTEXID: + info->uses_vertexid = TRUE; + break; + case TGSI_SEMANTIC_VERTEXID_NOBASE: + info->uses_vertexid_nobase = TRUE; + break; + case TGSI_SEMANTIC_BASEVERTEX: + info->uses_basevertex = TRUE; + break; + case TGSI_SEMANTIC_PRIMID: + info->uses_primid = TRUE; + break; + case TGSI_SEMANTIC_INVOCATIONID: + info->uses_invocationid = TRUE; + break; + case TGSI_SEMANTIC_POSITION: + info->reads_position = TRUE; + break; + case TGSI_SEMANTIC_FACE: + info->uses_frontface = TRUE; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->reads_samplemask = TRUE; + break; + } + } + else if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[reg] = (ubyte) semName; + info->output_semantic_index[reg] = (ubyte) semIndex; + info->num_outputs = MAX2(info->num_outputs, reg + 1); + + if (semName == TGSI_SEMANTIC_COLOR) + info->colors_written |= 1 << semIndex; + + if (procType == PIPE_SHADER_VERTEX || + procType == PIPE_SHADER_GEOMETRY || + procType == PIPE_SHADER_TESS_CTRL || + procType == PIPE_SHADER_TESS_EVAL) { + switch (semName) { + case TGSI_SEMANTIC_VIEWPORT_INDEX: + info->writes_viewport_index = TRUE; + break; + case TGSI_SEMANTIC_LAYER: + info->writes_layer = TRUE; + break; + case TGSI_SEMANTIC_PSIZE: + info->writes_psize = TRUE; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + info->writes_clipvertex = TRUE; + break; + } + } + + if (procType == PIPE_SHADER_FRAGMENT) { + switch (semName) { + case TGSI_SEMANTIC_POSITION: + info->writes_z = TRUE; + break; + case TGSI_SEMANTIC_STENCIL: + info->writes_stencil = TRUE; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->writes_samplemask = TRUE; + break; + } + } + + if (procType == PIPE_SHADER_VERTEX) { + if (semName == TGSI_SEMANTIC_EDGEFLAG) { + info->writes_edgeflag = TRUE; + } + } + } else if (file == TGSI_FILE_SAMPLER) { + STATIC_ASSERT(sizeof(info->samplers_declared) * 8 >= PIPE_MAX_SAMPLERS); + info->samplers_declared |= 1u << reg; + } else if (file == TGSI_FILE_SAMPLER_VIEW) { + unsigned target = fulldecl->SamplerView.Resource; + unsigned type = fulldecl->SamplerView.ReturnTypeX; + + assert(target < TGSI_TEXTURE_UNKNOWN); + if (info->sampler_targets[reg] == TGSI_TEXTURE_UNKNOWN) { + /* Save sampler target for this sampler index */ + info->sampler_targets[reg] = target; + info->sampler_type[reg] = type; + } else { + /* if previously declared, make sure targets agree */ + assert(info->sampler_targets[reg] == target); + assert(info->sampler_type[reg] == type); + } + } else if (file == TGSI_FILE_IMAGE) { + if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER) + info->images_buffers |= 1 << reg; + } + } +} + + +static void +scan_immediate(struct tgsi_shader_info *info) +{ + uint reg = info->immediate_count++; + uint file = TGSI_FILE_IMMEDIATE; + + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); +} + + +static void +scan_property(struct tgsi_shader_info *info, + const struct tgsi_full_property *fullprop) +{ + unsigned name = fullprop->Property.PropertyName; + unsigned value = fullprop->u[0].Data; + + assert(name < ARRAY_SIZE(info->properties)); + info->properties[name] = value; + + switch (name) { + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + info->num_written_clipdistance = value; + info->clipdist_writemask |= (1 << value) - 1; + break; + case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: + info->num_written_culldistance = value; + info->culldist_writemask |= (1 << value) - 1; + break; + } +} /** @@ -56,13 +566,16 @@ tgsi_scan_shader(const struct tgsi_token *tokens, { uint procType, i; struct tgsi_parse_context parse; + unsigned current_depth = 0; memset(info, 0, sizeof(*info)); for (i = 0; i < TGSI_FILE_COUNT; i++) info->file_max[i] = -1; - for (i = 0; i < Elements(info->const_file_max); i++) + for (i = 0; i < ARRAY_SIZE(info->const_file_max); i++) info->const_file_max[i] = -1; info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = 1; + for (i = 0; i < ARRAY_SIZE(info->sampler_targets); i++) + info->sampler_targets[i] = TGSI_TEXTURE_UNKNOWN; /** ** Setup to begin parsing input shader @@ -72,265 +585,38 @@ tgsi_scan_shader(const struct tgsi_token *tokens, return; } procType = parse.FullHeader.Processor.Processor; - assert(procType == TGSI_PROCESSOR_FRAGMENT || - procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY || - procType == TGSI_PROCESSOR_TESS_CTRL || - procType == TGSI_PROCESSOR_TESS_EVAL || - procType == TGSI_PROCESSOR_COMPUTE); + assert(procType == PIPE_SHADER_FRAGMENT || + procType == PIPE_SHADER_VERTEX || + procType == PIPE_SHADER_GEOMETRY || + procType == PIPE_SHADER_TESS_CTRL || + procType == PIPE_SHADER_TESS_EVAL || + procType == PIPE_SHADER_COMPUTE); info->processor = procType; - /** ** Loop over incoming program tokens/instructions */ - while( !tgsi_parse_end_of_tokens( &parse ) ) { - + while (!tgsi_parse_end_of_tokens(&parse)) { info->num_tokens++; tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *fullinst - = &parse.FullToken.FullInstruction; - uint i; - - assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); - info->opcode_count[fullinst->Instruction.Opcode]++; - - for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = - &fullinst->Src[i]; - int ind = src->Register.Index; - - /* Mark which inputs are effectively used */ - if (src->Register.File == TGSI_FILE_INPUT) { - unsigned usage_mask; - usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i); - if (src->Register.Indirect) { - for (ind = 0; ind < info->num_inputs; ++ind) { - info->input_usage_mask[ind] |= usage_mask; - } - } else { - assert(ind >= 0); - assert(ind < PIPE_MAX_SHADER_INPUTS); - info->input_usage_mask[ind] |= usage_mask; - } - - if (procType == TGSI_PROCESSOR_FRAGMENT && - info->reads_position && - src->Register.Index == 0 && - (src->Register.SwizzleX == TGSI_SWIZZLE_Z || - src->Register.SwizzleY == TGSI_SWIZZLE_Z || - src->Register.SwizzleZ == TGSI_SWIZZLE_Z || - src->Register.SwizzleW == TGSI_SWIZZLE_Z)) { - info->reads_z = TRUE; - } - } - - /* check for indirect register reads */ - if (src->Register.Indirect) { - info->indirect_files |= (1 << src->Register.File); - info->indirect_files_read |= (1 << src->Register.File); - } - - /* MSAA samplers */ - if (src->Register.File == TGSI_FILE_SAMPLER) { - assert(fullinst->Instruction.Texture); - assert(src->Register.Index < Elements(info->is_msaa_sampler)); - - if (fullinst->Instruction.Texture && - (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || - fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { - info->is_msaa_sampler[src->Register.Index] = TRUE; - } - } - } - - /* check for indirect register writes */ - for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; - if (dst->Register.Indirect) { - info->indirect_files |= (1 << dst->Register.File); - info->indirect_files_written |= (1 << dst->Register.File); - } - } - - info->num_instructions++; - } + scan_instruction(info, &parse.FullToken.FullInstruction, + ¤t_depth); break; - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fulldecl - = &parse.FullToken.FullDeclaration; - const uint file = fulldecl->Declaration.File; - uint reg; - - if (fulldecl->Declaration.Array) { - unsigned array_id = fulldecl->Array.ArrayID; - - switch (file) { - case TGSI_FILE_INPUT: - assert(array_id < ARRAY_SIZE(info->input_array_first)); - info->input_array_first[array_id] = fulldecl->Range.First; - info->input_array_last[array_id] = fulldecl->Range.Last; - break; - case TGSI_FILE_OUTPUT: - assert(array_id < ARRAY_SIZE(info->output_array_first)); - info->output_array_first[array_id] = fulldecl->Range.First; - info->output_array_last[array_id] = fulldecl->Range.Last; - break; - } - info->array_max[file] = MAX2(info->array_max[file], array_id); - } - - for (reg = fulldecl->Range.First; - reg <= fulldecl->Range.Last; - reg++) { - unsigned semName = fulldecl->Semantic.Name; - unsigned semIndex = - fulldecl->Semantic.Index + (reg - fulldecl->Range.First); - - /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << reg); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)reg); - - if (file == TGSI_FILE_CONSTANT) { - int buffer = 0; - - if (fulldecl->Declaration.Dimension) - buffer = fulldecl->Dim.Index2D; - - info->const_file_max[buffer] = - MAX2(info->const_file_max[buffer], (int)reg); - } - else if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[reg] = (ubyte) semName; - info->input_semantic_index[reg] = (ubyte) semIndex; - info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; - info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; - info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; - info->num_inputs++; - - if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID) - info->uses_centroid = TRUE; - - if (semName == TGSI_SEMANTIC_PRIMID) - info->uses_primid = TRUE; - else if (procType == TGSI_PROCESSOR_FRAGMENT) { - if (semName == TGSI_SEMANTIC_POSITION) - info->reads_position = TRUE; - else if (semName == TGSI_SEMANTIC_FACE) - info->uses_frontface = TRUE; - } - } - else if (file == TGSI_FILE_SYSTEM_VALUE) { - unsigned index = fulldecl->Range.First; - - info->system_value_semantic_name[index] = semName; - info->num_system_values = MAX2(info->num_system_values, - index + 1); - - if (semName == TGSI_SEMANTIC_INSTANCEID) { - info->uses_instanceid = TRUE; - } - else if (semName == TGSI_SEMANTIC_VERTEXID) { - info->uses_vertexid = TRUE; - } - else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) { - info->uses_vertexid_nobase = TRUE; - } - else if (semName == TGSI_SEMANTIC_BASEVERTEX) { - info->uses_basevertex = TRUE; - } - else if (semName == TGSI_SEMANTIC_PRIMID) { - info->uses_primid = TRUE; - } else if (semName == TGSI_SEMANTIC_INVOCATIONID) { - info->uses_invocationid = TRUE; - } - } - else if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[reg] = (ubyte) semName; - info->output_semantic_index[reg] = (ubyte) semIndex; - info->num_outputs++; - - if (procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY || - procType == TGSI_PROCESSOR_TESS_CTRL || - procType == TGSI_PROCESSOR_TESS_EVAL) { - if (semName == TGSI_SEMANTIC_CLIPDIST) { - info->num_written_clipdistance += - util_bitcount(fulldecl->Declaration.UsageMask); - info->clipdist_writemask |= - fulldecl->Declaration.UsageMask << (semIndex*4); - } - else if (semName == TGSI_SEMANTIC_CULLDIST) { - info->num_written_culldistance += - util_bitcount(fulldecl->Declaration.UsageMask); - info->culldist_writemask |= - fulldecl->Declaration.UsageMask << (semIndex*4); - } - else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) { - info->writes_viewport_index = TRUE; - } - else if (semName == TGSI_SEMANTIC_LAYER) { - info->writes_layer = TRUE; - } - else if (semName == TGSI_SEMANTIC_PSIZE) { - info->writes_psize = TRUE; - } - else if (semName == TGSI_SEMANTIC_CLIPVERTEX) { - info->writes_clipvertex = TRUE; - } - } - - if (procType == TGSI_PROCESSOR_FRAGMENT) { - if (semName == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; - } - else if (semName == TGSI_SEMANTIC_STENCIL) { - info->writes_stencil = TRUE; - } - } - - if (procType == TGSI_PROCESSOR_VERTEX) { - if (semName == TGSI_SEMANTIC_EDGEFLAG) { - info->writes_edgeflag = TRUE; - } - } - } - } - } + scan_declaration(info, &parse.FullToken.FullDeclaration); break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - uint reg = info->immediate_count++; - uint file = TGSI_FILE_IMMEDIATE; - - info->file_mask[file] |= (1 << reg); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)reg); - } + scan_immediate(info); break; - case TGSI_TOKEN_TYPE_PROPERTY: - { - const struct tgsi_full_property *fullprop - = &parse.FullToken.FullProperty; - unsigned name = fullprop->Property.PropertyName; - - assert(name < Elements(info->properties)); - info->properties[name] = fullprop->u[0].Data; - } + scan_property(info, &parse.FullToken.FullProperty); break; - default: - assert( 0 ); + assert(!"Unexpected TGSI token type"); } } @@ -340,7 +626,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* The dimensions of the IN decleration in geometry shader have * to be deduced from the type of the input primitive. */ - if (procType == TGSI_PROCESSOR_GEOMETRY) { + if (procType == PIPE_SHADER_GEOMETRY) { unsigned input_primitive = info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]; int num_verts = u_vertices_per_prim(input_primitive); @@ -353,9 +639,85 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } - tgsi_parse_free (&parse); + tgsi_parse_free(&parse); } +/** + * Collect information about the arrays of a given register file. + * + * @param tokens TGSI shader + * @param file the register file to scan through + * @param max_array_id number of entries in @p arrays; should be equal to the + * highest array id, i.e. tgsi_shader_info::array_max[file]. + * @param arrays info for array of each ID will be written to arrays[ID - 1]. + */ +void +tgsi_scan_arrays(const struct tgsi_token *tokens, + unsigned file, + unsigned max_array_id, + struct tgsi_array_info *arrays) +{ + struct tgsi_parse_context parse; + + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n"); + return; + } + + memset(arrays, 0, sizeof(arrays[0]) * max_array_id); + + while (!tgsi_parse_end_of_tokens(&parse)) { + struct tgsi_full_instruction *inst; + + tgsi_parse_token(&parse); + + if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { + struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + + if (decl->Declaration.Array && decl->Declaration.File == file && + decl->Array.ArrayID > 0 && decl->Array.ArrayID <= max_array_id) { + struct tgsi_array_info *array = &arrays[decl->Array.ArrayID - 1]; + assert(!array->declared); + array->declared = true; + array->range = decl->Range; + } + } + + if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + continue; + + inst = &parse.FullToken.FullInstruction; + for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &inst->Dst[i]; + if (dst->Register.File != file) + continue; + + if (dst->Register.Indirect) { + if (dst->Indirect.ArrayID > 0 && + dst->Indirect.ArrayID <= max_array_id) { + arrays[dst->Indirect.ArrayID - 1].writemask |= dst->Register.WriteMask; + } else { + /* Indirect writes without an ArrayID can write anywhere. */ + for (unsigned j = 0; j < max_array_id; ++j) + arrays[j].writemask |= dst->Register.WriteMask; + } + } else { + /* Check whether the write falls into any of the arrays anyway. */ + for (unsigned j = 0; j < max_array_id; ++j) { + struct tgsi_array_info *array = &arrays[j]; + if (array->declared && + dst->Register.Index >= array->range.First && + dst->Register.Index <= array->range.Last) + array->writemask |= dst->Register.WriteMask; + } + } + } + } + + tgsi_parse_free(&parse); + + return; +} /** diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h index b81bdd71f..0c5f2ba06 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -64,6 +64,9 @@ struct tgsi_shader_info uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ int const_file_max[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned samplers_declared; /**< bitmask of declared samplers */ + ubyte sampler_targets[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_TEXTURE_x values */ + ubyte sampler_type[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_RETURN_TYPE_x */ ubyte input_array_first[PIPE_MAX_SHADER_INPUTS]; ubyte input_array_last[PIPE_MAX_SHADER_INPUTS]; @@ -73,16 +76,32 @@ struct tgsi_shader_info uint immediate_count; /**< number of immediates declared */ uint num_instructions; + uint num_memory_instructions; /**< sampler, buffer, and image instructions */ uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + ubyte colors_read; /**< which color components are read by the FS */ + ubyte colors_written; boolean reads_position; /**< does fragment shader read position? */ boolean reads_z; /**< does fragment shader read depth? */ + boolean reads_samplemask; /**< does fragment shader read sample mask? */ boolean writes_z; /**< does fragment shader write Z value? */ boolean writes_stencil; /**< does fragment shader write stencil value? */ + boolean writes_samplemask; /**< does fragment shader write sample mask? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KILL or KILL_IF instruction used? */ - boolean uses_centroid; + boolean uses_persp_center; + boolean uses_persp_centroid; + boolean uses_persp_sample; + boolean uses_linear_center; + boolean uses_linear_centroid; + boolean uses_linear_sample; + boolean uses_persp_opcode_interp_centroid; + boolean uses_persp_opcode_interp_offset; + boolean uses_persp_opcode_interp_sample; + boolean uses_linear_opcode_interp_centroid; + boolean uses_linear_opcode_interp_offset; + boolean uses_linear_opcode_interp_sample; boolean uses_instanceid; boolean uses_vertexid; boolean uses_vertexid_nobase; @@ -94,13 +113,24 @@ struct tgsi_shader_info boolean writes_clipvertex; boolean writes_viewport_index; boolean writes_layer; + boolean writes_memory; /**< contains stores or atomics to buffers or images */ boolean is_msaa_sampler[PIPE_MAX_SAMPLERS]; - + boolean uses_doubles; /**< uses any of the double instructions */ + boolean uses_derivatives; unsigned clipdist_writemask; unsigned culldist_writemask; unsigned num_written_culldistance; unsigned num_written_clipdistance; /** + * Bitmask indicating which images are written to (STORE / ATOM*). + * Indirect image accesses are not reflected in this mask. + */ + unsigned images_writemask; + /** + * Bitmask indicating which declared image is a buffer. + */ + unsigned images_buffers; + /** * Bitmask indicating which register files are accessed with * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. */ @@ -113,12 +143,34 @@ struct tgsi_shader_info unsigned indirect_files_written; unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */ + + /** + * Max nesting limit of loops/if's + */ + unsigned max_depth; +}; + +struct tgsi_array_info +{ + /** Whether an array with this ID was declared. */ + bool declared; + + /** The OR of all writemasks used to write to this array. */ + ubyte writemask; + + /** The range with which the array was declared. */ + struct tgsi_declaration_range range; }; extern void tgsi_scan_shader(const struct tgsi_token *tokens, struct tgsi_shader_info *info); +void +tgsi_scan_arrays(const struct tgsi_token *tokens, + unsigned file, + unsigned max_array_id, + struct tgsi_array_info *arrays); extern boolean tgsi_is_passthrough_shader(const struct tgsi_token *tokens); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c index 8271ea081..536a4c8f3 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -34,8 +34,8 @@ const char *tgsi_processor_type_names[6] = { - "FRAG", "VERT", + "FRAG", "GEOM", "TESS_CTRL", "TESS_EVAL", @@ -54,8 +54,10 @@ static const char *tgsi_file_names[] = "IMM", "PRED", "SV", - "RES", - "SVIEW" + "IMAGE", + "SVIEW", + "BUFFER", + "MEMORY", }; const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = @@ -83,7 +85,6 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "PCOORD", "VIEWPORT_INDEX", "LAYER", - "CULLDIST", "SAMPLEID", "SAMPLEPOS", "SAMPLEMASK", @@ -95,6 +96,10 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "TESSOUTER", "TESSINNER", "VERTICESIN", + "HELPER_INVOCATION", + "BASEINSTANCE", + "DRAWID", + "WORK_DIM", }; const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] = @@ -137,6 +142,13 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "TES_SPACING", "TES_VERTEX_ORDER_CW", "TES_POINT_MODE", + "NUM_CLIPDIST_ENABLED", + "NUM_CULLDIST_ENABLED", + "FS_EARLY_DEPTH_STENCIL", + "NEXT_SHADER", + "CS_FIXED_BLOCK_WIDTH", + "CS_FIXED_BLOCK_HEIGHT", + "CS_FIXED_BLOCK_DEPTH" }; const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = @@ -202,16 +214,23 @@ const char *tgsi_immediate_type_names[4] = "FLT64" }; +const char *tgsi_memory_names[3] = +{ + "COHERENT", + "RESTRICT", + "VOLATILE", +}; + static inline void tgsi_strings_check(void) { - STATIC_ASSERT(Elements(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT); - STATIC_ASSERT(Elements(tgsi_texture_names) == TGSI_TEXTURE_COUNT); - STATIC_ASSERT(Elements(tgsi_property_names) == TGSI_PROPERTY_COUNT); - STATIC_ASSERT(Elements(tgsi_primitive_names) == PIPE_PRIM_MAX); - STATIC_ASSERT(Elements(tgsi_interpolate_names) == TGSI_INTERPOLATE_COUNT); - STATIC_ASSERT(Elements(tgsi_return_type_names) == TGSI_RETURN_TYPE_COUNT); + STATIC_ASSERT(ARRAY_SIZE(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT); + STATIC_ASSERT(ARRAY_SIZE(tgsi_texture_names) == TGSI_TEXTURE_COUNT); + STATIC_ASSERT(ARRAY_SIZE(tgsi_property_names) == TGSI_PROPERTY_COUNT); + STATIC_ASSERT(ARRAY_SIZE(tgsi_primitive_names) == PIPE_PRIM_MAX); + STATIC_ASSERT(ARRAY_SIZE(tgsi_interpolate_names) == TGSI_INTERPOLATE_COUNT); + STATIC_ASSERT(ARRAY_SIZE(tgsi_return_type_names) == TGSI_RETURN_TYPE_COUNT); (void) tgsi_processor_type_names; (void) tgsi_return_type_names; (void) tgsi_immediate_type_names; @@ -223,8 +242,8 @@ tgsi_strings_check(void) const char * tgsi_file_name(unsigned file) { - STATIC_ASSERT(Elements(tgsi_file_names) == TGSI_FILE_COUNT); - if (file < Elements(tgsi_file_names)) + STATIC_ASSERT(ARRAY_SIZE(tgsi_file_names) == TGSI_FILE_COUNT); + if (file < ARRAY_SIZE(tgsi_file_names)) return tgsi_file_names[file]; else return "invalid file"; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h index 71e74372f..9a9362e91 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h @@ -38,7 +38,7 @@ extern "C" { #endif -extern const char *tgsi_processor_type_names[6]; +extern const char *tgsi_processor_type_names[PIPE_SHADER_TYPES]; extern const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT]; @@ -60,6 +60,8 @@ extern const char *tgsi_fs_coord_pixel_center_names[2]; extern const char *tgsi_immediate_type_names[4]; +extern const char *tgsi_memory_names[3]; + const char * tgsi_file_name(unsigned file); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c index 3e3ed5b19..be8084251 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -119,6 +119,42 @@ static boolean str_match_nocase_whole( const char **pcur, const char *str ) return FALSE; } +/* Return the array index that matches starting at *pcur, where the string at + * *pcur is terminated by a non-digit non-letter non-underscore. + * Returns -1 if no match is found. + * + * On success, the pointer to the first string is moved to the end of the read + * word. + */ +static int str_match_name_from_array(const char **pcur, + const char * const *array, + unsigned array_size) +{ + for (unsigned j = 0; j < array_size; ++j) { + if (str_match_nocase_whole(pcur, array[j])) + return j; + } + return -1; +} + +/* Return the format corresponding to the name at *pcur. + * Returns -1 if there is no format name. + * + * On success, the pointer to the string is moved to the end of the read format + * name. + */ +static int str_match_format(const char **pcur) +{ + for (unsigned i = 0; i < PIPE_FORMAT_COUNT; i++) { + const struct util_format_description *desc = + util_format_description(i); + if (desc && str_match_nocase_whole(pcur, desc->name)) { + return i; + } + } + return -1; +} + /* Eat zero or more whitespaces. */ static void eat_opt_white( const char **pcur ) @@ -195,8 +231,15 @@ static boolean parse_float( const char **pcur, float *val ) boolean integral_part = FALSE; boolean fractional_part = FALSE; - *val = (float) atof( cur ); + if (*cur == '0' && *(cur + 1) == 'x') { + union fi fi; + fi.ui = strtoul(cur, NULL, 16); + *val = fi.f; + cur += 10; + goto out; + } + *val = (float) atof( cur ); if (*cur == '-' || *cur == '+') cur++; if (is_digit( cur )) { @@ -228,6 +271,8 @@ static boolean parse_float( const char **pcur, float *val ) else return FALSE; } + +out: *pcur = cur; return TRUE; } @@ -250,6 +295,42 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1) return TRUE; } +static boolean parse_int64( const char **pcur, uint32_t *val0, uint32_t *val1) +{ + const char *cur = *pcur; + union { + int64_t i64val; + uint32_t uval[2]; + } v; + + v.i64val = strtoll(cur, (char**)pcur, 0); + if (*pcur == cur) + return FALSE; + + *val0 = v.uval[0]; + *val1 = v.uval[1]; + + return TRUE; +} + +static boolean parse_uint64( const char **pcur, uint32_t *val0, uint32_t *val1) +{ + const char *cur = *pcur; + union { + uint64_t u64val; + uint32_t uval[2]; + } v; + + v.u64val = strtoull(cur, (char**)pcur, 0); + if (*pcur == cur) + return FALSE; + + *val0 = v.uval[0]; + *val1 = v.uval[1]; + + return TRUE; +} + struct translate_ctx { const char *text; @@ -292,17 +373,17 @@ static boolean parse_header( struct translate_ctx *ctx ) uint processor; if (str_match_nocase_whole( &ctx->cur, "FRAG" )) - processor = TGSI_PROCESSOR_FRAGMENT; + processor = PIPE_SHADER_FRAGMENT; else if (str_match_nocase_whole( &ctx->cur, "VERT" )) - processor = TGSI_PROCESSOR_VERTEX; + processor = PIPE_SHADER_VERTEX; else if (str_match_nocase_whole( &ctx->cur, "GEOM" )) - processor = TGSI_PROCESSOR_GEOMETRY; + processor = PIPE_SHADER_GEOMETRY; else if (str_match_nocase_whole( &ctx->cur, "TESS_CTRL" )) - processor = TGSI_PROCESSOR_TESS_CTRL; + processor = PIPE_SHADER_TESS_CTRL; else if (str_match_nocase_whole( &ctx->cur, "TESS_EVAL" )) - processor = TGSI_PROCESSOR_TESS_EVAL; + processor = PIPE_SHADER_TESS_EVAL; else if (str_match_nocase_whole( &ctx->cur, "COMP" )) - processor = TGSI_PROCESSOR_COMPUTE; + processor = PIPE_SHADER_COMPUTE; else { report_error( ctx, "Unknown header" ); return FALSE; @@ -689,9 +770,9 @@ parse_register_dcl( * the second bracket */ /* tessellation has similar constraints to geometry shader */ - if ((ctx->processor == TGSI_PROCESSOR_GEOMETRY && is_in) || - (ctx->processor == TGSI_PROCESSOR_TESS_EVAL && is_in) || - (ctx->processor == TGSI_PROCESSOR_TESS_CTRL && (is_in || is_out))) { + if ((ctx->processor == PIPE_SHADER_GEOMETRY && is_in) || + (ctx->processor == PIPE_SHADER_TESS_EVAL && is_in) || + (ctx->processor == PIPE_SHADER_TESS_CTRL && (is_in || is_out))) { brackets[0] = brackets[1]; *num_brackets = 1; } else { @@ -1030,6 +1111,15 @@ parse_instruction( inst.Texture.Texture = TGSI_TEXTURE_UNKNOWN; } + if ((i >= TGSI_OPCODE_LOAD && i <= TGSI_OPCODE_ATOMIMAX) || + i == TGSI_OPCODE_RESQ) { + inst.Instruction.Memory = 1; + inst.Memory.Qualifier = 0; + } + + assume(info->num_dst <= TGSI_FULL_MAX_DST_REGISTERS); + assume(info->num_src <= TGSI_FULL_MAX_SRC_REGISTERS); + /* Parse instruction operands. */ for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { @@ -1082,6 +1172,41 @@ parse_instruction( inst.Texture.NumOffsets = i; cur = ctx->cur; + eat_opt_white(&cur); + + for (; inst.Instruction.Memory && *cur == ','; + ctx->cur = cur, eat_opt_white(&cur)) { + int j; + + cur++; + eat_opt_white(&cur); + + j = str_match_name_from_array(&cur, tgsi_memory_names, + ARRAY_SIZE(tgsi_memory_names)); + if (j >= 0) { + inst.Memory.Qualifier |= 1U << j; + continue; + } + + j = str_match_name_from_array(&cur, tgsi_texture_names, + ARRAY_SIZE(tgsi_texture_names)); + if (j >= 0) { + inst.Memory.Texture = j; + continue; + } + + j = str_match_format(&cur); + if (j >= 0) { + inst.Memory.Format = j; + continue; + } + + ctx->cur = cur; + report_error(ctx, "Expected memory qualifier, texture target, or format\n"); + return FALSE; + } + + cur = ctx->cur; eat_opt_white( &cur ); if (info->is_branch && *cur == ':') { uint target; @@ -1139,6 +1264,14 @@ static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type, ret = parse_double(&ctx->cur, &values[i].Uint, &values[i+1].Uint); i++; break; + case TGSI_IMM_INT64: + ret = parse_int64(&ctx->cur, &values[i].Uint, &values[i+1].Uint); + i++; + break; + case TGSI_IMM_UINT64: + ret = parse_uint64(&ctx->cur, &values[i].Uint, &values[i+1].Uint); + i++; + break; case TGSI_IMM_FLOAT32: ret = parse_float(&ctx->cur, &values[i].Float); break; @@ -1205,7 +1338,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) } is_vs_input = (file == TGSI_FILE_INPUT && - ctx->processor == TGSI_PROCESSOR_VERTEX); + ctx->processor == PIPE_SHADER_VERTEX); cur = ctx->cur; eat_opt_white( &cur ); @@ -1242,10 +1375,10 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur++; eat_opt_white( &cur ); - if (file == TGSI_FILE_RESOURCE) { + if (file == TGSI_FILE_IMAGE) { for (i = 0; i < TGSI_TEXTURE_COUNT; i++) { if (str_match_nocase_whole(&cur, tgsi_texture_names[i])) { - decl.Resource.Resource = i; + decl.Image.Resource = i; break; } } @@ -1260,13 +1393,17 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur2++; eat_opt_white(&cur2); if (str_match_nocase_whole(&cur2, "RAW")) { - decl.Resource.Raw = 1; + decl.Image.Raw = 1; } else if (str_match_nocase_whole(&cur2, "WR")) { - decl.Resource.Writable = 1; + decl.Image.Writable = 1; } else { - break; + int format = str_match_format(&cur2); + if (format < 0) + break; + + decl.Image.Format = format; } cur = cur2; eat_opt_white(&cur2); @@ -1339,6 +1476,26 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl.SamplerView.ReturnTypeX; } ctx->cur = cur; + } else if (file == TGSI_FILE_BUFFER) { + if (str_match_nocase_whole(&cur, "ATOMIC")) { + decl.Declaration.Atomic = 1; + ctx->cur = cur; + } + } else if (file == TGSI_FILE_MEMORY) { + if (str_match_nocase_whole(&cur, "GLOBAL")) { + /* Note this is a no-op global is the default */ + decl.Declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL; + ctx->cur = cur; + } else if (str_match_nocase_whole(&cur, "SHARED")) { + decl.Declaration.MemType = TGSI_MEMORY_TYPE_SHARED; + ctx->cur = cur; + } else if (str_match_nocase_whole(&cur, "PRIVATE")) { + decl.Declaration.MemType = TGSI_MEMORY_TYPE_PRIVATE; + ctx->cur = cur; + } else if (str_match_nocase_whole(&cur, "INPUT")) { + decl.Declaration.MemType = TGSI_MEMORY_TYPE_INPUT; + ctx->cur = cur; + } } else { if (str_match_nocase_whole(&cur, "LOCAL")) { decl.Declaration.Local = 1; @@ -1474,11 +1631,11 @@ static boolean parse_immediate( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - for (type = 0; type < Elements(tgsi_immediate_type_names); ++type) { + for (type = 0; type < ARRAY_SIZE(tgsi_immediate_type_names); ++type) { if (str_match_nocase_whole(&ctx->cur, tgsi_immediate_type_names[type])) break; } - if (type == Elements(tgsi_immediate_type_names)) { + if (type == ARRAY_SIZE(tgsi_immediate_type_names)) { report_error( ctx, "Expected immediate type" ); return FALSE; } @@ -1524,7 +1681,7 @@ parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin ) { uint i; - for (i = 0; i < Elements(tgsi_fs_coord_origin_names); i++) { + for (i = 0; i < ARRAY_SIZE(tgsi_fs_coord_origin_names); i++) { const char *cur = *pcur; if (str_match_nocase_whole( &cur, tgsi_fs_coord_origin_names[i])) { @@ -1541,7 +1698,7 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) { uint i; - for (i = 0; i < Elements(tgsi_fs_coord_pixel_center_names); i++) { + for (i = 0; i < ARRAY_SIZE(tgsi_fs_coord_pixel_center_names); i++) { const char *cur = *pcur; if (str_match_nocase_whole( &cur, tgsi_fs_coord_pixel_center_names[i])) { @@ -1553,6 +1710,22 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) return FALSE; } +static boolean +parse_property_next_shader( const char **pcur, uint *next_shader ) +{ + uint i; + + for (i = 0; i < ARRAY_SIZE(tgsi_processor_type_names); i++) { + const char *cur = *pcur; + + if (str_match_nocase_whole( &cur, tgsi_processor_type_names[i])) { + *next_shader = i; + *pcur = cur; + return TRUE; + } + } + return FALSE; +} static boolean parse_property( struct translate_ctx *ctx ) { @@ -1590,7 +1763,7 @@ static boolean parse_property( struct translate_ctx *ctx ) return FALSE; } if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM && - ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + ctx->processor == PIPE_SHADER_GEOMETRY) { ctx->implied_array_size = u_vertices_per_prim(values[0]); } break; @@ -1606,6 +1779,12 @@ static boolean parse_property( struct translate_ctx *ctx ) return FALSE; } break; + case TGSI_PROPERTY_NEXT_SHADER: + if (!parse_property_next_shader(&ctx->cur, &values[0] )) { + report_error( ctx, "Unknown next shader property value." ); + return FALSE; + } + break; case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: default: if (!parse_uint(&ctx->cur, &values[0] )) { @@ -1638,8 +1817,8 @@ static boolean translate( struct translate_ctx *ctx ) if (!parse_header( ctx )) return FALSE; - if (ctx->processor == TGSI_PROCESSOR_TESS_CTRL || - ctx->processor == TGSI_PROCESSOR_TESS_EVAL) + if (ctx->processor == PIPE_SHADER_TESS_CTRL || + ctx->processor == PIPE_SHADER_TESS_EVAL) ctx->implied_array_size = 32; while (*ctx->cur != '\0') { diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h index ceb7c2e0f..c21ff959c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -95,20 +95,39 @@ struct tgsi_transform_context * Helper for emitting temporary register declarations. */ static inline void -tgsi_transform_temp_decl(struct tgsi_transform_context *ctx, - unsigned index) +tgsi_transform_temps_decl(struct tgsi_transform_context *ctx, + unsigned firstIdx, unsigned lastIdx) { struct tgsi_full_declaration decl; decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.Range.First = - decl.Range.Last = index; + decl.Range.First = firstIdx; + decl.Range.Last = lastIdx; ctx->emit_declaration(ctx, &decl); } +static inline void +tgsi_transform_temp_decl(struct tgsi_transform_context *ctx, + unsigned index) +{ + tgsi_transform_temps_decl(ctx, index, index); +} static inline void +tgsi_transform_const_decl(struct tgsi_transform_context *ctx, + unsigned firstIdx, unsigned lastIdx) +{ + struct tgsi_full_declaration decl; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Range.First = firstIdx; + decl.Range.Last = lastIdx; + ctx->emit_declaration(ctx, &decl); +} + +static inline void tgsi_transform_input_decl(struct tgsi_transform_context *ctx, unsigned index, unsigned sem_name, unsigned sem_index, @@ -129,6 +148,26 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx, ctx->emit_declaration(ctx, &decl); } +static inline void +tgsi_transform_output_decl(struct tgsi_transform_context *ctx, + unsigned index, + unsigned sem_name, unsigned sem_index, + unsigned interp) +{ + struct tgsi_full_declaration decl; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Interpolate = 1; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = sem_name; + decl.Semantic.Index = sem_index; + decl.Range.First = + decl.Range.Last = index; + decl.Interp.Interpolate = interp; + + ctx->emit_declaration(ctx, &decl); +} static inline void tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx, @@ -153,7 +192,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW; - decl.Declaration.UsageMask = 0xf; + decl.Declaration.UsageMask = TGSI_WRITEMASK_XYZW; decl.Range.First = decl.Range.Last = index; decl.SamplerView.Resource = target; @@ -182,6 +221,28 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx, ctx->emit_immediate(ctx, &immed); } +static inline void +tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg, + unsigned file, unsigned index, unsigned writemask) +{ + reg->Register.File = file; + reg->Register.Index = index; + reg->Register.WriteMask = writemask; +} + +static inline void +tgsi_transform_src_reg(struct tgsi_full_src_register *reg, + unsigned file, unsigned index, + unsigned swizzleX, unsigned swizzleY, + unsigned swizzleZ, unsigned swizzleW) +{ + reg->Register.File = file; + reg->Register.Index = index; + reg->Register.SwizzleX = swizzleX; + reg->Register.SwizzleY = swizzleY; + reg->Register.SwizzleZ = swizzleZ; + reg->Register.SwizzleW = swizzleW; +} /** * Helper for emitting 1-operand instructions. @@ -241,6 +302,40 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, static inline void +tgsi_transform_op3_inst(struct tgsi_transform_context *ctx, + unsigned opcode, + unsigned dst_file, + unsigned dst_index, + unsigned dst_writemask, + unsigned src0_file, + unsigned src0_index, + unsigned src1_file, + unsigned src1_index, + unsigned src2_file, + unsigned src2_index) +{ + struct tgsi_full_instruction inst; + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = dst_file, + inst.Dst[0].Register.Index = dst_index; + inst.Dst[0].Register.WriteMask = dst_writemask; + inst.Instruction.NumSrcRegs = 3; + inst.Src[0].Register.File = src0_file; + inst.Src[0].Register.Index = src0_index; + inst.Src[1].Register.File = src1_file; + inst.Src[1].Register.Index = src1_index; + inst.Src[2].Register.File = src2_file; + inst.Src[2].Register.Index = src2_index; + + ctx->emit_instruction(ctx, &inst); +} + + + +static inline void tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx, unsigned opcode, unsigned dst_file, @@ -399,7 +494,8 @@ static inline void tgsi_transform_kill_inst(struct tgsi_transform_context *ctx, unsigned src_file, unsigned src_index, - unsigned src_swizzle) + unsigned src_swizzle, + boolean negate) { struct tgsi_full_instruction inst; @@ -413,22 +509,25 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx, inst.Src[0].Register.SwizzleY = inst.Src[0].Register.SwizzleZ = inst.Src[0].Register.SwizzleW = src_swizzle; - inst.Src[0].Register.Negate = 1; + inst.Src[0].Register.Negate = negate; ctx->emit_instruction(ctx, &inst); } static inline void -tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx, - unsigned dst_file, - unsigned dst_index, - unsigned src_file, - unsigned src_index, - unsigned sampler_index) +tgsi_transform_tex_inst(struct tgsi_transform_context *ctx, + unsigned dst_file, + unsigned dst_index, + unsigned src_file, + unsigned src_index, + unsigned tex_target, + unsigned sampler_index) { struct tgsi_full_instruction inst; + assert(tex_target < TGSI_TEXTURE_COUNT); + inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; @@ -436,7 +535,7 @@ tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 2; inst.Instruction.Texture = TRUE; - inst.Texture.Texture = TGSI_TEXTURE_2D; + inst.Texture.Texture = tex_target; inst.Src[0].Register.File = src_file; inst.Src[0].Register.Index = src_index; inst.Src[1].Register.File = TGSI_FILE_SAMPLER; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 3d2131950..7bcd24297 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -35,6 +35,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_sanity.h" #include "util/u_debug.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_bitmask.h" @@ -49,6 +50,7 @@ union tgsi_any_token { struct tgsi_declaration_range decl_range; struct tgsi_declaration_dimension decl_dim; struct tgsi_declaration_interp decl_interp; + struct tgsi_declaration_image decl_image; struct tgsi_declaration_semantic decl_semantic; struct tgsi_declaration_sampler_view decl_sampler_view; struct tgsi_declaration_array array; @@ -58,6 +60,7 @@ union tgsi_any_token { struct tgsi_instruction_predicate insn_predicate; struct tgsi_instruction_label insn_label; struct tgsi_instruction_texture insn_texture; + struct tgsi_instruction_memory insn_memory; struct tgsi_texture_offset insn_texture_offset; struct tgsi_src_register src; struct tgsi_ind_register ind; @@ -74,9 +77,9 @@ struct ureg_tokens { unsigned count; }; -#define UREG_MAX_INPUT PIPE_MAX_SHADER_INPUTS +#define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS) #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS -#define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS +#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS) #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 4096 #define UREG_MAX_ADDR 3 @@ -98,12 +101,14 @@ struct ureg_program { unsigned processor; bool supports_any_inout_decl_range; + int next_shader_processor; struct { unsigned semantic_name; unsigned semantic_index; unsigned interp; unsigned char cylindrical_wrap; + unsigned char usage_mask; unsigned interp_location; unsigned first; unsigned last; @@ -114,7 +119,6 @@ struct ureg_program unsigned vs_inputs[PIPE_MAX_ATTRIBS/32]; struct { - unsigned index; unsigned semantic_name; unsigned semantic_index; } system_value[UREG_MAX_SYSTEM_VALUE]; @@ -154,6 +158,21 @@ struct ureg_program } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS]; unsigned nr_sampler_views; + struct { + unsigned index; + unsigned target; + unsigned format; + boolean wr; + boolean raw; + } image[PIPE_MAX_SHADER_IMAGES]; + unsigned nr_images; + + struct { + unsigned index; + bool atomic; + } buffer[PIPE_MAX_SHADER_BUFFERS]; + unsigned nr_buffers; + struct util_bitmask *free_temps; struct util_bitmask *local_temps; struct util_bitmask *decl_temps; @@ -172,6 +191,8 @@ struct ureg_program unsigned nr_instructions; struct ureg_tokens domain[2]; + + bool use_memory[TGSI_MEMORY_TYPE_COUNT]; }; static union tgsi_any_token error_tokens[32]; @@ -182,7 +203,7 @@ static void tokens_error( struct ureg_tokens *tokens ) FREE(tokens->tokens); tokens->tokens = error_tokens; - tokens->size = Elements(error_tokens); + tokens->size = ARRAY_SIZE(error_tokens); tokens->count = 0; } @@ -244,30 +265,38 @@ static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, void ureg_property(struct ureg_program *ureg, unsigned name, unsigned value) { - assert(name < Elements(ureg->properties)); + assert(name < ARRAY_SIZE(ureg->properties)); ureg->properties[name] = value; } struct ureg_src -ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, +ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index, unsigned interp_mode, unsigned cylindrical_wrap, unsigned interp_location, + unsigned index, + unsigned usage_mask, unsigned array_id, unsigned array_size) { unsigned i; + assert(usage_mask != 0); + assert(usage_mask <= TGSI_WRITEMASK_XYZW); + for (i = 0; i < ureg->nr_inputs; i++) { if (ureg->input[i].semantic_name == semantic_name && ureg->input[i].semantic_index == semantic_index) { assert(ureg->input[i].interp == interp_mode); assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap); assert(ureg->input[i].interp_location == interp_location); - assert(ureg->input[i].array_id == array_id); - goto out; + if (ureg->input[i].array_id == array_id) { + ureg->input[i].usage_mask |= usage_mask; + goto out; + } + assert((ureg->input[i].usage_mask & usage_mask) == 0); } } @@ -278,10 +307,11 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, ureg->input[i].interp = interp_mode; ureg->input[i].cylindrical_wrap = cylindrical_wrap; ureg->input[i].interp_location = interp_location; - ureg->input[i].first = ureg->nr_input_regs; - ureg->input[i].last = ureg->nr_input_regs + array_size - 1; + ureg->input[i].first = index; + ureg->input[i].last = index + array_size - 1; ureg->input[i].array_id = array_id; - ureg->nr_input_regs += array_size; + ureg->input[i].usage_mask = usage_mask; + ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size); ureg->nr_inputs++; } else { set_bad(ureg); @@ -292,12 +322,27 @@ out: array_id); } +struct ureg_src +ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp_mode, + unsigned cylindrical_wrap, + unsigned interp_location, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_fs_input_cyl_centroid_layout(ureg, + semantic_name, semantic_index, interp_mode, cylindrical_wrap, interp_location, + ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size); +} + struct ureg_src ureg_DECL_vs_input( struct ureg_program *ureg, unsigned index ) { - assert(ureg->processor == TGSI_PROCESSOR_VERTEX); + assert(ureg->processor == PIPE_SHADER_VERTEX); assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs)); ureg->vs_inputs[index/32] |= 1 << (index % 32); @@ -306,6 +351,21 @@ ureg_DECL_vs_input( struct ureg_program *ureg, struct ureg_src +ureg_DECL_input_layout(struct ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_fs_input_cyl_centroid_layout(ureg, + semantic_name, semantic_index, 0, 0, 0, + index, usage_mask, array_id, array_size); +} + + +struct ureg_src ureg_DECL_input(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index, @@ -319,26 +379,36 @@ ureg_DECL_input(struct ureg_program *ureg, struct ureg_src ureg_DECL_system_value(struct ureg_program *ureg, - unsigned index, unsigned semantic_name, unsigned semantic_index) { + unsigned i; + + for (i = 0; i < ureg->nr_system_values; i++) { + if (ureg->system_value[i].semantic_name == semantic_name && + ureg->system_value[i].semantic_index == semantic_index) { + goto out; + } + } + if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { - ureg->system_value[ureg->nr_system_values].index = index; ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; + i = ureg->nr_system_values; ureg->nr_system_values++; } else { set_bad(ureg); } - return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); +out: + return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i); } -struct ureg_dst -ureg_DECL_output_masked(struct ureg_program *ureg, - unsigned name, +struct ureg_dst +ureg_DECL_output_layout(struct ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, unsigned index, unsigned usage_mask, unsigned array_id, @@ -349,22 +419,24 @@ ureg_DECL_output_masked(struct ureg_program *ureg, assert(usage_mask != 0); for (i = 0; i < ureg->nr_outputs; i++) { - if (ureg->output[i].semantic_name == name && - ureg->output[i].semantic_index == index) { - assert(ureg->output[i].array_id == array_id); - ureg->output[i].usage_mask |= usage_mask; - goto out; + if (ureg->output[i].semantic_name == semantic_name && + ureg->output[i].semantic_index == semantic_index) { + if (ureg->output[i].array_id == array_id) { + ureg->output[i].usage_mask |= usage_mask; + goto out; + } + assert((ureg->output[i].usage_mask & usage_mask) == 0); } } if (ureg->nr_outputs < UREG_MAX_OUTPUT) { - ureg->output[i].semantic_name = name; - ureg->output[i].semantic_index = index; + ureg->output[i].semantic_name = semantic_name; + ureg->output[i].semantic_index = semantic_index; ureg->output[i].usage_mask = usage_mask; - ureg->output[i].first = ureg->nr_output_regs; - ureg->output[i].last = ureg->nr_output_regs + array_size - 1; + ureg->output[i].first = index; + ureg->output[i].last = index + array_size - 1; ureg->output[i].array_id = array_id; - ureg->nr_output_regs += array_size; + ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size); ureg->nr_outputs++; } else { @@ -377,6 +449,19 @@ out: } +struct ureg_dst +ureg_DECL_output_masked(struct ureg_program *ureg, + unsigned name, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_output_layout(ureg, name, index, + ureg->nr_output_regs, usage_mask, array_id, array_size); +} + + struct ureg_dst ureg_DECL_output(struct ureg_program *ureg, unsigned name, @@ -428,7 +513,7 @@ ureg_DECL_constant2D(struct ureg_program *ureg, } -/* A one-dimensional, depricated version of ureg_DECL_constant2D(). +/* A one-dimensional, deprecated version of ureg_DECL_constant2D(). * * Constant operands declared with this function must be addressed * with a one-dimensional index. @@ -647,6 +732,71 @@ ureg_DECL_sampler_view(struct ureg_program *ureg, return reg; } +/* Allocate a new image. + */ +struct ureg_src +ureg_DECL_image(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned format, + boolean wr, + boolean raw) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index); + unsigned i; + + for (i = 0; i < ureg->nr_images; i++) + if (ureg->image[i].index == index) + return reg; + + if (i < PIPE_MAX_SHADER_IMAGES) { + ureg->image[i].index = index; + ureg->image[i].target = target; + ureg->image[i].wr = wr; + ureg->image[i].raw = raw; + ureg->image[i].format = format; + ureg->nr_images++; + return reg; + } + + assert(0); + return reg; +} + +/* Allocate a new buffer. + */ +struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, + bool atomic) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr); + unsigned i; + + for (i = 0; i < ureg->nr_buffers; i++) + if (ureg->buffer[i].index == nr) + return reg; + + if (i < PIPE_MAX_SHADER_BUFFERS) { + ureg->buffer[i].index = nr; + ureg->buffer[i].atomic = atomic; + ureg->nr_buffers++; + return reg; + } + + assert(0); + return reg; +} + +/* Allocate a memory area. + */ +struct ureg_src ureg_DECL_memory(struct ureg_program *ureg, + unsigned memory_type) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type); + + ureg->use_memory[memory_type] = true; + return reg; +} + static int match_or_expand_immediate64( const unsigned *v, int type, @@ -698,7 +848,9 @@ match_or_expand_immediate( const unsigned *v, unsigned nr2 = *pnr2; unsigned i, j; - if (type == TGSI_IMM_FLOAT64) + if (type == TGSI_IMM_FLOAT64 || + type == TGSI_IMM_UINT64 || + type == TGSI_IMM_INT64) return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle); *swizzle = 0; @@ -777,7 +929,9 @@ out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - if (type == TGSI_IMM_FLOAT64) { + if (type == TGSI_IMM_FLOAT64 || + type == TGSI_IMM_UINT64 || + type == TGSI_IMM_INT64) { for (j = nr; j < 4; j+=2) { swizzle |= (swizzle & 0xf) << (j * 2); } @@ -877,6 +1031,43 @@ ureg_DECL_immediate_int( struct ureg_program *ureg, return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } +struct ureg_src +ureg_DECL_immediate_uint64( struct ureg_program *ureg, + const uint64_t *v, + unsigned nr ) +{ + union { + unsigned u[4]; + uint64_t u64[2]; + } fu; + unsigned int i; + + assert((nr / 2) < 3); + for (i = 0; i < nr / 2; i++) { + fu.u64[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64); +} + +struct ureg_src +ureg_DECL_immediate_int64( struct ureg_program *ureg, + const int64_t *v, + unsigned nr ) +{ + union { + unsigned u[4]; + int64_t i64[2]; + } fu; + unsigned int i; + + assert((nr / 2) < 3); + for (i = 0; i < nr / 2; i++) { + fu.i64[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64); +} void ureg_emit_src( struct ureg_program *ureg, @@ -1017,7 +1208,7 @@ static void validate( unsigned opcode, #ifdef DEBUG const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); assert(info); - if(info) { + if (info) { assert(nr_dst == info->num_dst); assert(nr_src == info->num_src); } @@ -1082,7 +1273,7 @@ ureg_emit_label(struct ureg_program *ureg, { union tgsi_any_token *out, *insn; - if(!label_token) + if (!label_token) return; out = get_tokens( ureg, DOMAIN_INSN, 1 ); @@ -1147,6 +1338,25 @@ ureg_emit_texture_offset(struct ureg_program *ureg, } +void +ureg_emit_memory(struct ureg_program *ureg, + unsigned extended_token, + unsigned qualifier, + unsigned texture, + unsigned format) +{ + union tgsi_any_token *out, *insn; + + out = get_tokens( ureg, DOMAIN_INSN, 1 ); + insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); + + insn->insn.Memory = 1; + + out[0].value = 0; + out[0].insn_memory.Qualifier = qualifier; + out[0].insn_memory.Texture = texture; + out[0].insn_memory.Format = format; +} void ureg_fixup_insn_size(struct ureg_program *ureg, @@ -1299,6 +1509,44 @@ ureg_label_insn(struct ureg_program *ureg, } +void +ureg_memory_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + const struct ureg_src *src, + unsigned nr_src, + unsigned qualifier, + unsigned texture, + unsigned format) +{ + struct ureg_emit_insn_result insn; + unsigned i; + + insn = ureg_emit_insn(ureg, + opcode, + FALSE, + FALSE, + FALSE, + TGSI_SWIZZLE_X, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Z, + TGSI_SWIZZLE_W, + nr_dst, + nr_src); + + ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format); + + for (i = 0; i < nr_dst; i++) + ureg_emit_dst(ureg, dst[i]); + + for (i = 0; i < nr_src; i++) + ureg_emit_src(ureg, src[i]); + + ureg_fixup_insn_size(ureg, insn.insn_token); +} + + static void emit_decl_semantic(struct ureg_program *ureg, unsigned file, @@ -1344,7 +1592,8 @@ emit_decl_fs(struct ureg_program *ureg, unsigned interpolate, unsigned cylindrical_wrap, unsigned interpolate_location, - unsigned array_id) + unsigned array_id, + unsigned usage_mask) { union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 5 : 4); @@ -1353,7 +1602,7 @@ emit_decl_fs(struct ureg_program *ureg, out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 4; out[0].decl.File = file; - out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ + out[0].decl.UsageMask = usage_mask; out[0].decl.Interpolate = 1; out[0].decl.Semantic = 1; out[0].decl.Array = array_id != 0; @@ -1462,7 +1711,7 @@ emit_decl_sampler_view(struct ureg_program *ureg, out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 3; out[0].decl.File = TGSI_FILE_SAMPLER_VIEW; - out[0].decl.UsageMask = 0xf; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; out[1].value = 0; out[1].decl_range.First = index; @@ -1477,6 +1726,69 @@ emit_decl_sampler_view(struct ureg_program *ureg, } static void +emit_decl_image(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned format, + boolean wr, + boolean raw) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = TGSI_FILE_IMAGE; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; + + out[2].value = 0; + out[2].decl_image.Resource = target; + out[2].decl_image.Writable = wr; + out[2].decl_image.Raw = raw; + out[2].decl_image.Format = format; +} + +static void +emit_decl_buffer(struct ureg_program *ureg, + unsigned index, + bool atomic) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = TGSI_FILE_BUFFER; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.Atomic = atomic; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; +} + +static void +emit_decl_memory(struct ureg_program *ureg, unsigned memory_type) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = TGSI_FILE_MEMORY; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.MemType = memory_type; + + out[1].value = 0; + out[1].decl_range.First = memory_type; + out[1].decl_range.Last = memory_type; +} + +static void emit_immediate( struct ureg_program *ureg, const unsigned *v, unsigned type ) @@ -1515,17 +1827,17 @@ static void emit_decls( struct ureg_program *ureg ) { unsigned i,j; - for (i = 0; i < Elements(ureg->properties); i++) + for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) if (ureg->properties[i] != ~0) emit_property(ureg, i, ureg->properties[i]); - if (ureg->processor == TGSI_PROCESSOR_VERTEX) { + if (ureg->processor == PIPE_SHADER_VERTEX) { for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (ureg->vs_inputs[i/32] & (1 << (i%32))) { + if (ureg->vs_inputs[i/32] & (1u << (i%32))) { emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); } } - } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { + } else if (ureg->processor == PIPE_SHADER_FRAGMENT) { if (ureg->supports_any_inout_decl_range) { for (i = 0; i < ureg->nr_inputs; i++) { emit_decl_fs(ureg, @@ -1537,7 +1849,8 @@ static void emit_decls( struct ureg_program *ureg ) ureg->input[i].interp, ureg->input[i].cylindrical_wrap, ureg->input[i].interp_location, - ureg->input[i].array_id); + ureg->input[i].array_id, + ureg->input[i].usage_mask); } } else { @@ -1551,7 +1864,8 @@ static void emit_decls( struct ureg_program *ureg ) (j - ureg->input[i].first), ureg->input[i].interp, ureg->input[i].cylindrical_wrap, - ureg->input[i].interp_location, 0); + ureg->input[i].interp_location, 0, + ureg->input[i].usage_mask); } } } @@ -1586,8 +1900,8 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_system_values; i++) { emit_decl_semantic(ureg, TGSI_FILE_SYSTEM_VALUE, - ureg->system_value[i].index, - ureg->system_value[i].index, + i, + i, ureg->system_value[i].semantic_name, ureg->system_value[i].semantic_index, TGSI_WRITEMASK_XYZW, 0); @@ -1635,6 +1949,24 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler_view[i].return_type_w); } + for (i = 0; i < ureg->nr_images; i++) { + emit_decl_image(ureg, + ureg->image[i].index, + ureg->image[i].target, + ureg->image[i].format, + ureg->image[i].wr, + ureg->image[i].raw); + } + + for (i = 0; i < ureg->nr_buffers; i++) { + emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic); + } + + for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) { + if (ureg->use_memory[i]) + emit_decl_memory(ureg, i); + } + if (ureg->const_decls.nr_constant_ranges) { for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { emit_decl_range(ureg, @@ -1738,6 +2070,16 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) { const struct tgsi_token *tokens; + switch (ureg->processor) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_TESS_EVAL: + ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER, + ureg->next_shader_processor == -1 ? + PIPE_SHADER_FRAGMENT : + ureg->next_shader_processor); + break; + } + emit_header( ureg ); emit_decls( ureg ); copy_instructions( ureg ); @@ -1777,25 +2119,23 @@ void *ureg_create_shader( struct ureg_program *ureg, { struct pipe_shader_state state; - state.tokens = ureg_finalize(ureg); + pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg)); if(!state.tokens) return NULL; if (so) state.stream_output = *so; - else - memset(&state.stream_output, 0, sizeof(state.stream_output)); switch (ureg->processor) { - case TGSI_PROCESSOR_VERTEX: + case PIPE_SHADER_VERTEX: return pipe->create_vs_state(pipe, &state); - case TGSI_PROCESSOR_TESS_CTRL: + case PIPE_SHADER_TESS_CTRL: return pipe->create_tcs_state(pipe, &state); - case TGSI_PROCESSOR_TESS_EVAL: + case PIPE_SHADER_TESS_EVAL: return pipe->create_tes_state(pipe, &state); - case TGSI_PROCESSOR_GEOMETRY: + case PIPE_SHADER_GEOMETRY: return pipe->create_gs_state(pipe, &state); - case TGSI_PROCESSOR_FRAGMENT: + case PIPE_SHADER_FRAGMENT: return pipe->create_fs_state(pipe, &state); default: return NULL; @@ -1830,29 +2170,6 @@ void ureg_free_tokens( const struct tgsi_token *tokens ) } -static inline unsigned -pipe_shader_from_tgsi_processor(unsigned processor) -{ - switch (processor) { - case TGSI_PROCESSOR_VERTEX: - return PIPE_SHADER_VERTEX; - case TGSI_PROCESSOR_TESS_CTRL: - return PIPE_SHADER_TESS_CTRL; - case TGSI_PROCESSOR_TESS_EVAL: - return PIPE_SHADER_TESS_EVAL; - case TGSI_PROCESSOR_GEOMETRY: - return PIPE_SHADER_GEOMETRY; - case TGSI_PROCESSOR_FRAGMENT: - return PIPE_SHADER_FRAGMENT; - case TGSI_PROCESSOR_COMPUTE: - return PIPE_SHADER_COMPUTE; - default: - assert(0); - return PIPE_SHADER_VERTEX; - } -} - - struct ureg_program * ureg_create(unsigned processor) { @@ -1865,17 +2182,17 @@ ureg_create_with_screen(unsigned processor, struct pipe_screen *screen) { int i; struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); - if (ureg == NULL) + if (!ureg) goto no_ureg; ureg->processor = processor; ureg->supports_any_inout_decl_range = screen && - screen->get_shader_param(screen, - pipe_shader_from_tgsi_processor(processor), + screen->get_shader_param(screen, processor, PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0; + ureg->next_shader_processor = -1; - for (i = 0; i < Elements(ureg->properties); i++) + for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) ureg->properties[i] = ~0; ureg->free_temps = util_bitmask_create(); @@ -1903,6 +2220,13 @@ no_ureg: } +void +ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor) +{ + ureg->next_shader_processor = processor; +} + + unsigned ureg_get_nr_outputs( const struct ureg_program *ureg ) { @@ -1916,7 +2240,7 @@ void ureg_destroy( struct ureg_program *ureg ) { unsigned i; - for (i = 0; i < Elements(ureg->domain); i++) { + for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) { if (ureg->domain[i].tokens && ureg->domain[i].tokens != error_tokens) FREE(ureg->domain[i].tokens); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 0aae550d6..d3c28b33e 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -114,6 +114,8 @@ ureg_create_shader( struct ureg_program *, struct pipe_context *pipe, const struct pipe_stream_output_info *so ); +void +ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor); /* Alternately, return the built token stream and hand ownership of * that memory to the caller: @@ -171,6 +173,18 @@ ureg_property(struct ureg_program *ureg, unsigned name, unsigned value); */ struct ureg_src +ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp_mode, + unsigned cylindrical_wrap, + unsigned interp_location, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size); + +struct ureg_src ureg_DECL_fs_input_cyl_centroid(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, @@ -213,6 +227,15 @@ ureg_DECL_vs_input( struct ureg_program *, unsigned index ); struct ureg_src +ureg_DECL_input_layout(struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size); + +struct ureg_src ureg_DECL_input(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, @@ -221,11 +244,19 @@ ureg_DECL_input(struct ureg_program *, struct ureg_src ureg_DECL_system_value(struct ureg_program *, - unsigned index, unsigned semantic_name, unsigned semantic_index); struct ureg_dst +ureg_DECL_output_layout(struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size); + +struct ureg_dst ureg_DECL_output_masked(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, @@ -270,6 +301,16 @@ ureg_DECL_immediate_int( struct ureg_program *, const int *v, unsigned nr ); +struct ureg_src +ureg_DECL_immediate_uint64( struct ureg_program *, + const uint64_t *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int64( struct ureg_program *, + const int64_t *v, + unsigned nr ); + void ureg_DECL_constant2D(struct ureg_program *ureg, unsigned first, @@ -327,6 +368,19 @@ ureg_DECL_sampler_view(struct ureg_program *, unsigned return_type_z, unsigned return_type_w ); +struct ureg_src +ureg_DECL_image(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned format, + boolean wr, + boolean raw); + +struct ureg_src +ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic); + +struct ureg_src +ureg_DECL_memory(struct ureg_program *ureg, unsigned memory_type); static inline struct ureg_src ureg_imm4f( struct ureg_program *ureg, @@ -522,6 +576,16 @@ ureg_label_insn(struct ureg_program *ureg, unsigned nr_src, unsigned *label); +void +ureg_memory_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + const struct ureg_src *src, + unsigned nr_src, + unsigned qualifier, + unsigned texture, + unsigned format); /*********************************************************************** * Internal instruction helpers, don't call these directly: @@ -559,6 +623,13 @@ void ureg_emit_texture_offset(struct ureg_program *ureg, const struct tgsi_texture_offset *offset); +void +ureg_emit_memory(struct ureg_program *ureg, + unsigned insn_token, + unsigned qualifier, + unsigned texture, + unsigned format); + void ureg_emit_dst( struct ureg_program *ureg, struct ureg_dst dst ); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c index e5b8427a0..fbe29626a 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -29,6 +29,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_util.h" +#include "tgsi_exec.h" union pointer_hack { @@ -53,17 +54,17 @@ tgsi_util_get_src_register_swizzle( const struct tgsi_src_register *reg, unsigned component ) { - switch( component ) { - case 0: + switch (component) { + case TGSI_CHAN_X: return reg->SwizzleX; - case 1: + case TGSI_CHAN_Y: return reg->SwizzleY; - case 2: + case TGSI_CHAN_Z: return reg->SwizzleZ; - case 3: + case TGSI_CHAN_W: return reg->SwizzleW; default: - assert( 0 ); + assert(0); } return 0; } @@ -374,10 +375,8 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg) * sample index. */ int -tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample) +tgsi_util_get_texture_coord_dim(unsigned tgsi_tex) { - int dim; - /* * Depending on the texture target, (src0.xyzw, src1.x) is interpreted * differently: @@ -406,8 +405,7 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample) case TGSI_TEXTURE_BUFFER: case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - dim = 1; - break; + return 1; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_1D_ARRAY: @@ -415,50 +413,64 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample) case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_SHADOW1D_ARRAY: case TGSI_TEXTURE_2D_MSAA: - dim = 2; - break; + return 2; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOWCUBE: case TGSI_TEXTURE_SHADOW2D_ARRAY: case TGSI_TEXTURE_2D_ARRAY_MSAA: - dim = 3; - break; + return 3; case TGSI_TEXTURE_CUBE_ARRAY: case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - dim = 4; - break; + return 4; default: assert(!"unknown texture target"); - dim = 0; - break; + return 0; } +} - if (shadow_or_sample) { - switch (tgsi_tex) { - case TGSI_TEXTURE_SHADOW1D: - /* there is a gap */ - *shadow_or_sample = 2; - break; - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_SHADOWCUBE: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - *shadow_or_sample = dim; - break; - case TGSI_TEXTURE_2D_MSAA: - case TGSI_TEXTURE_2D_ARRAY_MSAA: - *shadow_or_sample = 3; - break; - default: - /* no shadow nor sample */ - *shadow_or_sample = -1; - break; - } + +/** + * Given a TGSI_TEXTURE_x target, return the src register index for the + * shadow reference coordinate. + */ +int +tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex) +{ + switch (tgsi_tex) { + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + return 2; + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_2D_MSAA: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + return 3; + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + return 4; + default: + /* no shadow nor sample */ + return -1; } +} + - return dim; +boolean +tgsi_is_shadow_target(unsigned target) +{ + switch (target) { + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + return TRUE; + default: + return FALSE; + } } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h index deb1ecc66..83a930b69 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -32,6 +32,8 @@ extern "C" { #endif +#include "pipe/p_shader_tokens.h" + struct tgsi_src_register; struct tgsi_full_src_register; struct tgsi_full_instruction; @@ -80,7 +82,21 @@ struct tgsi_src_register tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg); int -tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample); +tgsi_util_get_texture_coord_dim(unsigned tgsi_tex); + +int +tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex); + +boolean +tgsi_is_shadow_target(unsigned target); + + +static inline boolean +tgsi_is_msaa_target(unsigned target) +{ + return (target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA); +} #if defined __cplusplus } |