diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2015-11-22 02:46:45 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2015-11-22 02:46:45 +0000 |
commit | 3e40341f9dcd7c1bbc9afb8ddb812304820396cf (patch) | |
tree | 274b3f522afe1da16ab2b5347758c908bc23fac4 /lib/mesa/src/gallium/auxiliary/tgsi | |
parent | 7b644ad52b574bec410d557155d666ac17fdf51a (diff) |
import Mesa 11.0.6
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/tgsi')
21 files changed, 1574 insertions, 4219 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c index 39c20b5e8..fdb7febf7 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -110,8 +110,6 @@ tgsi_default_declaration( void ) declaration.Invariant = 0; declaration.Local = 0; declaration.Array = 0; - declaration.Atomic = 0; - declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL; declaration.Padding = 0; return declaration; @@ -127,8 +125,6 @@ tgsi_build_declaration( unsigned invariant, unsigned local, unsigned array, - unsigned atomic, - unsigned mem_type, struct tgsi_header *header ) { struct tgsi_declaration declaration; @@ -145,8 +141,6 @@ tgsi_build_declaration( declaration.Invariant = invariant; declaration.Local = local; declaration.Array = array; - declaration.Atomic = atomic; - declaration.MemType = mem_type; header_bodysize_grow( header ); return declaration; @@ -239,10 +233,7 @@ tgsi_default_declaration_semantic( void ) ds.Name = TGSI_SEMANTIC_POSITION; ds.Index = 0; - ds.StreamX = 0; - ds.StreamY = 0; - ds.StreamZ = 0; - ds.StreamW = 0; + ds.Padding = 0; return ds; } @@ -251,10 +242,6 @@ static struct tgsi_declaration_semantic tgsi_build_declaration_semantic( unsigned semantic_name, unsigned semantic_index, - unsigned streamx, - unsigned streamy, - unsigned streamz, - unsigned streamw, struct tgsi_declaration *declaration, struct tgsi_header *header ) { @@ -265,49 +252,43 @@ tgsi_build_declaration_semantic( ds.Name = semantic_name; ds.Index = semantic_index; - ds.StreamX = streamx; - ds.StreamY = streamy; - ds.StreamZ = streamz; - ds.StreamW = streamw; + ds.Padding = 0; declaration_grow( declaration, header ); return ds; } -static struct tgsi_declaration_image -tgsi_default_declaration_image(void) +static struct tgsi_declaration_resource +tgsi_default_declaration_resource(void) { - struct tgsi_declaration_image di; + struct tgsi_declaration_resource dr; - di.Resource = 
TGSI_TEXTURE_BUFFER; - di.Raw = 0; - di.Writable = 0; - di.Format = 0; - di.Padding = 0; + dr.Resource = TGSI_TEXTURE_BUFFER; + dr.Raw = 0; + dr.Writable = 0; + dr.Padding = 0; - return di; + return dr; } -static struct tgsi_declaration_image -tgsi_build_declaration_image(unsigned texture, - unsigned format, - unsigned raw, - unsigned writable, - struct tgsi_declaration *declaration, - struct tgsi_header *header) +static struct tgsi_declaration_resource +tgsi_build_declaration_resource(unsigned texture, + unsigned raw, + unsigned writable, + struct tgsi_declaration *declaration, + struct tgsi_header *header) { - struct tgsi_declaration_image di; + struct tgsi_declaration_resource dr; - di = tgsi_default_declaration_image(); - di.Resource = texture; - di.Format = format; - di.Raw = raw; - di.Writable = writable; + dr = tgsi_default_declaration_resource(); + dr.Resource = texture; + dr.Raw = raw; + dr.Writable = writable; declaration_grow(declaration, header); - return di; + return dr; } static struct tgsi_declaration_sampler_view @@ -383,7 +364,7 @@ tgsi_default_full_declaration( void ) full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); full_declaration.Interp = tgsi_default_declaration_interp(); - full_declaration.Image = tgsi_default_declaration_image(); + full_declaration.Resource = tgsi_default_declaration_resource(); full_declaration.SamplerView = tgsi_default_declaration_sampler_view(); full_declaration.Array = tgsi_default_declaration_array(); @@ -415,8 +396,6 @@ tgsi_build_full_declaration( full_decl->Declaration.Invariant, full_decl->Declaration.Local, full_decl->Declaration.Array, - full_decl->Declaration.Atomic, - full_decl->Declaration.MemType, header ); if (maxsize <= size) @@ -471,29 +450,24 @@ tgsi_build_full_declaration( *ds = tgsi_build_declaration_semantic( full_decl->Semantic.Name, full_decl->Semantic.Index, - full_decl->Semantic.StreamX, - full_decl->Semantic.StreamY, - 
full_decl->Semantic.StreamZ, - full_decl->Semantic.StreamW, declaration, header ); } - if (full_decl->Declaration.File == TGSI_FILE_IMAGE) { - struct tgsi_declaration_image *di; + if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) { + struct tgsi_declaration_resource *dr; if (maxsize <= size) { return 0; } - di = (struct tgsi_declaration_image *)&tokens[size]; + dr = (struct tgsi_declaration_resource *)&tokens[size]; size++; - *di = tgsi_build_declaration_image(full_decl->Image.Resource, - full_decl->Image.Format, - full_decl->Image.Raw, - full_decl->Image.Writable, - declaration, - header); + *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource, + full_decl->Resource.Raw, + full_decl->Resource.Writable, + declaration, + header); } if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { @@ -637,12 +611,12 @@ tgsi_default_instruction( void ) instruction.NrTokens = 0; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = 0; + instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; instruction.Label = 0; instruction.Texture = 0; - instruction.Memory = 0; - instruction.Padding = 0; + instruction.Padding = 0; return instruction; } @@ -650,6 +624,7 @@ tgsi_default_instruction( void ) static struct tgsi_instruction tgsi_build_instruction(unsigned opcode, unsigned saturate, + unsigned predicate, unsigned num_dst_regs, unsigned num_src_regs, struct tgsi_header *header) @@ -664,6 +639,7 @@ tgsi_build_instruction(unsigned opcode, instruction = tgsi_default_instruction(); instruction.Opcode = opcode; instruction.Saturate = saturate; + instruction.Predicate = predicate; instruction.NumDstRegs = num_dst_regs; instruction.NumSrcRegs = num_src_regs; @@ -684,6 +660,47 @@ instruction_grow( header_bodysize_grow( header ); } +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; + 
instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; + instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z; + instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; + instruction_predicate.Negate = 0; + instruction_predicate.Index = 0; + instruction_predicate.Padding = 0; + + return instruction_predicate; +} + +static struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate = tgsi_default_instruction_predicate(); + instruction_predicate.SwizzleX = swizzleX; + instruction_predicate.SwizzleY = swizzleY; + instruction_predicate.SwizzleZ = swizzleZ; + instruction_predicate.SwizzleW = swizzleW; + instruction_predicate.Negate = negate; + instruction_predicate.Index = index; + + instruction_grow(instruction, header); + + return instruction_predicate; +} + static struct tgsi_instruction_label tgsi_default_instruction_label( void ) { @@ -745,40 +762,6 @@ tgsi_build_instruction_texture( return instruction_texture; } -static struct tgsi_instruction_memory -tgsi_default_instruction_memory( void ) -{ - struct tgsi_instruction_memory instruction_memory; - - instruction_memory.Qualifier = 0; - instruction_memory.Texture = 0; - instruction_memory.Format = 0; - instruction_memory.Padding = 0; - - return instruction_memory; -} - -static struct tgsi_instruction_memory -tgsi_build_instruction_memory( - unsigned qualifier, - unsigned texture, - unsigned format, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_memory instruction_memory; - - instruction_memory.Qualifier = qualifier; - instruction_memory.Texture = texture; - instruction_memory.Format = format; - instruction_memory.Padding = 0; - instruction->Memory = 1; - - instruction_grow( 
instruction, header ); - - return instruction_memory; -} static struct tgsi_texture_offset tgsi_default_texture_offset( void ) @@ -1022,9 +1005,9 @@ tgsi_default_full_instruction( void ) unsigned i; full_instruction.Instruction = tgsi_default_instruction(); + full_instruction.Predicate = tgsi_default_instruction_predicate(); full_instruction.Label = tgsi_default_instruction_label(); full_instruction.Texture = tgsi_default_instruction_texture(); - full_instruction.Memory = tgsi_default_instruction_memory(); for( i = 0; i < TGSI_FULL_MAX_TEX_OFFSETS; i++ ) { full_instruction.TexOffsets[i] = tgsi_default_texture_offset(); } @@ -1057,11 +1040,32 @@ tgsi_build_full_instruction( *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode, full_inst->Instruction.Saturate, + full_inst->Instruction.Predicate, full_inst->Instruction.NumDstRegs, full_inst->Instruction.NumSrcRegs, header); prev_token = (struct tgsi_token *) instruction; + if (full_inst->Instruction.Predicate) { + struct tgsi_instruction_predicate *instruction_predicate; + + if (maxsize <= size) { + return 0; + } + instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size]; + size++; + + *instruction_predicate = + tgsi_build_instruction_predicate(full_inst->Predicate.Index, + full_inst->Predicate.Negate, + full_inst->Predicate.SwizzleX, + full_inst->Predicate.SwizzleY, + full_inst->Predicate.SwizzleZ, + full_inst->Predicate.SwizzleW, + instruction, + header); + } + if (full_inst->Instruction.Label) { struct tgsi_instruction_label *instruction_label; @@ -1115,26 +1119,6 @@ tgsi_build_full_instruction( prev_token = (struct tgsi_token *) texture_offset; } } - - if (full_inst->Instruction.Memory) { - struct tgsi_instruction_memory *instruction_memory; - - if( maxsize <= size ) - return 0; - instruction_memory = - (struct tgsi_instruction_memory *) &tokens[size]; - size++; - - *instruction_memory = tgsi_build_instruction_memory( - full_inst->Memory.Qualifier, - full_inst->Memory.Texture, - 
full_inst->Memory.Format, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_memory; - } - for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { const struct tgsi_full_dst_register *reg = &full_inst->Dst[i]; struct tgsi_dst_register *dst_register; @@ -1387,18 +1371,3 @@ tgsi_build_full_property( return size; } - -struct tgsi_full_src_register -tgsi_full_src_register_from_dst(const struct tgsi_full_dst_register *dst) -{ - struct tgsi_full_src_register src; - src.Register = tgsi_default_src_register(); - src.Register.File = dst->Register.File; - src.Register.Indirect = dst->Register.Indirect; - src.Register.Dimension = dst->Register.Dimension; - src.Register.Index = dst->Register.Index; - src.Indirect = dst->Indirect; - src.Dimension = dst->Dimension; - src.DimIndirect = dst->DimIndirect; - return src; -} diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h index 53f31932c..c5127e185 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -30,8 +30,6 @@ struct tgsi_token; -struct tgsi_full_dst_register; -struct tgsi_full_src_register; #if defined __cplusplus @@ -110,8 +108,8 @@ tgsi_build_full_instruction( struct tgsi_header *header, unsigned maxsize ); -struct tgsi_full_src_register -tgsi_full_src_register_from_dst(const struct tgsi_full_dst_register *dst); +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void); #if defined __cplusplus } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c index f6eba7424..8ceb5b475 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -25,13 +25,10 @@ * **************************************************************************/ -#include <inttypes.h> - #include "util/u_debug.h" #include "util/u_string.h" #include 
"util/u_math.h" #include "util/u_memory.h" -#include "util/u_math.h" #include "tgsi_dump.h" #include "tgsi_info.h" #include "tgsi_iterate.h" @@ -46,8 +43,6 @@ struct dump_ctx { struct tgsi_iterate_context iter; - boolean dump_float_as_hex; - uint instno; uint immno; int indent; @@ -89,13 +84,10 @@ dump_enum( #define CHR(C) ctx->dump_printf( ctx, "%c", C ) #define UIX(I) ctx->dump_printf( ctx, "0x%x", I ) #define UID(I) ctx->dump_printf( ctx, "%u", I ) -#define SI64D(I) ctx->dump_printf( ctx, "%"PRId64, I ) -#define UI64D(I) ctx->dump_printf( ctx, "%"PRIu64, I ) #define INSTID(I) ctx->dump_printf( ctx, "% 3u", I ) #define SID(I) ctx->dump_printf( ctx, "%d", I ) #define FLT(F) ctx->dump_printf( ctx, "%10.4f", F ) #define DBL(D) ctx->dump_printf( ctx, "%10.8f", D ) -#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) ) #define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) const char * @@ -258,25 +250,8 @@ dump_imm_data(struct tgsi_iterate_context *iter, i++; break; } - case TGSI_IMM_INT64: { - union di d; - d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32; - SI64D( d.i ); - i++; - break; - } - case TGSI_IMM_UINT64: { - union di d; - d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32; - UI64D( d.ui ); - i++; - break; - } case TGSI_IMM_FLOAT32: - if (ctx->dump_float_as_hex) - HFLT( data[i].Float ); - else - FLT( data[i].Float ); + FLT( data[i].Float ); break; case TGSI_IMM_UINT32: UID(data[i].Uint); @@ -313,17 +288,17 @@ iter_declaration( * two dimensional */ if (decl->Declaration.File == TGSI_FILE_INPUT && - (iter->processor.Processor == PIPE_SHADER_GEOMETRY || + (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY || (!patch && - (iter->processor.Processor == PIPE_SHADER_TESS_CTRL || - iter->processor.Processor == PIPE_SHADER_TESS_EVAL)))) { + (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL || + iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL)))) { TXT("[]"); } /* all non-patch tess ctrl shader outputs are two 
dimensional */ if (decl->Declaration.File == TGSI_FILE_OUTPUT && !patch && - iter->processor.Processor == PIPE_SHADER_TESS_CTRL) { + iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL) { TXT("[]"); } @@ -364,47 +339,17 @@ iter_declaration( UID( decl->Semantic.Index ); CHR( ']' ); } - - if (decl->Semantic.StreamX != 0 || decl->Semantic.StreamY != 0 || - decl->Semantic.StreamZ != 0 || decl->Semantic.StreamW != 0) { - TXT(", STREAM("); - UID(decl->Semantic.StreamX); - TXT(", "); - UID(decl->Semantic.StreamY); - TXT(", "); - UID(decl->Semantic.StreamZ); - TXT(", "); - UID(decl->Semantic.StreamW); - CHR(')'); - } } - if (decl->Declaration.File == TGSI_FILE_IMAGE) { + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { TXT(", "); - ENM(decl->Image.Resource, tgsi_texture_names); - TXT(", "); - TXT(util_format_name(decl->Image.Format)); - if (decl->Image.Writable) + ENM(decl->Resource.Resource, tgsi_texture_names); + if (decl->Resource.Writable) TXT(", WR"); - if (decl->Image.Raw) + if (decl->Resource.Raw) TXT(", RAW"); } - if (decl->Declaration.File == TGSI_FILE_BUFFER) { - if (decl->Declaration.Atomic) - TXT(", ATOMIC"); - } - - if (decl->Declaration.File == TGSI_FILE_MEMORY) { - switch (decl->Declaration.MemType) { - /* Note: ,GLOBAL is optional / the default */ - case TGSI_MEMORY_TYPE_GLOBAL: TXT(", GLOBAL"); break; - case TGSI_MEMORY_TYPE_SHARED: TXT(", SHARED"); break; - case TGSI_MEMORY_TYPE_PRIVATE: TXT(", PRIVATE"); break; - case TGSI_MEMORY_TYPE_INPUT: TXT(", INPUT"); break; - } - } - if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { TXT(", "); ENM(decl->SamplerView.Resource, tgsi_texture_names); @@ -425,7 +370,7 @@ iter_declaration( } if (decl->Declaration.Interpolate) { - if (iter->processor.Processor == PIPE_SHADER_FRAGMENT && + if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && decl->Declaration.File == TGSI_FILE_INPUT) { TXT( ", " ); @@ -468,7 +413,6 @@ tgsi_dump_declaration( const struct tgsi_full_declaration *decl ) { struct dump_ctx ctx; 
- memset(&ctx, 0, sizeof(ctx)); ctx.dump_printf = dump_ctx_printf; @@ -501,9 +445,6 @@ iter_property( case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: ENM(prop->u[i].Data, tgsi_fs_coord_pixel_center_names); break; - case TGSI_PROPERTY_NEXT_SHADER: - ENM(prop->u[i].Data, tgsi_processor_type_names); - break; default: SID( prop->u[i].Data ); break; @@ -520,7 +461,6 @@ void tgsi_dump_property( const struct tgsi_full_property *prop ) { struct dump_ctx ctx; - memset(&ctx, 0, sizeof(ctx)); ctx.dump_printf = dump_ctx_printf; @@ -552,7 +492,6 @@ tgsi_dump_immediate( const struct tgsi_full_immediate *imm ) { struct dump_ctx ctx; - memset(&ctx, 0, sizeof(ctx)); ctx.dump_printf = dump_ctx_printf; @@ -578,6 +517,30 @@ iter_instruction( TXT( " " ); ctx->indent += info->post_indent; + if (inst->Instruction.Predicate) { + CHR( '(' ); + + if (inst->Predicate.Negate) + CHR( '!' ); + + TXT( "PRED[" ); + SID( inst->Predicate.Index ); + CHR( ']' ); + + if (inst->Predicate.SwizzleX != TGSI_SWIZZLE_X || + inst->Predicate.SwizzleY != TGSI_SWIZZLE_Y || + inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z || + inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) { + CHR( '.' 
); + ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names ); + } + + TXT( ") " ); + } + TXT( info->mnemonic ); if (inst->Instruction.Saturate) { @@ -647,37 +610,17 @@ iter_instruction( } } - if (inst->Instruction.Memory) { - uint32_t qualifier = inst->Memory.Qualifier; - while (qualifier) { - int bit = ffs(qualifier) - 1; - qualifier &= ~(1U << bit); - TXT(", "); - ENM(bit, tgsi_memory_names); - } - if (inst->Memory.Texture) { - TXT( ", " ); - ENM( inst->Memory.Texture, tgsi_texture_names ); - } - if (inst->Memory.Format) { - TXT( ", " ); - TXT( util_format_name(inst->Memory.Format) ); - } - } - - if (inst->Instruction.Label) { - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: - case TGSI_OPCODE_ELSE: - case TGSI_OPCODE_BGNLOOP: - case TGSI_OPCODE_ENDLOOP: - case TGSI_OPCODE_CAL: - case TGSI_OPCODE_BGNSUB: - TXT( " :" ); - UID( inst->Label.Label ); - break; - } + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BGNSUB: + TXT( " :" ); + UID( inst->Label.Label ); + break; } /* update indentation */ @@ -699,14 +642,12 @@ tgsi_dump_instruction( uint instno ) { struct dump_ctx ctx; - memset(&ctx, 0, sizeof(ctx)); ctx.instno = instno; ctx.immno = instno; ctx.indent = 0; ctx.dump_printf = dump_ctx_printf; ctx.indentation = 0; - ctx.file = NULL; iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst ); } @@ -721,30 +662,23 @@ prolog( return TRUE; } -static void -init_dump_ctx(struct dump_ctx *ctx, uint flags) -{ - memset(ctx, 0, sizeof(*ctx)); - - ctx->iter.prolog = prolog; - ctx->iter.iterate_instruction = iter_instruction; - ctx->iter.iterate_declaration = iter_declaration; - ctx->iter.iterate_immediate = 
iter_immediate; - ctx->iter.iterate_property = iter_property; - - if (flags & TGSI_DUMP_FLOAT_AS_HEX) - ctx->dump_float_as_hex = TRUE; -} - void tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file) { struct dump_ctx ctx; - memset(&ctx, 0, sizeof(ctx)); - init_dump_ctx(&ctx, flags); + ctx.iter.prolog = prolog; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.iterate_property = iter_property; + ctx.iter.epilog = NULL; + ctx.instno = 0; + ctx.immno = 0; + ctx.indent = 0; ctx.dump_printf = dump_ctx_printf; + ctx.indentation = 0; ctx.file = file; tgsi_iterate_shader( tokens, &ctx.iter ); @@ -762,7 +696,6 @@ struct str_dump_ctx char *str; char *ptr; int left; - bool nospace; }; static void @@ -770,7 +703,7 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) { struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx; - if (!sctx->nospace) { + if(sctx->left > 1) { int written; va_list ap; va_start(ap, format); @@ -781,17 +714,14 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) 
* vsnprintf: */ if (written > 0) { - if (written >= sctx->left) { - sctx->nospace = true; - written = sctx->left; - } + written = MIN2(sctx->left, written); sctx->ptr += written; sctx->left -= written; } } } -bool +void tgsi_dump_str( const struct tgsi_token *tokens, uint flags, @@ -799,21 +729,27 @@ tgsi_dump_str( size_t size) { struct str_dump_ctx ctx; - memset(&ctx, 0, sizeof(ctx)); - init_dump_ctx(&ctx.base, flags); + ctx.base.iter.prolog = prolog; + ctx.base.iter.iterate_instruction = iter_instruction; + ctx.base.iter.iterate_declaration = iter_declaration; + ctx.base.iter.iterate_immediate = iter_immediate; + ctx.base.iter.iterate_property = iter_property; + ctx.base.iter.epilog = NULL; + ctx.base.instno = 0; + ctx.base.immno = 0; + ctx.base.indent = 0; ctx.base.dump_printf = &str_dump_ctx_printf; + ctx.base.indentation = 0; + ctx.base.file = NULL; ctx.str = str; ctx.str[0] = 0; ctx.ptr = str; ctx.left = (int)size; - ctx.nospace = false; tgsi_iterate_shader( tokens, &ctx.base.iter ); - - return !ctx.nospace; } void @@ -824,7 +760,6 @@ tgsi_dump_instruction_str( size_t size) { struct str_dump_ctx ctx; - memset(&ctx, 0, sizeof(ctx)); ctx.base.instno = instno; ctx.base.immno = instno; @@ -837,7 +772,6 @@ tgsi_dump_instruction_str( ctx.str[0] = 0; ctx.ptr = str; ctx.left = (int)size; - ctx.nospace = false; iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst ); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c index c41954cbf..75cd0d53c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -58,10 +58,8 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" -#include "util/u_half.h" #include "util/u_memory.h" #include "util/u_math.h" -#include "util/rounding.h" #define DEBUG_EXECUTION 0 @@ -77,8 +75,6 @@ union tgsi_double_channel { double d[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE][2]; - 
uint64_t u64[TGSI_QUAD_SIZE]; - int64_t i64[TGSI_QUAD_SIZE]; }; struct tgsi_double_vector { @@ -127,6 +123,18 @@ micro_ceil(union tgsi_exec_channel *dst, } static void +micro_clamp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; +} + +static void micro_cmp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, @@ -198,16 +206,6 @@ micro_dadd(union tgsi_double_channel *dst, } static void -micro_ddiv(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->d[0] = src[0].d[0] / src[1].d[0]; - dst->d[1] = src[0].d[1] / src[1].d[1]; - dst->d[2] = src[0].d[2] / src[1].d[2]; - dst->d[3] = src[0].d[3] / src[1].d[3]; -} - -static void micro_ddx(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -544,10 +542,10 @@ static void micro_rnd(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { - dst->f[0] = _mesa_roundevenf(src->f[0]); - dst->f[1] = _mesa_roundevenf(src->f[1]); - dst->f[2] = _mesa_roundevenf(src->f[2]); - dst->f[3] = _mesa_roundevenf(src->f[3]); + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); } static void @@ -676,10 +674,10 @@ static void micro_trunc(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { - dst->f[0] = truncf(src->f[0]); - dst->f[1] = truncf(src->f[1]); - dst->f[2] = truncf(src->f[2]); - dst->f[3] = 
truncf(src->f[3]); + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; } static void @@ -692,251 +690,11 @@ micro_u2d(union tgsi_double_channel *dst, dst->d[3] = (double)src->u[3]; } -static void -micro_i64abs(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; - dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; - dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; - dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; -} - -static void -micro_i64sgn(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; - dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; - dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; - dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; -} - -static void -micro_i64neg(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = -src->i64[0]; - dst->i64[1] = -src->i64[1]; - dst->i64[2] = -src->i64[2]; - dst->i64[3] = -src->i64[3]; -} - -static void -micro_u64seq(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; - dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; - dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; - dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; -} - -static void -micro_u64sne(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; - dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; - dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; - dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? 
~0U : 0U; -} - -static void -micro_i64slt(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; - dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; - dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; - dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; -} - -static void -micro_u64slt(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; - dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; - dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; - dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; -} - -static void -micro_i64sge(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; - dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; - dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; - dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; -} - -static void -micro_u64sge(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; - dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; - dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; - dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; -} - -static void -micro_u64max(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; - dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; - dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; - dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; -} - -static void -micro_i64max(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? 
src[0].i64[0] : src[1].i64[0]; - dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; - dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; - dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; -} - -static void -micro_u64min(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; - dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; - dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; - dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; -} - -static void -micro_i64min(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; - dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; - dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; - dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? 
src[0].i64[3] : src[1].i64[3]; -} - -static void -micro_u64add(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u64[0] = src[0].u64[0] + src[1].u64[0]; - dst->u64[1] = src[0].u64[1] + src[1].u64[1]; - dst->u64[2] = src[0].u64[2] + src[1].u64[2]; - dst->u64[3] = src[0].u64[3] + src[1].u64[3]; -} - -static void -micro_u64mul(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u64[0] = src[0].u64[0] * src[1].u64[0]; - dst->u64[1] = src[0].u64[1] * src[1].u64[1]; - dst->u64[2] = src[0].u64[2] * src[1].u64[2]; - dst->u64[3] = src[0].u64[3] * src[1].u64[3]; -} - -static void -micro_u64div(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u64[0] = src[0].u64[0] / src[1].u64[0]; - dst->u64[1] = src[0].u64[1] / src[1].u64[1]; - dst->u64[2] = src[0].u64[2] / src[1].u64[2]; - dst->u64[3] = src[0].u64[3] / src[1].u64[3]; -} - -static void -micro_i64div(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = src[0].i64[0] / src[1].i64[0]; - dst->i64[1] = src[0].i64[1] / src[1].i64[1]; - dst->i64[2] = src[0].i64[2] / src[1].i64[2]; - dst->i64[3] = src[0].i64[3] / src[1].i64[3]; -} - -static void -micro_u64mod(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u64[0] = src[0].u64[0] % src[1].u64[0]; - dst->u64[1] = src[0].u64[1] % src[1].u64[1]; - dst->u64[2] = src[0].u64[2] % src[1].u64[2]; - dst->u64[3] = src[0].u64[3] % src[1].u64[3]; -} - -static void -micro_i64mod(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = src[0].i64[0] % src[1].i64[0]; - dst->i64[1] = src[0].i64[1] % src[1].i64[1]; - dst->i64[2] = src[0].i64[2] % src[1].i64[2]; - dst->i64[3] = src[0].i64[3] % src[1].i64[3]; -} - -static void -micro_u64shl(union tgsi_double_channel *dst, - const union tgsi_double_channel *src0, - union tgsi_exec_channel *src1) -{ - unsigned masked_count; - masked_count = src1->u[0] & 
0x3f; - dst->u64[0] = src0->u64[0] << masked_count; - masked_count = src1->u[1] & 0x3f; - dst->u64[1] = src0->u64[1] << masked_count; - masked_count = src1->u[2] & 0x3f; - dst->u64[2] = src0->u64[2] << masked_count; - masked_count = src1->u[3] & 0x3f; - dst->u64[3] = src0->u64[3] << masked_count; -} - -static void -micro_i64shr(union tgsi_double_channel *dst, - const union tgsi_double_channel *src0, - union tgsi_exec_channel *src1) -{ - unsigned masked_count; - masked_count = src1->u[0] & 0x3f; - dst->i64[0] = src0->i64[0] >> masked_count; - masked_count = src1->u[1] & 0x3f; - dst->i64[1] = src0->i64[1] >> masked_count; - masked_count = src1->u[2] & 0x3f; - dst->i64[2] = src0->i64[2] >> masked_count; - masked_count = src1->u[3] & 0x3f; - dst->i64[3] = src0->i64[3] >> masked_count; -} - -static void -micro_u64shr(union tgsi_double_channel *dst, - const union tgsi_double_channel *src0, - union tgsi_exec_channel *src1) -{ - unsigned masked_count; - masked_count = src1->u[0] & 0x3f; - dst->u64[0] = src0->u64[0] >> masked_count; - masked_count = src1->u[1] & 0x3f; - dst->u64[1] = src0->u64[1] >> masked_count; - masked_count = src1->u[2] & 0x3f; - dst->u64[2] = src0->u64[2] >> masked_count; - masked_count = src1->u[3] & 0x3f; - dst->u64[3] = src0->u64[3] >> masked_count; -} - enum tgsi_exec_datatype { TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT, - TGSI_EXEC_DATA_DOUBLE, - TGSI_EXEC_DATA_INT64, - TGSI_EXEC_DATA_UINT64, + TGSI_EXEC_DATA_DOUBLE }; /* @@ -1094,9 +852,7 @@ void tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, - struct tgsi_sampler *sampler, - struct tgsi_image *image, - struct tgsi_buffer *buffer) + struct tgsi_sampler *sampler) { uint k; struct tgsi_parse_context parse; @@ -1114,8 +870,6 @@ tgsi_exec_machine_bind_shader( mach->Tokens = tokens; mach->Sampler = sampler; - mach->Image = image; - mach->Buffer = buffer; if (!tokens) { /* unbind and free all */ @@ -1136,13 +890,11 @@ 
tgsi_exec_machine_bind_shader( return; } + mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; mach->NumOutputs = 0; - for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) - mach->SysSemanticToIndex[k] = -1; - - if (mach->ShaderType == PIPE_SHADER_GEOMETRY && + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && !mach->UsedGeometryShader) { struct tgsi_exec_vector *inputs; struct tgsi_exec_vector *outputs; @@ -1208,11 +960,6 @@ tgsi_exec_machine_bind_shader( ++mach->NumOutputs; } } - else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { - const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; - } - memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -1253,7 +1000,7 @@ tgsi_exec_machine_bind_shader( break; case TGSI_TOKEN_TYPE_PROPERTY: - if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) { if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; } @@ -1277,7 +1024,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine * -tgsi_exec_machine_create(enum pipe_shader_type shader_type) +tgsi_exec_machine_create( void ) { struct tgsi_exec_machine *mach; uint i; @@ -1288,16 +1035,14 @@ tgsi_exec_machine_create(enum pipe_shader_type shader_type) memset(mach, 0, sizeof(*mach)); - mach->ShaderType = shader_type; mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; + mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; - if (shader_type != PIPE_SHADER_COMPUTE) { - mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); - mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); - if (!mach->Inputs || !mach->Outputs) - 
goto fail; - } + mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); + mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); + if (!mach->Inputs || !mach->Outputs) + goto fail; /* Setup constants needed by the SSE2 executor. */ for( i = 0; i < 4; i++ ) { @@ -1510,7 +1255,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, case TGSI_FILE_INPUT: for (i = 0; i < TGSI_QUAD_SIZE; i++) { /* - if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], index2D->i[i], index->i[i]); @@ -1527,7 +1272,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, * gl_FragCoord, for example, in a sys value register. */ for (i = 0; i < TGSI_QUAD_SIZE; i++) { - chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; + chan->u[i] = mach->SystemValue[index->i[i]].u[i]; } break; @@ -1558,6 +1303,15 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_PREDICATE: + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); + assert(index2D->i[i] == 0); + + chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; + } + break; + case TGSI_FILE_OUTPUT: /* vertex/fragment output vars can be read too */ for (i = 0; i < TGSI_QUAD_SIZE; i++) { @@ -1760,9 +1514,11 @@ store_dest_dstret(struct tgsi_exec_machine *mach, uint chan_index, enum tgsi_exec_datatype dst_datatype) { + uint i; static union tgsi_exec_channel null; union tgsi_exec_channel *dst; union tgsi_exec_channel index2D; + uint execmask = mach->ExecMask; int offset = 0; /* indirection offset */ int index; @@ -1893,7 +1649,7 @@ store_dest_dstret(struct tgsi_exec_machine *mach, debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", mach->NumOutputs, 
mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], reg->Register.Index); - if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) @@ -1914,11 +1670,58 @@ store_dest_dstret(struct tgsi_exec_machine *mach, dst = &mach->Addrs[index].xyzw[chan_index]; break; + case TGSI_FILE_PREDICATE: + index = reg->Register.Index; + assert(index < TGSI_EXEC_NUM_PREDS); + dst = &mach->Predicates[index].xyzw[chan_index]; + break; + default: assert( 0 ); return NULL; } + if (inst->Instruction.Predicate) { + uint swizzle; + union tgsi_exec_channel *pred; + + switch (chan_index) { + case TGSI_CHAN_X: + swizzle = inst->Predicate.SwizzleX; + break; + case TGSI_CHAN_Y: + swizzle = inst->Predicate.SwizzleY; + break; + case TGSI_CHAN_Z: + swizzle = inst->Predicate.SwizzleZ; + break; + case TGSI_CHAN_W: + swizzle = inst->Predicate.SwizzleW; + break; + default: + assert(0); + return NULL; + } + + assert(inst->Predicate.Index == 0); + + pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; + + if (inst->Predicate.Negate) { + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + if (pred->u[i]) { + execmask &= ~(1 << i); + } + } + } else { + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + if (!pred->u[i]) { + execmask &= ~(1 << i); + } + } + } + } + return dst; } @@ -2080,7 +1883,7 @@ emit_primitive(struct tgsi_exec_machine *mach) static void conditional_emit_primitive(struct tgsi_exec_machine *mach) { - if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { int emitted_verts = mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; if (emitted_verts) { @@ -2190,12 +1993,12 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, uint sampler) { - uint unit = 0; - int i; + uint unit; + if (inst->Src[sampler].Register.Indirect) 
{ const struct tgsi_full_src_register *reg = &inst->Src[sampler]; union tgsi_exec_channel indir_index, index2; - const uint execmask = mach->ExecMask; + index2.i[0] = index2.i[1] = index2.i[2] = @@ -2208,13 +2011,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach, &index2, &ZeroVec, &indir_index); - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - if (execmask & (1 << i)) { - unit = inst->Src[sampler].Register.Index + indir_index.i[i]; - break; - } - } - + unit = inst->Src[sampler].Register.Index + indir_index.i[0]; } else { unit = inst->Src[sampler].Register.Index; } @@ -2224,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach, /* * execute a texture instruction. * - * modifier is used to control the channel routing for the + * modifier is used to control the channel routing for the\ * instruction variants like proj, lod, and texture with lod bias. * sampler indicates which src register the sampler is contained in. */ @@ -2235,7 +2032,7 @@ exec_tex(struct tgsi_exec_machine *mach, { const union tgsi_exec_channel *args[5], *proj = NULL; union tgsi_exec_channel r[5]; - enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; + enum tgsi_sampler_control control = tgsi_sampler_lod_none; uint chan; uint unit; int8_t offsets[3]; @@ -2248,16 +2045,15 @@ exec_tex(struct tgsi_exec_machine *mach, assert(modifier != TEX_MODIFIER_LEVEL_ZERO); assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); - dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); - shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref); assert(dim <= 4); if (shadow_ref >= 0) - assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args)); + assert(shadow_ref >= dim && shadow_ref < Elements(args)); /* fetch modifier to the last argument */ if (modifier != TEX_MODIFIER_NONE) { - const int last = ARRAY_SIZE(args) - 1; + const int last = Elements(args) - 1; /* fetch modifier from src0.w or src1.x */ if 
(sampler == 1) { @@ -2282,14 +2078,14 @@ exec_tex(struct tgsi_exec_machine *mach, args[i] = &ZeroVec; if (modifier == TEX_MODIFIER_EXPLICIT_LOD) - control = TGSI_SAMPLER_LOD_EXPLICIT; + control = tgsi_sampler_lod_explicit; else if (modifier == TEX_MODIFIER_LOD_BIAS) - control = TGSI_SAMPLER_LOD_BIAS; + control = tgsi_sampler_lod_bias; else if (modifier == TEX_MODIFIER_GATHER) - control = TGSI_SAMPLER_GATHER; + control = tgsi_sampler_gather; } else { - for (i = dim; i < ARRAY_SIZE(args); i++) + for (i = dim; i < Elements(args); i++) args[i] = &ZeroVec; } @@ -2336,46 +2132,6 @@ exec_tex(struct tgsi_exec_machine *mach, } } -static void -exec_lodq(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint unit; - int dim; - int i; - union tgsi_exec_channel coords[4]; - const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; - union tgsi_exec_channel r[2]; - - unit = fetch_sampler_unit(mach, inst, 1); - dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); - assert(dim <= ARRAY_SIZE(coords)); - /* fetch coordinates */ - for (i = 0; i < dim; i++) { - FETCH(&coords[i], 0, TGSI_CHAN_X + i); - args[i] = &coords[i]; - } - for (i = dim; i < ARRAY_SIZE(coords); i++) { - args[i] = &ZeroVec; - } - mach->Sampler->query_lod(mach->Sampler, unit, unit, - args[0]->f, - args[1]->f, - args[2]->f, - args[3]->f, - TGSI_SAMPLER_LOD_NONE, - r[0].f, - r[1].f); - - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, - TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, - TGSI_EXEC_DATA_FLOAT); - } -} static void exec_txd(struct tgsi_exec_machine *mach, @@ -2399,7 +2155,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, 
tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2415,7 +2171,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2429,7 +2185,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2449,7 +2205,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2469,7 +2225,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2503,8 +2259,7 @@ exec_txf(struct tgsi_exec_machine *mach, IFETCH(&r[3], 0, TGSI_CHAN_W); - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || - inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { target = mach->SamplerViews[unit].Resource; } else { @@ -2546,8 +2301,7 @@ exec_txf(struct tgsi_exec_machine *mach, r[3].f[j] = rgba[3][j]; } - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || - inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { unsigned char swizzles[4]; swizzles[0] = inst->Src[1].Register.SwizzleX; swizzles[1] = 
inst->Src[1].Register.SwizzleY; @@ -2610,7 +2364,7 @@ exec_sample(struct tgsi_exec_machine *mach, const uint sampler_unit = inst->Src[2].Register.Index; union tgsi_exec_channel r[5], c1; const union tgsi_exec_channel *lod = &ZeroVec; - enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; + enum tgsi_sampler_control control = tgsi_sampler_lod_none; uint chan; unsigned char swizzles[4]; int8_t offsets[3]; @@ -2624,16 +2378,16 @@ exec_sample(struct tgsi_exec_machine *mach, if (modifier == TEX_MODIFIER_LOD_BIAS) { FETCH(&c1, 3, TGSI_CHAN_X); lod = &c1; - control = TGSI_SAMPLER_LOD_BIAS; + control = tgsi_sampler_lod_bias; } else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { FETCH(&c1, 3, TGSI_CHAN_X); lod = &c1; - control = TGSI_SAMPLER_LOD_EXPLICIT; + control = tgsi_sampler_lod_explicit; } else { assert(modifier == TEX_MODIFIER_LEVEL_ZERO); - control = TGSI_SAMPLER_LOD_ZERO; + control = tgsi_sampler_lod_zero; } } @@ -2759,7 +2513,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2775,7 +2529,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2793,7 +2547,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, - derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, + derivs, offsets, tgsi_sampler_derivs_explicit, &r[0], &r[1], &r[2], &r[3]); break; @@ -2891,7 +2645,7 @@ exec_declaration(struct tgsi_exec_machine *mach, return; } - if (mach->ShaderType == 
PIPE_SHADER_FRAGMENT) { + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { if (decl->Declaration.File == TGSI_FILE_INPUT) { uint first, last, mask; @@ -2969,6 +2723,9 @@ exec_declaration(struct tgsi_exec_machine *mach, } } + if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; + } } typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, @@ -3261,85 +3018,6 @@ exec_dp2(struct tgsi_exec_machine *mach, } static void -exec_pk2h(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned chan; - union tgsi_exec_channel arg[2], dst; - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { - dst.u[chan] = util_float_to_half(arg[0].f[chan]) | - (util_float_to_half(arg[1].f[chan]) << 16); - } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); - } - } -} - -static void -exec_up2h(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned chan; - union tgsi_exec_channel arg, dst[2]; - - fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); - for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { - dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); - dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); - } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void -micro_ucmp(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2) -{ - dst->f[0] = src0->u[0] ? 
src1->f[0] : src2->f[0]; - dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; - dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; - dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; -} - -static void -exec_ucmp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned int chan; - struct tgsi_exec_vector dst; - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - union tgsi_exec_channel src[3]; - - fetch_source(mach, &src[0], &inst->Src[0], chan, - TGSI_EXEC_DATA_UINT); - fetch_source(mach, &src[1], &inst->Src[1], chan, - TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &src[2], &inst->Src[2], chan, - TGSI_EXEC_DATA_FLOAT); - micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); - } - } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, - TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void exec_scs(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -3617,16 +3295,6 @@ exec_endswitch(struct tgsi_exec_machine *mach) typedef void (* micro_dop)(union tgsi_double_channel *dst, const union tgsi_double_channel *src); -typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, - const union tgsi_double_channel *src0, - union tgsi_exec_channel *src1); - -typedef void (* micro_dop_s)(union tgsi_double_channel *dst, - const union tgsi_exec_channel *src); - -typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, - const union tgsi_double_channel *src); - static void fetch_double_channel(struct tgsi_exec_machine *mach, union tgsi_double_channel *chan, @@ -3779,805 +3447,177 @@ exec_double_trinary(struct tgsi_exec_machine *mach, } static void -exec_dldexp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_f2d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_double_channel 
src0; - union tgsi_exec_channel src1; + union tgsi_exec_channel src; union tgsi_double_channel dst; - int wmask; - wmask = inst->Dst[0].Register.WriteMask; - if (wmask & TGSI_WRITEMASK_XY) { - fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); - micro_dldexp(&dst, &src0, &src1); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_f2d(&dst, &src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); } - - if (wmask & TGSI_WRITEMASK_ZW) { - fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); - micro_dldexp(&dst, &src0, &src1); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_f2d(&dst, &src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); } } static void -exec_dfracexp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_d2f(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { union tgsi_double_channel src; - union tgsi_double_channel dst; - union tgsi_exec_channel dst_exp; - - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - micro_dfracexp(&dst, &dst_exp, &src); - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); - store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); - } - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, 
TGSI_CHAN_W); - micro_dfracexp(&dst, &dst_exp, &src); - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); - store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); + union tgsi_exec_channel dst; + int wm = inst->Dst[0].Register.WriteMask; + int i; + int bit; + for (i = 0; i < 2; i++) { + bit = ffs(wm); + if (bit) { + wm &= ~(1 << (bit - 1)); + if (i == 0) + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + else + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + micro_d2f(&dst, &src); + store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_FLOAT); + } } } static void -exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - micro_dop_sop op) +exec_i2d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_double_channel src0; - union tgsi_exec_channel src1; + union tgsi_exec_channel src; union tgsi_double_channel dst; - int wmask; - wmask = inst->Dst[0].Register.WriteMask; - if (wmask & TGSI_WRITEMASK_XY) { - fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); - op(&dst, &src0, &src1); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); + micro_i2d(&dst, &src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); } - - if (wmask & TGSI_WRITEMASK_ZW) { - fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); - op(&dst, &src0, &src1); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_INT); + micro_i2d(&dst, 
&src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); } } -static int -get_image_coord_dim(unsigned tgsi_tex) -{ - int dim; - switch (tgsi_tex) { - case TGSI_TEXTURE_BUFFER: - case TGSI_TEXTURE_1D: - dim = 1; - break; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_1D_ARRAY: - case TGSI_TEXTURE_2D_MSAA: - dim = 2; - break; - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - case TGSI_TEXTURE_2D_ARRAY: - case TGSI_TEXTURE_2D_ARRAY_MSAA: - case TGSI_TEXTURE_CUBE_ARRAY: - dim = 3; - break; - default: - assert(!"unknown texture target"); - dim = 0; - break; - } - - return dim; -} - -static int -get_image_coord_sample(unsigned tgsi_tex) -{ - int sample = 0; - switch (tgsi_tex) { - case TGSI_TEXTURE_2D_MSAA: - sample = 3; - break; - case TGSI_TEXTURE_2D_ARRAY_MSAA: - sample = 4; - break; - default: - break; - } - return sample; -} - -static void -exec_load_img(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[4], sample_r; - uint unit; - int sample; - int i, j; - int dim; - uint chan; - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - struct tgsi_image_params params; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - - unit = fetch_sampler_unit(mach, inst, 0); - dim = get_image_coord_dim(inst->Memory.Texture); - sample = get_image_coord_sample(inst->Memory.Texture); - assert(dim <= 3); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - params.tgsi_tex_instr = inst->Memory.Texture; - params.format = inst->Memory.Format; - - for (i = 0; i < dim; i++) { - IFETCH(&r[i], 1, TGSI_CHAN_X + i); - } - - if (sample) - IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); - - mach->Image->load(mach->Image, ¶ms, - r[0].i, r[1].i, r[2].i, sample_r.i, - rgba); - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - r[0].f[j] = rgba[0][j]; - r[1].f[j] = rgba[1][j]; - r[2].f[j] = rgba[2][j]; - r[3].f[j] = rgba[3][j]; - } - for (chan 
= 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void -exec_load_buf(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[4]; - uint unit; - int j; - uint chan; - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - struct tgsi_buffer_params params; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - - unit = fetch_sampler_unit(mach, inst, 0); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - IFETCH(&r[0], 1, TGSI_CHAN_X); - - mach->Buffer->load(mach->Buffer, ¶ms, - r[0].i, rgba); - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - r[0].f[j] = rgba[0][j]; - r[1].f[j] = rgba[1][j]; - r[2].f[j] = rgba[2][j]; - r[3].f[j] = rgba[3][j]; - } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - static void -exec_load_mem(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[4]; - uint chan; - char *ptr = mach->LocalMem; - uint32_t offset; - int j; - - IFETCH(&r[0], 1, TGSI_CHAN_X); - if (r[0].u[0] >= mach->LocalMemSize) - return; - - offset = r[0].u[0]; - ptr += offset; - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - memcpy(&r[chan].u[j], ptr + (4 * chan), 4); - } - } - } - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void -exec_load(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 
- exec_load_img(mach, inst); - else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) - exec_load_buf(mach, inst); - else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) - exec_load_mem(mach, inst); -} - -static void -exec_store_img(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[3], sample_r; - union tgsi_exec_channel value[4]; - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - struct tgsi_image_params params; - int dim; - int sample; - int i, j; - uint unit; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - unit = inst->Dst[0].Register.Index; - dim = get_image_coord_dim(inst->Memory.Texture); - sample = get_image_coord_sample(inst->Memory.Texture); - assert(dim <= 3); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - params.tgsi_tex_instr = inst->Memory.Texture; - params.format = inst->Memory.Format; - - for (i = 0; i < dim; i++) { - IFETCH(&r[i], 0, TGSI_CHAN_X + i); - } - - for (i = 0; i < 4; i++) { - FETCH(&value[i], 1, TGSI_CHAN_X + i); - } - if (sample) - IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba[0][j] = value[0].f[j]; - rgba[1][j] = value[1].f[j]; - rgba[2][j] = value[2].f[j]; - rgba[3][j] = value[3].f[j]; - } - - mach->Image->store(mach->Image, ¶ms, - r[0].i, r[1].i, r[2].i, sample_r.i, - rgba); -} - -static void -exec_store_buf(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[3]; - union tgsi_exec_channel value[4]; - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - struct tgsi_buffer_params params; - int i, j; - uint unit; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - - unit = inst->Dst[0].Register.Index; - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - params.writemask = inst->Dst[0].Register.WriteMask; - - IFETCH(&r[0], 0, TGSI_CHAN_X); - for 
(i = 0; i < 4; i++) { - FETCH(&value[i], 1, TGSI_CHAN_X + i); - } - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba[0][j] = value[0].f[j]; - rgba[1][j] = value[1].f[j]; - rgba[2][j] = value[2].f[j]; - rgba[3][j] = value[3].f[j]; - } - - mach->Buffer->store(mach->Buffer, ¶ms, - r[0].i, - rgba); -} - -static void -exec_store_mem(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[3]; - union tgsi_exec_channel value[4]; - uint i, chan; - char *ptr = mach->LocalMem; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - - IFETCH(&r[0], 0, TGSI_CHAN_X); - - for (i = 0; i < 4; i++) { - FETCH(&value[i], 1, TGSI_CHAN_X + i); - } - - if (r[0].u[0] >= mach->LocalMemSize) - return; - ptr += r[0].u[0]; - - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - if (execmask & (1 << i)) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - memcpy(ptr + (chan * 4), &value[chan].u[0], 4); - } - } - } - } -} - -static void -exec_store(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) - exec_store_img(mach, inst); - else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) - exec_store_buf(mach, inst); - else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) - exec_store_mem(mach, inst); -} - -static void -exec_atomop_img(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_d2i(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_exec_channel r[4], sample_r; - union tgsi_exec_channel value[4], value2[4]; - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - struct tgsi_image_params params; - int dim; - int sample; - int i, j; - uint unit, chan; - int kilmask = 
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - unit = fetch_sampler_unit(mach, inst, 0); - dim = get_image_coord_dim(inst->Memory.Texture); - sample = get_image_coord_sample(inst->Memory.Texture); - assert(dim <= 3); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - params.tgsi_tex_instr = inst->Memory.Texture; - params.format = inst->Memory.Format; - - for (i = 0; i < dim; i++) { - IFETCH(&r[i], 1, TGSI_CHAN_X + i); - } - - for (i = 0; i < 4; i++) { - FETCH(&value[i], 2, TGSI_CHAN_X + i); - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - FETCH(&value2[i], 3, TGSI_CHAN_X + i); - } - if (sample) - IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba[0][j] = value[0].f[j]; - rgba[1][j] = value[1].f[j]; - rgba[2][j] = value[2].f[j]; - rgba[3][j] = value[3].f[j]; - } - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba2[0][j] = value2[0].f[j]; - rgba2[1][j] = value2[1].f[j]; - rgba2[2][j] = value2[2].f[j]; - rgba2[3][j] = value2[3].f[j]; - } - } - - mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, - r[0].i, r[1].i, r[2].i, sample_r.i, - rgba, rgba2); - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - r[0].f[j] = rgba[0][j]; - r[1].f[j] = rgba[1][j]; - r[2].f[j] = rgba[2][j]; - r[3].f[j] = rgba[3][j]; - } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + union tgsi_double_channel src; + union tgsi_exec_channel dst; + int wm = inst->Dst[0].Register.WriteMask; + int i; + int bit; + for (i = 0; i < 2; i++) { + bit = ffs(wm); + if (bit) { + wm &= ~(1 << (bit - 1)); + if (i == 0) + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + else + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + micro_d2i(&dst, &src); + store_dest(mach, 
&dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_INT); } } } - static void -exec_atomop_buf(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_u2d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_exec_channel r[4]; - union tgsi_exec_channel value[4], value2[4]; - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - struct tgsi_buffer_params params; - int i, j; - uint unit, chan; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - - unit = fetch_sampler_unit(mach, inst, 0); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - params.writemask = inst->Dst[0].Register.WriteMask; - - IFETCH(&r[0], 1, TGSI_CHAN_X); - - for (i = 0; i < 4; i++) { - FETCH(&value[i], 2, TGSI_CHAN_X + i); - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - FETCH(&value2[i], 3, TGSI_CHAN_X + i); - } - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba[0][j] = value[0].f[j]; - rgba[1][j] = value[1].f[j]; - rgba[2][j] = value[2].f[j]; - rgba[3][j] = value[3].f[j]; - } - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba2[0][j] = value2[0].f[j]; - rgba2[1][j] = value2[1].f[j]; - rgba2[2][j] = value2[2].f[j]; - rgba2[3][j] = value2[3].f[j]; - } - } - - mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode, - r[0].i, - rgba, rgba2); + union tgsi_exec_channel src; + union tgsi_double_channel dst; - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - r[0].f[j] = rgba[0][j]; - r[1].f[j] = rgba[1][j]; - r[2].f[j] = rgba[2][j]; - r[3].f[j] = rgba[3][j]; + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); + micro_u2d(&dst, &src); + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - 
if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_UINT); + micro_u2d(&dst, &src); + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); } } static void -exec_atomop_mem(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_d2u(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_exec_channel r[4]; - union tgsi_exec_channel value[4], value2[4]; - char *ptr = mach->LocalMem; - uint32_t val; - uint chan, i; - uint32_t offset; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - IFETCH(&r[0], 1, TGSI_CHAN_X); - - if (r[0].u[0] >= mach->LocalMemSize) - return; - - offset = r[0].u[0]; - ptr += offset; - for (i = 0; i < 4; i++) { - FETCH(&value[i], 2, TGSI_CHAN_X + i); - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - FETCH(&value2[i], 3, TGSI_CHAN_X + i); - } - - memcpy(&r[0].u[0], ptr, 4); - val = r[0].u[0]; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ATOMUADD: - val += value[0].u[0]; - break; - case TGSI_OPCODE_ATOMXOR: - val ^= value[0].u[0]; - break; - case TGSI_OPCODE_ATOMOR: - val |= value[0].u[0]; - break; - case TGSI_OPCODE_ATOMAND: - val &= value[0].u[0]; - break; - case TGSI_OPCODE_ATOMUMIN: - val = MIN2(val, value[0].u[0]); - break; - case TGSI_OPCODE_ATOMUMAX: - val = MAX2(val, value[0].u[0]); - break; - case TGSI_OPCODE_ATOMIMIN: - val = MIN2(r[0].i[0], value[0].i[0]); - break; - case TGSI_OPCODE_ATOMIMAX: - val = MAX2(r[0].i[0], value[0].i[0]); - break; - case TGSI_OPCODE_ATOMXCHG: - val = value[0].i[0]; - break; - case TGSI_OPCODE_ATOMCAS: - if (val == value[0].u[0]) - val = value2[0].u[0]; - break; - default: - break; - } 
- for (i = 0; i < TGSI_QUAD_SIZE; i++) - if (execmask & (1 << i)) - memcpy(ptr, &val, 4); - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + union tgsi_double_channel src; + union tgsi_exec_channel dst; + int wm = inst->Dst[0].Register.WriteMask; + int i; + int bit; + for (i = 0; i < 2; i++) { + bit = ffs(wm); + if (bit) { + wm &= ~(1 << (bit - 1)); + if (i == 0) + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + else + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + micro_d2u(&dst, &src); + store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_UINT); } } } static void -exec_atomop(struct tgsi_exec_machine *mach, +exec_dldexp(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) - exec_atomop_img(mach, inst); - else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) - exec_atomop_buf(mach, inst); - else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) - exec_atomop_mem(mach, inst); -} - -static void -exec_resq_img(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - int result[4]; - union tgsi_exec_channel r[4]; - uint unit; - int i, chan, j; - struct tgsi_image_params params; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - - unit = fetch_sampler_unit(mach, inst, 0); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - params.tgsi_tex_instr = inst->Memory.Texture; - params.format = inst->Memory.Format; - - mach->Image->get_dims(mach->Image, ¶ms, result); + union tgsi_double_channel src0; + union tgsi_exec_channel src1; + union tgsi_double_channel dst; + int wmask; - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - for (j = 0; j < 4; j++) { - r[j].i[i] = result[j]; - } + wmask = 
inst->Dst[0].Register.WriteMask; + if (wmask & TGSI_WRITEMASK_XY) { + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); + micro_dldexp(&dst, &src0, &src1); + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, - TGSI_EXEC_DATA_INT); - } + if (wmask & TGSI_WRITEMASK_ZW) { + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); + micro_dldexp(&dst, &src0, &src1); + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); } } static void -exec_resq_buf(struct tgsi_exec_machine *mach, +exec_dfracexp(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - int result; - union tgsi_exec_channel r[4]; - uint unit; - int i, chan; - struct tgsi_buffer_params params; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - - unit = fetch_sampler_unit(mach, inst, 0); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - - mach->Buffer->get_dims(mach->Buffer, ¶ms, &result); - - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - r[0].i[i] = result; - } - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, - TGSI_EXEC_DATA_INT); - } - } -} - -static void -exec_resq(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) - exec_resq_img(mach, inst); - else - exec_resq_buf(mach, inst); -} - -static void -micro_f2u64(union tgsi_double_channel *dst, - const union tgsi_exec_channel *src) -{ - dst->u64[0] = 
(uint64_t)src->f[0]; - dst->u64[1] = (uint64_t)src->f[1]; - dst->u64[2] = (uint64_t)src->f[2]; - dst->u64[3] = (uint64_t)src->f[3]; -} - -static void -micro_f2i64(union tgsi_double_channel *dst, - const union tgsi_exec_channel *src) -{ - dst->i64[0] = (int64_t)src->f[0]; - dst->i64[1] = (int64_t)src->f[1]; - dst->i64[2] = (int64_t)src->f[2]; - dst->i64[3] = (int64_t)src->f[3]; -} - -static void -micro_u2i64(union tgsi_double_channel *dst, - const union tgsi_exec_channel *src) -{ - dst->u64[0] = (uint64_t)src->u[0]; - dst->u64[1] = (uint64_t)src->u[1]; - dst->u64[2] = (uint64_t)src->u[2]; - dst->u64[3] = (uint64_t)src->u[3]; -} - -static void -micro_i2i64(union tgsi_double_channel *dst, - const union tgsi_exec_channel *src) -{ - dst->i64[0] = (int64_t)src->i[0]; - dst->i64[1] = (int64_t)src->i[1]; - dst->i64[2] = (int64_t)src->i[2]; - dst->i64[3] = (int64_t)src->i[3]; -} - -static void -micro_d2u64(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->u64[0] = (uint64_t)src->d[0]; - dst->u64[1] = (uint64_t)src->d[1]; - dst->u64[2] = (uint64_t)src->d[2]; - dst->u64[3] = (uint64_t)src->d[3]; -} - -static void -micro_d2i64(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->i64[0] = (int64_t)src->d[0]; - dst->i64[1] = (int64_t)src->d[1]; - dst->i64[2] = (int64_t)src->d[2]; - dst->i64[3] = (int64_t)src->d[3]; -} - -static void -micro_u642d(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->d[0] = (double)src->u64[0]; - dst->d[1] = (double)src->u64[1]; - dst->d[2] = (double)src->u64[2]; - dst->d[3] = (double)src->u64[3]; -} - -static void -micro_i642d(union tgsi_double_channel *dst, - const union tgsi_double_channel *src) -{ - dst->d[0] = (double)src->i64[0]; - dst->d[1] = (double)src->i64[1]; - dst->d[2] = (double)src->i64[2]; - dst->d[3] = (double)src->i64[3]; -} - -static void -micro_u642f(union tgsi_exec_channel *dst, - const union tgsi_double_channel *src) -{ - dst->f[0] = 
(float)src->u64[0]; - dst->f[1] = (float)src->u64[1]; - dst->f[2] = (float)src->u64[2]; - dst->f[3] = (float)src->u64[3]; -} - -static void -micro_i642f(union tgsi_exec_channel *dst, - const union tgsi_double_channel *src) -{ - dst->f[0] = (float)src->i64[0]; - dst->f[1] = (float)src->i64[1]; - dst->f[2] = (float)src->i64[2]; - dst->f[3] = (float)src->i64[3]; -} - -static void -exec_t_2_64(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - micro_dop_s op, - enum tgsi_exec_datatype src_datatype) -{ - union tgsi_exec_channel src; + union tgsi_double_channel src; union tgsi_double_channel dst; + union tgsi_exec_channel dst_exp; - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); - op(&dst, &src); + if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) { + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + micro_dfracexp(&dst, &dst_exp, &src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); + store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); } - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { - fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); - op(&dst, &src); + if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) { + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); + micro_dfracexp(&dst, &dst_exp, &src); store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); + store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); } } -static void -exec_64_2_t(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - micro_sop_d op, - enum tgsi_exec_datatype dst_datatype) -{ - union tgsi_double_channel src; 
- union tgsi_exec_channel dst; - int wm = inst->Dst[0].Register.WriteMask; - int i; - int bit; - for (i = 0; i < 2; i++) { - bit = ffs(wm); - if (bit) { - wm &= ~(1 << (bit - 1)); - if (i == 0) - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - else - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - op(&dst, &src); - store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype); - } - } -} static void micro_i2f(union tgsi_exec_channel *dst, @@ -4978,6 +4018,18 @@ micro_uarl(union tgsi_exec_channel *dst, dst->i[3] = src->u[3]; } +static void +micro_ucmp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; + dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; + dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; + dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; +} + /** * Signed bitfield extract (i.e. sign-extend the extracted bits) */ @@ -5091,12 +4143,7 @@ micro_umsb(union tgsi_exec_channel *dst, dst->i[3] = util_last_bit(src->u[3]) - 1; } -/** - * Execute a TGSI instruction. - * Returns TRUE if a barrier instruction is hit, - * otherwise FALSE. 
- */ -static boolean +static void exec_instruction( struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, @@ -5175,6 +4222,10 @@ exec_instruction( exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; + case TGSI_OPCODE_SUB: + exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_LRP: exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -5191,6 +4242,10 @@ exec_instruction( exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; + case TGSI_OPCODE_CLAMP: + exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_FLR: exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -5215,6 +4270,10 @@ exec_instruction( exec_xpd(mach, inst); break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_DPH: exec_dph(mach, inst); break; @@ -5240,7 +4299,7 @@ exec_instruction( break; case TGSI_OPCODE_PK2H: - exec_pk2h(mach, inst); + assert (0); break; case TGSI_OPCODE_PK2US: @@ -5319,14 +4378,8 @@ exec_instruction( exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); break; - case TGSI_OPCODE_LODQ: - /* src[0] = texcoord */ - /* src[1] = sampler unit */ - exec_lodq(mach, inst); - break; - case TGSI_OPCODE_UP2H: - exec_up2h(mach, inst); + assert (0); break; case TGSI_OPCODE_UP2US: @@ -5402,12 +4455,8 @@ exec_instruction( /* returning from main() */ mach->CondStackTop = 0; mach->LoopStackTop = 0; - mach->ContStackTop = 0; - mach->LoopLabelStackTop = 0; - mach->SwitchStackTop = 0; - mach->BreakStackTop = 0; *pc = -1; - return FALSE; + return; } assert(mach->CallStackTop > 0); @@ -5832,7 +4881,7 @@ exec_instruction( break; case TGSI_OPCODE_SAMPLE_I_MS: - exec_txf(mach, inst); + assert(0); break; case 
TGSI_OPCODE_SAMPLE: @@ -5880,7 +4929,7 @@ exec_instruction( break; case TGSI_OPCODE_UCMP: - exec_ucmp(mach, inst); + exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_IABS: @@ -5939,11 +4988,11 @@ exec_instruction( break; case TGSI_OPCODE_F2D: - exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); + exec_f2d(mach, inst); break; case TGSI_OPCODE_D2F: - exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT); + exec_d2f(mach, inst); break; case TGSI_OPCODE_DABS: @@ -5958,10 +5007,6 @@ exec_instruction( exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); break; - case TGSI_OPCODE_DDIV: - exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); - break; - case TGSI_OPCODE_DMUL: exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); break; @@ -6019,184 +5064,47 @@ exec_instruction( break; case TGSI_OPCODE_I2D: - exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT); + exec_i2d(mach, inst); break; case TGSI_OPCODE_D2I: - exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT); + exec_d2i(mach, inst); break; case TGSI_OPCODE_U2D: - exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT); + exec_u2d(mach, inst); break; case TGSI_OPCODE_D2U: - exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT); - break; - - case TGSI_OPCODE_LOAD: - exec_load(mach, inst); - break; - - case TGSI_OPCODE_STORE: - exec_store(mach, inst); - break; - - case TGSI_OPCODE_ATOMUADD: - case TGSI_OPCODE_ATOMXCHG: - case TGSI_OPCODE_ATOMCAS: - case TGSI_OPCODE_ATOMAND: - case TGSI_OPCODE_ATOMOR: - case TGSI_OPCODE_ATOMXOR: - case TGSI_OPCODE_ATOMUMIN: - case TGSI_OPCODE_ATOMUMAX: - case TGSI_OPCODE_ATOMIMIN: - case TGSI_OPCODE_ATOMIMAX: - exec_atomop(mach, inst); - break; - - case TGSI_OPCODE_RESQ: - exec_resq(mach, inst); - break; - case TGSI_OPCODE_BARRIER: - case TGSI_OPCODE_MEMBAR: - return TRUE; - break; - - case TGSI_OPCODE_I64ABS: - exec_double_unary(mach, inst, micro_i64abs); - break; - - 
case TGSI_OPCODE_I64SSG: - exec_double_unary(mach, inst, micro_i64sgn); - break; - - case TGSI_OPCODE_I64NEG: - exec_double_unary(mach, inst, micro_i64neg); - break; - - case TGSI_OPCODE_U64SEQ: - exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); - break; - - case TGSI_OPCODE_U64SNE: - exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); - break; - - case TGSI_OPCODE_I64SLT: - exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); - break; - case TGSI_OPCODE_U64SLT: - exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); - break; - - case TGSI_OPCODE_I64SGE: - exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); - break; - case TGSI_OPCODE_U64SGE: - exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); - break; - - case TGSI_OPCODE_I64MIN: - exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); - break; - case TGSI_OPCODE_U64MIN: - exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); - break; - case TGSI_OPCODE_I64MAX: - exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); - break; - case TGSI_OPCODE_U64MAX: - exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); - break; - case TGSI_OPCODE_U64ADD: - exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); - break; - case TGSI_OPCODE_U64MUL: - exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); - break; - case TGSI_OPCODE_U64SHL: - exec_arg0_64_arg1_32(mach, inst, micro_u64shl); - break; - case TGSI_OPCODE_I64SHR: - exec_arg0_64_arg1_32(mach, inst, micro_i64shr); - break; - case TGSI_OPCODE_U64SHR: - exec_arg0_64_arg1_32(mach, inst, micro_u64shr); + exec_d2u(mach, inst); break; - case TGSI_OPCODE_U64DIV: - exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); - break; - case TGSI_OPCODE_I64DIV: - exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); - break; - case TGSI_OPCODE_U64MOD: - exec_double_binary(mach, inst, 
micro_u64mod, TGSI_EXEC_DATA_UINT64); - break; - case TGSI_OPCODE_I64MOD: - exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); - break; - - case TGSI_OPCODE_F2U64: - exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); - break; - - case TGSI_OPCODE_F2I64: - exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); - break; - - case TGSI_OPCODE_U2I64: - exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); - break; - case TGSI_OPCODE_I2I64: - exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); - break; - - case TGSI_OPCODE_D2U64: - exec_double_unary(mach, inst, micro_d2u64); - break; - - case TGSI_OPCODE_D2I64: - exec_double_unary(mach, inst, micro_d2i64); - break; - - case TGSI_OPCODE_U642F: - exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_I642F: - exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT); - break; - - case TGSI_OPCODE_U642D: - exec_double_unary(mach, inst, micro_u642d); - break; - case TGSI_OPCODE_I642D: - exec_double_unary(mach, inst, micro_i642d); - break; - default: assert( 0 ); } - return FALSE; } -static void -tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) + +/** + * Run TGSI interpreter. 
+ * \return bitmask of "alive" quad components + */ +uint +tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) { + uint i; + int pc = 0; uint default_mask = 0xf; mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; mach->Primitives[0] = 0; /* GS runs on a single primitive for now */ default_mask = 0x1; } - if (mach->NonHelperMask == 0) - mach->NonHelperMask = default_mask; mach->CondMask = default_mask; mach->LoopMask = default_mask; mach->ContMask = default_mask; @@ -6211,26 +5119,11 @@ tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) assert(mach->SwitchStackTop == 0); assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); -} -/** - * Run TGSI interpreter. - * \return bitmask of "alive" quad components - */ -uint -tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) -{ - uint i; - mach->pc = start_pc; - - if (!start_pc) { - tgsi_exec_machine_setup_masks(mach); - - /* execute declarations (interpolants) */ - for (i = 0; i < mach->NumDeclarations; i++) { - exec_declaration( mach, mach->Declarations+i ); - } + /* execute declarations (interpolants) */ + for (i = 0; i < mach->NumDeclarations; i++) { + exec_declaration( mach, mach->Declarations+i ); } { @@ -6239,30 +5132,23 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; uint inst = 1; - if (!start_pc) { - memset(mach->Temps, 0, sizeof(temps)); - if (mach->Outputs) - memset(mach->Outputs, 0, sizeof(outputs)); - memset(temps, 0, sizeof(temps)); - memset(outputs, 0, sizeof(outputs)); - } + memset(mach->Temps, 0, sizeof(temps)); + memset(mach->Outputs, 0, sizeof(outputs)); + memset(temps, 0, sizeof(temps)); + memset(outputs, 0, sizeof(outputs)); #endif /* execute instructions, 
until pc is set to -1 */ - while (mach->pc != -1) { - boolean barrier_hit; + while (pc != -1) { + #if DEBUG_EXECUTION uint i; - tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); + tgsi_dump_instruction(&mach->Instructions[pc], inst++); #endif - assert(mach->pc < (int) mach->NumInstructions); - barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); - - /* for compute shaders if we hit a barrier return now for later rescheduling */ - if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) - return 0; + assert(pc < (int) mach->NumInstructions); + exec_instruction(mach, mach->Instructions + pc, &pc); #if DEBUG_EXECUTION for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { @@ -6283,23 +5169,21 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) } } } - if (mach->Outputs) { - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { - uint j; - - memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); - debug_printf("OUT[%2u] = ", i); - for (j = 0; j < 4; j++) { - if (j > 0) { - debug_printf(" "); - } - debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", - outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], - outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], - outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], - outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { + uint j; + + memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); + debug_printf("OUT[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); } + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -6309,7 +5193,7 @@ 
tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) #if 0 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { /* * Scale back depth component. */ diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h index 5708a5061..db5c56b6b 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -88,84 +88,13 @@ struct tgsi_interp_coef float dady[TGSI_NUM_CHANNELS]; }; -enum tgsi_sampler_control -{ - TGSI_SAMPLER_LOD_NONE, - TGSI_SAMPLER_LOD_BIAS, - TGSI_SAMPLER_LOD_EXPLICIT, - TGSI_SAMPLER_LOD_ZERO, - TGSI_SAMPLER_DERIVS_EXPLICIT, - TGSI_SAMPLER_GATHER, -}; - -struct tgsi_image_params { - unsigned unit; - unsigned tgsi_tex_instr; - enum pipe_format format; - unsigned execmask; -}; - -struct tgsi_image { - /* image interfaces */ - void (*load)(const struct tgsi_image *image, - const struct tgsi_image_params *params, - const int s[TGSI_QUAD_SIZE], - const int t[TGSI_QUAD_SIZE], - const int r[TGSI_QUAD_SIZE], - const int sample[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - - void (*store)(const struct tgsi_image *image, - const struct tgsi_image_params *params, - const int s[TGSI_QUAD_SIZE], - const int t[TGSI_QUAD_SIZE], - const int r[TGSI_QUAD_SIZE], - const int sample[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - - void (*op)(const struct tgsi_image *image, - const struct tgsi_image_params *params, - unsigned opcode, - const int s[TGSI_QUAD_SIZE], - const int t[TGSI_QUAD_SIZE], - const int r[TGSI_QUAD_SIZE], - const int sample[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - - void (*get_dims)(const struct tgsi_image *image, - const struct tgsi_image_params *params, - int dims[4]); -}; - -struct 
tgsi_buffer_params { - unsigned unit; - unsigned execmask; - unsigned writemask; -}; - -struct tgsi_buffer { - /* buffer interfaces */ - void (*load)(const struct tgsi_buffer *buffer, - const struct tgsi_buffer_params *params, - const int s[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - - void (*store)(const struct tgsi_buffer *buffer, - const struct tgsi_buffer_params *params, - const int s[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - - void (*op)(const struct tgsi_buffer *buffer, - const struct tgsi_buffer_params *params, - unsigned opcode, - const int s[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - - void (*get_dims)(const struct tgsi_buffer *buffer, - const struct tgsi_buffer_params *params, - int *dim); +enum tgsi_sampler_control { + tgsi_sampler_lod_none, + tgsi_sampler_lod_bias, + tgsi_sampler_lod_explicit, + tgsi_sampler_lod_zero, + tgsi_sampler_derivs_explicit, + tgsi_sampler_gather, }; /** @@ -209,16 +138,6 @@ struct tgsi_sampler const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE], const int lod[TGSI_QUAD_SIZE], const int8_t offset[3], float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - void (*query_lod)(const struct tgsi_sampler *tgsi_sampler, - const unsigned sview_index, - const unsigned sampler_index, - const float s[TGSI_QUAD_SIZE], - const float t[TGSI_QUAD_SIZE], - const float p[TGSI_QUAD_SIZE], - const float c0[TGSI_QUAD_SIZE], - const enum tgsi_sampler_control control, - float mipmap[TGSI_QUAD_SIZE], - float lod[TGSI_QUAD_SIZE]); }; #define TGSI_EXEC_NUM_TEMPS 4096 @@ -266,14 +185,21 @@ struct tgsi_sampler #define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) #define TGSI_EXEC_TEMP_HALF_C 0 +/* execution mask, each value is either 0 or ~0 */ +#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_MASK_C 1 + /* 4 register buffer for various purposes */ #define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) #define 
TGSI_EXEC_NUM_TEMP_R 4 #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8) -#define TGSI_EXEC_NUM_ADDRS 3 -#define TGSI_EXEC_NUM_TEMP_EXTRAS 12 +/* predicate register */ +#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9) +#define TGSI_EXEC_NUM_PREDS 1 + +#define TGSI_EXEC_NUM_TEMP_EXTRAS 10 @@ -352,21 +278,20 @@ struct tgsi_exec_machine /* System values */ unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT]; - struct tgsi_exec_vector SystemValue[TGSI_MAX_MISC_INPUTS]; + union tgsi_exec_channel SystemValue[TGSI_MAX_MISC_INPUTS]; struct tgsi_exec_vector *Addrs; + struct tgsi_exec_vector *Predicates; struct tgsi_sampler *Sampler; - struct tgsi_image *Image; - struct tgsi_buffer *Buffer; unsigned ImmLimit; const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS]; const struct tgsi_token *Tokens; /**< Declarations, instructions */ - enum pipe_shader_type ShaderType; /**< PIPE_SHADER_x */ + unsigned Processor; /**< TGSI_PROCESSOR_x */ /* GEOMETRY processor only. 
*/ unsigned *Primitives; @@ -379,13 +304,6 @@ struct tgsi_exec_machine struct tgsi_exec_vector QuadPos; float Face; /**< +1 if front facing, -1 if back facing */ bool flatshade_color; - - /* Compute Only */ - void *LocalMem; - unsigned LocalMemSize; - - /* See GLSL 4.50 specification for definition of helper invocations */ - uint NonHelperMask; /**< non-helpers */ /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ uint LoopMask; /**< For BGNLOOP/ENDLOOP */ @@ -440,12 +358,10 @@ struct tgsi_exec_machine SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS]; boolean UsedGeometryShader; - - int pc; }; struct tgsi_exec_machine * -tgsi_exec_machine_create(enum pipe_shader_type shader_type); +tgsi_exec_machine_create( void ); void tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach); @@ -455,13 +371,11 @@ void tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, - struct tgsi_sampler *sampler, - struct tgsi_image *image, - struct tgsi_buffer *buffer); + struct tgsi_sampler *sampler); uint tgsi_exec_machine_run( - struct tgsi_exec_machine *mach, int start_pc ); + struct tgsi_exec_machine *mach ); void @@ -472,6 +386,27 @@ boolean tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst); +static inline void +tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask) +{ + mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = + mask; +} + + +/** Set execution mask values prior to executing the shader */ +static inline void +tgsi_set_exec_mask(struct tgsi_exec_machine *mach, + boolean ch0, boolean ch1, boolean ch2, boolean ch3) +{ + int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i; + mask[0] = ch0 ? ~0 : 0; + mask[1] = ch1 ? ~0 : 0; + mask[2] = ch2 ? ~0 : 0; + mask[3] = ch3 ? 
~0 : 0; +} + + extern void tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, unsigned num_bufs, @@ -500,6 +435,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return PIPE_MAX_CONSTANT_BUFFERS; case PIPE_SHADER_CAP_MAX_TEMPS: return TGSI_EXEC_NUM_TEMPS; + case PIPE_SHADER_CAP_MAX_PREDS: + return TGSI_EXEC_NUM_PREDS; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: @@ -517,22 +454,15 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return PIPE_MAX_SHADER_SAMPLER_VIEWS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; - case PIPE_SHADER_CAP_SUPPORTED_IRS: - return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; + case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: - case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: return 0; - case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: - return PIPE_MAX_SHADER_BUFFERS; - case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: - return PIPE_MAX_SHADER_IMAGES; - case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c index 30bad6d6f..fb29ea0d5 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -37,257 +37,231 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, 0, 
0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, 0, 0, 0, REPL, 
"PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D }, - { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, - { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, - { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, - { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", 
TGSI_OPCODE_UIF }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC }, - { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, - { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, - { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, - { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, - { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, - { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, - { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS }, - { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, - { 1, 2, 0, 0, 0, 0, 0, COMP, 
"FSEQ", TGSI_OPCODE_FSEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, - { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ - { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH }, - { 0, 1, 0, 
0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, + { 1, 1, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, + { 1, 1, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, + { 1, 1, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, + { 1, 1, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, + { 1, 1, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, + { 1, 1, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, + { 1, 1, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, + { 1, 2, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, + { 1, 2, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, + { 1, 2, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, + { 1, 2, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, + { 1, 2, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, + { 1, 2, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, + { 1, 2, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, + { 1, 2, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, + { 1, 2, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, + { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, + { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, + { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, + { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, + { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, + { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, + { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ + { 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */ + { 1, 1, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, + { 1, 3, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, + { 1, 1, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, + { 1, 1, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, + { 1, 1, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, + { 1, 1, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, + { 1, 2, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, + { 1, 2, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, + { 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */ + { 1, 1, 0, 0, 0, 0, COMP, 
"ABS", TGSI_OPCODE_ABS }, + { 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */ + { 1, 2, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, + { 1, 1, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, + { 1, 1, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, + { 1, 1, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, + { 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, + { 1, 1, 0, 0, 0, 0, COMP, "PK2H", TGSI_OPCODE_PK2H }, + { 1, 1, 0, 0, 0, 0, COMP, "PK2US", TGSI_OPCODE_PK2US }, + { 1, 1, 0, 0, 0, 0, COMP, "PK4B", TGSI_OPCODE_PK4B }, + { 1, 1, 0, 0, 0, 0, COMP, "PK4UB", TGSI_OPCODE_PK4UB }, + { 0, 1, 0, 0, 0, 1, NONE, "", 44 }, /* removed */ + { 1, 2, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, + { 0, 1, 0, 0, 0, 1, NONE, "", 46 }, /* removed */ + { 1, 2, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, + { 1, 1, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, + { 1, 2, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, + { 1, 2, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, + { 0, 1, 0, 0, 0, 1, NONE, "", 51 }, /* removed */ + { 1, 2, 1, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, + { 1, 4, 1, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, + { 1, 2, 1, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, + { 1, 1, 0, 0, 0, 0, COMP, "UP2H", TGSI_OPCODE_UP2H }, + { 1, 1, 0, 0, 0, 0, COMP, "UP2US", TGSI_OPCODE_UP2US }, + { 1, 1, 0, 0, 0, 0, COMP, "UP4B", TGSI_OPCODE_UP4B }, + { 1, 1, 0, 0, 0, 0, COMP, "UP4UB", TGSI_OPCODE_UP4UB }, + { 0, 1, 0, 0, 0, 1, NONE, "", 59 }, /* removed */ + { 0, 1, 0, 0, 0, 1, NONE, "", 60 }, /* removed */ + { 1, 1, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, + { 0, 1, 0, 0, 0, 1, NONE, "", 62 }, /* removed */ + { 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, + { 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, + { 1, 1, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, + { 1, 3, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, + { 1, 1, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, + { 1, 2, 1, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, + { 0, 1, 0, 0, 0, 1, NONE, "", 69 }, /* removed */ + { 1, 2, 0, 0, 0, 
0, COMP, "DIV", TGSI_OPCODE_DIV }, + { 1, 2, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, + { 1, 2, 1, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, + { 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, + { 0, 1, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, + { 0, 1, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF }, + { 0, 1, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ + { 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, + { 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, + { 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, + { 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, + { 0, 1, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, + { 1, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, + { 1, 1, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, + { 1, 1, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, + { 1, 1, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, + { 1, 1, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, + { 1, 2, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, + { 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */ + { 1, 2, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, + { 1, 2, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, + { 1, 2, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, + { 1, 2, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, + { 1, 3, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, + { 1, 2, 1, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, + { 1, 2, 1, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, + { 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, + { 0, 1, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 1, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, + { 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, + { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, + { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, + { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */ + { 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */ 
+ { 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */ + { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, + { 1, 2, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ }, + { 1, 2, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, + { 1, 2, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, + { 1, 2, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, + { 0, 1, 0, 0, 0, 1, NONE, "", 112 }, /* removed */ + { 0, 1, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, + { 0, 1, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ + { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, + { 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, + { 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, + { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, + { 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, NONE, "SWITCH", 
TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, - { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I }, - { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, - { 1, 5, 0, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, - { 1, 3, 0, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD }, - { 1, 2, 0, 1, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE }, - { 0, 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER }, + { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE }, + { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I }, + { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS }, + { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, + { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, + { 1, 4, 0, 0, 0, 0, OTHR, 
"SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, + { 1, 5, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, + { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, + { 1, 3, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 }, + { 1, 2, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO }, + { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, + { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, + { 1, 1, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL }, + { 1, 3, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP }, + { 1, 1, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS }, + { 1, 1, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG }, + { 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD }, + { 1, 2, 0, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE }, + { 1, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE }, + { 1, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE }, + { 1, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE }, + { 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG }, - { 1, 4, 0, 1, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX }, - { 1, 3, 1, 0, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 }, - { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 }, - { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, - { 1, 3, 1, 0, 
0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE }, - { 1, 4, 0, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB }, - { 1, 1, 0, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC}, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP}, - { 2, 1, 0, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP}, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D }, - { 1, 1, 
0, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD }, - { 1, 2, 
0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DDIV", TGSI_OPCODE_DDIV }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG }, + { 1, 4, 0, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX }, + { 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 }, + { 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 }, + { 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, + { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, + { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, + { 1, 3, 1, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 }, + { 1, 2, 1, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ }, + { 1, 3, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE }, + { 1, 3, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE }, + { 1, 4, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI }, + { 1, 1, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV }, + { 1, 1, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC }, + { 1, 1, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB }, + { 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB }, + { 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB }, + { 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID }, + { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE }, + { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET }, + { 1, 1, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D }, + { 1, 1, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F }, + { 1, 1, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS }, + { 1, 1, 0, 0, 0, 0, 
COMP, "DNEG", TGSI_OPCODE_DNEG }, + { 1, 2, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD }, + { 1, 2, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL }, + { 1, 2, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX }, + { 1, 2, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN }, + { 1, 2, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT }, + { 1, 2, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE }, + { 1, 2, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ }, + { 1, 2, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE }, + { 1, 1, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP }, + { 1, 1, 0, 0 ,0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT }, + { 1, 3, 0, 0 ,0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD }, + { 1, 1, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC}, + { 1, 2, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP}, + { 2, 1, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP}, + { 1, 1, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I }, + { 1, 1, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D }, + { 1, 1, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U }, + { 1, 1, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D }, + { 1, 1, 0, 0 ,0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ }, + { 1, 1, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC }, + { 1, 1, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL }, + { 1, 1, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR }, + { 1, 1, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND }, + { 1, 1, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG }, }; const struct tgsi_opcode_info * @@ -298,7 +272,7 @@ tgsi_get_opcode_info( uint opcode ) if (firsttime) { unsigned i; firsttime = 0; - for (i = 0; i < ARRAY_SIZE(opcode_info); i++) + for (i = 0; i < Elements(opcode_info); i++) assert(opcode_info[i].opcode == i); } @@ -322,18 +296,16 @@ const char * tgsi_get_processor_name( uint processor ) { switch (processor) { - case PIPE_SHADER_VERTEX: + case TGSI_PROCESSOR_VERTEX: return "vertex shader"; - case PIPE_SHADER_FRAGMENT: + case TGSI_PROCESSOR_FRAGMENT: return "fragment shader"; - case PIPE_SHADER_GEOMETRY: + case TGSI_PROCESSOR_GEOMETRY: return 
"geometry shader"; - case PIPE_SHADER_TESS_CTRL: + case TGSI_PROCESSOR_TESS_CTRL: return "tessellation control shader"; - case PIPE_SHADER_TESS_EVAL: + case TGSI_PROCESSOR_TESS_EVAL: return "tessellation evaluation shader"; - case PIPE_SHADER_COMPUTE: - return "compute shader"; default: return "unknown shader type!"; } @@ -359,7 +331,6 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */ case TGSI_OPCODE_TXQ: case TGSI_OPCODE_TXQ_LZ: - case TGSI_OPCODE_TXQS: case TGSI_OPCODE_F2U: case TGSI_OPCODE_UDIV: case TGSI_OPCODE_UMAD: @@ -407,12 +378,6 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSNE: - case TGSI_OPCODE_U64SEQ: - case TGSI_OPCODE_U64SNE: - case TGSI_OPCODE_U64SLT: - case TGSI_OPCODE_U64SGE: - case TGSI_OPCODE_I64SLT: - case TGSI_OPCODE_I64SGE: return TGSI_TYPE_SIGNED; case TGSI_OPCODE_DADD: case TGSI_OPCODE_DABS: @@ -420,7 +385,6 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_DNEG: case TGSI_OPCODE_DMUL: case TGSI_OPCODE_DMAX: - case TGSI_OPCODE_DDIV: case TGSI_OPCODE_DMIN: case TGSI_OPCODE_DRCP: case TGSI_OPCODE_DSQRT: @@ -437,33 +401,7 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_F2D: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: - case TGSI_OPCODE_U642D: - case TGSI_OPCODE_I642D: return TGSI_TYPE_DOUBLE; - case TGSI_OPCODE_U64MAX: - case TGSI_OPCODE_U64MIN: - case TGSI_OPCODE_U64ADD: - case TGSI_OPCODE_U64MUL: - case TGSI_OPCODE_U64DIV: - case TGSI_OPCODE_U64MOD: - case TGSI_OPCODE_U64SHL: - case TGSI_OPCODE_U64SHR: - case TGSI_OPCODE_F2U64: - case TGSI_OPCODE_D2U64: - return TGSI_TYPE_UNSIGNED64; - case TGSI_OPCODE_I64MAX: - case TGSI_OPCODE_I64MIN: - case TGSI_OPCODE_I64ABS: - case TGSI_OPCODE_I64SSG: - case TGSI_OPCODE_I64NEG: - case TGSI_OPCODE_I64SHR: - case TGSI_OPCODE_I64DIV: - case TGSI_OPCODE_I64MOD: - case TGSI_OPCODE_F2I64: - case TGSI_OPCODE_U2I64: - case TGSI_OPCODE_I2I64: - case TGSI_OPCODE_D2I64: - 
return TGSI_TYPE_SIGNED64; default: return TGSI_TYPE_FLOAT; } @@ -478,7 +416,6 @@ tgsi_opcode_infer_src_type( uint opcode ) switch (opcode) { case TGSI_OPCODE_UIF: case TGSI_OPCODE_TXF: - case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_BREAKC: case TGSI_OPCODE_U2F: case TGSI_OPCODE_U2D: @@ -488,14 +425,10 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_SAMPLE_I: case TGSI_OPCODE_SAMPLE_I_MS: case TGSI_OPCODE_UMUL_HI: - case TGSI_OPCODE_UP2H: - case TGSI_OPCODE_U2I64: - case TGSI_OPCODE_MEMBAR: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_I2F: case TGSI_OPCODE_I2D: - case TGSI_OPCODE_I2I64: return TGSI_TYPE_SIGNED; case TGSI_OPCODE_ARL: case TGSI_OPCODE_ARR: @@ -508,8 +441,6 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_FSLT: case TGSI_OPCODE_FSNE: case TGSI_OPCODE_UCMP: - case TGSI_OPCODE_F2U64: - case TGSI_OPCODE_F2I64: return TGSI_TYPE_FLOAT; case TGSI_OPCODE_D2F: case TGSI_OPCODE_D2U: @@ -518,21 +449,7 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSNE: - case TGSI_OPCODE_D2U64: - case TGSI_OPCODE_D2I64: return TGSI_TYPE_DOUBLE; - case TGSI_OPCODE_U64SEQ: - case TGSI_OPCODE_U64SNE: - case TGSI_OPCODE_U64SLT: - case TGSI_OPCODE_U64SGE: - case TGSI_OPCODE_U642F: - case TGSI_OPCODE_U642D: - return TGSI_TYPE_UNSIGNED64; - case TGSI_OPCODE_I64SLT: - case TGSI_OPCODE_I64SGE: - case TGSI_OPCODE_I642F: - case TGSI_OPCODE_I642D: - return TGSI_TYPE_SIGNED64; default: return tgsi_opcode_infer_type(opcode); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c index c26c13b5e..a3b90bdb5 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -264,13 +264,14 @@ transform_dst(struct tgsi_transform_context *tctx, * dst.z = src0.x \times src1.y - src1.x \times src0.y * dst.w = 1.0 * - * ; needs: 1 tmp, imm{1.0} - * MUL tmpA.xyz, 
src1.yzx, src0.zxy - * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz + * ; needs: 2 tmp, imm{1.0} + * MUL tmpA.xyz, src0.yzx, src1.zxy + * MUL tmpB.xyz, src1.yzx, src0.zxy + * SUB dst.xyz, tmpA.xyz, tmpB.xyz * MOV dst.w, imm{1.0} */ -#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2)) -#define XPD_TMP 1 +#define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2)) +#define XPD_TMP 2 static void transform_xpd(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) @@ -282,26 +283,34 @@ transform_xpd(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { - /* MUL tmpA.xyz, src1.yzx, src0.zxy */ + /* MUL tmpA.xyz, src0.yzx, src1.zxy */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _)); + reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _)); + tctx->emit_instruction(tctx, &new_inst); + + /* MUL tmpB.xyz, src1.yzx, src0.zxy */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ); + new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _)); reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _)); tctx->emit_instruction(tctx, &new_inst); - /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */ + /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ); - new_inst.Instruction.NumSrcRegs = 3; - reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _)); - reg_src(&new_inst.Src[1], 
src1, SWIZ(Z, X, Y, _)); - reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); - new_inst.Src[2].Register.Negate = true; + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); + reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, _)); tctx->emit_instruction(tctx, &new_inst); } @@ -388,15 +397,14 @@ transform_scs(struct tgsi_transform_context *tctx, * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w * - * This becomes: src0 \times src1 + src2 - src0 \times src2, which - * can then become: src0 \times src1 - (src0 \times src2 - src2) - * - * ; needs: 1 tmp - * MAD tmpA, src0, src2, -src2 - * MAD dst, src0, src1, -tmpA + * ; needs: 2 tmp, imm{1.0} + * MUL tmpA, src0, src1 + * SUB tmpB, imm{1.0}, src0 + * MUL tmpB, tmpB, src2 + * ADD dst, tmpA, tmpB */ -#define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) -#define LRP_TMP 1 +#define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3)) +#define LRP_TMP 2 static void transform_lrp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) @@ -409,28 +417,44 @@ transform_lrp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { - /* MAD tmpA, src0, src2, -src2 */ + /* MUL tmpA, src0, src1 */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; + new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 3; + new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &new_inst); + + /* SUB tmpB, imm{1.0}, src0 */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + 
new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y)); + reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &new_inst); + + /* MUL tmpB, tmpB, src2 */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); - reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); - new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; tctx->emit_instruction(tctx, &new_inst); - /* MAD dst, src0, src1, -tmpA */ + /* ADD dst, tmpA, tmpB */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 3; - reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); - reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - new_inst.Src[2].Register.Negate = true; + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W)); tctx->emit_instruction(tctx, &new_inst); } } @@ -468,13 +492,12 @@ transform_frc(struct tgsi_transform_context *tctx, /* SUB dst, src, tmpA */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; 
reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } } @@ -572,25 +595,16 @@ transform_lit(struct tgsi_transform_context *tctx, reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); tctx->emit_instruction(tctx, &new_inst); - /* MIN tmpA.z, src.w, imm{128.0} */ + /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; + new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); - new_inst.Instruction.NumSrcRegs = 2; + new_inst.Instruction.NumSrcRegs = 3; reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); - tctx->emit_instruction(tctx, &new_inst); - - /* MAX tmpA.z, tmpA.z, -imm{128.0} */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); - reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); new_inst.Src[1].Register.Negate = true; + reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _)); tctx->emit_instruction(tctx, &new_inst); /* LG2 tmpA.y, tmpA.y */ @@ -662,19 +676,14 @@ transform_lit(struct tgsi_transform_context *tctx, * dst.w = 1.0 * * ; needs: 1 tmp, imm{1.0} - * if (lowering FLR) { - * FRC tmpA.x, src.x - * SUB tmpA.x, src.x, tmpA.x - * } else { - * FLR tmpA.x, src.x - * } + * FLR tmpA.x, src.x * EX2 tmpA.y, src.x * SUB dst.y, src.x, tmpA.x * EX2 dst.x, tmpA.x * MOV dst.z, tmpA.y * MOV dst.w, imm{1.0} */ -#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ +#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) 
+ \ NINST(1)+ NINST(1) - OINST(1)) #define EXP_TMP 1 static void @@ -687,36 +696,14 @@ transform_exp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - if (ctx->config->lower_FLR) { - /* FRC tmpA.x, src.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - tctx->emit_instruction(tctx, &new_inst); - - /* SUB tmpA.x, src.x, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); - new_inst.Src[1].Register.Negate = 1; - tctx->emit_instruction(tctx, &new_inst); - } else { - /* FLR tmpA.x, src.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - tctx->emit_instruction(tctx, &new_inst); - } + /* FLR tmpA.x, src.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { @@ -733,13 +720,12 @@ transform_exp(struct tgsi_transform_context *tctx, if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 
/* SUB dst.y, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); - new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } @@ -785,19 +771,14 @@ transform_exp(struct tgsi_transform_context *tctx, * * ; needs: 1 tmp, imm{1.0} * LG2 tmpA.x, |src.x| - * if (lowering FLR) { - * FRC tmpA.y, tmpA.x - * SUB tmpA.y, tmpA.x, tmpA.y - * } else { - * FLR tmpA.y, tmpA.x - * } + * FLR tmpA.y, tmpA.x * EX2 tmpA.z, tmpA.y * RCP tmpA.z, tmpA.z * MUL dst.y, |src.x|, tmpA.z * MOV dst.xz, tmpA.yx * MOV dst.w, imm{1.0} */ -#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ +#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \ NINST(2) + NINST(1) + NINST(1) - OINST(1)) #define LOG_TMP 1 static void @@ -822,36 +803,14 @@ transform_log(struct tgsi_transform_context *tctx, } if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - if (ctx->config->lower_FLR) { - /* FRC tmpA.y, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); - tctx->emit_instruction(tctx, &new_inst); - - /* SUB tmpA.y, tmpA.x, tmpA.y */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); - reg_src(&new_inst.Src[1], 
&ctx->tmp[A].src, SWIZ(_, Y, _, _)); - new_inst.Src[1].Register.Negate = 1; - tctx->emit_instruction(tctx, &new_inst); - } else { - /* FLR tmpA.y, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); - tctx->emit_instruction(tctx, &new_inst); - } + /* FLR tmpA.y, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { @@ -1046,131 +1005,6 @@ transform_dotp(struct tgsi_transform_context *tctx, } } -/* FLR - floor, CEIL - ceil - * ; needs: 1 tmp - * if (CEIL) { - * FRC tmpA, -src - * ADD dst, src, tmpA - * } else { - * FRC tmpA, src - * SUB dst, src, tmpA - * } - */ -#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) -#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) -#define FLR_TMP 1 -#define CEIL_TMP 1 -static void -transform_flr_ceil(struct tgsi_transform_context *tctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); - struct tgsi_full_dst_register *dst = &inst->Dst[0]; - struct tgsi_full_src_register *src0 = &inst->Src[0]; - struct tgsi_full_instruction new_inst; - unsigned opcode = inst->Instruction.Opcode; - - if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { - /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, 
TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - - if (opcode == TGSI_OPCODE_CEIL) - new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; - tctx->emit_instruction(tctx, &new_inst); - - /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - if (opcode == TGSI_OPCODE_FLR) - new_inst.Src[1].Register.Negate = 1; - tctx->emit_instruction(tctx, &new_inst); - } -} - -/* TRUNC - truncate off fractional part - * dst.x = trunc(src.x) - * dst.y = trunc(src.y) - * dst.z = trunc(src.z) - * dst.w = trunc(src.w) - * - * ; needs: 1 tmp - * if (lower FLR) { - * FRC tmpA, |src| - * SUB tmpA, |src|, tmpA - * } else { - * FLR tmpA, |src| - * } - * CMP dst, src, -tmpA, tmpA - */ -#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) -#define TRUNC_TMP 1 -static void -transform_trunc(struct tgsi_transform_context *tctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); - struct tgsi_full_dst_register *dst = &inst->Dst[0]; - struct tgsi_full_src_register *src0 = &inst->Src[0]; - struct tgsi_full_instruction new_inst; - - if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { - if (ctx->config->lower_FLR) { - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - new_inst.Src[0].Register.Absolute = true; - new_inst.Src[0].Register.Negate = false; - 
tctx->emit_instruction(tctx, &new_inst); - - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - new_inst.Src[0].Register.Absolute = true; - new_inst.Src[0].Register.Negate = false; - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - new_inst.Src[1].Register.Negate = 1; - tctx->emit_instruction(tctx, &new_inst); - } else { - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - new_inst.Src[0].Register.Absolute = true; - new_inst.Src[0].Register.Negate = false; - tctx->emit_instruction(tctx, &new_inst); - } - - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); - new_inst.Instruction.NumSrcRegs = 3; - reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - new_inst.Src[1].Register.Negate = true; - reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &new_inst); - } -} - /* Inserts a MOV_SAT for the needed components of tex coord. Note that * in the case of TXP, the clamping must happen *after* projection, so * we need to lower TXP to TEX. 
@@ -1567,21 +1401,6 @@ transform_instr(struct tgsi_transform_context *tctx, goto skip; transform_dotp(tctx, inst); break; - case TGSI_OPCODE_FLR: - if (!ctx->config->lower_FLR) - goto skip; - transform_flr_ceil(tctx, inst); - break; - case TGSI_OPCODE_CEIL: - if (!ctx->config->lower_CEIL) - goto skip; - transform_flr_ceil(tctx, inst); - break; - case TGSI_OPCODE_TRUNC: - if (!ctx->config->lower_TRUNC) - goto skip; - transform_trunc(tctx, inst); - break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: case TGSI_OPCODE_TXB: @@ -1611,11 +1430,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, int newlen, numtmp; /* sanity check in case limit is ever increased: */ - STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); - - /* sanity check the lowering */ - assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); - assert(!(config->lower_FRC && config->lower_TRUNC)); + assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); memset(&ctx, 0, sizeof(ctx)); ctx.base.transform_instruction = transform_instr; @@ -1628,7 +1443,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, * color, then figure out the number of additional inputs we need * to create for BCOLOR's.. 
*/ - if ((info->processor == PIPE_SHADER_FRAGMENT) && + if ((info->processor == TGSI_PROCESSOR_FRAGMENT) && config->color_two_side) { int i; ctx.face_idx = -1; @@ -1658,9 +1473,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, OPCS(DPH) || OPCS(DP2) || OPCS(DP2A) || - OPCS(FLR) || - OPCS(CEIL) || - OPCS(TRUNC) || OPCS(TXP) || ctx.two_side_colors || ctx.saturate)) @@ -1729,18 +1541,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += DP2A_GROW * OPCS(DP2A); numtmp = MAX2(numtmp, DOTP_TMP); } - if (OPCS(FLR)) { - newlen += FLR_GROW * OPCS(FLR); - numtmp = MAX2(numtmp, FLR_TMP); - } - if (OPCS(CEIL)) { - newlen += CEIL_GROW * OPCS(CEIL); - numtmp = MAX2(numtmp, CEIL_TMP); - } - if (OPCS(TRUNC)) { - newlen += TRUNC_GROW * OPCS(TRUNC); - numtmp = MAX2(numtmp, TRUNC_TMP); - } if (ctx.saturate || config->lower_TXP) { int n = 0; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index ab73fabac..d8752ce47 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -73,16 +73,19 @@ OP12(MAX) OP12(SLT) OP12(SGE) OP13(MAD) +OP12(SUB) OP13(LRP) OP11(SQRT) OP13(DP2A) OP11(FRC) +OP13(CLAMP) OP11(FLR) OP11(ROUND) OP11(EX2) OP11(LG2) OP12(POW) OP12(XPD) +OP11(ABS) OP12(DPH) OP11(COS) OP11(DDX) diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c index c706fc8ae..0729b5d24 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -121,8 +121,8 @@ tgsi_parse_token( next_token( ctx, &decl->Semantic ); } - if (decl->Declaration.File == TGSI_FILE_IMAGE) { - next_token(ctx, &decl->Image); + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + next_token(ctx, &decl->Resource); } if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { @@ -155,14 +155,12 @@ tgsi_parse_token( 
break; case TGSI_IMM_UINT32: - case TGSI_IMM_UINT64: for (i = 0; i < imm_count; i++) { next_token(ctx, &imm->u[i].Uint); } break; case TGSI_IMM_INT32: - case TGSI_IMM_INT64: for (i = 0; i < imm_count; i++) { next_token(ctx, &imm->u[i].Int); } @@ -182,6 +180,10 @@ tgsi_parse_token( memset(inst, 0, sizeof *inst); copy_token(&inst->Instruction, &token); + if (inst->Instruction.Predicate) { + next_token(ctx, &inst->Predicate); + } + if (inst->Instruction.Label) { next_token( ctx, &inst->Label); } @@ -193,10 +195,6 @@ tgsi_parse_token( } } - if (inst->Instruction.Memory) { - next_token(ctx, &inst->Memory); - } - assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); for (i = 0; i < inst->Instruction.NumDstRegs; i++) { @@ -311,7 +309,7 @@ tgsi_dump_tokens(const struct tgsi_token *tokens) int nr = tgsi_num_tokens(tokens); int i; - STATIC_ASSERT(sizeof(*tokens) == sizeof(unsigned)); + assert(sizeof(*tokens) == sizeof(unsigned)); debug_printf("const unsigned tokens[%d] = {\n", nr); for (i = 0; i < nr; i++) diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h index 07806ab35..35e1c7cfd 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -64,7 +64,7 @@ struct tgsi_full_declaration struct tgsi_declaration_dimension Dim; struct tgsi_declaration_interp Interp; struct tgsi_declaration_semantic Semantic; - struct tgsi_declaration_image Image; + struct tgsi_declaration_resource Resource; struct tgsi_declaration_sampler_view SamplerView; struct tgsi_declaration_array Array; }; @@ -88,9 +88,9 @@ struct tgsi_full_property struct tgsi_full_instruction { struct tgsi_instruction Instruction; + struct tgsi_instruction_predicate Predicate; struct tgsi_instruction_label Label; struct tgsi_instruction_texture Texture; - struct tgsi_instruction_memory Memory; struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS]; struct tgsi_full_src_register 
Src[TGSI_FULL_MAX_SRC_REGISTERS]; struct tgsi_texture_offset TexOffsets[TGSI_FULL_MAX_TEX_OFFSETS]; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c index a95bbfa98..d14372feb 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -256,6 +256,7 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", + "PRED", "SV", "RES" }; @@ -320,7 +321,7 @@ iter_instruction( } info = tgsi_get_opcode_info( inst->Instruction.Opcode ); - if (!info) { + if (info == NULL) { report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode ); return TRUE; } @@ -413,9 +414,9 @@ iter_declaration( decl->Semantic.Name == TGSI_SEMANTIC_TESSOUTER || decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER; if (file == TGSI_FILE_INPUT && !patch && ( - processor == PIPE_SHADER_GEOMETRY || - processor == PIPE_SHADER_TESS_CTRL || - processor == PIPE_SHADER_TESS_EVAL)) { + processor == TGSI_PROCESSOR_GEOMETRY || + processor == TGSI_PROCESSOR_TESS_CTRL || + processor == TGSI_PROCESSOR_TESS_EVAL)) { uint vert; for (vert = 0; vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); @@ -423,7 +424,7 @@ iter_declaration( check_and_declare(ctx, reg); } } else if (file == TGSI_FILE_OUTPUT && !patch && - processor == PIPE_SHADER_TESS_CTRL) { + processor == TGSI_PROCESSOR_TESS_CTRL) { uint vert; for (vert = 0; vert < ctx->implied_out_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); @@ -484,11 +485,11 @@ iter_property( { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - if (iter->processor.Processor == PIPE_SHADER_GEOMETRY && + if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY && prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); } - if (iter->processor.Processor == PIPE_SHADER_TESS_CTRL && + 
if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL && prop->Property.PropertyName == TGSI_PROPERTY_TCS_VERTICES_OUT) ctx->implied_out_array_size = prop->u[0].Data; return TRUE; @@ -498,8 +499,8 @@ static boolean prolog(struct tgsi_iterate_context *iter) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - if (iter->processor.Processor == PIPE_SHADER_TESS_CTRL || - iter->processor.Processor == PIPE_SHADER_TESS_EVAL) + if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL || + iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL) ctx->implied_array_size = 32; return TRUE; } @@ -558,7 +559,6 @@ tgsi_sanity_check( const struct tgsi_token *tokens ) { struct sanity_check_ctx ctx; - boolean retval; ctx.iter.prolog = prolog; ctx.iter.iterate_instruction = iter_instruction; @@ -580,12 +580,11 @@ tgsi_sanity_check( ctx.implied_array_size = 0; ctx.print = debug_get_option_print_sanity(); - retval = tgsi_iterate_shader( tokens, &ctx.iter ); + if (!tgsi_iterate_shader( tokens, &ctx.iter )) + return FALSE; + regs_hash_destroy(ctx.regs_decl); regs_hash_destroy(ctx.regs_used); regs_hash_destroy(ctx.regs_ind_used); - if (retval == FALSE) - return FALSE; - return ctx.errors == 0; } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c index bf614db80..7523baf4c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -38,661 +38,11 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_prim.h" -#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_scan.h" -static bool -is_memory_file(unsigned file) -{ - return file == TGSI_FILE_SAMPLER || - file == TGSI_FILE_SAMPLER_VIEW || - file == TGSI_FILE_IMAGE || - file == TGSI_FILE_BUFFER; -} - - -static bool -is_mem_query_inst(unsigned opcode) -{ - return opcode == TGSI_OPCODE_RESQ || - opcode == TGSI_OPCODE_TXQ || - opcode == 
TGSI_OPCODE_TXQS || - opcode == TGSI_OPCODE_TXQ_LZ || - opcode == TGSI_OPCODE_LODQ; -} - -/** - * Is the opcode a "true" texture instruction which samples from a - * texture map? - */ -static bool -is_texture_inst(unsigned opcode) -{ - return (!is_mem_query_inst(opcode) && - tgsi_get_opcode_info(opcode)->is_tex); -} - - -/** - * Is the opcode an instruction which computes a derivative explicitly or - * implicitly? - */ -static bool -computes_derivative(unsigned opcode) -{ - if (tgsi_get_opcode_info(opcode)->is_tex) { - return opcode != TGSI_OPCODE_TG4 && - opcode != TGSI_OPCODE_TXD && - opcode != TGSI_OPCODE_TXF && - opcode != TGSI_OPCODE_TXF_LZ && - opcode != TGSI_OPCODE_TEX_LZ && - opcode != TGSI_OPCODE_TXL && - opcode != TGSI_OPCODE_TXL2 && - opcode != TGSI_OPCODE_TXQ && - opcode != TGSI_OPCODE_TXQ_LZ && - opcode != TGSI_OPCODE_TXQS; - } - - return opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE || - opcode == TGSI_OPCODE_DDY || opcode == TGSI_OPCODE_DDY_FINE || - opcode == TGSI_OPCODE_SAMPLE || - opcode == TGSI_OPCODE_SAMPLE_B || - opcode == TGSI_OPCODE_SAMPLE_C; -} - - -static void -scan_src_operand(struct tgsi_shader_info *info, - const struct tgsi_full_instruction *fullinst, - const struct tgsi_full_src_register *src, - unsigned src_index, - unsigned usage_mask, - bool is_interp_instruction, - bool *is_mem_inst) -{ - int ind = src->Register.Index; - - /* Mark which inputs are effectively used */ - if (src->Register.File == TGSI_FILE_INPUT) { - if (src->Register.Indirect) { - for (ind = 0; ind < info->num_inputs; ++ind) { - info->input_usage_mask[ind] |= usage_mask; - } - } else { - assert(ind >= 0); - assert(ind < PIPE_MAX_SHADER_INPUTS); - info->input_usage_mask[ind] |= usage_mask; - } - - if (info->processor == PIPE_SHADER_FRAGMENT) { - unsigned name, index, input; - - if (src->Register.Indirect && src->Indirect.ArrayID) - input = info->input_array_first[src->Indirect.ArrayID]; - else - input = src->Register.Index; - - name = 
info->input_semantic_name[input]; - index = info->input_semantic_index[input]; - - if (name == TGSI_SEMANTIC_POSITION && - (src->Register.SwizzleX == TGSI_SWIZZLE_Z || - src->Register.SwizzleY == TGSI_SWIZZLE_Z || - src->Register.SwizzleZ == TGSI_SWIZZLE_Z || - src->Register.SwizzleW == TGSI_SWIZZLE_Z)) - info->reads_z = TRUE; - - if (name == TGSI_SEMANTIC_COLOR) { - unsigned mask = - (1 << src->Register.SwizzleX) | - (1 << src->Register.SwizzleY) | - (1 << src->Register.SwizzleZ) | - (1 << src->Register.SwizzleW); - - info->colors_read |= mask << (index * 4); - } - - /* Process only interpolated varyings. Don't include POSITION. - * Don't include integer varyings, because they are not - * interpolated. Don't process inputs interpolated by INTERP - * opcodes. Those are tracked separately. - */ - if ((!is_interp_instruction || src_index != 0) && - (name == TGSI_SEMANTIC_GENERIC || - name == TGSI_SEMANTIC_TEXCOORD || - name == TGSI_SEMANTIC_COLOR || - name == TGSI_SEMANTIC_BCOLOR || - name == TGSI_SEMANTIC_FOG || - name == TGSI_SEMANTIC_CLIPDIST)) { - switch (info->input_interpolate[input]) { - case TGSI_INTERPOLATE_COLOR: - case TGSI_INTERPOLATE_PERSPECTIVE: - switch (info->input_interpolate_loc[input]) { - case TGSI_INTERPOLATE_LOC_CENTER: - info->uses_persp_center = TRUE; - break; - case TGSI_INTERPOLATE_LOC_CENTROID: - info->uses_persp_centroid = TRUE; - break; - case TGSI_INTERPOLATE_LOC_SAMPLE: - info->uses_persp_sample = TRUE; - break; - } - break; - case TGSI_INTERPOLATE_LINEAR: - switch (info->input_interpolate_loc[input]) { - case TGSI_INTERPOLATE_LOC_CENTER: - info->uses_linear_center = TRUE; - break; - case TGSI_INTERPOLATE_LOC_CENTROID: - info->uses_linear_centroid = TRUE; - break; - case TGSI_INTERPOLATE_LOC_SAMPLE: - info->uses_linear_sample = TRUE; - break; - } - break; - /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. 
*/ - } - } - } - } - - if (info->processor == PIPE_SHADER_TESS_CTRL && - src->Register.File == TGSI_FILE_OUTPUT) { - unsigned input; - - if (src->Register.Indirect && src->Indirect.ArrayID) - input = info->output_array_first[src->Indirect.ArrayID]; - else - input = src->Register.Index; - - switch (info->output_semantic_name[input]) { - case TGSI_SEMANTIC_PATCH: - info->reads_perpatch_outputs = true; - break; - case TGSI_SEMANTIC_TESSINNER: - case TGSI_SEMANTIC_TESSOUTER: - info->reads_tessfactor_outputs = true; - break; - default: - info->reads_pervertex_outputs = true; - } - } - - /* check for indirect register reads */ - if (src->Register.Indirect) { - info->indirect_files |= (1 << src->Register.File); - info->indirect_files_read |= (1 << src->Register.File); - - /* record indirect constant buffer indexing */ - if (src->Register.File == TGSI_FILE_CONSTANT) { - if (src->Register.Dimension) { - if (src->Dimension.Indirect) - info->const_buffers_indirect = info->const_buffers_declared; - else - info->const_buffers_indirect |= 1u << src->Dimension.Index; - } else { - info->const_buffers_indirect |= 1; - } - } - } - - if (src->Register.Dimension && src->Dimension.Indirect) - info->dim_indirect_files |= 1u << src->Register.File; - - /* Texture samplers */ - if (src->Register.File == TGSI_FILE_SAMPLER) { - const unsigned index = src->Register.Index; - - assert(fullinst->Instruction.Texture); - assert(index < ARRAY_SIZE(info->is_msaa_sampler)); - assert(index < PIPE_MAX_SAMPLERS); - - if (is_texture_inst(fullinst->Instruction.Opcode)) { - const unsigned target = fullinst->Texture.Texture; - assert(target < TGSI_TEXTURE_UNKNOWN); - /* for texture instructions, check that the texture instruction - * target matches the previous sampler view declaration (if there - * was one.) 
- */ - if (info->sampler_targets[index] == TGSI_TEXTURE_UNKNOWN) { - /* probably no sampler view declaration */ - info->sampler_targets[index] = target; - } else { - /* Make sure the texture instruction's sampler/target info - * agrees with the sampler view declaration. - */ - assert(info->sampler_targets[index] == target); - } - /* MSAA samplers */ - if (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - info->is_msaa_sampler[src->Register.Index] = TRUE; - } - } - } - - if (is_memory_file(src->Register.File) && - !is_mem_query_inst(fullinst->Instruction.Opcode)) { - *is_mem_inst = true; - - if (tgsi_get_opcode_info(fullinst->Instruction.Opcode)->is_store) { - info->writes_memory = TRUE; - - if (src->Register.File == TGSI_FILE_IMAGE) { - if (src->Register.Indirect) - info->images_atomic = info->images_declared; - else - info->images_atomic |= 1 << src->Register.Index; - } else if (src->Register.File == TGSI_FILE_BUFFER) { - if (src->Register.Indirect) - info->shader_buffers_atomic = info->shader_buffers_declared; - else - info->shader_buffers_atomic |= 1 << src->Register.Index; - } - } else { - if (src->Register.File == TGSI_FILE_IMAGE) { - if (src->Register.Indirect) - info->images_load = info->images_declared; - else - info->images_load |= 1 << src->Register.Index; - } else if (src->Register.File == TGSI_FILE_BUFFER) { - if (src->Register.Indirect) - info->shader_buffers_load = info->shader_buffers_declared; - else - info->shader_buffers_load |= 1 << src->Register.Index; - } - } - } -} - - -static void -scan_instruction(struct tgsi_shader_info *info, - const struct tgsi_full_instruction *fullinst, - unsigned *current_depth) -{ - unsigned i; - bool is_mem_inst = false; - bool is_interp_instruction = false; - - assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); - info->opcode_count[fullinst->Instruction.Opcode]++; - - switch (fullinst->Instruction.Opcode) { - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: - case TGSI_OPCODE_BGNLOOP: - 
(*current_depth)++; - info->max_depth = MAX2(info->max_depth, *current_depth); - break; - case TGSI_OPCODE_ENDIF: - case TGSI_OPCODE_ENDLOOP: - (*current_depth)--; - break; - default: - break; - } - - if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID || - fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || - fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { - const struct tgsi_full_src_register *src0 = &fullinst->Src[0]; - unsigned input; - - is_interp_instruction = true; - - if (src0->Register.Indirect && src0->Indirect.ArrayID) - input = info->input_array_first[src0->Indirect.ArrayID]; - else - input = src0->Register.Index; - - /* For the INTERP opcodes, the interpolation is always - * PERSPECTIVE unless LINEAR is specified. - */ - switch (info->input_interpolate[input]) { - case TGSI_INTERPOLATE_COLOR: - case TGSI_INTERPOLATE_CONSTANT: - case TGSI_INTERPOLATE_PERSPECTIVE: - switch (fullinst->Instruction.Opcode) { - case TGSI_OPCODE_INTERP_CENTROID: - info->uses_persp_opcode_interp_centroid = TRUE; - break; - case TGSI_OPCODE_INTERP_OFFSET: - info->uses_persp_opcode_interp_offset = TRUE; - break; - case TGSI_OPCODE_INTERP_SAMPLE: - info->uses_persp_opcode_interp_sample = TRUE; - break; - } - break; - - case TGSI_INTERPOLATE_LINEAR: - switch (fullinst->Instruction.Opcode) { - case TGSI_OPCODE_INTERP_CENTROID: - info->uses_linear_opcode_interp_centroid = TRUE; - break; - case TGSI_OPCODE_INTERP_OFFSET: - info->uses_linear_opcode_interp_offset = TRUE; - break; - case TGSI_OPCODE_INTERP_SAMPLE: - info->uses_linear_opcode_interp_sample = TRUE; - break; - } - break; - } - } - - if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && - fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) - info->uses_doubles = TRUE; - - for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { - scan_src_operand(info, fullinst, &fullinst->Src[i], i, - tgsi_util_get_inst_usage_mask(fullinst, i), - is_interp_instruction, &is_mem_inst); - } - - if 
(fullinst->Instruction.Texture) { - for (i = 0; i < fullinst->Texture.NumOffsets; i++) { - struct tgsi_full_src_register src = {{0}}; - - src.Register.File = fullinst->TexOffsets[i].File; - src.Register.Index = fullinst->TexOffsets[i].Index; - src.Register.SwizzleX = fullinst->TexOffsets[i].SwizzleX; - src.Register.SwizzleY = fullinst->TexOffsets[i].SwizzleY; - src.Register.SwizzleZ = fullinst->TexOffsets[i].SwizzleZ; - - /* The usage mask is suboptimal but should be safe. */ - scan_src_operand(info, fullinst, &src, 0, TGSI_WRITEMASK_XYZ, - false, &is_mem_inst); - } - } - - /* check for indirect register writes */ - for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; - if (dst->Register.Indirect) { - info->indirect_files |= (1 << dst->Register.File); - info->indirect_files_written |= (1 << dst->Register.File); - } - - if (dst->Register.Dimension && dst->Dimension.Indirect) - info->dim_indirect_files |= 1u << dst->Register.File; - - if (is_memory_file(dst->Register.File)) { - assert(fullinst->Instruction.Opcode == TGSI_OPCODE_STORE); - - is_mem_inst = true; - info->writes_memory = TRUE; - - if (dst->Register.File == TGSI_FILE_IMAGE) { - if (dst->Register.Indirect) - info->images_store = info->images_declared; - else - info->images_store |= 1 << dst->Register.Index; - } else if (dst->Register.File == TGSI_FILE_BUFFER) { - if (dst->Register.Indirect) - info->shader_buffers_store = info->shader_buffers_declared; - else - info->shader_buffers_store |= 1 << dst->Register.Index; - } - } - } - - if (is_mem_inst) - info->num_memory_instructions++; - - if (computes_derivative(fullinst->Instruction.Opcode)) - info->uses_derivatives = true; - - info->num_instructions++; -} - - -static void -scan_declaration(struct tgsi_shader_info *info, - const struct tgsi_full_declaration *fulldecl) -{ - const uint file = fulldecl->Declaration.File; - const unsigned procType = info->processor; - uint reg; - - if 
(fulldecl->Declaration.Array) { - unsigned array_id = fulldecl->Array.ArrayID; - - switch (file) { - case TGSI_FILE_INPUT: - assert(array_id < ARRAY_SIZE(info->input_array_first)); - info->input_array_first[array_id] = fulldecl->Range.First; - info->input_array_last[array_id] = fulldecl->Range.Last; - break; - case TGSI_FILE_OUTPUT: - assert(array_id < ARRAY_SIZE(info->output_array_first)); - info->output_array_first[array_id] = fulldecl->Range.First; - info->output_array_last[array_id] = fulldecl->Range.Last; - break; - } - info->array_max[file] = MAX2(info->array_max[file], array_id); - } - - for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) { - unsigned semName = fulldecl->Semantic.Name; - unsigned semIndex = fulldecl->Semantic.Index + - (reg - fulldecl->Range.First); - int buffer; - unsigned index, target, type; - - /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << reg); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)reg); - - switch (file) { - case TGSI_FILE_CONSTANT: - buffer = 0; - - if (fulldecl->Declaration.Dimension) - buffer = fulldecl->Dim.Index2D; - - info->const_file_max[buffer] = - MAX2(info->const_file_max[buffer], (int)reg); - info->const_buffers_declared |= 1u << buffer; - break; - - case TGSI_FILE_IMAGE: - info->images_declared |= 1u << reg; - if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER) - info->images_buffers |= 1 << reg; - break; - - case TGSI_FILE_BUFFER: - info->shader_buffers_declared |= 1u << reg; - break; - - case TGSI_FILE_INPUT: - info->input_semantic_name[reg] = (ubyte) semName; - info->input_semantic_index[reg] = (ubyte) semIndex; - info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; - info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; - info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; - - /* Vertex shaders can have inputs with holes between them. 
*/ - info->num_inputs = MAX2(info->num_inputs, reg + 1); - - if (semName == TGSI_SEMANTIC_PRIMID) - info->uses_primid = TRUE; - else if (procType == PIPE_SHADER_FRAGMENT) { - if (semName == TGSI_SEMANTIC_POSITION) - info->reads_position = TRUE; - else if (semName == TGSI_SEMANTIC_FACE) - info->uses_frontface = TRUE; - } - break; - - case TGSI_FILE_SYSTEM_VALUE: - index = fulldecl->Range.First; - - info->system_value_semantic_name[index] = semName; - info->num_system_values = MAX2(info->num_system_values, index + 1); - - switch (semName) { - case TGSI_SEMANTIC_INSTANCEID: - info->uses_instanceid = TRUE; - break; - case TGSI_SEMANTIC_VERTEXID: - info->uses_vertexid = TRUE; - break; - case TGSI_SEMANTIC_VERTEXID_NOBASE: - info->uses_vertexid_nobase = TRUE; - break; - case TGSI_SEMANTIC_BASEVERTEX: - info->uses_basevertex = TRUE; - break; - case TGSI_SEMANTIC_PRIMID: - info->uses_primid = TRUE; - break; - case TGSI_SEMANTIC_INVOCATIONID: - info->uses_invocationid = TRUE; - break; - case TGSI_SEMANTIC_POSITION: - info->reads_position = TRUE; - break; - case TGSI_SEMANTIC_FACE: - info->uses_frontface = TRUE; - break; - case TGSI_SEMANTIC_SAMPLEMASK: - info->reads_samplemask = TRUE; - break; - case TGSI_SEMANTIC_TESSINNER: - case TGSI_SEMANTIC_TESSOUTER: - info->reads_tess_factors = true; - break; - } - break; - - case TGSI_FILE_OUTPUT: - info->output_semantic_name[reg] = (ubyte) semName; - info->output_semantic_index[reg] = (ubyte) semIndex; - info->output_usagemask[reg] |= fulldecl->Declaration.UsageMask; - info->num_outputs = MAX2(info->num_outputs, reg + 1); - - if (fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_X) { - info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamX; - info->num_stream_output_components[fulldecl->Semantic.StreamX]++; - } - if (fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_Y) { - info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamY << 2; - info->num_stream_output_components[fulldecl->Semantic.StreamY]++; - } - if 
(fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_Z) { - info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamZ << 4; - info->num_stream_output_components[fulldecl->Semantic.StreamZ]++; - } - if (fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_W) { - info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamW << 6; - info->num_stream_output_components[fulldecl->Semantic.StreamW]++; - } - - switch (semName) { - case TGSI_SEMANTIC_PRIMID: - info->writes_primid = true; - break; - case TGSI_SEMANTIC_VIEWPORT_INDEX: - info->writes_viewport_index = true; - break; - case TGSI_SEMANTIC_LAYER: - info->writes_layer = true; - break; - case TGSI_SEMANTIC_PSIZE: - info->writes_psize = true; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - info->writes_clipvertex = true; - break; - case TGSI_SEMANTIC_COLOR: - info->colors_written |= 1 << semIndex; - break; - case TGSI_SEMANTIC_STENCIL: - info->writes_stencil = true; - break; - case TGSI_SEMANTIC_SAMPLEMASK: - info->writes_samplemask = true; - break; - case TGSI_SEMANTIC_EDGEFLAG: - info->writes_edgeflag = true; - break; - case TGSI_SEMANTIC_POSITION: - if (procType == PIPE_SHADER_FRAGMENT) - info->writes_z = true; - else - info->writes_position = true; - break; - } - break; - - case TGSI_FILE_SAMPLER: - STATIC_ASSERT(sizeof(info->samplers_declared) * 8 >= PIPE_MAX_SAMPLERS); - info->samplers_declared |= 1u << reg; - break; - - case TGSI_FILE_SAMPLER_VIEW: - target = fulldecl->SamplerView.Resource; - type = fulldecl->SamplerView.ReturnTypeX; - - assert(target < TGSI_TEXTURE_UNKNOWN); - if (info->sampler_targets[reg] == TGSI_TEXTURE_UNKNOWN) { - /* Save sampler target for this sampler index */ - info->sampler_targets[reg] = target; - info->sampler_type[reg] = type; - } else { - /* if previously declared, make sure targets agree */ - assert(info->sampler_targets[reg] == target); - assert(info->sampler_type[reg] == type); - } - break; - } - } -} - - -static void -scan_immediate(struct tgsi_shader_info *info) -{ - uint reg = 
info->immediate_count++; - uint file = TGSI_FILE_IMMEDIATE; - - info->file_mask[file] |= (1 << reg); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)reg); -} - - -static void -scan_property(struct tgsi_shader_info *info, - const struct tgsi_full_property *fullprop) -{ - unsigned name = fullprop->Property.PropertyName; - unsigned value = fullprop->u[0].Data; - - assert(name < ARRAY_SIZE(info->properties)); - info->properties[name] = value; - - switch (name) { - case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: - info->num_written_clipdistance = value; - info->clipdist_writemask |= (1 << value) - 1; - break; - case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: - info->num_written_culldistance = value; - info->culldist_writemask |= (1 << value) - 1; - break; - } -} /** @@ -706,16 +56,13 @@ tgsi_scan_shader(const struct tgsi_token *tokens, { uint procType, i; struct tgsi_parse_context parse; - unsigned current_depth = 0; memset(info, 0, sizeof(*info)); for (i = 0; i < TGSI_FILE_COUNT; i++) info->file_max[i] = -1; - for (i = 0; i < ARRAY_SIZE(info->const_file_max); i++) + for (i = 0; i < Elements(info->const_file_max); i++) info->const_file_max[i] = -1; info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = 1; - for (i = 0; i < ARRAY_SIZE(info->sampler_targets); i++) - info->sampler_targets[i] = TGSI_TEXTURE_UNKNOWN; /** ** Setup to begin parsing input shader @@ -725,38 +72,265 @@ tgsi_scan_shader(const struct tgsi_token *tokens, return; } procType = parse.FullHeader.Processor.Processor; - assert(procType == PIPE_SHADER_FRAGMENT || - procType == PIPE_SHADER_VERTEX || - procType == PIPE_SHADER_GEOMETRY || - procType == PIPE_SHADER_TESS_CTRL || - procType == PIPE_SHADER_TESS_EVAL || - procType == PIPE_SHADER_COMPUTE); + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY || + procType == TGSI_PROCESSOR_TESS_CTRL || + procType == TGSI_PROCESSOR_TESS_EVAL || + procType == 
TGSI_PROCESSOR_COMPUTE); info->processor = procType; + /** ** Loop over incoming program tokens/instructions */ - while (!tgsi_parse_end_of_tokens(&parse)) { + while( !tgsi_parse_end_of_tokens( &parse ) ) { + info->num_tokens++; tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_INSTRUCTION: - scan_instruction(info, &parse.FullToken.FullInstruction, - &current_depth); + { + const struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + uint i; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + + for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = + &fullinst->Src[i]; + int ind = src->Register.Index; + + /* Mark which inputs are effectively used */ + if (src->Register.File == TGSI_FILE_INPUT) { + unsigned usage_mask; + usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i); + if (src->Register.Indirect) { + for (ind = 0; ind < info->num_inputs; ++ind) { + info->input_usage_mask[ind] |= usage_mask; + } + } else { + assert(ind >= 0); + assert(ind < PIPE_MAX_SHADER_INPUTS); + info->input_usage_mask[ind] |= usage_mask; + } + + if (procType == TGSI_PROCESSOR_FRAGMENT && + info->reads_position && + src->Register.Index == 0 && + (src->Register.SwizzleX == TGSI_SWIZZLE_Z || + src->Register.SwizzleY == TGSI_SWIZZLE_Z || + src->Register.SwizzleZ == TGSI_SWIZZLE_Z || + src->Register.SwizzleW == TGSI_SWIZZLE_Z)) { + info->reads_z = TRUE; + } + } + + /* check for indirect register reads */ + if (src->Register.Indirect) { + info->indirect_files |= (1 << src->Register.File); + info->indirect_files_read |= (1 << src->Register.File); + } + + /* MSAA samplers */ + if (src->Register.File == TGSI_FILE_SAMPLER) { + assert(fullinst->Instruction.Texture); + assert(src->Register.Index < Elements(info->is_msaa_sampler)); + + if (fullinst->Instruction.Texture && + (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || + 
fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { + info->is_msaa_sampler[src->Register.Index] = TRUE; + } + } + } + + /* check for indirect register writes */ + for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; + if (dst->Register.Indirect) { + info->indirect_files |= (1 << dst->Register.File); + info->indirect_files_written |= (1 << dst->Register.File); + } + } + + info->num_instructions++; + } break; + case TGSI_TOKEN_TYPE_DECLARATION: - scan_declaration(info, &parse.FullToken.FullDeclaration); + { + const struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + const uint file = fulldecl->Declaration.File; + uint reg; + + if (fulldecl->Declaration.Array) { + unsigned array_id = fulldecl->Array.ArrayID; + + switch (file) { + case TGSI_FILE_INPUT: + assert(array_id < ARRAY_SIZE(info->input_array_first)); + info->input_array_first[array_id] = fulldecl->Range.First; + info->input_array_last[array_id] = fulldecl->Range.Last; + break; + case TGSI_FILE_OUTPUT: + assert(array_id < ARRAY_SIZE(info->output_array_first)); + info->output_array_first[array_id] = fulldecl->Range.First; + info->output_array_last[array_id] = fulldecl->Range.Last; + break; + } + info->array_max[file] = MAX2(info->array_max[file], array_id); + } + + for (reg = fulldecl->Range.First; + reg <= fulldecl->Range.Last; + reg++) { + unsigned semName = fulldecl->Semantic.Name; + unsigned semIndex = + fulldecl->Semantic.Index + (reg - fulldecl->Range.First); + + /* only first 32 regs will appear in this bitfield */ + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); + + if (file == TGSI_FILE_CONSTANT) { + int buffer = 0; + + if (fulldecl->Declaration.Dimension) + buffer = fulldecl->Dim.Index2D; + + info->const_file_max[buffer] = + MAX2(info->const_file_max[buffer], (int)reg); + } + else if (file == TGSI_FILE_INPUT) { + 
info->input_semantic_name[reg] = (ubyte) semName; + info->input_semantic_index[reg] = (ubyte) semIndex; + info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; + info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; + info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; + info->num_inputs++; + + if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID) + info->uses_centroid = TRUE; + + if (semName == TGSI_SEMANTIC_PRIMID) + info->uses_primid = TRUE; + else if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (semName == TGSI_SEMANTIC_POSITION) + info->reads_position = TRUE; + else if (semName == TGSI_SEMANTIC_FACE) + info->uses_frontface = TRUE; + } + } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + unsigned index = fulldecl->Range.First; + + info->system_value_semantic_name[index] = semName; + info->num_system_values = MAX2(info->num_system_values, + index + 1); + + if (semName == TGSI_SEMANTIC_INSTANCEID) { + info->uses_instanceid = TRUE; + } + else if (semName == TGSI_SEMANTIC_VERTEXID) { + info->uses_vertexid = TRUE; + } + else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) { + info->uses_vertexid_nobase = TRUE; + } + else if (semName == TGSI_SEMANTIC_BASEVERTEX) { + info->uses_basevertex = TRUE; + } + else if (semName == TGSI_SEMANTIC_PRIMID) { + info->uses_primid = TRUE; + } else if (semName == TGSI_SEMANTIC_INVOCATIONID) { + info->uses_invocationid = TRUE; + } + } + else if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[reg] = (ubyte) semName; + info->output_semantic_index[reg] = (ubyte) semIndex; + info->num_outputs++; + + if (procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY || + procType == TGSI_PROCESSOR_TESS_CTRL || + procType == TGSI_PROCESSOR_TESS_EVAL) { + if (semName == TGSI_SEMANTIC_CLIPDIST) { + info->num_written_clipdistance += + util_bitcount(fulldecl->Declaration.UsageMask); + info->clipdist_writemask |= + fulldecl->Declaration.UsageMask << (semIndex*4); + 
} + else if (semName == TGSI_SEMANTIC_CULLDIST) { + info->num_written_culldistance += + util_bitcount(fulldecl->Declaration.UsageMask); + info->culldist_writemask |= + fulldecl->Declaration.UsageMask << (semIndex*4); + } + else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) { + info->writes_viewport_index = TRUE; + } + else if (semName == TGSI_SEMANTIC_LAYER) { + info->writes_layer = TRUE; + } + else if (semName == TGSI_SEMANTIC_PSIZE) { + info->writes_psize = TRUE; + } + else if (semName == TGSI_SEMANTIC_CLIPVERTEX) { + info->writes_clipvertex = TRUE; + } + } + + if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (semName == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + else if (semName == TGSI_SEMANTIC_STENCIL) { + info->writes_stencil = TRUE; + } + } + + if (procType == TGSI_PROCESSOR_VERTEX) { + if (semName == TGSI_SEMANTIC_EDGEFLAG) { + info->writes_edgeflag = TRUE; + } + } + } + } + } break; + case TGSI_TOKEN_TYPE_IMMEDIATE: - scan_immediate(info); + { + uint reg = info->immediate_count++; + uint file = TGSI_FILE_IMMEDIATE; + + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); + } break; + case TGSI_TOKEN_TYPE_PROPERTY: - scan_property(info, &parse.FullToken.FullProperty); + { + const struct tgsi_full_property *fullprop + = &parse.FullToken.FullProperty; + unsigned name = fullprop->Property.PropertyName; + + assert(name < Elements(info->properties)); + info->properties[name] = fullprop->u[0].Data; + } break; + default: - assert(!"Unexpected TGSI token type"); + assert( 0 ); } } @@ -766,7 +340,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* The dimensions of the IN decleration in geometry shader have * to be deduced from the type of the input primitive. 
*/ - if (procType == PIPE_SHADER_GEOMETRY) { + if (procType == TGSI_PROCESSOR_GEOMETRY) { unsigned input_primitive = info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]; int num_verts = u_vertices_per_prim(input_primitive); @@ -779,85 +353,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } - tgsi_parse_free(&parse); + tgsi_parse_free (&parse); } -/** - * Collect information about the arrays of a given register file. - * - * @param tokens TGSI shader - * @param file the register file to scan through - * @param max_array_id number of entries in @p arrays; should be equal to the - * highest array id, i.e. tgsi_shader_info::array_max[file]. - * @param arrays info for array of each ID will be written to arrays[ID - 1]. - */ -void -tgsi_scan_arrays(const struct tgsi_token *tokens, - unsigned file, - unsigned max_array_id, - struct tgsi_array_info *arrays) -{ - struct tgsi_parse_context parse; - - if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n"); - return; - } - - memset(arrays, 0, sizeof(arrays[0]) * max_array_id); - - while (!tgsi_parse_end_of_tokens(&parse)) { - struct tgsi_full_instruction *inst; - - tgsi_parse_token(&parse); - - if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { - struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - - if (decl->Declaration.Array && decl->Declaration.File == file && - decl->Array.ArrayID > 0 && decl->Array.ArrayID <= max_array_id) { - struct tgsi_array_info *array = &arrays[decl->Array.ArrayID - 1]; - assert(!array->declared); - array->declared = true; - array->range = decl->Range; - } - } - - if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) - continue; - - inst = &parse.FullToken.FullInstruction; - for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &inst->Dst[i]; - if (dst->Register.File != file) - continue; - - if (dst->Register.Indirect) { - if 
(dst->Indirect.ArrayID > 0 && - dst->Indirect.ArrayID <= max_array_id) { - arrays[dst->Indirect.ArrayID - 1].writemask |= dst->Register.WriteMask; - } else { - /* Indirect writes without an ArrayID can write anywhere. */ - for (unsigned j = 0; j < max_array_id; ++j) - arrays[j].writemask |= dst->Register.WriteMask; - } - } else { - /* Check whether the write falls into any of the arrays anyway. */ - for (unsigned j = 0; j < max_array_id; ++j) { - struct tgsi_array_info *array = &arrays[j]; - if (array->declared && - dst->Register.Index >= array->range.First && - dst->Register.Index <= array->range.Last) - array->writemask |= dst->Register.WriteMask; - } - } - } - } - - tgsi_parse_free(&parse); - - return; -} /** diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h index 3854827e5..b81bdd71f 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -54,8 +54,6 @@ struct tgsi_shader_info ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS]; - ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS]; ubyte num_system_values; ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS]; @@ -66,11 +64,6 @@ struct tgsi_shader_info uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ int const_file_max[PIPE_MAX_CONSTANT_BUFFERS]; - unsigned const_buffers_declared; /**< bitmask of declared const buffers */ - unsigned samplers_declared; /**< bitmask of declared samplers */ - ubyte sampler_targets[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_TEXTURE_x values */ - ubyte sampler_type[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_RETURN_TYPE_x */ - ubyte num_stream_output_components[4]; ubyte 
input_array_first[PIPE_MAX_SHADER_INPUTS]; ubyte input_array_last[PIPE_MAX_SHADER_INPUTS]; @@ -80,40 +73,16 @@ struct tgsi_shader_info uint immediate_count; /**< number of immediates declared */ uint num_instructions; - uint num_memory_instructions; /**< sampler, buffer, and image instructions */ uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ - /** - * If a tessellation control shader reads outputs, this describes which ones. - */ - boolean reads_pervertex_outputs; - boolean reads_perpatch_outputs; - boolean reads_tessfactor_outputs; - - ubyte colors_read; /**< which color components are read by the FS */ - ubyte colors_written; boolean reads_position; /**< does fragment shader read position? */ boolean reads_z; /**< does fragment shader read depth? */ - boolean reads_samplemask; /**< does fragment shader read sample mask? */ - boolean reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */ boolean writes_z; /**< does fragment shader write Z value? */ boolean writes_stencil; /**< does fragment shader write stencil value? */ - boolean writes_samplemask; /**< does fragment shader write sample mask? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KILL or KILL_IF instruction used? 
*/ - boolean uses_persp_center; - boolean uses_persp_centroid; - boolean uses_persp_sample; - boolean uses_linear_center; - boolean uses_linear_centroid; - boolean uses_linear_sample; - boolean uses_persp_opcode_interp_centroid; - boolean uses_persp_opcode_interp_offset; - boolean uses_persp_opcode_interp_sample; - boolean uses_linear_opcode_interp_centroid; - boolean uses_linear_opcode_interp_offset; - boolean uses_linear_opcode_interp_sample; + boolean uses_centroid; boolean uses_instanceid; boolean uses_vertexid; boolean uses_vertexid_nobase; @@ -121,33 +90,16 @@ struct tgsi_shader_info boolean uses_primid; boolean uses_frontface; boolean uses_invocationid; - boolean writes_position; boolean writes_psize; boolean writes_clipvertex; - boolean writes_primid; boolean writes_viewport_index; boolean writes_layer; - boolean writes_memory; /**< contains stores or atomics to buffers or images */ boolean is_msaa_sampler[PIPE_MAX_SAMPLERS]; - boolean uses_doubles; /**< uses any of the double instructions */ - boolean uses_derivatives; + unsigned clipdist_writemask; unsigned culldist_writemask; unsigned num_written_culldistance; unsigned num_written_clipdistance; - - unsigned images_declared; /**< bitmask of declared images */ - /** - * Bitmask indicating which declared image is a buffer. - */ - unsigned images_buffers; - unsigned images_load; /**< bitmask of images using loads */ - unsigned images_store; /**< bitmask of images using stores */ - unsigned images_atomic; /**< bitmask of images using atomics */ - unsigned shader_buffers_declared; /**< bitmask of declared shader buffers */ - unsigned shader_buffers_load; /**< bitmask of shader buffers using loads */ - unsigned shader_buffers_store; /**< bitmask of shader buffers using stores */ - unsigned shader_buffers_atomic; /**< bitmask of shader buffers using atomics */ /** * Bitmask indicating which register files are accessed with * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. 
@@ -159,38 +111,14 @@ struct tgsi_shader_info */ unsigned indirect_files_read; unsigned indirect_files_written; - unsigned dim_indirect_files; /**< shader resource indexing */ - unsigned const_buffers_indirect; /**< const buffers using indirect addressing */ unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */ - - /** - * Max nesting limit of loops/if's - */ - unsigned max_depth; -}; - -struct tgsi_array_info -{ - /** Whether an array with this ID was declared. */ - bool declared; - - /** The OR of all writemasks used to write to this array. */ - ubyte writemask; - - /** The range with which the array was declared. */ - struct tgsi_declaration_range range; }; extern void tgsi_scan_shader(const struct tgsi_token *tokens, struct tgsi_shader_info *info); -void -tgsi_scan_arrays(const struct tgsi_token *tokens, - unsigned file, - unsigned max_array_id, - struct tgsi_array_info *arrays); extern boolean tgsi_is_passthrough_shader(const struct tgsi_token *tokens); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c index 26403508e..8271ea081 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -34,8 +34,8 @@ const char *tgsi_processor_type_names[6] = { - "VERT", "FRAG", + "VERT", "GEOM", "TESS_CTRL", "TESS_EVAL", @@ -52,11 +52,10 @@ static const char *tgsi_file_names[] = "SAMP", "ADDR", "IMM", + "PRED", "SV", - "IMAGE", - "SVIEW", - "BUFFER", - "MEMORY", + "RES", + "SVIEW" }; const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = @@ -84,6 +83,7 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "PCOORD", "VIEWPORT_INDEX", "LAYER", + "CULLDIST", "SAMPLEID", "SAMPLEPOS", "SAMPLEMASK", @@ -95,17 +95,6 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "TESSOUTER", "TESSINNER", "VERTICESIN", - "HELPER_INVOCATION", - "BASEINSTANCE", - "DRAWID", - "WORK_DIM", - "SUBGROUP_SIZE", - "SUBGROUP_INVOCATION", - 
"SUBGROUP_EQ_MASK", - "SUBGROUP_GE_MASK", - "SUBGROUP_GT_MASK", - "SUBGROUP_LE_MASK", - "SUBGROUP_LT_MASK", }; const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] = @@ -148,14 +137,6 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "TES_SPACING", "TES_VERTEX_ORDER_CW", "TES_POINT_MODE", - "NUM_CLIPDIST_ENABLED", - "NUM_CULLDIST_ENABLED", - "FS_EARLY_DEPTH_STENCIL", - "NEXT_SHADER", - "CS_FIXED_BLOCK_WIDTH", - "CS_FIXED_BLOCK_HEIGHT", - "CS_FIXED_BLOCK_DEPTH", - "MUL_ZERO_WINS", }; const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = @@ -213,33 +194,24 @@ const char *tgsi_fs_coord_pixel_center_names[2] = "INTEGER" }; -const char *tgsi_immediate_type_names[6] = +const char *tgsi_immediate_type_names[4] = { "FLT32", "UINT32", "INT32", - "FLT64", - "UINT64", - "INT64", -}; - -const char *tgsi_memory_names[3] = -{ - "COHERENT", - "RESTRICT", - "VOLATILE", + "FLT64" }; static inline void tgsi_strings_check(void) { - STATIC_ASSERT(ARRAY_SIZE(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT); - STATIC_ASSERT(ARRAY_SIZE(tgsi_texture_names) == TGSI_TEXTURE_COUNT); - STATIC_ASSERT(ARRAY_SIZE(tgsi_property_names) == TGSI_PROPERTY_COUNT); - STATIC_ASSERT(ARRAY_SIZE(tgsi_primitive_names) == PIPE_PRIM_MAX); - STATIC_ASSERT(ARRAY_SIZE(tgsi_interpolate_names) == TGSI_INTERPOLATE_COUNT); - STATIC_ASSERT(ARRAY_SIZE(tgsi_return_type_names) == TGSI_RETURN_TYPE_COUNT); + STATIC_ASSERT(Elements(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT); + STATIC_ASSERT(Elements(tgsi_texture_names) == TGSI_TEXTURE_COUNT); + STATIC_ASSERT(Elements(tgsi_property_names) == TGSI_PROPERTY_COUNT); + STATIC_ASSERT(Elements(tgsi_primitive_names) == PIPE_PRIM_MAX); + STATIC_ASSERT(Elements(tgsi_interpolate_names) == TGSI_INTERPOLATE_COUNT); + STATIC_ASSERT(Elements(tgsi_return_type_names) == TGSI_RETURN_TYPE_COUNT); (void) tgsi_processor_type_names; (void) tgsi_return_type_names; (void) tgsi_immediate_type_names; @@ -251,8 +223,8 @@ tgsi_strings_check(void) const char * tgsi_file_name(unsigned file) 
{ - STATIC_ASSERT(ARRAY_SIZE(tgsi_file_names) == TGSI_FILE_COUNT); - if (file < ARRAY_SIZE(tgsi_file_names)) + STATIC_ASSERT(Elements(tgsi_file_names) == TGSI_FILE_COUNT); + if (file < Elements(tgsi_file_names)) return tgsi_file_names[file]; else return "invalid file"; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h index bb2d3458d..71e74372f 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h @@ -38,7 +38,7 @@ extern "C" { #endif -extern const char *tgsi_processor_type_names[PIPE_SHADER_TYPES]; +extern const char *tgsi_processor_type_names[6]; extern const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT]; @@ -58,9 +58,7 @@ extern const char *tgsi_fs_coord_origin_names[2]; extern const char *tgsi_fs_coord_pixel_center_names[2]; -extern const char *tgsi_immediate_type_names[6]; - -extern const char *tgsi_memory_names[3]; +extern const char *tgsi_immediate_type_names[4]; const char * diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c index 93a05568f..3e3ed5b19 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -119,42 +119,6 @@ static boolean str_match_nocase_whole( const char **pcur, const char *str ) return FALSE; } -/* Return the array index that matches starting at *pcur, where the string at - * *pcur is terminated by a non-digit non-letter non-underscore. - * Returns -1 if no match is found. - * - * On success, the pointer to the first string is moved to the end of the read - * word. - */ -static int str_match_name_from_array(const char **pcur, - const char * const *array, - unsigned array_size) -{ - for (unsigned j = 0; j < array_size; ++j) { - if (str_match_nocase_whole(pcur, array[j])) - return j; - } - return -1; -} - -/* Return the format corresponding to the name at *pcur. 
- * Returns -1 if there is no format name. - * - * On success, the pointer to the string is moved to the end of the read format - * name. - */ -static int str_match_format(const char **pcur) -{ - for (unsigned i = 0; i < PIPE_FORMAT_COUNT; i++) { - const struct util_format_description *desc = - util_format_description(i); - if (desc && str_match_nocase_whole(pcur, desc->name)) { - return i; - } - } - return -1; -} - /* Eat zero or more whitespaces. */ static void eat_opt_white( const char **pcur ) @@ -208,17 +172,14 @@ static boolean parse_int( const char **pcur, int *val ) return FALSE; } -static boolean parse_identifier( const char **pcur, char *ret, size_t len ) +static boolean parse_identifier( const char **pcur, char *ret ) { const char *cur = *pcur; int i = 0; if (is_alpha_underscore( cur )) { ret[i++] = *cur++; - while (is_alpha_underscore( cur ) || is_digit( cur )) { - if (i == len - 1) - return FALSE; + while (is_alpha_underscore( cur ) || is_digit( cur )) ret[i++] = *cur++; - } ret[i++] = '\0'; *pcur = cur; return TRUE; @@ -234,15 +195,8 @@ static boolean parse_float( const char **pcur, float *val ) boolean integral_part = FALSE; boolean fractional_part = FALSE; - if (*cur == '0' && *(cur + 1) == 'x') { - union fi fi; - fi.ui = strtoul(cur, NULL, 16); - *val = fi.f; - cur += 10; - goto out; - } - *val = (float) atof( cur ); + if (*cur == '-' || *cur == '+') cur++; if (is_digit( cur )) { @@ -274,8 +228,6 @@ static boolean parse_float( const char **pcur, float *val ) else return FALSE; } - -out: *pcur = cur; return TRUE; } @@ -298,42 +250,6 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1) return TRUE; } -static boolean parse_int64( const char **pcur, uint32_t *val0, uint32_t *val1) -{ - const char *cur = *pcur; - union { - int64_t i64val; - uint32_t uval[2]; - } v; - - v.i64val = strtoll(cur, (char**)pcur, 0); - if (*pcur == cur) - return FALSE; - - *val0 = v.uval[0]; - *val1 = v.uval[1]; - - return TRUE; -} - -static 
boolean parse_uint64( const char **pcur, uint32_t *val0, uint32_t *val1) -{ - const char *cur = *pcur; - union { - uint64_t u64val; - uint32_t uval[2]; - } v; - - v.u64val = strtoull(cur, (char**)pcur, 0); - if (*pcur == cur) - return FALSE; - - *val0 = v.uval[0]; - *val1 = v.uval[1]; - - return TRUE; -} - struct translate_ctx { const char *text; @@ -376,17 +292,17 @@ static boolean parse_header( struct translate_ctx *ctx ) uint processor; if (str_match_nocase_whole( &ctx->cur, "FRAG" )) - processor = PIPE_SHADER_FRAGMENT; + processor = TGSI_PROCESSOR_FRAGMENT; else if (str_match_nocase_whole( &ctx->cur, "VERT" )) - processor = PIPE_SHADER_VERTEX; + processor = TGSI_PROCESSOR_VERTEX; else if (str_match_nocase_whole( &ctx->cur, "GEOM" )) - processor = PIPE_SHADER_GEOMETRY; + processor = TGSI_PROCESSOR_GEOMETRY; else if (str_match_nocase_whole( &ctx->cur, "TESS_CTRL" )) - processor = PIPE_SHADER_TESS_CTRL; + processor = TGSI_PROCESSOR_TESS_CTRL; else if (str_match_nocase_whole( &ctx->cur, "TESS_EVAL" )) - processor = PIPE_SHADER_TESS_EVAL; + processor = TGSI_PROCESSOR_TESS_EVAL; else if (str_match_nocase_whole( &ctx->cur, "COMP" )) - processor = PIPE_SHADER_COMPUTE; + processor = TGSI_PROCESSOR_COMPUTE; else { report_error( ctx, "Unknown header" ); return FALSE; @@ -773,9 +689,9 @@ parse_register_dcl( * the second bracket */ /* tessellation has similar constraints to geometry shader */ - if ((ctx->processor == PIPE_SHADER_GEOMETRY && is_in) || - (ctx->processor == PIPE_SHADER_TESS_EVAL && is_in) || - (ctx->processor == PIPE_SHADER_TESS_CTRL && (is_in || is_out))) { + if ((ctx->processor == TGSI_PROCESSOR_GEOMETRY && is_in) || + (ctx->processor == TGSI_PROCESSOR_TESS_EVAL && is_in) || + (ctx->processor == TGSI_PROCESSOR_TESS_CTRL && (is_in || is_out))) { brackets[0] = brackets[1]; *num_brackets = 1; } else { @@ -1036,6 +952,43 @@ parse_instruction( inst = tgsi_default_full_instruction(); + /* Parse predicate. 
+ */ + eat_opt_white( &ctx->cur ); + if (*ctx->cur == '(') { + uint file; + int index; + uint swizzle[4]; + boolean parsed_swizzle; + + inst.Instruction.Predicate = 1; + + ctx->cur++; + if (*ctx->cur == '!') { + ctx->cur++; + inst.Predicate.Negate = 1; + } + + if (!parse_register_1d( ctx, &file, &index )) + return FALSE; + + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, 4 )) { + if (parsed_swizzle) { + inst.Predicate.SwizzleX = swizzle[0]; + inst.Predicate.SwizzleY = swizzle[1]; + inst.Predicate.SwizzleZ = swizzle[2]; + inst.Predicate.SwizzleW = swizzle[3]; + } + } + + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + + ctx->cur++; + } + /* Parse instruction name. */ eat_opt_white( &ctx->cur ); @@ -1077,15 +1030,6 @@ parse_instruction( inst.Texture.Texture = TGSI_TEXTURE_UNKNOWN; } - if ((i >= TGSI_OPCODE_LOAD && i <= TGSI_OPCODE_ATOMIMAX) || - i == TGSI_OPCODE_RESQ) { - inst.Instruction.Memory = 1; - inst.Memory.Qualifier = 0; - } - - assume(info->num_dst <= TGSI_FULL_MAX_DST_REGISTERS); - assume(info->num_src <= TGSI_FULL_MAX_SRC_REGISTERS); - /* Parse instruction operands. 
*/ for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { @@ -1126,7 +1070,7 @@ parse_instruction( cur = ctx->cur; eat_opt_white( &cur ); - for (i = 0; inst.Instruction.Texture && *cur == ',' && i < TGSI_FULL_MAX_TEX_OFFSETS; i++) { + for (i = 0; inst.Instruction.Texture && *cur == ','; i++) { cur++; eat_opt_white( &cur ); ctx->cur = cur; @@ -1138,41 +1082,6 @@ parse_instruction( inst.Texture.NumOffsets = i; cur = ctx->cur; - eat_opt_white(&cur); - - for (; inst.Instruction.Memory && *cur == ','; - ctx->cur = cur, eat_opt_white(&cur)) { - int j; - - cur++; - eat_opt_white(&cur); - - j = str_match_name_from_array(&cur, tgsi_memory_names, - ARRAY_SIZE(tgsi_memory_names)); - if (j >= 0) { - inst.Memory.Qualifier |= 1U << j; - continue; - } - - j = str_match_name_from_array(&cur, tgsi_texture_names, - ARRAY_SIZE(tgsi_texture_names)); - if (j >= 0) { - inst.Memory.Texture = j; - continue; - } - - j = str_match_format(&cur); - if (j >= 0) { - inst.Memory.Format = j; - continue; - } - - ctx->cur = cur; - report_error(ctx, "Expected memory qualifier, texture target, or format\n"); - return FALSE; - } - - cur = ctx->cur; eat_opt_white( &cur ); if (info->is_branch && *cur == ':') { uint target; @@ -1230,14 +1139,6 @@ static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type, ret = parse_double(&ctx->cur, &values[i].Uint, &values[i+1].Uint); i++; break; - case TGSI_IMM_INT64: - ret = parse_int64(&ctx->cur, &values[i].Uint, &values[i+1].Uint); - i++; - break; - case TGSI_IMM_UINT64: - ret = parse_uint64(&ctx->cur, &values[i].Uint, &values[i+1].Uint); - i++; - break; case TGSI_IMM_FLOAT32: ret = parse_float(&ctx->cur, &values[i].Float); break; @@ -1304,7 +1205,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) } is_vs_input = (file == TGSI_FILE_INPUT && - ctx->processor == PIPE_SHADER_VERTEX); + ctx->processor == TGSI_PROCESSOR_VERTEX); cur = ctx->cur; eat_opt_white( &cur ); @@ -1341,10 +1242,10 @@ static boolean 
parse_declaration( struct translate_ctx *ctx ) cur++; eat_opt_white( &cur ); - if (file == TGSI_FILE_IMAGE) { + if (file == TGSI_FILE_RESOURCE) { for (i = 0; i < TGSI_TEXTURE_COUNT; i++) { if (str_match_nocase_whole(&cur, tgsi_texture_names[i])) { - decl.Image.Resource = i; + decl.Resource.Resource = i; break; } } @@ -1359,17 +1260,13 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur2++; eat_opt_white(&cur2); if (str_match_nocase_whole(&cur2, "RAW")) { - decl.Image.Raw = 1; + decl.Resource.Raw = 1; } else if (str_match_nocase_whole(&cur2, "WR")) { - decl.Image.Writable = 1; + decl.Resource.Writable = 1; } else { - int format = str_match_format(&cur2); - if (format < 0) - break; - - decl.Image.Format = format; + break; } cur = cur2; eat_opt_white(&cur2); @@ -1442,26 +1339,6 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl.SamplerView.ReturnTypeX; } ctx->cur = cur; - } else if (file == TGSI_FILE_BUFFER) { - if (str_match_nocase_whole(&cur, "ATOMIC")) { - decl.Declaration.Atomic = 1; - ctx->cur = cur; - } - } else if (file == TGSI_FILE_MEMORY) { - if (str_match_nocase_whole(&cur, "GLOBAL")) { - /* Note this is a no-op global is the default */ - decl.Declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL; - ctx->cur = cur; - } else if (str_match_nocase_whole(&cur, "SHARED")) { - decl.Declaration.MemType = TGSI_MEMORY_TYPE_SHARED; - ctx->cur = cur; - } else if (str_match_nocase_whole(&cur, "PRIVATE")) { - decl.Declaration.MemType = TGSI_MEMORY_TYPE_PRIVATE; - ctx->cur = cur; - } else if (str_match_nocase_whole(&cur, "INPUT")) { - decl.Declaration.MemType = TGSI_MEMORY_TYPE_INPUT; - ctx->cur = cur; - } } else { if (str_match_nocase_whole(&cur, "LOCAL")) { decl.Declaration.Local = 1; @@ -1512,54 +1389,6 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur = ctx->cur; eat_opt_white( &cur ); - if (*cur == ',' && - file == TGSI_FILE_OUTPUT && ctx->processor == PIPE_SHADER_GEOMETRY) { - cur++; - eat_opt_white(&cur); - if 
(str_match_nocase_whole(&cur, "STREAM")) { - uint stream[4]; - - eat_opt_white(&cur); - if (*cur != '(') { - report_error(ctx, "Expected '('"); - return FALSE; - } - cur++; - - for (int i = 0; i < 4; ++i) { - eat_opt_white(&cur); - if (!parse_uint(&cur, &stream[i])) { - report_error(ctx, "Expected literal integer"); - return FALSE; - } - - eat_opt_white(&cur); - if (i < 3) { - if (*cur != ',') { - report_error(ctx, "Expected ','"); - return FALSE; - } - cur++; - } - } - - if (*cur != ')') { - report_error(ctx, "Expected ')'"); - return FALSE; - } - cur++; - - decl.Semantic.StreamX = stream[0]; - decl.Semantic.StreamY = stream[1]; - decl.Semantic.StreamZ = stream[2]; - decl.Semantic.StreamW = stream[3]; - - ctx->cur = cur; - } - } - - cur = ctx->cur; - eat_opt_white( &cur ); if (*cur == ',' && !is_vs_input) { uint i; @@ -1645,11 +1474,11 @@ static boolean parse_immediate( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - for (type = 0; type < ARRAY_SIZE(tgsi_immediate_type_names); ++type) { + for (type = 0; type < Elements(tgsi_immediate_type_names); ++type) { if (str_match_nocase_whole(&ctx->cur, tgsi_immediate_type_names[type])) break; } - if (type == ARRAY_SIZE(tgsi_immediate_type_names)) { + if (type == Elements(tgsi_immediate_type_names)) { report_error( ctx, "Expected immediate type" ); return FALSE; } @@ -1695,7 +1524,7 @@ parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin ) { uint i; - for (i = 0; i < ARRAY_SIZE(tgsi_fs_coord_origin_names); i++) { + for (i = 0; i < Elements(tgsi_fs_coord_origin_names); i++) { const char *cur = *pcur; if (str_match_nocase_whole( &cur, tgsi_fs_coord_origin_names[i])) { @@ -1712,7 +1541,7 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) { uint i; - for (i = 0; i < ARRAY_SIZE(tgsi_fs_coord_pixel_center_names); i++) { + for (i = 0; i < Elements(tgsi_fs_coord_pixel_center_names); i++) { const char *cur = *pcur; if (str_match_nocase_whole( &cur, 
tgsi_fs_coord_pixel_center_names[i])) { @@ -1724,22 +1553,6 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) return FALSE; } -static boolean -parse_property_next_shader( const char **pcur, uint *next_shader ) -{ - uint i; - - for (i = 0; i < ARRAY_SIZE(tgsi_processor_type_names); i++) { - const char *cur = *pcur; - - if (str_match_nocase_whole( &cur, tgsi_processor_type_names[i])) { - *next_shader = i; - *pcur = cur; - return TRUE; - } - } - return FALSE; -} static boolean parse_property( struct translate_ctx *ctx ) { @@ -1753,7 +1566,7 @@ static boolean parse_property( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!parse_identifier( &ctx->cur, id, sizeof(id) )) { + if (!parse_identifier( &ctx->cur, id )) { report_error( ctx, "Syntax error" ); return FALSE; } @@ -1777,7 +1590,7 @@ static boolean parse_property( struct translate_ctx *ctx ) return FALSE; } if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM && - ctx->processor == PIPE_SHADER_GEOMETRY) { + ctx->processor == TGSI_PROCESSOR_GEOMETRY) { ctx->implied_array_size = u_vertices_per_prim(values[0]); } break; @@ -1793,12 +1606,6 @@ static boolean parse_property( struct translate_ctx *ctx ) return FALSE; } break; - case TGSI_PROPERTY_NEXT_SHADER: - if (!parse_property_next_shader(&ctx->cur, &values[0] )) { - report_error( ctx, "Unknown next shader property value." 
); - return FALSE; - } - break; case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: default: if (!parse_uint(&ctx->cur, &values[0] )) { @@ -1831,8 +1638,8 @@ static boolean translate( struct translate_ctx *ctx ) if (!parse_header( ctx )) return FALSE; - if (ctx->processor == PIPE_SHADER_TESS_CTRL || - ctx->processor == PIPE_SHADER_TESS_EVAL) + if (ctx->processor == TGSI_PROCESSOR_TESS_CTRL || + ctx->processor == TGSI_PROCESSOR_TESS_EVAL) ctx->implied_array_size = 32; while (*ctx->cur != '\0') { diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h index 7ea82066f..ceb7c2e0f 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -95,39 +95,20 @@ struct tgsi_transform_context * Helper for emitting temporary register declarations. */ static inline void -tgsi_transform_temps_decl(struct tgsi_transform_context *ctx, - unsigned firstIdx, unsigned lastIdx) +tgsi_transform_temp_decl(struct tgsi_transform_context *ctx, + unsigned index) { struct tgsi_full_declaration decl; decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.Range.First = firstIdx; - decl.Range.Last = lastIdx; + decl.Range.First = + decl.Range.Last = index; ctx->emit_declaration(ctx, &decl); } -static inline void -tgsi_transform_temp_decl(struct tgsi_transform_context *ctx, - unsigned index) -{ - tgsi_transform_temps_decl(ctx, index, index); -} static inline void -tgsi_transform_const_decl(struct tgsi_transform_context *ctx, - unsigned firstIdx, unsigned lastIdx) -{ - struct tgsi_full_declaration decl; - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Range.First = firstIdx; - decl.Range.Last = lastIdx; - ctx->emit_declaration(ctx, &decl); -} - -static inline void tgsi_transform_input_decl(struct tgsi_transform_context *ctx, unsigned index, unsigned sem_name, unsigned sem_index, @@ -148,26 
+129,6 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx, ctx->emit_declaration(ctx, &decl); } -static inline void -tgsi_transform_output_decl(struct tgsi_transform_context *ctx, - unsigned index, - unsigned sem_name, unsigned sem_index, - unsigned interp) -{ - struct tgsi_full_declaration decl; - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Interpolate = 1; - decl.Declaration.Semantic = 1; - decl.Semantic.Name = sem_name; - decl.Semantic.Index = sem_index; - decl.Range.First = - decl.Range.Last = index; - decl.Interp.Interpolate = interp; - - ctx->emit_declaration(ctx, &decl); -} static inline void tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx, @@ -192,7 +153,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW; - decl.Declaration.UsageMask = TGSI_WRITEMASK_XYZW; + decl.Declaration.UsageMask = 0xf; decl.Range.First = decl.Range.Last = index; decl.SamplerView.Resource = target; @@ -221,28 +182,6 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx, ctx->emit_immediate(ctx, &immed); } -static inline void -tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg, - unsigned file, unsigned index, unsigned writemask) -{ - reg->Register.File = file; - reg->Register.Index = index; - reg->Register.WriteMask = writemask; -} - -static inline void -tgsi_transform_src_reg(struct tgsi_full_src_register *reg, - unsigned file, unsigned index, - unsigned swizzleX, unsigned swizzleY, - unsigned swizzleZ, unsigned swizzleW) -{ - reg->Register.File = file; - reg->Register.Index = index; - reg->Register.SwizzleX = swizzleX; - reg->Register.SwizzleY = swizzleY; - reg->Register.SwizzleZ = swizzleZ; - reg->Register.SwizzleW = swizzleW; -} /** * Helper for emitting 1-operand instructions. 
@@ -281,8 +220,7 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, unsigned src0_file, unsigned src0_index, unsigned src1_file, - unsigned src1_index, - bool src1_negate) + unsigned src1_index) { struct tgsi_full_instruction inst; @@ -297,47 +235,12 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, inst.Src[0].Register.Index = src0_index; inst.Src[1].Register.File = src1_file; inst.Src[1].Register.Index = src1_index; - inst.Src[1].Register.Negate = src1_negate; ctx->emit_instruction(ctx, &inst); } static inline void -tgsi_transform_op3_inst(struct tgsi_transform_context *ctx, - unsigned opcode, - unsigned dst_file, - unsigned dst_index, - unsigned dst_writemask, - unsigned src0_file, - unsigned src0_index, - unsigned src1_file, - unsigned src1_index, - unsigned src2_file, - unsigned src2_index) -{ - struct tgsi_full_instruction inst; - - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = opcode; - inst.Instruction.NumDstRegs = 1; - inst.Dst[0].Register.File = dst_file, - inst.Dst[0].Register.Index = dst_index; - inst.Dst[0].Register.WriteMask = dst_writemask; - inst.Instruction.NumSrcRegs = 3; - inst.Src[0].Register.File = src0_file; - inst.Src[0].Register.Index = src0_index; - inst.Src[1].Register.File = src1_file; - inst.Src[1].Register.Index = src1_index; - inst.Src[2].Register.File = src2_file; - inst.Src[2].Register.Index = src2_index; - - ctx->emit_instruction(ctx, &inst); -} - - - -static inline void tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx, unsigned opcode, unsigned dst_file, @@ -390,8 +293,7 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx, unsigned src0_swizzle, unsigned src1_file, unsigned src1_index, - unsigned src1_swizzle, - bool src1_negate) + unsigned src1_swizzle) { struct tgsi_full_instruction inst; @@ -406,7 +308,6 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx, inst.Src[0].Register.Index = src0_index; inst.Src[1].Register.File = src1_file; 
inst.Src[1].Register.Index = src1_index; - inst.Src[1].Register.Negate = src1_negate; switch (dst_writemask) { case TGSI_WRITEMASK_X: inst.Src[0].Register.SwizzleX = src0_swizzle; @@ -498,8 +399,7 @@ static inline void tgsi_transform_kill_inst(struct tgsi_transform_context *ctx, unsigned src_file, unsigned src_index, - unsigned src_swizzle, - boolean negate) + unsigned src_swizzle) { struct tgsi_full_instruction inst; @@ -513,25 +413,22 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx, inst.Src[0].Register.SwizzleY = inst.Src[0].Register.SwizzleZ = inst.Src[0].Register.SwizzleW = src_swizzle; - inst.Src[0].Register.Negate = negate; + inst.Src[0].Register.Negate = 1; ctx->emit_instruction(ctx, &inst); } static inline void -tgsi_transform_tex_inst(struct tgsi_transform_context *ctx, - unsigned dst_file, - unsigned dst_index, - unsigned src_file, - unsigned src_index, - unsigned tex_target, - unsigned sampler_index) +tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx, + unsigned dst_file, + unsigned dst_index, + unsigned src_file, + unsigned src_index, + unsigned sampler_index) { struct tgsi_full_instruction inst; - assert(tex_target < TGSI_TEXTURE_COUNT); - inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; @@ -539,7 +436,7 @@ tgsi_transform_tex_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 2; inst.Instruction.Texture = TRUE; - inst.Texture.Texture = tex_target; + inst.Texture.Texture = TGSI_TEXTURE_2D; inst.Src[0].Register.File = src_file; inst.Src[0].Register.Index = src_index; inst.Src[1].Register.File = TGSI_FILE_SAMPLER; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 9eb00d091..3d2131950 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -35,7 +35,6 @@ #include 
"tgsi/tgsi_dump.h" #include "tgsi/tgsi_sanity.h" #include "util/u_debug.h" -#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_bitmask.h" @@ -50,16 +49,15 @@ union tgsi_any_token { struct tgsi_declaration_range decl_range; struct tgsi_declaration_dimension decl_dim; struct tgsi_declaration_interp decl_interp; - struct tgsi_declaration_image decl_image; struct tgsi_declaration_semantic decl_semantic; struct tgsi_declaration_sampler_view decl_sampler_view; struct tgsi_declaration_array array; struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; + struct tgsi_instruction_predicate insn_predicate; struct tgsi_instruction_label insn_label; struct tgsi_instruction_texture insn_texture; - struct tgsi_instruction_memory insn_memory; struct tgsi_texture_offset insn_texture_offset; struct tgsi_src_register src; struct tgsi_ind_register ind; @@ -76,12 +74,13 @@ struct ureg_tokens { unsigned count; }; -#define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS) +#define UREG_MAX_INPUT PIPE_MAX_SHADER_INPUTS #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS -#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS) +#define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 4096 #define UREG_MAX_ADDR 3 +#define UREG_MAX_PRED 1 #define UREG_MAX_ARRAY_TEMPS 256 struct const_decl { @@ -99,14 +98,12 @@ struct ureg_program { unsigned processor; bool supports_any_inout_decl_range; - int next_shader_processor; struct { unsigned semantic_name; unsigned semantic_index; unsigned interp; unsigned char cylindrical_wrap; - unsigned char usage_mask; unsigned interp_location; unsigned first; unsigned last; @@ -117,6 +114,7 @@ struct ureg_program unsigned vs_inputs[PIPE_MAX_ATTRIBS/32]; struct { + unsigned index; unsigned semantic_name; unsigned semantic_index; } system_value[UREG_MAX_SYSTEM_VALUE]; @@ -125,7 +123,6 @@ struct ureg_program struct { unsigned semantic_name; 
unsigned semantic_index; - unsigned streams; unsigned usage_mask; /* = TGSI_WRITEMASK_* */ unsigned first; unsigned last; @@ -157,21 +154,6 @@ struct ureg_program } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS]; unsigned nr_sampler_views; - struct { - unsigned index; - unsigned target; - unsigned format; - boolean wr; - boolean raw; - } image[PIPE_MAX_SHADER_IMAGES]; - unsigned nr_images; - - struct { - unsigned index; - bool atomic; - } buffer[PIPE_MAX_SHADER_BUFFERS]; - unsigned nr_buffers; - struct util_bitmask *free_temps; struct util_bitmask *local_temps; struct util_bitmask *decl_temps; @@ -186,11 +168,10 @@ struct ureg_program unsigned properties[TGSI_PROPERTY_COUNT]; unsigned nr_addrs; + unsigned nr_preds; unsigned nr_instructions; struct ureg_tokens domain[2]; - - bool use_memory[TGSI_MEMORY_TYPE_COUNT]; }; static union tgsi_any_token error_tokens[32]; @@ -201,7 +182,7 @@ static void tokens_error( struct ureg_tokens *tokens ) FREE(tokens->tokens); tokens->tokens = error_tokens; - tokens->size = ARRAY_SIZE(error_tokens); + tokens->size = Elements(error_tokens); tokens->count = 0; } @@ -260,42 +241,33 @@ static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, return &ureg->domain[domain].tokens[nr]; } - void ureg_property(struct ureg_program *ureg, unsigned name, unsigned value) { - assert(name < ARRAY_SIZE(ureg->properties)); + assert(name < Elements(ureg->properties)); ureg->properties[name] = value; } struct ureg_src -ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *ureg, +ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index, unsigned interp_mode, unsigned cylindrical_wrap, unsigned interp_location, - unsigned index, - unsigned usage_mask, unsigned array_id, unsigned array_size) { unsigned i; - assert(usage_mask != 0); - assert(usage_mask <= TGSI_WRITEMASK_XYZW); - for (i = 0; i < ureg->nr_inputs; i++) { if (ureg->input[i].semantic_name == semantic_name && 
ureg->input[i].semantic_index == semantic_index) { assert(ureg->input[i].interp == interp_mode); assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap); assert(ureg->input[i].interp_location == interp_location); - if (ureg->input[i].array_id == array_id) { - ureg->input[i].usage_mask |= usage_mask; - goto out; - } - assert((ureg->input[i].usage_mask & usage_mask) == 0); + assert(ureg->input[i].array_id == array_id); + goto out; } } @@ -306,11 +278,10 @@ ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *ureg, ureg->input[i].interp = interp_mode; ureg->input[i].cylindrical_wrap = cylindrical_wrap; ureg->input[i].interp_location = interp_location; - ureg->input[i].first = index; - ureg->input[i].last = index + array_size - 1; + ureg->input[i].first = ureg->nr_input_regs; + ureg->input[i].last = ureg->nr_input_regs + array_size - 1; ureg->input[i].array_id = array_id; - ureg->input[i].usage_mask = usage_mask; - ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size); + ureg->nr_input_regs += array_size; ureg->nr_inputs++; } else { set_bad(ureg); @@ -321,27 +292,12 @@ out: array_id); } -struct ureg_src -ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, - unsigned semantic_name, - unsigned semantic_index, - unsigned interp_mode, - unsigned cylindrical_wrap, - unsigned interp_location, - unsigned array_id, - unsigned array_size) -{ - return ureg_DECL_fs_input_cyl_centroid_layout(ureg, - semantic_name, semantic_index, interp_mode, cylindrical_wrap, interp_location, - ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size); -} - struct ureg_src ureg_DECL_vs_input( struct ureg_program *ureg, unsigned index ) { - assert(ureg->processor == PIPE_SHADER_VERTEX); + assert(ureg->processor == TGSI_PROCESSOR_VERTEX); assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs)); ureg->vs_inputs[index/32] |= 1 << (index % 32); @@ -350,21 +306,6 @@ ureg_DECL_vs_input( struct ureg_program *ureg, struct ureg_src -ureg_DECL_input_layout(struct ureg_program 
*ureg, - unsigned semantic_name, - unsigned semantic_index, - unsigned index, - unsigned usage_mask, - unsigned array_id, - unsigned array_size) -{ - return ureg_DECL_fs_input_cyl_centroid_layout(ureg, - semantic_name, semantic_index, 0, 0, 0, - index, usage_mask, array_id, array_size); -} - - -struct ureg_src ureg_DECL_input(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index, @@ -378,37 +319,26 @@ ureg_DECL_input(struct ureg_program *ureg, struct ureg_src ureg_DECL_system_value(struct ureg_program *ureg, + unsigned index, unsigned semantic_name, unsigned semantic_index) { - unsigned i; - - for (i = 0; i < ureg->nr_system_values; i++) { - if (ureg->system_value[i].semantic_name == semantic_name && - ureg->system_value[i].semantic_index == semantic_index) { - goto out; - } - } - if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { + ureg->system_value[ureg->nr_system_values].index = index; ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; - i = ureg->nr_system_values; ureg->nr_system_values++; } else { set_bad(ureg); } -out: - return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i); + return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); } -struct ureg_dst -ureg_DECL_output_layout(struct ureg_program *ureg, - unsigned semantic_name, - unsigned semantic_index, - unsigned streams, +struct ureg_dst +ureg_DECL_output_masked(struct ureg_program *ureg, + unsigned name, unsigned index, unsigned usage_mask, unsigned array_id, @@ -417,58 +347,36 @@ ureg_DECL_output_layout(struct ureg_program *ureg, unsigned i; assert(usage_mask != 0); - assert(!(streams & 0x03) || (usage_mask & 1)); - assert(!(streams & 0x0c) || (usage_mask & 2)); - assert(!(streams & 0x30) || (usage_mask & 4)); - assert(!(streams & 0xc0) || (usage_mask & 8)); for (i = 0; i < ureg->nr_outputs; i++) { - if (ureg->output[i].semantic_name == semantic_name && - 
ureg->output[i].semantic_index == semantic_index) { - if (ureg->output[i].array_id == array_id) { - ureg->output[i].usage_mask |= usage_mask; - goto out; - } - assert((ureg->output[i].usage_mask & usage_mask) == 0); + if (ureg->output[i].semantic_name == name && + ureg->output[i].semantic_index == index) { + assert(ureg->output[i].array_id == array_id); + ureg->output[i].usage_mask |= usage_mask; + goto out; } } if (ureg->nr_outputs < UREG_MAX_OUTPUT) { - ureg->output[i].semantic_name = semantic_name; - ureg->output[i].semantic_index = semantic_index; + ureg->output[i].semantic_name = name; + ureg->output[i].semantic_index = index; ureg->output[i].usage_mask = usage_mask; - ureg->output[i].first = index; - ureg->output[i].last = index + array_size - 1; + ureg->output[i].first = ureg->nr_output_regs; + ureg->output[i].last = ureg->nr_output_regs + array_size - 1; ureg->output[i].array_id = array_id; - ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size); + ureg->nr_output_regs += array_size; ureg->nr_outputs++; } else { set_bad( ureg ); - i = 0; } out: - ureg->output[i].streams |= streams; - return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first, array_id); } -struct ureg_dst -ureg_DECL_output_masked(struct ureg_program *ureg, - unsigned name, - unsigned index, - unsigned usage_mask, - unsigned array_id, - unsigned array_size) -{ - return ureg_DECL_output_layout(ureg, name, index, 0, - ureg->nr_output_regs, usage_mask, array_id, array_size); -} - - struct ureg_dst ureg_DECL_output(struct ureg_program *ureg, unsigned name, @@ -520,7 +428,7 @@ ureg_DECL_constant2D(struct ureg_program *ureg, } -/* A one-dimensional, deprecated version of ureg_DECL_constant2D(). +/* A one-dimensional, depricated version of ureg_DECL_constant2D(). * * Constant operands declared with this function must be addressed * with a one-dimensional index. 
@@ -669,6 +577,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } +/* Allocate a new predicate register. + */ +struct ureg_dst +ureg_DECL_predicate(struct ureg_program *ureg) +{ + if (ureg->nr_preds < UREG_MAX_PRED) { + return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++); + } + + assert(0); + return ureg_dst_register(TGSI_FILE_PREDICATE, 0); +} + /* Allocate a new sampler. */ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, @@ -726,71 +647,6 @@ ureg_DECL_sampler_view(struct ureg_program *ureg, return reg; } -/* Allocate a new image. - */ -struct ureg_src -ureg_DECL_image(struct ureg_program *ureg, - unsigned index, - unsigned target, - unsigned format, - boolean wr, - boolean raw) -{ - struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index); - unsigned i; - - for (i = 0; i < ureg->nr_images; i++) - if (ureg->image[i].index == index) - return reg; - - if (i < PIPE_MAX_SHADER_IMAGES) { - ureg->image[i].index = index; - ureg->image[i].target = target; - ureg->image[i].wr = wr; - ureg->image[i].raw = raw; - ureg->image[i].format = format; - ureg->nr_images++; - return reg; - } - - assert(0); - return reg; -} - -/* Allocate a new buffer. - */ -struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, - bool atomic) -{ - struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr); - unsigned i; - - for (i = 0; i < ureg->nr_buffers; i++) - if (ureg->buffer[i].index == nr) - return reg; - - if (i < PIPE_MAX_SHADER_BUFFERS) { - ureg->buffer[i].index = nr; - ureg->buffer[i].atomic = atomic; - ureg->nr_buffers++; - return reg; - } - - assert(0); - return reg; -} - -/* Allocate a memory area. 
- */ -struct ureg_src ureg_DECL_memory(struct ureg_program *ureg, - unsigned memory_type) -{ - struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type); - - ureg->use_memory[memory_type] = true; - return reg; -} - static int match_or_expand_immediate64( const unsigned *v, int type, @@ -842,9 +698,7 @@ match_or_expand_immediate( const unsigned *v, unsigned nr2 = *pnr2; unsigned i, j; - if (type == TGSI_IMM_FLOAT64 || - type == TGSI_IMM_UINT64 || - type == TGSI_IMM_INT64) + if (type == TGSI_IMM_FLOAT64) return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle); *swizzle = 0; @@ -923,9 +777,7 @@ out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - if (type == TGSI_IMM_FLOAT64 || - type == TGSI_IMM_UINT64 || - type == TGSI_IMM_INT64) { + if (type == TGSI_IMM_FLOAT64) { for (j = nr; j < 4; j+=2) { swizzle |= (swizzle & 0xf) << (j * 2); } @@ -1025,43 +877,6 @@ ureg_DECL_immediate_int( struct ureg_program *ureg, return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } -struct ureg_src -ureg_DECL_immediate_uint64( struct ureg_program *ureg, - const uint64_t *v, - unsigned nr ) -{ - union { - unsigned u[4]; - uint64_t u64[2]; - } fu; - unsigned int i; - - assert((nr / 2) < 3); - for (i = 0; i < nr / 2; i++) { - fu.u64[i] = v[i]; - } - - return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64); -} - -struct ureg_src -ureg_DECL_immediate_int64( struct ureg_program *ureg, - const int64_t *v, - unsigned nr ) -{ - union { - unsigned u[4]; - int64_t i64[2]; - } fu; - unsigned int i; - - assert((nr / 2) < 3); - for (i = 0; i < nr / 2; i++) { - fu.i64[i] = v[i]; - } - - return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64); -} void ureg_emit_src( struct ureg_program *ureg, @@ -1202,7 +1017,7 @@ static void validate( unsigned opcode, #ifdef DEBUG const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); assert(info); - if (info) { + if(info) { 
assert(nr_dst == info->num_dst); assert(nr_src == info->num_src); } @@ -1213,11 +1028,17 @@ struct ureg_emit_insn_result ureg_emit_insn(struct ureg_program *ureg, unsigned opcode, boolean saturate, + boolean predicate, + boolean pred_negate, + unsigned pred_swizzle_x, + unsigned pred_swizzle_y, + unsigned pred_swizzle_z, + unsigned pred_swizzle_w, unsigned num_dst, - unsigned num_src) + unsigned num_src ) { union tgsi_any_token *out; - uint count = 1; + uint count = predicate ? 2 : 1; struct ureg_emit_insn_result result; validate( opcode, num_dst, num_src ); @@ -1232,6 +1053,16 @@ ureg_emit_insn(struct ureg_program *ureg, result.insn_token = ureg->domain[DOMAIN_INSN].count - count; result.extended_token = result.insn_token; + if (predicate) { + out[0].insn.Predicate = 1; + out[1].insn_predicate = tgsi_default_instruction_predicate(); + out[1].insn_predicate.Negate = pred_negate; + out[1].insn_predicate.SwizzleX = pred_swizzle_x; + out[1].insn_predicate.SwizzleY = pred_swizzle_y; + out[1].insn_predicate.SwizzleZ = pred_swizzle_z; + out[1].insn_predicate.SwizzleW = pred_swizzle_w; + } + ureg->nr_instructions++; return result; @@ -1251,7 +1082,7 @@ ureg_emit_label(struct ureg_program *ureg, { union tgsi_any_token *out, *insn; - if (!label_token) + if(!label_token) return; out = get_tokens( ureg, DOMAIN_INSN, 1 ); @@ -1316,25 +1147,6 @@ ureg_emit_texture_offset(struct ureg_program *ureg, } -void -ureg_emit_memory(struct ureg_program *ureg, - unsigned extended_token, - unsigned qualifier, - unsigned texture, - unsigned format) -{ - union tgsi_any_token *out, *insn; - - out = get_tokens( ureg, DOMAIN_INSN, 1 ); - insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); - - insn->insn.Memory = 1; - - out[0].value = 0; - out[0].insn_memory.Qualifier = qualifier; - out[0].insn_memory.Texture = texture; - out[0].insn_memory.Format = format; -} void ureg_fixup_insn_size(struct ureg_program *ureg, @@ -1358,16 +1170,33 @@ ureg_insn(struct ureg_program *ureg, struct 
ureg_emit_insn_result insn; unsigned i; boolean saturate; + boolean predicate; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; if (nr_dst && ureg_dst_is_empty(dst[0])) { return; } saturate = nr_dst ? dst[0].Saturate : FALSE; + predicate = nr_dst ? dst[0].Predicate : FALSE; + if (predicate) { + negate = dst[0].PredNegate; + swizzle[0] = dst[0].PredSwizzleX; + swizzle[1] = dst[0].PredSwizzleY; + swizzle[2] = dst[0].PredSwizzleZ; + swizzle[3] = dst[0].PredSwizzleW; + } insn = ureg_emit_insn(ureg, opcode, saturate, + predicate, + negate, + swizzle[0], + swizzle[1], + swizzle[2], + swizzle[3], nr_dst, nr_src); @@ -1394,16 +1223,33 @@ ureg_tex_insn(struct ureg_program *ureg, struct ureg_emit_insn_result insn; unsigned i; boolean saturate; + boolean predicate; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; if (nr_dst && ureg_dst_is_empty(dst[0])) { return; } saturate = nr_dst ? dst[0].Saturate : FALSE; + predicate = nr_dst ? dst[0].Predicate : FALSE; + if (predicate) { + negate = dst[0].PredNegate; + swizzle[0] = dst[0].PredSwizzleX; + swizzle[1] = dst[0].PredSwizzleY; + swizzle[2] = dst[0].PredSwizzleZ; + swizzle[3] = dst[0].PredSwizzleW; + } insn = ureg_emit_insn(ureg, opcode, saturate, + predicate, + negate, + swizzle[0], + swizzle[1], + swizzle[2], + swizzle[3], nr_dst, nr_src); @@ -1423,15 +1269,11 @@ ureg_tex_insn(struct ureg_program *ureg, void -ureg_memory_insn(struct ureg_program *ureg, - unsigned opcode, - const struct ureg_dst *dst, - unsigned nr_dst, - const struct ureg_src *src, - unsigned nr_src, - unsigned qualifier, - unsigned texture, - unsigned format) +ureg_label_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_src *src, + unsigned nr_src, + unsigned *label_token ) { struct ureg_emit_insn_result insn; unsigned i; @@ -1439,18 +1281,21 @@ ureg_memory_insn(struct ureg_program *ureg, insn = ureg_emit_insn(ureg, opcode, FALSE, - nr_dst, + FALSE, + FALSE, + TGSI_SWIZZLE_X, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Z, + 
TGSI_SWIZZLE_W, + 0, nr_src); - ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format); - - for (i = 0; i < nr_dst; i++) - ureg_emit_dst(ureg, dst[i]); + ureg_emit_label( ureg, insn.extended_token, label_token ); for (i = 0; i < nr_src; i++) - ureg_emit_src(ureg, src[i]); + ureg_emit_src( ureg, src[i] ); - ureg_fixup_insn_size(ureg, insn.insn_token); + ureg_fixup_insn_size( ureg, insn.insn_token ); } @@ -1461,7 +1306,6 @@ emit_decl_semantic(struct ureg_program *ureg, unsigned last, unsigned semantic_name, unsigned semantic_index, - unsigned streams, unsigned usage_mask, unsigned array_id) { @@ -1482,10 +1326,6 @@ emit_decl_semantic(struct ureg_program *ureg, out[2].value = 0; out[2].decl_semantic.Name = semantic_name; out[2].decl_semantic.Index = semantic_index; - out[2].decl_semantic.StreamX = streams & 3; - out[2].decl_semantic.StreamY = (streams >> 2) & 3; - out[2].decl_semantic.StreamZ = (streams >> 4) & 3; - out[2].decl_semantic.StreamW = (streams >> 6) & 3; if (array_id) { out[3].value = 0; @@ -1504,8 +1344,7 @@ emit_decl_fs(struct ureg_program *ureg, unsigned interpolate, unsigned cylindrical_wrap, unsigned interpolate_location, - unsigned array_id, - unsigned usage_mask) + unsigned array_id) { union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 5 : 4); @@ -1514,7 +1353,7 @@ emit_decl_fs(struct ureg_program *ureg, out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 4; out[0].decl.File = file; - out[0].decl.UsageMask = usage_mask; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! 
*/ out[0].decl.Interpolate = 1; out[0].decl.Semantic = 1; out[0].decl.Array = array_id != 0; @@ -1623,7 +1462,7 @@ emit_decl_sampler_view(struct ureg_program *ureg, out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 3; out[0].decl.File = TGSI_FILE_SAMPLER_VIEW; - out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.UsageMask = 0xf; out[1].value = 0; out[1].decl_range.First = index; @@ -1638,69 +1477,6 @@ emit_decl_sampler_view(struct ureg_program *ureg, } static void -emit_decl_image(struct ureg_program *ureg, - unsigned index, - unsigned target, - unsigned format, - boolean wr, - boolean raw) -{ - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); - - out[0].value = 0; - out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; - out[0].decl.NrTokens = 3; - out[0].decl.File = TGSI_FILE_IMAGE; - out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; - - out[1].value = 0; - out[1].decl_range.First = index; - out[1].decl_range.Last = index; - - out[2].value = 0; - out[2].decl_image.Resource = target; - out[2].decl_image.Writable = wr; - out[2].decl_image.Raw = raw; - out[2].decl_image.Format = format; -} - -static void -emit_decl_buffer(struct ureg_program *ureg, - unsigned index, - bool atomic) -{ - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); - - out[0].value = 0; - out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; - out[0].decl.NrTokens = 2; - out[0].decl.File = TGSI_FILE_BUFFER; - out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; - out[0].decl.Atomic = atomic; - - out[1].value = 0; - out[1].decl_range.First = index; - out[1].decl_range.Last = index; -} - -static void -emit_decl_memory(struct ureg_program *ureg, unsigned memory_type) -{ - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); - - out[0].value = 0; - out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; - out[0].decl.NrTokens = 2; - out[0].decl.File = TGSI_FILE_MEMORY; - out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; - out[0].decl.MemType = memory_type; - - out[1].value = 
0; - out[1].decl_range.First = memory_type; - out[1].decl_range.Last = memory_type; -} - -static void emit_immediate( struct ureg_program *ureg, const unsigned *v, unsigned type ) @@ -1739,17 +1515,17 @@ static void emit_decls( struct ureg_program *ureg ) { unsigned i,j; - for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) + for (i = 0; i < Elements(ureg->properties); i++) if (ureg->properties[i] != ~0) emit_property(ureg, i, ureg->properties[i]); - if (ureg->processor == PIPE_SHADER_VERTEX) { + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (ureg->vs_inputs[i/32] & (1u << (i%32))) { + if (ureg->vs_inputs[i/32] & (1 << (i%32))) { emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); } } - } else if (ureg->processor == PIPE_SHADER_FRAGMENT) { + } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { if (ureg->supports_any_inout_decl_range) { for (i = 0; i < ureg->nr_inputs; i++) { emit_decl_fs(ureg, @@ -1761,8 +1537,7 @@ static void emit_decls( struct ureg_program *ureg ) ureg->input[i].interp, ureg->input[i].cylindrical_wrap, ureg->input[i].interp_location, - ureg->input[i].array_id, - ureg->input[i].usage_mask); + ureg->input[i].array_id); } } else { @@ -1776,8 +1551,7 @@ static void emit_decls( struct ureg_program *ureg ) (j - ureg->input[i].first), ureg->input[i].interp, ureg->input[i].cylindrical_wrap, - ureg->input[i].interp_location, 0, - ureg->input[i].usage_mask); + ureg->input[i].interp_location, 0); } } } @@ -1790,7 +1564,6 @@ static void emit_decls( struct ureg_program *ureg ) ureg->input[i].last, ureg->input[i].semantic_name, ureg->input[i].semantic_index, - 0, TGSI_WRITEMASK_XYZW, ureg->input[i].array_id); } @@ -1804,7 +1577,6 @@ static void emit_decls( struct ureg_program *ureg ) ureg->input[i].semantic_name, ureg->input[i].semantic_index + (j - ureg->input[i].first), - 0, TGSI_WRITEMASK_XYZW, 0); } } @@ -1814,11 +1586,10 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < 
ureg->nr_system_values; i++) { emit_decl_semantic(ureg, TGSI_FILE_SYSTEM_VALUE, - i, - i, + ureg->system_value[i].index, + ureg->system_value[i].index, ureg->system_value[i].semantic_name, ureg->system_value[i].semantic_index, - 0, TGSI_WRITEMASK_XYZW, 0); } @@ -1830,7 +1601,6 @@ static void emit_decls( struct ureg_program *ureg ) ureg->output[i].last, ureg->output[i].semantic_name, ureg->output[i].semantic_index, - ureg->output[i].streams, ureg->output[i].usage_mask, ureg->output[i].array_id); } @@ -1844,7 +1614,6 @@ static void emit_decls( struct ureg_program *ureg ) ureg->output[i].semantic_name, ureg->output[i].semantic_index + (j - ureg->output[i].first), - ureg->output[i].streams, ureg->output[i].usage_mask, 0); } } @@ -1866,24 +1635,6 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler_view[i].return_type_w); } - for (i = 0; i < ureg->nr_images; i++) { - emit_decl_image(ureg, - ureg->image[i].index, - ureg->image[i].target, - ureg->image[i].format, - ureg->image[i].wr, - ureg->image[i].raw); - } - - for (i = 0; i < ureg->nr_buffers; i++) { - emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic); - } - - for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) { - if (ureg->use_memory[i]) - emit_decl_memory(ureg, i); - } - if (ureg->const_decls.nr_constant_ranges) { for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { emit_decl_range(ureg, @@ -1931,6 +1682,13 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } + if (ureg->nr_preds) { + emit_decl_range(ureg, + TGSI_FILE_PREDICATE, + 0, + ureg->nr_preds); + } + for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].value.u, @@ -1980,16 +1738,6 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) { const struct tgsi_token *tokens; - switch (ureg->processor) { - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_TESS_EVAL: - ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER, - ureg->next_shader_processor == -1 ? 
- PIPE_SHADER_FRAGMENT : - ureg->next_shader_processor); - break; - } - emit_header( ureg ); emit_decls( ureg ); copy_instructions( ureg ); @@ -2029,23 +1777,25 @@ void *ureg_create_shader( struct ureg_program *ureg, { struct pipe_shader_state state; - pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg)); + state.tokens = ureg_finalize(ureg); if(!state.tokens) return NULL; if (so) state.stream_output = *so; + else + memset(&state.stream_output, 0, sizeof(state.stream_output)); switch (ureg->processor) { - case PIPE_SHADER_VERTEX: + case TGSI_PROCESSOR_VERTEX: return pipe->create_vs_state(pipe, &state); - case PIPE_SHADER_TESS_CTRL: + case TGSI_PROCESSOR_TESS_CTRL: return pipe->create_tcs_state(pipe, &state); - case PIPE_SHADER_TESS_EVAL: + case TGSI_PROCESSOR_TESS_EVAL: return pipe->create_tes_state(pipe, &state); - case PIPE_SHADER_GEOMETRY: + case TGSI_PROCESSOR_GEOMETRY: return pipe->create_gs_state(pipe, &state); - case PIPE_SHADER_FRAGMENT: + case TGSI_PROCESSOR_FRAGMENT: return pipe->create_fs_state(pipe, &state); default: return NULL; @@ -2063,7 +1813,7 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; if (nr_tokens) - *nr_tokens = ureg->domain[DOMAIN_DECL].count; + *nr_tokens = ureg->domain[DOMAIN_DECL].size; ureg->domain[DOMAIN_DECL].tokens = 0; ureg->domain[DOMAIN_DECL].size = 0; @@ -2080,6 +1830,29 @@ void ureg_free_tokens( const struct tgsi_token *tokens ) } +static inline unsigned +pipe_shader_from_tgsi_processor(unsigned processor) +{ + switch (processor) { + case TGSI_PROCESSOR_VERTEX: + return PIPE_SHADER_VERTEX; + case TGSI_PROCESSOR_TESS_CTRL: + return PIPE_SHADER_TESS_CTRL; + case TGSI_PROCESSOR_TESS_EVAL: + return PIPE_SHADER_TESS_EVAL; + case TGSI_PROCESSOR_GEOMETRY: + return PIPE_SHADER_GEOMETRY; + case TGSI_PROCESSOR_FRAGMENT: + return PIPE_SHADER_FRAGMENT; + case TGSI_PROCESSOR_COMPUTE: + return PIPE_SHADER_COMPUTE; + default: + assert(0); + return 
PIPE_SHADER_VERTEX; + } +} + + struct ureg_program * ureg_create(unsigned processor) { @@ -2092,17 +1865,17 @@ ureg_create_with_screen(unsigned processor, struct pipe_screen *screen) { int i; struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); - if (!ureg) + if (ureg == NULL) goto no_ureg; ureg->processor = processor; ureg->supports_any_inout_decl_range = screen && - screen->get_shader_param(screen, processor, + screen->get_shader_param(screen, + pipe_shader_from_tgsi_processor(processor), PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0; - ureg->next_shader_processor = -1; - for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) + for (i = 0; i < Elements(ureg->properties); i++) ureg->properties[i] = ~0; ureg->free_temps = util_bitmask_create(); @@ -2130,13 +1903,6 @@ no_ureg: } -void -ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor) -{ - ureg->next_shader_processor = processor; -} - - unsigned ureg_get_nr_outputs( const struct ureg_program *ureg ) { @@ -2150,7 +1916,7 @@ void ureg_destroy( struct ureg_program *ureg ) { unsigned i; - for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) { + for (i = 0; i < Elements(ureg->domain); i++) { if (ureg->domain[i].tokens && ureg->domain[i].tokens != error_tokens) FREE(ureg->domain[i].tokens); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h index d30191594..0aae550d6 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -79,6 +79,12 @@ struct ureg_dst unsigned DimIndirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ unsigned Saturate : 1; /* BOOL */ + unsigned Predicate : 1; + unsigned PredNegate : 1; /* BOOL */ + unsigned PredSwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleW : 2; /* TGSI_SWIZZLE_ */ int Index : 16; /* SINT */ int IndirectIndex : 16; /* SINT */ 
unsigned IndirectFile : 4; /* TGSI_FILE_ */ @@ -108,8 +114,6 @@ ureg_create_shader( struct ureg_program *, struct pipe_context *pipe, const struct pipe_stream_output_info *so ); -void -ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor); /* Alternately, return the built token stream and hand ownership of * that memory to the caller: @@ -167,18 +171,6 @@ ureg_property(struct ureg_program *ureg, unsigned name, unsigned value); */ struct ureg_src -ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *, - unsigned semantic_name, - unsigned semantic_index, - unsigned interp_mode, - unsigned cylindrical_wrap, - unsigned interp_location, - unsigned index, - unsigned usage_mask, - unsigned array_id, - unsigned array_size); - -struct ureg_src ureg_DECL_fs_input_cyl_centroid(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, @@ -221,15 +213,6 @@ ureg_DECL_vs_input( struct ureg_program *, unsigned index ); struct ureg_src -ureg_DECL_input_layout(struct ureg_program *, - unsigned semantic_name, - unsigned semantic_index, - unsigned index, - unsigned usage_mask, - unsigned array_id, - unsigned array_size); - -struct ureg_src ureg_DECL_input(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, @@ -238,20 +221,11 @@ ureg_DECL_input(struct ureg_program *, struct ureg_src ureg_DECL_system_value(struct ureg_program *, + unsigned index, unsigned semantic_name, unsigned semantic_index); struct ureg_dst -ureg_DECL_output_layout(struct ureg_program *, - unsigned semantic_name, - unsigned semantic_index, - unsigned streams, - unsigned index, - unsigned usage_mask, - unsigned array_id, - unsigned array_size); - -struct ureg_dst ureg_DECL_output_masked(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, @@ -296,16 +270,6 @@ ureg_DECL_immediate_int( struct ureg_program *, const int *v, unsigned nr ); -struct ureg_src -ureg_DECL_immediate_uint64( struct ureg_program *, - const uint64_t *v, - unsigned 
nr ); - -struct ureg_src -ureg_DECL_immediate_int64( struct ureg_program *, - const int64_t *v, - unsigned nr ); - void ureg_DECL_constant2D(struct ureg_program *ureg, unsigned first, @@ -342,6 +306,9 @@ ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst ureg_DECL_address( struct ureg_program * ); +struct ureg_dst +ureg_DECL_predicate(struct ureg_program *); + /* Supply an index to the sampler declaration as this is the hook to * the external pipe_sampler state. Users of this function probably * don't want just any sampler, but a specific one which they've set @@ -360,19 +327,6 @@ ureg_DECL_sampler_view(struct ureg_program *, unsigned return_type_z, unsigned return_type_w ); -struct ureg_src -ureg_DECL_image(struct ureg_program *ureg, - unsigned index, - unsigned target, - unsigned format, - boolean wr, - boolean raw); - -struct ureg_src -ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic); - -struct ureg_src -ureg_DECL_memory(struct ureg_program *ureg, unsigned memory_type); static inline struct ureg_src ureg_imm4f( struct ureg_program *ureg, @@ -562,15 +516,12 @@ ureg_tex_insn(struct ureg_program *ureg, void -ureg_memory_insn(struct ureg_program *ureg, - unsigned opcode, - const struct ureg_dst *dst, - unsigned nr_dst, - const struct ureg_src *src, - unsigned nr_src, - unsigned qualifier, - unsigned texture, - unsigned format); +ureg_label_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_src *src, + unsigned nr_src, + unsigned *label); + /*********************************************************************** * Internal instruction helpers, don't call these directly: @@ -585,8 +536,14 @@ struct ureg_emit_insn_result ureg_emit_insn(struct ureg_program *ureg, unsigned opcode, boolean saturate, + boolean predicate, + boolean pred_negate, + unsigned pred_swizzle_x, + unsigned pred_swizzle_y, + unsigned pred_swizzle_z, + unsigned pred_swizzle_w, unsigned num_dst, - unsigned num_src); + unsigned num_src ); void 
ureg_emit_label(struct ureg_program *ureg, @@ -602,13 +559,6 @@ void ureg_emit_texture_offset(struct ureg_program *ureg, const struct tgsi_texture_offset *offset); -void -ureg_emit_memory(struct ureg_program *ureg, - unsigned insn_token, - unsigned qualifier, - unsigned texture, - unsigned format); - void ureg_emit_dst( struct ureg_program *ureg, struct ureg_dst dst ); @@ -630,6 +580,12 @@ static inline void ureg_##op( struct ureg_program *ureg ) \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ 0, \ 0); \ ureg_fixup_insn_size( ureg, insn.insn_token ); \ @@ -644,6 +600,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ 0, \ 1); \ ureg_emit_src( ureg, src ); \ @@ -659,6 +621,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ 0, \ 0); \ ureg_emit_label( ureg, insn.extended_token, label_token ); \ @@ -675,6 +643,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ 0, \ 1); \ ureg_emit_label( ureg, insn.extended_token, label_token ); \ @@ -693,6 +667,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 0); \ ureg_emit_dst( ureg, dst ); \ @@ -712,6 +692,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, 
\ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 1); \ ureg_emit_dst( ureg, dst ); \ @@ -732,6 +718,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 2); \ ureg_emit_dst( ureg, dst ); \ @@ -754,6 +746,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 2); \ ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ @@ -777,6 +775,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 2); \ ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ @@ -800,6 +804,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 3); \ ureg_emit_dst( ureg, dst ); \ @@ -824,6 +834,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 3); \ ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ @@ -850,6 +866,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + 
dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 4); \ ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ @@ -877,6 +899,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 4); \ ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ @@ -904,6 +932,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 4); \ ureg_emit_dst( ureg, dst ); \ @@ -931,6 +965,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 5); \ ureg_emit_dst( ureg, dst ); \ @@ -959,6 +999,12 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ 1, \ 5); \ ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ @@ -1042,6 +1088,24 @@ ureg_saturate( struct ureg_dst reg ) return reg; } +static inline struct ureg_dst +ureg_predicate(struct ureg_dst reg, + boolean negate, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Predicate = 1; + reg.PredNegate = negate; + reg.PredSwizzleX = swizzle_x; + reg.PredSwizzleY = swizzle_y; + reg.PredSwizzleZ = swizzle_z; + reg.PredSwizzleW = swizzle_w; + return reg; +} + static inline struct ureg_dst ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr ) { @@ -1142,6 +1206,12 @@ 
ureg_dst_array_register(unsigned file, dst.IndirectIndex = 0; dst.IndirectSwizzle = 0; dst.Saturate = 0; + dst.Predicate = 0; + dst.PredNegate = 0; + dst.PredSwizzleX = TGSI_SWIZZLE_X; + dst.PredSwizzleY = TGSI_SWIZZLE_Y; + dst.PredSwizzleZ = TGSI_SWIZZLE_Z; + dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = index; dst.Dimension = 0; dst.DimensionIndex = 0; @@ -1177,6 +1247,12 @@ ureg_dst( struct ureg_src src ) dst.IndirectIndex = src.IndirectIndex; dst.IndirectSwizzle = src.IndirectSwizzle; dst.Saturate = 0; + dst.Predicate = 0; + dst.PredNegate = 0; + dst.PredSwizzleX = TGSI_SWIZZLE_X; + dst.PredSwizzleY = TGSI_SWIZZLE_Y; + dst.PredSwizzleZ = TGSI_SWIZZLE_Z; + dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = src.Index; dst.Dimension = src.Dimension; dst.DimensionIndex = src.DimensionIndex; @@ -1268,6 +1344,12 @@ ureg_dst_undef( void ) dst.IndirectIndex = 0; dst.IndirectSwizzle = 0; dst.Saturate = 0; + dst.Predicate = 0; + dst.PredNegate = 0; + dst.PredSwizzleX = TGSI_SWIZZLE_X; + dst.PredSwizzleY = TGSI_SWIZZLE_Y; + dst.PredSwizzleZ = TGSI_SWIZZLE_Z; + dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = 0; dst.Dimension = 0; dst.DimensionIndex = 0; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c index f6d2d3f4c..e5b8427a0 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -29,7 +29,6 @@ #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_util.h" -#include "tgsi_exec.h" union pointer_hack { @@ -54,17 +53,17 @@ tgsi_util_get_src_register_swizzle( const struct tgsi_src_register *reg, unsigned component ) { - switch (component) { - case TGSI_CHAN_X: + switch( component ) { + case 0: return reg->SwizzleX; - case TGSI_CHAN_Y: + case 1: return reg->SwizzleY; - case TGSI_CHAN_Z: + case 2: return reg->SwizzleZ; - case TGSI_CHAN_W: + case 3: return reg->SwizzleW; default: - assert(0); + assert( 0 ); } return 0; } @@ -192,13 +191,16 
@@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: case TGSI_OPCODE_MAD: + case TGSI_OPCODE_SUB: case TGSI_OPCODE_LRP: case TGSI_OPCODE_FMA: case TGSI_OPCODE_FRC: case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_CLAMP: case TGSI_OPCODE_FLR: case TGSI_OPCODE_ROUND: case TGSI_OPCODE_POW: + case TGSI_OPCODE_ABS: case TGSI_OPCODE_COS: case TGSI_OPCODE_SIN: case TGSI_OPCODE_DDX: @@ -372,8 +374,10 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg) * sample index. */ int -tgsi_util_get_texture_coord_dim(unsigned tgsi_tex) +tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample) { + int dim; + /* * Depending on the texture target, (src0.xyzw, src1.x) is interpreted * differently: @@ -402,7 +406,8 @@ tgsi_util_get_texture_coord_dim(unsigned tgsi_tex) case TGSI_TEXTURE_BUFFER: case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - return 1; + dim = 1; + break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_1D_ARRAY: @@ -410,64 +415,50 @@ tgsi_util_get_texture_coord_dim(unsigned tgsi_tex) case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_SHADOW1D_ARRAY: case TGSI_TEXTURE_2D_MSAA: - return 2; + dim = 2; + break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOWCUBE: case TGSI_TEXTURE_SHADOW2D_ARRAY: case TGSI_TEXTURE_2D_ARRAY_MSAA: - return 3; + dim = 3; + break; case TGSI_TEXTURE_CUBE_ARRAY: case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - return 4; + dim = 4; + break; default: assert(!"unknown texture target"); - return 0; + dim = 0; + break; } -} - -/** - * Given a TGSI_TEXTURE_x target, return the src register index for the - * shadow reference coordinate. 
- */ -int -tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex) -{ - switch (tgsi_tex) { - case TGSI_TEXTURE_SHADOW1D: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - return 2; - case TGSI_TEXTURE_SHADOWCUBE: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - case TGSI_TEXTURE_2D_MSAA: - case TGSI_TEXTURE_2D_ARRAY_MSAA: - return 3; - case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - return 4; - default: - /* no shadow nor sample */ - return -1; + if (shadow_or_sample) { + switch (tgsi_tex) { + case TGSI_TEXTURE_SHADOW1D: + /* there is a gap */ + *shadow_or_sample = 2; + break; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + *shadow_or_sample = dim; + break; + case TGSI_TEXTURE_2D_MSAA: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + *shadow_or_sample = 3; + break; + default: + /* no shadow nor sample */ + *shadow_or_sample = -1; + break; + } } -} - -boolean -tgsi_is_shadow_target(unsigned target) -{ - switch (target) { - case TGSI_TEXTURE_SHADOW1D: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - case TGSI_TEXTURE_SHADOWCUBE: - case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - return TRUE; - default: - return FALSE; - } + return dim; } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h index aa4606d0b..deb1ecc66 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -28,8 +28,6 @@ #ifndef TGSI_UTIL_H #define TGSI_UTIL_H -#include "pipe/p_shader_tokens.h" - #if defined __cplusplus extern "C" { #endif @@ -82,21 +80,7 @@ struct tgsi_src_register tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg); int -tgsi_util_get_texture_coord_dim(unsigned tgsi_tex); - -int 
-tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex); - -boolean -tgsi_is_shadow_target(unsigned target); - - -static inline boolean -tgsi_is_msaa_target(unsigned target) -{ - return (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA); -} +tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample); #if defined __cplusplus } |