diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2018-10-23 06:36:00 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2018-10-23 06:36:00 +0000 |
commit | b65fcab046d3a1b6b6ac315720df220925c5322e (patch) | |
tree | ff73dcc383ac0799c655ff6194cda9dacb75dde9 /lib/mesa/src/gallium/auxiliary/tgsi | |
parent | 18d6381c51e253e4c41c62619f80d9ce745b95c8 (diff) |
Merge Mesa 17.3.9
Mesa 18.x needs an ld with build-id support for at least the Intel code.
Mesa 18.2 assumes Linux-only memfd syscalls in the Intel code.
Tested by matthieu@, kettenis@ and myself on a variety of hardware and
architectures. ok kettenis@
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/tgsi')
25 files changed, 1663 insertions, 1884 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c index 9016effd3..4b14a2fc9 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c @@ -148,16 +148,16 @@ aa_prolog(struct tgsi_transform_context *ctx) tmp0 = ts->tmp; /* SUB t0.xy, texIn, (0.5, 0,5) */ - tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY, TGSI_FILE_INPUT, texIn, - TGSI_FILE_IMMEDIATE, imm); + TGSI_FILE_IMMEDIATE, imm, true); /* DP2 t0.x, t0.xy, t0.xy; # t0.x = x^2 + y^2 */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0, - TGSI_FILE_TEMPORARY, tmp0); + TGSI_FILE_TEMPORARY, tmp0, false); /* SQRT t0.x, t0.x */ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT, @@ -167,22 +167,22 @@ aa_prolog(struct tgsi_transform_context *ctx) /* compute coverage factor = (0.5-d)/(0.5-k) */ /* SUB t0.w, 0.5, texIn.z; # t0.w = 0.5-k */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X, - TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z); + TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z, true); /* SUB t0.y, 0.5, t0.x; # t0.y = 0.5-d */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true); /* DIV t0.w, t0.y, t0.w; # coverage = (0.5-d)/(0.5-k) */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W, false); /* If the 
coverage value is negative, it means the fragment is outside * the point's circular boundary. Kill it. @@ -198,7 +198,7 @@ aa_prolog(struct tgsi_transform_context *ctx) tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W, - TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W); + TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W, false); } /** @@ -249,7 +249,7 @@ aa_epilog(struct tgsi_transform_context *ctx) TGSI_FILE_OUTPUT, ts->color_out, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, ts->color_tmp, - TGSI_FILE_TEMPORARY, ts->tmp); + TGSI_FILE_TEMPORARY, ts->tmp, false); } /** diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c index d525c8ff3..0c4ec8d1c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -163,6 +163,16 @@ tgsi_default_declaration_range( void ) return dr; } +static struct tgsi_declaration_dimension +tgsi_default_declaration_dimension() +{ + struct tgsi_declaration_dimension dim; + + dim.Index2D = 0; + + return dim; +} + static struct tgsi_declaration_range tgsi_build_declaration_range( unsigned first, @@ -239,7 +249,10 @@ tgsi_default_declaration_semantic( void ) ds.Name = TGSI_SEMANTIC_POSITION; ds.Index = 0; - ds.Padding = 0; + ds.StreamX = 0; + ds.StreamY = 0; + ds.StreamZ = 0; + ds.StreamW = 0; return ds; } @@ -248,6 +261,10 @@ static struct tgsi_declaration_semantic tgsi_build_declaration_semantic( unsigned semantic_name, unsigned semantic_index, + unsigned streamx, + unsigned streamy, + unsigned streamz, + unsigned streamw, struct tgsi_declaration *declaration, struct tgsi_header *header ) { @@ -258,7 +275,10 @@ tgsi_build_declaration_semantic( ds.Name = semantic_name; ds.Index = semantic_index; - ds.Padding = 0; + ds.StreamX = streamx; + ds.StreamY = streamy; + ds.StreamZ = streamz; + ds.StreamW = streamw; declaration_grow( declaration, header ); @@ -371,6 
+391,7 @@ tgsi_default_full_declaration( void ) full_declaration.Declaration = tgsi_default_declaration(); full_declaration.Range = tgsi_default_declaration_range(); + full_declaration.Dim = tgsi_default_declaration_dimension(); full_declaration.Semantic = tgsi_default_declaration_semantic(); full_declaration.Interp = tgsi_default_declaration_interp(); full_declaration.Image = tgsi_default_declaration_image(); @@ -461,6 +482,10 @@ tgsi_build_full_declaration( *ds = tgsi_build_declaration_semantic( full_decl->Semantic.Name, full_decl->Semantic.Index, + full_decl->Semantic.StreamX, + full_decl->Semantic.StreamY, + full_decl->Semantic.StreamZ, + full_decl->Semantic.StreamW, declaration, header ); } @@ -623,12 +648,12 @@ tgsi_default_instruction( void ) instruction.NrTokens = 0; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = 0; - instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; instruction.Label = 0; instruction.Texture = 0; instruction.Memory = 0; + instruction.Precise = 0; instruction.Padding = 0; return instruction; @@ -637,7 +662,7 @@ tgsi_default_instruction( void ) static struct tgsi_instruction tgsi_build_instruction(unsigned opcode, unsigned saturate, - unsigned predicate, + unsigned precise, unsigned num_dst_regs, unsigned num_src_regs, struct tgsi_header *header) @@ -652,7 +677,7 @@ tgsi_build_instruction(unsigned opcode, instruction = tgsi_default_instruction(); instruction.Opcode = opcode; instruction.Saturate = saturate; - instruction.Predicate = predicate; + instruction.Precise = precise; instruction.NumDstRegs = num_dst_regs; instruction.NumSrcRegs = num_src_regs; @@ -673,47 +698,6 @@ instruction_grow( header_bodysize_grow( header ); } -struct tgsi_instruction_predicate -tgsi_default_instruction_predicate(void) -{ - struct tgsi_instruction_predicate instruction_predicate; - - instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; - instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; - instruction_predicate.SwizzleZ 
= TGSI_SWIZZLE_Z; - instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; - instruction_predicate.Negate = 0; - instruction_predicate.Index = 0; - instruction_predicate.Padding = 0; - - return instruction_predicate; -} - -static struct tgsi_instruction_predicate -tgsi_build_instruction_predicate(int index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_instruction *instruction, - struct tgsi_header *header) -{ - struct tgsi_instruction_predicate instruction_predicate; - - instruction_predicate = tgsi_default_instruction_predicate(); - instruction_predicate.SwizzleX = swizzleX; - instruction_predicate.SwizzleY = swizzleY; - instruction_predicate.SwizzleZ = swizzleZ; - instruction_predicate.SwizzleW = swizzleW; - instruction_predicate.Negate = negate; - instruction_predicate.Index = index; - - instruction_grow(instruction, header); - - return instruction_predicate; -} - static struct tgsi_instruction_label tgsi_default_instruction_label( void ) { @@ -750,6 +734,7 @@ tgsi_default_instruction_texture( void ) instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN; instruction_texture.NumOffsets = 0; + instruction_texture.ReturnType = TGSI_RETURN_TYPE_UNKNOWN; instruction_texture.Padding = 0; return instruction_texture; @@ -759,6 +744,7 @@ static struct tgsi_instruction_texture tgsi_build_instruction_texture( unsigned texture, unsigned num_offsets, + unsigned return_type, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -767,6 +753,7 @@ tgsi_build_instruction_texture( instruction_texture.Texture = texture; instruction_texture.NumOffsets = num_offsets; + instruction_texture.ReturnType = return_type; instruction_texture.Padding = 0; instruction->Texture = 1; @@ -1052,7 +1039,6 @@ tgsi_default_full_instruction( void ) unsigned i; full_instruction.Instruction = tgsi_default_instruction(); - full_instruction.Predicate = tgsi_default_instruction_predicate(); 
full_instruction.Label = tgsi_default_instruction_label(); full_instruction.Texture = tgsi_default_instruction_texture(); full_instruction.Memory = tgsi_default_instruction_memory(); @@ -1088,32 +1074,12 @@ tgsi_build_full_instruction( *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode, full_inst->Instruction.Saturate, - full_inst->Instruction.Predicate, + full_inst->Instruction.Precise, full_inst->Instruction.NumDstRegs, full_inst->Instruction.NumSrcRegs, header); prev_token = (struct tgsi_token *) instruction; - if (full_inst->Instruction.Predicate) { - struct tgsi_instruction_predicate *instruction_predicate; - - if (maxsize <= size) { - return 0; - } - instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size]; - size++; - - *instruction_predicate = - tgsi_build_instruction_predicate(full_inst->Predicate.Index, - full_inst->Predicate.Negate, - full_inst->Predicate.SwizzleX, - full_inst->Predicate.SwizzleY, - full_inst->Predicate.SwizzleZ, - full_inst->Predicate.SwizzleW, - instruction, - header); - } - if (full_inst->Instruction.Label) { struct tgsi_instruction_label *instruction_label; @@ -1142,7 +1108,8 @@ tgsi_build_full_instruction( *instruction_texture = tgsi_build_instruction_texture( full_inst->Texture.Texture, - full_inst->Texture.NumOffsets, + full_inst->Texture.NumOffsets, + full_inst->Texture.ReturnType, prev_token, instruction, header ); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h index 34d181ab2..53f31932c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -110,9 +110,6 @@ tgsi_build_full_instruction( struct tgsi_header *header, unsigned maxsize ); -struct tgsi_instruction_predicate -tgsi_default_instruction_predicate(void); - struct tgsi_full_src_register tgsi_full_src_register_from_dst(const struct tgsi_full_dst_register *dst); diff --git 
a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c index 405114d09..f6c85390e 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -25,6 +25,8 @@ * **************************************************************************/ +#include <inttypes.h> + #include "util/u_debug.h" #include "util/u_string.h" #include "util/u_math.h" @@ -87,6 +89,8 @@ dump_enum( #define CHR(C) ctx->dump_printf( ctx, "%c", C ) #define UIX(I) ctx->dump_printf( ctx, "0x%x", I ) #define UID(I) ctx->dump_printf( ctx, "%u", I ) +#define SI64D(I) ctx->dump_printf( ctx, "%"PRId64, I ) +#define UI64D(I) ctx->dump_printf( ctx, "%"PRIu64, I ) #define INSTID(I) ctx->dump_printf( ctx, "% 3u", I ) #define SID(I) ctx->dump_printf( ctx, "%d", I ) #define FLT(F) ctx->dump_printf( ctx, "%10.4f", F ) @@ -257,14 +261,14 @@ dump_imm_data(struct tgsi_iterate_context *iter, case TGSI_IMM_INT64: { union di d; d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32; - UID( d.i ); + SI64D( d.i ); i++; break; } case TGSI_IMM_UINT64: { union di d; d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32; - UID( d.ui ); + UI64D( d.ui ); i++; break; } @@ -360,6 +364,19 @@ iter_declaration( UID( decl->Semantic.Index ); CHR( ']' ); } + + if (decl->Semantic.StreamX != 0 || decl->Semantic.StreamY != 0 || + decl->Semantic.StreamZ != 0 || decl->Semantic.StreamW != 0) { + TXT(", STREAM("); + UID(decl->Semantic.StreamX); + TXT(", "); + UID(decl->Semantic.StreamY); + TXT(", "); + UID(decl->Semantic.StreamZ); + TXT(", "); + UID(decl->Semantic.StreamW); + CHR(')'); + } } if (decl->Declaration.File == TGSI_FILE_IMAGE) { @@ -561,36 +578,16 @@ iter_instruction( TXT( " " ); ctx->indent += info->post_indent; - if (inst->Instruction.Predicate) { - CHR( '(' ); - - if (inst->Predicate.Negate) - CHR( '!' 
); - - TXT( "PRED[" ); - SID( inst->Predicate.Index ); - CHR( ']' ); - - if (inst->Predicate.SwizzleX != TGSI_SWIZZLE_X || - inst->Predicate.SwizzleY != TGSI_SWIZZLE_Y || - inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z || - inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) { - CHR( '.' ); - ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names ); - ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names ); - ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names ); - ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names ); - } - - TXT( ") " ); - } - - TXT( info->mnemonic ); + TXT( tgsi_get_opcode_name(inst->Instruction.Opcode) ); if (inst->Instruction.Saturate) { TXT( "_SAT" ); } + if (inst->Instruction.Precise) { + TXT( "_PRECISE" ); + } + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { const struct tgsi_full_dst_register *dst = &inst->Dst[i]; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c index 7b5c56d9d..793c0da39 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -127,18 +127,6 @@ micro_ceil(union tgsi_exec_channel *dst, } static void -micro_clamp(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2) -{ - dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; - dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; - dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; - dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? 
src2->f[3] : src0->f[3]; -} - -static void micro_cmp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, @@ -210,6 +198,16 @@ micro_dadd(union tgsi_double_channel *dst, } static void +micro_ddiv(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->d[0] = src[0].d[0] / src[1].d[0]; + dst->d[1] = src[0].d[1] / src[1].d[1]; + dst->d[2] = src[0].d[2] / src[1].d[2]; + dst->d[3] = src[0].d[3] / src[1].d[3]; +} + +static void micro_ddx(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -848,40 +846,40 @@ static void micro_u64div(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->u64[0] = src[0].u64[0] / src[1].u64[0]; - dst->u64[1] = src[0].u64[1] / src[1].u64[1]; - dst->u64[2] = src[0].u64[2] / src[1].u64[2]; - dst->u64[3] = src[0].u64[3] / src[1].u64[3]; + dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; + dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; + dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; + dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; } static void micro_i64div(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->i64[0] = src[0].i64[0] / src[1].i64[0]; - dst->i64[1] = src[0].i64[1] / src[1].i64[1]; - dst->i64[2] = src[0].i64[2] / src[1].i64[2]; - dst->i64[3] = src[0].i64[3] / src[1].i64[3]; + dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; + dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0; + dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; + dst->i64[3] = src[1].i64[3] ? 
src[0].i64[3] / src[1].i64[3] : 0; } static void micro_u64mod(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->u64[0] = src[0].u64[0] % src[1].u64[0]; - dst->u64[1] = src[0].u64[1] % src[1].u64[1]; - dst->u64[2] = src[0].u64[2] % src[1].u64[2]; - dst->u64[3] = src[0].u64[3] % src[1].u64[3]; + dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; + dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; + dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; + dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull; } static void micro_i64mod(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->i64[0] = src[0].i64[0] % src[1].i64[0]; - dst->i64[1] = src[0].i64[1] % src[1].i64[1]; - dst->i64[2] = src[0].i64[2] % src[1].i64[2]; - dst->i64[3] = src[0].i64[3] % src[1].i64[3]; + dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; + dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; + dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; + dst->i64[3] = src[1].i64[3] ? 
src[0].i64[3] % src[1].i64[3] : ~0ll; } static void @@ -1293,7 +1291,6 @@ tgsi_exec_machine_create(enum pipe_shader_type shader_type) mach->ShaderType = shader_type; mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; - mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; if (shader_type != PIPE_SHADER_COMPUTE) { mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); @@ -1457,6 +1454,17 @@ micro_pow( } static void +micro_ldexp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->f[0] = ldexpf(src0->f[0], src1->i[0]); + dst->f[1] = ldexpf(src0->f[1], src1->i[1]); + dst->f[2] = ldexpf(src0->f[2], src1->i[2]); + dst->f[3] = ldexpf(src0->f[3], src1->i[3]); +} + +static void micro_sub(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1) @@ -1561,15 +1569,6 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; - case TGSI_FILE_PREDICATE: - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); - assert(index2D->i[i] == 0); - - chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; - } - break; - case TGSI_FILE_OUTPUT: /* vertex/fragment output vars can be read too */ for (i = 0; i < TGSI_QUAD_SIZE; i++) { @@ -1772,11 +1771,9 @@ store_dest_dstret(struct tgsi_exec_machine *mach, uint chan_index, enum tgsi_exec_datatype dst_datatype) { - uint i; static union tgsi_exec_channel null; union tgsi_exec_channel *dst; union tgsi_exec_channel index2D; - uint execmask = mach->ExecMask; int offset = 0; /* indirection offset */ int index; @@ -1928,58 +1925,11 @@ store_dest_dstret(struct tgsi_exec_machine *mach, dst = &mach->Addrs[index].xyzw[chan_index]; break; - case TGSI_FILE_PREDICATE: - index = reg->Register.Index; - assert(index < TGSI_EXEC_NUM_PREDS); - dst = &mach->Predicates[index].xyzw[chan_index]; - break; - 
default: assert( 0 ); return NULL; } - if (inst->Instruction.Predicate) { - uint swizzle; - union tgsi_exec_channel *pred; - - switch (chan_index) { - case TGSI_CHAN_X: - swizzle = inst->Predicate.SwizzleX; - break; - case TGSI_CHAN_Y: - swizzle = inst->Predicate.SwizzleY; - break; - case TGSI_CHAN_Z: - swizzle = inst->Predicate.SwizzleZ; - break; - case TGSI_CHAN_W: - swizzle = inst->Predicate.SwizzleW; - break; - default: - assert(0); - return NULL; - } - - assert(inst->Predicate.Index == 0); - - pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; - - if (inst->Predicate.Negate) { - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - if (pred->u[i]) { - execmask &= ~(1 << i); - } - } - } else { - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - if (!pred->u[i]) { - execmask &= ~(1 << i); - } - } - } - } - return dst; } @@ -2401,15 +2351,22 @@ static void exec_lodq(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - uint unit; + uint resource_unit, sampler_unit; int dim; int i; union tgsi_exec_channel coords[4]; const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; union tgsi_exec_channel r[2]; - unit = fetch_sampler_unit(mach, inst, 1); - dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + resource_unit = fetch_sampler_unit(mach, inst, 1); + if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { + uint target = mach->SamplerViews[resource_unit].Resource; + dim = tgsi_util_get_texture_coord_dim(target); + sampler_unit = fetch_sampler_unit(mach, inst, 2); + } else { + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + sampler_unit = resource_unit; + } assert(dim <= ARRAY_SIZE(coords)); /* fetch coordinates */ for (i = 0; i < dim; i++) { @@ -2419,7 +2376,7 @@ exec_lodq(struct tgsi_exec_machine *mach, for (i = dim; i < ARRAY_SIZE(coords); i++) { args[i] = &ZeroVec; } - mach->Sampler->query_lod(mach->Sampler, unit, unit, + mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, args[0]->f, args[1]->f, args[2]->f, 
@@ -2436,6 +2393,35 @@ exec_lodq(struct tgsi_exec_machine *mach, store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); } + if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { + unsigned char swizzles[4]; + unsigned chan; + swizzles[0] = inst->Src[1].Register.SwizzleX; + swizzles[1] = inst->Src[1].Register.SwizzleY; + swizzles[2] = inst->Src[1].Register.SwizzleZ; + swizzles[3] = inst->Src[1].Register.SwizzleW; + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + if (swizzles[chan] >= 2) { + store_dest(mach, &ZeroVec, + &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } else { + store_dest(mach, &r[swizzles[chan]], + &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } + } + } else { + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, + TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, + TGSI_EXEC_DATA_FLOAT); + } + } } static void @@ -2692,6 +2678,9 @@ exec_sample(struct tgsi_exec_machine *mach, lod = &c1; control = TGSI_SAMPLER_LOD_EXPLICIT; } + else if (modifier == TEX_MODIFIER_GATHER) { + control = TGSI_SAMPLER_GATHER; + } else { assert(modifier == TEX_MODIFIER_LEVEL_ZERO); control = TGSI_SAMPLER_LOD_ZERO; @@ -3246,60 +3235,6 @@ exec_dp4(struct tgsi_exec_machine *mach, } static void -exec_dp2a(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned int chan; - union tgsi_exec_channel arg[3]; - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&arg[2], &arg[0], &arg[1]); - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mad(&arg[0], 
&arg[0], &arg[1], &arg[2]); - - fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_add(&arg[0], &arg[0], &arg[1]); - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void -exec_dph(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned int chan; - union tgsi_exec_channel arg[3]; - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&arg[2], &arg[0], &arg[1]); - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); - - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - micro_add(&arg[0], &arg[0], &arg[1]); - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void exec_dp2(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -3361,74 +3296,42 @@ exec_up2h(struct tgsi_exec_machine *mach, } static void -exec_scs(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +micro_ucmp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) { - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { - union tgsi_exec_channel arg; - union 
tgsi_exec_channel result; - - fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - micro_cos(&result, &arg); - store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - micro_sin(&result, &arg); - store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - } - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - } + dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; + dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; + dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; + dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; } static void -exec_xpd(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_ucmp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - union tgsi_exec_channel r[6]; - union tgsi_exec_channel d[3]; - - fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - - micro_mul(&r[2], &r[0], &r[1]); - - fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - - micro_mul(&r[5], &r[3], &r[4] ); - micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]); - - fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - - micro_mul(&r[3], &r[3], &r[2]); - - fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - - micro_mul(&r[1], &r[1], &r[5]); - micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]); + unsigned int chan; + struct tgsi_exec_vector dst; - 
micro_mul(&r[5], &r[5], &r[4]); - micro_mul(&r[0], &r[0], &r[2]); - micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]); + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &src[0], &inst->Src[0], chan, + TGSI_EXEC_DATA_UINT); + fetch_source(mach, &src[1], &inst->Src[1], chan, + TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &src[2], &inst->Src[2], chan, + TGSI_EXEC_DATA_FLOAT); + micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); + } } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, + TGSI_EXEC_DATA_FLOAT); + } } } @@ -3832,17 +3735,15 @@ exec_dfracexp(struct tgsi_exec_machine *mach, union tgsi_double_channel dst; union tgsi_exec_channel dst_exp; - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - micro_dfracexp(&dst, &dst_exp, &src); + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + micro_dfracexp(&dst, &dst_exp, &src); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); - store_dest(mach, 
&dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); - } - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - micro_dfracexp(&dst, &dst_exp, &src); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); - store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); + for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[1].Register.WriteMask & (1 << chan)) + store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT); } } @@ -4674,10 +4575,10 @@ micro_mod(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1) { - dst->i[0] = src0->i[0] % src1->i[0]; - dst->i[1] = src0->i[1] % src1->i[1]; - dst->i[2] = src0->i[2] % src1->i[2]; - dst->i[3] = src0->i[3] % src1->i[3]; + dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; + dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; + dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0; + dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; } static void @@ -4999,18 +4900,6 @@ micro_uarl(union tgsi_exec_channel *dst, dst->i[3] = src->u[3]; } -static void -micro_ucmp(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2) -{ - dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; - dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; - dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; - dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; -} - /** * Signed bitfield extract (i.e. 
sign-extend the extracted bits) */ @@ -5208,10 +5097,6 @@ exec_instruction( exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_SUB: - exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_LRP: exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -5220,18 +5105,10 @@ exec_instruction( exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_DP2A: - exec_dp2a(mach, inst); - break; - case TGSI_OPCODE_FRC: exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_CLAMP: - exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_FLR: exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -5252,16 +5129,8 @@ exec_instruction( exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_XPD: - exec_xpd(mach, inst); - break; - - case TGSI_OPCODE_ABS: - exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - - case TGSI_OPCODE_DPH: - exec_dph(mach, inst); + case TGSI_OPCODE_LDEXP: + exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_COS: @@ -5490,10 +5359,6 @@ exec_instruction( exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_SCS: - exec_scs(mach, inst); - break; - case TGSI_OPCODE_DIV: exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -5573,14 +5438,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_PUSHA: - assert (0); - break; - - case TGSI_OPCODE_POPA: - assert (0); - break; - case TGSI_OPCODE_CEIL: exec_vector_unary(mach, 
inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -5617,10 +5474,6 @@ exec_instruction( exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; - case TGSI_OPCODE_SAD: - assert (0); - break; - case TGSI_OPCODE_TXF: exec_txf(mach, inst); break; @@ -5725,25 +5578,6 @@ exec_instruction( case TGSI_OPCODE_NOP: break; - case TGSI_OPCODE_BREAKC: - IFETCH(&r[0], 0, TGSI_CHAN_X); - /* update CondMask */ - if (r[0].u[0] && (mach->ExecMask & 0x1)) { - mach->LoopMask &= ~0x1; - } - if (r[0].u[1] && (mach->ExecMask & 0x2)) { - mach->LoopMask &= ~0x2; - } - if (r[0].u[2] && (mach->ExecMask & 0x4)) { - mach->LoopMask &= ~0x4; - } - if (r[0].u[3] && (mach->ExecMask & 0x8)) { - mach->LoopMask &= ~0x8; - } - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - case TGSI_OPCODE_F2I: exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; @@ -5905,7 +5739,7 @@ exec_instruction( break; case TGSI_OPCODE_GATHER4: - assert(0); + exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE); break; case TGSI_OPCODE_SVIEWINFO: @@ -5920,12 +5754,16 @@ exec_instruction( assert(0); break; + case TGSI_OPCODE_LOD: + exec_lodq(mach, inst); + break; + case TGSI_OPCODE_UARL: exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UCMP: - exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + exec_ucmp(mach, inst); break; case TGSI_OPCODE_IABS: @@ -6003,6 +5841,10 @@ exec_instruction( exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); break; + case TGSI_OPCODE_DDIV: + exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); + break; + case TGSI_OPCODE_DMUL: exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); break; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h index 9343d788d..514c69ede 
100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -58,6 +58,15 @@ extern "C" { TGSI_FOR_EACH_CHANNEL( CHAN )\ TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) +#define TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\ + ((INST)->Dst[1].Register.WriteMask & (1 << (CHAN))) + +#define TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\ + if (TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN )) + +#define TGSI_FOR_EACH_DST1_ENABLED_CHANNEL( INST, CHAN )\ + TGSI_FOR_EACH_CHANNEL( CHAN )\ + TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN ) /** * Registers may be treated as float, signed int or unsigned int. @@ -273,10 +282,6 @@ struct tgsi_sampler #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8) #define TGSI_EXEC_NUM_ADDRS 3 -/* predicate register */ -#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 11) -#define TGSI_EXEC_NUM_PREDS 1 - #define TGSI_EXEC_NUM_TEMP_EXTRAS 12 @@ -359,7 +364,6 @@ struct tgsi_exec_machine struct tgsi_exec_vector SystemValue[TGSI_MAX_MISC_INPUTS]; struct tgsi_exec_vector *Addrs; - struct tgsi_exec_vector *Predicates; struct tgsi_sampler *Sampler; @@ -505,8 +509,6 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return PIPE_MAX_CONSTANT_BUFFERS; case PIPE_SHADER_CAP_MAX_TEMPS: return TGSI_EXEC_NUM_TEMPS; - case PIPE_SHADER_CAP_MAX_PREDS: - return TGSI_EXEC_NUM_PREDS; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: @@ -518,6 +520,9 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1; case PIPE_SHADER_CAP_INTEGERS: return 1; + case PIPE_SHADER_CAP_INT64_ATOMICS: + case PIPE_SHADER_CAP_FP16: + return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return PIPE_MAX_SAMPLERS; case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: @@ -528,12 +533,14 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; - case PIPE_SHADER_CAP_DOUBLES: case 
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: return 0; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: return PIPE_MAX_SHADER_BUFFERS; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_from_mesa.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_from_mesa.c new file mode 100644 index 000000000..b7a21f29b --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_from_mesa.c @@ -0,0 +1,185 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "tgsi/tgsi_from_mesa.h" + +#include "pipe/p_compiler.h" + +/** + * Determine the semantic index that is used when the given varying is mapped + * to TGSI_SEMANTIC_GENERIC. + */ +unsigned +tgsi_get_generic_gl_varying_index(gl_varying_slot attr, + bool needs_texcoord_semantic) +{ + if (attr >= VARYING_SLOT_VAR0) { + if (needs_texcoord_semantic) + return attr - VARYING_SLOT_VAR0; + else + return 9 + (attr - VARYING_SLOT_VAR0); + } + if (attr == VARYING_SLOT_PNTC) { + assert(!needs_texcoord_semantic); + return 8; + } + if (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) { + assert(!needs_texcoord_semantic); + return attr - VARYING_SLOT_TEX0; + } + + assert(0); + return 0; +} + +/** + * Determine the semantic name and index used for the given varying. + */ +void +tgsi_get_gl_varying_semantic(gl_varying_slot attr, + bool needs_texcoord_semantic, + unsigned *semantic_name, + unsigned *semantic_index) +{ + switch (attr) { + case VARYING_SLOT_POS: + *semantic_name = TGSI_SEMANTIC_POSITION; + *semantic_index = 0; + break; + case VARYING_SLOT_COL0: + *semantic_name = TGSI_SEMANTIC_COLOR; + *semantic_index = 0; + break; + case VARYING_SLOT_COL1: + *semantic_name = TGSI_SEMANTIC_COLOR; + *semantic_index = 1; + break; + case VARYING_SLOT_BFC0: + *semantic_name = TGSI_SEMANTIC_BCOLOR; + *semantic_index = 0; + break; + case VARYING_SLOT_BFC1: + *semantic_name = TGSI_SEMANTIC_BCOLOR; + *semantic_index = 1; + break; + case VARYING_SLOT_FOGC: + *semantic_name = TGSI_SEMANTIC_FOG; + *semantic_index = 0; + break; + case VARYING_SLOT_PSIZ: + *semantic_name = TGSI_SEMANTIC_PSIZE; + *semantic_index = 0; + break; + case VARYING_SLOT_CLIP_DIST0: + *semantic_name = TGSI_SEMANTIC_CLIPDIST; + *semantic_index = 0; + break; + case VARYING_SLOT_CLIP_DIST1: + *semantic_name = TGSI_SEMANTIC_CLIPDIST; + *semantic_index = 1; + break; + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_CULL_DIST1: + /* these should have been lowered by GLSL */ + assert(0); + break; + case 
VARYING_SLOT_EDGE: + *semantic_name = TGSI_SEMANTIC_EDGEFLAG; + *semantic_index = 0; + break; + case VARYING_SLOT_CLIP_VERTEX: + *semantic_name = TGSI_SEMANTIC_CLIPVERTEX; + *semantic_index = 0; + break; + case VARYING_SLOT_LAYER: + *semantic_name = TGSI_SEMANTIC_LAYER; + *semantic_index = 0; + break; + case VARYING_SLOT_VIEWPORT: + *semantic_name = TGSI_SEMANTIC_VIEWPORT_INDEX; + *semantic_index = 0; + break; + case VARYING_SLOT_PNTC: + *semantic_name = TGSI_SEMANTIC_PCOORD; + *semantic_index = 0; + break; + + case VARYING_SLOT_TEX0: + case VARYING_SLOT_TEX1: + case VARYING_SLOT_TEX2: + case VARYING_SLOT_TEX3: + case VARYING_SLOT_TEX4: + case VARYING_SLOT_TEX5: + case VARYING_SLOT_TEX6: + case VARYING_SLOT_TEX7: + if (needs_texcoord_semantic) { + *semantic_name = TGSI_SEMANTIC_TEXCOORD; + *semantic_index = attr - VARYING_SLOT_TEX0; + break; + } + /* fall through */ + case VARYING_SLOT_VAR0: + default: + assert(attr >= VARYING_SLOT_VAR0 || + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); + *semantic_name = TGSI_SEMANTIC_GENERIC; + *semantic_index = + tgsi_get_generic_gl_varying_index(attr, needs_texcoord_semantic); + break; + } +} + +/** + * Determine the semantic name and index used for the given fragment shader + * result. 
+ */ +void +tgsi_get_gl_frag_result_semantic(gl_frag_result frag_result, + unsigned *semantic_name, + unsigned *semantic_index) +{ + if (frag_result >= FRAG_RESULT_DATA0) { + *semantic_name = TGSI_SEMANTIC_COLOR; + *semantic_index = frag_result - FRAG_RESULT_DATA0; + return; + } + + *semantic_index = 0; + + switch (frag_result) { + case FRAG_RESULT_DEPTH: + *semantic_name = TGSI_SEMANTIC_POSITION; + break; + case FRAG_RESULT_STENCIL: + *semantic_name = TGSI_SEMANTIC_STENCIL; + break; + case FRAG_RESULT_COLOR: + *semantic_name = TGSI_SEMANTIC_COLOR; + break; + case FRAG_RESULT_SAMPLE_MASK: + *semantic_name = TGSI_SEMANTIC_SAMPLEMASK; + break; + default: + assert(false); + } +} diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_from_mesa.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_from_mesa.h new file mode 100644 index 000000000..bfaa48d7f --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_from_mesa.h @@ -0,0 +1,71 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef TGSI_FROM_MESA_H +#define TGSI_FROM_MESA_H + +#include <stdbool.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "compiler/shader_enums.h" + +void +tgsi_get_gl_varying_semantic(gl_varying_slot attr, + bool needs_texcoord_semantic, + unsigned *semantic_name, + unsigned *semantic_index); + +unsigned +tgsi_get_generic_gl_varying_index(gl_varying_slot attr, + bool needs_texcoord_semantic); + +void +tgsi_get_gl_frag_result_semantic(gl_frag_result frag_result, + unsigned *semantic_name, + unsigned *semantic_index); + +static inline enum pipe_shader_type +pipe_shader_type_from_mesa(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return PIPE_SHADER_VERTEX; + case MESA_SHADER_TESS_CTRL: + return PIPE_SHADER_TESS_CTRL; + case MESA_SHADER_TESS_EVAL: + return PIPE_SHADER_TESS_EVAL; + case MESA_SHADER_GEOMETRY: + return PIPE_SHADER_GEOMETRY; + case MESA_SHADER_FRAGMENT: + return PIPE_SHADER_FRAGMENT; + case MESA_SHADER_COMPUTE: + return PIPE_SHADER_COMPUTE; + default: + unreachable("bad shader stage"); + } +} + +#endif /* TGSI_FROM_MESA_H */ diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c index 37549aae7..4e399508e 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -35,260 +35,21 @@ #define CHAN TGSI_OUTPUT_CHAN_DEPENDENT #define OTHR TGSI_OUTPUT_OTHER -static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", 
TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 
0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D }, - { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */ - { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, - { 1, 2, 0, 0, 0, 0, 0, 
REPL, "DP2", TGSI_OPCODE_DP2 }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, - { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, - { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ - { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, - { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, - { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */ - { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, - { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, - { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, - { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS }, - { 1, 
1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */ - { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, - { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ - { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR 
}, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH }, - { 0, 1, 0, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, +#define OPCODE(_num_dst, _num_src, _output_mode, name, ...) \ + { .opcode = TGSI_OPCODE_ ## name, \ + .output_mode = _output_mode, .num_dst = _num_dst, .num_src = _num_src, \ + ##__VA_ARGS__ }, - { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I }, - { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, - { 1, 5, 0, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, - { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, - { 1, 3, 0, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD }, - { 1, 2, 0, 1, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE }, - { 0, 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", 
TGSI_OPCODE_BARRIER }, +#define OPCODE_GAP(opc) { .opcode = opc }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG }, - { 1, 4, 0, 1, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN }, - { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX }, - { 1, 3, 1, 0, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 }, - { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 }, - { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, - { 1, 3, 1, 0, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE }, - { 1, 4, 0, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB }, - { 1, 1, 0, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE }, - { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F }, - { 1, 1, 0, 0, 0, 0, 0, COMP, 
"DABS", TGSI_OPCODE_DABS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC}, - { 1, 2, 0, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP}, - { 2, 1, 0, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP}, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, 
"I64SGE", TGSI_OPCODE_I64SGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD }, +static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = +{ +#include "tgsi_info_opcodes.h" }; +#undef OPCODE +#undef OPCODE_GAP + const struct tgsi_opcode_info * tgsi_get_opcode_info( uint opcode ) { @@ -308,12 +69,23 @@ tgsi_get_opcode_info( uint opcode ) return NULL; } +#define OPCODE(_num_dst, _num_src, _output_mode, name, ...) 
#name, +#define OPCODE_GAP(opc) "UNK" #opc, + +static const char * const opcode_names[TGSI_OPCODE_LAST] = +{ +#include "tgsi_info_opcodes.h" +}; + +#undef OPCODE +#undef OPCODE_GAP const char * tgsi_get_opcode_name( uint opcode ) { - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - return info->mnemonic; + if (opcode >= ARRAY_SIZE(opcode_names)) + return "UNK_OOB"; + return opcode_names[opcode]; } @@ -331,6 +103,8 @@ tgsi_get_processor_name( uint processor ) return "tessellation control shader"; case PIPE_SHADER_TESS_EVAL: return "tessellation evaluation shader"; + case PIPE_SHADER_COMPUTE: + return "compute shader"; default: return "unknown shader type!"; } @@ -353,9 +127,7 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_AND: case TGSI_OPCODE_OR: case TGSI_OPCODE_XOR: - case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */ case TGSI_OPCODE_TXQ: - case TGSI_OPCODE_TXQ_LZ: case TGSI_OPCODE_TXQS: case TGSI_OPCODE_F2U: case TGSI_OPCODE_UDIV: @@ -417,6 +189,7 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_DNEG: case TGSI_OPCODE_DMUL: case TGSI_OPCODE_DMAX: + case TGSI_OPCODE_DDIV: case TGSI_OPCODE_DMIN: case TGSI_OPCODE_DRCP: case TGSI_OPCODE_DSQRT: @@ -469,12 +242,16 @@ tgsi_opcode_infer_type( uint opcode ) * infer the source type of a TGSI opcode. 
*/ enum tgsi_opcode_type -tgsi_opcode_infer_src_type( uint opcode ) +tgsi_opcode_infer_src_type(uint opcode, uint src_idx) { + if (src_idx == 1 && + (opcode == TGSI_OPCODE_DLDEXP || opcode == TGSI_OPCODE_LDEXP)) + return TGSI_TYPE_SIGNED; + switch (opcode) { case TGSI_OPCODE_UIF: case TGSI_OPCODE_TXF: - case TGSI_OPCODE_BREAKC: + case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_U2F: case TGSI_OPCODE_U2D: case TGSI_OPCODE_UADD: @@ -494,7 +271,6 @@ tgsi_opcode_infer_src_type( uint opcode ) return TGSI_TYPE_SIGNED; case TGSI_OPCODE_ARL: case TGSI_OPCODE_ARR: - case TGSI_OPCODE_TXQ_LZ: case TGSI_OPCODE_F2D: case TGSI_OPCODE_F2I: case TGSI_OPCODE_F2U: @@ -537,7 +313,10 @@ tgsi_opcode_infer_src_type( uint opcode ) * infer the destination type of a TGSI opcode. */ enum tgsi_opcode_type -tgsi_opcode_infer_dst_type( uint opcode ) +tgsi_opcode_infer_dst_type( uint opcode, uint dst_idx ) { + if (dst_idx == 1 && opcode == TGSI_OPCODE_DFRACEXP) + return TGSI_TYPE_SIGNED; + return tgsi_opcode_infer_type(opcode); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c index b0a28f271..bfc3a6bfe 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -258,130 +258,6 @@ transform_dst(struct tgsi_transform_context *tctx, } } -/* XPD - Cross Product - * dst.x = src0.y \times src1.z - src1.y \times src0.z - * dst.y = src0.z \times src1.x - src1.z \times src0.x - * dst.z = src0.x \times src1.y - src1.x \times src0.y - * dst.w = 1.0 - * - * ; needs: 1 tmp, imm{1.0} - * MUL tmpA.xyz, src1.yzx, src0.zxy - * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz - * MOV dst.w, imm{1.0} - */ -#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2)) -#define XPD_TMP 1 -static void -transform_xpd(struct tgsi_transform_context *tctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); - struct tgsi_full_dst_register *dst 
= &inst->Dst[0]; - struct tgsi_full_src_register *src0 = &inst->Src[0]; - struct tgsi_full_src_register *src1 = &inst->Src[1]; - struct tgsi_full_instruction new_inst; - - if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { - /* MUL tmpA.xyz, src1.yzx, src0.zxy */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _)); - reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _)); - tctx->emit_instruction(tctx, &new_inst); - - /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ); - new_inst.Instruction.NumSrcRegs = 3; - reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _)); - reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _)); - reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); - new_inst.Src[2].Register.Negate = true; - tctx->emit_instruction(tctx, &new_inst); - } - - if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { - /* MOV dst.w, imm{1.0} */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); - tctx->emit_instruction(tctx, &new_inst); - } -} - -/* SCS - Sine Cosine - * dst.x = \cos{src.x} - * dst.y = \sin{src.x} - * dst.z = 0.0 - * dst.w = 1.0 - * - * ; needs: 1 tmp, imm{0.0, 1.0} - * if (dst.x aliases src.x) { - * MOV tmpA.x, src.x - * src = tmpA - * } - * COS dst.x, src.x - * SIN dst.y, src.x - * MOV dst.zw, imm{0.0, 1.0} - */ -#define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1)) -#define SCS_TMP 1 
-static void -transform_scs(struct tgsi_transform_context *tctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); - struct tgsi_full_dst_register *dst = &inst->Dst[0]; - struct tgsi_full_src_register *src = &inst->Src[0]; - struct tgsi_full_instruction new_inst; - - if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) { - create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0); - src = &ctx->tmp[A].src; - } - - if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { - /* COS dst.x, src.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_COS; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - tctx->emit_instruction(tctx, &new_inst); - } - - if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { - /* SIN dst.y, src.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SIN; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - tctx->emit_instruction(tctx, &new_inst); - } - - if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) { - /* MOV dst.zw, imm{0.0, 1.0} */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y)); - tctx->emit_instruction(tctx, &new_inst); - } -} - /* LRP - Linear Interpolate * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y @@ -468,12 +344,13 @@ transform_frc(struct tgsi_transform_context *tctx, /* SUB dst, src, tmpA */ new_inst = 
tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } } @@ -571,16 +448,25 @@ transform_lit(struct tgsi_transform_context *tctx, reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); tctx->emit_instruction(tctx, &new_inst); - /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */ + /* MIN tmpA.z, src.w, imm{128.0} */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP; + new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); - new_inst.Instruction.NumSrcRegs = 3; + new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); + tctx->emit_instruction(tctx, &new_inst); + + /* MAX tmpA.z, tmpA.z, -imm{128.0} */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); + reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); new_inst.Src[1].Register.Negate = true; - reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _)); tctx->emit_instruction(tctx, &new_inst); /* LG2 tmpA.y, tmpA.y */ @@ -689,12 +575,13 @@ transform_exp(struct tgsi_transform_context *tctx, /* SUB tmpA.x, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = 
TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.x, src.x */ @@ -722,12 +609,13 @@ transform_exp(struct tgsi_transform_context *tctx, if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { /* SUB dst.y, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } @@ -822,12 +710,13 @@ transform_log(struct tgsi_transform_context *tctx, /* SUB tmpA.y, tmpA.x, tmpA.y */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.y, tmpA.x */ @@ -901,41 +790,29 @@ transform_log(struct tgsi_transform_context *tctx, * DP3 - 3-component Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z * - * DPH - Homogeneous Dot Product - * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w - * * DP2 - 2-component Dot Product * dst = src0.x \times src1.x + src0.y 
\times src1.y * - * DP2A - 2-component Dot Product And Add - * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x - * * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar * operations, which is what you'd prefer for a ISA that is natively * scalar. Probably a native vector ISA would at least already have * DP4/DP3 instructions, but perhaps there is room for an alternative - * translation for DPH/DP2/DP2A using vector instructions. + * translation for DP2 using vector instructions. * * ; needs: 1 tmp * MUL tmpA.x, src0.x, src1.x * MAD tmpA.x, src0.y, src1.y, tmpA.x - * if (DPH || DP3 || DP4) { + * if (DP3 || DP4) { * MAD tmpA.x, src0.z, src1.z, tmpA.x - * if (DPH) { - * ADD tmpA.x, src1.w, tmpA.x - * } else if (DP4) { + * if (DP4) { * MAD tmpA.x, src0.w, src1.w, tmpA.x * } - * } else if (DP2A) { - * ADD tmpA.x, src2.x, tmpA.x * } * ; fixup last instruction to replicate into dst */ #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) -#define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2)) #define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) -#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3)) #define DOTP_TMP 1 static void transform_dotp(struct tgsi_transform_context *tctx, @@ -945,7 +822,6 @@ transform_dotp(struct tgsi_transform_context *tctx, struct tgsi_full_dst_register *dst = &inst->Dst[0]; struct tgsi_full_src_register *src0 = &inst->Src[0]; struct tgsi_full_src_register *src1 = &inst->Src[1]; - struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */ struct tgsi_full_instruction new_inst; unsigned opcode = inst->Instruction.Opcode; @@ -974,8 +850,7 @@ transform_dotp(struct tgsi_transform_context *tctx, reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y)); reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); - if ((opcode == TGSI_OPCODE_DPH) || - (opcode == TGSI_OPCODE_DP3) || + if ((opcode == TGSI_OPCODE_DP3) || 
(opcode == TGSI_OPCODE_DP4)) { tctx->emit_instruction(tctx, &new_inst); @@ -989,18 +864,7 @@ transform_dotp(struct tgsi_transform_context *tctx, reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z)); reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); - if (opcode == TGSI_OPCODE_DPH) { - tctx->emit_instruction(tctx, &new_inst); - - /* ADD tmpA.x, src1.w, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W)); - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X)); - } else if (opcode == TGSI_OPCODE_DP4) { + if (opcode == TGSI_OPCODE_DP4) { tctx->emit_instruction(tctx, &new_inst); /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ @@ -1013,17 +877,6 @@ transform_dotp(struct tgsi_transform_context *tctx, reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); } - } else if (opcode == TGSI_OPCODE_DP2A) { - tctx->emit_instruction(tctx, &new_inst); - - /* ADD tmpA.x, src2.x, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X)); - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X)); } /* fixup last instruction to write to dst: */ @@ -1072,15 +925,14 @@ transform_flr_ceil(struct tgsi_transform_context *tctx, /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ new_inst = tgsi_default_full_instruction(); - if (opcode == TGSI_OPCODE_CEIL) - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - else - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 
new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + if (opcode == TGSI_OPCODE_FLR) + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } } @@ -1124,7 +976,7 @@ transform_trunc(struct tgsi_transform_context *tctx, tctx->emit_instruction(tctx, &new_inst); new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; @@ -1132,6 +984,7 @@ transform_trunc(struct tgsi_transform_context *tctx, new_inst.Src[0].Register.Absolute = true; new_inst.Src[0].Register.Negate = false; reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { new_inst = tgsi_default_full_instruction(); @@ -1489,16 +1342,6 @@ transform_instr(struct tgsi_transform_context *tctx, goto skip; transform_dst(tctx, inst); break; - case TGSI_OPCODE_XPD: - if (!ctx->config->lower_XPD) - goto skip; - transform_xpd(tctx, inst); - break; - case TGSI_OPCODE_SCS: - if (!ctx->config->lower_SCS) - goto skip; - transform_scs(tctx, inst); - break; case TGSI_OPCODE_LRP: if (!ctx->config->lower_LRP) goto skip; @@ -1539,21 +1382,11 @@ transform_instr(struct tgsi_transform_context *tctx, goto skip; transform_dotp(tctx, inst); break; - case TGSI_OPCODE_DPH: - if (!ctx->config->lower_DPH) - goto skip; - transform_dotp(tctx, inst); - break; case TGSI_OPCODE_DP2: if (!ctx->config->lower_DP2) goto skip; transform_dotp(tctx, inst); break; - case TGSI_OPCODE_DP2A: - if (!ctx->config->lower_DP2A) - goto skip; - transform_dotp(tctx, inst); - break; case TGSI_OPCODE_FLR: if (!ctx->config->lower_FLR) goto 
skip; @@ -1632,8 +1465,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0) /* if there are no instructions to lower, then we are done: */ if (!(OPCS(DST) || - OPCS(XPD) || - OPCS(SCS) || OPCS(LRP) || OPCS(FRC) || OPCS(POW) || @@ -1642,9 +1473,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, OPCS(LOG) || OPCS(DP4) || OPCS(DP3) || - OPCS(DPH) || OPCS(DP2) || - OPCS(DP2A) || OPCS(FLR) || OPCS(CEIL) || OPCS(TRUNC) || @@ -1664,14 +1493,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += DST_GROW * OPCS(DST); numtmp = MAX2(numtmp, DST_TMP); } - if (OPCS(XPD)) { - newlen += XPD_GROW * OPCS(XPD); - numtmp = MAX2(numtmp, XPD_TMP); - } - if (OPCS(SCS)) { - newlen += SCS_GROW * OPCS(SCS); - numtmp = MAX2(numtmp, SCS_TMP); - } if (OPCS(LRP)) { newlen += LRP_GROW * OPCS(LRP); numtmp = MAX2(numtmp, LRP_TMP); @@ -1704,18 +1525,10 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += DP3_GROW * OPCS(DP3); numtmp = MAX2(numtmp, DOTP_TMP); } - if (OPCS(DPH)) { - newlen += DPH_GROW * OPCS(DPH); - numtmp = MAX2(numtmp, DOTP_TMP); - } if (OPCS(DP2)) { newlen += DP2_GROW * OPCS(DP2); numtmp = MAX2(numtmp, DOTP_TMP); } - if (OPCS(DP2A)) { - newlen += DP2A_GROW * OPCS(DP2A); - numtmp = MAX2(numtmp, DOTP_TMP); - } if (OPCS(FLR)) { newlen += FLR_GROW * OPCS(FLR); numtmp = MAX2(numtmp, FLR_TMP); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index d8752ce47..9a13fa668 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -28,26 +28,6 @@ #define OP12_TEX(a) OP12(a) #endif -#ifndef OP14_TEX -#define OP14_TEX(a) OP14(a) -#endif - -#ifndef OP12_SAMPLE -#define OP12_SAMPLE(a) OP12(a) -#endif - -#ifndef OP13_SAMPLE -#define OP13_SAMPLE(a) OP13(a) -#endif - -#ifndef 
OP14_SAMPLE -#define OP14_SAMPLE(a) OP14(a) -#endif - -#ifndef OP15_SAMPLE -#define OP15_SAMPLE(a) OP15(a) -#endif - #ifndef OP00_LBL #define OP00_LBL(a) OP00(a) #endif @@ -73,20 +53,16 @@ OP12(MAX) OP12(SLT) OP12(SGE) OP13(MAD) -OP12(SUB) +OP12_TEX(TEX_LZ) OP13(LRP) OP11(SQRT) -OP13(DP2A) OP11(FRC) -OP13(CLAMP) +OP12_TEX(TXF_LZ) OP11(FLR) OP11(ROUND) OP11(EX2) OP11(LG2) OP12(POW) -OP12(XPD) -OP11(ABS) -OP12(DPH) OP11(COS) OP11(DDX) OP11(DDY) @@ -114,7 +90,6 @@ OP00_LBL(CAL) OP00(RET) OP11(SSG) OP13(CMP) -OP11(SCS) OP12_TEX(TXB) OP12(DIV) OP12(DP2) @@ -124,8 +99,6 @@ OP01_LBL(IF) OP01_LBL(UIF) OP00_LBL(ELSE) OP00(ENDIF) -OP01(PUSHA) -OP10(POPA) OP11(CEIL) OP11(I2F) OP11(NOT) @@ -135,7 +108,6 @@ OP12(AND) OP12(OR) OP12(MOD) OP12(XOR) -OP13(SAD) OP12_TEX(TXF) OP12_TEX(TXQ) OP00(CONT) @@ -146,8 +118,6 @@ OP00(BGNSUB) OP00_LBL(ENDLOOP) OP00(ENDSUB) OP00(NOP) -OP01(CALLNZ) -OP01(BREAKC) OP01(KILL_IF) OP00(END) OP11(F2I) @@ -181,15 +151,6 @@ OP01(CASE) OP00(DEFAULT) OP00(ENDSWITCH) -OP13_SAMPLE(SAMPLE) -OP12_SAMPLE(SAMPLE_I) -OP13_SAMPLE(SAMPLE_I_MS) -OP14_SAMPLE(SAMPLE_B) -OP14_SAMPLE(SAMPLE_C) -OP14_SAMPLE(SAMPLE_C_LZ) -OP15_SAMPLE(SAMPLE_D) -OP14_SAMPLE(SAMPLE_L) -OP13_SAMPLE(GATHER4) OP12(SVIEWINFO) OP13(SAMPLE_POS) OP12(SAMPLE_INFO) @@ -209,21 +170,8 @@ OP12(UMUL_HI) #undef OP12 #undef OP13 -#ifdef OP14 -#undef OP14 -#endif - -#ifdef OP15 -#undef OP15 -#endif - #undef OP00_LBL #undef OP01_LBL #undef OP12_TEX #undef OP14_TEX - -#undef OP12_SAMPLE -#undef OP13_SAMPLE -#undef OP14_SAMPLE -#undef OP15_SAMPLE diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c index 940af7d30..c706fc8ae 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -182,10 +182,6 @@ tgsi_parse_token( memset(inst, 0, sizeof *inst); copy_token(&inst->Instruction, &token); - if (inst->Instruction.Predicate) { - next_token(ctx, &inst->Predicate); - } - if 
(inst->Instruction.Label) { next_token( ctx, &inst->Label); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h index 4689fb797..07806ab35 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -88,7 +88,6 @@ struct tgsi_full_property struct tgsi_full_instruction { struct tgsi_instruction Instruction; - struct tgsi_instruction_predicate Predicate; struct tgsi_instruction_label Label; struct tgsi_instruction_texture Texture; struct tgsi_instruction_memory Memory; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c index 713bd609d..f60a17c78 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c @@ -295,7 +295,7 @@ psprite_emit_vertex_inst(struct tgsi_transform_context *ctx, tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W); + TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W, false); /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */ inst = tgsi_default_full_instruction(); @@ -323,15 +323,15 @@ psprite_emit_vertex_inst(struct tgsi_transform_context *ctx, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, TGSI_FILE_TEMPORARY, ts->point_size_tmp, - TGSI_SWIZZLE_X); + TGSI_SWIZZLE_X, false); - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_WRITEMASK_X, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Z, TGSI_FILE_TEMPORARY, ts->point_coord_k, - TGSI_SWIZZLE_X); + TGSI_SWIZZLE_X, true); } @@ -442,13 +442,13 @@ psprite_inst(struct tgsi_transform_context *ctx, tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX, 
TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y); + TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, false); /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W); + TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W, false); } else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && inst->Dst[0].Register.Index == ts->point_pos_out) { diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 239a2c938..2c9ad9933 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -256,7 +256,6 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", - "PRED", "SV", "RES" }; @@ -327,10 +326,12 @@ iter_instruction( } if (info->num_dst != inst->Instruction.NumDstRegs) { - report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, info->num_dst ); + report_error( ctx, "%s: Invalid number of destination operands, should be %u", + tgsi_get_opcode_name(inst->Instruction.Opcode), info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { - report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src ); + report_error( ctx, "%s: Invalid number of source operands, should be %u", + tgsi_get_opcode_name(inst->Instruction.Opcode), info->num_src ); } /* Check destination and source registers' validity. 
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c index b86207883..989c57ebe 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -54,6 +54,15 @@ is_memory_file(unsigned file) } +static bool +is_mem_query_inst(unsigned opcode) +{ + return opcode == TGSI_OPCODE_RESQ || + opcode == TGSI_OPCODE_TXQ || + opcode == TGSI_OPCODE_TXQS || + opcode == TGSI_OPCODE_LODQ; +} + /** * Is the opcode a "true" texture instruction which samples from a * texture map? @@ -61,10 +70,7 @@ is_memory_file(unsigned file) static bool is_texture_inst(unsigned opcode) { - return (opcode != TGSI_OPCODE_TXQ && - opcode != TGSI_OPCODE_TXQS && - opcode != TGSI_OPCODE_TXQ_LZ && - opcode != TGSI_OPCODE_LODQ && + return (!is_mem_query_inst(opcode) && tgsi_get_opcode_info(opcode)->is_tex); } @@ -80,10 +86,11 @@ computes_derivative(unsigned opcode) return opcode != TGSI_OPCODE_TG4 && opcode != TGSI_OPCODE_TXD && opcode != TGSI_OPCODE_TXF && + opcode != TGSI_OPCODE_TXF_LZ && + opcode != TGSI_OPCODE_TEX_LZ && opcode != TGSI_OPCODE_TXL && opcode != TGSI_OPCODE_TXL2 && opcode != TGSI_OPCODE_TXQ && - opcode != TGSI_OPCODE_TXQ_LZ && opcode != TGSI_OPCODE_TXQS; } @@ -96,6 +103,233 @@ computes_derivative(unsigned opcode) static void +scan_src_operand(struct tgsi_shader_info *info, + const struct tgsi_full_instruction *fullinst, + const struct tgsi_full_src_register *src, + unsigned src_index, + unsigned usage_mask_after_swizzle, + bool is_interp_instruction, + bool *is_mem_inst) +{ + int ind = src->Register.Index; + + if (info->processor == PIPE_SHADER_COMPUTE && + src->Register.File == TGSI_FILE_SYSTEM_VALUE) { + unsigned name, mask; + + name = info->system_value_semantic_name[src->Register.Index]; + + switch (name) { + case TGSI_SEMANTIC_THREAD_ID: + case TGSI_SEMANTIC_BLOCK_ID: + mask = usage_mask_after_swizzle & TGSI_WRITEMASK_XYZ; + while (mask) { + unsigned i = u_bit_scan(&mask); 
+ + if (name == TGSI_SEMANTIC_THREAD_ID) + info->uses_thread_id[i] = true; + else + info->uses_block_id[i] = true; + } + break; + case TGSI_SEMANTIC_BLOCK_SIZE: + /* The block size is translated to IMM with a fixed block size. */ + if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0) + info->uses_block_size = true; + break; + case TGSI_SEMANTIC_GRID_SIZE: + info->uses_grid_size = true; + break; + } + } + + /* Mark which inputs are effectively used */ + if (src->Register.File == TGSI_FILE_INPUT) { + if (src->Register.Indirect) { + for (ind = 0; ind < info->num_inputs; ++ind) { + info->input_usage_mask[ind] |= usage_mask_after_swizzle; + } + } else { + assert(ind >= 0); + assert(ind < PIPE_MAX_SHADER_INPUTS); + info->input_usage_mask[ind] |= usage_mask_after_swizzle; + } + + if (info->processor == PIPE_SHADER_FRAGMENT) { + unsigned name, index, input; + + if (src->Register.Indirect && src->Indirect.ArrayID) + input = info->input_array_first[src->Indirect.ArrayID]; + else + input = src->Register.Index; + + name = info->input_semantic_name[input]; + index = info->input_semantic_index[input]; + + if (name == TGSI_SEMANTIC_POSITION && + usage_mask_after_swizzle & TGSI_WRITEMASK_Z) + info->reads_z = true; + + if (name == TGSI_SEMANTIC_COLOR) + info->colors_read |= usage_mask_after_swizzle << (index * 4); + + /* Process only interpolated varyings. Don't include POSITION. + * Don't include integer varyings, because they are not + * interpolated. Don't process inputs interpolated by INTERP + * opcodes. Those are tracked separately. 
+ */ + if ((!is_interp_instruction || src_index != 0) && + (name == TGSI_SEMANTIC_GENERIC || + name == TGSI_SEMANTIC_TEXCOORD || + name == TGSI_SEMANTIC_COLOR || + name == TGSI_SEMANTIC_BCOLOR || + name == TGSI_SEMANTIC_FOG || + name == TGSI_SEMANTIC_CLIPDIST)) { + switch (info->input_interpolate[input]) { + case TGSI_INTERPOLATE_COLOR: + case TGSI_INTERPOLATE_PERSPECTIVE: + switch (info->input_interpolate_loc[input]) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_persp_center = TRUE; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_persp_centroid = TRUE; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_persp_sample = TRUE; + break; + } + break; + case TGSI_INTERPOLATE_LINEAR: + switch (info->input_interpolate_loc[input]) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_linear_center = TRUE; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_linear_centroid = TRUE; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_linear_sample = TRUE; + break; + } + break; + /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. 
*/ + } + } + } + } + + if (info->processor == PIPE_SHADER_TESS_CTRL && + src->Register.File == TGSI_FILE_OUTPUT) { + unsigned input; + + if (src->Register.Indirect && src->Indirect.ArrayID) + input = info->output_array_first[src->Indirect.ArrayID]; + else + input = src->Register.Index; + + switch (info->output_semantic_name[input]) { + case TGSI_SEMANTIC_PATCH: + info->reads_perpatch_outputs = true; + break; + case TGSI_SEMANTIC_TESSINNER: + case TGSI_SEMANTIC_TESSOUTER: + info->reads_tessfactor_outputs = true; + break; + default: + info->reads_pervertex_outputs = true; + } + } + + /* check for indirect register reads */ + if (src->Register.Indirect) { + info->indirect_files |= (1 << src->Register.File); + info->indirect_files_read |= (1 << src->Register.File); + + /* record indirect constant buffer indexing */ + if (src->Register.File == TGSI_FILE_CONSTANT) { + if (src->Register.Dimension) { + if (src->Dimension.Indirect) + info->const_buffers_indirect = info->const_buffers_declared; + else + info->const_buffers_indirect |= 1u << src->Dimension.Index; + } else { + info->const_buffers_indirect |= 1; + } + } + } + + if (src->Register.Dimension && src->Dimension.Indirect) + info->dim_indirect_files |= 1u << src->Register.File; + + /* Texture samplers */ + if (src->Register.File == TGSI_FILE_SAMPLER) { + const unsigned index = src->Register.Index; + + assert(fullinst->Instruction.Texture); + assert(index < ARRAY_SIZE(info->is_msaa_sampler)); + assert(index < PIPE_MAX_SAMPLERS); + + if (is_texture_inst(fullinst->Instruction.Opcode)) { + const unsigned target = fullinst->Texture.Texture; + assert(target < TGSI_TEXTURE_UNKNOWN); + /* for texture instructions, check that the texture instruction + * target matches the previous sampler view declaration (if there + * was one.) 
+ */ + if (info->sampler_targets[index] == TGSI_TEXTURE_UNKNOWN) { + /* probably no sampler view declaration */ + info->sampler_targets[index] = target; + } else { + /* Make sure the texture instruction's sampler/target info + * agrees with the sampler view declaration. + */ + assert(info->sampler_targets[index] == target); + } + /* MSAA samplers */ + if (target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA) { + info->is_msaa_sampler[src->Register.Index] = TRUE; + } + } + } + + if (is_memory_file(src->Register.File) && + !is_mem_query_inst(fullinst->Instruction.Opcode)) { + *is_mem_inst = true; + + if (tgsi_get_opcode_info(fullinst->Instruction.Opcode)->is_store) { + info->writes_memory = TRUE; + + if (src->Register.File == TGSI_FILE_IMAGE) { + if (src->Register.Indirect) + info->images_atomic = info->images_declared; + else + info->images_atomic |= 1 << src->Register.Index; + } else if (src->Register.File == TGSI_FILE_BUFFER) { + if (src->Register.Indirect) + info->shader_buffers_atomic = info->shader_buffers_declared; + else + info->shader_buffers_atomic |= 1 << src->Register.Index; + } + } else { + if (src->Register.File == TGSI_FILE_IMAGE) { + if (src->Register.Indirect) + info->images_load = info->images_declared; + else + info->images_load |= 1 << src->Register.Index; + } else if (src->Register.File == TGSI_FILE_BUFFER) { + if (src->Register.Indirect) + info->shader_buffers_load = info->shader_buffers_declared; + else + info->shader_buffers_load |= 1 << src->Register.Index; + } + } + } +} + + +static void scan_instruction(struct tgsi_shader_info *info, const struct tgsi_full_instruction *fullinst, unsigned *current_depth) @@ -103,6 +337,7 @@ scan_instruction(struct tgsi_shader_info *info, unsigned i; bool is_mem_inst = false; bool is_interp_instruction = false; + unsigned sampler_src; assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); info->opcode_count[fullinst->Instruction.Opcode]++; @@ -118,6 +353,44 @@ scan_instruction(struct 
tgsi_shader_info *info, case TGSI_OPCODE_ENDLOOP: (*current_depth)--; break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TEX_LZ: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXQS: + case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TXF_LZ: + case TGSI_OPCODE_TEX2: + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXL2: + case TGSI_OPCODE_TG4: + case TGSI_OPCODE_LODQ: + sampler_src = fullinst->Instruction.NumSrcRegs - 1; + if (fullinst->Src[sampler_src].Register.File != TGSI_FILE_SAMPLER) + info->uses_bindless_samplers = true; + break; + case TGSI_OPCODE_RESQ: + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) + info->uses_bindless_images = true; + break; + case TGSI_OPCODE_STORE: + if (tgsi_is_bindless_image_file(fullinst->Dst[0].Register.File)) + info->uses_bindless_images = true; + break; default: break; } @@ -171,170 +444,107 @@ scan_instruction(struct tgsi_shader_info *info, } } - if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && - fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) + if ((fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && + fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) || + fullinst->Instruction.Opcode == TGSI_OPCODE_DFMA || + fullinst->Instruction.Opcode == TGSI_OPCODE_DDIV || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2U64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2I64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_U642D || + fullinst->Instruction.Opcode == TGSI_OPCODE_I642D) info->uses_doubles = TRUE; for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = &fullinst->Src[i]; - 
int ind = src->Register.Index; - - /* Mark which inputs are effectively used */ - if (src->Register.File == TGSI_FILE_INPUT) { - unsigned usage_mask; - usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i); - if (src->Register.Indirect) { - for (ind = 0; ind < info->num_inputs; ++ind) { - info->input_usage_mask[ind] |= usage_mask; - } - } else { - assert(ind >= 0); - assert(ind < PIPE_MAX_SHADER_INPUTS); - info->input_usage_mask[ind] |= usage_mask; - } + scan_src_operand(info, fullinst, &fullinst->Src[i], i, + tgsi_util_get_inst_usage_mask(fullinst, i), + is_interp_instruction, &is_mem_inst); - if (info->processor == PIPE_SHADER_FRAGMENT) { - unsigned name, index, input; + if (fullinst->Src[i].Register.Indirect) { + struct tgsi_full_src_register src = {{0}}; - if (src->Register.Indirect && src->Indirect.ArrayID) - input = info->input_array_first[src->Indirect.ArrayID]; - else - input = src->Register.Index; - - name = info->input_semantic_name[input]; - index = info->input_semantic_index[input]; - - if (name == TGSI_SEMANTIC_POSITION && - (src->Register.SwizzleX == TGSI_SWIZZLE_Z || - src->Register.SwizzleY == TGSI_SWIZZLE_Z || - src->Register.SwizzleZ == TGSI_SWIZZLE_Z || - src->Register.SwizzleW == TGSI_SWIZZLE_Z)) - info->reads_z = TRUE; - - if (name == TGSI_SEMANTIC_COLOR) { - unsigned mask = - (1 << src->Register.SwizzleX) | - (1 << src->Register.SwizzleY) | - (1 << src->Register.SwizzleZ) | - (1 << src->Register.SwizzleW); - - info->colors_read |= mask << (index * 4); - } - - /* Process only interpolated varyings. Don't include POSITION. - * Don't include integer varyings, because they are not - * interpolated. Don't process inputs interpolated by INTERP - * opcodes. Those are tracked separately. 
- */ - if ((!is_interp_instruction || i != 0) && - (name == TGSI_SEMANTIC_GENERIC || - name == TGSI_SEMANTIC_TEXCOORD || - name == TGSI_SEMANTIC_COLOR || - name == TGSI_SEMANTIC_BCOLOR || - name == TGSI_SEMANTIC_FOG || - name == TGSI_SEMANTIC_CLIPDIST)) { - switch (info->input_interpolate[input]) { - case TGSI_INTERPOLATE_COLOR: - case TGSI_INTERPOLATE_PERSPECTIVE: - switch (info->input_interpolate_loc[input]) { - case TGSI_INTERPOLATE_LOC_CENTER: - info->uses_persp_center = TRUE; - break; - case TGSI_INTERPOLATE_LOC_CENTROID: - info->uses_persp_centroid = TRUE; - break; - case TGSI_INTERPOLATE_LOC_SAMPLE: - info->uses_persp_sample = TRUE; - break; - } - break; - case TGSI_INTERPOLATE_LINEAR: - switch (info->input_interpolate_loc[input]) { - case TGSI_INTERPOLATE_LOC_CENTER: - info->uses_linear_center = TRUE; - break; - case TGSI_INTERPOLATE_LOC_CENTROID: - info->uses_linear_centroid = TRUE; - break; - case TGSI_INTERPOLATE_LOC_SAMPLE: - info->uses_linear_sample = TRUE; - break; - } - break; - /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. 
*/ - } - } - } - } + src.Register.File = fullinst->Src[i].Indirect.File; + src.Register.Index = fullinst->Src[i].Indirect.Index; - /* check for indirect register reads */ - if (src->Register.Indirect) { - info->indirect_files |= (1 << src->Register.File); - info->indirect_files_read |= (1 << src->Register.File); + scan_src_operand(info, fullinst, &src, -1, + 1 << fullinst->Src[i].Indirect.Swizzle, + false, NULL); } - /* Texture samplers */ - if (src->Register.File == TGSI_FILE_SAMPLER) { - const unsigned index = src->Register.Index; + if (fullinst->Src[i].Register.Dimension && + fullinst->Src[i].Dimension.Indirect) { + struct tgsi_full_src_register src = {{0}}; - assert(fullinst->Instruction.Texture); - assert(index < ARRAY_SIZE(info->is_msaa_sampler)); - assert(index < PIPE_MAX_SAMPLERS); + src.Register.File = fullinst->Src[i].DimIndirect.File; + src.Register.Index = fullinst->Src[i].DimIndirect.Index; - if (is_texture_inst(fullinst->Instruction.Opcode)) { - const unsigned target = fullinst->Texture.Texture; - assert(target < TGSI_TEXTURE_UNKNOWN); - /* for texture instructions, check that the texture instruction - * target matches the previous sampler view declaration (if there - * was one.) - */ - if (info->sampler_targets[index] == TGSI_TEXTURE_UNKNOWN) { - /* probably no sampler view declaration */ - info->sampler_targets[index] = target; - } else { - /* Make sure the texture instruction's sampler/target info - * agrees with the sampler view declaration. 
- */ - assert(info->sampler_targets[index] == target); - } - /* MSAA samplers */ - if (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - info->is_msaa_sampler[src->Register.Index] = TRUE; - } - } + scan_src_operand(info, fullinst, &src, -1, + 1 << fullinst->Src[i].DimIndirect.Swizzle, + false, NULL); } + } - if (is_memory_file(src->Register.File)) { - is_mem_inst = true; + if (fullinst->Instruction.Texture) { + for (i = 0; i < fullinst->Texture.NumOffsets; i++) { + struct tgsi_full_src_register src = {{0}}; - if (tgsi_get_opcode_info(fullinst->Instruction.Opcode)->is_store) { - info->writes_memory = TRUE; + src.Register.File = fullinst->TexOffsets[i].File; + src.Register.Index = fullinst->TexOffsets[i].Index; - if (src->Register.File == TGSI_FILE_IMAGE && - !src->Register.Indirect) - info->images_writemask |= 1 << src->Register.Index; - } + /* The usage mask is suboptimal but should be safe. */ + scan_src_operand(info, fullinst, &src, -1, + (1 << fullinst->TexOffsets[i].SwizzleX) | + (1 << fullinst->TexOffsets[i].SwizzleY) | + (1 << fullinst->TexOffsets[i].SwizzleZ), + false, &is_mem_inst); } } /* check for indirect register writes */ for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; + if (dst->Register.Indirect) { + struct tgsi_full_src_register src = {{0}}; + + src.Register.File = dst->Indirect.File; + src.Register.Index = dst->Indirect.Index; + + scan_src_operand(info, fullinst, &src, -1, + 1 << dst->Indirect.Swizzle, false, NULL); + info->indirect_files |= (1 << dst->Register.File); info->indirect_files_written |= (1 << dst->Register.File); } + if (dst->Register.Dimension && dst->Dimension.Indirect) { + struct tgsi_full_src_register src = {{0}}; + + src.Register.File = dst->DimIndirect.File; + src.Register.Index = dst->DimIndirect.Index; + + scan_src_operand(info, fullinst, &src, -1, + 1 << dst->DimIndirect.Swizzle, false, NULL); + + info->dim_indirect_files |= 1u 
<< dst->Register.File; + } + if (is_memory_file(dst->Register.File)) { assert(fullinst->Instruction.Opcode == TGSI_OPCODE_STORE); is_mem_inst = true; info->writes_memory = TRUE; - if (dst->Register.File == TGSI_FILE_IMAGE && - !dst->Register.Indirect) - info->images_writemask |= 1 << dst->Register.Index; + if (dst->Register.File == TGSI_FILE_IMAGE) { + if (dst->Register.Indirect) + info->images_store = info->images_declared; + else + info->images_store |= 1 << dst->Register.Index; + } else if (dst->Register.File == TGSI_FILE_BUFFER) { + if (dst->Register.Indirect) + info->shader_buffers_store = info->shader_buffers_declared; + else + info->shader_buffers_store |= 1 << dst->Register.Index; + } } } @@ -378,22 +588,37 @@ scan_declaration(struct tgsi_shader_info *info, unsigned semName = fulldecl->Semantic.Name; unsigned semIndex = fulldecl->Semantic.Index + (reg - fulldecl->Range.First); + int buffer; + unsigned index, target, type; /* only first 32 regs will appear in this bitfield */ info->file_mask[file] |= (1 << reg); info->file_count[file]++; info->file_max[file] = MAX2(info->file_max[file], (int)reg); - if (file == TGSI_FILE_CONSTANT) { - int buffer = 0; + switch (file) { + case TGSI_FILE_CONSTANT: + buffer = 0; if (fulldecl->Declaration.Dimension) buffer = fulldecl->Dim.Index2D; info->const_file_max[buffer] = MAX2(info->const_file_max[buffer], (int)reg); - } - else if (file == TGSI_FILE_INPUT) { + info->const_buffers_declared |= 1u << buffer; + break; + + case TGSI_FILE_IMAGE: + info->images_declared |= 1u << reg; + if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER) + info->images_buffers |= 1 << reg; + break; + + case TGSI_FILE_BUFFER: + info->shader_buffers_declared |= 1u << reg; + break; + + case TGSI_FILE_INPUT: info->input_semantic_name[reg] = (ubyte) semName; info->input_semantic_index[reg] = (ubyte) semIndex; info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; @@ -403,17 +628,21 @@ scan_declaration(struct tgsi_shader_info *info, /* 
Vertex shaders can have inputs with holes between them. */ info->num_inputs = MAX2(info->num_inputs, reg + 1); - if (semName == TGSI_SEMANTIC_PRIMID) - info->uses_primid = TRUE; - else if (procType == PIPE_SHADER_FRAGMENT) { - if (semName == TGSI_SEMANTIC_POSITION) - info->reads_position = TRUE; - else if (semName == TGSI_SEMANTIC_FACE) - info->uses_frontface = TRUE; + switch (semName) { + case TGSI_SEMANTIC_PRIMID: + info->uses_primid = true; + break; + case TGSI_SEMANTIC_POSITION: + info->reads_position = true; + break; + case TGSI_SEMANTIC_FACE: + info->uses_frontface = true; + break; } - } - else if (file == TGSI_FILE_SYSTEM_VALUE) { - unsigned index = fulldecl->Range.First; + break; + + case TGSI_FILE_SYSTEM_VALUE: + index = fulldecl->Range.First; info->system_value_semantic_name[index] = semName; info->num_system_values = MAX2(info->num_system_values, index + 1); @@ -446,61 +675,81 @@ scan_declaration(struct tgsi_shader_info *info, case TGSI_SEMANTIC_SAMPLEMASK: info->reads_samplemask = TRUE; break; + case TGSI_SEMANTIC_TESSINNER: + case TGSI_SEMANTIC_TESSOUTER: + info->reads_tess_factors = true; + break; } - } - else if (file == TGSI_FILE_OUTPUT) { + break; + + case TGSI_FILE_OUTPUT: info->output_semantic_name[reg] = (ubyte) semName; info->output_semantic_index[reg] = (ubyte) semIndex; + info->output_usagemask[reg] |= fulldecl->Declaration.UsageMask; info->num_outputs = MAX2(info->num_outputs, reg + 1); - if (semName == TGSI_SEMANTIC_COLOR) - info->colors_written |= 1 << semIndex; - - if (procType == PIPE_SHADER_VERTEX || - procType == PIPE_SHADER_GEOMETRY || - procType == PIPE_SHADER_TESS_CTRL || - procType == PIPE_SHADER_TESS_EVAL) { - switch (semName) { - case TGSI_SEMANTIC_VIEWPORT_INDEX: - info->writes_viewport_index = TRUE; - break; - case TGSI_SEMANTIC_LAYER: - info->writes_layer = TRUE; - break; - case TGSI_SEMANTIC_PSIZE: - info->writes_psize = TRUE; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - info->writes_clipvertex = TRUE; - break; - } + if 
(fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_X) { + info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamX; + info->num_stream_output_components[fulldecl->Semantic.StreamX]++; } - - if (procType == PIPE_SHADER_FRAGMENT) { - switch (semName) { - case TGSI_SEMANTIC_POSITION: - info->writes_z = TRUE; - break; - case TGSI_SEMANTIC_STENCIL: - info->writes_stencil = TRUE; - break; - case TGSI_SEMANTIC_SAMPLEMASK: - info->writes_samplemask = TRUE; - break; - } + if (fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_Y) { + info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamY << 2; + info->num_stream_output_components[fulldecl->Semantic.StreamY]++; + } + if (fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_Z) { + info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamZ << 4; + info->num_stream_output_components[fulldecl->Semantic.StreamZ]++; + } + if (fulldecl->Declaration.UsageMask & TGSI_WRITEMASK_W) { + info->output_streams[reg] |= (ubyte)fulldecl->Semantic.StreamW << 6; + info->num_stream_output_components[fulldecl->Semantic.StreamW]++; } - if (procType == PIPE_SHADER_VERTEX) { - if (semName == TGSI_SEMANTIC_EDGEFLAG) { - info->writes_edgeflag = TRUE; - } + switch (semName) { + case TGSI_SEMANTIC_PRIMID: + info->writes_primid = true; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + info->writes_viewport_index = true; + break; + case TGSI_SEMANTIC_LAYER: + info->writes_layer = true; + break; + case TGSI_SEMANTIC_PSIZE: + info->writes_psize = true; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + info->writes_clipvertex = true; + break; + case TGSI_SEMANTIC_COLOR: + info->colors_written |= 1 << semIndex; + break; + case TGSI_SEMANTIC_STENCIL: + info->writes_stencil = true; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->writes_samplemask = true; + break; + case TGSI_SEMANTIC_EDGEFLAG: + info->writes_edgeflag = true; + break; + case TGSI_SEMANTIC_POSITION: + if (procType == PIPE_SHADER_FRAGMENT) + info->writes_z = true; + else + 
info->writes_position = true; + break; } - } else if (file == TGSI_FILE_SAMPLER) { + break; + + case TGSI_FILE_SAMPLER: STATIC_ASSERT(sizeof(info->samplers_declared) * 8 >= PIPE_MAX_SAMPLERS); info->samplers_declared |= 1u << reg; - } else if (file == TGSI_FILE_SAMPLER_VIEW) { - unsigned target = fulldecl->SamplerView.Resource; - unsigned type = fulldecl->SamplerView.ReturnTypeX; + break; + + case TGSI_FILE_SAMPLER_VIEW: + target = fulldecl->SamplerView.Resource; + type = fulldecl->SamplerView.ReturnTypeX; assert(target < TGSI_TEXTURE_UNKNOWN); if (info->sampler_targets[reg] == TGSI_TEXTURE_UNKNOWN) { @@ -512,9 +761,7 @@ scan_declaration(struct tgsi_shader_info *info, assert(info->sampler_targets[reg] == target); assert(info->sampler_type[reg] == type); } - } else if (file == TGSI_FILE_IMAGE) { - if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER) - info->images_buffers |= 1 << reg; + break; } } } @@ -719,78 +966,224 @@ tgsi_scan_arrays(const struct tgsi_token *tokens, return; } +static void +check_no_subroutines(const struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + case TGSI_OPCODE_CAL: + unreachable("subroutines unhandled"); + } +} -/** - * Check if the given shader is a "passthrough" shader consisting of only - * MOV instructions of the form: MOV OUT[n], IN[n] - * - */ -boolean -tgsi_is_passthrough_shader(const struct tgsi_token *tokens) +static unsigned +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info, + const struct tgsi_full_instruction *inst) { - struct tgsi_parse_context parse; + unsigned writemask = 0; - /** - ** Setup to begin parsing input shader - **/ + for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &inst->Dst[i]; + + if (dst->Register.File == TGSI_FILE_OUTPUT && + !dst->Register.Indirect) { + unsigned name = info->output_semantic_name[dst->Register.Index]; + + if (name == 
TGSI_SEMANTIC_TESSINNER) + writemask |= dst->Register.WriteMask; + else if (name == TGSI_SEMANTIC_TESSOUTER) + writemask |= dst->Register.WriteMask << 4; + } + } + return writemask; +} + +static unsigned +get_block_tessfactor_writemask(const struct tgsi_shader_info *info, + struct tgsi_parse_context *parse, + unsigned end_opcode) +{ + struct tgsi_full_instruction *inst; + unsigned writemask = 0; + + do { + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + + /* Recursively process nested blocks. */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + writemask |= + get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDIF); + continue; + + case TGSI_OPCODE_BGNLOOP: + writemask |= + get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP); + continue; + + case TGSI_OPCODE_BARRIER: + unreachable("nested BARRIER is illegal"); + continue; + } + + writemask |= get_inst_tessfactor_writemask(info, inst); + } while (inst->Instruction.Opcode != end_opcode); + + return writemask; +} + +static void +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info, + struct tgsi_parse_context *parse, + unsigned *upper_block_tf_writemask, + unsigned *cond_block_tf_writemask) +{ + struct tgsi_full_instruction *inst; + unsigned then_tessfactor_writemask = 0; + unsigned else_tessfactor_writemask = 0; + bool is_then = true; + + do { + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ELSE: + is_then = false; + continue; + + /* Recursively process nested blocks. */ + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + get_if_block_tessfactor_writemask(info, parse, + is_then ? 
&then_tessfactor_writemask : + &else_tessfactor_writemask, + cond_block_tf_writemask); + continue; + + case TGSI_OPCODE_BGNLOOP: + *cond_block_tf_writemask |= + get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP); + continue; + + case TGSI_OPCODE_BARRIER: + unreachable("nested BARRIER is illegal"); + continue; + } + + /* Process an instruction in the current block. */ + unsigned writemask = get_inst_tessfactor_writemask(info, inst); + + if (writemask) { + if (is_then) + then_tessfactor_writemask |= writemask; + else + else_tessfactor_writemask |= writemask; + } + } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF); + + if (then_tessfactor_writemask || else_tessfactor_writemask) { + /* If both statements write the same tess factor channels, + * we can say that the upper block writes them too. */ + *upper_block_tf_writemask |= then_tessfactor_writemask & + else_tessfactor_writemask; + *cond_block_tf_writemask |= then_tessfactor_writemask | + else_tessfactor_writemask; + } +} + +void +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, + const struct tgsi_shader_info *info, + struct tgsi_tessctrl_info *out) +{ + memset(out, 0, sizeof(*out)); + + if (info->processor != PIPE_SHADER_TESS_CTRL) + return; + + struct tgsi_parse_context parse; if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n"); - return FALSE; + debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n"); + return; } - /** - ** Loop over incoming program tokens/instructions + /* The pass works as follows: + * If all codepaths write tess factors, we can say that all invocations + * define tess factors. + * + * Each tess factor channel is tracked separately. */ - while (!tgsi_parse_end_of_tokens(&parse)) { + unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ + /* Initial value = true. 
Here the pass will accumulate results from multiple + * segments surrounded by barriers. If tess factors aren't written at all, + * it's a shader bug and we don't care if this will be true. + */ + out->tessfactors_are_def_in_all_invocs = true; + + while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *fullinst = - &parse.FullToken.FullInstruction; - const struct tgsi_full_src_register *src = - &fullinst->Src[0]; - const struct tgsi_full_dst_register *dst = - &fullinst->Dst[0]; - - /* Do a whole bunch of checks for a simple move */ - if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - (src->Register.File != TGSI_FILE_INPUT && - src->Register.File != TGSI_FILE_SYSTEM_VALUE) || - dst->Register.File != TGSI_FILE_OUTPUT || - src->Register.Index != dst->Register.Index || - - src->Register.Negate || - src->Register.Absolute || - - src->Register.SwizzleX != TGSI_SWIZZLE_X || - src->Register.SwizzleY != TGSI_SWIZZLE_Y || - src->Register.SwizzleZ != TGSI_SWIZZLE_Z || - src->Register.SwizzleW != TGSI_SWIZZLE_W || - - dst->Register.WriteMask != TGSI_WRITEMASK_XYZW) - { - tgsi_parse_free(&parse); - return FALSE; - } - } - break; + if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + continue; - case TGSI_TOKEN_TYPE_DECLARATION: - /* fall-through */ - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* fall-through */ - case TGSI_TOKEN_TYPE_PROPERTY: - /* fall-through */ - default: - ; /* no-op */ + struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + check_no_subroutines(inst); + + /* Process nested blocks. 
*/ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + get_if_block_tessfactor_writemask(info, &parse, + &main_block_tf_writemask, + &cond_block_tf_writemask); + continue; + + case TGSI_OPCODE_BGNLOOP: + cond_block_tf_writemask |= + get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDIF); + continue; + + case TGSI_OPCODE_BARRIER: + /* The following case must be prevented: + * gl_TessLevelInner = ...; + * barrier(); + * if (gl_InvocationID == 1) + * gl_TessLevelInner = ...; + * + * If you consider disjoint code segments separated by barriers, each + * such segment that writes tess factor channels should write the same + * channels in all codepaths within that segment. + */ + if (main_block_tf_writemask || cond_block_tf_writemask) { + /* Accumulate the result: */ + out->tessfactors_are_def_in_all_invocs &= + !(cond_block_tf_writemask & ~main_block_tf_writemask); + + /* Analyze the next code segment from scratch. */ + main_block_tf_writemask = 0; + cond_block_tf_writemask = 0; + } + continue; } + + main_block_tf_writemask |= get_inst_tessfactor_writemask(info, inst); } - tgsi_parse_free(&parse); + /* Accumulate the result for the last code segment separated by a barrier. 
*/ + if (main_block_tf_writemask || cond_block_tf_writemask) { + out->tessfactors_are_def_in_all_invocs &= + !(cond_block_tf_writemask & ~main_block_tf_writemask); + } - /* if we get here, it's a pass-through shader */ - return TRUE; + tgsi_parse_free(&parse); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h index 0c5f2ba06..0f872b002 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -54,6 +54,8 @@ struct tgsi_shader_info ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS]; + ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS]; ubyte num_system_values; ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS]; @@ -64,9 +66,11 @@ struct tgsi_shader_info uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ int const_file_max[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned const_buffers_declared; /**< bitmask of declared const buffers */ unsigned samplers_declared; /**< bitmask of declared samplers */ ubyte sampler_targets[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_TEXTURE_x values */ ubyte sampler_type[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_RETURN_TYPE_x */ + ubyte num_stream_output_components[4]; ubyte input_array_first[PIPE_MAX_SHADER_INPUTS]; ubyte input_array_last[PIPE_MAX_SHADER_INPUTS]; @@ -80,11 +84,19 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + /** + * If a tessellation control shader reads outputs, this describes which ones. 
+ */ + boolean reads_pervertex_outputs; + boolean reads_perpatch_outputs; + boolean reads_tessfactor_outputs; + ubyte colors_read; /**< which color components are read by the FS */ ubyte colors_written; boolean reads_position; /**< does fragment shader read position? */ boolean reads_z; /**< does fragment shader read depth? */ boolean reads_samplemask; /**< does fragment shader read sample mask? */ + boolean reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */ boolean writes_z; /**< does fragment shader write Z value? */ boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_samplemask; /**< does fragment shader write sample mask? */ @@ -109,27 +121,39 @@ struct tgsi_shader_info boolean uses_primid; boolean uses_frontface; boolean uses_invocationid; + boolean uses_thread_id[3]; + boolean uses_block_id[3]; + boolean uses_block_size; + boolean uses_grid_size; + boolean writes_position; boolean writes_psize; boolean writes_clipvertex; + boolean writes_primid; boolean writes_viewport_index; boolean writes_layer; boolean writes_memory; /**< contains stores or atomics to buffers or images */ boolean is_msaa_sampler[PIPE_MAX_SAMPLERS]; boolean uses_doubles; /**< uses any of the double instructions */ boolean uses_derivatives; + boolean uses_bindless_samplers; + boolean uses_bindless_images; unsigned clipdist_writemask; unsigned culldist_writemask; unsigned num_written_culldistance; unsigned num_written_clipdistance; - /** - * Bitmask indicating which images are written to (STORE / ATOM*). - * Indirect image accesses are not reflected in this mask. - */ - unsigned images_writemask; + + unsigned images_declared; /**< bitmask of declared images */ /** * Bitmask indicating which declared image is a buffer. 
*/ unsigned images_buffers; + unsigned images_load; /**< bitmask of images using loads */ + unsigned images_store; /**< bitmask of images using stores */ + unsigned images_atomic; /**< bitmask of images using atomics */ + unsigned shader_buffers_declared; /**< bitmask of declared shader buffers */ + unsigned shader_buffers_load; /**< bitmask of shader buffers using loads */ + unsigned shader_buffers_store; /**< bitmask of shader buffers using stores */ + unsigned shader_buffers_atomic; /**< bitmask of shader buffers using atomics */ /** * Bitmask indicating which register files are accessed with * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. @@ -141,6 +165,8 @@ struct tgsi_shader_info */ unsigned indirect_files_read; unsigned indirect_files_written; + unsigned dim_indirect_files; /**< shader resource indexing */ + unsigned const_buffers_indirect; /**< const buffers using indirect addressing */ unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */ @@ -162,6 +188,12 @@ struct tgsi_array_info struct tgsi_declaration_range range; }; +struct tgsi_tessctrl_info +{ + /** Whether all codepaths write tess factors in all invocations. 
*/ + bool tessfactors_are_def_in_all_invocs; +}; + extern void tgsi_scan_shader(const struct tgsi_token *tokens, struct tgsi_shader_info *info); @@ -172,8 +204,18 @@ tgsi_scan_arrays(const struct tgsi_token *tokens, unsigned max_array_id, struct tgsi_array_info *arrays); -extern boolean -tgsi_is_passthrough_shader(const struct tgsi_token *tokens); +void +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, + const struct tgsi_shader_info *info, + struct tgsi_tessctrl_info *out); + +static inline bool +tgsi_is_bindless_image_file(unsigned file) +{ + return file != TGSI_FILE_IMAGE && + file != TGSI_FILE_MEMORY && + file != TGSI_FILE_BUFFER; +} #ifdef __cplusplus } // extern "C" diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c index 536a4c8f3..0872db9ce 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -52,12 +52,12 @@ static const char *tgsi_file_names[] = "SAMP", "ADDR", "IMM", - "PRED", "SV", "IMAGE", "SVIEW", "BUFFER", "MEMORY", + "CONSTBUF", }; const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = @@ -100,6 +100,13 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "BASEINSTANCE", "DRAWID", "WORK_DIM", + "SUBGROUP_SIZE", + "SUBGROUP_INVOCATION", + "SUBGROUP_EQ_MASK", + "SUBGROUP_GE_MASK", + "SUBGROUP_GT_MASK", + "SUBGROUP_LE_MASK", + "SUBGROUP_LT_MASK", }; const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] = @@ -145,10 +152,12 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "NUM_CLIPDIST_ENABLED", "NUM_CULLDIST_ENABLED", "FS_EARLY_DEPTH_STENCIL", + "FS_POST_DEPTH_COVERAGE", "NEXT_SHADER", "CS_FIXED_BLOCK_WIDTH", "CS_FIXED_BLOCK_HEIGHT", - "CS_FIXED_BLOCK_DEPTH" + "CS_FIXED_BLOCK_DEPTH", + "MUL_ZERO_WINS", }; const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = @@ -206,12 +215,14 @@ const char *tgsi_fs_coord_pixel_center_names[2] = "INTEGER" }; -const char *tgsi_immediate_type_names[4] = +const char 
*tgsi_immediate_type_names[6] = { "FLT32", "UINT32", "INT32", - "FLT64" + "FLT64", + "UINT64", + "INT64", }; const char *tgsi_memory_names[3] = diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h index 9a9362e91..bb2d3458d 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_strings.h @@ -58,7 +58,7 @@ extern const char *tgsi_fs_coord_origin_names[2]; extern const char *tgsi_fs_coord_pixel_center_names[2]; -extern const char *tgsi_immediate_type_names[4]; +extern const char *tgsi_immediate_type_names[6]; extern const char *tgsi_memory_names[3]; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c index be8084251..02241a66b 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -208,14 +208,17 @@ static boolean parse_int( const char **pcur, int *val ) return FALSE; } -static boolean parse_identifier( const char **pcur, char *ret ) +static boolean parse_identifier( const char **pcur, char *ret, size_t len ) { const char *cur = *pcur; int i = 0; if (is_alpha_underscore( cur )) { ret[i++] = *cur++; - while (is_alpha_underscore( cur ) || is_digit( cur )) + while (is_alpha_underscore( cur ) || is_digit( cur )) { + if (i == len - 1) + return FALSE; ret[i++] = *cur++; + } ret[i++] = '\0'; *pcur = cur; return TRUE; @@ -996,24 +999,34 @@ parse_texoffset_operand( static boolean match_inst(const char **pcur, unsigned *saturate, + unsigned *precise, const struct tgsi_opcode_info *info) { const char *cur = *pcur; + const char *mnemonic = tgsi_get_opcode_name(info->opcode); /* simple case: the whole string matches the instruction name */ - if (str_match_nocase_whole(&cur, info->mnemonic)) { + if (str_match_nocase_whole(&cur, mnemonic)) { *pcur = cur; *saturate = 0; + *precise = 0; return TRUE; } - if (str_match_no_case(&cur, 
info->mnemonic)) { + if (str_match_no_case(&cur, mnemonic)) { /* the instruction has a suffix, figure it out */ - if (str_match_nocase_whole(&cur, "_SAT")) { + if (str_match_no_case(&cur, "_SAT")) { *pcur = cur; *saturate = 1; - return TRUE; } + + if (str_match_no_case(&cur, "_PRECISE")) { + *pcur = cur; + *precise = 1; + } + + if (!is_digit_alpha_underscore(cur)) + return TRUE; } return FALSE; @@ -1026,6 +1039,7 @@ parse_instruction( { uint i; uint saturate = 0; + uint precise = 0; const struct tgsi_opcode_info *info; struct tgsi_full_instruction inst; const char *cur; @@ -1033,43 +1047,6 @@ parse_instruction( inst = tgsi_default_full_instruction(); - /* Parse predicate. - */ - eat_opt_white( &ctx->cur ); - if (*ctx->cur == '(') { - uint file; - int index; - uint swizzle[4]; - boolean parsed_swizzle; - - inst.Instruction.Predicate = 1; - - ctx->cur++; - if (*ctx->cur == '!') { - ctx->cur++; - inst.Predicate.Negate = 1; - } - - if (!parse_register_1d( ctx, &file, &index )) - return FALSE; - - if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, 4 )) { - if (parsed_swizzle) { - inst.Predicate.SwizzleX = swizzle[0]; - inst.Predicate.SwizzleY = swizzle[1]; - inst.Predicate.SwizzleZ = swizzle[2]; - inst.Predicate.SwizzleW = swizzle[3]; - } - } - - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - - ctx->cur++; - } - /* Parse instruction name. 
*/ eat_opt_white( &ctx->cur ); @@ -1077,7 +1054,7 @@ parse_instruction( cur = ctx->cur; info = tgsi_get_opcode_info( i ); - if (match_inst(&cur, &saturate, info)) { + if (match_inst(&cur, &saturate, &precise, info)) { if (info->num_dst + info->num_src + info->is_tex == 0) { ctx->cur = cur; break; @@ -1098,6 +1075,7 @@ parse_instruction( inst.Instruction.Opcode = i; inst.Instruction.Saturate = saturate; + inst.Instruction.Precise = precise; inst.Instruction.NumDstRegs = info->num_dst; inst.Instruction.NumSrcRegs = info->num_src; @@ -1160,7 +1138,7 @@ parse_instruction( cur = ctx->cur; eat_opt_white( &cur ); - for (i = 0; inst.Instruction.Texture && *cur == ','; i++) { + for (i = 0; inst.Instruction.Texture && *cur == ',' && i < TGSI_FULL_MAX_TEX_OFFSETS; i++) { cur++; eat_opt_white( &cur ); ctx->cur = cur; @@ -1546,6 +1524,54 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur = ctx->cur; eat_opt_white( &cur ); + if (*cur == ',' && + file == TGSI_FILE_OUTPUT && ctx->processor == PIPE_SHADER_GEOMETRY) { + cur++; + eat_opt_white(&cur); + if (str_match_nocase_whole(&cur, "STREAM")) { + uint stream[4]; + + eat_opt_white(&cur); + if (*cur != '(') { + report_error(ctx, "Expected '('"); + return FALSE; + } + cur++; + + for (int i = 0; i < 4; ++i) { + eat_opt_white(&cur); + if (!parse_uint(&cur, &stream[i])) { + report_error(ctx, "Expected literal integer"); + return FALSE; + } + + eat_opt_white(&cur); + if (i < 3) { + if (*cur != ',') { + report_error(ctx, "Expected ','"); + return FALSE; + } + cur++; + } + } + + if (*cur != ')') { + report_error(ctx, "Expected ')'"); + return FALSE; + } + cur++; + + decl.Semantic.StreamX = stream[0]; + decl.Semantic.StreamY = stream[1]; + decl.Semantic.StreamZ = stream[2]; + decl.Semantic.StreamW = stream[3]; + + ctx->cur = cur; + } + } + + cur = ctx->cur; + eat_opt_white( &cur ); if (*cur == ',' && !is_vs_input) { uint i; @@ -1739,7 +1765,7 @@ static boolean parse_property( struct translate_ctx *ctx ) report_error( ctx, 
"Syntax error" ); return FALSE; } - if (!parse_identifier( &ctx->cur, id )) { + if (!parse_identifier( &ctx->cur, id, sizeof(id) )) { report_error( ctx, "Syntax error" ); return FALSE; } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h index c21ff959c..e4da0f5de 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -124,6 +124,8 @@ tgsi_transform_const_decl(struct tgsi_transform_context *ctx, decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Range.First = firstIdx; decl.Range.Last = lastIdx; + decl.Declaration.Dimension = 1; + /* Dim.Index2D is already 0 */ ctx->emit_declaration(ctx, &decl); } @@ -231,6 +233,18 @@ tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg, } static inline void +tgsi_transform_src_reg_xyzw(struct tgsi_full_src_register *reg, + unsigned file, unsigned index) +{ + reg->Register.File = file; + reg->Register.Index = index; + if (file == TGSI_FILE_CONSTANT) { + reg->Register.Dimension = 1; + reg->Dimension.Index = 0; + } +} + +static inline void tgsi_transform_src_reg(struct tgsi_full_src_register *reg, unsigned file, unsigned index, unsigned swizzleX, unsigned swizzleY, @@ -238,7 +252,11 @@ tgsi_transform_src_reg(struct tgsi_full_src_register *reg, { reg->Register.File = file; reg->Register.Index = index; - reg->Register.SwizzleX = swizzleX; + if (file == TGSI_FILE_CONSTANT) { + reg->Register.Dimension = 1; + reg->Dimension.Index = 0; + } + reg->Register.SwizzleX = swizzleX; reg->Register.SwizzleY = swizzleY; reg->Register.SwizzleZ = swizzleZ; reg->Register.SwizzleW = swizzleW; @@ -265,8 +283,7 @@ tgsi_transform_op1_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 1; - inst.Src[0].Register.File = src0_file; - inst.Src[0].Register.Index = src0_index; + 
tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index); ctx->emit_instruction(ctx, &inst); } @@ -281,7 +298,8 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, unsigned src0_file, unsigned src0_index, unsigned src1_file, - unsigned src1_index) + unsigned src1_index, + bool src1_negate) { struct tgsi_full_instruction inst; @@ -292,10 +310,9 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 2; - inst.Src[0].Register.File = src0_file; - inst.Src[0].Register.Index = src0_index; - inst.Src[1].Register.File = src1_file; - inst.Src[1].Register.Index = src1_index; + tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index); + tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index); + inst.Src[1].Register.Negate = src1_negate; ctx->emit_instruction(ctx, &inst); } @@ -323,12 +340,9 @@ tgsi_transform_op3_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 3; - inst.Src[0].Register.File = src0_file; - inst.Src[0].Register.Index = src0_index; - inst.Src[1].Register.File = src1_file; - inst.Src[1].Register.Index = src1_index; - inst.Src[2].Register.File = src2_file; - inst.Src[2].Register.Index = src2_index; + tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index); + tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index); + tgsi_transform_src_reg_xyzw(&inst.Src[2], src2_file, src2_index); ctx->emit_instruction(ctx, &inst); } @@ -354,8 +368,7 @@ tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 1; - inst.Src[0].Register.File = src0_file; - inst.Src[0].Register.Index = src0_index; + tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index); switch 
(dst_writemask) { case TGSI_WRITEMASK_X: inst.Src[0].Register.SwizzleX = src0_swizzle; @@ -388,7 +401,8 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx, unsigned src0_swizzle, unsigned src1_file, unsigned src1_index, - unsigned src1_swizzle) + unsigned src1_swizzle, + bool src1_negate) { struct tgsi_full_instruction inst; @@ -399,10 +413,9 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 2; - inst.Src[0].Register.File = src0_file; - inst.Src[0].Register.Index = src0_index; - inst.Src[1].Register.File = src1_file; - inst.Src[1].Register.Index = src1_index; + tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index); + tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index); + inst.Src[1].Register.Negate = src1_negate; switch (dst_writemask) { case TGSI_WRITEMASK_X: inst.Src[0].Register.SwizzleX = src0_swizzle; @@ -454,13 +467,10 @@ tgsi_transform_op3_swz_inst(struct tgsi_transform_context *ctx, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 3; - inst.Src[0].Register.File = src0_file; - inst.Src[0].Register.Index = src0_index; + tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index); inst.Src[0].Register.Negate = src0_negate; - inst.Src[1].Register.File = src1_file; - inst.Src[1].Register.Index = src1_index; - inst.Src[2].Register.File = src2_file; - inst.Src[2].Register.Index = src2_index; + tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index); + tgsi_transform_src_reg_xyzw(&inst.Src[2], src2_file, src2_index); switch (dst_writemask) { case TGSI_WRITEMASK_X: inst.Src[0].Register.SwizzleX = src0_swizzle; @@ -503,8 +513,7 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx, inst.Instruction.Opcode = TGSI_OPCODE_KILL_IF; inst.Instruction.NumDstRegs = 0; inst.Instruction.NumSrcRegs = 1; - 
inst.Src[0].Register.File = src_file; - inst.Src[0].Register.Index = src_index; + tgsi_transform_src_reg_xyzw(&inst.Src[0], src_file, src_index); inst.Src[0].Register.SwizzleX = inst.Src[0].Register.SwizzleY = inst.Src[0].Register.SwizzleZ = @@ -536,10 +545,8 @@ tgsi_transform_tex_inst(struct tgsi_transform_context *ctx, inst.Instruction.NumSrcRegs = 2; inst.Instruction.Texture = TRUE; inst.Texture.Texture = tex_target; - inst.Src[0].Register.File = src_file; - inst.Src[0].Register.Index = src_index; - inst.Src[1].Register.File = TGSI_FILE_SAMPLER; - inst.Src[1].Register.Index = sampler_index; + tgsi_transform_src_reg_xyzw(&inst.Src[0], src_file, src_index); + tgsi_transform_src_reg_xyzw(&inst.Src[1], TGSI_FILE_SAMPLER, sampler_index); ctx->emit_instruction(ctx, &inst); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 7bcd24297..b26434ccb 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -57,7 +57,6 @@ union tgsi_any_token { struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; - struct tgsi_instruction_predicate insn_predicate; struct tgsi_instruction_label insn_label; struct tgsi_instruction_texture insn_texture; struct tgsi_instruction_memory insn_memory; @@ -83,7 +82,6 @@ struct ureg_tokens { #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 4096 #define UREG_MAX_ADDR 3 -#define UREG_MAX_PRED 1 #define UREG_MAX_ARRAY_TEMPS 256 struct const_decl { @@ -127,6 +125,7 @@ struct ureg_program struct { unsigned semantic_name; unsigned semantic_index; + unsigned streams; unsigned usage_mask; /* = TGSI_WRITEMASK_* */ unsigned first; unsigned last; @@ -181,13 +180,11 @@ struct ureg_program unsigned array_temps[UREG_MAX_ARRAY_TEMPS]; unsigned nr_array_temps; - struct const_decl const_decls; - struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS]; + struct const_decl 
const_decls[PIPE_MAX_CONSTANT_BUFFERS]; unsigned properties[TGSI_PROPERTY_COUNT]; unsigned nr_addrs; - unsigned nr_preds; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -262,6 +259,7 @@ static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, return &ureg->domain[domain].tokens[nr]; } + void ureg_property(struct ureg_program *ureg, unsigned name, unsigned value) { @@ -409,6 +407,7 @@ struct ureg_dst ureg_DECL_output_layout(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index, + unsigned streams, unsigned index, unsigned usage_mask, unsigned array_id, @@ -417,6 +416,10 @@ ureg_DECL_output_layout(struct ureg_program *ureg, unsigned i; assert(usage_mask != 0); + assert(!(streams & 0x03) || (usage_mask & 1)); + assert(!(streams & 0x0c) || (usage_mask & 2)); + assert(!(streams & 0x30) || (usage_mask & 4)); + assert(!(streams & 0xc0) || (usage_mask & 8)); for (i = 0; i < ureg->nr_outputs; i++) { if (ureg->output[i].semantic_name == semantic_name && @@ -441,9 +444,12 @@ ureg_DECL_output_layout(struct ureg_program *ureg, } else { set_bad( ureg ); + i = 0; } out: + ureg->output[i].streams |= streams; + return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first, array_id); } @@ -457,7 +463,7 @@ ureg_DECL_output_masked(struct ureg_program *ureg, unsigned array_id, unsigned array_size) { - return ureg_DECL_output_layout(ureg, name, index, + return ureg_DECL_output_layout(ureg, name, index, 0, ureg->nr_output_regs, usage_mask, array_id, array_size); } @@ -500,7 +506,7 @@ ureg_DECL_constant2D(struct ureg_program *ureg, unsigned last, unsigned index2D) { - struct const_decl *decl = &ureg->const_decls2D[index2D]; + struct const_decl *decl = &ureg->const_decls[index2D]; assert(index2D < PIPE_MAX_CONSTANT_BUFFERS); @@ -522,7 +528,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, unsigned index) { - struct const_decl *decl = &ureg->const_decls; + struct const_decl *decl = &ureg->const_decls[0]; unsigned 
minconst = index, maxconst = index; unsigned i; @@ -572,7 +578,9 @@ out: assert(i < decl->nr_constant_ranges); assert(decl->constant_range[i].first <= index); assert(decl->constant_range[i].last >= index); - return ureg_src_register(TGSI_FILE_CONSTANT, index); + + struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, index); + return ureg_src_dimension(src, 0); } static struct ureg_dst alloc_temporary( struct ureg_program *ureg, @@ -662,19 +670,6 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } -/* Allocate a new predicate register. - */ -struct ureg_dst -ureg_DECL_predicate(struct ureg_program *ureg) -{ - if (ureg->nr_preds < UREG_MAX_PRED) { - return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++); - } - - assert(0); - return ureg_dst_register(TGSI_FILE_PREDICATE, 0); -} - /* Allocate a new sampler. */ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, @@ -1146,8 +1141,6 @@ ureg_emit_dst( struct ureg_program *ureg, unsigned n = 0; assert(dst.File != TGSI_FILE_NULL); - assert(dst.File != TGSI_FILE_CONSTANT); - assert(dst.File != TGSI_FILE_INPUT); assert(dst.File != TGSI_FILE_SAMPLER); assert(dst.File != TGSI_FILE_SAMPLER_VIEW); assert(dst.File != TGSI_FILE_IMMEDIATE); @@ -1219,17 +1212,12 @@ struct ureg_emit_insn_result ureg_emit_insn(struct ureg_program *ureg, unsigned opcode, boolean saturate, - boolean predicate, - boolean pred_negate, - unsigned pred_swizzle_x, - unsigned pred_swizzle_y, - unsigned pred_swizzle_z, - unsigned pred_swizzle_w, + unsigned precise, unsigned num_dst, - unsigned num_src ) + unsigned num_src) { union tgsi_any_token *out; - uint count = predicate ? 
2 : 1; + uint count = 1; struct ureg_emit_insn_result result; validate( opcode, num_dst, num_src ); @@ -1238,22 +1226,13 @@ ureg_emit_insn(struct ureg_program *ureg, out[0].insn = tgsi_default_instruction(); out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; + out[0].insn.Precise = precise; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; result.insn_token = ureg->domain[DOMAIN_INSN].count - count; result.extended_token = result.insn_token; - if (predicate) { - out[0].insn.Predicate = 1; - out[1].insn_predicate = tgsi_default_instruction_predicate(); - out[1].insn_predicate.Negate = pred_negate; - out[1].insn_predicate.SwizzleX = pred_swizzle_x; - out[1].insn_predicate.SwizzleY = pred_swizzle_y; - out[1].insn_predicate.SwizzleZ = pred_swizzle_z; - out[1].insn_predicate.SwizzleW = pred_swizzle_w; - } - ureg->nr_instructions++; return result; @@ -1311,7 +1290,7 @@ ureg_fixup_label(struct ureg_program *ureg, void ureg_emit_texture(struct ureg_program *ureg, unsigned extended_token, - unsigned target, unsigned num_offsets) + unsigned target, unsigned return_type, unsigned num_offsets) { union tgsi_any_token *out, *insn; @@ -1323,6 +1302,7 @@ ureg_emit_texture(struct ureg_program *ureg, out[0].value = 0; out[0].insn_texture.Texture = target; out[0].insn_texture.NumOffsets = num_offsets; + out[0].insn_texture.ReturnType = return_type; } void @@ -1375,38 +1355,23 @@ ureg_insn(struct ureg_program *ureg, const struct ureg_dst *dst, unsigned nr_dst, const struct ureg_src *src, - unsigned nr_src ) + unsigned nr_src, + unsigned precise ) { struct ureg_emit_insn_result insn; unsigned i; boolean saturate; - boolean predicate; - boolean negate = FALSE; - unsigned swizzle[4] = { 0 }; if (nr_dst && ureg_dst_is_empty(dst[0])) { return; } saturate = nr_dst ? dst[0].Saturate : FALSE; - predicate = nr_dst ? 
dst[0].Predicate : FALSE; - if (predicate) { - negate = dst[0].PredNegate; - swizzle[0] = dst[0].PredSwizzleX; - swizzle[1] = dst[0].PredSwizzleY; - swizzle[2] = dst[0].PredSwizzleZ; - swizzle[3] = dst[0].PredSwizzleW; - } insn = ureg_emit_insn(ureg, opcode, saturate, - predicate, - negate, - swizzle[0], - swizzle[1], - swizzle[2], - swizzle[3], + precise, nr_dst, nr_src); @@ -1425,6 +1390,7 @@ ureg_tex_insn(struct ureg_program *ureg, const struct ureg_dst *dst, unsigned nr_dst, unsigned target, + unsigned return_type, const struct tgsi_texture_offset *texoffsets, unsigned nr_offset, const struct ureg_src *src, @@ -1433,37 +1399,22 @@ ureg_tex_insn(struct ureg_program *ureg, struct ureg_emit_insn_result insn; unsigned i; boolean saturate; - boolean predicate; - boolean negate = FALSE; - unsigned swizzle[4] = { 0 }; if (nr_dst && ureg_dst_is_empty(dst[0])) { return; } saturate = nr_dst ? dst[0].Saturate : FALSE; - predicate = nr_dst ? dst[0].Predicate : FALSE; - if (predicate) { - negate = dst[0].PredNegate; - swizzle[0] = dst[0].PredSwizzleX; - swizzle[1] = dst[0].PredSwizzleY; - swizzle[2] = dst[0].PredSwizzleZ; - swizzle[3] = dst[0].PredSwizzleW; - } insn = ureg_emit_insn(ureg, opcode, saturate, - predicate, - negate, - swizzle[0], - swizzle[1], - swizzle[2], - swizzle[3], + 0, nr_dst, nr_src); - ureg_emit_texture( ureg, insn.extended_token, target, nr_offset ); + ureg_emit_texture( ureg, insn.extended_token, target, return_type, + nr_offset ); for (i = 0; i < nr_offset; i++) ureg_emit_texture_offset( ureg, &texoffsets[i]); @@ -1479,37 +1430,6 @@ ureg_tex_insn(struct ureg_program *ureg, void -ureg_label_insn(struct ureg_program *ureg, - unsigned opcode, - const struct ureg_src *src, - unsigned nr_src, - unsigned *label_token ) -{ - struct ureg_emit_insn_result insn; - unsigned i; - - insn = ureg_emit_insn(ureg, - opcode, - FALSE, - FALSE, - FALSE, - TGSI_SWIZZLE_X, - TGSI_SWIZZLE_Y, - TGSI_SWIZZLE_Z, - TGSI_SWIZZLE_W, - 0, - nr_src); - - ureg_emit_label( ureg, 
insn.extended_token, label_token ); - - for (i = 0; i < nr_src; i++) - ureg_emit_src( ureg, src[i] ); - - ureg_fixup_insn_size( ureg, insn.insn_token ); -} - - -void ureg_memory_insn(struct ureg_program *ureg, unsigned opcode, const struct ureg_dst *dst, @@ -1526,12 +1446,7 @@ ureg_memory_insn(struct ureg_program *ureg, insn = ureg_emit_insn(ureg, opcode, FALSE, - FALSE, - FALSE, - TGSI_SWIZZLE_X, - TGSI_SWIZZLE_Y, - TGSI_SWIZZLE_Z, - TGSI_SWIZZLE_W, + 0, nr_dst, nr_src); @@ -1554,6 +1469,7 @@ emit_decl_semantic(struct ureg_program *ureg, unsigned last, unsigned semantic_name, unsigned semantic_index, + unsigned streams, unsigned usage_mask, unsigned array_id) { @@ -1574,6 +1490,10 @@ emit_decl_semantic(struct ureg_program *ureg, out[2].value = 0; out[2].decl_semantic.Name = semantic_name; out[2].decl_semantic.Index = semantic_index; + out[2].decl_semantic.StreamX = streams & 3; + out[2].decl_semantic.StreamY = (streams >> 2) & 3; + out[2].decl_semantic.StreamZ = (streams >> 4) & 3; + out[2].decl_semantic.StreamW = (streams >> 6) & 3; if (array_id) { out[3].value = 0; @@ -1878,6 +1798,7 @@ static void emit_decls( struct ureg_program *ureg ) ureg->input[i].last, ureg->input[i].semantic_name, ureg->input[i].semantic_index, + 0, TGSI_WRITEMASK_XYZW, ureg->input[i].array_id); } @@ -1891,6 +1812,7 @@ static void emit_decls( struct ureg_program *ureg ) ureg->input[i].semantic_name, ureg->input[i].semantic_index + (j - ureg->input[i].first), + 0, TGSI_WRITEMASK_XYZW, 0); } } @@ -1904,6 +1826,7 @@ static void emit_decls( struct ureg_program *ureg ) i, ureg->system_value[i].semantic_name, ureg->system_value[i].semantic_index, + 0, TGSI_WRITEMASK_XYZW, 0); } @@ -1915,6 +1838,7 @@ static void emit_decls( struct ureg_program *ureg ) ureg->output[i].last, ureg->output[i].semantic_name, ureg->output[i].semantic_index, + ureg->output[i].streams, ureg->output[i].usage_mask, ureg->output[i].array_id); } @@ -1928,6 +1852,7 @@ static void emit_decls( struct ureg_program *ureg ) 
ureg->output[i].semantic_name, ureg->output[i].semantic_index + (j - ureg->output[i].first), + ureg->output[i].streams, ureg->output[i].usage_mask, 0); } } @@ -1967,17 +1892,8 @@ static void emit_decls( struct ureg_program *ureg ) emit_decl_memory(ureg, i); } - if (ureg->const_decls.nr_constant_ranges) { - for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { - emit_decl_range(ureg, - TGSI_FILE_CONSTANT, - ureg->const_decls.constant_range[i].first, - ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1); - } - } - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - struct const_decl *decl = &ureg->const_decls2D[i]; + struct const_decl *decl = &ureg->const_decls[i]; if (decl->nr_constant_ranges) { uint j; @@ -2014,13 +1930,6 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } - if (ureg->nr_preds) { - emit_decl_range(ureg, - TGSI_FILE_PREDICATE, - 0, - ureg->nr_preds); - } - for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].value.u, @@ -2153,7 +2062,7 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; if (nr_tokens) - *nr_tokens = ureg->domain[DOMAIN_DECL].size; + *nr_tokens = ureg->domain[DOMAIN_DECL].count; ureg->domain[DOMAIN_DECL].tokens = 0; ureg->domain[DOMAIN_DECL].size = 0; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h index d3c28b33e..e88c2c19c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -79,12 +79,6 @@ struct ureg_dst unsigned DimIndirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ unsigned Saturate : 1; /* BOOL */ - unsigned Predicate : 1; - unsigned PredNegate : 1; /* BOOL */ - unsigned PredSwizzleX : 2; /* TGSI_SWIZZLE_ */ - unsigned PredSwizzleY : 2; /* TGSI_SWIZZLE_ */ - unsigned PredSwizzleZ : 2; /* TGSI_SWIZZLE_ */ - unsigned 
PredSwizzleW : 2; /* TGSI_SWIZZLE_ */ int Index : 16; /* SINT */ int IndirectIndex : 16; /* SINT */ unsigned IndirectFile : 4; /* TGSI_FILE_ */ @@ -251,6 +245,7 @@ struct ureg_dst ureg_DECL_output_layout(struct ureg_program *, unsigned semantic_name, unsigned semantic_index, + unsigned streams, unsigned index, unsigned usage_mask, unsigned array_id, @@ -347,9 +342,6 @@ ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst ureg_DECL_address( struct ureg_program * ); -struct ureg_dst -ureg_DECL_predicate(struct ureg_program *); - /* Supply an index to the sampler declaration as this is the hook to * the external pipe_sampler state. Users of this function probably * don't want just any sampler, but a specific one which they've set @@ -554,7 +546,8 @@ ureg_insn(struct ureg_program *ureg, const struct ureg_dst *dst, unsigned nr_dst, const struct ureg_src *src, - unsigned nr_src ); + unsigned nr_src, + unsigned precise ); void @@ -563,6 +556,7 @@ ureg_tex_insn(struct ureg_program *ureg, const struct ureg_dst *dst, unsigned nr_dst, unsigned target, + unsigned return_type, const struct tgsi_texture_offset *texoffsets, unsigned nr_offset, const struct ureg_src *src, @@ -570,13 +564,6 @@ ureg_tex_insn(struct ureg_program *ureg, void -ureg_label_insn(struct ureg_program *ureg, - unsigned opcode, - const struct ureg_src *src, - unsigned nr_src, - unsigned *label); - -void ureg_memory_insn(struct ureg_program *ureg, unsigned opcode, const struct ureg_dst *dst, @@ -600,14 +587,9 @@ struct ureg_emit_insn_result ureg_emit_insn(struct ureg_program *ureg, unsigned opcode, boolean saturate, - boolean predicate, - boolean pred_negate, - unsigned pred_swizzle_x, - unsigned pred_swizzle_y, - unsigned pred_swizzle_z, - unsigned pred_swizzle_w, + unsigned precise, unsigned num_dst, - unsigned num_src ); + unsigned num_src); void ureg_emit_label(struct ureg_program *ureg, @@ -617,7 +599,7 @@ ureg_emit_label(struct ureg_program *ureg, void ureg_emit_texture(struct ureg_program 
*ureg, unsigned insn_token, - unsigned target, unsigned num_offsets); + unsigned target, unsigned return_type, unsigned num_offsets); void ureg_emit_texture_offset(struct ureg_program *ureg, @@ -651,12 +633,7 @@ static inline void ureg_##op( struct ureg_program *ureg ) \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ - FALSE, \ - FALSE, \ - TGSI_SWIZZLE_X, \ - TGSI_SWIZZLE_Y, \ - TGSI_SWIZZLE_Z, \ - TGSI_SWIZZLE_W, \ + 0, \ 0, \ 0); \ ureg_fixup_insn_size( ureg, insn.insn_token ); \ @@ -671,12 +648,7 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ - FALSE, \ - FALSE, \ - TGSI_SWIZZLE_X, \ - TGSI_SWIZZLE_Y, \ - TGSI_SWIZZLE_Z, \ - TGSI_SWIZZLE_W, \ + 0, \ 0, \ 1); \ ureg_emit_src( ureg, src ); \ @@ -692,12 +664,7 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ - FALSE, \ - FALSE, \ - TGSI_SWIZZLE_X, \ - TGSI_SWIZZLE_Y, \ - TGSI_SWIZZLE_Z, \ - TGSI_SWIZZLE_W, \ + 0, \ 0, \ 0); \ ureg_emit_label( ureg, insn.extended_token, label_token ); \ @@ -714,12 +681,7 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ FALSE, \ - FALSE, \ - FALSE, \ - TGSI_SWIZZLE_X, \ - TGSI_SWIZZLE_Y, \ - TGSI_SWIZZLE_Z, \ - TGSI_SWIZZLE_W, \ + 0, \ 0, \ 1); \ ureg_emit_label( ureg, insn.extended_token, label_token ); \ @@ -738,12 +700,7 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ + 0, \ 1, \ 0); \ ureg_emit_dst( ureg, dst ); \ @@ -763,12 +720,7 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ + 0, \ 1, \ 1); \ ureg_emit_dst( 
ureg, dst ); \ @@ -789,12 +741,7 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ + 0, \ 1, \ 2); \ ureg_emit_dst( ureg, dst ); \ @@ -811,50 +758,18 @@ static inline void ureg_##op( struct ureg_program *ureg, \ struct ureg_src src1 ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ + unsigned return_type = TGSI_RETURN_TYPE_UNKNOWN; \ struct ureg_emit_insn_result insn; \ if (ureg_dst_is_empty(dst)) \ return; \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ - 1, \ - 2); \ - ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ - ureg_emit_dst( ureg, dst ); \ - ureg_emit_src( ureg, src0 ); \ - ureg_emit_src( ureg, src1 ); \ - ureg_fixup_insn_size( ureg, insn.insn_token ); \ -} - -#define OP12_SAMPLE( op ) \ -static inline void ureg_##op( struct ureg_program *ureg, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1 ) \ -{ \ - unsigned opcode = TGSI_OPCODE_##op; \ - unsigned target = TGSI_TEXTURE_UNKNOWN; \ - struct ureg_emit_insn_result insn; \ - if (ureg_dst_is_empty(dst)) \ - return; \ - insn = ureg_emit_insn(ureg, \ - opcode, \ - dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ + 0, \ 1, \ 2); \ - ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ + ureg_emit_texture( ureg, insn.extended_token, target, \ + return_type, 0 ); \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src0 ); \ ureg_emit_src( ureg, src1 ); \ @@ -875,45 +790,9 @@ static inline void ureg_##op( struct ureg_program *ureg, \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - 
dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ - 1, \ - 3); \ - ureg_emit_dst( ureg, dst ); \ - ureg_emit_src( ureg, src0 ); \ - ureg_emit_src( ureg, src1 ); \ - ureg_emit_src( ureg, src2 ); \ - ureg_fixup_insn_size( ureg, insn.insn_token ); \ -} - -#define OP13_SAMPLE( op ) \ -static inline void ureg_##op( struct ureg_program *ureg, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1, \ - struct ureg_src src2 ) \ -{ \ - unsigned opcode = TGSI_OPCODE_##op; \ - unsigned target = TGSI_TEXTURE_UNKNOWN; \ - struct ureg_emit_insn_result insn; \ - if (ureg_dst_is_empty(dst)) \ - return; \ - insn = ureg_emit_insn(ureg, \ - opcode, \ - dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ + 0, \ 1, \ 3); \ - ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src0 ); \ ureg_emit_src( ureg, src1 ); \ @@ -931,21 +810,18 @@ static inline void ureg_##op( struct ureg_program *ureg, \ struct ureg_src src3 ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ + unsigned return_type = TGSI_RETURN_TYPE_UNKNOWN; \ struct ureg_emit_insn_result insn; \ if (ureg_dst_is_empty(dst)) \ return; \ insn = ureg_emit_insn(ureg, \ opcode, \ dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ + 0, \ 1, \ 4); \ - ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ + ureg_emit_texture( ureg, insn.extended_token, target, \ + return_type, 0 ); \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src0 ); \ ureg_emit_src( ureg, src1 ); \ @@ -954,140 +830,6 @@ static inline void ureg_##op( struct ureg_program *ureg, \ ureg_fixup_insn_size( ureg, insn.insn_token ); \ } -#define OP14_SAMPLE( op ) \ -static inline void ureg_##op( struct ureg_program *ureg, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src 
src1, \ - struct ureg_src src2, \ - struct ureg_src src3 ) \ -{ \ - unsigned opcode = TGSI_OPCODE_##op; \ - unsigned target = TGSI_TEXTURE_UNKNOWN; \ - struct ureg_emit_insn_result insn; \ - if (ureg_dst_is_empty(dst)) \ - return; \ - insn = ureg_emit_insn(ureg, \ - opcode, \ - dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ - 1, \ - 4); \ - ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ - ureg_emit_dst( ureg, dst ); \ - ureg_emit_src( ureg, src0 ); \ - ureg_emit_src( ureg, src1 ); \ - ureg_emit_src( ureg, src2 ); \ - ureg_emit_src( ureg, src3 ); \ - ureg_fixup_insn_size( ureg, insn.insn_token ); \ -} - - -#define OP14( op ) \ -static inline void ureg_##op( struct ureg_program *ureg, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1, \ - struct ureg_src src2, \ - struct ureg_src src3 ) \ -{ \ - unsigned opcode = TGSI_OPCODE_##op; \ - struct ureg_emit_insn_result insn; \ - if (ureg_dst_is_empty(dst)) \ - return; \ - insn = ureg_emit_insn(ureg, \ - opcode, \ - dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ - 1, \ - 4); \ - ureg_emit_dst( ureg, dst ); \ - ureg_emit_src( ureg, src0 ); \ - ureg_emit_src( ureg, src1 ); \ - ureg_emit_src( ureg, src2 ); \ - ureg_emit_src( ureg, src3 ); \ - ureg_fixup_insn_size( ureg, insn.insn_token ); \ -} - - -#define OP15( op ) \ -static inline void ureg_##op( struct ureg_program *ureg, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1, \ - struct ureg_src src2, \ - struct ureg_src src3, \ - struct ureg_src src4 ) \ -{ \ - unsigned opcode = TGSI_OPCODE_##op; \ - struct ureg_emit_insn_result insn; \ - if (ureg_dst_is_empty(dst)) \ - return; \ - insn = ureg_emit_insn(ureg, \ - opcode, \ - dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ 
- dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ - 1, \ - 5); \ - ureg_emit_dst( ureg, dst ); \ - ureg_emit_src( ureg, src0 ); \ - ureg_emit_src( ureg, src1 ); \ - ureg_emit_src( ureg, src2 ); \ - ureg_emit_src( ureg, src3 ); \ - ureg_emit_src( ureg, src4 ); \ - ureg_fixup_insn_size( ureg, insn.insn_token ); \ -} - -#define OP15_SAMPLE( op ) \ -static inline void ureg_##op( struct ureg_program *ureg, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1, \ - struct ureg_src src2, \ - struct ureg_src src3, \ - struct ureg_src src4 ) \ -{ \ - unsigned opcode = TGSI_OPCODE_##op; \ - unsigned target = TGSI_TEXTURE_UNKNOWN; \ - struct ureg_emit_insn_result insn; \ - if (ureg_dst_is_empty(dst)) \ - return; \ - insn = ureg_emit_insn(ureg, \ - opcode, \ - dst.Saturate, \ - dst.Predicate, \ - dst.PredNegate, \ - dst.PredSwizzleX, \ - dst.PredSwizzleY, \ - dst.PredSwizzleZ, \ - dst.PredSwizzleW, \ - 1, \ - 5); \ - ureg_emit_texture( ureg, insn.extended_token, target, 0 ); \ - ureg_emit_dst( ureg, dst ); \ - ureg_emit_src( ureg, src0 ); \ - ureg_emit_src( ureg, src1 ); \ - ureg_emit_src( ureg, src2 ); \ - ureg_emit_src( ureg, src3 ); \ - ureg_emit_src( ureg, src4 ); \ - ureg_fixup_insn_size( ureg, insn.insn_token ); \ -} - /* Use a template include to generate a correctly-typed ureg_OP() * function for each TGSI opcode: */ @@ -1159,29 +901,10 @@ ureg_saturate( struct ureg_dst reg ) return reg; } -static inline struct ureg_dst -ureg_predicate(struct ureg_dst reg, - boolean negate, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w) -{ - assert(reg.File != TGSI_FILE_NULL); - reg.Predicate = 1; - reg.PredNegate = negate; - reg.PredSwizzleX = swizzle_x; - reg.PredSwizzleY = swizzle_y; - reg.PredSwizzleZ = swizzle_z; - reg.PredSwizzleW = swizzle_w; - return reg; -} - static inline struct ureg_dst ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr ) { assert(reg.File != TGSI_FILE_NULL); - assert(addr.File == 
TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY); reg.Indirect = 1; reg.IndirectFile = addr.File; reg.IndirectIndex = addr.Index; @@ -1193,7 +916,6 @@ static inline struct ureg_src ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) { assert(reg.File != TGSI_FILE_NULL); - assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY); reg.Indirect = 1; reg.IndirectFile = addr.File; reg.IndirectIndex = addr.Index; @@ -1277,12 +999,6 @@ ureg_dst_array_register(unsigned file, dst.IndirectIndex = 0; dst.IndirectSwizzle = 0; dst.Saturate = 0; - dst.Predicate = 0; - dst.PredNegate = 0; - dst.PredSwizzleX = TGSI_SWIZZLE_X; - dst.PredSwizzleY = TGSI_SWIZZLE_Y; - dst.PredSwizzleZ = TGSI_SWIZZLE_Z; - dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = index; dst.Dimension = 0; dst.DimensionIndex = 0; @@ -1307,10 +1023,6 @@ ureg_dst( struct ureg_src src ) { struct ureg_dst dst; - assert(!src.Indirect || - (src.IndirectFile == TGSI_FILE_ADDRESS || - src.IndirectFile == TGSI_FILE_TEMPORARY)); - dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.IndirectFile = src.IndirectFile; @@ -1318,12 +1030,6 @@ ureg_dst( struct ureg_src src ) dst.IndirectIndex = src.IndirectIndex; dst.IndirectSwizzle = src.IndirectSwizzle; dst.Saturate = 0; - dst.Predicate = 0; - dst.PredNegate = 0; - dst.PredSwizzleX = TGSI_SWIZZLE_X; - dst.PredSwizzleY = TGSI_SWIZZLE_Y; - dst.PredSwizzleZ = TGSI_SWIZZLE_Z; - dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = src.Index; dst.Dimension = src.Dimension; dst.DimensionIndex = src.DimensionIndex; @@ -1415,12 +1121,6 @@ ureg_dst_undef( void ) dst.IndirectIndex = 0; dst.IndirectSwizzle = 0; dst.Saturate = 0; - dst.Predicate = 0; - dst.PredNegate = 0; - dst.PredSwizzleX = TGSI_SWIZZLE_X; - dst.PredSwizzleY = TGSI_SWIZZLE_Y; - dst.PredSwizzleZ = TGSI_SWIZZLE_Z; - dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = 0; dst.Dimension = 0; dst.DimensionIndex = 0; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c 
b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c index fbe29626a..afe5690ce 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -27,9 +27,11 @@ #include "util/u_debug.h" #include "pipe/p_shader_tokens.h" +#include "tgsi_info.h" #include "tgsi_parse.h" #include "tgsi_util.h" #include "tgsi_exec.h" +#include "util/bitscan.h" union pointer_hack { @@ -180,85 +182,80 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, unsigned chan; switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_ARL: - case TGSI_OPCODE_ARR: + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_EMIT: + case TGSI_OPCODE_ENDPRIM: case TGSI_OPCODE_RCP: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_DIV: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MAX: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_LRP: - case TGSI_OPCODE_FMA: - case TGSI_OPCODE_FRC: - case TGSI_OPCODE_CEIL: - case TGSI_OPCODE_CLAMP: - case TGSI_OPCODE_FLR: - case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_POW: - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - case TGSI_OPCODE_DDX: - case TGSI_OPCODE_DDY: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_SSG: - case TGSI_OPCODE_CMP: - case TGSI_OPCODE_TRUNC: - case TGSI_OPCODE_NOT: - case TGSI_OPCODE_AND: - case TGSI_OPCODE_OR: - case TGSI_OPCODE_XOR: - case TGSI_OPCODE_SAD: - case TGSI_OPCODE_FSEQ: - case TGSI_OPCODE_FSGE: - case TGSI_OPCODE_FSLT: - case TGSI_OPCODE_FSNE: - case TGSI_OPCODE_F2I: - case TGSI_OPCODE_IDIV: - case TGSI_OPCODE_IMAX: - case TGSI_OPCODE_IMIN: - case TGSI_OPCODE_INEG: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_ISHR: - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_F2U: - case TGSI_OPCODE_U2F: - case TGSI_OPCODE_UADD: - case TGSI_OPCODE_UDIV: - case TGSI_OPCODE_UMAD: - 
case TGSI_OPCODE_UMAX: - case TGSI_OPCODE_UMIN: - case TGSI_OPCODE_UMOD: - case TGSI_OPCODE_UMUL: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USHR: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: - case TGSI_OPCODE_IMUL_HI: - case TGSI_OPCODE_UMUL_HI: - case TGSI_OPCODE_DDX_FINE: - case TGSI_OPCODE_DDY_FINE: - /* Channel-wise operations */ - read_mask = write_mask; - break; - + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SQRT: case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_POW: /* reads src0.x and src1.x */ + case TGSI_OPCODE_UP2H: + case TGSI_OPCODE_UP2US: + case TGSI_OPCODE_UP4B: + case TGSI_OPCODE_UP4UB: + case TGSI_OPCODE_MEMBAR: + case TGSI_OPCODE_BALLOT: read_mask = TGSI_WRITEMASK_X; break; - case TGSI_OPCODE_SCS: - read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0; + case TGSI_OPCODE_DP2: + case TGSI_OPCODE_PK2H: + case TGSI_OPCODE_PK2US: + case TGSI_OPCODE_DFRACEXP: + case TGSI_OPCODE_F2D: + case TGSI_OPCODE_I2D: + case TGSI_OPCODE_U2D: + case TGSI_OPCODE_F2U64: + case TGSI_OPCODE_F2I64: + case TGSI_OPCODE_U2I64: + case TGSI_OPCODE_I2I64: + case TGSI_OPCODE_TXQS: /* bindless handle possible */ + case TGSI_OPCODE_RESQ: /* bindless handle possible */ + read_mask = TGSI_WRITEMASK_XY; + break; + + case TGSI_OPCODE_TXQ: + if (src_idx == 0) + read_mask = TGSI_WRITEMASK_X; + else + read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ + break; + + case TGSI_OPCODE_DP3: + read_mask = TGSI_WRITEMASK_XYZ; + break; + + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_PK4B: + case TGSI_OPCODE_PK4UB: + case TGSI_OPCODE_D2F: + case TGSI_OPCODE_D2I: + case TGSI_OPCODE_D2U: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_U642F: + case 
TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_I642F: + read_mask = TGSI_WRITEMASK_XYZW; + break; + + case TGSI_OPCODE_LIT: + read_mask = write_mask & TGSI_WRITEMASK_YZ ? + TGSI_WRITEMASK_XY | TGSI_WRITEMASK_W : 0; break; case TGSI_OPCODE_EXP: @@ -266,78 +263,158 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0; break; - case TGSI_OPCODE_DP2A: - read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY; - break; - - case TGSI_OPCODE_DP2: - read_mask = TGSI_WRITEMASK_XY; + case TGSI_OPCODE_DST: + if (src_idx == 0) + read_mask = TGSI_WRITEMASK_YZ; + else + read_mask = TGSI_WRITEMASK_YW; break; - case TGSI_OPCODE_DP3: - read_mask = TGSI_WRITEMASK_XYZ; + case TGSI_OPCODE_DLDEXP: + if (src_idx == 0) { + read_mask = write_mask; + } else { + read_mask = + (write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0) | + (write_mask & TGSI_WRITEMASK_ZW ? TGSI_WRITEMASK_Z : 0); + } break; - case TGSI_OPCODE_DP4: - read_mask = TGSI_WRITEMASK_XYZW; + case TGSI_OPCODE_READ_INVOC: + if (src_idx == 0) + read_mask = write_mask; + else + read_mask = TGSI_WRITEMASK_X; break; - case TGSI_OPCODE_DPH: - read_mask = src_idx == 0 ? 
TGSI_WRITEMASK_XYZ : TGSI_WRITEMASK_XYZW; + case TGSI_OPCODE_FBFETCH: + read_mask = 0; /* not a real register read */ break; case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TEX_LZ: + case TGSI_OPCODE_TXF_LZ: + case TGSI_OPCODE_TXF: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: - if (src_idx == 0) { - /* Note that the SHADOW variants use the Z component too */ - switch (inst->Texture.Texture) { - case TGSI_TEXTURE_1D: - read_mask = TGSI_WRITEMASK_X; - break; - case TGSI_TEXTURE_SHADOW1D: - read_mask = TGSI_WRITEMASK_XZ; - break; - case TGSI_TEXTURE_1D_ARRAY: - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - read_mask = TGSI_WRITEMASK_XY; - break; - case TGSI_TEXTURE_SHADOW1D_ARRAY: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_2D_ARRAY: - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - case TGSI_TEXTURE_2D_MSAA: - read_mask = TGSI_WRITEMASK_XYZ; - break; - case TGSI_TEXTURE_SHADOW2D_ARRAY: - case TGSI_TEXTURE_CUBE_ARRAY: - case TGSI_TEXTURE_SHADOWCUBE: - case TGSI_TEXTURE_2D_ARRAY_MSAA: - case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - read_mask = TGSI_WRITEMASK_XYZW; - break; - default: - assert(0); - read_mask = 0; - } - - if (inst->Instruction.Opcode != TGSI_OPCODE_TEX) { + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TEX2: + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXL2: + case TGSI_OPCODE_LODQ: + case TGSI_OPCODE_TG4: { + unsigned dim_layer = + tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + unsigned dim_layer_shadow, dim; + + /* Add shadow. */ + if (tgsi_is_shadow_target(inst->Texture.Texture)) { + dim_layer_shadow = dim_layer + 1; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D) + dim_layer_shadow = 3; + } else { + dim_layer_shadow = dim_layer; + } + + /* Remove layer. 
*/ + if (tgsi_is_array_sampler(inst->Texture.Texture)) + dim = dim_layer - 1; + else + dim = dim_layer; + + read_mask = TGSI_WRITEMASK_XY; /* bindless handle in the last operand */ + + switch (src_idx) { + case 0: + if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) + read_mask = u_bit_consecutive(0, dim); + else + read_mask = u_bit_consecutive(0, dim_layer_shadow) & 0xf; + + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D) + read_mask &= ~TGSI_WRITEMASK_Y; + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXF || + inst->Instruction.Opcode == TGSI_OPCODE_TXB || + inst->Instruction.Opcode == TGSI_OPCODE_TXL || + inst->Instruction.Opcode == TGSI_OPCODE_TXP) read_mask |= TGSI_WRITEMASK_W; - } + break; + + case 1: + if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) + read_mask = u_bit_consecutive(0, dim); + else if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || + inst->Instruction.Opcode == TGSI_OPCODE_TG4) + read_mask = TGSI_WRITEMASK_X; + break; + + case 2: + if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) + read_mask = u_bit_consecutive(0, dim); + break; + } + break; + } + + case TGSI_OPCODE_LOAD: + if (src_idx == 0) { + read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ + } else { + unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); + read_mask = u_bit_consecutive(0, dim); + } + break; + + case TGSI_OPCODE_STORE: + if (src_idx == 0) { + unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); + read_mask = u_bit_consecutive(0, dim); + } else { + read_mask = TGSI_WRITEMASK_XYZW; + } + break; + + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + if (src_idx == 0) { + read_mask = 
TGSI_WRITEMASK_XY; /* bindless handle possible */ + } else if (src_idx == 1) { + unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); + read_mask = u_bit_consecutive(0, dim); } else { - /* A safe approximation */ read_mask = TGSI_WRITEMASK_XYZW; } break; + case TGSI_OPCODE_INTERP_CENTROID: + case TGSI_OPCODE_INTERP_SAMPLE: + case TGSI_OPCODE_INTERP_OFFSET: + if (src_idx == 0) + read_mask = write_mask; + else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) + read_mask = TGSI_WRITEMASK_XY; /* offset */ + else + read_mask = TGSI_WRITEMASK_X; /* sample */ + break; + default: - /* Assume all channels are read */ - read_mask = TGSI_WRITEMASK_XYZW; + if (tgsi_get_opcode_info(inst->Instruction.Opcode)->output_mode == + TGSI_OUTPUT_COMPONENTWISE) + read_mask = write_mask; + else + read_mask = TGSI_WRITEMASK_XYZW; /* assume all channels are read */ break; } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h index 83a930b69..534b5f78e 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -28,12 +28,12 @@ #ifndef TGSI_UTIL_H #define TGSI_UTIL_H +#include "pipe/p_shader_tokens.h" + #if defined __cplusplus extern "C" { #endif -#include "pipe/p_shader_tokens.h" - struct tgsi_src_register; struct tgsi_full_src_register; struct tgsi_full_instruction; @@ -98,6 +98,18 @@ tgsi_is_msaa_target(unsigned target) target == TGSI_TEXTURE_2D_ARRAY_MSAA); } +static inline bool +tgsi_is_array_sampler(unsigned target) +{ + return target == TGSI_TEXTURE_1D_ARRAY || + target == TGSI_TEXTURE_SHADOW1D_ARRAY || + target == TGSI_TEXTURE_2D_ARRAY || + target == TGSI_TEXTURE_SHADOW2D_ARRAY || + target == TGSI_TEXTURE_CUBE_ARRAY || + target == TGSI_TEXTURE_SHADOWCUBE_ARRAY || + target == TGSI_TEXTURE_2D_ARRAY_MSAA; +} + #if defined __cplusplus } #endif |