summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2015-12-23 13:27:37 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2015-12-23 13:27:37 +0000
commit5f7540fe56d9bd78811acbd834be64d07862b5a5 (patch)
treea88beb5592749f9418634348467950aeaf1f9d01
parent2f35ab22e92c7e0d9211cd62d9e57904c69940a0 (diff)
Import Mesa 11.0.8
This seems to fix some of the problems with clutter/gnome reported to occur on r600 with 11.0.6
-rw-r--r--lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp9
-rw-r--r--lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c14
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c3
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h1
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_helpers.c8
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h8
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c8
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_texture.c1
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp2
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp15
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp3
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp6
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp5
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp7
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp43
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp46
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp2
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp2
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nouveau_buffer.c8
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_context.c15
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_state.c7
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c17
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c1
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c6
-rw-r--r--lib/mesa/src/gallium/drivers/r600/evergreen_state.c9
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_pipe.h2
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_shader.c159
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_state.c29
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_state_common.c18
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600d.h1
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/Makefile.am5
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/cayman_msaa.c12
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c7
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600d_common.h2
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c2
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h5
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vce.c5
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_compute.c39
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_shader.c10
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_shader.h4
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state.c23
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c22
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.c4
-rw-r--r--lib/mesa/src/gallium/targets/opencl/Makefile.am7
44 files changed, 472 insertions, 130 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 7bda1184e..3ee708f4f 100644
--- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -536,6 +536,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
#if defined(PIPE_ARCH_PPC)
MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+#if HAVE_LLVM >= 0x0304
+ /*
+ * Make sure VSX instructions are disabled
+ * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
+ */
+ if (util_cpu_caps.has_altivec) {
+ MAttrs.push_back("-vsx");
+ }
+#endif
#endif
builder.setMAttrs(MAttrs);
diff --git a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 93dfb8033..14370e26d 100644
--- a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1087,6 +1087,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
op = nir_texop_tex;
num_srcs = 1;
break;
+ case TGSI_OPCODE_TEX2:
+ op = nir_texop_tex;
+ num_srcs = 1;
+ samp = 2;
+ break;
case TGSI_OPCODE_TXP:
op = nir_texop_tex;
num_srcs = 2;
@@ -1242,10 +1247,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
}
if (instr->is_shadow) {
- if (instr->coord_components < 3)
- instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
- else
+ if (instr->coord_components == 4)
+ instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
+ else if (instr->coord_components == 3)
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
+ else
+ instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
instr->src[src_number].src_type = nir_tex_src_comparitor;
src_number++;
@@ -1651,6 +1658,7 @@ ttn_emit_instruction(struct ttn_compile *c)
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXQ_LZ:
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 7523baf4c..98a5fec77 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -258,6 +258,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->output_semantic_index[reg] = (ubyte) semIndex;
info->num_outputs++;
+ if (semName == TGSI_SEMANTIC_COLOR)
+ info->colors_written |= 1 << semIndex;
+
if (procType == TGSI_PROCESSOR_VERTEX ||
procType == TGSI_PROCESSOR_GEOMETRY ||
procType == TGSI_PROCESSOR_TESS_CTRL ||
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h
index b81bdd71f..6301f91c1 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -76,6 +76,7 @@ struct tgsi_shader_info
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
+ ubyte colors_written;
boolean reads_position; /**< does fragment shader read position? */
boolean reads_z; /**< does fragment shader read depth? */
boolean writes_z; /**< does fragment shader write Z value? */
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c
index ac1edcdbb..117a51b3b 100644
--- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c
+++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c
@@ -81,7 +81,13 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
const struct pipe_vertex_buffer *src,
unsigned start_slot, unsigned count)
{
- uint32_t enabled_buffers = (1ull << *dst_count) - 1;
+ unsigned i;
+ uint32_t enabled_buffers = 0;
+
+ for (i = 0; i < *dst_count; i++) {
+ if (dst[i].buffer || dst[i].user_buffer)
+ enabled_buffers |= (1ull << i);
+ }
util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
count);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 2e1d712a2..149a197ae 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -153,7 +153,7 @@ enum a4xx_vtx_fmt {
enum a4xx_tex_fmt {
TFMT4_5_6_5_UNORM = 11,
- TFMT4_5_5_5_1_UNORM = 10,
+ TFMT4_5_5_5_1_UNORM = 9,
TFMT4_4_4_4_4_UNORM = 8,
TFMT4_X8Z24_UNORM = 71,
TFMT4_10_10_10_2_UNORM = 33,
@@ -2718,6 +2718,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
{
return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
}
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000
+#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19
+static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val)
+{
+ return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK;
+}
#define REG_A4XX_TEX_SAMP_1 0x00000001
#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index a3d7123cc..47e8855e7 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -250,14 +250,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
}
}
- /* adjust regids for alpha output formats. there is no alpha render
- * format, so it's just treated like red
- */
- for (i = 0; i < nr; i++)
- if (util_format_is_alpha(pipe_surface_format(bufs[i])))
- color_regid[i] += 3;
-
-
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 213b29c91..7a28e09b3 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -111,6 +111,7 @@ fd4_sampler_state_create(struct pipe_context *pctx,
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->texsamp1 |=
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
index fa8ee072a..9f0e07333 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
@@ -291,7 +291,7 @@ void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b)
if (b->prev)
b->prev->next = b;
- if (a->prev)
+ if (a->next)
a->next->prev = a;
}
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 8f1542959..fa9336283 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -575,8 +575,8 @@ CodeEmitterGK110::emitIMUL(const Instruction *i)
if (isLIMM(i->src(1), TYPE_S32)) {
emitForm_L(i, 0x280, 2, Modifier(0));
- assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH);
-
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+ code[1] |= 1 << 24;
if (i->sType == TYPE_S32)
code[1] |= 3 << 25;
} else {
@@ -695,14 +695,9 @@ CodeEmitterGK110::emitIMAD(const Instruction *i)
if (i->sType == TYPE_S32)
code[1] |= (1 << 19) | (1 << 24);
- if (code[0] & 0x1) {
- assert(!i->subOp);
- SAT_(39);
- } else {
- if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
- code[1] |= 1 << 25;
- SAT_(35);
- }
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+ code[1] |= 1 << 25;
+ SAT_(35);
}
void
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 6bf5219d3..f88f4c2e9 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2322,6 +2322,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_PFETCH:
emitPFETCH(insn);
break;
+ case OP_AFETCH:
+ emitAFETCH(insn);
+ break;
case OP_EMIT:
case OP_RESTART:
emitOUT(insn);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index f153674e9..f4ff33164 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -2870,6 +2870,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
}
setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
+
+ // If the loop never breaks (e.g. only has RET's inside), then there
+ // will be no way to get to the break bb. However BGNLOOP will have
+ // already made a PREBREAK to it, so it must be in the CFG.
+ if (getBB()->cfg.incidentCount() == 0)
+ loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
}
break;
case TGSI_OPCODE_BRK:
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index d87cdfff8..4f4320b4d 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -202,7 +202,8 @@ NV50LegalizePostRA::visit(Function *fn)
Program *prog = fn->getProgram();
r63 = new_LValue(fn, FILE_GPR);
- if (prog->maxGPR < 63)
+ // GPR units on nv50 are in half-regs
+ if (prog->maxGPR < 126)
r63->reg.data.id = 63;
else
r63->reg.data.id = 127;
@@ -831,7 +832,7 @@ NV50LoweringPreSSA::handleTXB(TexInstruction *i)
}
Value *flags = bld.getScratch(1, FILE_FLAGS);
bld.setPosition(cond, true);
- bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0));
+ bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0))->flagsDef = 0;
Instruction *tex[4];
for (l = 0; l < 4; ++l) {
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index b1f406585..0f575f2ee 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -686,7 +686,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
i->tex.s = 0x1f;
i->setIndirectR(hnd);
i->setIndirectS(NULL);
- } else if (i->tex.r == i->tex.s) {
+ } else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
i->tex.r += prog->driver->io.texBindBase / 4;
i->tex.s = 0; // only a single cX[] value possible here
} else {
@@ -962,11 +962,14 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
bool
NVC0LoweringPass::handleTXQ(TexInstruction *txq)
{
+ const int chipset = prog->getTarget()->getChipset();
+ if (chipset >= NVISA_GK104_CHIPSET && txq->tex.rIndirectSrc < 0)
+ txq->tex.r += prog->driver->io.texBindBase / 4;
+
if (txq->tex.rIndirectSrc < 0)
return true;
Value *ticRel = txq->getIndirectR();
- const int chipset = prog->getTarget()->getChipset();
txq->setIndirectS(NULL);
txq->tex.sIndirectSrc = -1;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 44f74c613..13f36d0cf 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -842,6 +842,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->src(0).mod = i->src(t).mod;
i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
i->src(1).mod = 0;
+ } else
+ if (i->postFactor && i->sType == TYPE_F32) {
+ /* Can't emit a postfactor with an immediate, have to fold it in */
+ i->setSrc(s, new_ImmediateValue(
+ prog, imm0.reg.data.f32 * exp2f(i->postFactor)));
+ i->postFactor = 0;
}
break;
case OP_MAD:
@@ -2606,8 +2612,11 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
i->getSrc(0)->reg.data.id >= 64)
break;
+ if (i->getPredicate())
+ break;
+
def = i->getSrc(1)->getInsn();
- if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
+ if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
vtmp = i->getSrc(1);
i->setSrc(1, def->getSrc(0));
@@ -2909,6 +2918,16 @@ DeadCodeElim::visit(BasicBlock *bb)
return true;
}
+// Each load can go into up to 4 destinations, any of which might potentially
+// be dead (i.e. a hole). These can always be split into 2 loads, independent
+// of where the holes are. We find the first contiguous region, put it into
+// the first load, and then put the second contiguous region into the second
+// load. There can be at most 2 contiguous regions.
+//
+// Note that there are some restrictions, for example it's not possible to do
+// a 64-bit load that's not 64-bit aligned, so such a load has to be split
+// up. Also hardware doesn't support 96-bit loads, so those also have to be
+// split into a 64-bit and 32-bit load.
void
DeadCodeElim::checkSplitLoad(Instruction *ld1)
{
@@ -2929,6 +2948,8 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
addr1 = ld1->getSrc(0)->reg.data.offset;
n1 = n2 = 0;
size1 = size2 = 0;
+
+ // Compute address/width for first load
for (d = 0; ld1->defExists(d); ++d) {
if (mask & (1 << d)) {
if (size1 && (addr1 & 0x7))
@@ -2942,16 +2963,34 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
break;
}
}
+
+ // Scale back the size of the first load until it can be loaded. This
+ // typically happens for TYPE_B96 loads.
+ while (n1 &&
+ !prog->getTarget()->isAccessSupported(ld1->getSrc(0)->reg.file,
+ typeOfSize(size1))) {
+ size1 -= def1[--n1]->reg.size;
+ d--;
+ }
+
+ // Compute address/width for second load
for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
if (mask & (1 << d)) {
+ assert(!size2 || !(addr2 & 0x7));
def2[n2] = ld1->getDef(d);
size2 += def2[n2++]->reg.size;
- } else {
+ } else if (!n2) {
assert(!n2);
addr2 += ld1->getDef(d)->reg.size;
+ } else {
+ break;
}
}
+ // Make sure that we've processed all the values
+ for (; ld1->defExists(d); ++d)
+ assert(!(mask & (1 << d)));
+
updateLdStOffset(ld1, addr1, func);
ld1->setType(typeOfSize(size1));
for (d = 0; d < 4; ++d)
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 7859c8e79..41d2cc916 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
Instruction *st;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
- st = new_Instruction(func, OP_STORE, ty);
- st->setSrc(0, slot);
- st->setSrc(1, lval);
lval->noSpill = 1;
+ if (ty != TYPE_B96) {
+ st = new_Instruction(func, OP_STORE, ty);
+ st->setSrc(0, slot);
+ st->setSrc(1, lval);
+ } else {
+ st = new_Instruction(func, OP_SPLIT, ty);
+ st->setSrc(0, lval);
+ for (int d = 0; d < lval->reg.size / 4; ++d)
+ st->setDef(d, new_LValue(func, FILE_GPR));
+
+ for (int d = lval->reg.size / 4 - 1; d >= 0; --d) {
+ Value *tmp = cloneShallow(func, slot);
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32);
+ s->setSrc(0, tmp);
+ s->setSrc(1, st->getDef(d));
+ defi->bb->insertAfter(defi, s);
+ }
+ }
} else {
st = new_Instruction(func, OP_CVT, ty);
st->setDef(0, slot);
@@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
Instruction *ld;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
lval->noSpill = 1;
- ld = new_Instruction(func, OP_LOAD, ty);
+ if (ty != TYPE_B96) {
+ ld = new_Instruction(func, OP_LOAD, ty);
+ } else {
+ ld = new_Instruction(func, OP_MERGE, ty);
+ for (int d = 0; d < lval->reg.size / 4; ++d) {
+ Value *tmp = cloneShallow(func, slot);
+ LValue *val;
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32);
+ l->setDef(0, (val = new_LValue(func, FILE_GPR)));
+ l->setSrc(0, tmp);
+ usei->bb->insertBefore(usei, l);
+ ld->setSrc(d, val);
+ val->noSpill = 1;
+ }
+ ld->setDef(0, lval);
+ usei->bb->insertBefore(usei, ld);
+ return lval;
+ }
} else {
ld = new_Instruction(func, OP_CVT, ty);
}
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index f3ddcaa51..76a76545c 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -454,7 +454,7 @@ TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const
return false;
}
}
- if (s >= 3)
+ if (s >= opInfo[insn->op].srcNr || s >= 3)
return false;
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
}
diff --git a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 27df0eba6..3b4e8025b 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/lib/mesa/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -426,7 +426,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
return false;
}
}
- if (s >= 3)
+ if (s >= opInfo[insn->op].srcNr || s >= 3)
return false;
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
}
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nouveau_buffer.c b/lib/mesa/src/gallium/drivers/nouveau/nouveau_buffer.c
index 72e070b5f..371e1ec76 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -656,8 +656,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
buffer->domain = NOUVEAU_BO_GART;
- } else if (buffer->base.bind &
- (screen->vidmem_bindings & screen->sysmem_bindings)) {
+ } else if (buffer->base.bind == 0 || (buffer->base.bind &
+ (screen->vidmem_bindings & screen->sysmem_bindings))) {
switch (buffer->base.usage) {
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
@@ -684,6 +684,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.bind & screen->sysmem_bindings)
buffer->domain = NOUVEAU_BO_GART;
}
+ /* There can be very special situations where we want non-gpu-mapped
+ * buffers, but never through this interface.
+ */
+ assert(buffer->domain);
ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
if (ret == false)
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_context.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 152c2ce13..2710b7524 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -159,9 +159,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
if (nv50->framebuffer.cbufs[i] &&
@@ -173,7 +174,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nv50->framebuffer.zsbuf &&
nv50->framebuffer.zsbuf->texture == res) {
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
@@ -183,11 +184,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_SAMPLER_VIEW)) {
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (i = 0; i < nv50->num_vtxbufs; ++i) {
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_state.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 410e6311e..335d95259 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -960,6 +960,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
unsigned i;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+ nv50->dirty |= NV50_NEW_ARRAYS;
+
util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb,
start_slot, count);
@@ -983,10 +986,6 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
nv50->vbo_constant &= ~(1 << dst_index);
}
}
-
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
-
- nv50->dirty |= NV50_NEW_ARRAYS;
}
static void
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 7a15a11f5..a0b9a54d9 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_COMMAND_ARGS_BUFFER |
+ PIPE_BIND_SAMPLER_VIEW)) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 191e3b727..139a5039b 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -417,6 +417,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
if (screen->pm.prog) {
screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
nvc0_program_destroy(NULL, screen->pm.prog);
+ FREE(screen->pm.prog);
}
nouveau_bo_ref(NULL, &screen->text);
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index c5bfd0395..310e30f52 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -998,6 +998,9 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
unsigned i;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+
util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb,
start_slot, count);
@@ -1021,9 +1024,6 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
nvc0->constant_vbos &= ~(1 << dst_index);
}
}
-
- nvc0->dirty |= NVC0_NEW_ARRAYS;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
}
static void
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
index 2dc381178..1976d873f 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
@@ -1527,12 +1527,17 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
+ r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
+ EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+ EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
} else {
r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+ r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+ EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
}
}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe.h b/lib/mesa/src/gallium/drivers/r600/r600_pipe.h
index bb91f8308..896e3f71e 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_pipe.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe.h
@@ -57,7 +57,7 @@
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
-#define R600_MAX_DRAW_CS_DWORDS 47
+#define R600_MAX_DRAW_CS_DWORDS 52
#define R600_TRACE_CS_DWORDS 7
#define R600_MAX_USER_CONST_BUFFERS 13
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.c b/lib/mesa/src/gallium/drivers/r600/r600_shader.c
index 819f9a798..911e81fed 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_shader.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.c
@@ -598,6 +598,106 @@ static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back
return 0;
}
+/* execute a single slot ALU calculation */
+static int single_alu_op2(struct r600_shader_ctx *ctx, int op,
+ int dst_sel, int dst_chan,
+ int src0_sel, unsigned src0_chan_val,
+ int src1_sel, unsigned src1_chan_val)
+{
+ struct r600_bytecode_alu alu;
+ int r, i;
+
+ if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = op;
+ alu.src[0].sel = src0_sel;
+ if (src0_sel == V_SQ_ALU_SRC_LITERAL)
+ alu.src[0].value = src0_chan_val;
+ else
+ alu.src[0].chan = src0_chan_val;
+ alu.src[1].sel = src1_sel;
+ if (src1_sel == V_SQ_ALU_SRC_LITERAL)
+ alu.src[1].value = src1_chan_val;
+ else
+ alu.src[1].chan = src1_chan_val;
+ alu.dst.sel = dst_sel;
+ alu.dst.chan = i;
+ alu.dst.write = i == dst_chan;
+ alu.last = (i == 3);
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+ }
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = op;
+ alu.src[0].sel = src0_sel;
+ if (src0_sel == V_SQ_ALU_SRC_LITERAL)
+ alu.src[0].value = src0_chan_val;
+ else
+ alu.src[0].chan = src0_chan_val;
+ alu.src[1].sel = src1_sel;
+ if (src1_sel == V_SQ_ALU_SRC_LITERAL)
+ alu.src[1].value = src1_chan_val;
+ else
+ alu.src[1].chan = src1_chan_val;
+ alu.dst.sel = dst_sel;
+ alu.dst.chan = dst_chan;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ return 0;
+}
+
+/* execute a single slot ALU calculation */
+static int single_alu_op3(struct r600_shader_ctx *ctx, int op,
+ int dst_sel, int dst_chan,
+ int src0_sel, unsigned src0_chan_val,
+ int src1_sel, unsigned src1_chan_val,
+ int src2_sel, unsigned src2_chan_val)
+{
+ struct r600_bytecode_alu alu;
+ int r;
+
+ /* validate this for other ops */
+ assert(op == ALU_OP3_MULADD_UINT24);
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = op;
+ alu.src[0].sel = src0_sel;
+ if (src0_sel == V_SQ_ALU_SRC_LITERAL)
+ alu.src[0].value = src0_chan_val;
+ else
+ alu.src[0].chan = src0_chan_val;
+ alu.src[1].sel = src1_sel;
+ if (src1_sel == V_SQ_ALU_SRC_LITERAL)
+ alu.src[1].value = src1_chan_val;
+ else
+ alu.src[1].chan = src1_chan_val;
+ alu.src[2].sel = src2_sel;
+ if (src2_sel == V_SQ_ALU_SRC_LITERAL)
+ alu.src[2].value = src2_chan_val;
+ else
+ alu.src[2].chan = src2_chan_val;
+ alu.dst.sel = dst_sel;
+ alu.dst.chan = dst_chan;
+ alu.is_op3 = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ return 0;
+}
+
+static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
+{
+ return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
+}
+
static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
{
int i;
@@ -1129,6 +1229,7 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
unsigned vtx_id = src->Dimension.Index;
int offset_reg = vtx_id / 3;
int offset_chan = vtx_id % 3;
+ int t2 = 0;
/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
* R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
@@ -1136,13 +1237,24 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
if (offset_reg == 0 && offset_chan == 2)
offset_chan = 3;
+ if (src->Dimension.Indirect || src->Register.Indirect)
+ t2 = r600_get_temp(ctx);
+
if (src->Dimension.Indirect) {
int treg[3];
- int t2;
struct r600_bytecode_alu alu;
int r, i;
-
- /* you have got to be shitting me -
+ unsigned addr_reg;
+ addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index);
+ if (src->DimIndirect.Index > 0) {
+ r = single_alu_op2(ctx, ALU_OP1_MOV,
+ ctx->bc->ar_reg, 0,
+ addr_reg, 0,
+ 0, 0);
+ if (r)
+ return r;
+ }
+ /*
we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
at least this is what fglrx seems to do. */
for (i = 0; i < 3; i++) {
@@ -1150,7 +1262,6 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
}
r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
- t2 = r600_get_temp(ctx);
for (i = 0; i < 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
@@ -1175,8 +1286,33 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
if (r)
return r;
offset_reg = t2;
+ offset_chan = 0;
}
+ if (src->Register.Indirect) {
+ int addr_reg;
+ unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID];
+
+ addr_reg = get_address_file_reg(ctx, src->Indirect.Index);
+
+ /* pull the value from index_reg */
+ r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+ t2, 1,
+ addr_reg, 0,
+ V_SQ_ALU_SRC_LITERAL, first);
+ if (r)
+ return r;
+ r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
+ t2, 0,
+ t2, 1,
+ V_SQ_ALU_SRC_LITERAL, 4,
+ offset_reg, offset_chan);
+ if (r)
+ return r;
+ offset_reg = t2;
+ offset_chan = 0;
+ index = src->Register.Index - first;
+ }
memset(&vtx, 0, sizeof(vtx));
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
@@ -1222,6 +1358,7 @@ static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
fetch_gs_input(ctx, src, treg);
ctx->src[i].sel = treg;
+ ctx->src[i].rel = 0;
}
}
return 0;
@@ -1972,7 +2109,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.nliterals = 0;
ctx.literals = NULL;
- shader->fs_write_all = FALSE;
+
+ shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
+ ctx.info.colors_written == 1;
if (shader->vs_as_gs_a)
vs_add_primid_output(&ctx, key.vs.prim_id_out);
@@ -2003,10 +2142,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
case TGSI_TOKEN_TYPE_PROPERTY:
property = &ctx.parse.FullToken.FullProperty;
switch (property->Property.PropertyName) {
- case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
- if (property->u[0].Data == 1)
- shader->fs_write_all = TRUE;
- break;
case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
if (property->u[0].Data == 1)
shader->vs_position_window_space = TRUE;
@@ -2159,6 +2294,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_bytecode_alu alu;
int r;
+ /* GS thread with no output workaround - emit a cut at start of GS */
+ if (ctx.bc->chip_class == R600)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
+
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
@@ -6671,7 +6810,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
struct r600_bytecode_alu alu;
int r;
int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- unsigned reg = inst->Dst[0].Register.Index > 0 ? ctx->bc->index_reg[inst->Dst[0].Register.Index - 1] : ctx->bc->ar_reg;
+ unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);
assert(inst->Dst[0].Register.Index < 3);
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state.c b/lib/mesa/src/gallium/drivers/r600/r600_state.c
index 2c727815d..a588e16e5 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_state.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_state.c
@@ -2181,10 +2181,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
num_temp_gprs = 4;
num_gs_gprs = 0;
num_es_gprs = 0;
- num_ps_threads = 136;
- num_vs_threads = 48;
- num_gs_threads = 4;
- num_es_threads = 4;
+ /* use limits 40 VS and at least 16 ES/GS */
+ num_ps_threads = 120;
+ num_vs_threads = 40;
+ num_gs_threads = 16;
+ num_es_threads = 16;
num_ps_stack_entries = 40;
num_vs_stack_entries = 40;
num_gs_stack_entries = 32;
@@ -2643,6 +2644,9 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport);
}
+#define RV610_GSVS_ALIGN 32
+#define R600_GSVS_ALIGN 16
+
void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -2652,6 +2656,23 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
unsigned gsvs_itemsize =
(cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
+ /* some r600s needs gsvs itemsize aligned to cacheline size
+ this was fixed in rs780 and above. */
+ switch (rctx->b.family) {
+ case CHIP_RV610:
+ gsvs_itemsize = align(gsvs_itemsize, RV610_GSVS_ALIGN);
+ break;
+ case CHIP_R600:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ gsvs_itemsize = align(gsvs_itemsize, R600_GSVS_ALIGN);
+ break;
+ default:
+ break;
+ }
+
r600_init_command_buffer(cb, 64);
/* VGT_GS_MODE is written by r600_emit_shader_stages */
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c
index bdd9337ec..2eebb5792 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c
@@ -1691,6 +1691,24 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
}
+ /* SMX returns CONTEXT_DONE too early workaround */
+ if (rctx->b.family == CHIP_R600 ||
+ rctx->b.family == CHIP_RV610 ||
+ rctx->b.family == CHIP_RV630 ||
+ rctx->b.family == CHIP_RV635) {
+ /* if we have gs shader or streamout
+ we need to do a wait idle after every draw */
+ if (rctx->gs_shader || rctx->b.streamout.streamout_enabled) {
+ r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ }
+ }
+
+ /* ES ring rolling over at EOP - workaround */
+ if (rctx->b.chip_class == R600) {
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT);
+ }
+
if (rctx->screen->b.trace_bo) {
r600_trace_emit(rctx);
}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600d.h b/lib/mesa/src/gallium/drivers/r600/r600d.h
index bce8b4ea0..4b44546b6 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600d.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600d.h
@@ -130,6 +130,7 @@
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
#define EVENT_TYPE_VGT_FLUSH 0x24
+#define EVENT_TYPE_SQ_NON_EVENT 0x26
#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.am b/lib/mesa/src/gallium/drivers/radeon/Makefile.am
index 13d8976de..a6fc145cb 100644
--- a/lib/mesa/src/gallium/drivers/radeon/Makefile.am
+++ b/lib/mesa/src/gallium/drivers/radeon/Makefile.am
@@ -16,7 +16,8 @@ libradeon_la_SOURCES = \
if NEED_RADEON_LLVM
AM_CFLAGS += \
- $(LLVM_CFLAGS)
+ $(LLVM_CFLAGS) \
+ $(LIBELF_CFLAGS)
libradeon_la_SOURCES += \
$(LLVM_C_FILES)
@@ -24,7 +25,7 @@ libradeon_la_SOURCES += \
libradeon_la_LIBADD = \
$(CLOCK_LIB) \
$(LLVM_LIBS) \
- $(ELF_LIB)
+ $(LIBELF_LIBS)
libradeon_la_LDFLAGS = \
$(LLVM_LDFLAGS)
diff --git a/lib/mesa/src/gallium/drivers/radeon/cayman_msaa.c b/lib/mesa/src/gallium/drivers/radeon/cayman_msaa.c
index 12a5f6047..60cf2db5d 100644
--- a/lib/mesa/src/gallium/drivers/radeon/cayman_msaa.c
+++ b/lib/mesa/src/gallium/drivers/radeon/cayman_msaa.c
@@ -229,13 +229,17 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
- EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
+ EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
+ EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+ EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
} else if (overrast_samples > 1) {
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+ r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+ EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
}
} else {
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
@@ -245,6 +249,8 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+ r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+ EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
}
}
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
index 495fda0a8..62f7647d3 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -226,8 +226,8 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->family = rscreen->family;
rctx->chip_class = rscreen->chip_class;
- if (rscreen->family == CHIP_HAWAII)
- rctx->max_db = 16;
+ if (rscreen->chip_class >= CIK)
+ rctx->max_db = MAX2(8, rscreen->info.r600_num_backends);
else if (rscreen->chip_class >= EVERGREEN)
rctx->max_db = 8;
else
@@ -543,10 +543,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
case CHIP_TONGA: return "tonga";
case CHIP_ICELAND: return "iceland";
case CHIP_CARRIZO: return "carrizo";
- case CHIP_FIJI: return "fiji";
#if HAVE_LLVM <= 0x0307
+ case CHIP_FIJI: return "tonga";
case CHIP_STONEY: return "carrizo";
#else
+ case CHIP_FIJI: return "fiji";
case CHIP_STONEY: return "stoney";
#endif
default: return "";
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600d_common.h b/lib/mesa/src/gallium/drivers/radeon/r600d_common.h
index 115042d15..7512c7232 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600d_common.h
+++ b/lib/mesa/src/gallium/drivers/radeon/r600d_common.h
@@ -168,6 +168,8 @@
#define EG_R_028A4C_PA_SC_MODE_CNTL_1 0x028A4C
#define EG_S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 16)
+#define EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 25)
+#define EG_S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) & 0x1) << 26)
#define CM_R_028804_DB_EQAA 0x00028804
#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
index 55c216aa5..3a5d9f444 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
@@ -951,6 +951,8 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.db_pitch = dec->base.width;
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
+ if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
+ dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2;
switch (u_reduce_video_profile(picture->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
index 452fbd608..756f69828 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
@@ -385,7 +385,10 @@ struct ruvd_msg {
uint32_t dt_chroma_top_offset;
uint32_t dt_chroma_bottom_offset;
uint32_t dt_surf_tile_config;
- uint32_t dt_reserved[3];
+ uint32_t dt_uv_surf_tile_config;
+ // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney
+ uint32_t dt_wa_chroma_top_offset;
+ uint32_t dt_wa_chroma_bottom_offset;
uint32_t reserved[16];
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
index 7eab974a3..bb1f9de50 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
@@ -388,6 +388,11 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
struct radeon_surf *tmp_surf;
unsigned cpb_size;
+ if (rscreen->info.family == CHIP_STONEY) {
+ RVID_ERR("Stoney VCE is not supported!\n");
+ return NULL;
+ }
+
if (!rscreen->info.vce_fw_version) {
RVID_ERR("Kernel doesn't supports VCE!\n");
return NULL;
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_compute.c b/lib/mesa/src/gallium/drivers/radeonsi/si_compute.c
index d4fe56536..748bf5acb 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_compute.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_compute.c
@@ -33,14 +33,6 @@
#include "sid.h"
#define MAX_GLOBAL_BUFFERS 20
-#if HAVE_LLVM < 0x0305
-#define NUM_USER_SGPRS 2
-#else
-/* XXX: Even though we don't pass the scratch buffer via user sgprs any more
- * LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
- * with older mesa. */
-#define NUM_USER_SGPRS 4
-#endif
struct si_compute {
struct si_context *ctx;
@@ -241,7 +233,6 @@ static void si_launch_grid(
uint64_t kernel_args_va;
uint64_t scratch_buffer_va = 0;
uint64_t shader_va;
- unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
struct si_shader *shader = &program->shader;
unsigned lds_blocks;
@@ -365,20 +356,7 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
- si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
- /* We always use at least 3 VGPRS, these come from
- * TIDIG_COMP_CNT.
- * XXX: The compiler should account for this.
- */
- S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4)
- /* We always use at least 4 + arg_user_sgpr_count. The 4 extra
- * sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN
- * XXX: The compiler should account for this.
- */
- | S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
- shader->num_sgprs)) - 1) / 8)
- | S_00B028_FLOAT_MODE(shader->float_mode))
- ;
+ si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->rsrc1);
lds_blocks = shader->lds_size;
/* XXX: We are over allocating LDS. For SI, the shader reports LDS in
@@ -394,17 +372,10 @@ static void si_launch_grid(
assert(lds_blocks <= 0xFF);
- si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
- S_00B84C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0)
- | S_00B84C_USER_SGPR(arg_user_sgpr_count)
- | S_00B84C_TGID_X_EN(1)
- | S_00B84C_TGID_Y_EN(1)
- | S_00B84C_TGID_Z_EN(1)
- | S_00B84C_TG_SIZE_EN(1)
- | S_00B84C_TIDIG_COMP_CNT(2)
- | S_00B84C_LDS_SIZE(lds_blocks)
- | S_00B84C_EXCP_EN(0))
- ;
+ shader->rsrc2 &= C_00B84C_LDS_SIZE;
+ shader->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
+
+ si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->rsrc2);
si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c
index ef986bd2e..2f9e00975 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.c
@@ -637,6 +637,14 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
lp_build_const_int32(gallivm, swizzle));
value = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
+ if (type == TGSI_TYPE_DOUBLE) {
+ LLVMValueRef value2;
+ dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+ lp_build_const_int32(gallivm, swizzle + 1));
+ value2 = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
+ return radeon_llvm_emit_fetch_double(bld_base, value, value2);
+ }
+
return LLVMBuildBitCast(gallivm->builder, value,
tgsi2llvmtype(bld_base, type), "");
}
@@ -3752,12 +3760,14 @@ void si_shader_binary_read_config(const struct si_screen *sscreen,
shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
shader->float_mode = G_00B028_FLOAT_MODE(value);
+ shader->rsrc1 = value;
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
break;
case R_00B84C_COMPUTE_PGM_RSRC2:
shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
+ shader->rsrc2 = value;
break;
case R_0286CC_SPI_PS_INPUT_ENA:
shader->spi_ps_input_ena = value;
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.h b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.h
index 511ee7333..5f4944a96 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader.h
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader.h
@@ -268,8 +268,8 @@ struct si_shader {
bool is_gs_copy_shader;
bool dx10_clamp_mode; /* convert NaNs to 0 */
- unsigned ls_rsrc1;
- unsigned ls_rsrc2;
+ unsigned rsrc1;
+ unsigned rsrc2;
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state.c
index 81e138233..af96d8680 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_state.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state.c
@@ -2979,6 +2979,28 @@ static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
}
+static void si_init_border_color_buffer(struct si_context *sctx)
+{
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+ if (!pm4)
+ return;
+
+ assert(sctx->scratch_buffer == NULL);
+ r600_resource_reference(&sctx->scratch_buffer, NULL);
+ sctx->scratch_buffer = si_resource_create_custom(&sctx->screen->b.b,
+ PIPE_USAGE_DEFAULT,
+ 4096 * 16);
+
+ uint64_t va = sctx->scratch_buffer->gpu_address;
+
+ si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va >> 8);
+ if (sctx->b.chip_class >= CIK)
+ si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va >> 40);
+ si_pm4_add_bo(pm4, sctx->scratch_buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_DATA);
+ si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
+}
+
static void si_init_config(struct si_context *sctx);
void si_init_state_functions(struct si_context *sctx)
@@ -3045,6 +3067,7 @@ void si_init_state_functions(struct si_context *sctx)
}
si_init_config(sctx);
+ si_init_border_color_buffer(sctx);
}
static void
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c
index e0394cf00..a15981de3 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -163,7 +163,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
lds_size = output_patch0_offset + output_patch_size * *num_patches;
- ls_rsrc2 = ls->current->ls_rsrc2;
+ ls_rsrc2 = ls->current->rsrc2;
if (sctx->b.chip_class >= CIK) {
assert(lds_size <= 65536);
@@ -178,7 +178,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
- radeon_emit(cs, ls->current->ls_rsrc1);
+ radeon_emit(cs, ls->current->rsrc1);
radeon_emit(cs, ls_rsrc2);
/* Compute userdata SGPRs. */
@@ -216,6 +216,18 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
}
+static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
+{
+ switch (info->mode) {
+ case PIPE_PRIM_PATCHES:
+ return info->count / info->vertices_per_patch;
+ case R600_PRIM_RECTANGLE_LIST:
+ return info->count / 3;
+ default:
+ return u_prims_for_vertices(info->mode, info->count);
+ }
+}
+
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
const struct pipe_draw_info *info,
unsigned num_patches)
@@ -305,7 +317,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
(info->indirect ||
(info->instance_count > 1 &&
- u_prims_for_vertices(info->mode, info->count) <= 1)))
+ si_num_prims_for_vertices(info) <= 1)))
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
/* Instancing bug on 2 SE chips. */
@@ -849,7 +861,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
- if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
+ if ((sctx->b.family == CHIP_HAWAII ||
+ sctx->b.family == CHIP_TONGA ||
+ sctx->b.family == CHIP_FIJI) &&
(sctx->b.streamout.streamout_enabled ||
sctx->b.streamout.prims_gen_query_enabled)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.c
index f9b38ed53..1ba9c8595 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -119,10 +119,10 @@ static void si_shader_ls(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
- shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
+ shader->rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
S_00B528_SGPRS((num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
- shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+ shader->rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
}
diff --git a/lib/mesa/src/gallium/targets/opencl/Makefile.am b/lib/mesa/src/gallium/targets/opencl/Makefile.am
index c78b26832..93b4d3a83 100644
--- a/lib/mesa/src/gallium/targets/opencl/Makefile.am
+++ b/lib/mesa/src/gallium/targets/opencl/Makefile.am
@@ -2,6 +2,9 @@ include $(top_srcdir)/src/gallium/Automake.inc
lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
+AM_CPPFLAGS = \
+ $(LIBELF_CFLAGS)
+
lib@OPENCL_LIBNAME@_la_LDFLAGS = \
$(LLVM_LDFLAGS) \
-no-undefined \
@@ -20,8 +23,8 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(ELF_LIB) \
- -ldl \
+ $(LIBELF_LIBS) \
+ $(DLOPEN_LIBS) \
-lclangCodeGen \
-lclangFrontendTool \
-lclangFrontend \