summaryrefslogtreecommitdiff
path: root/lib/mesa/src/broadcom/qpu/qpu_pack.c
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2024-04-02 09:30:07 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2024-04-02 09:30:07 +0000
commitf54e142455cb3c9d1662dae7e096a32a47e5409b (patch)
tree440ecd46269f0eac25e349e1ed58f246490c5e26 /lib/mesa/src/broadcom/qpu/qpu_pack.c
parent36d8503c27530f68d655d3ef77a6eaa4dfd8ad65 (diff)
Import Mesa 23.3.6
Diffstat (limited to 'lib/mesa/src/broadcom/qpu/qpu_pack.c')
-rw-r--r--lib/mesa/src/broadcom/qpu/qpu_pack.c1426
1 files changed, 1228 insertions, 198 deletions
diff --git a/lib/mesa/src/broadcom/qpu/qpu_pack.c b/lib/mesa/src/broadcom/qpu/qpu_pack.c
index a875683c6..08fce4af1 100644
--- a/lib/mesa/src/broadcom/qpu/qpu_pack.c
+++ b/lib/mesa/src/broadcom/qpu/qpu_pack.c
@@ -84,6 +84,9 @@
#define V3D_QPU_MUL_A_SHIFT 18
#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)
+#define V3D_QPU_RADDR_C_SHIFT 18
+#define V3D_QPU_RADDR_C_MASK QPU_MASK(23, 18)
+
#define V3D_QPU_ADD_B_SHIFT 15
#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)
@@ -98,6 +101,9 @@
#define V3D_QPU_BRANCH_BDI_SHIFT 12
#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
+#define V3D_QPU_RADDR_D_SHIFT 12
+#define V3D_QPU_RADDR_D_MASK QPU_MASK(17, 12)
+
#define V3D_QPU_RADDR_A_SHIFT 6
#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)
@@ -112,12 +118,15 @@
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
-#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
+#define SMIMM_A .small_imm_a = true
+#define SMIMM_B .small_imm_b = true
+#define SMIMM_C .small_imm_c = true
+#define SMIMM_D .small_imm_d = true
static const struct v3d_qpu_sig v33_sig_map[] = {
/* MISC R3 R4 R5 */
@@ -135,8 +144,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
[11] = { THRSW, LDVARY, LDUNIF },
[12] = { LDVARY, LDTMU, },
[13] = { THRSW, LDVARY, LDTMU, },
- [14] = { SMIMM, LDVARY, },
- [15] = { SMIMM, },
+ [14] = { SMIMM_B, LDVARY, },
+ [15] = { SMIMM_B, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
/* 18-21 reserved */
@@ -148,8 +157,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
[27] = { THRSW, LDVPM, LDUNIF },
[28] = { LDVPM, LDTMU, },
[29] = { THRSW, LDVPM, LDTMU, },
- [30] = { SMIMM, LDVPM, },
- [31] = { SMIMM, },
+ [30] = { SMIMM_B, LDVPM, },
+ [31] = { SMIMM_B, },
};
static const struct v3d_qpu_sig v40_sig_map[] = {
@@ -167,8 +176,8 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
[10] = { LDVARY, LDUNIF },
[11] = { THRSW, LDVARY, LDUNIF },
/* 12-13 reserved */
- [14] = { SMIMM, LDVARY, },
- [15] = { SMIMM, },
+ [14] = { SMIMM_B, LDVARY, },
+ [15] = { SMIMM_B, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
[18] = { WRTMUC },
@@ -178,7 +187,7 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
[22] = { UCB, },
[23] = { ROT, },
/* 24-30 reserved */
- [31] = { SMIMM, LDTMU, },
+ [31] = { SMIMM_B, LDTMU, },
};
static const struct v3d_qpu_sig v41_sig_map[] = {
@@ -197,8 +206,8 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
[11] = { THRSW, LDVARY, LDUNIF },
[12] = { LDUNIFRF },
[13] = { THRSW, LDUNIFRF },
- [14] = { SMIMM, LDVARY, },
- [15] = { SMIMM, },
+ [14] = { SMIMM_B, LDVARY },
+ [15] = { SMIMM_B, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
[18] = { WRTMUC },
@@ -210,7 +219,41 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
[24] = { LDUNIFA},
[25] = { LDUNIFARF },
/* 26-30 reserved */
- [31] = { SMIMM, LDTMU, },
+ [31] = { SMIMM_B, LDTMU, },
+};
+
+
+static const struct v3d_qpu_sig v71_sig_map[] = {
+ /* MISC phys RF0 */
+ [0] = { },
+ [1] = { THRSW, },
+ [2] = { LDUNIF },
+ [3] = { THRSW, LDUNIF },
+ [4] = { LDTMU, },
+ [5] = { THRSW, LDTMU, },
+ [6] = { LDTMU, LDUNIF },
+ [7] = { THRSW, LDTMU, LDUNIF },
+ [8] = { LDVARY, },
+ [9] = { THRSW, LDVARY, },
+ [10] = { LDVARY, LDUNIF },
+ [11] = { THRSW, LDVARY, LDUNIF },
+ [12] = { LDUNIFRF },
+ [13] = { THRSW, LDUNIFRF },
+ [14] = { SMIMM_A, },
+ [15] = { SMIMM_B, },
+ [16] = { LDTLB, },
+ [17] = { LDTLBU, },
+ [18] = { WRTMUC },
+ [19] = { THRSW, WRTMUC },
+ [20] = { LDVARY, WRTMUC },
+ [21] = { THRSW, LDVARY, WRTMUC },
+ [22] = { UCB, },
+ /* 23 reserved */
+ [24] = { LDUNIFA},
+ [25] = { LDUNIFARF },
+ /* 26-29 reserved */
+ [30] = { SMIMM_C, },
+ [31] = { SMIMM_D, },
};
bool
@@ -221,7 +264,9 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
if (packed_sig >= ARRAY_SIZE(v33_sig_map))
return false;
- if (devinfo->ver >= 41)
+ if (devinfo->ver >= 71)
+ *sig = v71_sig_map[packed_sig];
+ else if (devinfo->ver >= 41)
*sig = v41_sig_map[packed_sig];
else if (devinfo->ver == 40)
*sig = v40_sig_map[packed_sig];
@@ -240,7 +285,9 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
{
static const struct v3d_qpu_sig *map;
- if (devinfo->ver >= 41)
+ if (devinfo->ver >= 71)
+ map = v71_sig_map;
+ else if (devinfo->ver >= 41)
map = v41_sig_map;
else if (devinfo->ver == 40)
map = v40_sig_map;
@@ -443,16 +490,29 @@ v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
/* Make a mapping of the table of opcodes in the spec. The opcode is
* determined by a combination of the opcode field, and in the case of 0 or
- * 1-arg opcodes, the mux_b field as well.
+ * 1-arg opcodes, the mux (version <= 42) or raddr (version >= 71) field as
+ * well.
*/
-#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
-#define ANYMUX MUX_MASK(0, 7)
+#define OP_MASK(val) BITFIELD64_BIT(val)
+#define OP_RANGE(bot, top) BITFIELD64_RANGE(bot, top - bot + 1)
+#define ANYMUX OP_RANGE(0, 7)
+/* FIXME: right now using BITFIELD64_RANGE to set the last bit raises a
+ * warning when building with clang using the shift-count-overflow option
+ */
+#define ANYOPMASK ~0ull
struct opcode_desc {
uint8_t opcode_first;
uint8_t opcode_last;
- uint8_t mux_b_mask;
- uint8_t mux_a_mask;
+
+ union {
+ struct {
+ uint8_t b_mask;
+ uint8_t a_mask;
+ } mux;
+ uint64_t raddr_mask;
+ };
+
uint8_t op;
/* first_ver == 0 if it's the same across all V3D versions.
@@ -465,122 +525,321 @@ struct opcode_desc {
uint8_t last_ver;
};
-static const struct opcode_desc add_ops[] = {
+static const struct opcode_desc add_ops_v33[] = {
/* FADD is FADDNF depending on the order of the mux_a/mux_b. */
- { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD },
- { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
- { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
- { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD },
- { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
- { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB },
- { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
- { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
- { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
- { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
- { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
- { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
- { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
- { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
- { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
- { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
+ { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADD },
+ { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADDNF },
+ { 53, 55, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
+ { 56, 56, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ADD },
+ { 57, 59, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
+ { 60, 60, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SUB },
+ { 61, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
+ { 64, 111, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FSUB },
+ { 120, 120, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MIN },
+ { 121, 121, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MAX },
+ { 122, 122, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMIN },
+ { 123, 123, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMAX },
+ { 124, 124, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHL },
+ { 125, 125, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHR },
+ { 126, 126, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ASR },
+ { 127, 127, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ROR },
/* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
- { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
- { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
- { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },
-
- { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
- { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
- { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },
-
- { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
- { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
- { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
- { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
- { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
- { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
- { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
- { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
- { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
- { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
- { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
- { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
- { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
- { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
- { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
- { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
- { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
- { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },
-
- { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
- { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
- { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
- { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },
-
- { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
- { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
- { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
- { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
- { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
- { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
- { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
- { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
- { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
- { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
- { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
-
- { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
- { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
- { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
- { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
- { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
- { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
- { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
- { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
- { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
- { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
- { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
- { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },
+ { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMIN },
+ { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMAX },
+ { 176, 180, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMIN },
+
+ { 181, 181, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_AND },
+ { 182, 182, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_OR },
+ { 183, 183, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_XOR },
+
+ { 184, 184, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VADD },
+ { 185, 185, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VSUB },
+ { 186, 186, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_NOT },
+ { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG },
+ { 186, 186, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_FLAPUSH },
+ { 186, 186, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FLBPUSH },
+ { 186, 186, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_FLPOP },
+ { 186, 186, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP },
+ { 186, 186, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SETMSF },
+ { 186, 186, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_SETREVF },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(1), V3D_QPU_A_TIDX },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(2), V3D_QPU_A_EIDX },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(3), V3D_QPU_A_LR },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(4), V3D_QPU_A_VFLA },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VFLB },
+ { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
+
+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(0, 2), V3D_QPU_A_FXCD },
+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(3), V3D_QPU_A_XCD },
+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(4, 6), V3D_QPU_A_FYCD },
+ { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(7), V3D_QPU_A_YCD },
+
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(0), V3D_QPU_A_MSF },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(1), V3D_QPU_A_REVF },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_VDWWT, 33 },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_IID, 40 },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3), V3D_QPU_A_SAMPID, 40 },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(4), V3D_QPU_A_BARRIERID, 40 },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(5), V3D_QPU_A_TMUWT },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VPMWT },
+ { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(7), V3D_QPU_A_FLAFIRST, 41 },
+ { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = OP_MASK(0), V3D_QPU_A_FLNAFIRST, 41 },
+ { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
+
+ { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
+ { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
+ { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
+ { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
+ { 188, 188, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMP, 40 },
+ { 188, 188, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT, 41 },
+ { 188, 188, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_EXP, 41 },
+ { 188, 188, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_LOG, 41 },
+ { 188, 188, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SIN, 41 },
+ { 188, 188, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT2, 41 },
+ { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
+ { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },
/* FIXME: MORE COMPLICATED */
- /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
+ /* { 190, 191, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
- { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
- { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },
+ { 192, 239, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FCMP },
+ { 240, 244, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMAX },
- { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
- { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
- { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
- { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
- { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
- { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
- { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
- { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },
+ { 245, 245, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FROUND },
+ { 245, 245, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIN },
+ { 245, 245, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FTRUNC },
+ { 245, 245, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIZ },
+ { 246, 246, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FFLOOR },
+ { 246, 246, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOUZ },
+ { 246, 246, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FCEIL },
+ { 246, 246, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOC },
- { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
- { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },
+ { 247, 247, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FDX },
+ { 247, 247, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FDY },
/* The stvpms are distinguished by the waddr field. */
- { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
- { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
- { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },
+ { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMV },
+ { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMD },
+ { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMP },
+
+ { 252, 252, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_ITOF },
+ { 252, 252, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_CLZ },
+ { 252, 252, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_UTOF },
+};
+
+static const struct opcode_desc mul_ops_v33[] = {
+ { 1, 1, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_ADD },
+ { 2, 2, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SUB },
+ { 3, 3, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_UMUL24 },
+ { 4, 8, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_VFMUL },
+ { 9, 9, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SMUL24 },
+ { 10, 10, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_MULTOP },
+ { 14, 14, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
+ { 15, 15, .mux.b_mask = OP_RANGE(0, 3), ANYMUX, V3D_QPU_M_FMOV, 33, 42},
+ { 15, 15, .mux.b_mask = OP_MASK(4), .mux.a_mask = OP_MASK(0), V3D_QPU_M_NOP, 33, 42 },
+ { 15, 15, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_M_MOV, 33, 42 },
+
+ { 16, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMUL },
+};
- { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
- { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
- { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
+/* Note that it would have been possible to define all the add/mul opcodes in
+ * just one table, using the first_ver/last_ver. But taking into account that
+ * for v71 there were a lot of changes, it was tidier this way. Also right
+ * now we are doing a linear search on those tables, so this keeps the
+ * tables smaller.
+ *
+ * Just in case we merge the tables, we define the first_ver as 71 for those
+ * opcodes that changed on v71
+ */
+static const struct opcode_desc add_ops_v71[] = {
+ /* FADD is FADDNF depending on the order of the raddr_a/raddr_b. */
+ { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
+ { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
+ { 53, 55, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
+ { 56, 56, .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
+ { 57, 59, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
+ { 60, 60, .raddr_mask = ANYOPMASK, V3D_QPU_A_SUB },
+ { 61, 63, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
+ { 64, 111, .raddr_mask = ANYOPMASK, V3D_QPU_A_FSUB },
+ { 120, 120, .raddr_mask = ANYOPMASK, V3D_QPU_A_MIN },
+ { 121, 121, .raddr_mask = ANYOPMASK, V3D_QPU_A_MAX },
+ { 122, 122, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMIN },
+ { 123, 123, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMAX },
+ { 124, 124, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHL },
+ { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
+ { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
+ { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
+ /* FMIN is instead FMAX depending on the raddr_a/b order. */
+ { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
+ { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
+ { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },
+
+ { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
+ { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
+ { 183, 183, .raddr_mask = ANYOPMASK, V3D_QPU_A_XOR },
+ { 184, 184, .raddr_mask = ANYOPMASK, V3D_QPU_A_VADD },
+ { 185, 185, .raddr_mask = ANYOPMASK, V3D_QPU_A_VSUB },
+
+ { 186, 186, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOT },
+ { 186, 186, .raddr_mask = OP_MASK(1), V3D_QPU_A_NEG },
+ { 186, 186, .raddr_mask = OP_MASK(2), V3D_QPU_A_FLAPUSH },
+ { 186, 186, .raddr_mask = OP_MASK(3), V3D_QPU_A_FLBPUSH },
+ { 186, 186, .raddr_mask = OP_MASK(4), V3D_QPU_A_FLPOP },
+ { 186, 186, .raddr_mask = OP_MASK(5), V3D_QPU_A_CLZ },
+ { 186, 186, .raddr_mask = OP_MASK(6), V3D_QPU_A_SETMSF },
+ { 186, 186, .raddr_mask = OP_MASK(7), V3D_QPU_A_SETREVF },
+
+ { 187, 187, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
+ { 187, 187, .raddr_mask = OP_MASK(1), V3D_QPU_A_TIDX },
+ { 187, 187, .raddr_mask = OP_MASK(2), V3D_QPU_A_EIDX },
+ { 187, 187, .raddr_mask = OP_MASK(3), V3D_QPU_A_LR },
+ { 187, 187, .raddr_mask = OP_MASK(4), V3D_QPU_A_VFLA },
+ { 187, 187, .raddr_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
+ { 187, 187, .raddr_mask = OP_MASK(6), V3D_QPU_A_VFLB },
+ { 187, 187, .raddr_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
+ { 187, 187, .raddr_mask = OP_MASK(8), V3D_QPU_A_XCD },
+ { 187, 187, .raddr_mask = OP_MASK(9), V3D_QPU_A_YCD },
+ { 187, 187, .raddr_mask = OP_MASK(10), V3D_QPU_A_MSF },
+ { 187, 187, .raddr_mask = OP_MASK(11), V3D_QPU_A_REVF },
+ { 187, 187, .raddr_mask = OP_MASK(12), V3D_QPU_A_IID },
+ { 187, 187, .raddr_mask = OP_MASK(13), V3D_QPU_A_SAMPID },
+ { 187, 187, .raddr_mask = OP_MASK(14), V3D_QPU_A_BARRIERID },
+ { 187, 187, .raddr_mask = OP_MASK(15), V3D_QPU_A_TMUWT },
+ { 187, 187, .raddr_mask = OP_MASK(16), V3D_QPU_A_VPMWT },
+ { 187, 187, .raddr_mask = OP_MASK(17), V3D_QPU_A_FLAFIRST },
+ { 187, 187, .raddr_mask = OP_MASK(18), V3D_QPU_A_FLNAFIRST },
+
+ { 187, 187, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FXCD },
+ { 187, 187, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FYCD },
+
+ { 188, 188, .raddr_mask = OP_MASK(0), V3D_QPU_A_LDVPMV_IN, 71 },
+ { 188, 188, .raddr_mask = OP_MASK(1), V3D_QPU_A_LDVPMD_IN, 71 },
+ { 188, 188, .raddr_mask = OP_MASK(2), V3D_QPU_A_LDVPMP, 71 },
+
+ { 188, 188, .raddr_mask = OP_MASK(32), V3D_QPU_A_RECIP, 71 },
+ { 188, 188, .raddr_mask = OP_MASK(33), V3D_QPU_A_RSQRT, 71 },
+ { 188, 188, .raddr_mask = OP_MASK(34), V3D_QPU_A_EXP, 71 },
+ { 188, 188, .raddr_mask = OP_MASK(35), V3D_QPU_A_LOG, 71 },
+ { 188, 188, .raddr_mask = OP_MASK(36), V3D_QPU_A_SIN, 71 },
+ { 188, 188, .raddr_mask = OP_MASK(37), V3D_QPU_A_RSQRT2, 71 },
+
+ { 189, 189, .raddr_mask = ANYOPMASK, V3D_QPU_A_LDVPMG_IN, 71 },
+
+ /* The stvpms are distinguished by the waddr field. */
+ { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMV, 71},
+ { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMD, 71},
+ { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMP, 71},
+
+ { 192, 207, .raddr_mask = ANYOPMASK, V3D_QPU_A_FCMP, 71 },
+
+ { 245, 245, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FROUND, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FROUND, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FROUND, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FROUND, 71 },
+
+ { 245, 245, .raddr_mask = OP_MASK(3), V3D_QPU_A_FTOIN, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(7), V3D_QPU_A_FTOIN, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(11), V3D_QPU_A_FTOIN, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(15), V3D_QPU_A_FTOIN, 71 },
+
+ { 245, 245, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FTRUNC, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FTRUNC, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FTRUNC, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FTRUNC, 71 },
+
+ { 245, 245, .raddr_mask = OP_MASK(19), V3D_QPU_A_FTOIZ, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(23), V3D_QPU_A_FTOIZ, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(27), V3D_QPU_A_FTOIZ, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(31), V3D_QPU_A_FTOIZ, 71 },
+
+ { 245, 245, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FFLOOR, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FFLOOR, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(40, 42), V3D_QPU_A_FFLOOR, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(44, 46), V3D_QPU_A_FFLOOR, 71 },
+
+ { 245, 245, .raddr_mask = OP_MASK(35), V3D_QPU_A_FTOUZ, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(39), V3D_QPU_A_FTOUZ, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(43), V3D_QPU_A_FTOUZ, 71 },
+ { 245, 245, .raddr_mask = OP_MASK(47), V3D_QPU_A_FTOUZ, 71 },
+
+ { 245, 245, .raddr_mask = OP_RANGE(48, 50), V3D_QPU_A_FCEIL, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(52, 54), V3D_QPU_A_FCEIL, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(56, 58), V3D_QPU_A_FCEIL, 71 },
+ { 245, 245, .raddr_mask = OP_RANGE(60, 62), V3D_QPU_A_FCEIL, 71 },
+
+ { 245, 245, .raddr_mask = OP_MASK(51), V3D_QPU_A_FTOC },
+ { 245, 245, .raddr_mask = OP_MASK(55), V3D_QPU_A_FTOC },
+ { 245, 245, .raddr_mask = OP_MASK(59), V3D_QPU_A_FTOC },
+ { 245, 245, .raddr_mask = OP_MASK(63), V3D_QPU_A_FTOC },
+
+ { 246, 246, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FDX, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FDX, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FDX, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FDX, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FDY, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FDY, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FDY, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FDY, 71 },
+
+ { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
+ { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },
+
+ { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
+ { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },
+
+ { 249, 249, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FMOV, 71 },
+ { 249, 249, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FMOV, 71 },
+ { 249, 249, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FMOV, 71 },
+ { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
+ { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
+ { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
+ { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },
+
+ { 249, 249, .raddr_mask = OP_MASK(3), V3D_QPU_A_MOV, 71 },
+ { 249, 249, .raddr_mask = OP_MASK(7), V3D_QPU_A_MOV, 71 },
+ { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
+ { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
+ { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },
+
+ { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
+ { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },
};
-static const struct opcode_desc mul_ops[] = {
- { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
- { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
- { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
- { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
- { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
- { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
- { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
- { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
- { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
- { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
- { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
+static const struct opcode_desc mul_ops_v71[] = {
+ /* For V3D 7.1, second mask field would be ignored */
+ { 1, 1, .raddr_mask = ANYOPMASK, V3D_QPU_M_ADD, 71 },
+ { 2, 2, .raddr_mask = ANYOPMASK, V3D_QPU_M_SUB, 71 },
+ { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
+ { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
+ { 4, 8, .raddr_mask = ANYOPMASK, V3D_QPU_M_VFMUL, 71 },
+ { 9, 9, .raddr_mask = ANYOPMASK, V3D_QPU_M_SMUL24, 71 },
+ { 10, 10, .raddr_mask = ANYOPMASK, V3D_QPU_M_MULTOP, 71 },
+
+ { 14, 14, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_M_FMOV, 71 },
+ { 14, 14, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_M_FMOV, 71 },
+ { 14, 14, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_M_FMOV, 71 },
+ { 14, 14, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_M_FMOV, 71 },
+ { 14, 14, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_M_FMOV, 71 },
+ { 14, 14, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_M_FMOV, 71 },
+
+ { 14, 14, .raddr_mask = OP_MASK(3), V3D_QPU_M_MOV, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(7), V3D_QPU_M_MOV, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(11), V3D_QPU_M_MOV, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },
+
+ { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
+ { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },
+
+ { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },
+
+ { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
};
/* Returns true if op_desc should be filtered out based on devinfo->ver
@@ -589,17 +848,23 @@ static const struct opcode_desc mul_ops[] = {
*/
static bool
opcode_invalid_in_version(const struct v3d_device_info *devinfo,
- const struct opcode_desc *op_desc)
+ const uint8_t first_ver,
+ const uint8_t last_ver)
{
- return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
- (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver);
+ return (first_ver != 0 && devinfo->ver < first_ver) ||
+ (last_ver != 0 && devinfo->ver > last_ver);
}
+/* Note that we pass as parameters mux_a, mux_b and raddr, even if depending
+ * on the devinfo->ver some would be ignored. We do it this way just to avoid
+ * having two really similar lookup_opcode methods
+ */
static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
const struct opcode_desc *opcodes,
size_t num_opcodes, uint32_t opcode,
- uint32_t mux_a, uint32_t mux_b)
+ uint32_t mux_a, uint32_t mux_b,
+ uint32_t raddr)
{
for (int i = 0; i < num_opcodes; i++) {
const struct opcode_desc *op_desc = &opcodes[i];
@@ -608,14 +873,19 @@ lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
opcode > op_desc->opcode_last)
continue;
- if (opcode_invalid_in_version(devinfo, op_desc))
+ if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
continue;
- if (!(op_desc->mux_b_mask & (1 << mux_b)))
- continue;
+ if (devinfo->ver < 71) {
+ if (!(op_desc->mux.b_mask & (1 << mux_b)))
+ continue;
- if (!(op_desc->mux_a_mask & (1 << mux_a)))
- continue;
+ if (!(op_desc->mux.a_mask & (1 << mux_a)))
+ continue;
+ } else {
+ if (!(op_desc->raddr_mask & ((uint64_t) 1 << raddr)))
+ continue;
+ }
return op_desc;
}
@@ -668,6 +938,56 @@ v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
}
static bool
+v3d_qpu_int32_unpack_unpack(uint32_t packed,
+ enum v3d_qpu_input_unpack *unpacked)
+{
+ switch (packed) {
+ case 0:
+ *unpacked = V3D_QPU_UNPACK_NONE;
+ return true;
+ case 1:
+ *unpacked = V3D_QPU_UNPACK_UL;
+ return true;
+ case 2:
+ *unpacked = V3D_QPU_UNPACK_UH;
+ return true;
+ case 3:
+ *unpacked = V3D_QPU_UNPACK_IL;
+ return true;
+ case 4:
+ *unpacked = V3D_QPU_UNPACK_IH;
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
+ uint32_t *packed)
+{
+ switch (unpacked) {
+ case V3D_QPU_UNPACK_NONE:
+ *packed = 0;
+ return true;
+ case V3D_QPU_UNPACK_UL:
+ *packed = 1;
+ return true;
+ case V3D_QPU_UNPACK_UH:
+ *packed = 2;
+ return true;
+ case V3D_QPU_UNPACK_IL:
+ *packed = 3;
+ return true;
+ case V3D_QPU_UNPACK_IH:
+ *packed = 4;
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
enum v3d_qpu_input_unpack *unpacked)
{
@@ -737,8 +1057,8 @@ v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
}
static bool
-v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
- struct v3d_qpu_instr *instr)
+v3d33_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+ struct v3d_qpu_instr *instr)
{
uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
@@ -755,8 +1075,9 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
map_op = (map_op - 253 + 245);
const struct opcode_desc *desc =
- lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
- map_op, mux_a, mux_b);
+ lookup_opcode_from_packed(devinfo, add_ops_v33,
+ ARRAY_SIZE(add_ops_v33),
+ map_op, mux_a, mux_b, 0);
if (!desc)
return false;
@@ -812,12 +1133,12 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
- &instr->alu.add.a_unpack)) {
+ &instr->alu.add.a.unpack)) {
return false;
}
if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
- &instr->alu.add.b_unpack)) {
+ &instr->alu.add.b.unpack)) {
return false;
}
break;
@@ -831,7 +1152,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.add.output_pack = mux_b & 0x3;
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
- &instr->alu.add.a_unpack)) {
+ &instr->alu.add.a.unpack)) {
return false;
}
break;
@@ -843,7 +1164,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
- &instr->alu.add.a_unpack)) {
+ &instr->alu.add.a.unpack)) {
return false;
}
break;
@@ -851,23 +1172,23 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
case V3D_QPU_A_VFMIN:
case V3D_QPU_A_VFMAX:
if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
- &instr->alu.add.a_unpack)) {
+ &instr->alu.add.a.unpack)) {
return false;
}
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
- instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
break;
default:
instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
- instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
- instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
break;
}
- instr->alu.add.a = mux_a;
- instr->alu.add.b = mux_b;
+ instr->alu.add.a.mux = mux_a;
+ instr->alu.add.b.mux = mux_b;
instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
instr->alu.add.magic_write = false;
@@ -892,18 +1213,205 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
}
static bool
-v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+                     struct v3d_qpu_instr *instr)
+{
        /* Decodes the add-ALU half of a v71 instruction. On v71 the two
         * operands are register addresses (RADDR_A/RADDR_B) instead of the
         * v33 mux selectors, and raddr_b also helps identify the opcode.
         */
+        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
+        uint32_t raddr_a = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_A);
+        uint32_t raddr_b = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_B);
+        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
+        uint32_t map_op = op;
+
+        const struct opcode_desc *desc =
+                lookup_opcode_from_packed(devinfo,
+                                          add_ops_v71,
+                                          ARRAY_SIZE(add_ops_v71),
+                                          map_op, 0, 0,
+                                          raddr_b);
+        if (!desc)
+                return false;
+
+        instr->alu.add.op = desc->op;
+
+        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
+         * operands.
+         */
        /* The comparison key packs (small_imm flag, unpack bits, raddr)
         * into one integer; a "larger" A key selects the NF/MAX variant.
         * Mirrors the ordering computation in the v71 pack path.
         */
+        if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
+            instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
+                if (instr->alu.add.op == V3D_QPU_A_FMIN)
+                        instr->alu.add.op = V3D_QPU_A_FMAX;
+                if (instr->alu.add.op == V3D_QPU_A_FADD)
+                        instr->alu.add.op = V3D_QPU_A_FADDNF;
+        }
+
+        /* Some QPU ops require a bit more than just basic opcode and mux a/b
+         * comparisons to distinguish them.
+         */
+        switch (instr->alu.add.op) {
+        case V3D_QPU_A_STVPMV:
+        case V3D_QPU_A_STVPMD:
+        case V3D_QPU_A_STVPMP:
                /* The STVPM variant is encoded in the write address. */
+                switch (waddr) {
+                case 0:
+                        instr->alu.add.op = V3D_QPU_A_STVPMV;
+                        break;
+                case 1:
+                        instr->alu.add.op = V3D_QPU_A_STVPMD;
+                        break;
+                case 2:
+                        instr->alu.add.op = V3D_QPU_A_STVPMP;
+                        break;
+                default:
+                        return false;
+                }
+                break;
+        default:
+                break;
+        }
+
+        switch (instr->alu.add.op) {
+        case V3D_QPU_A_FADD:
+        case V3D_QPU_A_FADDNF:
+        case V3D_QPU_A_FSUB:
+        case V3D_QPU_A_FMIN:
+        case V3D_QPU_A_FMAX:
+        case V3D_QPU_A_FCMP:
+        case V3D_QPU_A_VFPACK:
+                if (instr->alu.add.op != V3D_QPU_A_VFPACK &&
+                    instr->alu.add.op != V3D_QPU_A_FCMP) {
+                        instr->alu.add.output_pack = (op >> 4) & 0x3;
+                } else {
+                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+                }
+
+                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
+                                                   &instr->alu.add.a.unpack)) {
+                        return false;
+                }
+
+                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
+                                                   &instr->alu.add.b.unpack)) {
+                        return false;
+                }
+                break;
+
+        case V3D_QPU_A_FFLOOR:
+        case V3D_QPU_A_FROUND:
+        case V3D_QPU_A_FTRUNC:
+        case V3D_QPU_A_FCEIL:
+        case V3D_QPU_A_FDX:
+        case V3D_QPU_A_FDY:
                /* Single-source float ops: raddr_b bits 0-1 carry the output
                 * pack; the input unpack comes from op bits 2-3.
                 */
+                instr->alu.add.output_pack = raddr_b & 0x3;
+
+                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
+                                                   &instr->alu.add.a.unpack)) {
+                        return false;
+                }
+                break;
+
+        case V3D_QPU_A_FTOIN:
+        case V3D_QPU_A_FTOIZ:
+        case V3D_QPU_A_FTOUZ:
+        case V3D_QPU_A_FTOC:
                /* Float-to-int conversions: unpack lives in raddr_b bits 2-3. */
+                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+
+                if (!v3d_qpu_float32_unpack_unpack((raddr_b >> 2) & 0x3,
+                                                   &instr->alu.add.a.unpack)) {
+                        return false;
+                }
+                break;
+
+        case V3D_QPU_A_VFMIN:
+        case V3D_QPU_A_VFMAX:
                /* The v71 encoding of the vector-f16 ops has not been filled
                 * in yet; the code below is the v33 scheme kept as a stub.
                 */
+                unreachable("pending v71 update");
+                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
+                                                   &instr->alu.add.a.unpack)) {
+                        return false;
+                }
+
+                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
+                break;
+
+        case V3D_QPU_A_MOV:
                /* Integer MOV: unpack encoded in raddr_b bits 2-4. */
+                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+
+                if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
+                                                 &instr->alu.add.a.unpack)) {
+                        return false;
+                }
+                break;
+
+        case V3D_QPU_A_FMOV:
+                instr->alu.add.output_pack = raddr_b & 0x3;
+
+                /* Mul alu FMOV has one additional variant */
+                int32_t unpack = (raddr_b >> 2) & 0x7;
+                if (unpack == 7)
+                        return false;
+
+                if (!v3d_qpu_float32_unpack_unpack(unpack,
+                                                   &instr->alu.add.a.unpack)) {
+                        return false;
+                }
+                break;
+
+        default:
+                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+                instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
+                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
+                break;
+        }
+
+        instr->alu.add.a.raddr = raddr_a;
+        instr->alu.add.b.raddr = raddr_b;
+        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
+
        /* For the LDVPM*_IN ops the MA bit selects the "_OUT" variant
         * instead of acting as a magic-write flag.
         */
+        instr->alu.add.magic_write = false;
+        if (packed_inst & V3D_QPU_MA) {
+                switch (instr->alu.add.op) {
+                case V3D_QPU_A_LDVPMV_IN:
+                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
+                        break;
+                case V3D_QPU_A_LDVPMD_IN:
+                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
+                        break;
+                case V3D_QPU_A_LDVPMG_IN:
+                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
+                        break;
+                default:
+                        instr->alu.add.magic_write = true;
+                        break;
+                }
+        }
+
+        return true;
+}
+
+static bool
+v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
struct v3d_qpu_instr *instr)
{
+ if (devinfo->ver < 71)
+ return v3d33_qpu_add_unpack(devinfo, packed_inst, instr);
+ else
+ return v3d71_qpu_add_unpack(devinfo, packed_inst, instr);
+}
+
+static bool
+v3d33_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+ struct v3d_qpu_instr *instr)
+{
uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);
{
const struct opcode_desc *desc =
- lookup_opcode_from_packed(devinfo, mul_ops,
- ARRAY_SIZE(mul_ops),
- op, mux_a, mux_b);
+ lookup_opcode_from_packed(devinfo,
+ mul_ops_v33,
+ ARRAY_SIZE(mul_ops_v33),
+ op, mux_a, mux_b, 0);
if (!desc)
return false;
@@ -915,12 +1423,12 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
- &instr->alu.mul.a_unpack)) {
+ &instr->alu.mul.a.unpack)) {
return false;
}
if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
- &instr->alu.mul.b_unpack)) {
+ &instr->alu.mul.b.unpack)) {
return false;
}
@@ -931,7 +1439,7 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
((mux_b >> 2) & 1));
if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
- &instr->alu.mul.a_unpack)) {
+ &instr->alu.mul.a.unpack)) {
return false;
}
@@ -941,29 +1449,123 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
- &instr->alu.mul.a_unpack)) {
+ &instr->alu.mul.a.unpack)) {
return false;
}
- instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
break;
default:
instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
- instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
- instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
break;
}
- instr->alu.mul.a = mux_a;
- instr->alu.mul.b = mux_b;
+ instr->alu.mul.a.mux = mux_a;
+ instr->alu.mul.b.mux = mux_b;
instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
return true;
}
+static bool
+v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+                     struct v3d_qpu_instr *instr)
+{
        /* Decodes the mul-ALU half of a v71 instruction. Operands live in
         * RADDR_C/RADDR_D, and raddr_d participates in opcode lookup.
         */
+        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
+        uint32_t raddr_c = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_C);
+        uint32_t raddr_d = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_D);
+
+        {
+                const struct opcode_desc *desc =
+                        lookup_opcode_from_packed(devinfo,
+                                                  mul_ops_v71,
+                                                  ARRAY_SIZE(mul_ops_v71),
+                                                  op, 0, 0,
+                                                  raddr_d);
+                if (!desc)
+                        return false;
+
+                instr->alu.mul.op = desc->op;
+        }
+
+        switch (instr->alu.mul.op) {
+        case V3D_QPU_M_FMUL:
                /* Output pack sits at op bits 4-5 biased by 1 (the pack side
                 * relies on opcode_first carrying a 1 there); input unpacks
                 * sit at op bits 2-3 (A) and 0-1 (B).
                 */
+                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
+
+                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
+                                                   &instr->alu.mul.a.unpack)) {
+                        return false;
+                }
+
+                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
+                                                   &instr->alu.mul.b.unpack)) {
+                        return false;
+                }
+
+                break;
+
+        case V3D_QPU_M_FMOV:
                /* raddr_d bits 0-1: output pack; bits 2-4: input unpack. */
+                instr->alu.mul.output_pack = raddr_d & 0x3;
+
+                if (!v3d_qpu_float32_unpack_unpack((raddr_d >> 2) & 0x7,
+                                                   &instr->alu.mul.a.unpack)) {
+                        return false;
+                }
+
+                break;
+
+        case V3D_QPU_M_VFMUL:
                /* The v71 encoding of VFMUL has not been filled in yet; the
                 * code below is the v33 scheme kept as a stub.
                 */
+                unreachable("pending v71 update");
+                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
+
+                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
+                                                   &instr->alu.mul.a.unpack)) {
+                        return false;
+                }
+
+                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
+
+                break;
+
+        case V3D_QPU_M_MOV:
                /* Integer MOV: unpack encoded in raddr_d bits 2-4. */
+                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
+
+                if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
+                                                 &instr->alu.mul.a.unpack)) {
+                        return false;
+                }
+                break;
+
+        default:
+                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
+                instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
+                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
+                break;
+        }
+
+        instr->alu.mul.a.raddr = raddr_c;
+        instr->alu.mul.b.raddr = raddr_d;
+        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
+        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
+
+        return true;
+}
+
+static bool
+v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+ struct v3d_qpu_instr *instr)
+{
+ if (devinfo->ver < 71)
+ return v3d33_qpu_mul_unpack(devinfo, packed_inst, instr);
+ else
+ return v3d71_qpu_mul_unpack(devinfo, packed_inst, instr);
+}
+
static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
const struct opcode_desc *opcodes, size_t num_opcodes,
@@ -975,7 +1577,7 @@ lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
if (op_desc->op != op)
continue;
- if (opcode_invalid_in_version(devinfo, op_desc))
+ if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
continue;
return op_desc;
@@ -985,15 +1587,16 @@ lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
}
static bool
-v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
- const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+v3d33_qpu_add_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
uint32_t waddr = instr->alu.add.waddr;
- uint32_t mux_a = instr->alu.add.a;
- uint32_t mux_b = instr->alu.add.b;
+ uint32_t mux_a = instr->alu.add.a.mux;
+ uint32_t mux_b = instr->alu.add.b.mux;
int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
const struct opcode_desc *desc =
- lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
+ lookup_opcode_from_instr(devinfo, add_ops_v33,
+ ARRAY_SIZE(add_ops_v33),
instr->alu.add.op);
if (!desc)
@@ -1005,10 +1608,10 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
* identify the operation type.
*/
if (nsrc < 2)
- mux_b = ffs(desc->mux_b_mask) - 1;
+ mux_b = ffs(desc->mux.b_mask) - 1;
if (nsrc < 1)
- mux_a = ffs(desc->mux_a_mask) - 1;
+ mux_a = ffs(desc->mux.a_mask) - 1;
bool no_magic_write = false;
@@ -1061,12 +1664,12 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
}
opcode |= output_pack << 4;
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
&a_unpack)) {
return false;
}
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
&b_unpack)) {
return false;
}
@@ -1100,23 +1703,23 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
uint32_t a_unpack;
uint32_t b_unpack;
- if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
- instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
+ if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
+ instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
return false;
}
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
&a_unpack)) {
return false;
}
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
&b_unpack)) {
return false;
}
- opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
- opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);
+ opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
+ opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
break;
}
@@ -1135,13 +1738,13 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
}
mux_b |= packed;
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
&packed)) {
return false;
}
if (packed == 0)
return false;
- opcode = (opcode & ~(1 << 2)) | packed << 2;
+ opcode = (opcode & ~(0x3 << 2)) | packed << 2;
break;
}
@@ -1153,7 +1756,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
return false;
uint32_t packed;
- if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
&packed)) {
return false;
}
@@ -1166,11 +1769,11 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
case V3D_QPU_A_VFMIN:
case V3D_QPU_A_VFMAX:
if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
- instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
return false;
}
- if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
+ if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
&packed)) {
return false;
}
@@ -1180,8 +1783,8 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
default:
if (instr->alu.add.op != V3D_QPU_A_NOP &&
(instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
- instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
- instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
+ instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
+ instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
return false;
}
break;
@@ -1198,15 +1801,280 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
}
static bool
-v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
-                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
+                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
-        uint32_t mux_a = instr->alu.mul.a;
-        uint32_t mux_b = instr->alu.mul.b;
        /* Encodes the add-ALU half of a v71 instruction into *packed_instr:
         * opcode, RADDR_A/RADDR_B operands, WADDR_A and the MA bit. Returns
         * false on an un-encodable pack/unpack combination.
         */
+        uint32_t waddr = instr->alu.add.waddr;
+        uint32_t raddr_a = instr->alu.add.a.raddr;
+        uint32_t raddr_b = instr->alu.add.b.raddr;
+
+        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
+        const struct opcode_desc *desc =
+                lookup_opcode_from_instr(devinfo, add_ops_v71,
+                                         ARRAY_SIZE(add_ops_v71),
+                                         instr->alu.add.op);
+        if (!desc)
+                return false;
+
+        uint32_t opcode = desc->opcode_first;
+
+        /* If an operation doesn't use an arg, its raddr values may be used to
+         * identify the operation type.
+         */
+        if (nsrc < 2)
+                raddr_b = ffsll(desc->raddr_mask) - 1;
+
+        bool no_magic_write = false;
+
+        switch (instr->alu.add.op) {
+        case V3D_QPU_A_STVPMV:
+                waddr = 0;
+                no_magic_write = true;
+                break;
+        case V3D_QPU_A_STVPMD:
+                waddr = 1;
+                no_magic_write = true;
+                break;
+        case V3D_QPU_A_STVPMP:
+                waddr = 2;
+                no_magic_write = true;
+                break;
+
+        case V3D_QPU_A_LDVPMV_IN:
+        case V3D_QPU_A_LDVPMD_IN:
+        case V3D_QPU_A_LDVPMP:
+        case V3D_QPU_A_LDVPMG_IN:
+                assert(!instr->alu.add.magic_write);
+                break;
+
+        case V3D_QPU_A_LDVPMV_OUT:
+        case V3D_QPU_A_LDVPMD_OUT:
+        case V3D_QPU_A_LDVPMG_OUT:
                /* The MA bit selects the "_OUT" LDVPM variants rather than
                 * acting as a magic-write flag for these ops.
                 */
+                assert(!instr->alu.add.magic_write);
+                *packed_instr |= V3D_QPU_MA;
+                break;
+
+        default:
+                break;
+        }
+
+        switch (instr->alu.add.op) {
+        case V3D_QPU_A_FADD:
+        case V3D_QPU_A_FADDNF:
+        case V3D_QPU_A_FSUB:
+        case V3D_QPU_A_FMIN:
+        case V3D_QPU_A_FMAX:
+        case V3D_QPU_A_FCMP: {
+                uint32_t output_pack;
+                uint32_t a_unpack;
+                uint32_t b_unpack;
+
+                if (instr->alu.add.op != V3D_QPU_A_FCMP) {
+                        if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
+                                                       &output_pack)) {
+                                return false;
+                        }
+                        opcode |= output_pack << 4;
+                }
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
+                                                 &a_unpack)) {
+                        return false;
+                }
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
+                                                 &b_unpack)) {
+                        return false;
+                }
+
+                /* These operations with commutative operands are
+                 * distinguished by the order of the operands come in.
+                 */
                /* Same (small_imm, unpack, raddr) ordering key as the v71
                 * unpack path; operands are swapped so the decoder recovers
                 * the intended FMIN/FMAX, FADD/FADDNF variant.
                 */
+                bool ordering =
+                        instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
+                        instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
+                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
+                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
+                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
+                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
+                        uint32_t temp;
+
+                        temp = a_unpack;
+                        a_unpack = b_unpack;
+                        b_unpack = temp;
+
+                        temp = raddr_a;
+                        raddr_a = raddr_b;
+                        raddr_b = temp;
+
+                        /* If we are swapping raddr_a/b we also need to swap
+                         * small_imm_a/b.
+                         */
+                        if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
+                                assert(instr->sig.small_imm_a !=
+                                       instr->sig.small_imm_b);
+                                struct v3d_qpu_sig new_sig = instr->sig;
+                                new_sig.small_imm_a = !instr->sig.small_imm_a;
+                                new_sig.small_imm_b = !instr->sig.small_imm_b;
+                                uint32_t sig;
+                                if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
+                                        return false;
+                                *packed_instr &= ~V3D_QPU_SIG_MASK;
+                                *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
+                        }
+                }
+
+                opcode |= a_unpack << 2;
+                opcode |= b_unpack << 0;
+
+                break;
+        }
+
+        case V3D_QPU_A_VFPACK: {
+                uint32_t a_unpack;
+                uint32_t b_unpack;
+
+                if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
+                    instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
+                        return false;
+                }
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
+                                                 &a_unpack)) {
+                        return false;
+                }
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
+                                                 &b_unpack)) {
+                        return false;
+                }
+
+                opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
+                opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
+
+                break;
+        }
+
+        case V3D_QPU_A_FFLOOR:
+        case V3D_QPU_A_FROUND:
+        case V3D_QPU_A_FTRUNC:
+        case V3D_QPU_A_FCEIL:
+        case V3D_QPU_A_FDX:
+        case V3D_QPU_A_FDY: {
+                uint32_t packed;
+
+                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
+                                               &packed)) {
+                        return false;
+                }
+                raddr_b |= packed;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                if (packed == 0)
+                        return false;
+                raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
+                break;
+        }
+
+        case V3D_QPU_A_FTOIN:
+        case V3D_QPU_A_FTOIZ:
+        case V3D_QPU_A_FTOUZ:
+        case V3D_QPU_A_FTOC:
+                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
+                        return false;
+
+                uint32_t packed;
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                if (packed == 0)
+                        return false;
+
                /* NOTE(review): with |= the mask-clear is a no-op (the OR
                 * with the old raddr_b restores any cleared bits); the
                 * FFLOOR case above uses plain '='. Net effect here is
                 * raddr_b |= packed << 2 — confirm that is intended.
                 */
+                raddr_b |= (raddr_b & ~(0x3 << 2)) | packed << 2;
+
+                break;
+
+        case V3D_QPU_A_VFMIN:
+        case V3D_QPU_A_VFMAX:
+                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
+                    instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
+                        return false;
+                }
+
+                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                opcode |= packed;
+                break;
+
+        case V3D_QPU_A_MOV: {
+                uint32_t packed;
+
+                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
+                        return false;
+
+                if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
+                                               &packed)) {
+                        return false;
+                }
+
+                raddr_b |= packed << 2;
+                break;
+        }
+
+        case V3D_QPU_A_FMOV: {
+                uint32_t packed;
+
+                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
+                                               &packed)) {
+                        return false;
+                }
+                raddr_b = packed;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                raddr_b |= packed << 2;
+                break;
+        }
+
+        default:
+                if (instr->alu.add.op != V3D_QPU_A_NOP &&
+                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
+                     instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
+                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
+                        return false;
+                }
+                break;
+        }
+
+        *packed_instr |= QPU_SET_FIELD(raddr_a, V3D_QPU_RADDR_A);
+        *packed_instr |= QPU_SET_FIELD(raddr_b, V3D_QPU_RADDR_B);
+        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
+        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
+        if (instr->alu.add.magic_write && !no_magic_write)
+                *packed_instr |= V3D_QPU_MA;
+
+        return true;
+}
+
+static bool
+v3d33_qpu_mul_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
+ uint32_t mux_a = instr->alu.mul.a.mux;
+ uint32_t mux_b = instr->alu.mul.b.mux;
int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
const struct opcode_desc *desc =
- lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
+ lookup_opcode_from_instr(devinfo, mul_ops_v33,
+ ARRAY_SIZE(mul_ops_v33),
instr->alu.mul.op);
if (!desc)
@@ -1218,10 +2086,10 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
* that here. If mux a/b determine packing, it will be set below.
*/
if (nsrc < 2)
- mux_b = ffs(desc->mux_b_mask) - 1;
+ mux_b = ffs(desc->mux.b_mask) - 1;
if (nsrc < 1)
- mux_a = ffs(desc->mux_a_mask) - 1;
+ mux_a = ffs(desc->mux.a_mask) - 1;
switch (instr->alu.mul.op) {
case V3D_QPU_M_FMUL: {
@@ -1236,13 +2104,13 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
*/
opcode += packed << 4;
- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
&packed)) {
return false;
}
opcode |= packed << 2;
- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
&packed)) {
return false;
}
@@ -1260,7 +2128,7 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
opcode |= (packed >> 1) & 1;
mux_b = (packed & 1) << 2;
- if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
&packed)) {
return false;
}
@@ -1274,22 +2142,28 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
return false;
- if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
+ if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
&packed)) {
return false;
}
- if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
+ if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
opcode = 8;
else
opcode |= (packed + 4) & 7;
- if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
+ if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
return false;
break;
}
default:
+ if (instr->alu.mul.op != V3D_QPU_M_NOP &&
+ (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
+ instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
+ instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
+ return false;
+ }
break;
}
@@ -1305,6 +2179,150 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
}
static bool
+v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
+                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
        /* Encodes the mul-ALU half of a v71 instruction into *packed_instr:
         * opcode, RADDR_C/RADDR_D operands, WADDR_M and the MM (magic
         * write) bit. Returns false on an un-encodable combination.
         */
+        uint32_t raddr_c = instr->alu.mul.a.raddr;
+        uint32_t raddr_d = instr->alu.mul.b.raddr;
+        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
+
+        const struct opcode_desc *desc =
+                lookup_opcode_from_instr(devinfo, mul_ops_v71,
+                                         ARRAY_SIZE(mul_ops_v71),
+                                         instr->alu.mul.op);
+        if (!desc)
+                return false;
+
+        uint32_t opcode = desc->opcode_first;
+
+        /* Some opcodes have a single valid value for their raddr_d, so set
+         * that here. If raddr_b determine packing, it will be set below.
+         */
+        if (nsrc < 2)
+                raddr_d = ffsll(desc->raddr_mask) - 1;
+
+        switch (instr->alu.mul.op) {
+        case V3D_QPU_M_FMUL: {
+                uint32_t packed;
+
+                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+                                               &packed)) {
+                        return false;
+                }
+                /* No need for a +1 because desc->opcode_first has a 1 in this
+                 * field.
+                 */
+                opcode += packed << 4;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                opcode |= packed << 2;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                opcode |= packed << 0;
+                break;
+        }
+
+        case V3D_QPU_M_FMOV: {
+                uint32_t packed;
+
                /* raddr_d bits 0-1: output pack; bits 2-4: input unpack. */
+                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+                                               &packed)) {
+                        return false;
+                }
+                raddr_d |= packed;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                raddr_d |= packed << 2;
+                break;
+        }
+
+        case V3D_QPU_M_VFMUL: {
                /* The v71 encoding of VFMUL has not been filled in yet; the
                 * code below is the v33 scheme kept as a stub.
                 */
+                unreachable("pending v71 update");
+                uint32_t packed;
+
+                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
+                        return false;
+
+                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
+                        opcode = 8;
+                else
+                        opcode |= (packed + 4) & 7;
+
+                if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
+                        return false;
+
+                break;
+        }
+
+        case V3D_QPU_M_MOV: {
+                uint32_t packed;
+
+                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
+                        return false;
+
+                if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
+                                               &packed)) {
+                        return false;
+                }
+
+                raddr_d |= packed << 2;
+                break;
+        }
+
+        default:
+                if (instr->alu.mul.op != V3D_QPU_M_NOP &&
+                    (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
+                     instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
+                     instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
+                        return false;
+                }
+                break;
+        }
+
+        *packed_instr |= QPU_SET_FIELD(raddr_c, V3D_QPU_RADDR_C);
+        *packed_instr |= QPU_SET_FIELD(raddr_d, V3D_QPU_RADDR_D);
+        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
+        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
+        if (instr->alu.mul.magic_write)
+                *packed_instr |= V3D_QPU_MM;
+
+        return true;
+}
+
+static bool
+v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
+ if (devinfo->ver < 71)
+ return v3d33_qpu_add_pack(devinfo, instr, packed_instr);
+ else
+ return v3d71_qpu_add_pack(devinfo, instr, packed_instr);
+}
+
+static bool
+v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
+ if (devinfo->ver < 71)
+ return v3d33_qpu_mul_pack(devinfo, instr, packed_instr);
+ else
+ return v3d71_qpu_mul_pack(devinfo, instr, packed_instr);
+}
+
+static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
uint64_t packed_instr,
struct v3d_qpu_instr *instr)
@@ -1332,8 +2350,14 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
return false;
}
- instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
- instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
+ if (devinfo->ver <= 71) {
+ /*
+ * For v71 this will be set on add/mul unpack, as raddr are now
+ * part of v3d_qpu_input
+ */
+ instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
+ instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
+ }
if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
return false;
@@ -1419,8 +2443,14 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
*packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
- *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
- *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
+ if (devinfo->ver < 71) {
+ /*
+ * For v71 this will be set on add/mul unpack, as raddr are now
+ * part of v3d_qpu_input
+ */
+ *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
+ *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
+ }
if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
return false;