summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org> 2019-05-23 05:33:34 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2019-05-23 05:33:34 +0000
commit: 9886815a25d84be79f51e65ebd8e458bb5d26ca8 (patch)
tree: a65edf018dd992543337433f7303fb29a6c8e8cf /lib/mesa/src/gallium/drivers/vc4
parent: e2a3acb64af2657b1181806818eacad061103c23 (diff)
Merge Mesa 19.0.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/vc4')
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c 13
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h 6
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_context.c 13
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_context.h 9
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_job.c 6
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_blend.c 40
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_io.c 3
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_program.c 59
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_resource.c 13
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_screen.c 3
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c 187
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_simulator_validate.h 1
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c 227
-rw-r--r-- lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c 2
14 files changed, 147 insertions, 435 deletions
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c
index 54f9d9c26..716ca50ea 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -386,7 +386,6 @@ vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
static struct vc4_bo *
vc4_bo_open_handle(struct vc4_screen *screen,
- uint32_t winsys_stride,
uint32_t handle, uint32_t size)
{
struct vc4_bo *bo;
@@ -410,8 +409,7 @@ vc4_bo_open_handle(struct vc4_screen *screen,
bo->private = false;
#ifdef USE_VC4_SIMULATOR
- vc4_simulator_open_from_handle(screen->fd, winsys_stride,
- bo->handle, bo->size);
+ vc4_simulator_open_from_handle(screen->fd, bo->handle, bo->size);
bo->map = malloc(bo->size);
#endif
@@ -423,8 +421,7 @@ done:
}
struct vc4_bo *
-vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
- uint32_t winsys_stride)
+vc4_bo_open_name(struct vc4_screen *screen, uint32_t name)
{
struct drm_gem_open o = {
.name = name
@@ -436,11 +433,11 @@ vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
return NULL;
}
- return vc4_bo_open_handle(screen, winsys_stride, o.handle, o.size);
+ return vc4_bo_open_handle(screen, o.handle, o.size);
}
struct vc4_bo *
-vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd, uint32_t winsys_stride)
+vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd)
{
uint32_t handle;
int ret = drmPrimeFDToHandle(screen->fd, fd, &handle);
@@ -457,7 +454,7 @@ vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd, uint32_t winsys_stride)
return NULL;
}
- return vc4_bo_open_handle(screen, winsys_stride, handle, size);
+ return vc4_bo_open_handle(screen, handle, size);
}
int
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h
index 9fa477442..30a388ee5 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -66,10 +66,8 @@ struct vc4_bo *vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data,
uint32_t size);
void vc4_bo_last_unreference(struct vc4_bo *bo);
void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time);
-struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
- uint32_t winsys_stride);
-struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd,
- uint32_t winsys_stride);
+struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name);
+struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd);
bool vc4_bo_flink(struct vc4_bo *bo, uint32_t *name);
int vc4_bo_get_dmabuf(struct vc4_bo *bo);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_context.c b/lib/mesa/src/gallium/drivers/vc4/vc4_context.c
index ffd7d4c85..94969dcb1 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_context.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_context.c
@@ -85,6 +85,18 @@ vc4_texture_barrier(struct pipe_context *pctx, unsigned flags)
}
static void
+vc4_set_debug_callback(struct pipe_context *pctx,
+ const struct pipe_debug_callback *cb)
+{
+ struct vc4_context *vc4 = vc4_context(pctx);
+
+ if (cb)
+ vc4->debug = *cb;
+ else
+ memset(&vc4->debug, 0, sizeof(vc4->debug));
+}
+
+static void
vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
struct vc4_context *vc4 = vc4_context(pctx);
@@ -164,6 +176,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
pctx->priv = priv;
pctx->destroy = vc4_context_destroy;
pctx->flush = vc4_pipe_flush;
+ pctx->set_debug_callback = vc4_set_debug_callback;
pctx->invalidate_resource = vc4_invalidate_resource;
pctx->texture_barrier = vc4_texture_barrier;
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_context.h b/lib/mesa/src/gallium/drivers/vc4/vc4_context.h
index ce8bcffac..1d3179c71 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_context.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_context.h
@@ -405,6 +405,7 @@ struct vc4_context {
struct pipe_viewport_state viewport;
struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
struct vc4_vertexbuf_stateobj vertexbuf;
+ struct pipe_debug_callback debug;
struct vc4_hwperfmon *perfmon;
/** @} */
@@ -451,6 +452,8 @@ struct vc4_depth_stencil_alpha_state {
#define perf_debug(...) do { \
if (unlikely(vc4_debug & VC4_DEBUG_PERF)) \
fprintf(stderr, __VA_ARGS__); \
+ if (unlikely(vc4->debug.debug_message)) \
+ pipe_debug_message(&vc4->debug, PERF_INFO, __VA_ARGS__); \
} while (0)
static inline struct vc4_context *
@@ -486,12 +489,8 @@ void vc4_program_fini(struct pipe_context *pctx);
void vc4_query_init(struct pipe_context *pctx);
void vc4_simulator_init(struct vc4_screen *screen);
void vc4_simulator_destroy(struct vc4_screen *screen);
-int vc4_simulator_flush(struct vc4_context *vc4,
- struct drm_vc4_submit_cl *args,
- struct vc4_job *job);
int vc4_simulator_ioctl(int fd, unsigned long request, void *arg);
-void vc4_simulator_open_from_handle(int fd, uint32_t winsys_stride,
- int handle, uint32_t size);
+void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size);
static inline int
vc4_ioctl(int fd, unsigned long request, void *arg)
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_job.c b/lib/mesa/src/gallium/drivers/vc4/vc4_job.c
index f38c46475..2b87a00df 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_job.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_job.c
@@ -492,11 +492,7 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
if (!(vc4_debug & VC4_DEBUG_NORAST)) {
int ret;
-#ifndef USE_VC4_SIMULATOR
- ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
-#else
- ret = vc4_simulator_flush(vc4, &submit, job);
-#endif
+ ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
static bool warned = false;
if (ret && !warned) {
fprintf(stderr, "Draw call returned %s. "
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 60eccb4fc..ff6268f47 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -42,6 +42,7 @@
#include "util/u_format.h"
#include "vc4_qir.h"
#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
#include "vc4_context.h"
static bool
@@ -67,37 +68,6 @@ vc4_nir_get_dst_color(nir_builder *b, int sample)
return &load->dest.ssa;
}
-static nir_ssa_def *
-vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
-{
- nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
- nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
- nir_ssa_def *high = nir_fpow(b,
- nir_fmul(b,
- nir_fadd(b, srgb,
- nir_imm_float(b, 0.055)),
- nir_imm_float(b, 1.0 / 1.055)),
- nir_imm_float(b, 2.4));
-
- return nir_bcsel(b, is_low, low, high);
-}
-
-static nir_ssa_def *
-vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
-{
- nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
- nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
- nir_ssa_def *high = nir_fsub(b,
- nir_fmul(b,
- nir_imm_float(b, 1.055),
- nir_fpow(b,
- linear,
- nir_imm_float(b, 0.41666))),
- nir_imm_float(b, 0.055));
-
- return nir_bcsel(b, is_low, low, high);
-}
-
static nir_ssa_def *
vc4_blend_channel_f(nir_builder *b,
nir_ssa_def **src,
@@ -130,7 +100,7 @@ vc4_blend_channel_f(nir_builder *b,
return nir_load_system_value(b,
nir_intrinsic_load_blend_const_color_r_float +
channel,
- 0);
+ 0, 32);
case PIPE_BLENDFACTOR_CONST_ALPHA:
return nir_load_blend_const_color_a_float(b);
case PIPE_BLENDFACTOR_ZERO:
@@ -148,7 +118,7 @@ vc4_blend_channel_f(nir_builder *b,
nir_load_system_value(b,
nir_intrinsic_load_blend_const_color_r_float +
channel,
- 0));
+ 0, 32));
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
return nir_fsub(b, nir_imm_float(b, 1.0),
nir_load_blend_const_color_a_float(b));
@@ -501,14 +471,14 @@ vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
/* Turn dst color to linear. */
for (int i = 0; i < 3; i++)
- dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
+ dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
nir_ssa_def *blend_color[4];
vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
/* sRGB encode the output color */
for (int i = 0; i < 3; i++)
- blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
+ blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
} else {
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index b7969a562..fc2baee1b 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -330,7 +330,8 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr_comp =
nir_intrinsic_instr_create(c->s, intr->intrinsic);
intr_comp->num_components = 1;
- nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1,
+ intr->dest.ssa.bit_size, NULL);
/* Convert the uniform offset to bytes. If it happens
* to be a constant, constant-folding will clean up
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
index bc9bd76ae..8f1e561c4 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_program.c
@@ -1004,24 +1004,24 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
enum qpu_cond cond;
switch (compare_instr->op) {
- case nir_op_feq:
- case nir_op_ieq:
+ case nir_op_feq32:
+ case nir_op_ieq32:
case nir_op_seq:
cond = QPU_COND_ZS;
break;
- case nir_op_fne:
- case nir_op_ine:
+ case nir_op_fne32:
+ case nir_op_ine32:
case nir_op_sne:
cond = QPU_COND_ZC;
break;
- case nir_op_fge:
- case nir_op_ige:
- case nir_op_uge:
+ case nir_op_fge32:
+ case nir_op_ige32:
+ case nir_op_uge32:
case nir_op_sge:
cond = QPU_COND_NC;
break;
- case nir_op_flt:
- case nir_op_ilt:
+ case nir_op_flt32:
+ case nir_op_ilt32:
case nir_op_slt:
cond = QPU_COND_NS;
break;
@@ -1048,7 +1048,7 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
qir_uniform_f(c, 1.0), qir_uniform_f(c, 0.0));
break;
- case nir_op_bcsel:
+ case nir_op_b32csel:
*dest = qir_SEL(c, cond,
ntq_get_alu_src(c, sel_instr, 1),
ntq_get_alu_src(c, sel_instr, 2));
@@ -1208,14 +1208,14 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
case nir_op_u2f32:
result = qir_ITOF(c, src[0]);
break;
- case nir_op_b2f:
+ case nir_op_b2f32:
result = qir_AND(c, src[0], qir_uniform_f(c, 1.0));
break;
- case nir_op_b2i:
+ case nir_op_b2i32:
result = qir_AND(c, src[0], qir_uniform_ui(c, 1));
break;
- case nir_op_i2b:
- case nir_op_f2b:
+ case nir_op_i2b32:
+ case nir_op_f2b32:
qir_SF(c, src[0]);
result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC,
qir_uniform_ui(c, ~0),
@@ -1264,21 +1264,21 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
case nir_op_sne:
case nir_op_sge:
case nir_op_slt:
- case nir_op_feq:
- case nir_op_fne:
- case nir_op_fge:
- case nir_op_flt:
- case nir_op_ieq:
- case nir_op_ine:
- case nir_op_ige:
- case nir_op_uge:
- case nir_op_ilt:
+ case nir_op_feq32:
+ case nir_op_fne32:
+ case nir_op_fge32:
+ case nir_op_flt32:
+ case nir_op_ieq32:
+ case nir_op_ine32:
+ case nir_op_ige32:
+ case nir_op_uge32:
+ case nir_op_ilt32:
if (!ntq_emit_comparison(c, &result, instr, instr)) {
fprintf(stderr, "Bad comparison instruction\n");
}
break;
- case nir_op_bcsel:
+ case nir_op_b32csel:
result = ntq_emit_bcsel(c, instr, src);
break;
case nir_op_fcsel:
@@ -1591,14 +1591,14 @@ vc4_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
- NIR_PASS(progress, s, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
NIR_PASS(progress, s, nir_opt_loop_unroll,
nir_var_shader_in |
nir_var_shader_out |
- nir_var_local);
+ nir_var_function_temp);
} while (progress);
}
@@ -2363,7 +2363,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
if (stage == QSTAGE_FRAG) {
NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
} else {
- NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
+ NIR_PASS_V(c->s, nir_lower_clip_vs,
+ c->key->ucp_enables, false);
NIR_PASS_V(c->s, nir_lower_io_to_scalar,
nir_var_shader_out);
}
@@ -2384,6 +2385,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
vc4_optimize_nir(c->s);
+ NIR_PASS_V(c->s, nir_lower_bool_to_int32);
+
NIR_PASS_V(c->s, nir_convert_from_ssa, true);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
@@ -2514,7 +2517,7 @@ vc4_shader_state_create(struct pipe_context *pctx,
vc4_optimize_nir(s);
- NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local);
+ NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
/* Garbage collect dead instructions */
nir_sweep(s);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c b/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c
index 41e6ec5c1..a4d1b903b 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_resource.c
@@ -319,8 +319,10 @@ vc4_resource_get_handle(struct pipe_screen *pscreen,
return vc4_bo_flink(rsc->bo, &whandle->handle);
case WINSYS_HANDLE_TYPE_KMS:
- if (screen->ro && renderonly_get_handle(rsc->scanout, whandle))
- return TRUE;
+ if (screen->ro) {
+ assert(rsc->scanout);
+ return renderonly_get_handle(rsc->scanout, whandle);
+ }
whandle->handle = rsc->bo->handle;
return TRUE;
case WINSYS_HANDLE_TYPE_FD:
@@ -622,12 +624,10 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
switch (whandle->type) {
case WINSYS_HANDLE_TYPE_SHARED:
- rsc->bo = vc4_bo_open_name(screen,
- whandle->handle, whandle->stride);
+ rsc->bo = vc4_bo_open_name(screen, whandle->handle);
break;
case WINSYS_HANDLE_TYPE_FD:
- rsc->bo = vc4_bo_open_dmabuf(screen,
- whandle->handle, whandle->stride);
+ rsc->bo = vc4_bo_open_dmabuf(screen, whandle->handle);
break;
default:
fprintf(stderr,
@@ -1013,6 +1013,7 @@ void
vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
struct pipe_sampler_view *pview)
{
+ struct vc4_context *vc4 = vc4_context(pctx);
struct vc4_sampler_view *view = vc4_sampler_view(pview);
struct vc4_resource *shadow = vc4_resource(view->texture);
struct vc4_resource *orig = vc4_resource(pview->texture);
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c b/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c
index e7f7c82c2..acb4a1feb 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_screen.c
@@ -178,6 +178,9 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Note: Not supported in hardware, just faking it. */
return 5;
+ case PIPE_CAP_MAX_VARYINGS:
+ return 8;
+
case PIPE_CAP_VENDOR_ID:
return 0x14E4;
case PIPE_CAP_ACCELERATED:
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c b/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c
index 37c098a04..2ce5a7596 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_simulator.c
@@ -99,10 +99,13 @@ struct vc4_simulator_bo {
/** Area for this BO within sim_state->mem */
struct mem_block *block;
- void *winsys_map;
- uint32_t winsys_stride;
int handle;
+
+ /* Mapping of the underlying GEM object that we copy in/out of
+ * simulator memory.
+ */
+ void *gem_vaddr;
};
static void *
@@ -143,6 +146,7 @@ vc4_create_simulator_bo(int fd, int handle, unsigned size)
sim_bo->file = file;
sim_bo->handle = handle;
+ /* Allocate space for the buffer in simulator memory. */
mtx_lock(&sim_state.mutex);
sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, PAGE_ALIGN2, 0);
mtx_unlock(&sim_state.mutex);
@@ -162,6 +166,25 @@ vc4_create_simulator_bo(int fd, int handle, unsigned size)
mtx_lock(&sim_state.mutex);
_mesa_hash_table_insert(file->bo_map, int_to_key(handle), bo);
mtx_unlock(&sim_state.mutex);
+
+ /* Map the GEM buffer for copy in/out to the simulator. */
+ struct drm_mode_map_dumb map = {
+ .handle = handle,
+ };
+ int ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);
+ if (ret) {
+ fprintf(stderr, "Failed to get MMAP offset: %d\n",
+ errno);
+ abort();
+ }
+ sim_bo->gem_vaddr = mmap(NULL, obj->base.size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd, map.offset);
+ if (sim_bo->gem_vaddr == MAP_FAILED) {
+ fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+ handle, (long long)map.offset, (int)obj->base.size);
+ abort();
+ }
}
return sim_bo;
@@ -174,16 +197,19 @@ vc4_free_simulator_bo(struct vc4_simulator_bo *sim_bo)
struct drm_vc4_bo *bo = &sim_bo->base;
struct drm_gem_cma_object *obj = &bo->base;
- if (sim_bo->winsys_map)
- munmap(sim_bo->winsys_map, obj->base.size);
+ if (bo->validated_shader) {
+ free(bo->validated_shader->texture_samples);
+ free(bo->validated_shader);
+ }
+
+ if (sim_bo->gem_vaddr)
+ munmap(sim_bo->gem_vaddr, obj->base.size);
mtx_lock(&sim_state.mutex);
u_mmFreeMem(sim_bo->block);
if (sim_bo->handle) {
- struct hash_entry *entry =
- _mesa_hash_table_search(sim_file->bo_map,
- int_to_key(sim_bo->handle));
- _mesa_hash_table_remove(sim_file->bo_map, entry);
+ _mesa_hash_table_remove_key(sim_file->bo_map,
+ int_to_key(sim_bo->handle));
}
mtx_unlock(&sim_state.mutex);
ralloc_free(sim_bo);
@@ -210,41 +236,23 @@ drm_gem_cma_create(struct drm_device *dev, size_t size)
}
static int
-vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_job *job,
+vc4_simulator_pin_bos(struct vc4_simulator_file *file,
struct vc4_exec_info *exec)
{
- int fd = dev->screen->fd;
- struct vc4_simulator_file *file = vc4_get_simulator_file_for_fd(fd);
struct drm_vc4_submit_cl *args = exec->args;
- struct vc4_bo **bos = job->bo_pointers.base;
+ uint32_t *bo_handles = (uint32_t *)(uintptr_t)args->bo_handles;
exec->bo_count = args->bo_handle_count;
exec->bo = calloc(exec->bo_count, sizeof(void *));
for (int i = 0; i < exec->bo_count; i++) {
- struct vc4_bo *bo = bos[i];
struct vc4_simulator_bo *sim_bo =
- vc4_get_simulator_bo(file, bo->handle);
+ vc4_get_simulator_bo(file, bo_handles[i]);
struct drm_vc4_bo *drm_bo = &sim_bo->base;
struct drm_gem_cma_object *obj = &drm_bo->base;
- drm_bo->bo = bo;
-#if 0
- fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
-#endif
-
- vc4_bo_map(bo);
- memcpy(obj->vaddr, bo->map, bo->size);
+ memcpy(obj->vaddr, sim_bo->gem_vaddr, obj->base.size);
exec->bo[i] = obj;
-
- /* The kernel does this validation at shader create ioctl
- * time.
- */
- if (strcmp(bo->name, "code") == 0) {
- drm_bo->validated_shader = vc4_validate_shader(obj);
- if (!drm_bo->validated_shader)
- abort();
- }
}
return 0;
}
@@ -255,16 +263,13 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
for (int i = 0; i < exec->bo_count; i++) {
struct drm_gem_cma_object *obj = exec->bo[i];
struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
- struct vc4_bo *bo = drm_bo->bo;
+ struct vc4_simulator_bo *sim_bo =
+ (struct vc4_simulator_bo *)drm_bo;
assert(*(uint32_t *)(obj->vaddr +
obj->base.size) == BO_SENTINEL);
- memcpy(bo->map, obj->vaddr, bo->size);
-
- if (drm_bo->validated_shader) {
- free(drm_bo->validated_shader->texture_samples);
- free(drm_bo->validated_shader);
- }
+ if (sim_bo->gem_vaddr)
+ memcpy(sim_bo->gem_vaddr, obj->vaddr, obj->base.size);
}
free(exec->bo);
@@ -359,19 +364,10 @@ vc4_dump_to_file(struct vc4_exec_info *exec)
fclose(f);
}
-int
-vc4_simulator_flush(struct vc4_context *vc4,
- struct drm_vc4_submit_cl *args, struct vc4_job *job)
+static int
+vc4_simulator_submit_cl_ioctl(int fd, struct drm_vc4_submit_cl *args)
{
- struct vc4_screen *screen = vc4->screen;
- int fd = screen->fd;
struct vc4_simulator_file *file = vc4_get_simulator_file_for_fd(fd);
- struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
- struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
- struct vc4_simulator_bo *csim_bo = ctex ? vc4_get_simulator_bo(file, ctex->bo->handle) : NULL;
- uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0;
- uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
- uint32_t row_len = MIN2(sim_stride, winsys_stride);
struct vc4_exec_info exec;
struct drm_device *dev = &file->dev;
int ret;
@@ -379,25 +375,9 @@ vc4_simulator_flush(struct vc4_context *vc4,
memset(&exec, 0, sizeof(exec));
list_inithead(&exec.unref_list);
- if (ctex && csim_bo->winsys_map) {
-#if 0
- fprintf(stderr, "%dx%d %d %d %d\n",
- ctex->base.b.width0, ctex->base.b.height0,
- winsys_stride,
- sim_stride,
- ctex->bo->size);
-#endif
-
- for (int y = 0; y < ctex->base.height0; y++) {
- memcpy(ctex->bo->map + y * sim_stride,
- csim_bo->winsys_map + y * winsys_stride,
- row_len);
- }
- }
-
exec.args = args;
- ret = vc4_simulator_pin_bos(dev, job, &exec);
+ ret = vc4_simulator_pin_bos(file, &exec);
if (ret)
return ret;
@@ -448,65 +428,19 @@ vc4_simulator_flush(struct vc4_context *vc4,
vc4_free_simulator_bo(sim_bo);
}
- if (ctex && csim_bo->winsys_map) {
- for (int y = 0; y < ctex->base.height0; y++) {
- memcpy(csim_bo->winsys_map + y * winsys_stride,
- ctex->bo->map + y * sim_stride,
- row_len);
- }
- }
-
return 0;
}
/**
- * Map the underlying GEM object from the real hardware GEM handle.
- */
-static void *
-vc4_simulator_map_winsys_bo(int fd, struct vc4_simulator_bo *sim_bo)
-{
- struct drm_vc4_bo *bo = &sim_bo->base;
- struct drm_gem_cma_object *obj = &bo->base;
- int ret;
- void *map;
-
- struct drm_mode_map_dumb map_dumb = {
- .handle = sim_bo->handle,
- };
- ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb);
- if (ret != 0) {
- fprintf(stderr, "map ioctl failure\n");
- abort();
- }
-
- map = mmap(NULL, obj->base.size, PROT_READ | PROT_WRITE, MAP_SHARED,
- fd, map_dumb.offset);
- if (map == MAP_FAILED) {
- fprintf(stderr,
- "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
- sim_bo->handle, (long long)map_dumb.offset,
- (int)obj->base.size);
- abort();
- }
-
- return map;
-}
-
-/**
* Do fixups after a BO has been opened from a handle.
*
* This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE
* time, but we're still using drmPrimeFDToHandle() so we have this helper to
* be called afterward instead.
*/
-void vc4_simulator_open_from_handle(int fd, uint32_t winsys_stride,
- int handle, uint32_t size)
+void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size)
{
- struct vc4_simulator_bo *sim_bo =
- vc4_create_simulator_bo(fd, handle, size);
-
- sim_bo->winsys_stride = winsys_stride;
- sim_bo->winsys_map = vc4_simulator_map_winsys_bo(fd, sim_bo);
+ vc4_create_simulator_bo(fd, handle, size);
}
/**
@@ -558,19 +492,22 @@ vc4_simulator_create_shader_bo_ioctl(int fd,
args->handle = create.handle;
- vc4_create_simulator_bo(fd, create.handle, args->size);
+ struct vc4_simulator_bo *sim_bo =
+ vc4_create_simulator_bo(fd, create.handle, args->size);
+ struct drm_vc4_bo *drm_bo = &sim_bo->base;
+ struct drm_gem_cma_object *obj = &drm_bo->base;
- struct drm_mode_map_dumb map = {
- .handle = create.handle
- };
- ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);
- if (ret)
- return ret;
+ /* Copy into the simulator's BO for validation. */
+ memcpy(obj->vaddr, (void *)(uintptr_t)args->data, args->size);
+
+ /* Copy into the GEM BO to prevent the simulator_pin_bos() from
+ * smashing it.
+ */
+ memcpy(sim_bo->gem_vaddr, (void *)(uintptr_t)args->data, args->size);
- void *shader = mmap(NULL, args->size, PROT_READ | PROT_WRITE, MAP_SHARED,
- fd, map.offset);
- memcpy(shader, (void *)(uintptr_t)args->data, args->size);
- munmap(shader, args->size);
+ drm_bo->validated_shader = vc4_validate_shader(obj);
+ if (!drm_bo->validated_shader)
+ return -EINVAL;
return 0;
}
@@ -643,6 +580,8 @@ int
vc4_simulator_ioctl(int fd, unsigned long request, void *args)
{
switch (request) {
+ case DRM_IOCTL_VC4_SUBMIT_CL:
+ return vc4_simulator_submit_cl_ioctl(fd, args);
case DRM_IOCTL_VC4_CREATE_BO:
return vc4_simulator_create_bo_ioctl(fd, args);
case DRM_IOCTL_VC4_CREATE_SHADER_BO:
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_simulator_validate.h b/lib/mesa/src/gallium/drivers/vc4/vc4_simulator_validate.h
index d507b5fb6..e2777cd54 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -94,7 +94,6 @@ struct drm_gem_cma_object {
struct drm_vc4_bo {
struct drm_gem_cma_object base;
- struct vc4_bo *bo;
struct vc4_validated_shader_info *validated_shader;
struct list_head unref_head;
};
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c
index 167161fdf..d2a84bb35 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt.c
@@ -26,7 +26,7 @@
* Helper functions from vc4_tiling.c that will be compiled for using NEON
* assembly or not.
*
- * If VC4_BUILD_NEON is set, then the functions will be suffixed with _neon.
+ * If V3D_BUILD_NEON is set, then the functions will be suffixed with _neon.
* They will only use NEON assembly if __ARM_ARCH is also set, to keep the x86
* sim build working.
*/
@@ -34,8 +34,9 @@
#include <string.h>
#include "pipe/p_state.h"
#include "vc4_tiling.h"
+#include "broadcom/common/v3d_cpu_tiling.h"
-#ifdef VC4_BUILD_NEON
+#ifdef V3D_BUILD_NEON
#define NEON_TAG(x) x ## _neon
#else
#define NEON_TAG(x) x ## _base
@@ -63,217 +64,6 @@ vc4_utile_stride(int cpp)
}
}
-static void
-vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp)
-{
- uint32_t gpu_stride = vc4_utile_stride(cpp);
-#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
- if (gpu_stride == 8) {
- __asm__ volatile (
- /* Load from the GPU in one shot, no interleave, to
- * d0-d7.
- */
- "vldm %[gpu], {q0, q1, q2, q3}\n"
- /* Store each 8-byte line to cpu-side destination,
- * incrementing it by the stride each time.
- */
- "vst1.8 d0, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d1, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d2, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d3, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d4, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d5, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d6, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d7, [%[cpu]]\n"
- : [cpu] "+r"(cpu)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "q0", "q1", "q2", "q3");
- } else {
- assert(gpu_stride == 16);
- void *cpu2 = cpu + 8;
- __asm__ volatile (
- /* Load from the GPU in one shot, no interleave, to
- * d0-d7.
- */
- "vldm %[gpu], {q0, q1, q2, q3};\n"
- /* Store each 16-byte line in 2 parts to the cpu-side
- * destination. (vld1 can only store one d-register
- * at a time).
- */
- "vst1.8 d0, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d1, [%[cpu2]],%[cpu_stride]\n"
- "vst1.8 d2, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d3, [%[cpu2]],%[cpu_stride]\n"
- "vst1.8 d4, [%[cpu]], %[cpu_stride]\n"
- "vst1.8 d5, [%[cpu2]],%[cpu_stride]\n"
- "vst1.8 d6, [%[cpu]]\n"
- "vst1.8 d7, [%[cpu2]]\n"
- : [cpu] "+r"(cpu),
- [cpu2] "+r"(cpu2)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "q0", "q1", "q2", "q3");
- }
-#elif defined (PIPE_ARCH_AARCH64)
- if (gpu_stride == 8) {
- __asm__ volatile (
- /* Load from the GPU in one shot, no interleave, to
- * d0-d7.
- */
- "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
- /* Store each 8-byte line to cpu-side destination,
- * incrementing it by the stride each time.
- */
- "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
- "st1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n"
- "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
- "st1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n"
- "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
- "st1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n"
- "st1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n"
- "st1 {v3.D}[1], [%[cpu]]\n"
- : [cpu] "+r"(cpu)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "v0", "v1", "v2", "v3");
- } else {
- assert(gpu_stride == 16);
- void *cpu2 = cpu + 8;
- __asm__ volatile (
- /* Load from the GPU in one shot, no interleave, to
- * d0-d7.
- */
- "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
- /* Store each 16-byte line in 2 parts to the cpu-side
- * destination. (vld1 can only store one d-register
- * at a time).
- */
- "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
- "st1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n"
- "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
- "st1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n"
- "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
- "st1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n"
- "st1 {v3.D}[0], [%[cpu]]\n"
- "st1 {v3.D}[1], [%[cpu2]]\n"
- : [cpu] "+r"(cpu),
- [cpu2] "+r"(cpu2)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "v0", "v1", "v2", "v3");
- }
-#else
- for (uint32_t gpu_offset = 0; gpu_offset < 64; gpu_offset += gpu_stride) {
- memcpy(cpu, gpu + gpu_offset, gpu_stride);
- cpu += cpu_stride;
- }
-#endif
-}
-
-static void
-vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp)
-{
- uint32_t gpu_stride = vc4_utile_stride(cpp);
-
-#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
- if (gpu_stride == 8) {
- __asm__ volatile (
- /* Load each 8-byte line from cpu-side source,
- * incrementing it by the stride each time.
- */
- "vld1.8 d0, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d1, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d2, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d3, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d4, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d5, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d6, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d7, [%[cpu]]\n"
- /* Load from the GPU in one shot, no interleave, to
- * d0-d7.
- */
- "vstm %[gpu], {q0, q1, q2, q3}\n"
- : [cpu] "+r"(cpu)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "q0", "q1", "q2", "q3");
- } else {
- assert(gpu_stride == 16);
- void *cpu2 = cpu + 8;
- __asm__ volatile (
- /* Load each 16-byte line in 2 parts from the cpu-side
- * destination. (vld1 can only store one d-register
- * at a time).
- */
- "vld1.8 d0, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d1, [%[cpu2]],%[cpu_stride]\n"
- "vld1.8 d2, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d3, [%[cpu2]],%[cpu_stride]\n"
- "vld1.8 d4, [%[cpu]], %[cpu_stride]\n"
- "vld1.8 d5, [%[cpu2]],%[cpu_stride]\n"
- "vld1.8 d6, [%[cpu]]\n"
- "vld1.8 d7, [%[cpu2]]\n"
- /* Store to the GPU in one shot, no interleave. */
- "vstm %[gpu], {q0, q1, q2, q3}\n"
- : [cpu] "+r"(cpu),
- [cpu2] "+r"(cpu2)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "q0", "q1", "q2", "q3");
- }
-#elif defined (PIPE_ARCH_AARCH64)
- if (gpu_stride == 8) {
- __asm__ volatile (
- /* Load each 8-byte line from cpu-side source,
- * incrementing it by the stride each time.
- */
- "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v3.D}[1], [%[cpu]]\n"
- /* Store to the GPU in one shot, no interleave. */
- "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
- : [cpu] "+r"(cpu)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "v0", "v1", "v2", "v3");
- } else {
- assert(gpu_stride == 16);
- void *cpu2 = cpu + 8;
- __asm__ volatile (
- /* Load each 16-byte line in 2 parts from the cpu-side
- * destination. (vld1 can only store one d-register
- * at a time).
- */
- "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n"
- "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n"
- "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
- "ld1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n"
- "ld1 {v3.D}[0], [%[cpu]]\n"
- "ld1 {v3.D}[1], [%[cpu2]]\n"
- /* Store to the GPU in one shot, no interleave. */
- "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
- : [cpu] "+r"(cpu),
- [cpu2] "+r"(cpu2)
- : [gpu] "r"(gpu),
- [cpu_stride] "r"(cpu_stride)
- : "v0", "v1", "v2", "v3");
- }
-#else
- for (uint32_t gpu_offset = 0; gpu_offset < 64; gpu_offset += gpu_stride) {
- memcpy(gpu + gpu_offset, cpu, gpu_stride);
- cpu += cpu_stride;
- }
-#endif
-
-}
/**
* Returns the X value into the address bits for LT tiling.
*
@@ -349,6 +139,7 @@ vc4_lt_image_aligned(void *gpu, uint32_t gpu_stride,
{
uint32_t utile_w = vc4_utile_width(cpp);
uint32_t utile_h = vc4_utile_height(cpp);
+ uint32_t utile_stride = vc4_utile_stride(cpp);
uint32_t xstart = box->x;
uint32_t ystart = box->y;
@@ -357,15 +148,17 @@ vc4_lt_image_aligned(void *gpu, uint32_t gpu_stride,
void *gpu_tile = gpu + ((ystart + y) * gpu_stride +
(xstart + x) * 64 / utile_w);
if (to_cpu) {
- vc4_load_utile(cpu + (cpu_stride * y +
+ v3d_load_utile(cpu + (cpu_stride * y +
x * cpp),
+ cpu_stride,
gpu_tile,
- cpu_stride, cpp);
+ utile_stride);
} else {
- vc4_store_utile(gpu_tile,
+ v3d_store_utile(gpu_tile,
+ utile_stride,
cpu + (cpu_stride * y +
x * cpp),
- cpu_stride, cpp);
+ cpu_stride);
}
}
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c
index 7ba66ae4c..9efec3799 100644
--- a/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c
+++ b/lib/mesa/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c
@@ -26,5 +26,5 @@
* single file.
*/
-#define VC4_BUILD_NEON
+#define V3D_BUILD_NEON
#include "vc4_tiling_lt.c"