author     Jonathan Gray <jsg@cvs.openbsd.org>    2021-07-22 10:17:30 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>    2021-07-22 10:17:30 +0000
commit     ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (patch)
tree       3e4691a396e6e54cd54224a190663d5cf976625b /lib/mesa/src/gallium/drivers/r600
parent     27c8a50e8bbde7d28b1fc46d715a4c469e24f2c4 (diff)
Import Mesa 21.1.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/r600')
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/Android.mk                      |   13
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/compute_memory_pool.c           |   10
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/compute_memory_pool.h           |    2
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/meson.build                     |  100
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c            |   98
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_cs.h                       |   10
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_dump.c                     |   32
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_isa.c                      |    4
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_isa.h                      |    6
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c              |  149
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h              |   12
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_query.c                    |   56
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_streamout.c                |   12
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_test_dma.c                 |    4
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_texture.c                  |   99
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_viewport.c                 |   10
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/radeon_uvd.c                    |  248
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/radeon_vce.c                    |   38
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/radeon_vce.h                    |   12
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/radeon_video.c                  |   24
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/radeon_video.h                  |    2
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp            |    3
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp             |   11
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp                  |    2
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h                    |    6
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp              |    4
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp  |    0
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h    |   91
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py        |   49
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp     | 1063
30 files changed, 1673 insertions, 497 deletions
diff --git a/lib/mesa/src/gallium/drivers/r600/Android.mk b/lib/mesa/src/gallium/drivers/r600/Android.mk
index 19a3ba820..b87fc91e6 100644
--- a/lib/mesa/src/gallium/drivers/r600/Android.mk
+++ b/lib/mesa/src/gallium/drivers/r600/Android.mk
@@ -32,8 +32,10 @@ LOCAL_SRC_FILES := $(C_SOURCES) $(CXX_SOURCES)
LOCAL_C_INCLUDES += \
$(MESA_TOP)/src/amd/common \
- $(MESA_TOP)/src/amd/llvm
+ $(MESA_TOP)/src/amd/llvm \
+ $(MESA_TOP)/src/mesa
+LOCAL_STATIC_LIBRARIES := libmesa_nir
LOCAL_SHARED_LIBRARIES := libdrm_radeon
LOCAL_MODULE := libmesa_pipe_r600
@@ -47,6 +49,15 @@ $(intermediates)/egd_tables.h: $(MESA_TOP)/src/gallium/drivers/r600/egd_tables.p
@echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))"
$(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/gallium/drivers/r600/egd_tables.py $(MESA_TOP)/src/gallium/drivers/r600/evergreend.h > $@
+sfn_nir_algebraic_gen := $(LOCAL_PATH)/sfn/sfn_nir_algebraic.py
+sfn_nir_algebraic_deps := \
+ $(LOCAL_PATH)/sfn/sfn_nir_algebraic.py \
+ $(MESA_TOP)/src/compiler/nir/nir_algebraic.py
+
+$(intermediates)/sfn_nir_algebraic.c: $(sfn_nir_algebraic_deps)
+ @mkdir -p $(dir $@)
+ $(hide) $(MESA_PYTHON2) $(sfn_nir_algebraic_gen) -p $(MESA_TOP)/src/compiler/nir/ > $@
+
ifeq ($(MESA_ENABLE_LLVM),true)
$(call mesa-build-with-llvm)
endif
diff --git a/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.c b/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.c
index 685c2b6d2..58a5dffdf 100644
--- a/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.c
+++ b/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.c
@@ -436,7 +436,7 @@ static void compute_memory_move_item(struct compute_memory_pool *pool,
if (pool->item_list != item->link.prev) {
ASSERTED struct compute_memory_item *prev;
- prev = container_of(item->link.prev, item, link);
+ prev = container_of(item->link.prev, struct compute_memory_item, link);
assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
}
@@ -479,7 +479,7 @@ static void compute_memory_move_item(struct compute_memory_pool *pool,
u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);
- map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
+ map = pipe->transfer_map(pipe, src, 0, PIPE_MAP_READ_WRITE,
&box, &trans);
assert(map);
@@ -495,7 +495,7 @@ static void compute_memory_move_item(struct compute_memory_pool *pool,
}
/**
- * Frees the memory asociated to the item with id \a id from the pool.
+ * Frees the memory associated to the item with id \a id from the pool.
* \param id The id of the item to be freed.
*/
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
@@ -614,7 +614,7 @@ static void compute_memory_transfer(
offset_in_chunk, size);
if (device_to_host) {
- map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
+ map = pipe->transfer_map(pipe, gart, 0, PIPE_MAP_READ,
&(struct pipe_box) { .width = aligned_size * 4,
.height = 1, .depth = 1 }, &xfer);
assert(xfer);
@@ -622,7 +622,7 @@ static void compute_memory_transfer(
memcpy(data, map + internal_offset, size);
pipe->transfer_unmap(pipe, xfer);
} else {
- map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
+ map = pipe->transfer_map(pipe, gart, 0, PIPE_MAP_WRITE,
&(struct pipe_box) { .width = aligned_size * 4,
.height = 1, .depth = 1 }, &xfer);
assert(xfer);
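
Note on the container_of change above: Mesa's container_of macro was reworked to take the containing type rather than a sample pointer of that type, which is why the second argument changes from "item" to "struct compute_memory_item". A minimal standalone sketch of the new call shape (the macro body is the usual offsetof idiom, written out here for illustration, not copied from Mesa):

#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct list_head { struct list_head *prev, *next; };

struct compute_memory_item {
	struct list_head link;
	/* ... */
};

/* old call site: container_of(item->link.prev, item, link)
 * new call site: the type name is passed explicitly. */
static struct compute_memory_item *prev_item(struct list_head *node)
{
	return container_of(node->prev, struct compute_memory_item, link);
}
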
diff --git a/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.h b/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.h
index 2064e5635..3b9097627 100644
--- a/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.h
+++ b/lib/mesa/src/gallium/drivers/r600/compute_memory_pool.h
@@ -47,7 +47,7 @@ struct compute_memory_item
int64_t start_in_dw;
int64_t size_in_dw; /**< Size of the chunk in dwords */
- /** Intermediate buffer asociated with an item. It is used mainly for mapping
+ /** Intermediate buffer associated with an item. It is used mainly for mapping
* items against it. They are listed in the pool's unallocated list */
struct r600_resource *real_buffer;
diff --git a/lib/mesa/src/gallium/drivers/r600/meson.build b/lib/mesa/src/gallium/drivers/r600/meson.build
index 91f62a8a7..424ac3ca0 100644
--- a/lib/mesa/src/gallium/drivers/r600/meson.build
+++ b/lib/mesa/src/gallium/drivers/r600/meson.build
@@ -105,7 +105,80 @@ files_r600 = files(
'sb/sb_shader.h',
'sb/sb_ssa_builder.cpp',
'sb/sb_valtable.cpp',
-)
+ 'sfn/sfn_alu_defines.cpp',
+ 'sfn/sfn_alu_defines.h',
+ 'sfn/sfn_callstack.cpp',
+ 'sfn/sfn_callstack.h',
+ 'sfn/sfn_conditionaljumptracker.cpp',
+ 'sfn/sfn_conditionaljumptracker.h',
+ 'sfn/sfn_defines.h',
+ 'sfn/sfn_debug.cpp',
+ 'sfn/sfn_debug.h',
+ 'sfn/sfn_emitaluinstruction.cpp',
+ 'sfn/sfn_emitaluinstruction.h',
+ 'sfn/sfn_emitinstruction.cpp',
+ 'sfn/sfn_emitinstruction.h',
+ 'sfn/sfn_emitssboinstruction.cpp',
+ 'sfn/sfn_emitssboinstruction.h',
+ 'sfn/sfn_emittexinstruction.cpp',
+ 'sfn/sfn_emittexinstruction.h',
+ 'sfn/sfn_emitinstruction.h',
+ 'sfn/sfn_instruction_alu.cpp',
+ 'sfn/sfn_instruction_alu.h',
+ 'sfn/sfn_instruction_base.cpp',
+ 'sfn/sfn_instruction_base.h',
+ 'sfn/sfn_instruction_block.cpp',
+ 'sfn/sfn_instruction_block.h',
+ 'sfn/sfn_instruction_cf.cpp',
+ 'sfn/sfn_instruction_cf.h',
+ 'sfn/sfn_instruction_export.cpp',
+ 'sfn/sfn_instruction_export.h',
+ 'sfn/sfn_instruction_fetch.cpp',
+ 'sfn/sfn_instruction_fetch.h',
+ 'sfn/sfn_instruction_gds.cpp',
+ 'sfn/sfn_instruction_gds.h',
+ 'sfn/sfn_instruction_lds.cpp',
+ 'sfn/sfn_instruction_lds.h',
+ 'sfn/sfn_instruction_misc.cpp',
+ 'sfn/sfn_instruction_misc.h',
+ 'sfn/sfn_instruction_tex.cpp',
+ 'sfn/sfn_instruction_tex.h',
+ 'sfn/sfn_ir_to_assembly.cpp',
+ 'sfn/sfn_ir_to_assembly.h',
+ 'sfn/sfn_liverange.cpp',
+ 'sfn/sfn_liverange.h',
+ 'sfn/sfn_nir.cpp',
+ 'sfn/sfn_nir.h',
+ 'sfn/sfn_nir_lower_64bit.cpp',
+ 'sfn/sfn_nir_lower_fs_out_to_vector.cpp',
+ 'sfn/sfn_nir_lower_fs_out_to_vector.h',
+ 'sfn/sfn_nir_lower_tess_io.cpp',
+ 'sfn/sfn_nir_vectorize_vs_inputs.c',
+ 'sfn/sfn_shader_base.cpp',
+ 'sfn/sfn_shader_base.h',
+ 'sfn/sfn_shader_compute.cpp',
+ 'sfn/sfn_shader_compute.h',
+ 'sfn/sfn_shader_fragment.cpp',
+ 'sfn/sfn_shader_fragment.h',
+ 'sfn/sfn_shader_geometry.cpp',
+ 'sfn/sfn_shader_geometry.h',
+ 'sfn/sfn_shader_tcs.cpp',
+ 'sfn/sfn_shader_tcs.h',
+ 'sfn/sfn_shader_tess_eval.cpp',
+ 'sfn/sfn_shader_tess_eval.h',
+ 'sfn/sfn_shader_vertex.cpp',
+ 'sfn/sfn_shader_vertex.h',
+ 'sfn/sfn_shaderio.cpp',
+ 'sfn/sfn_shaderio.h',
+ 'sfn/sfn_value.cpp',
+ 'sfn/sfn_value.h',
+ 'sfn/sfn_value_gpr.cpp',
+ 'sfn/sfn_value_gpr.h',
+ 'sfn/sfn_valuepool.cpp',
+ 'sfn/sfn_valuepool.h',
+ 'sfn/sfn_vertexstageexport.cpp',
+ 'sfn/sfn_vertexstageexport.h',
+ )
egd_tables_h = custom_target(
'egd_tables.h',
@@ -115,6 +188,19 @@ egd_tables_h = custom_target(
capture : true,
)
+sfn_nir_algebraic_c = custom_target(
+ 'sfn_nir_algebraic.c',
+ input : 'sfn/sfn_nir_algebraic.py',
+ output : 'sfn_nir_algebraic.c',
+ command : [
+ prog_python, '@INPUT@',
+ '-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
+ ],
+ capture : true,
+ depend_files : nir_algebraic_py,
+)
+
+
r600_c_args = []
if with_gallium_opencl
r600_c_args += '-DHAVE_OPENCL'
@@ -122,17 +208,17 @@ endif
libr600 = static_library(
'r600',
- [files_r600, egd_tables_h],
- c_args : [c_vis_args, r600_c_args, '-Wstrict-overflow=0'],
- cpp_args : [cpp_vis_args],
+ [files_r600, egd_tables_h, sfn_nir_algebraic_c],
+ c_args : [r600_c_args, '-Wstrict-overflow=0'],
+ gnu_symbol_visibility : 'hidden',
include_directories : [
- inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common,
+ inc_src, inc_mapi, inc_mesa, inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
inc_gallium_drivers,
],
- dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm],
+ dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers],
)
driver_r600 = declare_dependency(
compile_args : '-DGALLIUM_R600',
- link_with : [libr600, libradeonwinsys],
+ link_with : [libr600, libmesa_gallium, libradeonwinsys],
)
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c
index d0f44dcb6..7dbf7a1ba 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c
@@ -34,11 +34,11 @@ bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct pb_buffer *buf,
enum radeon_bo_usage usage)
{
- if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
+ if (ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, buf, usage)) {
return true;
}
- if (radeon_emitted(ctx->dma.cs, 0) &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) {
+ if (radeon_emitted(&ctx->dma.cs, 0) &&
+ ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, buf, usage)) {
return true;
}
return false;
@@ -53,19 +53,19 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
assert(!(resource->flags & RADEON_FLAG_SPARSE));
- if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
- return ctx->ws->buffer_map(resource->buf, NULL, usage);
+ if (usage & PIPE_MAP_UNSYNCHRONIZED) {
+ return ctx->ws->buffer_map(ctx->ws, resource->buf, NULL, usage);
}
- if (!(usage & PIPE_TRANSFER_WRITE)) {
+ if (!(usage & PIPE_MAP_WRITE)) {
/* have to wait for the last write */
rusage = RADEON_USAGE_WRITE;
}
- if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
+ if (radeon_emitted(&ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs,
resource->buf, rusage)) {
- if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ if (usage & PIPE_MAP_DONTBLOCK) {
ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
return NULL;
} else {
@@ -73,10 +73,10 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
busy = true;
}
}
- if (radeon_emitted(ctx->dma.cs, 0) &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
+ if (radeon_emitted(&ctx->dma.cs, 0) &&
+ ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs,
resource->buf, rusage)) {
- if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ if (usage & PIPE_MAP_DONTBLOCK) {
ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
return NULL;
} else {
@@ -85,20 +85,20 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
}
}
- if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
- if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ if (busy || !ctx->ws->buffer_wait(ctx->ws, resource->buf, 0, rusage)) {
+ if (usage & PIPE_MAP_DONTBLOCK) {
return NULL;
} else {
/* We will be wait for the GPU. Wait for any offloaded
* CS flush to complete to avoid busy-waiting in the winsys. */
- ctx->ws->cs_sync_flush(ctx->gfx.cs);
- if (ctx->dma.cs)
- ctx->ws->cs_sync_flush(ctx->dma.cs);
+ ctx->ws->cs_sync_flush(&ctx->gfx.cs);
+ if (ctx->dma.cs.priv)
+ ctx->ws->cs_sync_flush(&ctx->dma.cs);
}
}
/* Setting the CS to NULL will prevent doing checks we have done already. */
- return ctx->ws->buffer_map(resource->buf, NULL, usage);
+ return ctx->ws->buffer_map(ctx->ws, resource->buf, NULL, usage);
}
void r600_init_resource_fields(struct r600_common_screen *rscreen,
@@ -116,7 +116,7 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
switch (res->b.b.usage) {
case PIPE_USAGE_STREAM:
res->flags = RADEON_FLAG_GTT_WC;
- /* fall through */
+ FALLTHROUGH;
case PIPE_USAGE_STAGING:
/* Transfers are likely to occur more often with these
* resources. */
@@ -131,7 +131,7 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
res->flags |= RADEON_FLAG_GTT_WC;
break;
}
- /* fall through */
+ FALLTHROUGH;
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
default:
@@ -254,7 +254,7 @@ r600_invalidate_buffer(struct r600_common_context *rctx,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
- !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rctx->ws, rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
} else {
util_range_set_empty(&rbuffer->valid_buffer_range);
@@ -334,7 +334,7 @@ static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
return rctx->screen->has_cp_dma ||
- (dword_aligned && (rctx->dma.cs ||
+ (dword_aligned && (rctx->dma.cs.priv ||
rctx->screen->has_streamout));
}
@@ -365,51 +365,51 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
* So don't ever use staging buffers.
*/
if (rbuffer->b.is_user_ptr)
- usage |= PIPE_TRANSFER_PERSISTENT;
+ usage |= PIPE_MAP_PERSISTENT;
/* See if the buffer range being mapped has never been initialized,
* in which case it can be mapped unsynchronized. */
- if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ if (!(usage & (PIPE_MAP_UNSYNCHRONIZED |
TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
- usage & PIPE_TRANSFER_WRITE &&
+ usage & PIPE_MAP_WRITE &&
!rbuffer->b.is_shared &&
!util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ usage |= PIPE_MAP_UNSYNCHRONIZED;
}
/* If discarding the entire range, discard the whole resource instead. */
- if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
+ if (usage & PIPE_MAP_DISCARD_RANGE &&
box->x == 0 && box->width == resource->width0) {
- usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+ usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
}
- if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
- !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE &&
+ !(usage & (PIPE_MAP_UNSYNCHRONIZED |
TC_TRANSFER_MAP_NO_INVALIDATE))) {
- assert(usage & PIPE_TRANSFER_WRITE);
+ assert(usage & PIPE_MAP_WRITE);
if (r600_invalidate_buffer(rctx, rbuffer)) {
/* At this point, the buffer is always idle. */
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ usage |= PIPE_MAP_UNSYNCHRONIZED;
} else {
/* Fall back to a temporary buffer. */
- usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ usage |= PIPE_MAP_DISCARD_RANGE;
}
}
- if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+ if ((usage & PIPE_MAP_DISCARD_RANGE) &&
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
- ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_PERSISTENT)) &&
+ ((!(usage & (PIPE_MAP_UNSYNCHRONIZED |
+ PIPE_MAP_PERSISTENT)) &&
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
(rbuffer->flags & RADEON_FLAG_SPARSE))) {
- assert(usage & PIPE_TRANSFER_WRITE);
+ assert(usage & PIPE_MAP_WRITE);
/* Check if mapping this buffer would cause waiting for the GPU.
*/
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
- !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rctx->ws, rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
@@ -429,12 +429,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
}
} else {
/* At this point, the buffer is always idle (we checked it above). */
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ usage |= PIPE_MAP_UNSYNCHRONIZED;
}
}
/* Use a staging buffer in cached GTT for reads. */
- else if (((usage & PIPE_TRANSFER_READ) &&
- !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ else if (((usage & PIPE_MAP_READ) &&
+ !(usage & PIPE_MAP_PERSISTENT) &&
(rbuffer->domains & RADEON_DOMAIN_VRAM ||
rbuffer->flags & RADEON_FLAG_GTT_WC) &&
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
@@ -452,7 +452,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
0, 0, resource, 0, box);
data = r600_buffer_map_sync_with_rings(rctx, staging,
- usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
+ usage & ~PIPE_MAP_UNSYNCHRONIZED);
if (!data) {
r600_resource_reference(&staging, NULL);
return NULL;
@@ -506,8 +506,8 @@ static void r600_buffer_flush_region(struct pipe_context *ctx,
struct pipe_transfer *transfer,
const struct pipe_box *rel_box)
{
- unsigned required_usage = PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_FLUSH_EXPLICIT;
+ unsigned required_usage = PIPE_MAP_WRITE |
+ PIPE_MAP_FLUSH_EXPLICIT;
if ((transfer->usage & required_usage) == required_usage) {
struct pipe_box box;
@@ -523,8 +523,8 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- if (transfer->usage & PIPE_TRANSFER_WRITE &&
- !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+ if (transfer->usage & PIPE_MAP_WRITE &&
+ !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
r600_resource_reference(&rtransfer->staging, NULL);
@@ -545,10 +545,10 @@ void r600_buffer_subdata(struct pipe_context *ctx,
struct pipe_box box;
uint8_t *map = NULL;
- usage |= PIPE_TRANSFER_WRITE;
+ usage |= PIPE_MAP_WRITE;
- if (!(usage & PIPE_TRANSFER_MAP_DIRECTLY))
- usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ if (!(usage & PIPE_MAP_DIRECTLY))
+ usage |= PIPE_MAP_DISCARD_RANGE;
u_box_1d(offset, size, &box);
map = r600_buffer_transfer_map(ctx, buffer, 0, usage, &box, &transfer);
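
Note: most of the churn in r600_buffer_common.c is mechanical. The PIPE_TRANSFER_* flags were renamed to PIPE_MAP_*, and the winsys buffer_map/buffer_wait entry points now take the radeon_winsys pointer as an explicit first argument. For reference, the rename pairs as they appear in this import (semantics unchanged):

/* PIPE_TRANSFER_READ                   -> PIPE_MAP_READ
 * PIPE_TRANSFER_WRITE                  -> PIPE_MAP_WRITE
 * PIPE_TRANSFER_READ_WRITE             -> PIPE_MAP_READ_WRITE
 * PIPE_TRANSFER_UNSYNCHRONIZED         -> PIPE_MAP_UNSYNCHRONIZED
 * PIPE_TRANSFER_DONTBLOCK              -> PIPE_MAP_DONTBLOCK
 * PIPE_TRANSFER_PERSISTENT             -> PIPE_MAP_PERSISTENT
 * PIPE_TRANSFER_DISCARD_RANGE          -> PIPE_MAP_DISCARD_RANGE
 * PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE -> PIPE_MAP_DISCARD_WHOLE_RESOURCE
 * PIPE_TRANSFER_FLUSH_EXPLICIT         -> PIPE_MAP_FLUSH_EXPLICIT
 * PIPE_TRANSFER_MAP_DIRECTLY           -> PIPE_MAP_DIRECTLY
 */
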
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_cs.h b/lib/mesa/src/gallium/drivers/r600/r600_cs.h
index 424adba27..71e606b9b 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_cs.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_cs.h
@@ -45,8 +45,8 @@ radeon_cs_memory_below_limit(struct r600_common_screen *screen,
struct radeon_cmdbuf *cs,
uint64_t vram, uint64_t gtt)
{
- vram += cs->used_vram;
- gtt += cs->used_gart;
+ vram += (uint64_t)cs->used_vram_kb * 1024;
+ gtt += (uint64_t)cs->used_gart_kb * 1024;
/* Anything that goes above the VRAM size should go to GTT. */
if (vram > screen->info.vram_size)
@@ -74,7 +74,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
{
assert(usage);
return rctx->ws->cs_add_buffer(
- ring->cs, rbo->buf,
+ &ring->cs, rbo->buf,
(enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
rbo->domains, priority) * 4;
}
@@ -105,7 +105,7 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
bool check_mem)
{
if (check_mem &&
- !radeon_cs_memory_below_limit(rctx->screen, ring->cs,
+ !radeon_cs_memory_below_limit(rctx->screen, &ring->cs,
rctx->vram + rbo->vram_usage,
rctx->gtt + rbo->gart_usage))
ring->flush(rctx, PIPE_FLUSH_ASYNC, NULL);
@@ -118,7 +118,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
- struct radeon_cmdbuf *cs = ring->cs;
+ struct radeon_cmdbuf *cs = &ring->cs;
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_has_virtual_memory;
unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
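
Note: two related changes run through this header. struct r600_ring now embeds its radeon_cmdbuf instead of holding a pointer, and the command stream tracks used memory in kilobytes. A compilable sketch of the new accounting, reduced to the fields the diff touches:

#include <stdint.h>

struct radeon_cmdbuf { unsigned used_vram_kb, used_gart_kb; };
struct r600_ring     { struct radeon_cmdbuf cs; };

static uint64_t ring_used_bytes(struct r600_ring *ring)
{
	struct radeon_cmdbuf *cs = &ring->cs;	/* was: ring->cs, a pointer */
	return (uint64_t)cs->used_vram_kb * 1024 +
	       (uint64_t)cs->used_gart_kb * 1024;
}
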
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_dump.c b/lib/mesa/src/gallium/drivers/r600/r600_dump.c
index 29a89605e..76b56bc7d 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_dump.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_dump.c
@@ -26,6 +26,7 @@
#include "r600_dump.h"
#include "r600_shader.h"
+#include "tgsi/tgsi_strings.h"
void print_shader_info(FILE *f , int id, struct r600_shader *shader)
{
@@ -165,4 +166,35 @@ void print_pipe_info(FILE *f, struct tgsi_shader_info *shader)
PRINT_UINT_MEMBER(writes_memory);
PRINT_UINT_MEMBER(file_mask[TGSI_FILE_HW_ATOMIC]);
PRINT_UINT_MEMBER(file_count[TGSI_FILE_HW_ATOMIC]);
+
+ for(unsigned int i = 0; i < TGSI_PROPERTY_COUNT; ++i) {
+ if (shader->properties[i] != 0)
+ fprintf(stderr, "PROP: %s = %d\n", tgsi_property_names[i], shader->properties[i]);
+ }
+
+#define PRINT_UINT_ARRAY_MEMBER(M, IDX) \
+ if (shader-> M [ IDX ]) fprintf(f, #M "[%d] = %d\n", IDX, (unsigned) shader-> M [ IDX ]);
+
+ for (int i = 0; i < shader->num_inputs; ++i) {
+ PRINT_UINT_ARRAY_MEMBER(input_semantic_name, i); /**< TGSI_SEMANTIC_x */
+ PRINT_UINT_ARRAY_MEMBER(input_semantic_index, i);
+ PRINT_UINT_ARRAY_MEMBER(input_interpolate, i);
+ PRINT_UINT_ARRAY_MEMBER(input_interpolate_loc, i);
+ PRINT_UINT_ARRAY_MEMBER(input_usage_mask, i);
+ PRINT_UINT_ARRAY_MEMBER(input_cylindrical_wrap, i);
+ }
+
+ for (int i = 0; i < shader->num_outputs; ++i) {
+ PRINT_UINT_ARRAY_MEMBER(output_semantic_name, i);
+ PRINT_UINT_ARRAY_MEMBER(output_semantic_index, i);
+ PRINT_UINT_ARRAY_MEMBER(output_usagemask, i);
+ PRINT_UINT_ARRAY_MEMBER(output_streams, i);
+ }
+
+ for (int i = 0; i < shader->num_system_values; ++i)
+ PRINT_UINT_ARRAY_MEMBER(system_value_semantic_name, i);
+
+ PRINT_UINT_MEMBER(reads_pervertex_outputs);
+ PRINT_UINT_MEMBER(reads_perpatch_outputs);
+ PRINT_UINT_MEMBER(reads_tessfactor_outputs);
}
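
Note: the PRINT_UINT_ARRAY_MEMBER macro added above relies on the preprocessor stringizing the member name, so each non-zero entry prints as "name[i] = value". A self-contained demo (the struct is illustrative, not the real tgsi_shader_info; the macro itself is copied from the diff):

#include <stdio.h>

#define PRINT_UINT_ARRAY_MEMBER(M, IDX) \
	if (shader-> M [ IDX ]) fprintf(f, #M "[%d] = %d\n", IDX, (unsigned) shader-> M [ IDX ]);

struct demo_info { unsigned input_semantic_name[4]; unsigned num_inputs; };

int main(void)
{
	FILE *f = stdout;
	struct demo_info s = { .input_semantic_name = { 0, 5, 0, 7 }, .num_inputs = 4 };
	struct demo_info *shader = &s;

	for (int i = 0; i < (int)shader->num_inputs; ++i) {
		PRINT_UINT_ARRAY_MEMBER(input_semantic_name, i);
	}
	/* prints:
	 *   input_semantic_name[1] = 5
	 *   input_semantic_name[3] = 7
	 */
	return 0;
}
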
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_isa.c b/lib/mesa/src/gallium/drivers/r600/r600_isa.c
index 57b0e044f..0a5c4dac1 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_isa.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_isa.c
@@ -194,8 +194,8 @@ const struct alu_op_info r600_alu_op_table[] = {
{"MULADD_IEEE_PREV", 2, { -1, 0xD5 },{ 0, 0, AF_V, AF_V}, AF_PREV_INTERLEAVE | AF_IEEE },
{"INTERP_XY", 2, { -1, 0xD6 },{ 0, 0, AF_4V, AF_4V}, AF_INTERP },
{"INTERP_ZW", 2, { -1, 0xD7 },{ 0, 0, AF_4V, AF_4V}, AF_INTERP },
- {"INTERP_X", 2, { -1, 0xD8 },{ 0, 0, AF_V, AF_V}, AF_INTERP },
- {"INTERP_Z", 2, { -1, 0xD9 },{ 0, 0, AF_V, AF_V}, AF_INTERP },
+ {"INTERP_X", 2, { -1, 0xD8 },{ 0, 0, AF_2V, AF_2V}, AF_INTERP },
+ {"INTERP_Z", 2, { -1, 0xD9 },{ 0, 0, AF_2V, AF_2V}, AF_INTERP },
{"STORE_FLAGS", 1, { -1, 0xDA },{ 0, 0, AF_V, AF_V}, 0 },
{"LOAD_STORE_FLAGS", 1, { -1, 0xDB },{ 0, 0, AF_V, AF_V}, 0 },
{"LDS_1A", 2, { -1, 0xDC },{ 0, 0, AF_V, AF_V}, 0 },
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_isa.h b/lib/mesa/src/gallium/drivers/r600/r600_isa.h
index fcaf1f766..1c098fbb1 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_isa.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_isa.h
@@ -36,6 +36,7 @@ extern "C" {
/* ALU flags */
enum alu_op_flags
{
+ AF_NONE = 0,
AF_V = (1<<0), /* allowed in vector slots */
/* allowed in scalar(trans) slot (slots xyz on cayman, may be replicated
@@ -46,6 +47,9 @@ enum alu_op_flags
AF_4V = (AF_V | AF_4SLOT),
AF_VS = (AF_V | AF_S), /* allowed in any slot */
+ AF_2SLOT = (1 << 3),
+ AF_2V = AF_V | AF_2SLOT, /* XY or ZW */
+
AF_KILL = (1<<4),
AF_PRED = (1<<5),
AF_SET = (1<<6),
@@ -54,6 +58,7 @@ enum alu_op_flags
AF_PREV_INTERLEAVE = (1<<7),
AF_MOVA = (1<<8), /* all MOVA instructions */
+
AF_IEEE = (1<<10),
AF_DST_TYPE_MASK = (3<<11),
@@ -106,6 +111,7 @@ enum alu_op_flags
/* condition codes - 3 bits */
AF_CC_SHIFT = 29,
+
AF_CC_MASK = (7U << AF_CC_SHIFT),
AF_CC_E = (0U << AF_CC_SHIFT),
AF_CC_GT = (1U << AF_CC_SHIFT),
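
Note: the new AF_2SLOT/AF_2V bits mark ALU operations that must occupy a paired slot (XY or ZW), as used for INTERP_X/INTERP_Z in r600_isa.c above. Reduced to just the values the diff shows:

enum alu_op_flags_sketch {
	AF_V     = 1 << 0,		/* allowed in vector slots */
	AF_2SLOT = 1 << 3,
	AF_2V    = AF_V | AF_2SLOT,	/* must pair: XY or ZW */
};
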
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c
index 91607ca71..fe3d0c616 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c
@@ -38,6 +38,7 @@
#include "radeon_video.h"
#include <inttypes.h>
#include <sys/utsname.h>
+#include <stdlib.h>
#ifdef LLVM_AVAILABLE
#include <llvm-c/TargetMachine.h>
@@ -76,7 +77,7 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx,
struct r600_resource *buf, uint64_t va,
uint32_t new_fence, unsigned query_type)
{
- struct radeon_cmdbuf *cs = ctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &ctx->gfx.cs;
unsigned op = EVENT_TYPE(event) |
EVENT_INDEX(5) |
event_flags;
@@ -108,7 +109,7 @@ void r600_gfx_wait_fence(struct r600_common_context *ctx,
struct r600_resource *buf,
uint64_t va, uint32_t ref, uint32_t mask)
{
- struct radeon_cmdbuf *cs = ctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &ctx->gfx.cs;
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
@@ -205,7 +206,7 @@ void r600_draw_rectangle(struct blitter_context *blitter,
vbuffer.stride = 2 * 4 * sizeof(float); /* vertex size */
vbuffer.buffer_offset = offset;
- rctx->b.set_vertex_buffers(&rctx->b, blitter->vb_slot, 1, &vbuffer);
+ rctx->b.set_vertex_buffers(&rctx->b, blitter->vb_slot, 1, 0, false, &vbuffer);
util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3,
0, num_instances);
pipe_resource_reference(&buf, NULL);
@@ -213,7 +214,7 @@ void r600_draw_rectangle(struct blitter_context *blitter,
static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->dma.cs;
+ struct radeon_cmdbuf *cs = &rctx->dma.cs;
if (rctx->chip_class >= EVERGREEN)
radeon_emit(cs, 0xf0000000); /* NOP */
@@ -226,8 +227,8 @@ static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src)
{
- uint64_t vram = ctx->dma.cs->used_vram;
- uint64_t gtt = ctx->dma.cs->used_gart;
+ uint64_t vram = (uint64_t)ctx->dma.cs.used_vram_kb * 1024;
+ uint64_t gtt = (uint64_t)ctx->dma.cs.used_gart_kb * 1024;
if (dst) {
vram += dst->vram_usage;
@@ -239,12 +240,12 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
}
/* Flush the GFX IB if DMA depends on it. */
- if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ if (radeon_emitted(&ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
((dst &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
+ ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, dst->buf,
RADEON_USAGE_READWRITE)) ||
(src &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
+ ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, src->buf,
RADEON_USAGE_WRITE))))
ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
@@ -261,21 +262,21 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
* engine busy while uploads are being submitted.
*/
num_dw++; /* for emit_wait_idle below */
- if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw, false) ||
- ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
- !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
+ if (!ctx->ws->cs_check_space(&ctx->dma.cs, num_dw, false) ||
+ ctx->dma.cs.used_vram_kb + ctx->dma.cs.used_gart_kb > 64 * 1024 ||
+ !radeon_cs_memory_below_limit(ctx->screen, &ctx->dma.cs, vram, gtt)) {
ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
- assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
+ assert((num_dw + ctx->dma.cs.current.cdw) <= ctx->dma.cs.current.max_dw);
}
/* Wait for idle if either buffer has been used in the IB before to
* prevent read-after-write hazards.
*/
if ((dst &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
+ ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, dst->buf,
RADEON_USAGE_READWRITE)) ||
(src &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
+ ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, src->buf,
RADEON_USAGE_WRITE)))
r600_dma_emit_wait_idle(ctx);
@@ -344,22 +345,22 @@ static void r600_flush_from_st(struct pipe_context *ctx,
rflags |= PIPE_FLUSH_END_OF_FRAME;
/* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
- if (rctx->dma.cs)
+ if (rctx->dma.cs.priv)
rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
- if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
+ if (!radeon_emitted(&rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
if (fence)
ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
if (!(flags & PIPE_FLUSH_DEFERRED))
- ws->cs_sync_flush(rctx->gfx.cs);
+ ws->cs_sync_flush(&rctx->gfx.cs);
} else {
/* Instead of flushing, create a deferred fence. Constraints:
- * - The state tracker must allow a deferred flush.
- * - The state tracker must request a fence.
- * Thread safety in fence_finish must be ensured by the state tracker.
+ * - the gallium frontend must allow a deferred flush.
+ * - the gallium frontend must request a fence.
+ * Thread safety in fence_finish must be ensured by the gallium frontend.
*/
if (flags & PIPE_FLUSH_DEFERRED && fence) {
- gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+ gfx_fence = rctx->ws->cs_get_next_fence(&rctx->gfx.cs);
deferred_fence = true;
} else {
rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
@@ -391,9 +392,9 @@ static void r600_flush_from_st(struct pipe_context *ctx,
}
finish:
if (!(flags & PIPE_FLUSH_DEFERRED)) {
- if (rctx->dma.cs)
- ws->cs_sync_flush(rctx->dma.cs);
- ws->cs_sync_flush(rctx->gfx.cs);
+ if (rctx->dma.cs.priv)
+ ws->cs_sync_flush(&rctx->dma.cs);
+ ws->cs_sync_flush(&rctx->gfx.cs);
}
}
@@ -401,7 +402,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct radeon_cmdbuf *cs = rctx->dma.cs;
+ struct radeon_cmdbuf *cs = &rctx->dma.cs;
struct radeon_saved_cs saved;
bool check_vm =
(rctx->screen->debug_flags & DBG_CHECK_VM) &&
@@ -486,7 +487,7 @@ static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- return rctx->ws->ctx_query_reset_status(rctx->ctx);
+ return rctx->ws->ctx_query_reset_status(rctx->ctx, false, NULL);
}
static void r600_set_debug_callback(struct pipe_context *ctx,
@@ -555,23 +556,23 @@ static bool r600_resource_commit(struct pipe_context *pctx,
* (b) wait for threaded submit to finish, including those that were
* triggered by some other, earlier operation.
*/
- if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
+ if (radeon_emitted(&ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs,
res->buf, RADEON_USAGE_READWRITE)) {
ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
}
- if (radeon_emitted(ctx->dma.cs, 0) &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
+ if (radeon_emitted(&ctx->dma.cs, 0) &&
+ ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs,
res->buf, RADEON_USAGE_READWRITE)) {
ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
}
- ctx->ws->cs_sync_flush(ctx->dma.cs);
- ctx->ws->cs_sync_flush(ctx->gfx.cs);
+ ctx->ws->cs_sync_flush(&ctx->dma.cs);
+ ctx->ws->cs_sync_flush(&ctx->gfx.cs);
assert(resource->target == PIPE_BUFFER);
- return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
+ return ctx->ws->buffer_commit(ctx->ws, res->buf, box->x, box->width, commit);
}
bool r600_common_context_init(struct r600_common_context *rctx,
@@ -615,11 +616,8 @@ bool r600_common_context_init(struct r600_common_context *rctx,
r600_query_init(rctx);
cayman_init_msaa(&rctx->b);
- rctx->allocator_zeroed_memory =
- u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
- 0, PIPE_USAGE_DEFAULT, 0, true);
- if (!rctx->allocator_zeroed_memory)
- return false;
+ u_suballocator_init(&rctx->allocator_zeroed_memory, &rctx->b, rscreen->info.gart_page_size,
+ 0, PIPE_USAGE_DEFAULT, 0, true);
rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
0, PIPE_USAGE_STREAM, 0);
@@ -636,9 +634,8 @@ bool r600_common_context_init(struct r600_common_context *rctx,
return false;
if (rscreen->info.num_rings[RING_DMA] && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
- rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
- r600_flush_dma_ring,
- rctx, false);
+ rctx->ws->cs_create(&rctx->dma.cs, rctx->ctx, RING_DMA,
+ r600_flush_dma_ring, rctx, false);
rctx->dma.flush = r600_flush_dma_ring;
}
@@ -650,10 +647,8 @@ void r600_common_context_cleanup(struct r600_common_context *rctx)
if (rctx->query_result_shader)
rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
- if (rctx->gfx.cs)
- rctx->ws->cs_destroy(rctx->gfx.cs);
- if (rctx->dma.cs)
- rctx->ws->cs_destroy(rctx->dma.cs);
+ rctx->ws->cs_destroy(&rctx->gfx.cs);
+ rctx->ws->cs_destroy(&rctx->dma.cs);
if (rctx->ctx)
rctx->ws->ctx_destroy(rctx->ctx);
@@ -665,9 +660,7 @@ void r600_common_context_cleanup(struct r600_common_context *rctx)
slab_destroy_child(&rctx->pool_transfers);
slab_destroy_child(&rctx->pool_transfers_unsync);
- if (rctx->allocator_zeroed_memory) {
- u_suballocator_destroy(rctx->allocator_zeroed_memory);
- }
+ u_suballocator_destroy(&rctx->allocator_zeroed_memory);
rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
r600_resource_reference(&rctx->eop_bug_scratch, NULL);
@@ -810,8 +803,6 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
static float r600_get_paramf(struct pipe_screen* pscreen,
enum pipe_capf param)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
-
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
@@ -909,7 +900,8 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
enum pipe_shader_ir ir_type)
{
- if (ir_type != PIPE_SHADER_IR_TGSI)
+ if (ir_type != PIPE_SHADER_IR_TGSI &&
+ ir_type != PIPE_SHADER_IR_NIR)
return 256;
if (screen->chip_class >= EVERGREEN)
return 1024;
@@ -1177,6 +1169,18 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
}
}
+static const void *
+r600_get_compiler_options(struct pipe_screen *screen,
+ enum pipe_shader_ir ir,
+ enum pipe_shader_type shader)
+{
+ assert(ir == PIPE_SHADER_IR_NIR);
+
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+
+ return &rscreen->nir_options;
+}
+
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws)
{
@@ -1184,7 +1188,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct utsname uname_data;
const char *chip_name;
- ws->query_info(ws, &rscreen->info);
+ ws->query_info(ws, &rscreen->info, false, false);
rscreen->ws = ws;
chip_name = r600_get_family_name(rscreen);
@@ -1210,6 +1214,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
rscreen->b.get_timestamp = r600_get_timestamp;
+ rscreen->b.get_compiler_options = r600_get_compiler_options;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
@@ -1284,17 +1289,51 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
- printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
+ printf("max_sh_per_se = %i\n", rscreen->info.max_sa_per_se);
printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
- printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
+ printf("num_render_backends = %i\n", rscreen->info.max_render_backends);
printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
}
+
+ const struct nir_shader_compiler_options nir_options = {
+ .fuse_ffma16 = true,
+ .fuse_ffma32 = true,
+ .fuse_ffma64 = true,
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_fpow = true,
+ .lower_fdiv = true,
+ .lower_isign = true,
+ .lower_fsign = true,
+ .lower_fmod = true,
+ .lower_doubles_options = nir_lower_fp64_full_software,
+ .lower_int64_options = ~0,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_rotate = true,
+ .max_unroll_iterations = 32,
+ .lower_interpolate_at = true,
+ .vectorize_io = true,
+ .has_umad24 = true,
+ .has_umul24 = true,
+ .use_interpolated_input_intrinsics = true,
+ .has_fsub = true,
+ .has_isub = true,
+ .lower_iabs = true,
+ .lower_bitfield_extract = true,
+ .lower_bitfield_insert_to_bitfield_select = true,
+ .has_fused_comp_and_csel = true,
+ .lower_find_msb_to_reverse = true,
+ };
+
+ rscreen->nir_options = nir_options;
+
return true;
}
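
Note: a recurring pattern in this file is members moving from heap-allocated pointers to embedded structs (dma.cs, allocator_zeroed_memory): setup now passes the member's address and the allocation-failure checks disappear. A stubbed sketch of the shape, with simplified signatures rather than the real u_suballocator API:

struct u_suballocator { int placeholder; };

struct common_ctx {
	struct u_suballocator allocator_zeroed_memory;	/* was: struct u_suballocator * */
};

static void suballoc_init(struct u_suballocator *a)    { a->placeholder = 0; }
static void suballoc_destroy(struct u_suballocator *a) { (void)a; }

static void ctx_init(struct common_ctx *c)
{
	/* was: c->alloc = u_suballocator_create(...);
	 *      if (!c->alloc) return false; */
	suballoc_init(&c->allocator_zeroed_memory);
}

static void ctx_cleanup(struct common_ctx *c)
{
	/* was guarded by: if (c->alloc) */
	suballoc_destroy(&c->allocator_zeroed_memory);
}
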
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h
index 2ecd03845..b55a27d63 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h
@@ -45,6 +45,8 @@
#include "util/u_transfer.h"
#include "util/u_threaded_context.h"
+#include "compiler/nir/nir.h"
+
struct u_log_context;
#define ATI_VENDOR_ID 0x1002
@@ -289,7 +291,7 @@ struct r600_mmio_counter {
};
union r600_mmio_counters {
- struct {
+ struct r600_mmio_counters_named {
/* For global GPU load including SDMA. */
struct r600_mmio_counter gpu;
@@ -320,7 +322,7 @@ union r600_mmio_counters {
struct r600_mmio_counter cp_dma;
struct r600_mmio_counter scratch_ram;
} named;
- unsigned array[0];
+ unsigned array[sizeof(struct r600_mmio_counters_named) / sizeof(unsigned)];
};
struct r600_memory_object {
@@ -404,6 +406,8 @@ struct r600_common_screen {
*/
unsigned compute_to_L2;
} barrier_flags;
+
+ struct nir_shader_compiler_options nir_options;
};
/* This encapsulates a state or an operation which can emitted into the GPU
@@ -474,7 +478,7 @@ struct r600_viewports {
};
struct r600_ring {
- struct radeon_cmdbuf *cs;
+ struct radeon_cmdbuf cs;
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence);
};
@@ -508,7 +512,7 @@ struct r600_common_context {
unsigned last_num_draw_calls;
struct threaded_context *tc;
- struct u_suballocator *allocator_zeroed_memory;
+ struct u_suballocator allocator_zeroed_memory;
struct slab_child_pool pool_transfers;
struct slab_child_pool pool_transfers_unsync; /* for threaded_context */
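
Note: naming the struct inside union r600_mmio_counters lets the flat array view be sized with sizeof instead of a zero-length array, so iterating the counters stays valid without the GNU extension. A runnable reduction (counter names are illustrative):

#include <stdio.h>

struct mmio_counters_named { unsigned gpu, sdma, vgt; };

union mmio_counters {
	struct mmio_counters_named named;
	unsigned array[sizeof(struct mmio_counters_named) / sizeof(unsigned)];
};

int main(void)
{
	union mmio_counters c = { .named = { 10, 20, 30 } };
	for (unsigned i = 0; i < sizeof(c.array) / sizeof(c.array[0]); ++i)
		printf("counter[%u] = %u\n", i, c.array[i]);
	return 0;
}
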
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_query.c b/lib/mesa/src/gallium/drivers/r600/r600_query.c
index 4ef7bc8ca..60e691f9e 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_query.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_query.c
@@ -430,7 +430,7 @@ static bool r600_query_sw_get_result(struct r600_common_context *rctx,
result->u32 = rctx->screen->info.num_good_compute_units;
return true;
case R600_QUERY_GPIN_NUM_RB:
- result->u32 = rctx->screen->info.num_render_backends;
+ result->u32 = rctx->screen->info.max_render_backends;
return true;
case R600_QUERY_GPIN_NUM_SPI:
result->u32 = 1; /* all supported chips have one SPI per SE */
@@ -526,9 +526,9 @@ static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
struct r600_resource *buffer)
{
/* Callers ensure that the buffer is currently unused by the GPU. */
- uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL,
- PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
+ uint32_t *results = rscreen->ws->buffer_map(rscreen->ws, buffer->buf, NULL,
+ PIPE_MAP_WRITE |
+ PIPE_MAP_UNSYNCHRONIZED);
if (!results)
return false;
@@ -537,7 +537,7 @@ static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
- unsigned max_rbs = rscreen->info.num_render_backends;
+ unsigned max_rbs = rscreen->info.max_render_backends;
unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
unsigned num_results;
unsigned i, j;
@@ -622,7 +622,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
- query->result_size = 16 * rscreen->info.num_render_backends;
+ query->result_size = 16 * rscreen->info.max_render_backends;
query->result_size += 16; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
@@ -728,7 +728,7 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
struct r600_resource *buffer,
uint64_t va)
{
- struct radeon_cmdbuf *cs = ctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &ctx->gfx.cs;
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -808,7 +808,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
struct r600_resource *buffer,
uint64_t va)
{
- struct radeon_cmdbuf *cs = ctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &ctx->gfx.cs;
uint64_t fence_va = 0;
switch (query->b.type) {
@@ -821,7 +821,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- fence_va = va + ctx->screen->info.num_render_backends * 16 - 8;
+ fence_va = va + ctx->screen->info.max_render_backends * 16 - 8;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -837,7 +837,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
break;
case PIPE_QUERY_TIME_ELAPSED:
va += 8;
- /* fall through */
+ FALLTHROUGH;
case PIPE_QUERY_TIMESTAMP:
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
@@ -900,7 +900,7 @@ static void emit_set_predicate(struct r600_common_context *ctx,
struct r600_resource *buf, uint64_t va,
uint32_t op)
{
- struct radeon_cmdbuf *cs = ctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &ctx->gfx.cs;
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, va);
@@ -1021,7 +1021,7 @@ void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
/* Obtain a new buffer if the current one can't be mapped without a stall. */
if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
- !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rctx->ws, query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
r600_resource_reference(&query->buffer.buf, NULL);
query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
} else {
@@ -1082,7 +1082,7 @@ static void r600_get_hw_query_params(struct r600_common_context *rctx,
struct r600_query_hw *rquery, int index,
struct r600_hw_query_params *params)
{
- unsigned max_rbs = rctx->screen->info.num_render_backends;
+ unsigned max_rbs = rctx->screen->info.max_render_backends;
params->pair_stride = 0;
params->pair_count = 1;
@@ -1125,6 +1125,7 @@ static void r600_get_hw_query_params(struct r600_common_context *rctx,
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
params->pair_count = R600_MAX_STREAMS;
params->pair_stride = 32;
+ FALLTHROUGH;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
params->start_offset = 0;
params->end_offset = 16;
@@ -1172,7 +1173,7 @@ static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
void *buffer,
union pipe_query_result *result)
{
- unsigned max_rbs = rscreen->info.num_render_backends;
+ unsigned max_rbs = rscreen->info.max_render_backends;
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER: {
@@ -1336,13 +1337,13 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
query->ops->clear_result(query, result);
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
- unsigned usage = PIPE_TRANSFER_READ |
- (wait ? 0 : PIPE_TRANSFER_DONTBLOCK);
+ unsigned usage = PIPE_MAP_READ |
+ (wait ? 0 : PIPE_MAP_DONTBLOCK);
unsigned results_base = 0;
void *map;
if (rquery->b.flushed)
- map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
+ map = rctx->ws->buffer_map(rctx->ws, qbuf->buf->buf, NULL, usage);
else
map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
@@ -1590,10 +1591,7 @@ static void r600_restore_qbo_state(struct r600_common_context *rctx,
struct r600_qbo_state *st)
{
rctx->b.bind_compute_state(&rctx->b, st->saved_compute);
-
- rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
- pipe_resource_reference(&st->saved_const0.buffer, NULL);
-
+ rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0);
rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo, ~0);
for (unsigned i = 0; i < 3; ++i)
pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
@@ -1636,7 +1634,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
}
if (query->buffer.previous) {
- u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 256,
+ u_suballocator_alloc(&rctx->allocator_zeroed_memory, 16, 256,
&tmp_buffer_offset, &tmp_buffer);
if (!tmp_buffer)
return;
@@ -1726,7 +1724,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
} else
consts.buffer_offset = 0;
- rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
+ rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);
rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo, ~0);
@@ -1833,7 +1831,7 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
{
struct r600_common_context *ctx =
(struct r600_common_context*)rscreen->aux_context;
- struct radeon_cmdbuf *cs = ctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &ctx->gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
unsigned i, mask = 0;
@@ -1847,9 +1845,9 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
* written to. By increasing this number we'll write the
* status bit for these as per the normal disabled rb logic.
*/
- ctx->screen->info.num_render_backends = 8;
+ ctx->screen->info.max_render_backends = 8;
}
- max_rbs = ctx->screen->info.num_render_backends;
+ max_rbs = ctx->screen->info.max_render_backends;
assert(rscreen->chip_class <= CAYMAN);
@@ -1895,7 +1893,7 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
return;
/* initialize buffer with zeroes */
- results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_MAP_WRITE);
if (results) {
memset(results, 0, max_rbs * 4 * 4);
@@ -1909,7 +1907,7 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
/* analyze results */
- results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_MAP_READ);
if (results) {
for(i = 0; i < max_rbs; i++) {
/* at least highest bit will be set if backend is used */
@@ -2122,7 +2120,7 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.get_query_result_resource = r600_get_query_result_resource;
rctx->render_cond_atom.emit = r600_emit_query_predication;
- if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
+ if (((struct r600_common_screen*)rctx->b.screen)->info.max_render_backends > 0)
rctx->b.render_condition = r600_render_condition;
list_inithead(&rctx->active_queries);
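
Note: the query code also picks up two struct radeon_info field renames that recur throughout this import:

/* num_render_backends -> max_render_backends
 * max_sh_per_se       -> max_sa_per_se  (shader arrays per shader engine)
 */
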
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_streamout.c b/lib/mesa/src/gallium/drivers/r600/r600_streamout.c
index f925c07b2..f45561d29 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_streamout.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_streamout.c
@@ -51,7 +51,7 @@ r600_create_so_target(struct pipe_context *ctx,
return NULL;
}
- u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
+ u_suballocator_alloc(&rctx->allocator_zeroed_memory, 4, 4,
&t->buf_filled_size_offset,
(struct pipe_resource**)&t->buf_filled_size);
if (!t->buf_filled_size) {
@@ -154,7 +154,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
unsigned reg_strmout_cntl;
/* The register is at different places on different ASICs. */
@@ -180,7 +180,7 @@ static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
uint16_t *stride_in_dw = rctx->streamout.stride_in_dw;
unsigned i, update_flags = 0;
@@ -253,7 +253,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned i;
uint64_t va;
@@ -315,8 +315,8 @@ static void r600_emit_streamout_enable(struct r600_common_context *rctx,
S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
}
- radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
- radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
+ radeon_set_context_reg(&rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+ radeon_set_context_reg(&rctx->gfx.cs, strmout_config_reg, strmout_config_val);
}
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c b/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c
index 512e77420..e8e54fb99 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c
@@ -59,7 +59,7 @@ static void set_random_pixels(struct pipe_context *ctx,
uint8_t *map;
unsigned x,y,z;
- map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE,
+ map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_MAP_WRITE,
0, 0, 0, tex->width0, tex->height0,
tex->array_size, &t);
assert(map);
@@ -94,7 +94,7 @@ static bool compare_textures(struct pipe_context *ctx,
int y,z;
bool pass = true;
- map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ,
+ map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_MAP_READ,
0, 0, 0, tex->width0, tex->height0,
tex->array_size, &t);
assert(map);
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_texture.c b/lib/mesa/src/gallium/drivers/r600/r600_texture.c
index 518e92d9f..c910bd08b 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_texture.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_texture.c
@@ -33,7 +33,7 @@
#include "util/u_pack_color.h"
#include "util/u_surface.h"
#include "util/os_time.h"
-#include "state_tracker/winsys_handle.h"
+#include "frontend/winsys_handle.h"
#include <errno.h>
#include <inttypes.h>
@@ -52,7 +52,7 @@ bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
unsigned src_level,
const struct pipe_box *src_box)
{
- if (!rctx->dma.cs)
+ if (!rctx->dma.cs.priv)
return false;
if (rdst->surface.bpe != rsrc->surface.bpe)
@@ -183,11 +183,11 @@ static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen,
*layer_stride = (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4;
if (!box)
- return rtex->surface.u.legacy.level[level].offset;
+ return (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
/* Each texture is an array of mipmap levels. Each level is
* an array of slices. */
- return rtex->surface.u.legacy.level[level].offset +
+ return (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256 +
box->z * (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 +
(box->y / rtex->surface.blk_h *
rtex->surface.u.legacy.level[level].nblk_x +
@@ -243,8 +243,6 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
flags |= RADEON_SURF_SHAREABLE;
if (is_imported)
flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
- if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
- flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
r = rscreen->ws->surface_init(rscreen->ws, ptex,
flags, bpe, array_mode, surface);
@@ -264,7 +262,7 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
if (offset) {
for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
- surface->u.legacy.level[i].offset += offset;
+ surface->u.legacy.level[i].offset_256B += offset / 256;
}
return 0;
@@ -457,7 +455,7 @@ static void r600_texture_get_info(struct pipe_screen* screen,
return;
if (resource->target != PIPE_BUFFER) {
- offset = rtex->surface.u.legacy.level[0].offset;
+ offset = (uint64_t)rtex->surface.u.legacy.level[0].offset_256B * 256;
stride = rtex->surface.u.legacy.level[0].nblk_x *
rtex->surface.bpe;
}
@@ -521,7 +519,7 @@ static bool r600_texture_get_handle(struct pipe_screen* screen,
if (!res->b.is_shared || update_metadata) {
r600_texture_init_metadata(rscreen, rtex, &metadata);
- rscreen->ws->buffer_set_metadata(res->buf, &metadata);
+ rscreen->ws->buffer_set_metadata(rscreen->ws, res->buf, &metadata, NULL);
}
slice_size = (uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
@@ -655,7 +653,7 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
out->tile_swizzle = fmask.tile_swizzle;
- out->alignment = MAX2(256, fmask.surf_alignment);
+ out->alignment = MAX2(256, 1 << fmask.surf_alignment_log2);
out->size = fmask.surf_size;
}
@@ -759,7 +757,7 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = rscreen->info.num_tile_pipes;
- rtex->surface.htile_size = 0;
+ rtex->surface.meta_size = 0;
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_minor < 26)
@@ -806,8 +804,8 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
- rtex->surface.htile_alignment = base_align;
- rtex->surface.htile_size =
+ rtex->surface.meta_alignment_log2 = util_logbase2(base_align);
+ rtex->surface.meta_size =
util_num_layers(&rtex->resource.b.b, 0) *
align(slice_bytes, base_align);
}
@@ -817,11 +815,11 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
{
r600_texture_get_htile_size(rscreen, rtex);
- if (!rtex->surface.htile_size)
+ if (!rtex->surface.meta_size)
return;
- rtex->htile_offset = align(rtex->size, rtex->surface.htile_alignment);
- rtex->size = rtex->htile_offset + rtex->surface.htile_size;
+ rtex->htile_offset = align(rtex->size, 1 << rtex->surface.meta_alignment_log2);
+ rtex->size = rtex->htile_offset + rtex->surface.meta_size;
}
void r600_print_texture_info(struct r600_common_screen *rscreen,
@@ -832,7 +830,7 @@ void r600_print_texture_info(struct r600_common_screen *rscreen,
/* Common parameters. */
u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
"blk_h=%u, array_size=%u, last_level=%u, "
- "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
+ "bpe=%u, nsamples=%u, flags=0x%"PRIx64", %s\n",
rtex->resource.b.b.width0, rtex->resource.b.b.height0,
rtex->resource.b.b.depth0, rtex->surface.blk_w,
rtex->surface.blk_h,
@@ -842,7 +840,7 @@ void r600_print_texture_info(struct r600_common_screen *rscreen,
u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
"bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
- rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
+ rtex->surface.surf_size, 1 << rtex->surface.surf_alignment_log2, rtex->surface.u.legacy.bankw,
rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
(rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
@@ -863,14 +861,14 @@ void r600_print_texture_info(struct r600_common_screen *rscreen,
if (rtex->htile_offset)
u_log_printf(log, " HTile: offset=%"PRIu64", size=%u "
"alignment=%u\n",
- rtex->htile_offset, rtex->surface.htile_size,
- rtex->surface.htile_alignment);
+ rtex->htile_offset, rtex->surface.meta_size,
+ 1 << rtex->surface.meta_alignment_log2);
for (i = 0; i <= rtex->resource.b.b.last_level; i++)
u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
"npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"mode=%u, tiling_index = %u\n",
- i, rtex->surface.u.legacy.level[i].offset,
+ i, (uint64_t)rtex->surface.u.legacy.level[i].offset_256B * 256,
(uint64_t)rtex->surface.u.legacy.level[i].slice_size_dw * 4,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
@@ -888,15 +886,15 @@ void r600_print_texture_info(struct r600_common_screen *rscreen,
"slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"mode=%u, tiling_index = %u\n",
- i, rtex->surface.u.legacy.stencil_level[i].offset,
- (uint64_t)rtex->surface.u.legacy.stencil_level[i].slice_size_dw * 4,
+ i, (uint64_t)rtex->surface.u.legacy.zs.stencil_level[i].offset_256B * 256,
+ (uint64_t)rtex->surface.u.legacy.zs.stencil_level[i].slice_size_dw * 4,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
- rtex->surface.u.legacy.stencil_level[i].nblk_x,
- rtex->surface.u.legacy.stencil_level[i].nblk_y,
- rtex->surface.u.legacy.stencil_level[i].mode,
- rtex->surface.u.legacy.stencil_tiling_index[i]);
+ rtex->surface.u.legacy.zs.stencil_level[i].nblk_x,
+ rtex->surface.u.legacy.zs.stencil_level[i].nblk_y,
+ rtex->surface.u.legacy.zs.stencil_level[i].mode,
+ rtex->surface.u.legacy.zs.stencil_tiling_index[i]);
}
}
}
@@ -918,7 +916,6 @@ r600_texture_create_object(struct pipe_screen *screen,
resource = &rtex->resource;
resource->b.b = *base;
- resource->b.b.next = NULL;
resource->b.vtbl = &r600_texture_vtbl;
pipe_reference_init(&resource->b.b.reference, 1);
resource->b.b.screen = screen;
@@ -974,7 +971,7 @@ r600_texture_create_object(struct pipe_screen *screen,
/* Now create the backing buffer. */
if (!buf) {
r600_init_resource_fields(rscreen, resource, rtex->size,
- rtex->surface.surf_alignment);
+ 1 << rtex->surface.surf_alignment_log2);
if (!r600_alloc_resource(rscreen, resource)) {
FREE(rtex);
@@ -984,7 +981,7 @@ r600_texture_create_object(struct pipe_screen *screen,
resource->buf = buf;
resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
resource->bo_size = buf->size;
- resource->bo_alignment = buf->alignment;
+ resource->bo_alignment = 1 << buf->alignment_log2;
resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
if (resource->domains & RADEON_DOMAIN_VRAM)
resource->vram_usage = buf->size;
@@ -1003,7 +1000,7 @@ r600_texture_create_object(struct pipe_screen *screen,
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->htile_offset,
- rtex->surface.htile_size,
+ rtex->surface.meta_size,
clear_value);
}
@@ -1135,7 +1132,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
if (!buf)
return NULL;
- rscreen->ws->buffer_get_metadata(buf, &metadata);
+ rscreen->ws->buffer_get_metadata(rscreen->ws, buf, &metadata, NULL);
r600_surface_import_metadata(rscreen, &surface, &metadata,
&array_mode, &is_scanout);
@@ -1262,7 +1259,7 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
/* r600g doesn't react to dirty_tex_descriptor_counter */
return rscreen->chip_class >= GFX6 &&
!rtex->resource.b.is_shared &&
- !(transfer_usage & PIPE_TRANSFER_READ) &&
+ !(transfer_usage & PIPE_MAP_READ) &&
rtex->resource.b.b.last_level == 0 &&
util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
box->x, box->y, box->z,
@@ -1339,14 +1336,14 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
*/
if (!rtex->surface.is_linear)
use_staging_texture = true;
- else if (usage & PIPE_TRANSFER_READ)
+ else if (usage & PIPE_MAP_READ)
use_staging_texture =
rtex->resource.domains & RADEON_DOMAIN_VRAM ||
rtex->resource.flags & RADEON_FLAG_GTT_WC;
/* Write & linear only: */
else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
RADEON_USAGE_READWRITE) ||
- !rctx->ws->buffer_wait(rtex->resource.buf, 0,
+ !rctx->ws->buffer_wait(rctx->ws, rtex->resource.buf, 0,
RADEON_USAGE_READWRITE)) {
/* It's busy. */
if (r600_can_invalidate_texture(rctx->screen, rtex,
@@ -1389,7 +1386,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
return NULL;
}
- if (usage & PIPE_TRANSFER_READ) {
+ if (usage & PIPE_MAP_READ) {
struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
if (!temp) {
R600_ERR("failed to create a temporary depth texture\n");
@@ -1435,7 +1432,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
r600_init_temp_resource_from_box(&resource, texture, box, level,
R600_RESOURCE_FLAG_TRANSFER);
- resource.usage = (usage & PIPE_TRANSFER_READ) ?
+ resource.usage = (usage & PIPE_MAP_READ) ?
PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
/* Create the temporary texture. */
@@ -1452,10 +1449,10 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
&trans->b.b.stride,
&trans->b.b.layer_stride);
- if (usage & PIPE_TRANSFER_READ)
+ if (usage & PIPE_MAP_READ)
r600_copy_to_staging_texture(ctx, trans);
else
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ usage |= PIPE_MAP_UNSYNCHRONIZED;
buf = trans->staging;
} else {
@@ -1484,7 +1481,7 @@ static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
- if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
+ if ((transfer->usage & PIPE_MAP_WRITE) && rtransfer->staging) {
if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
ctx->resource_copy_region(ctx, texture, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
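This file also picks up the gallium-wide rename of PIPE_TRANSFER_* to PIPE_MAP_* (and RADEON_TRANSFER_TEMPORARY to RADEON_MAP_TEMPORARY); the staging policy itself is unchanged. Condensed into one hypothetical predicate, using the renamed flags:

   /* Sketch of the mapping policy the hunks above implement; the three
    * booleans stand in for the driver's real checks. */
   static bool want_staging(unsigned usage, bool uncached, bool busy,
                            bool can_invalidate)
   {
      if (usage & PIPE_MAP_READ)
         return uncached;              /* never read VRAM/GTT-WC directly */
      return busy && !can_invalidate;  /* write-only: stage unless the
                                          storage can be reallocated */
   }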
@@ -1617,8 +1614,6 @@ static void r600_clear_texture(struct pipe_context *pipe,
struct r600_texture *rtex = (struct r600_texture*)tex;
struct pipe_surface tmpl = {{0}};
struct pipe_surface *sf;
- const struct util_format_description *desc =
- util_format_description(tex->format);
tmpl.format = tex->format;
tmpl.u.tex.first_layer = box->z;
@@ -1635,11 +1630,11 @@ static void r600_clear_texture(struct pipe_context *pipe,
/* Depth is always present. */
clear = PIPE_CLEAR_DEPTH;
- desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
+ util_format_unpack_z_float(tex->format, &depth, data, 1);
if (rtex->surface.has_stencil) {
clear |= PIPE_CLEAR_STENCIL;
- desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
+ util_format_unpack_s_8uint(tex->format, &stencil, data, 1);
}
pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
@@ -1648,13 +1643,7 @@ static void r600_clear_texture(struct pipe_context *pipe,
} else {
union pipe_color_union color;
- /* pipe_color_union requires the full vec4 representation. */
- if (util_format_is_pure_uint(tex->format))
- desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
- else if (util_format_is_pure_sint(tex->format))
- desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
- else
- desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
+ util_format_unpack_rgba(tex->format, color.ui, data, 1);
if (screen->is_format_supported(screen, tex->format,
tex->target, 0, 0,
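The per-format desc->unpack_* callbacks give way to the util_format_unpack_*() wrappers, which look up the format description internally and pick the uint/sint/float variant themselves, which is what lets the three-way branch above disappear. Typical single-texel use, mirroring the hunk:

   union pipe_color_union color;
   util_format_unpack_rgba(tex->format, color.ui, data, 1);  /* one texel */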
@@ -1751,12 +1740,8 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
color->ui[0] == color->ui[2]);
uc.ui[0] = color->ui[0];
uc.ui[1] = color->ui[3];
- } else if (util_format_is_pure_uint(surface_format)) {
- util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
- } else if (util_format_is_pure_sint(surface_format)) {
- util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
} else {
- util_pack_color(color->f, surface_format, &uc);
+ util_pack_color_union(surface_format, &uc, color);
}
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
@@ -1914,7 +1899,7 @@ r600_texture_from_memobj(struct pipe_screen *screen,
struct pb_buffer *buf = NULL;
if (memobj->b.dedicated) {
- rscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
+ rscreen->ws->buffer_get_metadata(rscreen->ws, memobj->buf, &metadata, NULL);
r600_surface_import_metadata(rscreen, &surface, &metadata,
&array_mode, &is_scanout);
} else {
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_viewport.c b/lib/mesa/src/gallium/drivers/r600/r600_viewport.c
index 7a5bf8f39..a8ed01a0c 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_viewport.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_viewport.c
@@ -185,7 +185,7 @@ static void r600_emit_one_scissor(struct r600_common_context *rctx,
static void r600_emit_guardband(struct r600_common_context *rctx,
struct r600_signed_scissor *vp_as_scissor)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
struct pipe_viewport_state vp;
float left, top, right, bottom, max_range, guardband_x, guardband_y;
@@ -235,7 +235,7 @@ static void r600_emit_guardband(struct r600_common_context *rctx,
static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
struct pipe_scissor_state *states = rctx->scissors.states;
unsigned mask = rctx->scissors.dirty_mask;
bool scissor_enabled = rctx->scissor_enabled;
@@ -306,7 +306,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
static void r600_emit_one_viewport(struct r600_common_context *rctx,
struct pipe_viewport_state *state)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
radeon_emit(cs, fui(state->scale[0]));
radeon_emit(cs, fui(state->translate[0]));
@@ -318,7 +318,7 @@ static void r600_emit_one_viewport(struct r600_common_context *rctx,
static void r600_emit_viewports(struct r600_common_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
struct pipe_viewport_state *states = rctx->viewports.states;
unsigned mask = rctx->viewports.dirty_mask;
@@ -348,7 +348,7 @@ static void r600_emit_viewports(struct r600_common_context *rctx)
static void r600_emit_depth_ranges(struct r600_common_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->gfx.cs;
struct pipe_viewport_state *states = rctx->viewports.states;
unsigned mask = rctx->viewports.depth_range_dirty_mask;
float zmin, zmax;
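A theme that repeats through the rest of this diff: struct radeon_cmdbuf is no longer a winsys-allocated pointer but is embedded by value in its owner (context, decoder, encoder), so call sites borrow its address. Illustrative shape of the change:

   /* before: struct radeon_cmdbuf *cs = rctx->gfx.cs;  (winsys-owned)  */
   /* after:  the cmdbuf lives inside the context:                      */
   struct radeon_cmdbuf *cs = &rctx->gfx.cs;
   radeon_emit(cs, 0 /* payload dword */);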
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c
index e7107c0b4..e4766a72f 100644
--- a/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c
@@ -73,7 +73,7 @@ struct ruvd_decoder {
struct pipe_screen *screen;
struct radeon_winsys* ws;
- struct radeon_cmdbuf* cs;
+ struct radeon_cmdbuf cs;
unsigned cur_buffer;
@@ -102,14 +102,14 @@ struct ruvd_decoder {
/* flush IB to the hardware */
static int flush(struct ruvd_decoder *dec, unsigned flags)
{
- return dec->ws->cs_flush(dec->cs, flags, NULL);
+ return dec->ws->cs_flush(&dec->cs, flags, NULL);
}
/* add a new set register command to the IB */
static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
{
- radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0));
- radeon_emit(dec->cs, val);
+ radeon_emit(&dec->cs, RUVD_PKT0(reg >> 2, 0));
+ radeon_emit(&dec->cs, val);
}
/* send a command to the VCPU through the GPCOM registers */
@@ -119,7 +119,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
{
int reloc_idx;
- reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+ reloc_idx = dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
domain, 0);
if (!dec->use_legacy) {
uint64_t addr;
@@ -152,8 +152,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* and map it for CPU access */
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+ ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
+ PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
@@ -177,7 +177,8 @@ static void send_msg_buf(struct ruvd_decoder *dec)
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* unmap the buffer */
- dec->ws->buffer_unmap(buf->res->buf);
+ dec->ws->buffer_unmap(dec->ws, buf->res->buf);
+ dec->bs_ptr = NULL;
dec->msg = NULL;
dec->fb = NULL;
dec->it = NULL;
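buffer_map()/buffer_unmap() now take the winsys as their first argument, and the decoder clears its cached bitstream pointer after each unmap so a stale mapping cannot be written through later. The pattern, condensed:

   ptr = dec->ws->buffer_map(dec->ws, buf, cs,
                             PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
   /* ... fill the buffer ... */
   dec->ws->buffer_unmap(dec->ws, buf);
   dec->bs_ptr = NULL;   /* never dereference a mapping after unmap */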
@@ -225,55 +226,6 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
}
}
-static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec)
-{
- unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
- unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
-
- unsigned max_references = dec->base.max_references + 1;
-
- if (dec->base.width * dec->base.height >= 4096*2000)
- max_references = MAX2(max_references, 8);
- else
- max_references = MAX2(max_references, 17);
-
- width = align (width, 16);
- height = align (height, 16);
- return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
-}
-
-static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic)
-{
- unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
- unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
- unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
-
- unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
- unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
- unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
-
- unsigned max_references = dec->base.max_references + 1;
-
- if (dec->base.width * dec->base.height >= 4096*2000)
- max_references = MAX2(max_references, 8);
- else
- max_references = MAX2(max_references, 17);
-
- log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +
- pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
-
- width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
- height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
-
- num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
- context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
- max_mb_address = (unsigned) ceil(height * 8 / 2048.0);
-
- cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
- db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
-
- return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
-}
static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
{
@@ -514,156 +466,6 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
return result;
}
-/* get h265 specific message bits */
-static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
- struct pipe_h265_picture_desc *pic)
-{
- struct ruvd_h265 result;
- unsigned i;
-
- memset(&result, 0, sizeof(result));
-
- result.sps_info_flags = 0;
- result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
- result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
- result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
- result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
- result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
- result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
- result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
- result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
- result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
- if (pic->UseRefPicList == true)
- result.sps_info_flags |= 1 << 10;
-
- result.chroma_format = pic->pps->sps->chroma_format_idc;
- result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
- result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
- result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
- result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
- result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3;
- result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
- result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2;
- result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size;
- result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
- result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
- result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
- result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
- result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
- result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
- result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
-
- result.pps_info_flags = 0;
- result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
- result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
- result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
- result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
- result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
- result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
- result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
- result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
- result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
- result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
- result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
- result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
- result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
- result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
- result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
- result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
- result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
- result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
- result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
- result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
- //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
-
- result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
- result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
- result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
- result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
- result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
- result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
- result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
- result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
- result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
- result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
- result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
- result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
- result.init_qp_minus26 = pic->pps->init_qp_minus26;
-
- for (i = 0; i < 19; ++i)
- result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
-
- for (i = 0; i < 21; ++i)
- result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
-
- result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
- result.curr_idx = pic->CurrPicOrderCntVal;
- result.curr_poc = pic->CurrPicOrderCntVal;
-
- vl_video_buffer_set_associated_data(target, &dec->base,
- (void *)(uintptr_t)pic->CurrPicOrderCntVal,
- &ruvd_destroy_associated_data);
-
- for (i = 0; i < 16; ++i) {
- struct pipe_video_buffer *ref = pic->ref[i];
- uintptr_t ref_pic = 0;
-
- result.poc_list[i] = pic->PicOrderCntVal[i];
-
- if (ref)
- ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
- else
- ref_pic = 0x7F;
- result.ref_pic_list[i] = ref_pic;
- }
-
- for (i = 0; i < 8; ++i) {
- result.ref_pic_set_st_curr_before[i] = 0xFF;
- result.ref_pic_set_st_curr_after[i] = 0xFF;
- result.ref_pic_set_lt_curr[i] = 0xFF;
- }
-
- for (i = 0; i < pic->NumPocStCurrBefore; ++i)
- result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
-
- for (i = 0; i < pic->NumPocStCurrAfter; ++i)
- result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
-
- for (i = 0; i < pic->NumPocLtCurr; ++i)
- result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
-
- for (i = 0; i < 6; ++i)
- result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
-
- for (i = 0; i < 2; ++i)
- result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
-
- memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
- memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
- memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
- memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
-
- for (i = 0 ; i < 2 ; i++) {
- for (int j = 0 ; j < 15 ; j++)
- result.direct_reflist[i][j] = pic->RefPicList[i][j];
- }
-
- /* TODO
- result.highestTid;
- result.isNonRef;
-
- IDRPicFlag;
- RAPPicFlag;
- NumPocTotalCurr;
- NumShortTermPictureSliceHeaderBits;
- NumLongTermPictureSliceHeaderBits;
-
- IsLongTerm[16];
- */
-
- return result;
-}
-
/* get vc1 specific message bits */
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
{
@@ -1007,7 +809,7 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
flush(dec, 0);
- dec->ws->cs_destroy(dec->cs);
+ dec->ws->cs_destroy(&dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
@@ -1038,9 +840,9 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,
&ruvd_destroy_associated_data);
dec->bs_size = 0;
- dec->bs_ptr = dec->ws->buffer_map(
+ dec->bs_ptr = dec->ws->buffer_map(dec->ws,
dec->bs_buffers[dec->cur_buffer].res->buf,
- dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+ &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
}
/**
@@ -1086,15 +888,16 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
new_size += 2; /* save for EOI */
if (new_size > buf->res->buf->size) {
- dec->ws->buffer_unmap(buf->res->buf);
- if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
+ dec->ws->buffer_unmap(dec->ws, buf->res->buf);
+ dec->bs_ptr = NULL;
+ if (!rvid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
- dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE |
- RADEON_TRANSFER_TEMPORARY);
+ dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
+ PIPE_MAP_WRITE |
+ RADEON_MAP_TEMPORARY);
if (!dec->bs_ptr)
return;
@@ -1136,7 +939,8 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
bs_size = align(dec->bs_size, 128);
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
- dec->ws->buffer_unmap(bs_buf->res->buf);
+ dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);
+ dec->bs_ptr = NULL;
map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
@@ -1240,14 +1044,14 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
struct ruvd_decoder *dec;
int r, i;
- ws->query_info(ws, &info);
+ ws->query_info(ws, &info, false, false);
switch(u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
return vl_create_mpeg12_decoder(context, templ);
- /* fall through */
+ FALLTHROUGH;
case PIPE_VIDEO_FORMAT_MPEG4:
width = align(width, VL_MACROBLOCK_WIDTH);
height = align(height, VL_MACROBLOCK_HEIGHT);
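The comment-style fall-through annotation becomes Mesa's FALLTHROUGH macro, a statement attribute where the compiler provides one, so -Wimplicit-fallthrough can verify the intent. Roughly (the real definition in Mesa's util headers guards more cases):

   #ifdef __has_attribute
   #if __has_attribute(fallthrough)
   #define FALLTHROUGH __attribute__((fallthrough))
   #endif
   #endif
   #ifndef FALLTHROUGH
   #define FALLTHROUGH do { } while (0)
   #endif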
@@ -1286,8 +1090,8 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
- dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, false);
- if (!dec->cs) {
+
+ if (!ws->cs_create(&dec->cs, rctx->ctx, RING_UVD, NULL, NULL, false)) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
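cs_create() likewise stops returning a freshly allocated command stream and instead initializes caller-owned storage, reporting success as a bool; as the error path below shows, cs_destroy(&dec->cs) is then called unconditionally. Shape of the new call:

   if (!ws->cs_create(&dec->cs, rctx->ctx, RING_UVD, NULL, NULL, false)) {
      RVID_ERR("Can't get command submission context.\n");
      goto error;   /* cleanup may still cs_destroy(&dec->cs) */
   }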
@@ -1347,7 +1151,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
return &dec->base;
error:
- if (dec->cs) dec->ws->cs_destroy(dec->cs);
+ dec->ws->cs_destroy(&dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
@@ -1366,7 +1170,7 @@ error:
/* calculate top/bottom offset */
static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
{
- return surface->u.legacy.level[0].offset +
+ return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
}
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_vce.c b/lib/mesa/src/gallium/drivers/r600/radeon_vce.c
index 16f48c694..1cf8522ba 100644
--- a/lib/mesa/src/gallium/drivers/r600/radeon_vce.c
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_vce.c
@@ -63,7 +63,7 @@ static void (*get_pic_param)(struct rvce_encoder *enc,
*/
static void flush(struct rvce_encoder *enc)
{
- enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
+ enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL);
enc->task_info_idx = 0;
enc->bs_idx = 0;
}
@@ -71,7 +71,7 @@ static void flush(struct rvce_encoder *enc)
#if 0
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+	uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE);
unsigned i = 0;
fprintf(stderr, "\n");
fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
@@ -105,7 +105,7 @@ static void reset_cpb(struct rvce_encoder *enc)
for (i = 0; i < enc->cpb_num; ++i) {
struct rvce_cpb_slot *slot = &enc->cpb_array[i];
slot->index = i;
- slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP;
+ slot->picture_type = PIPE_H2645_ENC_PICTURE_TYPE_SKIP;
slot->frame_num = 0;
slot->pic_order_cnt = 0;
list_addtail(&slot->list, &enc->cpb_slots);
@@ -126,10 +126,10 @@ static void sort_cpb(struct rvce_encoder *enc)
if (i->frame_num == enc->pic.ref_idx_l1)
l1 = i;
- if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0)
+ if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P && l0)
break;
- if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B &&
+ if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B &&
l0 && l1)
break;
}
@@ -256,7 +256,7 @@ static void rvce_destroy(struct pipe_video_codec *encoder)
rvid_destroy_buffer(&fb);
}
rvid_destroy_buffer(&enc->cpb);
- enc->ws->cs_destroy(enc->cs);
+ enc->ws->cs_destroy(&enc->cs);
FREE(enc->cpb_array);
FREE(enc);
}
@@ -281,10 +281,10 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
- if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+ if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR)
reset_cpb(enc);
- else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
- pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)
+ else if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P ||
+ pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B)
sort_cpb(enc);
if (!enc->stream_handle) {
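The encoder picture-type enum is unified for H.264 and HEVC as pipe_h2645_enc_picture_type; only the spelling changes. The IDR/P/B handling above, restated as a dispatch for clarity:

   switch (pic->picture_type) {
   case PIPE_H2645_ENC_PICTURE_TYPE_IDR:
      reset_cpb(enc);      /* fresh GOP: drop all reference slots */
      break;
   case PIPE_H2645_ENC_PICTURE_TYPE_P:
   case PIPE_H2645_ENC_PICTURE_TYPE_B:
      sort_cpb(enc);       /* order references into the l0/l1 lists */
      break;
   default:
      break;
   }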
@@ -323,7 +323,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
RVID_ERR("Can't create feedback buffer.\n");
return;
}
- if (!radeon_emitted(enc->cs, 0))
+ if (!radeon_emitted(&enc->cs, 0))
enc->session(enc);
enc->encode(enc);
enc->feedback(enc);
@@ -357,9 +357,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
struct rvid_buffer *fb = feedback;
if (size) {
- uint32_t *ptr = enc->ws->buffer_map(
- fb->res->buf, enc->cs,
- PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
+ uint32_t *ptr = enc->ws->buffer_map(enc->ws,
+ fb->res->buf, &enc->cs,
+ PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
if (ptr[1]) {
*size = ptr[4] - ptr[9];
@@ -367,7 +367,7 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
*size = 0;
}
- enc->ws->buffer_unmap(fb->res->buf);
+ enc->ws->buffer_unmap(enc->ws, fb->res->buf);
}
//dump_feedback(enc, fb);
rvid_destroy_buffer(fb);
@@ -431,14 +431,13 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->screen = context->screen;
enc->ws = ws;
- enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, false);
- if (!enc->cs) {
+
+ if (!ws->cs_create(&enc->cs, rctx->ctx, RING_VCE, rvce_cs_flush, enc, false)) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
templat.buffer_format = PIPE_FORMAT_NV12;
- templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
templat.width = enc->base.width;
templat.height = enc->base.height;
templat.interlaced = false;
@@ -478,8 +477,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
return &enc->base;
error:
- if (enc->cs)
- enc->ws->cs_destroy(enc->cs);
+ enc->ws->cs_destroy(&enc->cs);
rvid_destroy_buffer(&enc->cpb);
@@ -520,7 +518,7 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
{
int reloc_idx;
- reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+ reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
domain, 0);
if (enc->use_vm) {
uint64_t addr;
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_vce.h b/lib/mesa/src/gallium/drivers/r600/radeon_vce.h
index c5e054777..a437336bc 100644
--- a/lib/mesa/src/gallium/drivers/r600/radeon_vce.h
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_vce.h
@@ -36,14 +36,14 @@
#include "util/list.h"
-#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RVCE_CS(value) (enc->cs.current.buf[enc->cs.current.cdw++] = (value))
#define RVCE_BEGIN(cmd) { \
- uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+ uint32_t *begin = &enc->cs.current.buf[enc->cs.current.cdw++]; \
RVCE_CS(cmd)
#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
-#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
+#define RVCE_END() *begin = (&enc->cs.current.buf[enc->cs.current.cdw] - begin) * 4; }
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
#define RVCE_MAX_AUX_BUFFER_NUM 4
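With the embedded cmdbuf, the RVCE_* macros index enc->cs.current directly. RVCE_BEGIN() records where the command's size dword sits and RVCE_END() back-patches it with the byte length of everything emitted in between. Usage sketch (the opcode value is illustrative):

   RVCE_BEGIN(0x00000001);        /* open a command; size patched later */
   RVCE_CS(enc->stream_handle);   /* payload dwords */
   RVCE_END();                    /* *begin = emitted dwords * 4 */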
@@ -60,7 +60,7 @@ struct rvce_cpb_slot {
struct list_head list;
unsigned index;
- enum pipe_h264_enc_picture_type picture_type;
+ enum pipe_h2645_enc_picture_type picture_type;
unsigned frame_num;
unsigned pic_order_cnt;
};
@@ -340,7 +340,7 @@ struct rvce_h264_enc_pic {
unsigned quant_p_frames;
unsigned quant_b_frames;
- enum pipe_h264_enc_picture_type picture_type;
+ enum pipe_h2645_enc_picture_type picture_type;
unsigned frame_num;
unsigned frame_num_cnt;
unsigned p_remain;
@@ -387,7 +387,7 @@ struct rvce_encoder {
struct pipe_screen *screen;
struct radeon_winsys* ws;
- struct radeon_cmdbuf* cs;
+ struct radeon_cmdbuf cs;
rvce_get_buffer get_buffer;
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_video.c b/lib/mesa/src/gallium/drivers/r600/radeon_video.c
index 81c1a5e51..6ada9ba18 100644
--- a/lib/mesa/src/gallium/drivers/r600/radeon_video.c
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_video.c
@@ -97,13 +97,13 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
- src = ws->buffer_map(old_buf.res->buf, cs,
- PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);
+ src = ws->buffer_map(ws, old_buf.res->buf, cs,
+ PIPE_MAP_READ | RADEON_MAP_TEMPORARY);
if (!src)
goto error;
- dst = ws->buffer_map(new_buf->res->buf, cs,
- PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+ dst = ws->buffer_map(ws, new_buf->res->buf, cs,
+ PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
if (!dst)
goto error;
@@ -113,14 +113,14 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
dst += bytes;
memset(dst, 0, new_size);
}
- ws->buffer_unmap(new_buf->res->buf);
- ws->buffer_unmap(old_buf.res->buf);
+ ws->buffer_unmap(ws, new_buf->res->buf);
+ ws->buffer_unmap(ws, old_buf.res->buf);
rvid_destroy_buffer(&old_buf);
return true;
error:
if (src)
- ws->buffer_unmap(old_buf.res->buf);
+ ws->buffer_unmap(ws, old_buf.res->buf);
rvid_destroy_buffer(new_buf);
*new_buf = old_buf;
return false;
@@ -171,7 +171,7 @@ void rvid_join_surfaces(struct r600_common_context *rctx,
continue;
/* adjust the texture layer offsets */
- off = align(off, surfaces[i]->surf_alignment);
+ off = align(off, 1 << surfaces[i]->surf_alignment_log2);
/* copy the tiling parameters */
surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
@@ -180,7 +180,7 @@ void rvid_join_surfaces(struct r600_common_context *rctx,
surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
- surfaces[i]->u.legacy.level[j].offset += off;
+ surfaces[i]->u.legacy.level[j].offset_256B += off / 256;
off += surfaces[i]->surf_size;
}
@@ -189,9 +189,9 @@ void rvid_join_surfaces(struct r600_common_context *rctx,
if (!buffers[i] || !*buffers[i])
continue;
- size = align(size, (*buffers[i])->alignment);
+ size = align(size, 1 << (*buffers[i])->alignment_log2);
size += (*buffers[i])->size;
- alignment = MAX2(alignment, (*buffers[i])->alignment * 1);
+ alignment = MAX2(alignment, 1 << (*buffers[i])->alignment_log2);
}
if (!size)
@@ -224,7 +224,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_format codec = u_reduce_video_profile(profile);
struct radeon_info info;
- rscreen->ws->query_info(rscreen->ws, &info);
+ rscreen->ws->query_info(rscreen->ws, &info, false, false);
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_video.h b/lib/mesa/src/gallium/drivers/r600/radeon_video.h
index 4777c6c0e..59c9377de 100644
--- a/lib/mesa/src/gallium/drivers/r600/radeon_video.h
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_video.h
@@ -65,7 +65,7 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
/* join surfaces into the same buffer with identical tiling params
- sumup their sizes and replace the backend buffers with a single bo */
+ sum up their sizes and replace the backend buffers with a single bo */
void rvid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
index 4a7f82ba7..b04cb73e2 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -540,7 +540,8 @@ int bc_decoder::decode_fetch_mem(unsigned & i, bc_fetch& bc) {
uint32_t dw2 = dw[i+2];
	i += 4; // MEM instructions align to 4-word boundaries
- assert(i < ndw);
+
+ assert(i <= ndw);
MEM_RD_WORD0_R7EGCM w0(dw0);
bc.elem_size = w0.get_ELEM_SIZE();
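The relaxed assertion fixes an off-by-one: the decoder consumes MEM instructions in fixed 4-word chunks, so after the final one the cursor may legitimately land exactly on ndw; only overshooting means the stream was truncated. As a tiny predicate:

   static bool consume_mem_words(unsigned &i, unsigned ndw)
   {
      i += 4;            /* a MEM instruction occupies four words */
      return i <= ndw;   /* i == ndw: the stream ended cleanly */
   }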
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 6b19d61ba..446486c36 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -385,6 +385,9 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
n->flags |= NF_ALU_4SLOT;
+ if (ctx.alu_slots(n->bc.op) & AF_2SLOT)
+ n->flags |= NF_ALU_2SLOT;
+
n->src.resize(src_count);
unsigned flags = n->bc.op_ptr->flags;
@@ -476,7 +479,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
n->src[s] = sh->get_const_value(src.value);
} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
- SLOT_TRANS : src.chan;
+ ((unsigned)SLOT_TRANS) : src.chan;
// XXX shouldn't happen but llvm backend uses PS on cayman
if (prev_slot == SLOT_TRANS && ctx.is_cayman())
@@ -586,12 +589,16 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
alu_node *a = static_cast<alu_node*>(*I);
unsigned sflags = a->bc.slot_flags;
- if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
+ if (sflags == AF_4V || sflags == AF_2V || (ctx.is_cayman() && sflags == AF_S)) {
if (!p)
p = sh->create_alu_packed();
a->remove();
p->push_back(a);
+ if (sflags == AF_2V && p->count() == 2) {
+ g->push_front(p);
+ p = NULL;
+ }
}
}
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp
index 57dded5ef..402ba357f 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp
@@ -396,6 +396,8 @@ void dump::dump_flags(node &n) {
sblog << "CH_CONS ";
if (n.flags & NF_ALU_4SLOT)
sblog << "4S ";
+ if (n.flags & NF_ALU_2SLOT)
+ sblog << "2S ";
}
void dump::dump_val(value* v) {
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h
index a21b0bf99..179eab478 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h
@@ -546,10 +546,10 @@ private:
void add_prev_chan(unsigned chan);
unsigned get_preferable_chan_mask();
- void ra_node(container_node *c);
- void process_op(node *n);
+ bool ra_node(container_node *c);
+ bool process_op(node *n);
- void color(value *v);
+ bool color(value *v);
void color_bs_constraint(ra_constraint *c);
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp
index 4390a8f52..979f4bc13 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp
@@ -131,8 +131,8 @@ void peephole::optimize_cc_op2(alu_node* a) {
std::swap(a->src[0],a->src[1]);
swapped = true;
// clear modifiers
- memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
- memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
+ a->bc.src[0].clear();
+ a->bc.src[1].clear();
}
if (swapped || (a->src[1]->is_const() &&
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h
new file mode 100644
index 000000000..9b34fcd4d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h
@@ -0,0 +1,91 @@
+#ifndef INSTRUCTIONVISITOR_H
+#define INSTRUCTIONVISITOR_H
+
+namespace r600 {
+
+
+class AluInstruction;
+class ExportInstruction;
+class TexInstruction;
+class FetchInstruction;
+class IfInstruction;
+class ElseInstruction;
+class IfElseEndInstruction;
+class LoopBeginInstruction;
+class LoopEndInstruction;
+class LoopBreakInstruction;
+class LoopContInstruction;
+class StreamOutIntruction;
+class MemRingOutIntruction;
+class EmitVertex;
+class WaitAck;
+class WriteScratchInstruction;
+class GDSInstr;
+class RatInstruction;
+class LDSWriteInstruction;
+class LDSReadInstruction;
+class LDSAtomicInstruction;
+class GDSStoreTessFactor;
+class InstructionBlock;
+
+class InstructionVisitor
+{
+public:
+ virtual ~InstructionVisitor() {};
+ virtual bool visit(AluInstruction& i) = 0;
+ virtual bool visit(ExportInstruction& i) = 0;
+ virtual bool visit(TexInstruction& i) = 0;
+ virtual bool visit(FetchInstruction& i) = 0;
+ virtual bool visit(IfInstruction& i) = 0;
+ virtual bool visit(ElseInstruction& i) = 0;
+ virtual bool visit(IfElseEndInstruction& i) = 0;
+ virtual bool visit(LoopBeginInstruction& i) = 0;
+ virtual bool visit(LoopEndInstruction& i) = 0;
+ virtual bool visit(LoopBreakInstruction& i) = 0;
+ virtual bool visit(LoopContInstruction& i) = 0;
+ virtual bool visit(StreamOutIntruction& i) = 0;
+ virtual bool visit(MemRingOutIntruction& i) = 0;
+ virtual bool visit(EmitVertex& i) = 0;
+ virtual bool visit(WaitAck& i) = 0;
+ virtual bool visit(WriteScratchInstruction& i) = 0;
+ virtual bool visit(GDSInstr& i) = 0;
+ virtual bool visit(RatInstruction& i) = 0;
+ virtual bool visit(LDSWriteInstruction& i) = 0;
+ virtual bool visit(LDSReadInstruction& i) = 0;
+ virtual bool visit(LDSAtomicInstruction& i) = 0;
+ virtual bool visit(GDSStoreTessFactor& i) = 0;
+ virtual bool visit(InstructionBlock& i) = 0;
+};
+
+class ConstInstructionVisitor
+{
+public:
+ virtual ~ConstInstructionVisitor() {};
+ virtual bool visit(const AluInstruction& i) = 0;
+ virtual bool visit(const ExportInstruction& i) = 0;
+ virtual bool visit(const TexInstruction& i) = 0;
+ virtual bool visit(const FetchInstruction& i) = 0;
+ virtual bool visit(const IfInstruction& i) = 0;
+ virtual bool visit(const ElseInstruction& i) = 0;
+ virtual bool visit(const IfElseEndInstruction& i) = 0;
+ virtual bool visit(const LoopBeginInstruction& i) = 0;
+ virtual bool visit(const LoopEndInstruction& i) = 0;
+ virtual bool visit(const LoopBreakInstruction& i) = 0;
+ virtual bool visit(const LoopContInstruction& i) = 0;
+ virtual bool visit(const StreamOutIntruction& i) = 0;
+ virtual bool visit(const MemRingOutIntruction& i) = 0;
+ virtual bool visit(const EmitVertex& i) = 0;
+ virtual bool visit(const WaitAck& i) = 0;
+ virtual bool visit(const WriteScratchInstruction& i) = 0;
+ virtual bool visit(const GDSInstr& i) = 0;
+ virtual bool visit(const RatInstruction& i) = 0;
+ virtual bool visit(const LDSWriteInstruction& i) = 0;
+ virtual bool visit(const LDSReadInstruction& i) = 0;
+ virtual bool visit(const LDSAtomicInstruction& i) = 0;
+ virtual bool visit(const GDSStoreTessFactor& i) = 0;
+ virtual bool visit(const InstructionBlock& i) = 0;
+};
+
+}
+
+#endif // INSTRUCTIONVISITOR_H
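The new header is a classic double-dispatch visitor over the sfn instruction hierarchy, in const and non-const flavors. A self-contained miniature of the pattern, using stand-in classes since the matching accept() methods live elsewhere in sfn:

   #include <iostream>

   struct Alu; struct Tex;

   struct Visitor {
      virtual ~Visitor() {}
      virtual bool visit(Alu& i) = 0;
      virtual bool visit(Tex& i) = 0;
   };

   struct Instr {
      virtual ~Instr() {}
      virtual bool accept(Visitor& v) = 0;   /* dispatch on dynamic type */
   };
   struct Alu : Instr { bool accept(Visitor& v) override { return v.visit(*this); } };
   struct Tex : Instr { bool accept(Visitor& v) override { return v.visit(*this); } };

   struct Printer : Visitor {
      bool visit(Alu&) override { std::cout << "alu\n"; return true; }
      bool visit(Tex&) override { std::cout << "tex\n"; return true; }
   };

   int main()
   {
      Alu a; Tex t;
      Printer p;
      Instr* prog[] = { &a, &t };
      for (Instr* i : prog)
         i->accept(p);   /* prints "alu", then "tex" */
      return 0;
   }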
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py
new file mode 100644
index 000000000..2ef064111
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py
@@ -0,0 +1,49 @@
+#
+# Copyright (C) 2021 Collabora Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import argparse
+import sys
+
+lower_alu = [
+    # r600-family sin/cos opcodes take the angle in turns wrapped to [-0.5, 0.5):
+    # lower sin(a) to fsin_r600(ffract(a/(2*pi) + 0.5) - 0.5); 0.15915494 = 1/(2*pi)
+ (('fsin', "a@32"), ('fsin_r600', ('fadd', ('ffract', ('ffma', 'a', 0.15915494, 0.5)), -0.5))),
+ (('fcos', "a@32"), ('fcos_r600', ('fadd', ('ffract', ('ffma', 'a', 0.15915494, 0.5)), -0.5))),
+]
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p', '--import-path', required=True)
+ args = parser.parse_args()
+ sys.path.insert(0, args.import_path)
+ run()
+
+
+def run():
+ import nir_algebraic # pylint: disable=import-error
+
+ print('#include "sfn/sfn_nir.h"')
+
+ print(nir_algebraic.AlgebraicPass("r600_lower_alu",
+ lower_alu).render())
+
+if __name__ == '__main__':
+ main()
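The two rules hand the hardware opcode an angle in turns wrapped to [-0.5, 0.5): 0.15915494 is 1/(2*pi), ffract() wraps, and the +/-0.5 pair recenters the range. A self-contained numeric check of the reduction, modeling fsin_r600(t) as sin(2*pi*t):

   #include <cmath>
   #include <cstdio>

   static float fractf(float x) { return x - std::floor(x); }

   int main()
   {
      const float kTwoPi = 6.2831853f;
      float a = 7.0f;                                   /* any angle, radians */
      float t = fractf(a * 0.15915494f + 0.5f) - 0.5f;  /* turns, [-0.5, 0.5) */
      std::printf("%f %f\n", std::sin(a), std::sin(kTwoPi * t));  /* equal */
      return 0;
   }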
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
new file mode 100644
index 000000000..88e0085fa
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
@@ -0,0 +1,1063 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2020 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_nir.h"
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#include <map>
+#include <vector>
+#include <iostream>
+
+namespace r600 {
+
+using std::map;
+using std::pair;
+using std::make_pair;
+using std::vector;
+
+class LowerSplit64BitVar : public NirLowerInstruction {
+public:
+
+ ~LowerSplit64BitVar();
+ using VarSplit = pair<nir_variable*, nir_variable*>;
+ using VarMap = map<unsigned, VarSplit>;
+
+ nir_ssa_def *
+ split_double_load_deref(nir_intrinsic_instr *intr);
+
+ nir_ssa_def *
+ split_double_store_deref(nir_intrinsic_instr *intr);
+
+private:
+ nir_ssa_def *
+ split_load_deref_array(nir_intrinsic_instr *intr, nir_src& index);
+
+ nir_ssa_def *
+ split_load_deref_var(nir_intrinsic_instr *intr);
+
+ nir_ssa_def *
+ split_store_deref_array(nir_intrinsic_instr *intr, nir_deref_instr *deref);
+
+ nir_ssa_def *
+ split_store_deref_var(nir_intrinsic_instr *intr, nir_deref_instr *deref1);
+
+ VarSplit get_var_pair(nir_variable *old_var);
+
+ nir_ssa_def *
+ merge_64bit_loads(nir_ssa_def *load1, nir_ssa_def *load2, bool out_is_vec3);
+
+ nir_ssa_def *split_double_load(nir_intrinsic_instr *load1);
+
+ nir_ssa_def *
+ split_store_output(nir_intrinsic_instr *store1);
+
+ nir_ssa_def *split_double_load_uniform(nir_intrinsic_instr *intr);
+
+ nir_ssa_def *
+ split_double_load_ssbo(nir_intrinsic_instr *intr);
+
+ nir_ssa_def *
+ split_double_load_ubo(nir_intrinsic_instr *intr);
+
+ nir_ssa_def *
+ split_reduction(nir_ssa_def *src[2][2], nir_op op1, nir_op op2, nir_op reduction);
+
+ nir_ssa_def *
+ split_reduction3(nir_alu_instr *alu,
+ nir_op op1, nir_op op2, nir_op reduction);
+
+ nir_ssa_def *
+ split_reduction4(nir_alu_instr *alu,
+ nir_op op1, nir_op op2, nir_op reduction);
+
+ nir_ssa_def *split_bcsel(nir_alu_instr *alu);
+
+ nir_ssa_def *split_load_const(nir_load_const_instr *lc);
+
+ bool filter(const nir_instr *instr) const override;
+ nir_ssa_def *lower(nir_instr *instr) override;
+
+ VarMap m_varmap;
+ vector<nir_variable*> m_old_vars;
+ vector<nir_instr *> m_old_stores;
+};
+
+
+bool
+LowerSplit64BitVar::filter(const nir_instr *instr) const
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ auto intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_deref:
+ case nir_intrinsic_load_uniform:
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ssbo:
+ if (nir_dest_bit_size(intr->dest) != 64)
+ return false;
+ return nir_dest_num_components(intr->dest) >= 3;
+ case nir_intrinsic_store_output:
+ if (nir_src_bit_size(intr->src[0]) != 64)
+ return false;
+ return nir_src_num_components(intr->src[0]) >= 3;
+ case nir_intrinsic_store_deref:
+ if (nir_src_bit_size(intr->src[1]) != 64)
+ return false;
+ return nir_src_num_components(intr->src[1]) >= 3;
+ default:
+ return false;
+ }
+ }
+ case nir_instr_type_alu: {
+ auto alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_bcsel:
+ if (nir_dest_num_components(alu->dest.dest) < 3)
+ return false;
+ return nir_dest_bit_size(alu->dest.dest) == 64;
+ case nir_op_bany_fnequal3:
+ case nir_op_bany_fnequal4:
+ case nir_op_ball_fequal3:
+ case nir_op_ball_fequal4:
+ case nir_op_bany_inequal3:
+ case nir_op_bany_inequal4:
+ case nir_op_ball_iequal3:
+ case nir_op_ball_iequal4:
+ case nir_op_fdot3:
+ case nir_op_fdot4:
+ return nir_src_bit_size(alu->src[1].src) == 64;
+ default:
+ return false;
+ }
+ }
+ case nir_instr_type_load_const: {
+ auto lc = nir_instr_as_load_const(instr);
+ if (lc->def.bit_size != 64)
+ return false;
+ return lc->def.num_components >= 3;
+ }
+ default:
+ return false;
+ }
+}
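filter() only accepts 64-bit values of three or more components: each 64-bit component occupies two 32-bit channels, so a dvec2 already fits one four-channel register and only dvec3/dvec4 need the vec2-plus-remainder split. The channel arithmetic, as a hypothetical predicate:

   static bool needs_split(unsigned bit_size, unsigned num_components)
   {
      /* dvec2 -> 4 channels (fits), dvec3 -> 6, dvec4 -> 8 (split) */
      return bit_size == 64 && num_components * 2 > 4;
   }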
+
+nir_ssa_def *
+LowerSplit64BitVar::merge_64bit_loads(nir_ssa_def *load1,
+ nir_ssa_def *load2, bool out_is_vec3)
+{
+ if (out_is_vec3)
+ return nir_vec3(b, nir_channel(b, load1, 0),
+ nir_channel(b, load1, 1),
+ nir_channel(b, load2, 0));
+ else
+ return nir_vec4(b, nir_channel(b, load1, 0),
+ nir_channel(b, load1, 1),
+ nir_channel(b, load2, 0),
+ nir_channel(b, load2, 1));
+}
+
+LowerSplit64BitVar::~LowerSplit64BitVar()
+{
+ for(auto&& v: m_old_vars)
+ exec_node_remove(&v->node);
+
+ for(auto&& v: m_old_stores)
+ nir_instr_remove(v);
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_double_store_deref(nir_intrinsic_instr *intr)
+{
+ auto deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+ if (deref->deref_type == nir_deref_type_var)
+ return split_store_deref_var(intr, deref);
+ else if (deref->deref_type == nir_deref_type_array)
+ return split_store_deref_array(intr, deref);
+ else {
+ unreachable("only splitting of stores to vars and arrays is supported");
+ }
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_double_load_deref(nir_intrinsic_instr *intr)
+{
+ auto deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+ if (deref->deref_type == nir_deref_type_var)
+ return split_load_deref_var(intr);
+ else if (deref->deref_type == nir_deref_type_array)
+ return split_load_deref_array(intr, deref->arr.index);
+ else {
+      unreachable("only splitting of loads from vars and arrays is supported");
+   }
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_load_deref_array(nir_intrinsic_instr *intr, nir_src& index)
+{
+ auto old_var = nir_intrinsic_get_var(intr, 0);
+ unsigned old_components = old_var->type->without_array()->components();
+
+ assert(old_components > 2 && old_components <= 4);
+
+ auto vars = get_var_pair(old_var);
+
+ auto deref1 = nir_build_deref_var(b, vars.first);
+ auto deref_array1 = nir_build_deref_array(b, deref1, nir_ssa_for_src(b, index, 1));
+ auto load1 = nir_build_load_deref(b, 2, 64, &deref_array1->dest.ssa, (enum gl_access_qualifier)0);
+
+ auto deref2 = nir_build_deref_var(b, vars.second);
+ auto deref_array2 = nir_build_deref_array(b, deref2, nir_ssa_for_src(b, index, 1));
+
+ auto load2 = nir_build_load_deref(b, old_components - 2, 64, &deref_array2->dest.ssa, (enum gl_access_qualifier)0);
+
+ return merge_64bit_loads(load1, load2, old_components == 3);
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_store_deref_array(nir_intrinsic_instr *intr, nir_deref_instr *deref)
+{
+ auto old_var = nir_intrinsic_get_var(intr, 0);
+ unsigned old_components = old_var->type->without_array()->components();
+
+ assert(old_components > 2 && old_components <= 4);
+
+ auto src_xy = nir_channels(b, intr->src[1].ssa, 3);
+
+ auto vars = get_var_pair(old_var);
+
+ auto deref1 = nir_build_deref_var(b, vars.first);
+ auto deref_array1 = nir_build_deref_array(b, deref1, nir_ssa_for_src(b, deref->arr.index, 1));
+
+ nir_build_store_deref(b, &deref_array1->dest.ssa, src_xy, 3);
+
+ auto deref2 = nir_build_deref_var(b, vars.second);
+ auto deref_array2 = nir_build_deref_array(b, deref2, nir_ssa_for_src(b, deref->arr.index, 1));
+
+ if (old_components == 3)
+ nir_build_store_deref(b, &deref_array2->dest.ssa, nir_channel(b, intr->src[1].ssa, 2), 1);
+ else
+ nir_build_store_deref(b, &deref_array2->dest.ssa, nir_channels(b, intr->src[1].ssa, 0xc), 3);
+
+ return NIR_LOWER_INSTR_PROGRESS_REPLACE;
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, nir_deref_instr *deref)
+{
+ auto old_var = nir_intrinsic_get_var(intr, 0);
+ unsigned old_components = old_var->type->without_array()->components();
+
+ assert(old_components > 2 && old_components <= 4);
+
+ auto src_xy = nir_channels(b, intr->src[1].ssa, 3);
+
+ auto vars = get_var_pair(old_var);
+
+ auto deref1 = nir_build_deref_var(b, vars.first);
+ nir_build_store_deref(b, &deref1->dest.ssa, src_xy, 3);
+
+ auto deref2 = nir_build_deref_var(b, vars.second);
+ if (old_components == 3)
+ nir_build_store_deref(b, &deref2->dest.ssa, nir_channel(b, intr->src[1].ssa, 2), 1);
+ else
+ nir_build_store_deref(b, &deref2->dest.ssa, nir_channels(b, intr->src[1].ssa, 0xc), 3);
+
+ return NIR_LOWER_INSTR_PROGRESS_REPLACE;
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_load_deref_var(nir_intrinsic_instr *intr)
+{
+ auto old_var = nir_intrinsic_get_var(intr, 0);
+ auto vars = get_var_pair(old_var);
+ unsigned old_components = old_var->type->components();
+
+ nir_deref_instr *deref1 = nir_build_deref_var(b, vars.first);
+ auto *load1 = nir_load_deref(b, deref1);
+
+ nir_deref_instr *deref2 = nir_build_deref_var(b, vars.second);
+ deref2->type = vars.second->type;
+
+ auto *load2 = nir_load_deref(b, deref2);
+
+ return merge_64bit_loads(load1, load2, old_components == 3);
+}
+
+LowerSplit64BitVar::VarSplit
+LowerSplit64BitVar::get_var_pair(nir_variable *old_var)
+{
+ auto split_vars = m_varmap.find(old_var->data.driver_location);
+
+ assert(old_var->type->without_array()->components() > 2);
+
+ if (split_vars == m_varmap.end()) {
+ auto var1 = nir_variable_clone(old_var, b->shader);
+ auto var2 = nir_variable_clone(old_var, b->shader);
+
+ var1->type = glsl_dvec_type(2);
+ var2->type = glsl_dvec_type(old_var->type->without_array()->components() - 2);
+
+ if (old_var->type->is_array()) {
+ var1->type = glsl_array_type(var1->type, old_var->type->array_size(), 0);
+ var2->type = glsl_array_type(var2->type, old_var->type->array_size(), 0);
+ }
+
+ if (old_var->data.mode == nir_var_shader_in ||
+ old_var->data.mode == nir_var_shader_out) {
+ ++var2->data.driver_location;
+ ++var2->data.location;
+ nir_shader_add_variable(b->shader, var1);
+ nir_shader_add_variable(b->shader, var2);
+ } else if (old_var->data.mode == nir_var_function_temp) {
+ exec_list_push_tail(&b->impl->locals, &var1->node);
+ exec_list_push_tail(&b->impl->locals, &var2->node);
+ }
+
+ m_varmap[old_var->data.driver_location] = make_pair(var1, var2);
+ }
+ return m_varmap[old_var->data.driver_location];
+}
+
+
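+/* Split a 64 bit load: the cloned load reads the remaining components
+ * from the next I/O slot (base and location + 1). */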
+nir_ssa_def *
+LowerSplit64BitVar::split_double_load(nir_intrinsic_instr *load1)
+{
+ unsigned old_components = nir_dest_num_components(load1->dest);
+ auto load2 = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &load1->instr));
+ nir_io_semantics sem = nir_intrinsic_io_semantics(load1);
+
+ load1->dest.ssa.num_components = 2;
+ sem.num_slots = 1;
+ nir_intrinsic_set_io_semantics(load1, sem);
+
+ load2->dest.ssa.num_components = old_components - 2;
+ sem.location += 1;
+ nir_intrinsic_set_io_semantics(load2, sem);
+ nir_intrinsic_set_base(load2, nir_intrinsic_base(load1) + 1);
+ nir_builder_instr_insert(b, &load2->instr);
+
+ return merge_64bit_loads(&load1->dest.ssa, &load2->dest.ssa, old_components == 3);
+}
+
+
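+/* Split a 64 bit store_output into two stores; the second one writes
+ * the zw (or z) channels to the next I/O slot. */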
+nir_ssa_def *
+LowerSplit64BitVar::split_store_output(nir_intrinsic_instr *store1)
+{
+ auto src = store1->src[0];
+ unsigned old_components = nir_src_num_components(src);
+ nir_io_semantics sem = nir_intrinsic_io_semantics(store1);
+
+ auto store2 = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &store1->instr));
+ auto src1 = nir_channels(b, src.ssa, 3);
+ auto src2 = nir_channels(b, src.ssa, old_components == 3 ? 4 : 0xc);
+
+ nir_instr_rewrite_src(&store1->instr, &store1->src[0], nir_src_for_ssa(src1));
+ nir_intrinsic_set_write_mask(store1, 3);
+
+ nir_instr_rewrite_src(&store2->instr, &store2->src[0], nir_src_for_ssa(src2));
+ nir_intrinsic_set_write_mask(store2, old_components == 3 ? 1 : 3);
+
+ sem.num_slots = 1;
+ nir_intrinsic_set_io_semantics(store1, sem);
+
+ sem.location += 1;
+ nir_intrinsic_set_io_semantics(store2, sem);
+ nir_intrinsic_set_base(store2, nir_intrinsic_base(store1) + 1);
+
+ nir_builder_instr_insert(b, &store2->instr);
+ return NIR_LOWER_INSTR_PROGRESS;
+}
+
+
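+/* A dvec3/dvec4 uniform spans two vec4 slots, so load the upper
+ * components from the following slot and recombine the results. */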
+nir_ssa_def *
+LowerSplit64BitVar::split_double_load_uniform(nir_intrinsic_instr *intr)
+{
+ unsigned second_components = nir_dest_num_components(intr->dest) - 2;
+ nir_intrinsic_instr *load2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+ load2->src[0] = nir_src_for_ssa(nir_iadd_imm(b, intr->src[0].ssa, 1));
+ nir_intrinsic_set_dest_type(load2, nir_intrinsic_dest_type(intr));
+ nir_intrinsic_set_base(load2, nir_intrinsic_base(intr));
+ nir_intrinsic_set_range(load2, nir_intrinsic_range(intr));
+ load2->num_components = second_components;
+
+ nir_ssa_dest_init(&load2->instr, &load2->dest, second_components, 64, nullptr);
+ nir_builder_instr_insert(b, &load2->instr);
+
+ intr->dest.ssa.num_components = intr->num_components = 2;
+
+ if (second_components == 1)
+ return nir_vec3(b, nir_channel(b, &intr->dest.ssa, 0),
+ nir_channel(b, &intr->dest.ssa, 1),
+ nir_channel(b, &load2->dest.ssa, 0));
+ else
+ return nir_vec4(b, nir_channel(b, &intr->dest.ssa, 0),
+ nir_channel(b, &intr->dest.ssa, 1),
+ nir_channel(b, &load2->dest.ssa, 0),
+ nir_channel(b, &load2->dest.ssa, 1));
+}
+
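+/* Clone the SSBO load to fetch the remaining components and merge the
+ * two results into one vector. */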
+nir_ssa_def *
+LowerSplit64BitVar::split_double_load_ssbo(nir_intrinsic_instr *intr)
+{
+ unsigned second_components = nir_dest_num_components(intr->dest) - 2;
+ nir_intrinsic_instr *load2 = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intr->instr));
+
+ auto new_src0 = nir_src_for_ssa(nir_iadd_imm(b, intr->src[0].ssa, 1));
+ nir_instr_rewrite_src(&load2->instr, &load2->src[0], new_src0);
+ load2->num_components = second_components;
+ nir_ssa_dest_init(&load2->instr, &load2->dest, second_components, 64, nullptr);
+
+ nir_intrinsic_set_dest_type(load2, nir_intrinsic_dest_type(intr));
+ nir_builder_instr_insert(b, &load2->instr);
+
+ intr->dest.ssa.num_components = intr->num_components = 2;
+
+ return merge_64bit_loads(&intr->dest.ssa, &load2->dest.ssa, second_components == 1);
+}
+
+
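+/* A dvec3/dvec4 UBO value spans two vec4 slots; the cloned load reads
+ * 16 bytes further into the buffer. */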
+nir_ssa_def *
+LowerSplit64BitVar::split_double_load_ubo(nir_intrinsic_instr *intr)
+{
+ unsigned second_components = nir_dest_num_components(intr->dest) - 2;
+ nir_intrinsic_instr *load2 = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intr->instr));
+ load2->src[0] = intr->src[0];
+ load2->src[1] = nir_src_for_ssa(nir_iadd_imm(b, intr->src[1].ssa, 16));
+ nir_intrinsic_set_range_base(load2, nir_intrinsic_range_base(intr) + 16);
+ nir_intrinsic_set_range(load2, nir_intrinsic_range(intr));
+ nir_intrinsic_set_access(load2, nir_intrinsic_access(intr));
+ nir_intrinsic_set_align_mul(load2, nir_intrinsic_align_mul(intr));
+ nir_intrinsic_set_align_offset(load2, nir_intrinsic_align_offset(intr) + 16);
+
+ load2->num_components = second_components;
+
+ nir_ssa_dest_init(&load2->instr, &load2->dest, second_components, 64, nullptr);
+ nir_builder_instr_insert(b, &load2->instr);
+
+ intr->dest.ssa.num_components = intr->num_components = 2;
+
+ return merge_64bit_loads(&intr->dest.ssa, &load2->dest.ssa, second_components == 1);
+}
+
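+/* Apply op1 to the first two 64 bit components and op2 to the remaining
+ * one(s), then combine the two results with the reduction op. */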
+nir_ssa_def *
+LowerSplit64BitVar::split_reduction(nir_ssa_def *src[2][2], nir_op op1, nir_op op2, nir_op reduction)
+{
+ auto cmp0 = nir_build_alu(b, op1, src[0][0], src[0][1], nullptr, nullptr);
+ auto cmp1 = nir_build_alu(b, op2, src[1][0], src[1][1], nullptr, nullptr);
+ return nir_build_alu(b, reduction, cmp0, cmp1, nullptr, nullptr);
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_reduction3(nir_alu_instr *alu,
+ nir_op op1, nir_op op2, nir_op reduction)
+{
+ nir_ssa_def *src[2][2];
+
+ src[0][0] = nir_channels(b, nir_ssa_for_src(b, alu->src[0].src, 2), 3);
+ src[0][1] = nir_channels(b, nir_ssa_for_src(b, alu->src[1].src, 2), 3);
+
+ src[1][0] = nir_channel(b, nir_ssa_for_src(b, alu->src[0].src, 3), 2);
+ src[1][1] = nir_channel(b, nir_ssa_for_src(b, alu->src[1].src, 3), 2);
+
+ return split_reduction(src, op1, op2, reduction);
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::split_reduction4(nir_alu_instr *alu,
+ nir_op op1, nir_op op2, nir_op reduction)
+{
+ nir_ssa_def *src[2][2];
+
+ src[0][0] = nir_channels(b, nir_ssa_for_src(b, alu->src[0].src, 2), 3);
+ src[0][1] = nir_channels(b, nir_ssa_for_src(b, alu->src[1].src, 2), 3);
+
+ src[1][0] = nir_channels(b, nir_ssa_for_src(b, alu->src[0].src, 4), 0xc);
+ src[1][1] = nir_channels(b, nir_ssa_for_src(b, alu->src[1].src, 4), 0xc);
+
+ return split_reduction(src, op1, op2, reduction);
+}
+
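+/* Lower a 64 bit bcsel to one select per channel. */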
+nir_ssa_def *
+LowerSplit64BitVar::split_bcsel(nir_alu_instr *alu)
+{
+ nir_ssa_def *dest[4];
+ for (unsigned i = 0; i < nir_dest_num_components(alu->dest.dest); ++i) {
+ dest[i] = nir_bcsel(b,
+ nir_channel(b, alu->src[0].src.ssa, i),
+ nir_channel(b, alu->src[1].src.ssa, i),
+ nir_channel(b, alu->src[2].src.ssa, i));
+ }
+ return nir_vec(b, dest, nir_dest_num_components(alu->dest.dest));
+}
+
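+/* Re-emit a multi-component 64 bit constant as a vector of scalar
+ * double immediates. */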
+nir_ssa_def *
+LowerSplit64BitVar::split_load_const(nir_load_const_instr *lc)
+{
+ nir_ssa_def *ir[4];
+ for (unsigned i = 0; i < lc->def.num_components; ++i)
+ ir[i] = nir_imm_double(b, lc->value[i].f64);
+
+ return nir_vec(b, ir, lc->def.num_components);
+}
+
+nir_ssa_def *
+LowerSplit64BitVar::lower(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ auto intr = nir_instr_as_intrinsic(instr);
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_deref:
+ return this->split_double_load_deref(intr);
+ case nir_intrinsic_load_uniform:
+ return split_double_load_uniform(intr);
+ case nir_intrinsic_load_ubo:
+ return split_double_load_ubo(intr);
+ case nir_intrinsic_load_ssbo:
+ return split_double_load_ssbo(intr);
+ case nir_intrinsic_load_input:
+ return split_double_load(intr);
+ case nir_intrinsic_store_output:
+ return split_store_output(intr);
+ case nir_intrinsic_store_deref:
+ return split_double_store_deref(intr);
+ default:
+ assert(0);
+ }
+ }
+ case nir_instr_type_alu: {
+ auto alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_bany_fnequal3:
+ return split_reduction3(alu, nir_op_bany_fnequal2, nir_op_fneu, nir_op_ior);
+ case nir_op_ball_fequal3:
+ return split_reduction3(alu, nir_op_ball_fequal2, nir_op_feq, nir_op_iand);
+ case nir_op_bany_inequal3:
+ return split_reduction3(alu, nir_op_bany_inequal2, nir_op_ine, nir_op_ior);
+ case nir_op_ball_iequal3:
+ return split_reduction3(alu, nir_op_ball_iequal2, nir_op_ieq, nir_op_iand);
+ case nir_op_fdot3:
+ return split_reduction3(alu, nir_op_fdot2, nir_op_fmul, nir_op_fadd);
+ case nir_op_bany_fnequal4:
+ return split_reduction4(alu, nir_op_bany_fnequal2, nir_op_bany_fnequal2, nir_op_ior);
+ case nir_op_ball_fequal4:
+ return split_reduction4(alu, nir_op_ball_fequal2, nir_op_ball_fequal2, nir_op_iand);
+ case nir_op_bany_inequal4:
+ return split_reduction4(alu, nir_op_bany_inequal2, nir_op_bany_inequal2, nir_op_ior);
+ case nir_op_ball_iequal4:
+ return split_reduction4(alu, nir_op_ball_iequal2, nir_op_ball_iequal2, nir_op_iand);
+ case nir_op_fdot4:
+ return split_reduction4(alu, nir_op_fdot2, nir_op_fdot2, nir_op_fadd);
+ case nir_op_bcsel:
+ return split_bcsel(alu);
+ default:
+ assert(0);
+ }
+ }
+ case nir_instr_type_load_const: {
+ auto lc = nir_instr_as_load_const(instr);
+ return split_load_const(lc);
+ }
+ default:
+ assert(0);
+ }
+ return nullptr;
+}
+
+/* Split 64 bit instructions so that at most two 64 bit components are
+ * used in any one instruction */
+
+bool
+r600_nir_split_64bit_io(nir_shader *sh)
+{
+ return LowerSplit64BitVar().run(sh);
+}
+
+/* Lower 64 bit values to a vec2 of 32 bit values */
+class Lower64BitToVec2 : public NirLowerInstruction {
+
+private:
+ bool filter(const nir_instr *instr) const override;
+ nir_ssa_def *lower(nir_instr *instr) override;
+
+ nir_ssa_def *load_deref_64_to_vec2(nir_intrinsic_instr *intr);
+ nir_ssa_def *load_uniform_64_to_vec2(nir_intrinsic_instr *intr);
+ nir_ssa_def *load_ssbo_64_to_vec2(nir_intrinsic_instr *intr);
+ nir_ssa_def *load_64_to_vec2(nir_intrinsic_instr *intr);
+ nir_ssa_def *store_64_to_vec2(nir_intrinsic_instr *intr);
+};
+
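+/* Match all instructions that still produce or consume 64 bit values. */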
+bool
+Lower64BitToVec2::filter(const nir_instr *instr) const
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ auto intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_deref:
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_uniform:
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ubo_vec4:
+ case nir_intrinsic_load_ssbo:
+ return nir_dest_bit_size(intr->dest) == 64;
+ case nir_intrinsic_store_deref: {
+ if (nir_src_bit_size(intr->src[1]) == 64)
+ return true;
+ auto var = nir_intrinsic_get_var(intr, 0);
+ if (var->type->without_array()->bit_size() == 64)
+ return true;
+ return (var->type->without_array()->components() != intr->num_components);
+ }
+ default:
+ return false;
+ }
+ }
+ case nir_instr_type_alu: {
+ auto alu = nir_instr_as_alu(instr);
+ return nir_dest_bit_size(alu->dest.dest) == 64;
+ }
+ case nir_instr_type_phi: {
+ auto phi = nir_instr_as_phi(instr);
+ return nir_dest_bit_size(phi->dest) == 64;
+ }
+ case nir_instr_type_load_const: {
+ auto lc = nir_instr_as_load_const(instr);
+ return lc->def.bit_size == 64;
+ }
+ case nir_instr_type_ssa_undef: {
+ auto undef = nir_instr_as_ssa_undef(instr);
+ return undef->def.bit_size == 64;
+ }
+ default:
+ return false;
+ }
+}
+
+nir_ssa_def *
+Lower64BitToVec2::lower(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ auto intr = nir_instr_as_intrinsic(instr);
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_deref:
+ return load_deref_64_to_vec2(intr);
+ case nir_intrinsic_load_uniform:
+ return load_uniform_64_to_vec2(intr);
+ case nir_intrinsic_load_ssbo:
+ return load_ssbo_64_to_vec2(intr);
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ubo_vec4:
+ return load_64_to_vec2(intr);
+ case nir_intrinsic_store_deref:
+ return store_64_to_vec2(intr);
+ default:
+ return nullptr;
+ }
+ }
+ case nir_instr_type_alu: {
+ auto alu = nir_instr_as_alu(instr);
+ alu->dest.dest.ssa.bit_size = 32;
+ alu->dest.dest.ssa.num_components *= 2;
+ alu->dest.write_mask = (1 << alu->dest.dest.ssa.num_components) - 1;
+ switch (alu->op) {
+ case nir_op_pack_64_2x32_split:
+ alu->op = nir_op_vec2;
+ break;
+ case nir_op_pack_64_2x32:
+ alu->op = nir_op_mov;
+ break;
+ case nir_op_vec2:
+ return nir_vec4(b,
+ nir_channel(b, alu->src[0].src.ssa, 0),
+ nir_channel(b, alu->src[0].src.ssa, 1),
+ nir_channel(b, alu->src[1].src.ssa, 0),
+ nir_channel(b, alu->src[1].src.ssa, 1));
+ default:
+ return nullptr;
+ }
+ return NIR_LOWER_INSTR_PROGRESS;
+ }
+ case nir_instr_type_phi: {
+ auto phi = nir_instr_as_phi(instr);
+ phi->dest.ssa.bit_size = 32;
+ phi->dest.ssa.num_components = 2;
+ return NIR_LOWER_INSTR_PROGRESS;
+ }
+ case nir_instr_type_load_const: {
+ auto lc = nir_instr_as_load_const(instr);
+ assert(lc->def.num_components < 3);
+ nir_const_value val[4] = {0};
+ for (uint i = 0; i < lc->def.num_components; ++i) {
+ uint64_t v = lc->value[i].u64;
+ val[2 * i].u32 = v & 0xffffffff;
+ val[2 * i + 1].u32 = (v >> 32) & 0xffffffff;
+ }
+
+ return nir_build_imm(b, 2 * lc->def.num_components, 32, val);
+ }
+ case nir_instr_type_ssa_undef: {
+ auto undef = nir_instr_as_ssa_undef(instr);
+ undef->def.num_components *= 2;
+ undef->def.bit_size = 32;
+ return NIR_LOWER_INSTR_PROGRESS;
+ }
+ default:
+ return nullptr;
+ }
+}
+
+
+nir_ssa_def *
+Lower64BitToVec2::load_deref_64_to_vec2(nir_intrinsic_instr *intr)
+{
+ auto deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+ auto var = nir_intrinsic_get_var(intr, 0);
+ unsigned components = var->type->without_array()->components();
+ if (var->type->without_array()->bit_size() == 64) {
+ components *= 2;
+ if (deref->deref_type == nir_deref_type_var) {
+ var->type = glsl_vec_type(components);
+ } else if (deref->deref_type == nir_deref_type_array) {
+
+ var->type = glsl_array_type(glsl_vec_type(components),
+ var->type->array_size(), 0);
+
+ } else {
+ nir_print_shader(b->shader, stderr);
+ assert(0 && "Only lowring of var and array derefs supported\n");
+ }
+ }
+ deref->type = var->type;
+ if (deref->deref_type == nir_deref_type_array) {
+ auto deref_array = nir_instr_as_deref(deref->parent.ssa->parent_instr);
+ deref_array->type = var->type;
+ deref->type = deref_array->type->without_array();
+ }
+
+ intr->num_components = components;
+ intr->dest.ssa.bit_size = 32;
+ intr->dest.ssa.num_components = components;
+ return NIR_LOWER_INSTR_PROGRESS;
+}
+
+nir_ssa_def *
+Lower64BitToVec2::store_64_to_vec2(nir_intrinsic_instr *intr)
+{
+ auto deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+ auto var = nir_intrinsic_get_var(intr, 0);
+
+ unsigned components = var->type->without_array()->components();
+ unsigned wrmask = nir_intrinsic_write_mask(intr);
+ if (var->type->without_array()->bit_size() == 64) {
+ components *= 2;
+ if (deref->deref_type == nir_deref_type_var) {
+ var->type = glsl_vec_type(components);
+ } else if (deref->deref_type == nir_deref_type_array) {
+ var->type = glsl_array_type(glsl_vec_type(components),
+ var->type->array_size(), 0);
+ } else {
+ nir_print_shader(b->shader, stderr);
+ assert(0 && "Only lowring of var and array derefs supported\n");
+ }
+ }
+ deref->type = var->type;
+ if (deref->deref_type == nir_deref_type_array) {
+ auto deref_array = nir_instr_as_deref(deref->parent.ssa->parent_instr);
+ deref_array->type = var->type;
+ deref->type = deref_array->type->without_array();
+ }
+ intr->num_components = components;
+ nir_intrinsic_set_write_mask(intr, wrmask == 1 ? 3 : 0xf);
+ return NIR_LOWER_INSTR_PROGRESS;
+}
+
+
+nir_ssa_def *
+Lower64BitToVec2::load_uniform_64_to_vec2(nir_intrinsic_instr *intr)
+{
+ intr->num_components *= 2;
+ intr->dest.ssa.bit_size = 32;
+ intr->dest.ssa.num_components *= 2;
+ nir_intrinsic_set_dest_type(intr, nir_type_float32);
+ return NIR_LOWER_INSTR_PROGRESS;
+}
+
+nir_ssa_def *
+Lower64BitToVec2::load_64_to_vec2(nir_intrinsic_instr *intr)
+{
+ intr->num_components *= 2;
+ intr->dest.ssa.bit_size = 32;
+ intr->dest.ssa.num_components *= 2;
+ nir_intrinsic_set_component(intr, nir_intrinsic_component(intr) * 2);
+ return NIR_LOWER_INSTR_PROGRESS;
+}
+
+nir_ssa_def *
+Lower64BitToVec2::load_ssbo_64_to_vec2(nir_intrinsic_instr *intr)
+{
+ intr->num_components *= 2;
+ intr->dest.ssa.bit_size = 32;
+ intr->dest.ssa.num_components *= 2;
+ return NIR_LOWER_INSTR_PROGRESS;
+}
+
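+/* nir_foreach_src callback: report whether an instruction has a 64 bit
+ * source; stop iterating at the first one found. */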
+static bool store_64bit_intr(nir_src *src, void *state)
+{
+ bool *s = (bool *)state;
+ *s = nir_src_bit_size(*src) == 64;
+ return !*s;
+}
+
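+/* nir_foreach_src callback: retype a 64 bit source as twice as many
+ * 32 bit components. */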
+static bool double2vec2(nir_src *src, void *state)
+{
+ if (nir_src_bit_size(*src) != 64)
+ return true;
+
+ assert(src->is_ssa);
+ src->ssa->bit_size = 32;
+ src->ssa->num_components *= 2;
+ return true;
+}
+
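+/* Rewrite all 64 bit values as vec2 of 32 bit values: collect the ALU
+ * instructions with 64 bit sources and widen the write masks of 64 bit
+ * store intrinsics, then run Lower64BitToVec2 and finally expand the
+ * swizzles of the collected instructions. */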
+bool
+r600_nir_64_to_vec2(nir_shader *sh)
+{
+ vector<nir_instr*> intr64bit;
+ nir_foreach_function(function, sh) {
+ if (function->impl) {
+ nir_builder b;
+ nir_builder_init(&b, function->impl);
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_alu: {
+ bool success = false;
+ nir_foreach_src(instr, store_64bit_intr, &success);
+ if (success)
+ intr64bit.push_back(instr);
+ break;
+ }
+ case nir_instr_type_intrinsic: {
+ auto ir = nir_instr_as_intrinsic(instr);
+ switch (ir->intrinsic) {
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_ssbo: {
+ bool success = false;
+ nir_foreach_src(instr, store_64bit_intr, &success);
+ if (success) {
+ auto wm = nir_intrinsic_write_mask(ir);
+ nir_intrinsic_set_write_mask(ir, (wm == 1) ? 3 : 0xf);
+ ir->num_components *= 2;
+ }
+ break;
+ }
+ default:
+ ;
+ }
+ }
+ default:
+ ;
+ }
+ }
+ }
+ }
+ }
+
+ bool result = Lower64BitToVec2().run(sh);
+
+ if (result || !intr64bit.empty()) {
+
+ for(auto&& instr: intr64bit) {
+ if (instr->type == nir_instr_type_alu) {
+ auto alu = nir_instr_as_alu(instr);
+ auto alu_info = nir_op_infos[alu->op];
+ for (unsigned i = 0; i < alu_info.num_inputs; ++i) {
+ int swizzle[NIR_MAX_VEC_COMPONENTS] = {0};
+ for (unsigned k = 0; k < NIR_MAX_VEC_COMPONENTS / 2; k++) {
+ if (!nir_alu_instr_channel_used(alu, i, k)) {
+ continue;
+ }
+
+ switch (alu->op) {
+ case nir_op_unpack_64_2x32_split_x:
+ swizzle[2 * k] = alu->src[i].swizzle[k] * 2;
+ alu->op = nir_op_mov;
+ break;
+ case nir_op_unpack_64_2x32_split_y:
+ swizzle[2 * k] = alu->src[i].swizzle[k] * 2 + 1;
+ alu->op = nir_op_mov;
+ break;
+ case nir_op_unpack_64_2x32:
+ alu->op = nir_op_mov;
+ break;
+ case nir_op_bcsel:
+ if (i == 0) {
+ swizzle[2 * k] = swizzle[2 * k + 1] = alu->src[i].swizzle[k] * 2;
+ break;
+ }
+ FALLTHROUGH;
+ default:
+ swizzle[2 * k] = alu->src[i].swizzle[k] * 2;
+ swizzle[2 * k + 1] = alu->src[i].swizzle[k] * 2 + 1;
+ }
+ }
+ for (unsigned k = 0; k < NIR_MAX_VEC_COMPONENTS; ++k) {
+ alu->src[i].swizzle[k] = swizzle[k];
+ }
+ }
+ } else
+ nir_foreach_src(instr, double2vec2, nullptr);
+ }
+ result = true;
+ }
+
+ return result;
+}
+
+using std::map;
+using std::vector;
+using std::pair;
+
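+/* Merge partial stores to the same output slot into a single store. */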
+class StoreMerger {
+public:
+ StoreMerger(nir_shader *shader);
+ void collect_stores();
+ bool combine();
+ void combine_one_slot(vector<nir_intrinsic_instr*>& stores);
+
+ using StoreCombos = map<unsigned, vector<nir_intrinsic_instr*>>;
+
+ StoreCombos m_stores;
+ nir_shader *sh;
+};
+
+StoreMerger::StoreMerger(nir_shader *shader):
+ sh(shader)
+{
+}
+
+
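+/* Group store_output intrinsics by output slot, also keyed by the
+ * number of vertices emitted so far and the GS stream, so that only
+ * stores belonging to the same emitted vertex are merged. */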
+void StoreMerger::collect_stores()
+{
+ unsigned vertex = 0;
+ nir_foreach_function(function, sh) {
+ if (function->impl) {
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ auto ir = nir_instr_as_intrinsic(instr);
+ if (ir->intrinsic == nir_intrinsic_emit_vertex ||
+ ir->intrinsic == nir_intrinsic_emit_vertex_with_counter) {
+ ++vertex;
+ continue;
+ }
+ if (ir->intrinsic != nir_intrinsic_store_output)
+ continue;
+
+ unsigned index = nir_intrinsic_base(ir) + 64 * vertex +
+ 8 * 64 * nir_intrinsic_io_semantics(ir).gs_streams;
+ m_stores[index].push_back(ir);
+ }
+ }
+ }
+ }
+}
+
+bool StoreMerger::combine()
+{
+ bool progress = false;
+ for(auto&& i : m_stores) {
+ if (i.second.size() < 2)
+ continue;
+
+ combine_one_slot(i.second);
+ progress = true;
+ }
+ return progress;
+}
+
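+/* Fold all stores to one slot into the last one: rebuild its source
+ * vector, component offset and write mask, then remove the now dead
+ * earlier stores. */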
+void StoreMerger::combine_one_slot(vector<nir_intrinsic_instr*>& stores)
+{
+ nir_ssa_def *srcs[4] = {nullptr};
+
+ nir_builder b;
+ nir_builder_init(&b, nir_shader_get_entrypoint(sh));
+ auto last_store = *stores.rbegin();
+
+ b.cursor = nir_before_instr(&last_store->instr);
+
+ unsigned comps = 0;
+ unsigned writemask = 0;
+ unsigned first_comp = 4;
+ for (auto&& store : stores) {
+ int cmp = nir_intrinsic_component(store);
+ for (unsigned i = 0; i < nir_src_num_components(store->src[0]); ++i, ++comps) {
+ unsigned out_comp = i + cmp;
+ srcs[out_comp] = nir_channel(&b, store->src[0].ssa, i);
+ writemask |= 1 << out_comp;
+ if (first_comp > out_comp)
+ first_comp = out_comp;
+ }
+ }
+
+ auto new_src = nir_vec(&b, srcs, comps);
+
+ nir_instr_rewrite_src(&last_store->instr, &last_store->src[0], nir_src_for_ssa(new_src));
+ last_store->num_components = comps;
+ nir_intrinsic_set_component(last_store, first_comp);
+ nir_intrinsic_set_write_mask(last_store, writemask);
+
+ for (auto i = stores.begin(); i != stores.end() - 1; ++i)
+ nir_instr_remove(&(*i)->instr);
+}
+
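+/* Merge partial output stores, e.g. the vec2 stores left behind by the
+ * 64 bit lowering above. */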
+bool r600_merge_vec2_stores(nir_shader *shader)
+{
+ r600::StoreMerger merger(shader);
+ merger.collect_stores();
+ return merger.combine();
+}
+
+} // end namespace r600
+
+