author     Jonathan Gray <jsg@cvs.openbsd.org>    2017-08-14 09:45:54 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>    2017-08-14 09:45:54 +0000
commit     4c58069f5013f0a621503525f7d5193bfe9976b3 (patch)
tree       bd8f8a08b889e9a8b99c9de01ae12459d527ea6d /lib/mesa/src/gallium/drivers/radeon
parent     5caa025e6b62d0456faad86c89f239a14d1eaadb (diff)
Import Mesa 17.1.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/radeon')
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/Makefile.am           |  11
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/Makefile.sources      |   4
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c  |  92
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c       | 206
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c    |  20
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c    | 410
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h    | 208
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_query.c          | 359
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_query.h          |  42
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_streamout.c      |   2
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c       |  67
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/r600_texture.c        | 915
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c          | 166
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h          |  12
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_vce.c          |  36
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c   |  16
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c       |  10
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c       |  42
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_video.c        |  59
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_video.h        |   2
-rw-r--r--  lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h       | 266
21 files changed, 1930 insertions(+), 1015 deletions(-)
diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.am b/lib/mesa/src/gallium/drivers/radeon/Makefile.am
index a6fc145cb..2be6af4b1 100644
--- a/lib/mesa/src/gallium/drivers/radeon/Makefile.am
+++ b/lib/mesa/src/gallium/drivers/radeon/Makefile.am
@@ -13,19 +13,14 @@ noinst_LTLIBRARIES = libradeon.la
libradeon_la_SOURCES = \
$(C_SOURCES)
-if NEED_RADEON_LLVM
+if HAVE_GALLIUM_LLVM
AM_CFLAGS += \
- $(LLVM_CFLAGS) \
- $(LIBELF_CFLAGS)
-
-libradeon_la_SOURCES += \
- $(LLVM_C_FILES)
+ $(LLVM_CFLAGS)
libradeon_la_LIBADD = \
$(CLOCK_LIB) \
- $(LLVM_LIBS) \
- $(LIBELF_LIBS)
+ $(LLVM_LIBS)
libradeon_la_LDFLAGS = \
$(LLVM_LDFLAGS)
diff --git a/lib/mesa/src/gallium/drivers/radeon/Makefile.sources b/lib/mesa/src/gallium/drivers/radeon/Makefile.sources
index 3e13dae3c..9dd4e1a88 100644
--- a/lib/mesa/src/gallium/drivers/radeon/Makefile.sources
+++ b/lib/mesa/src/gallium/drivers/radeon/Makefile.sources
@@ -22,7 +22,3 @@ C_SOURCES := \
radeon_video.c \
radeon_video.h \
radeon_winsys.h
-
-LLVM_C_FILES := \
- radeon_elf_util.c \
- radeon_elf_util.h
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
index bbab58946..b2289e26f 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -51,6 +51,8 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
bool busy = false;
+ assert(!(resource->flags & RADEON_FLAG_SPARSE));
+
if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
@@ -159,8 +161,8 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
}
/* Tiled textures are unmappable. Always put them in VRAM. */
- if (res->b.b.target != PIPE_BUFFER &&
- rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
+ if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
+ res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
res->domains = RADEON_DOMAIN_VRAM;
res->flags &= ~RADEON_FLAG_CPU_ACCESS;
res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
@@ -170,8 +172,12 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
/* If VRAM is just stolen system memory, allow both VRAM and
* GTT, whichever has free space. If a buffer is evicted from
* VRAM to GTT, it will stay there.
+ *
+ * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
+ * placements even with a low amount of stolen VRAM.
*/
if (!rscreen->info.has_dedicated_vram &&
+ (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
res->domains == RADEON_DOMAIN_VRAM)
res->domains = RADEON_DOMAIN_VRAM_GTT;
@@ -245,6 +251,10 @@ r600_invalidate_buffer(struct r600_common_context *rctx,
if (rbuffer->is_shared)
return false;
+ /* Sparse buffers can't be reallocated. */
+ if (rbuffer->flags & RADEON_FLAG_SPARSE)
+ return false;
+
/* In AMD_pinned_memory, the user pointer association only gets
* broken when the buffer is explicitly re-allocated.
*/
@@ -275,7 +285,6 @@ void r600_invalidate_resource(struct pipe_context *ctx,
static void *r600_buffer_get_transfer(struct pipe_context *ctx,
struct pipe_resource *resource,
- unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer,
@@ -285,8 +294,9 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *transfer = slab_alloc(&rctx->pool_transfers);
- transfer->transfer.resource = resource;
- transfer->transfer.level = level;
+ transfer->transfer.resource = NULL;
+ pipe_resource_reference(&transfer->transfer.resource, resource);
+ transfer->transfer.level = 0;
transfer->transfer.usage = usage;
transfer->transfer.box = *box;
transfer->transfer.stride = 0;
@@ -317,11 +327,25 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
- struct r600_resource *rbuffer = r600_resource(resource);
- uint8_t *data;
+ struct r600_resource *rbuffer = r600_resource(resource);
+ uint8_t *data;
assert(box->x + box->width <= resource->width0);
+ /* From GL_AMD_pinned_memory issues:
+ *
+ * 4) Is glMapBuffer on a shared buffer guaranteed to return the
+ * same system address which was specified at creation time?
+ *
+ * RESOLVED: NO. The GL implementation might return a different
+ * virtual mapping of that memory, although the same physical
+ * page will be used.
+ *
+ * So don't ever use staging buffers.
+ */
+ if (rscreen->ws->buffer_is_user_ptr(rbuffer->buf))
+ usage |= PIPE_TRANSFER_PERSISTENT;
+
/* See if the buffer range being mapped has never been initialized,
* in which case it can be mapped unsynchronized. */
if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
@@ -351,26 +375,34 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
}
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
- !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_PERSISTENT)) &&
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
- r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
+ ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_PERSISTENT)) &&
+ r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
+ (rbuffer->flags & RADEON_FLAG_SPARSE))) {
assert(usage & PIPE_TRANSFER_WRITE);
- /* Check if mapping this buffer would cause waiting for the GPU. */
- if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
+ /* Check if mapping this buffer would cause waiting for the GPU.
+ */
+ if (rbuffer->flags & RADEON_FLAG_SPARSE ||
+ r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
- u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
- 256, &offset, (struct pipe_resource**)&staging, (void**)&data);
+ u_upload_alloc(ctx->stream_uploader, 0,
+ box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
+ rctx->screen->info.tcc_cache_line_size,
+ &offset, (struct pipe_resource**)&staging,
+ (void**)&data);
if (staging) {
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
- return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, staging, offset);
+ } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
}
} else {
/* At this point, the buffer is always idle (we checked it above). */
@@ -378,11 +410,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
}
}
/* Use a staging buffer in cached GTT for reads. */
- else if ((usage & PIPE_TRANSFER_READ) &&
- !(usage & PIPE_TRANSFER_PERSISTENT) &&
- (rbuffer->domains & RADEON_DOMAIN_VRAM ||
- rbuffer->flags & RADEON_FLAG_GTT_WC) &&
- r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
+ else if (((usage & PIPE_TRANSFER_READ) &&
+ !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ (rbuffer->domains & RADEON_DOMAIN_VRAM ||
+ rbuffer->flags & RADEON_FLAG_GTT_WC) &&
+ r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
+ (rbuffer->flags & RADEON_FLAG_SPARSE)) {
struct r600_resource *staging;
staging = (struct r600_resource*) pipe_buffer_create(
@@ -402,8 +435,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
}
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
- return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, staging, 0);
+ } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
}
}
@@ -413,7 +448,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
}
data += box->x;
- return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, NULL, 0);
}
@@ -469,6 +504,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
if (rtransfer->staging)
r600_resource_reference(&rtransfer->staging, NULL);
+ pipe_resource_reference(&transfer->resource, NULL);
slab_free(&rctx->pool_transfers, transfer);
}
@@ -535,6 +571,8 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
if (templ->bind & PIPE_BIND_SHARED)
rbuffer->flags |= RADEON_FLAG_HANDLE;
+ if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
+ rbuffer->flags |= RADEON_FLAG_SPARSE;
if (!r600_alloc_resource(rscreen, rbuffer)) {
FREE(rbuffer);
@@ -544,7 +582,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
}
struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
- unsigned bind,
+ unsigned flags,
unsigned usage,
unsigned size,
unsigned alignment)
@@ -554,9 +592,9 @@ struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
memset(&buffer, 0, sizeof buffer);
buffer.target = PIPE_BUFFER;
buffer.format = PIPE_FORMAT_R8_UNORM;
- buffer.bind = bind;
+ buffer.bind = 0;
buffer.usage = usage;
- buffer.flags = 0;
+ buffer.flags = flags;
buffer.width0 = size;
buffer.height0 = 1;
buffer.depth0 = 1;
@@ -574,6 +612,7 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
rbuffer->domains = RADEON_DOMAIN_GTT;
+ rbuffer->flags = 0;
util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
/* Convert a user pointer to a buffer. */
@@ -589,5 +628,8 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
else
rbuffer->gpu_address = 0;
+ rbuffer->vram_usage = 0;
+ rbuffer->gart_usage = templ->width0;
+
return &rbuffer->b.b;
}
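
[Editor's note: the r600_buffer_transfer_map hunks above layer three policies: user-pointer buffers are forced persistent per the GL_AMD_pinned_memory resolution quoted in the comment, sparse buffers must always take a staging path, and writes that would stall the GPU go through a temporary upload buffer. A minimal C sketch of the combined decision follows; the flag and parameter names are hypothetical stand-ins for the gallium/winsys ones, not the driver's API.]

#include <stdbool.h>

/* Hypothetical flags standing in for PIPE_TRANSFER_* / RADEON_FLAG_*. */
enum {
        XFER_READ           = 1 << 0,
        XFER_UNSYNCHRONIZED = 1 << 1,
        XFER_PERSISTENT     = 1 << 2,
        XFER_DISCARD_RANGE  = 1 << 3,
};

enum map_path { MAP_DIRECT, MAP_STAGING_WRITE, MAP_STAGING_READ };

/* Sketch of the decision implemented piecewise above. User-pointer
 * buffers are forced persistent first, so they can never be staged:
 * GL_AMD_pinned_memory only guarantees the same physical pages, not
 * the same virtual mapping, and a staging copy would break that. */
static enum map_path pick_map_path(unsigned usage, bool is_user_ptr,
                                   bool is_sparse, bool would_stall_gpu,
                                   bool in_vram_or_wc)
{
        if (is_user_ptr)
                usage |= XFER_PERSISTENT;

        if (!is_sparse && (usage & XFER_UNSYNCHRONIZED))
                return MAP_DIRECT;

        /* Wait-free write path via a temporary upload buffer. */
        if ((usage & XFER_DISCARD_RANGE) && (would_stall_gpu || is_sparse))
                return MAP_STAGING_WRITE;

        /* Reads from VRAM or write-combined GTT use a cached-GTT staging
         * copy; sparse buffers always need one of the staging paths. */
        if (((usage & XFER_READ) && !(usage & XFER_PERSISTENT) &&
             in_vram_or_wc) || is_sparse)
                return MAP_STAGING_READ;

        return MAP_DIRECT;
}
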
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c b/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c
index a653834b3..3b45545b7 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_gpu_load.c
@@ -35,6 +35,7 @@
*/
#include "r600_pipe_common.h"
+#include "r600_query.h"
#include "os/os_time.h"
/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
@@ -42,17 +43,97 @@
#define SAMPLES_PER_SEC 10000
#define GRBM_STATUS 0x8010
+#define TA_BUSY(x) (((x) >> 14) & 0x1)
+#define GDS_BUSY(x) (((x) >> 15) & 0x1)
+#define VGT_BUSY(x) (((x) >> 17) & 0x1)
+#define IA_BUSY(x) (((x) >> 19) & 0x1)
+#define SX_BUSY(x) (((x) >> 20) & 0x1)
+#define WD_BUSY(x) (((x) >> 21) & 0x1)
+#define SPI_BUSY(x) (((x) >> 22) & 0x1)
+#define BCI_BUSY(x) (((x) >> 23) & 0x1)
+#define SC_BUSY(x) (((x) >> 24) & 0x1)
+#define PA_BUSY(x) (((x) >> 25) & 0x1)
+#define DB_BUSY(x) (((x) >> 26) & 0x1)
+#define CP_BUSY(x) (((x) >> 29) & 0x1)
+#define CB_BUSY(x) (((x) >> 30) & 0x1)
#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)
-static bool r600_is_gpu_busy(struct r600_common_screen *rscreen)
+#define SRBM_STATUS2 0x0e4c
+#define SDMA_BUSY(x) (((x) >> 5) & 0x1)
+
+#define CP_STAT 0x8680
+#define PFP_BUSY(x) (((x) >> 15) & 0x1)
+#define MEQ_BUSY(x) (((x) >> 16) & 0x1)
+#define ME_BUSY(x) (((x) >> 17) & 0x1)
+#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
+#define DMA_BUSY(x) (((x) >> 22) & 0x1)
+#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
+#define CE_BUSY(x) (((x) >> 26) & 0x1)
+
+#define IDENTITY(x) x
+
+#define UPDATE_COUNTER(field, mask) \
+ do { \
+ if (mask(value)) \
+ p_atomic_inc(&counters->named.field.busy); \
+ else \
+ p_atomic_inc(&counters->named.field.idle); \
+ } while (0)
+
+static void r600_update_mmio_counters(struct r600_common_screen *rscreen,
+ union r600_mmio_counters *counters)
{
uint32_t value = 0;
+ bool gui_busy, sdma_busy = false;
+ /* GRBM_STATUS */
rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
- return GUI_ACTIVE(value);
+
+ UPDATE_COUNTER(ta, TA_BUSY);
+ UPDATE_COUNTER(gds, GDS_BUSY);
+ UPDATE_COUNTER(vgt, VGT_BUSY);
+ UPDATE_COUNTER(ia, IA_BUSY);
+ UPDATE_COUNTER(sx, SX_BUSY);
+ UPDATE_COUNTER(wd, WD_BUSY);
+ UPDATE_COUNTER(spi, SPI_BUSY);
+ UPDATE_COUNTER(bci, BCI_BUSY);
+ UPDATE_COUNTER(sc, SC_BUSY);
+ UPDATE_COUNTER(pa, PA_BUSY);
+ UPDATE_COUNTER(db, DB_BUSY);
+ UPDATE_COUNTER(cp, CP_BUSY);
+ UPDATE_COUNTER(cb, CB_BUSY);
+ UPDATE_COUNTER(gui, GUI_ACTIVE);
+ gui_busy = GUI_ACTIVE(value);
+
+ if (rscreen->chip_class >= CIK) {
+ /* SRBM_STATUS2 */
+ rscreen->ws->read_registers(rscreen->ws, SRBM_STATUS2, 1, &value);
+
+ UPDATE_COUNTER(sdma, SDMA_BUSY);
+ sdma_busy = SDMA_BUSY(value);
+ }
+
+ if (rscreen->chip_class >= VI) {
+ /* CP_STAT */
+ rscreen->ws->read_registers(rscreen->ws, CP_STAT, 1, &value);
+
+ UPDATE_COUNTER(pfp, PFP_BUSY);
+ UPDATE_COUNTER(meq, MEQ_BUSY);
+ UPDATE_COUNTER(me, ME_BUSY);
+ UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY);
+ UPDATE_COUNTER(dma, DMA_BUSY);
+ UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY);
+ UPDATE_COUNTER(ce, CE_BUSY);
+ }
+
+ value = gui_busy || sdma_busy;
+ UPDATE_COUNTER(gpu, IDENTITY);
}
-static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param)
+#undef UPDATE_COUNTER
+
+static int
+r600_gpu_load_thread(void *param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
const int period_us = 1000000 / SAMPLES_PER_SEC;
@@ -77,10 +158,7 @@ static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param)
last_time = cur_time;
/* Update the counters. */
- if (r600_is_gpu_busy(rscreen))
- p_atomic_inc(&rscreen->gpu_load_counter_busy);
- else
- p_atomic_inc(&rscreen->gpu_load_counter_idle);
+ r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);
}
p_atomic_dec(&rscreen->gpu_load_stop_thread);
return 0;
@@ -92,50 +170,118 @@ void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
return;
p_atomic_inc(&rscreen->gpu_load_stop_thread);
- pipe_thread_wait(rscreen->gpu_load_thread);
+ thrd_join(rscreen->gpu_load_thread, NULL);
rscreen->gpu_load_thread = 0;
}
-static uint64_t r600_gpu_load_read_counter(struct r600_common_screen *rscreen)
+static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,
+ unsigned busy_index)
{
/* Start the thread if needed. */
if (!rscreen->gpu_load_thread) {
- pipe_mutex_lock(rscreen->gpu_load_mutex);
+ mtx_lock(&rscreen->gpu_load_mutex);
/* Check again inside the mutex. */
if (!rscreen->gpu_load_thread)
rscreen->gpu_load_thread =
- pipe_thread_create(r600_gpu_load_thread, rscreen);
- pipe_mutex_unlock(rscreen->gpu_load_mutex);
+ u_thread_create(r600_gpu_load_thread, rscreen);
+ mtx_unlock(&rscreen->gpu_load_mutex);
}
- /* The busy counter is in the lower 32 bits.
- * The idle counter is in the upper 32 bits. */
- return p_atomic_read(&rscreen->gpu_load_counter_busy) |
- ((uint64_t)p_atomic_read(&rscreen->gpu_load_counter_idle) << 32);
-}
+ unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);
+ unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);
-/**
- * Just return the counters.
- */
-uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen)
-{
- return r600_gpu_load_read_counter(rscreen);
+ return busy | ((uint64_t)idle << 32);
}
-unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin)
+static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,
+ uint64_t begin, unsigned busy_index)
{
- uint64_t end = r600_gpu_load_read_counter(rscreen);
+ uint64_t end = r600_read_mmio_counter(rscreen, busy_index);
unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
unsigned idle = (end >> 32) - (begin >> 32);
- /* Calculate the GPU load.
+ /* Calculate the % of time the busy counter was being incremented.
*
- * If no counters have been incremented, return the current load.
+ * If no counters were incremented, return the current counter status.
* It's for the case when the load is queried faster than
* the counters are updated.
*/
- if (idle || busy)
+ if (idle || busy) {
return busy*100 / (busy + idle);
- else
- return r600_is_gpu_busy(rscreen) ? 100 : 0;
+ } else {
+ union r600_mmio_counters counters;
+
+ memset(&counters, 0, sizeof(counters));
+ r600_update_mmio_counters(rscreen, &counters);
+ return counters.array[busy_index] ? 100 : 0;
+ }
+}
+
+#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \
+ rscreen->mmio_counters.array)
+
+static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
+ unsigned type)
+{
+ switch (type) {
+ case R600_QUERY_GPU_LOAD:
+ return BUSY_INDEX(rscreen, gpu);
+ case R600_QUERY_GPU_SHADERS_BUSY:
+ return BUSY_INDEX(rscreen, spi);
+ case R600_QUERY_GPU_TA_BUSY:
+ return BUSY_INDEX(rscreen, ta);
+ case R600_QUERY_GPU_GDS_BUSY:
+ return BUSY_INDEX(rscreen, gds);
+ case R600_QUERY_GPU_VGT_BUSY:
+ return BUSY_INDEX(rscreen, vgt);
+ case R600_QUERY_GPU_IA_BUSY:
+ return BUSY_INDEX(rscreen, ia);
+ case R600_QUERY_GPU_SX_BUSY:
+ return BUSY_INDEX(rscreen, sx);
+ case R600_QUERY_GPU_WD_BUSY:
+ return BUSY_INDEX(rscreen, wd);
+ case R600_QUERY_GPU_BCI_BUSY:
+ return BUSY_INDEX(rscreen, bci);
+ case R600_QUERY_GPU_SC_BUSY:
+ return BUSY_INDEX(rscreen, sc);
+ case R600_QUERY_GPU_PA_BUSY:
+ return BUSY_INDEX(rscreen, pa);
+ case R600_QUERY_GPU_DB_BUSY:
+ return BUSY_INDEX(rscreen, db);
+ case R600_QUERY_GPU_CP_BUSY:
+ return BUSY_INDEX(rscreen, cp);
+ case R600_QUERY_GPU_CB_BUSY:
+ return BUSY_INDEX(rscreen, cb);
+ case R600_QUERY_GPU_SDMA_BUSY:
+ return BUSY_INDEX(rscreen, sdma);
+ case R600_QUERY_GPU_PFP_BUSY:
+ return BUSY_INDEX(rscreen, pfp);
+ case R600_QUERY_GPU_MEQ_BUSY:
+ return BUSY_INDEX(rscreen, meq);
+ case R600_QUERY_GPU_ME_BUSY:
+ return BUSY_INDEX(rscreen, me);
+ case R600_QUERY_GPU_SURF_SYNC_BUSY:
+ return BUSY_INDEX(rscreen, surf_sync);
+ case R600_QUERY_GPU_DMA_BUSY:
+ return BUSY_INDEX(rscreen, dma);
+ case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+ return BUSY_INDEX(rscreen, scratch_ram);
+ case R600_QUERY_GPU_CE_BUSY:
+ return BUSY_INDEX(rscreen, ce);
+ default:
+ unreachable("invalid query type");
+ }
+}
+
+uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
+{
+ unsigned busy_index = busy_index_from_type(rscreen, type);
+ return r600_read_mmio_counter(rscreen, busy_index);
+}
+
+unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
+ uint64_t begin)
+{
+ unsigned busy_index = busy_index_from_type(rscreen, type);
+ return r600_end_mmio_counter(rscreen, begin, busy_index);
}
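
[Editor's note: each sampled register bit above feeds a busy/idle counter pair, and readers consume a pair as one packed 64-bit snapshot: busy in the low 32 bits, idle in the high 32 bits, load = busy*100 / (busy + idle). A self-contained sketch of that arithmetic follows; the function names are illustrative, not the driver's.]

#include <stdint.h>

/* Pack a (busy, idle) sample pair the way r600_read_mmio_counter
 * returns it: busy in the low 32 bits, idle in the high 32 bits. */
static uint64_t pack_counters(uint32_t busy, uint32_t idle)
{
        return (uint64_t)busy | ((uint64_t)idle << 32);
}

/* Load over a [begin, end] window, as in r600_end_mmio_counter. The
 * unsigned 32-bit subtraction yields the right delta even if a counter
 * wraps between the two snapshots. The driver additionally falls back
 * to a fresh register sample when both deltas are zero (query issued
 * faster than the 10000 Hz sampling thread); this sketch returns 0. */
static unsigned load_percent(uint64_t begin, uint64_t end)
{
        uint32_t busy = (uint32_t)end - (uint32_t)begin;
        uint32_t idle = (uint32_t)(end >> 32) - (uint32_t)(begin >> 32);

        return (busy || idle) ? busy * 100 / (busy + idle) : 0;
}
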
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c b/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c
index 0c55fc2a2..48f609bcb 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -99,7 +99,7 @@ struct r600_query_pc {
struct r600_pc_group *groups;
};
-static void r600_pc_query_destroy(struct r600_common_context *ctx,
+static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery)
{
struct r600_query_pc *query = (struct r600_query_pc *)rquery;
@@ -112,10 +112,10 @@ static void r600_pc_query_destroy(struct r600_common_context *ctx,
FREE(query->counters);
- r600_query_hw_destroy(ctx, rquery);
+ r600_query_hw_destroy(rscreen, rquery);
}
-static bool r600_pc_query_prepare_buffer(struct r600_common_context *ctx,
+static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
struct r600_query_hw *hwquery,
struct r600_resource *buffer)
{
@@ -196,7 +196,7 @@ static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
}
-static void r600_pc_query_add_result(struct r600_common_context *ctx,
+static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
struct r600_query_hw *hwquery,
void *buffer,
union pipe_query_result *result)
@@ -301,8 +301,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types)
{
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_common_screen *screen = rctx->screen;
+ struct r600_common_screen *screen =
+ (struct r600_common_screen *)ctx->screen;
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
struct r600_pc_group *group;
@@ -365,7 +365,7 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
unsigned instances = 1;
if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
- instances = rctx->screen->info.max_se;
+ instances = screen->info.max_se;
if (group->instance < 0)
instances *= block->num_instances;
@@ -417,13 +417,13 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
counter->qwords *= block->num_instances;
}
- if (!r600_query_hw_init(rctx, &query->b))
+ if (!r600_query_hw_init(screen, &query->b))
goto error;
return (struct pipe_query *)query;
error:
- r600_pc_query_destroy(rctx, &query->b.b);
+ r600_pc_query_destroy(screen, &query->b.b);
return NULL;
}
@@ -545,7 +545,7 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
- info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
+ info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
info->group_id = base_gid + sub / block->num_selectors;
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
index f62bbf2e0..2019ecdd5 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -43,6 +43,14 @@
#define HAVE_LLVM 0
#endif
+#if HAVE_LLVM
+#include <llvm-c/TargetMachine.h>
+#endif
+
+#ifndef MESA_LLVM_VERSION_PATCH
+#define MESA_LLVM_VERSION_PATCH 0
+#endif
+
struct r600_multi_fence {
struct pipe_reference reference;
struct pipe_fence_handle *gfx;
@@ -58,12 +66,12 @@ struct r600_multi_fence {
/*
* shader binary helpers.
*/
-void radeon_shader_binary_init(struct radeon_shader_binary *b)
+void radeon_shader_binary_init(struct ac_shader_binary *b)
{
memset(b, 0, sizeof(*b));
}
-void radeon_shader_binary_clean(struct radeon_shader_binary *b)
+void radeon_shader_binary_clean(struct ac_shader_binary *b)
{
if (!b)
return;
@@ -80,35 +88,63 @@ void radeon_shader_binary_clean(struct radeon_shader_binary *b)
* pipe_context
*/
-void r600_gfx_write_fence(struct r600_common_context *ctx, struct r600_resource *buf,
- uint64_t va, uint32_t old_value, uint32_t new_value)
+/**
+ * Write an EOP event.
+ *
+ * \param event EVENT_TYPE_*
+ * \param event_flags Optional cache flush flags (TC)
+ * \param data_sel 1 = fence, 3 = timestamp
+ * \param buf Buffer
+ * \param va GPU address
+ * \param old_value Previous fence value (for a bug workaround)
+ * \param new_value Fence value to write for this event.
+ */
+void r600_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t old_fence, uint32_t new_fence)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
+ unsigned op = EVENT_TYPE(event) |
+ EVENT_INDEX(5) |
+ event_flags;
+
+ if (ctx->chip_class >= GFX9) {
+ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
+ radeon_emit(cs, op);
+ radeon_emit(cs, EOP_DATA_SEL(data_sel));
+ radeon_emit(cs, va); /* address lo */
+ radeon_emit(cs, va >> 32); /* address hi */
+ radeon_emit(cs, new_fence); /* immediate data lo */
+ radeon_emit(cs, 0); /* immediate data hi */
+ radeon_emit(cs, 0); /* unused */
+ } else {
+ if (ctx->chip_class == CIK ||
+ ctx->chip_class == VI) {
+ /* Two EOP events are required to make all engines go idle
+ * (and optional cache flushes executed) before the timestamp
+ * is written.
+ */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
+ radeon_emit(cs, old_fence); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
+ }
- if (ctx->chip_class == CIK ||
- ctx->chip_class == VI) {
- /* Two EOP events are required to make all engines go idle
- * (and optional cache flushes executed) before the timestamp
- * is written.
- */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
- EVENT_INDEX(5));
+ radeon_emit(cs, op);
radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
- radeon_emit(cs, old_value); /* immediate data */
+ radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
+ radeon_emit(cs, new_fence); /* immediate data */
radeon_emit(cs, 0); /* unused */
}
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
- EVENT_INDEX(5));
- radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
- radeon_emit(cs, new_value); /* immediate data */
- radeon_emit(cs, 0); /* unused */
-
- r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
+ if (buf)
+ r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
}
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
@@ -172,7 +208,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
/* Upload vertices. The hw rectangle has only 3 vertices,
* I guess the 4th one is derived from the first 3.
* The vertex specification should match u_blitter's vertex element state. */
- u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, 256, &offset, &buf, (void**)&vb);
+ u_upload_alloc(rctx->b.stream_uploader, 0, sizeof(float) * 24,
+ rctx->screen->info.tcc_cache_line_size,
+ &offset, &buf, (void**)&vb);
if (!buf)
return;
@@ -203,10 +241,26 @@ void r600_draw_rectangle(struct blitter_context *blitter,
pipe_resource_reference(&buf, NULL);
}
+static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
+{
+ struct radeon_winsys_cs *cs = rctx->dma.cs;
+
+ /* NOP waits for idle on Evergreen and later. */
+ if (rctx->chip_class >= CIK)
+ radeon_emit(cs, 0x00000000); /* NOP */
+ else if (rctx->chip_class >= EVERGREEN)
+ radeon_emit(cs, 0xf0000000); /* NOP */
+ else {
+ /* TODO: R600-R700 should use the FENCE packet.
+ * CS checker support is required. */
+ }
+}
+
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src)
{
- uint64_t vram = 0, gtt = 0;
+ uint64_t vram = ctx->dma.cs->used_vram;
+ uint64_t gtt = ctx->dma.cs->used_gart;
if (dst) {
vram += dst->vram_usage;
@@ -229,13 +283,35 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
/* Flush if there's not enough space, or if the memory usage per IB
* is too large.
+ *
+ * IBs using too little memory are limited by the IB submission overhead.
+ * IBs using too much memory are limited by the kernel/TTM overhead.
+ * Too long IBs create CPU-GPU pipeline bubbles and add latency.
+ *
+ * This heuristic makes sure that DMA requests are executed
+ * very soon after the call is made and lowers memory usage.
+ * It improves texture upload performance by keeping the DMA
+ * engine busy while uploads are being submitted.
*/
+ num_dw++; /* for emit_wait_idle below */
if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
+ ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
!radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
}
+ /* Wait for idle if either buffer has been used in the IB before to
+ * prevent read-after-write hazards.
+ */
+ if ((dst &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
+ (src &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
+ RADEON_USAGE_WRITE)))
+ r600_dma_emit_wait_idle(ctx);
+
/* If GPUVM is not supported, the CS checker needs 2 entries
* in the buffer list per packet, which has to be done manually.
*/
@@ -249,44 +325,9 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
RADEON_USAGE_READ,
RADEON_PRIO_SDMA_BUFFER);
}
-}
-
-/* This is required to prevent read-after-write hazards. */
-void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
-{
- struct radeon_winsys_cs *cs = rctx->dma.cs;
-
- /* done at the end of DMA calls, so increment this. */
- rctx->num_dma_calls++;
-
- /* IBs using too little memory are limited by the IB submission overhead.
- * IBs using too much memory are limited by the kernel/TTM overhead.
- * Too long IBs create CPU-GPU pipeline bubbles and add latency.
- *
- * This heuristic makes sure that DMA requests are executed
- * very soon after the call is made and lowers memory usage.
- * It improves texture upload performance by keeping the DMA
- * engine busy while uploads are being submitted.
- */
- if (cs->used_vram + cs->used_gart > 64 * 1024 * 1024) {
- rctx->dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
- return;
- }
-
- r600_need_dma_space(rctx, 1, NULL, NULL);
-
- if (!radeon_emitted(cs, 0)) /* empty queue */
- return;
- /* NOP waits for idle on Evergreen and later. */
- if (rctx->chip_class >= CIK)
- radeon_emit(cs, 0x00000000); /* NOP */
- else if (rctx->chip_class >= EVERGREEN)
- radeon_emit(cs, 0xf0000000); /* NOP */
- else {
- /* TODO: R600-R700 should use the FENCE packet.
- * CS checker support is required. */
- }
+ /* this function is called before all DMA calls, so increment this. */
+ ctx->num_dma_calls++;
}
static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
@@ -325,24 +366,22 @@ static void r600_flush_from_st(struct pipe_context *ctx,
struct pipe_screen *screen = ctx->screen;
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct radeon_winsys *ws = rctx->ws;
- unsigned rflags = 0;
struct pipe_fence_handle *gfx_fence = NULL;
struct pipe_fence_handle *sdma_fence = NULL;
bool deferred_fence = false;
+ unsigned rflags = RADEON_FLUSH_ASYNC;
if (flags & PIPE_FLUSH_END_OF_FRAME)
rflags |= RADEON_FLUSH_END_OF_FRAME;
- if (flags & PIPE_FLUSH_DEFERRED)
- rflags |= RADEON_FLUSH_ASYNC;
- if (rctx->dma.cs) {
+ /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
+ if (rctx->dma.cs)
rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
- }
if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
if (fence)
ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
- if (!(rflags & RADEON_FLUSH_ASYNC))
+ if (!(flags & PIPE_FLUSH_DEFERRED))
ws->cs_sync_flush(rctx->gfx.cs);
} else {
/* Instead of flushing, create a deferred fence. Constraints:
@@ -378,6 +417,12 @@ static void r600_flush_from_st(struct pipe_context *ctx,
screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle*)multi_fence;
}
+
+ if (!(flags & PIPE_FLUSH_DEFERRED)) {
+ if (rctx->dma.cs)
+ ws->cs_sync_flush(rctx->dma.cs);
+ ws->cs_sync_flush(rctx->gfx.cs);
+ }
}
static void r600_flush_dma_ring(void *ctx, unsigned flags,
@@ -516,6 +561,50 @@ bool r600_check_device_reset(struct r600_common_context *rctx)
return true;
}
+static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ uint64_t offset, uint64_t size,
+ unsigned value)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+
+ rctx->clear_buffer(ctx, dst, offset, size, value, R600_COHERENCY_NONE);
+}
+
+static bool r600_resource_commit(struct pipe_context *pctx,
+ struct pipe_resource *resource,
+ unsigned level, struct pipe_box *box,
+ bool commit)
+{
+ struct r600_common_context *ctx = (struct r600_common_context *)pctx;
+ struct r600_resource *res = r600_resource(resource);
+
+ /*
+ * Since buffer commitment changes cannot be pipelined, we need to
+ * (a) flush any pending commands that refer to the buffer we're about
+ * to change, and
+ * (b) wait for threaded submit to finish, including those that were
+ * triggered by some other, earlier operation.
+ */
+ if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
+ res->buf, RADEON_USAGE_READWRITE)) {
+ ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ }
+ if (radeon_emitted(ctx->dma.cs, 0) &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
+ res->buf, RADEON_USAGE_READWRITE)) {
+ ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ }
+
+ ctx->ws->cs_sync_flush(ctx->dma.cs);
+ ctx->ws->cs_sync_flush(ctx->gfx.cs);
+
+ assert(resource->target == PIPE_BUFFER);
+
+ return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
+}
+
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen,
unsigned context_flags)
@@ -527,14 +616,8 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->family = rscreen->family;
rctx->chip_class = rscreen->chip_class;
- if (rscreen->chip_class >= CIK)
- rctx->max_db = MAX2(8, rscreen->info.num_render_backends);
- else if (rscreen->chip_class >= EVERGREEN)
- rctx->max_db = 8;
- else
- rctx->max_db = 4;
-
rctx->b.invalidate_resource = r600_invalidate_resource;
+ rctx->b.resource_commit = r600_resource_commit;
rctx->b.transfer_map = u_transfer_map_vtbl;
rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
@@ -542,6 +625,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
rctx->b.set_debug_callback = r600_set_debug_callback;
+ rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
/* evergreen_compute.c has a special codepath for global buffers.
* Everything else can use the direct path.
@@ -569,14 +653,18 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
- 0, PIPE_USAGE_DEFAULT, true);
+ 0, PIPE_USAGE_DEFAULT, 0, true);
if (!rctx->allocator_zeroed_memory)
return false;
- rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024,
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
- if (!rctx->uploader)
+ rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
+ 0, PIPE_USAGE_STREAM);
+ if (!rctx->b.stream_uploader)
+ return false;
+
+ rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
+ 0, PIPE_USAGE_DEFAULT);
+ if (!rctx->b.const_uploader)
return false;
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
@@ -619,9 +707,10 @@ void r600_common_context_cleanup(struct r600_common_context *rctx)
if (rctx->ctx)
rctx->ws->ctx_destroy(rctx->ctx);
- if (rctx->uploader) {
- u_upload_destroy(rctx->uploader);
- }
+ if (rctx->b.stream_uploader)
+ u_upload_destroy(rctx->b.stream_uploader);
+ if (rctx->b.const_uploader)
+ u_upload_destroy(rctx->b.const_uploader);
slab_destroy_child(&rctx->pool_transfers);
@@ -656,8 +745,12 @@ static const struct debug_named_value common_debug_options[] = {
{ "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
{ "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" },
{ "checkir", DBG_CHECK_IR, "Enable additional sanity checks on shader IR" },
+ { "nooptvariant", DBG_NO_OPT_VARIANT, "Disable compiling optimized shader variants." },
{ "testdma", DBG_TEST_DMA, "Invoke SDMA tests and exit." },
+ { "testvmfaultcp", DBG_TEST_VMFAULT_CP, "Invoke a CP VM fault test and exit." },
+ { "testvmfaultsdma", DBG_TEST_VMFAULT_SDMA, "Invoke a SDMA VM fault test and exit." },
+ { "testvmfaultshader", DBG_TEST_VMFAULT_SHADER, "Invoke a shader VM fault test and exit." },
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
@@ -673,7 +766,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
- { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
+ { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
{ "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
{ "noce", DBG_NO_CE, "Disable the constant engine"},
@@ -737,11 +830,54 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
case CHIP_FIJI: return "AMD FIJI";
case CHIP_POLARIS10: return "AMD POLARIS10";
case CHIP_POLARIS11: return "AMD POLARIS11";
+ case CHIP_POLARIS12: return "AMD POLARIS12";
case CHIP_STONEY: return "AMD STONEY";
+ case CHIP_VEGA10: return "AMD VEGA10";
+ case CHIP_RAVEN: return "AMD RAVEN";
default: return "AMD unknown";
}
}
+static void r600_disk_cache_create(struct r600_common_screen *rscreen)
+{
+ /* Don't use the cache if shader dumping is enabled. */
+ if (rscreen->debug_flags &
+ (DBG_FS | DBG_VS | DBG_TCS | DBG_TES | DBG_GS | DBG_PS | DBG_CS))
+ return;
+
+ uint32_t mesa_timestamp;
+ if (disk_cache_get_function_timestamp(r600_disk_cache_create,
+ &mesa_timestamp)) {
+ char *timestamp_str;
+ int res = -1;
+ if (rscreen->chip_class < SI) {
+ res = asprintf(&timestamp_str, "%u",mesa_timestamp);
+ }
+#if HAVE_LLVM
+ else {
+ uint32_t llvm_timestamp;
+ if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
+ &llvm_timestamp)) {
+ res = asprintf(&timestamp_str, "%u_%u",
+ mesa_timestamp, llvm_timestamp);
+ }
+ }
+#endif
+ if (res != -1) {
+ rscreen->disk_shader_cache =
+ disk_cache_create(r600_get_chip_name(rscreen),
+ timestamp_str);
+ free(timestamp_str);
+ }
+ }
+}
+
+static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
+ return rscreen->disk_shader_cache;
+}
+
static const char* r600_get_name(struct pipe_screen* pscreen)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
@@ -861,24 +997,45 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
case CHIP_TONGA: return "tonga";
case CHIP_ICELAND: return "iceland";
case CHIP_CARRIZO: return "carrizo";
-#if HAVE_LLVM <= 0x0307
- case CHIP_FIJI: return "tonga";
- case CHIP_STONEY: return "carrizo";
-#else
- case CHIP_FIJI: return "fiji";
- case CHIP_STONEY: return "stoney";
-#endif
-#if HAVE_LLVM <= 0x0308
- case CHIP_POLARIS10: return "tonga";
- case CHIP_POLARIS11: return "tonga";
-#else
- case CHIP_POLARIS10: return "polaris10";
- case CHIP_POLARIS11: return "polaris11";
-#endif
- default: return "";
+ case CHIP_FIJI:
+ return "fiji";
+ case CHIP_STONEY:
+ return "stoney";
+ case CHIP_POLARIS10:
+ return HAVE_LLVM >= 0x0309 ? "polaris10" : "carrizo";
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12: /* same as polaris11 */
+ return HAVE_LLVM >= 0x0309 ? "polaris11" : "carrizo";
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ return "gfx900";
+ default:
+ return "";
}
}
+static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
+ enum pipe_shader_ir ir_type)
+{
+ if (ir_type != PIPE_SHADER_IR_TGSI)
+ return 256;
+
+ if (HAVE_LLVM < 0x309)
+ return 256;
+
+ /* Only 16 waves per thread-group on gfx9. */
+ if (screen->chip_class >= GFX9)
+ return 1024;
+
+ /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
+ * round number.
+ */
+ if (screen->chip_class >= SI)
+ return 2048;
+
+ return 256;
+}
+
static int r600_get_compute_param(struct pipe_screen *screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
@@ -933,27 +1090,17 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
if (ret) {
uint64_t *block_size = ret;
- if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
- ir_type == PIPE_SHADER_IR_TGSI) {
- block_size[0] = 2048;
- block_size[1] = 2048;
- block_size[2] = 2048;
- } else {
- block_size[0] = 256;
- block_size[1] = 256;
- block_size[2] = 256;
- }
+ unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+ block_size[0] = threads_per_block;
+ block_size[1] = threads_per_block;
+ block_size[2] = threads_per_block;
}
return 3 * sizeof(uint64_t);
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
if (ret) {
uint64_t *max_threads_per_block = ret;
- if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
- ir_type == PIPE_SHADER_IR_TGSI)
- *max_threads_per_block = 2048;
- else
- *max_threads_per_block = 256;
+ *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
}
return sizeof(uint64_t);
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
@@ -1186,11 +1333,11 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
snprintf(kernel_version, sizeof(kernel_version),
" / %s", uname_data.release);
-#if HAVE_LLVM
- snprintf(llvm_string, sizeof(llvm_string),
- ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
- HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
-#endif
+ if (HAVE_LLVM > 0) {
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+ }
snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
"%s (DRM %i.%i.%i%s%s)",
@@ -1201,6 +1348,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.get_name = r600_get_name;
rscreen->b.get_vendor = r600_get_vendor;
rscreen->b.get_device_vendor = r600_get_device_vendor;
+ rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
rscreen->b.get_timestamp = r600_get_timestamp;
@@ -1225,6 +1373,10 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->family = rscreen->info.family;
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
+ rscreen->has_rbplus = false;
+ rscreen->rbplus_allowed = false;
+
+ r600_disk_cache_create(rscreen);
slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
@@ -1236,8 +1388,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
}
util_format_s3tc_init();
- pipe_mutex_init(rscreen->aux_context_lock);
- pipe_mutex_init(rscreen->gpu_load_mutex);
+ (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
+ (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
@@ -1246,6 +1398,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
printf("chip_class = %i\n", rscreen->info.chip_class);
printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
+ printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
printf("max_alloc_size = %i MB\n",
(int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
@@ -1274,6 +1427,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
+ printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
}
return true;
}
@@ -1283,12 +1437,13 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
r600_perfcounters_destroy(rscreen);
r600_gpu_load_kill_thread(rscreen);
- pipe_mutex_destroy(rscreen->gpu_load_mutex);
- pipe_mutex_destroy(rscreen->aux_context_lock);
+ mtx_destroy(&rscreen->gpu_load_mutex);
+ mtx_destroy(&rscreen->aux_context_lock);
rscreen->aux_context->destroy(rscreen->aux_context);
slab_destroy_parent(&rscreen->pool_transfers);
+ disk_cache_destroy(rscreen->disk_shader_cache);
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
}
@@ -1321,13 +1476,12 @@ bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned proce
}
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value,
- enum r600_coherency coher)
+ uint64_t offset, uint64_t size, unsigned value)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
- pipe_mutex_lock(rscreen->aux_context_lock);
- rctx->clear_buffer(&rctx->b, dst, offset, size, value, coher);
+ mtx_lock(&rscreen->aux_context_lock);
+ rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value);
rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
- pipe_mutex_unlock(rscreen->aux_context_lock);
+ mtx_unlock(&rscreen->aux_context_lock);
}
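
[Editor's note: the hunks above fold the old r600_dma_emit_wait_idle behavior into r600_need_dma_space: the SDMA IB is flushed once its memory usage passes 64 MiB, keeping submissions short so the DMA engine stays busy while further uploads are recorded, and a wait-idle NOP is emitted when a buffer was already touched earlier in the same IB. A hedged sketch of the two predicates follows, with invented names; the driver performs these checks inline.]

#include <stdbool.h>
#include <stdint.h>

#define DMA_IB_MEMORY_LIMIT (64ull * 1024 * 1024)

/* Flush early: IBs using too little memory are limited by submission
 * overhead, IBs using too much by kernel/TTM overhead, and overly long
 * IBs add CPU-GPU pipeline bubbles and latency. */
static bool dma_ib_should_flush(bool has_cs_space, bool below_winsys_limit,
                                uint64_t ib_used_vram, uint64_t ib_used_gart)
{
        return !has_cs_space ||
               !below_winsys_limit ||
               ib_used_vram + ib_used_gart > DMA_IB_MEMORY_LIMIT;
}

/* Read-after-write hazard: wait for idle when the destination was
 * referenced at all, or the source was written, earlier in this IB. */
static bool dma_needs_wait_idle(bool dst_referenced_rw,
                                bool src_referenced_write)
{
        return dst_referenced_rw || src_referenced_write;
}
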
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h
index 86772c0af..bd542e500 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -34,8 +34,11 @@
#include <stdio.h>
+#include "amd/common/ac_binary.h"
+
#include "radeon/radeon_winsys.h"
+#include "util/disk_cache.h"
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_range.h"
@@ -49,6 +52,7 @@
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
+#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
/* Pipeline & streamout query controls. */
@@ -79,6 +83,7 @@
#define DBG_NO_ASM (1 << 14)
#define DBG_PREOPT_IR (1 << 15)
#define DBG_CHECK_IR (1 << 16)
+#define DBG_NO_OPT_VARIANT (1 << 17)
/* gaps */
#define DBG_TEST_DMA (1 << 20)
/* Bits 21-31 are reserved for the r600g driver. */
@@ -102,6 +107,9 @@
#define DBG_NO_CE (1llu << 48)
#define DBG_UNSAFE_MATH (1llu << 49)
#define DBG_NO_DCC_FB (1llu << 50)
+#define DBG_TEST_VMFAULT_CP (1llu << 51)
+#define DBG_TEST_VMFAULT_SDMA (1llu << 52)
+#define DBG_TEST_VMFAULT_SHADER (1llu << 53)
#define R600_MAP_BUFFER_ALIGNMENT 64
#define R600_MAX_VIEWPORTS 16
@@ -125,45 +133,8 @@ struct r600_perfcounters;
struct tgsi_shader_info;
struct r600_qbo_state;
-struct radeon_shader_reloc {
- char name[32];
- uint64_t offset;
-};
-
-struct radeon_shader_binary {
- /** Shader code */
- unsigned char *code;
- unsigned code_size;
-
- /** Config/Context register state that accompanies this shader.
- * This is a stream of dword pairs. First dword contains the
- * register address, the second dword contains the value.*/
- unsigned char *config;
- unsigned config_size;
-
- /** The number of bytes of config information for each global symbol.
- */
- unsigned config_size_per_symbol;
-
- /** Constant data accessed by the shader. This will be uploaded
- * into a constant buffer. */
- unsigned char *rodata;
- unsigned rodata_size;
-
- /** List of symbol offsets for the shader */
- uint64_t *global_symbol_offsets;
- unsigned global_symbol_count;
-
- struct radeon_shader_reloc *relocs;
- unsigned reloc_count;
-
- /** Disassembled shader in a string. */
- char *disasm_string;
- char *llvm_ir_string;
-};
-
-void radeon_shader_binary_init(struct radeon_shader_binary *b);
-void radeon_shader_binary_clean(struct radeon_shader_binary *b);
+void radeon_shader_binary_init(struct ac_shader_binary *b);
+void radeon_shader_binary_clean(struct ac_shader_binary *b);
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
* at the moment.
@@ -232,20 +203,8 @@ struct r600_cmask_info {
uint64_t offset;
uint64_t size;
unsigned alignment;
- unsigned pitch;
- unsigned height;
- unsigned xalign;
- unsigned yalign;
unsigned slice_tile_max;
- unsigned base_address_reg;
-};
-
-struct r600_htile_info {
- unsigned pitch;
- unsigned height;
- unsigned xalign;
- unsigned yalign;
- unsigned alignment;
+ uint64_t base_address_reg;
};
struct r600_texture {
@@ -273,7 +232,6 @@ struct r600_texture {
unsigned last_msaa_resolve_target_micro_mode;
/* Depth buffer compression and fast clear. */
- struct r600_htile_info htile;
struct r600_resource *htile_buffer;
bool tc_compatible_htile;
bool depth_cleared; /* if it was cleared at least once */
@@ -319,7 +277,10 @@ struct r600_texture {
struct r600_surface {
struct pipe_surface base;
- const struct radeon_surf_level *level_info;
+
+ /* These can vary with block-compressed textures. */
+ unsigned width0;
+ unsigned height0;
bool color_initialized;
bool depth_initialized;
@@ -329,6 +290,7 @@ struct r600_surface {
bool export_16bpc;
bool color_is_int8;
bool color_is_int10;
+ bool dcc_incompatible;
/* Color registers. */
unsigned cb_color_info;
@@ -339,6 +301,7 @@ struct r600_surface {
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
unsigned cb_color_attrib; /* EG and later */
+ unsigned cb_color_attrib2; /* GFX9 and later */
unsigned cb_dcc_control; /* VI and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice; /* EG and later */
@@ -352,20 +315,63 @@ struct r600_surface {
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
/* DB registers. */
+ uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
+ uint64_t db_stencil_base; /* EG and later */
+ uint64_t db_htile_data_base;
unsigned db_depth_info; /* R600 only, then SI and later */
unsigned db_z_info; /* EG and later */
- unsigned db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
+ unsigned db_z_info2; /* GFX9+ */
unsigned db_depth_view;
unsigned db_depth_size;
unsigned db_depth_slice; /* EG and later */
- unsigned db_stencil_base; /* EG and later */
unsigned db_stencil_info; /* EG and later */
+ unsigned db_stencil_info2; /* GFX9+ */
unsigned db_prefetch_limit; /* R600 only */
unsigned db_htile_surface;
- unsigned db_htile_data_base;
unsigned db_preload_control; /* EG and later */
};
+struct r600_mmio_counter {
+ unsigned busy;
+ unsigned idle;
+};
+
+union r600_mmio_counters {
+ struct {
+ /* For global GPU load including SDMA. */
+ struct r600_mmio_counter gpu;
+
+ /* GRBM_STATUS */
+ struct r600_mmio_counter spi;
+ struct r600_mmio_counter gui;
+ struct r600_mmio_counter ta;
+ struct r600_mmio_counter gds;
+ struct r600_mmio_counter vgt;
+ struct r600_mmio_counter ia;
+ struct r600_mmio_counter sx;
+ struct r600_mmio_counter wd;
+ struct r600_mmio_counter bci;
+ struct r600_mmio_counter sc;
+ struct r600_mmio_counter pa;
+ struct r600_mmio_counter db;
+ struct r600_mmio_counter cp;
+ struct r600_mmio_counter cb;
+
+ /* SRBM_STATUS2 */
+ struct r600_mmio_counter sdma;
+
+ /* CP_STAT */
+ struct r600_mmio_counter pfp;
+ struct r600_mmio_counter meq;
+ struct r600_mmio_counter me;
+ struct r600_mmio_counter surf_sync;
+ struct r600_mmio_counter dma;
+ struct r600_mmio_counter scratch_ram;
+ struct r600_mmio_counter ce;
+ } named;
+ unsigned array[0];
+};
+
struct r600_common_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
@@ -375,6 +381,10 @@ struct r600_common_screen {
uint64_t debug_flags;
bool has_cp_dma;
bool has_streamout;
+ bool has_rbplus; /* if RB+ registers exist */
+ bool rbplus_allowed; /* if RB+ is allowed */
+
+ struct disk_cache *disk_shader_cache;
struct slab_parent_pool pool_transfers;
@@ -384,7 +394,7 @@ struct r600_common_screen {
/* Auxiliary context. Mainly used to initialize resources.
* It must be locked prior to using and flushed before unlocking. */
struct pipe_context *aux_context;
- pipe_mutex aux_context_lock;
+ mtx_t aux_context_lock;
/* This must be in the screen, because UE4 uses one context for
* compilation and another one for rendering.
@@ -394,12 +404,12 @@ struct r600_common_screen {
* are loading shaders on demand. This is a monotonic counter.
*/
unsigned num_shaders_created;
+ unsigned num_shader_cache_hits;
/* GPU load thread. */
- pipe_mutex gpu_load_mutex;
- pipe_thread gpu_load_thread;
- unsigned gpu_load_counter_busy;
- unsigned gpu_load_counter_idle;
+ mtx_t gpu_load_mutex;
+ thrd_t gpu_load_thread;
+ union r600_mmio_counters mmio_counters;
volatile unsigned gpu_load_stop_thread; /* bool */
char renderer_string[100];
@@ -407,12 +417,14 @@ struct r600_common_screen {
/* Performance counters. */
struct r600_perfcounters *perfcounters;
- /* If pipe_screen wants to re-emit the framebuffer state of all
- * contexts, it should atomically increment this. Each context will
- * compare this with its own last known value of the counter before
- * drawing and re-emit the framebuffer state accordingly.
+ /* If pipe_screen wants to recompute and re-emit the framebuffer,
+ * sampler, and image states of all contexts, it should atomically
+ * increment this.
+ *
+ * Each context will compare this with its own last known value of
+ * the counter before drawing and re-emit the states accordingly.
*/
- unsigned dirty_fb_counter;
+ unsigned dirty_tex_counter;
/* Atomically increment this counter when an existing texture's
* metadata is enabled or disabled in a way that requires changing
@@ -420,12 +432,6 @@ struct r600_common_screen {
*/
unsigned compressed_colortex_counter;
- /* Atomically increment this counter when an existing texture's
- * backing buffer or tile mode parameters have changed that requires
- * recomputation of shader descriptors.
- */
- unsigned dirty_tex_descriptor_counter;
-
struct {
/* Context flags to set so that all writes from earlier jobs
* in the CP are seen by L2 clients.
@@ -480,7 +486,7 @@ struct r600_streamout {
/* External state which comes from the vertex shader,
* it must be set explicitly when binding a shader. */
- unsigned *stride_in_dw;
+ uint16_t *stride_in_dw;
unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
@@ -544,11 +550,9 @@ struct r600_common_context {
unsigned num_gfx_cs_flushes;
unsigned initial_gfx_cs_size;
unsigned gpu_reset_counter;
- unsigned last_dirty_fb_counter;
+ unsigned last_dirty_tex_counter;
unsigned last_compressed_colortex_counter;
- unsigned last_dirty_tex_descriptor_counter;
- struct u_upload_mgr *uploader;
struct u_suballocator *allocator_zeroed_memory;
struct slab_child_pool pool_transfers;
@@ -574,18 +578,19 @@ struct r600_common_context {
int num_perfect_occlusion_queries;
struct list_head active_queries;
unsigned num_cs_dw_queries_suspend;
- /* Additional hardware info. */
- unsigned backend_mask;
- unsigned max_db; /* for OQ */
/* Misc stats. */
unsigned num_draw_calls;
unsigned num_spill_draw_calls;
unsigned num_compute_calls;
unsigned num_spill_compute_calls;
unsigned num_dma_calls;
+ unsigned num_cp_dma_calls;
unsigned num_vs_flushes;
unsigned num_ps_flushes;
unsigned num_cs_flushes;
+ unsigned num_fb_cache_flushes;
+ unsigned num_L2_invalidates;
+ unsigned num_L2_writebacks;
uint64_t num_alloc_tex_transfer_bytes;
unsigned last_tex_ps_draw_ratio; /* for query */
@@ -638,6 +643,9 @@ struct r600_common_context {
unsigned src_level,
const struct pipe_box *src_box);
+ void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value);
+
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value,
enum r600_coherency coher);
@@ -693,7 +701,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment);
struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
- unsigned bind,
+ unsigned flags,
unsigned usage,
unsigned size,
unsigned alignment);
@@ -706,8 +714,11 @@ r600_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource);
/* r600_common_pipe.c */
-void r600_gfx_write_fence(struct r600_common_context *ctx, struct r600_resource *buf,
- uint64_t va, uint32_t old_value, uint32_t new_value);
+void r600_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t old_fence, uint32_t new_fence);
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
void r600_gfx_wait_fence(struct r600_common_context *ctx,
uint64_t va, uint32_t ref, uint32_t mask);
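r600_gfx_write_event_eop() generalizes the old fence-only helper to arbitrary end-of-pipe events. Judging by the two call sites in r600_query.c below, data_sel picks the payload: 1 writes the 32-bit new_fence value, 3 writes a 64-bit GPU timestamp. The query code uses both forms:

    /* completion fence: write 0x80000000 once prior work drains */
    r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
                             query->buffer.buf, fence_va, 0, 0x80000000);

    /* raw timestamp: no buffer reference, no fence values */
    r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 3,
                             NULL, va, 0, 0);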
@@ -729,14 +740,12 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
unsigned processor);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value,
- enum r600_coherency coher);
+ uint64_t offset, uint64_t size, unsigned value);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src);
-void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
struct radeon_saved_cs *saved);
void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
@@ -744,8 +753,9 @@ bool r600_check_device_reset(struct r600_common_context *rctx);
/* r600_gpu_load.c */
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
-uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
-unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
+uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
+unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
+ uint64_t begin);
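These replace the GPU-load-specific begin/end pair; the type argument selects which mmio_counters entry to sample, so one mechanism serves every GPU-*-busy query. Usage as in r600_query_sw_begin()/_end() below (reading the result as a percentage is inferred from the old GPU-load query, not stated here):

    query->begin_result = r600_begin_counter(rctx->screen, query->b.type);
    /* ... the workload being measured runs ... */
    query->end_result = r600_end_counter(rctx->screen, query->b.type,
                                         query->begin_result);
    /* end_result: how busy (percent) the block was over the interval */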
/* r600_perfcounters.c */
void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
@@ -755,7 +765,7 @@ void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_queries(struct r600_common_context *ctx);
void r600_resume_queries(struct r600_common_context *ctx);
-void r600_query_init_backend_mask(struct r600_common_context *ctx);
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
/* r600_streamout.c */
void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
@@ -789,18 +799,23 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging);
-void r600_print_texture_info(struct r600_texture *rtex, FILE *f);
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, FILE *f);
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
bool vi_dcc_formats_compatible(enum pipe_format format1,
enum pipe_format format2);
-void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx,
+bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
+ unsigned level,
+ enum pipe_format view_format);
+void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
struct pipe_resource *tex,
unsigned level,
enum pipe_format view_format);
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
+ unsigned width0, unsigned height0,
unsigned width, unsigned height);
unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
void vi_separate_dcc_start_query(struct pipe_context *ctx,
@@ -951,6 +966,12 @@ r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
(!stencil_sampler && tex->can_sample_z);
}
+static inline bool
+vi_dcc_enabled(struct r600_texture *tex, unsigned level)
+{
+ return tex->dcc_offset && level < tex->surface.num_dcc_levels;
+}
+
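vi_dcc_enabled() centralizes a check that callers previously open-coded against dcc_offset and the per-level dcc_enabled flag. The DMA-blit early-out in r600_texture.c (see the r600_prepare_for_dma_blit hunk below) now reads:

    if (vi_dcc_enabled(rsrc, src_level) ||
        vi_dcc_enabled(rdst, dst_level))
        return false; /* DCC surfaces must take the 3D path */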
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
@@ -966,4 +987,9 @@ r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
(((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+static inline int S_FIXED(float value, unsigned frac_bits)
+{
+ return value * (1 << frac_bits);
+}
+
#endif
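S_FIXED() converts a float to fixed point with frac_bits fractional bits, i.e. it truncates value * 2^frac_bits toward zero. A few sanity checks:

    S_FIXED(1.0f, 8)  == 256  /* 1.0 in x.8 fixed point */
    S_FIXED(0.5f, 4)  ==   8  /* 0.5 * 16 */
    S_FIXED(2.75f, 2) ==  11  /* 2.75 * 4, truncated */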
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_query.c b/lib/mesa/src/gallium/drivers/radeon/r600_query.c
index 4b6767dd3..7764871aa 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_query.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_query.c
@@ -26,7 +26,7 @@
#include "r600_cs.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
-
+#include "os/os_time.h"
#include "tgsi/tgsi_text.h"
struct r600_hw_query_params {
@@ -43,17 +43,20 @@ struct r600_query_sw {
uint64_t begin_result;
uint64_t end_result;
+
+ uint64_t begin_time;
+ uint64_t end_time;
+
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
};
-static void r600_query_sw_destroy(struct r600_common_context *rctx,
+static void r600_query_sw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery)
{
- struct pipe_screen *screen = rctx->b.screen;
struct r600_query_sw *query = (struct r600_query_sw *)rquery;
- screen->fence_reference(screen, &query->fence, NULL);
+ rscreen->b.fence_reference(&rscreen->b, &query->fence, NULL);
FREE(query);
}
@@ -65,14 +68,18 @@ static enum radeon_value_id winsys_id_from_type(unsigned type)
case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
- case R600_QUERY_NUM_CTX_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+ case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS;
+ case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
+ case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+ case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+ case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
default: unreachable("query type does not correspond to winsys id");
}
}
@@ -81,6 +88,7 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
struct r600_query *rquery)
{
struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+ enum radeon_value_id ws_id;
switch(query->b.type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -101,6 +109,9 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_DMA_CALLS:
query->begin_result = rctx->num_dma_calls;
break;
+ case R600_QUERY_CP_DMA_CALLS:
+ query->begin_result = rctx->num_cp_dma_calls;
+ break;
case R600_QUERY_NUM_VS_FLUSHES:
query->begin_result = rctx->num_vs_flushes;
break;
@@ -110,28 +121,67 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_NUM_CS_FLUSHES:
query->begin_result = rctx->num_cs_flushes;
break;
+ case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+ query->begin_result = rctx->num_fb_cache_flushes;
+ break;
+ case R600_QUERY_NUM_L2_INVALIDATES:
+ query->begin_result = rctx->num_L2_invalidates;
+ break;
+ case R600_QUERY_NUM_L2_WRITEBACKS:
+ query->begin_result = rctx->num_L2_writebacks;
+ break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_MAPPED_VRAM:
case R600_QUERY_MAPPED_GTT:
case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_VRAM_VIS_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
+ case R600_QUERY_NUM_MAPPED_BUFFERS:
query->begin_result = 0;
break;
case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CTX_FLUSHES:
+ case R600_QUERY_NUM_GFX_IBS:
+ case R600_QUERY_NUM_SDMA_IBS:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_NUM_EVICTIONS: {
enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
break;
}
+ case R600_QUERY_CS_THREAD_BUSY:
+ ws_id = winsys_id_from_type(query->b.type);
+ query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+ query->begin_time = os_time_get_nano();
+ break;
case R600_QUERY_GPU_LOAD:
- query->begin_result = r600_gpu_load_begin(rctx->screen);
+ case R600_QUERY_GPU_SHADERS_BUSY:
+ case R600_QUERY_GPU_TA_BUSY:
+ case R600_QUERY_GPU_GDS_BUSY:
+ case R600_QUERY_GPU_VGT_BUSY:
+ case R600_QUERY_GPU_IA_BUSY:
+ case R600_QUERY_GPU_SX_BUSY:
+ case R600_QUERY_GPU_WD_BUSY:
+ case R600_QUERY_GPU_BCI_BUSY:
+ case R600_QUERY_GPU_SC_BUSY:
+ case R600_QUERY_GPU_PA_BUSY:
+ case R600_QUERY_GPU_DB_BUSY:
+ case R600_QUERY_GPU_CP_BUSY:
+ case R600_QUERY_GPU_CB_BUSY:
+ case R600_QUERY_GPU_SDMA_BUSY:
+ case R600_QUERY_GPU_PFP_BUSY:
+ case R600_QUERY_GPU_MEQ_BUSY:
+ case R600_QUERY_GPU_ME_BUSY:
+ case R600_QUERY_GPU_SURF_SYNC_BUSY:
+ case R600_QUERY_GPU_DMA_BUSY:
+ case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+ case R600_QUERY_GPU_CE_BUSY:
+ query->begin_result = r600_begin_counter(rctx->screen,
+ query->b.type);
break;
case R600_QUERY_NUM_COMPILATIONS:
query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
@@ -139,6 +189,10 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
+ case R600_QUERY_NUM_SHADER_CACHE_HITS:
+ query->begin_result =
+ p_atomic_read(&rctx->screen->num_shader_cache_hits);
+ break;
case R600_QUERY_GPIN_ASIC_ID:
case R600_QUERY_GPIN_NUM_SIMD:
case R600_QUERY_GPIN_NUM_RB:
@@ -156,6 +210,7 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
struct r600_query *rquery)
{
struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+ enum radeon_value_id ws_id;
switch(query->b.type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -178,6 +233,9 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_DMA_CALLS:
query->end_result = rctx->num_dma_calls;
break;
+ case R600_QUERY_CP_DMA_CALLS:
+ query->end_result = rctx->num_cp_dma_calls;
+ break;
case R600_QUERY_NUM_VS_FLUSHES:
query->end_result = rctx->num_vs_flushes;
break;
@@ -187,26 +245,65 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_NUM_CS_FLUSHES:
query->end_result = rctx->num_cs_flushes;
break;
+ case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+ query->end_result = rctx->num_fb_cache_flushes;
+ break;
+ case R600_QUERY_NUM_L2_INVALIDATES:
+ query->end_result = rctx->num_L2_invalidates;
+ break;
+ case R600_QUERY_NUM_L2_WRITEBACKS:
+ query->end_result = rctx->num_L2_writebacks;
+ break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_MAPPED_VRAM:
case R600_QUERY_MAPPED_GTT:
case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_VRAM_VIS_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CTX_FLUSHES:
+ case R600_QUERY_NUM_MAPPED_BUFFERS:
+ case R600_QUERY_NUM_GFX_IBS:
+ case R600_QUERY_NUM_SDMA_IBS:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_NUM_EVICTIONS: {
enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
break;
}
+ case R600_QUERY_CS_THREAD_BUSY:
+ ws_id = winsys_id_from_type(query->b.type);
+ query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+ query->end_time = os_time_get_nano();
+ break;
case R600_QUERY_GPU_LOAD:
- query->end_result = r600_gpu_load_end(rctx->screen,
- query->begin_result);
+ case R600_QUERY_GPU_SHADERS_BUSY:
+ case R600_QUERY_GPU_TA_BUSY:
+ case R600_QUERY_GPU_GDS_BUSY:
+ case R600_QUERY_GPU_VGT_BUSY:
+ case R600_QUERY_GPU_IA_BUSY:
+ case R600_QUERY_GPU_SX_BUSY:
+ case R600_QUERY_GPU_WD_BUSY:
+ case R600_QUERY_GPU_BCI_BUSY:
+ case R600_QUERY_GPU_SC_BUSY:
+ case R600_QUERY_GPU_PA_BUSY:
+ case R600_QUERY_GPU_DB_BUSY:
+ case R600_QUERY_GPU_CP_BUSY:
+ case R600_QUERY_GPU_CB_BUSY:
+ case R600_QUERY_GPU_SDMA_BUSY:
+ case R600_QUERY_GPU_PFP_BUSY:
+ case R600_QUERY_GPU_MEQ_BUSY:
+ case R600_QUERY_GPU_ME_BUSY:
+ case R600_QUERY_GPU_SURF_SYNC_BUSY:
+ case R600_QUERY_GPU_DMA_BUSY:
+ case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+ case R600_QUERY_GPU_CE_BUSY:
+ query->end_result = r600_end_counter(rctx->screen,
+ query->b.type,
+ query->begin_result);
query->begin_result = 0;
break;
case R600_QUERY_NUM_COMPILATIONS:
@@ -218,6 +315,10 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
query->end_result = rctx->last_tex_ps_draw_ratio;
break;
+ case R600_QUERY_NUM_SHADER_CACHE_HITS:
+ query->end_result =
+ p_atomic_read(&rctx->screen->num_shader_cache_hits);
+ break;
case R600_QUERY_GPIN_ASIC_ID:
case R600_QUERY_GPIN_NUM_SIMD:
case R600_QUERY_GPIN_NUM_RB:
@@ -252,6 +353,10 @@ static bool r600_query_sw_get_result(struct r600_common_context *rctx,
return result->b;
}
+ case R600_QUERY_CS_THREAD_BUSY:
+ result->u64 = (query->end_result - query->begin_result) * 100 /
+ (query->end_time - query->begin_time);
+ return true;
case R600_QUERY_GPIN_ASIC_ID:
result->u32 = 0;
return true;
@@ -294,8 +399,7 @@ static struct r600_query_ops sw_query_ops = {
.get_result_resource = NULL
};
-static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
- unsigned query_type)
+static struct pipe_query *r600_query_sw_create(unsigned query_type)
{
struct r600_query_sw *query;
@@ -309,7 +413,7 @@ static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
return (struct pipe_query *)query;
}
-void r600_query_hw_destroy(struct r600_common_context *rctx,
+void r600_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery)
{
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@@ -327,23 +431,23 @@ void r600_query_hw_destroy(struct r600_common_context *rctx,
FREE(rquery);
}
-static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx,
+static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen,
struct r600_query_hw *query)
{
unsigned buf_size = MAX2(query->result_size,
- ctx->screen->info.min_alloc_size);
+ rscreen->info.min_alloc_size);
/* Queries are normally read by the CPU after
* being written by the GPU, hence staging is probably a good
* usage pattern.
*/
struct r600_resource *buf = (struct r600_resource*)
- pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
+ pipe_buffer_create(&rscreen->b, 0,
PIPE_USAGE_STAGING, buf_size);
if (!buf)
return NULL;
- if (!query->ops->prepare_buffer(ctx, query, buf)) {
+ if (!query->ops->prepare_buffer(rscreen, query, buf)) {
r600_resource_reference(&buf, NULL);
return NULL;
}
@@ -351,14 +455,14 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
return buf;
}
-static bool r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
struct r600_query_hw *query,
struct r600_resource *buffer)
{
/* Callers ensure that the buffer is currently unused by the GPU. */
- uint32_t *results = ctx->ws->buffer_map(buffer->buf, NULL,
- PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
+ uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
if (!results)
return false;
@@ -366,19 +470,21 @@ static bool r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ unsigned max_rbs = rscreen->info.num_render_backends;
+ unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
unsigned num_results;
unsigned i, j;
/* Set top bits for unused backends. */
num_results = buffer->b.b.width0 / query->result_size;
for (j = 0; j < num_results; j++) {
- for (i = 0; i < ctx->max_db; i++) {
- if (!(ctx->backend_mask & (1<<i))) {
+ for (i = 0; i < max_rbs; i++) {
+ if (!(enabled_rb_mask & (1<<i))) {
results[(i * 4)+1] = 0x80000000;
results[(i * 4)+3] = 0x80000000;
}
}
- results += 4 * ctx->max_db;
+ results += 4 * max_rbs;
}
}
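Presetting bit 31 in the begin and end slots of every disabled render backend matters because r600_query_read_result() (its tail is visible further down) only counts a begin/end pair whose high bits are both set; an RB the GPU never writes must therefore look complete and contribute zero. A sketch of that readiness test, assuming each RB's slot holds {begin_lo, begin_hi, end_lo, end_hi}:

    uint64_t start = (uint64_t)map[0] | (uint64_t)map[1] << 32;
    uint64_t end   = (uint64_t)map[2] | (uint64_t)map[3] << 32;

    if ((start & 0x8000000000000000ull) && (end & 0x8000000000000000ull))
        result += end - start; /* preset slots yield exactly 0 */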
@@ -409,7 +515,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
struct r600_query_hw *query,
struct r600_resource *buffer,
uint64_t va);
-static void r600_query_hw_add_result(struct r600_common_context *ctx,
+static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
struct r600_query_hw *, void *buffer,
union pipe_query_result *result);
static void r600_query_hw_clear_result(struct r600_query_hw *,
@@ -423,17 +529,17 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = {
.add_result = r600_query_hw_add_result,
};
-bool r600_query_hw_init(struct r600_common_context *rctx,
+bool r600_query_hw_init(struct r600_common_screen *rscreen,
struct r600_query_hw *query)
{
- query->buffer.buf = r600_new_query_buffer(rctx, query);
+ query->buffer.buf = r600_new_query_buffer(rscreen, query);
if (!query->buffer.buf)
return false;
return true;
}
-static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
+static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen,
unsigned query_type,
unsigned index)
{
@@ -448,19 +554,19 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
- query->result_size = 16 * rctx->max_db;
+ query->result_size = 16 * rscreen->info.num_render_backends;
query->result_size += 16; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
- query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen);
+ query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 24;
query->num_cs_dw_begin = 8;
- query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen);
+ query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 16;
- query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen);
+ query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
query->flags = R600_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -475,10 +581,10 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
- query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
+ query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
query->result_size += 8; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
- query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen);
+ query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
break;
default:
assert(0);
@@ -486,7 +592,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
return NULL;
}
- if (!r600_query_hw_init(rctx, query)) {
+ if (!r600_query_hw_init(rscreen, query)) {
FREE(query);
return NULL;
}
@@ -545,7 +651,7 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) & 0xFFFF);
+ radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -554,21 +660,17 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) & 0xFFFF);
+ radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_TIME_ELAPSED:
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, va);
- radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
+ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
+ 0, 3, NULL, va, 0, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) & 0xFFFF);
+ radeon_emit(cs, va >> 32);
break;
default:
assert(0);
@@ -597,7 +699,7 @@ static void r600_query_hw_emit_start(struct r600_common_context *ctx,
*qbuf = query->buffer;
query->buffer.results_end = 0;
query->buffer.previous = qbuf;
- query->buffer.buf = r600_new_query_buffer(ctx, query);
+ query->buffer.buf = r600_new_query_buffer(ctx->screen, query);
if (!query->buffer.buf)
return;
}
@@ -625,9 +727,9 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) & 0xFFFF);
+ radeon_emit(cs, va >> 32);
- fence_va = va + ctx->max_db * 16 - 8;
+ fence_va = va + ctx->screen->info.num_render_backends * 16 - 8;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -637,19 +739,14 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) & 0xFFFF);
+ radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, va);
- radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
-
+ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
+ 0, 3, NULL, va, 0, 0);
fence_va = va + 8;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
@@ -659,7 +756,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
- radeon_emit(cs, (va >> 32) & 0xFFFF);
+ radeon_emit(cs, va >> 32);
fence_va = va + sample_size;
break;
@@ -671,7 +768,8 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
RADEON_PRIO_QUERY);
if (fence_va)
- r600_gfx_write_fence(ctx, query->buffer.buf, fence_va, 0, 0x80000000);
+ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
+ query->buffer.buf, fence_va, 0, 0x80000000);
}
static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
@@ -743,12 +841,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
/* emit predicate packets for all data blocks */
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned results_base = 0;
- uint64_t va = qbuf->buf->gpu_address;
+ uint64_t va_base = qbuf->buf->gpu_address;
while (results_base < qbuf->results_end) {
- radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
- radeon_emit(cs, va + results_base);
- radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
+ uint64_t va = va_base + results_base;
+
+ if (ctx->chip_class >= GFX9) {
+ radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, op | ((va >> 32) & 0xFF));
+ }
r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
RADEON_PRIO_QUERY);
results_base += query->result_size;
@@ -761,14 +868,15 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct r600_common_screen *rscreen =
+ (struct r600_common_screen *)ctx->screen;
if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
query_type == PIPE_QUERY_GPU_FINISHED ||
query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
- return r600_query_sw_create(ctx, query_type);
+ return r600_query_sw_create(query_type);
- return r600_query_hw_create(rctx, query_type, index);
+ return r600_query_hw_create(rscreen, query_type, index);
}
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
@@ -776,7 +884,7 @@ static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *quer
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
- rquery->ops->destroy(rctx, rquery);
+ rquery->ops->destroy(rctx->screen, rquery);
}
static boolean r600_begin_query(struct pipe_context *ctx,
@@ -808,9 +916,9 @@ void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
r600_resource_reference(&query->buffer.buf, NULL);
- query->buffer.buf = r600_new_query_buffer(rctx, query);
+ query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
} else {
- if (!query->ops->prepare_buffer(rctx, query, query->buffer.buf))
+ if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf))
r600_resource_reference(&query->buffer.buf, NULL);
}
}
@@ -867,6 +975,8 @@ static void r600_get_hw_query_params(struct r600_common_context *rctx,
struct r600_query_hw *rquery, int index,
struct r600_hw_query_params *params)
{
+ unsigned max_rbs = rctx->screen->info.num_render_backends;
+
params->pair_stride = 0;
params->pair_count = 1;
@@ -875,9 +985,9 @@ static void r600_get_hw_query_params(struct r600_common_context *rctx,
case PIPE_QUERY_OCCLUSION_PREDICATE:
params->start_offset = 0;
params->end_offset = 8;
- params->fence_offset = rctx->max_db * 16;
+ params->fence_offset = max_rbs * 16;
params->pair_stride = 16;
- params->pair_count = rctx->max_db;
+ params->pair_count = max_rbs;
break;
case PIPE_QUERY_TIME_ELAPSED:
params->start_offset = 0;
@@ -936,14 +1046,16 @@ static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned
return 0;
}
-static void r600_query_hw_add_result(struct r600_common_context *ctx,
+static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
struct r600_query_hw *query,
void *buffer,
union pipe_query_result *result)
{
+ unsigned max_rbs = rscreen->info.num_render_backends;
+
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER: {
- for (unsigned i = 0; i < ctx->max_db; ++i) {
+ for (unsigned i = 0; i < max_rbs; ++i) {
unsigned results_base = i * 16;
result->u64 +=
r600_query_read_result(buffer + results_base, 0, 2, true);
@@ -951,7 +1063,7 @@ static void r600_query_hw_add_result(struct r600_common_context *ctx,
break;
}
case PIPE_QUERY_OCCLUSION_PREDICATE: {
- for (unsigned i = 0; i < ctx->max_db; ++i) {
+ for (unsigned i = 0; i < max_rbs; ++i) {
unsigned results_base = i * 16;
result->b = result->b ||
r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
@@ -989,7 +1101,7 @@ static void r600_query_hw_add_result(struct r600_common_context *ctx,
r600_query_read_result(buffer, 0, 4, true);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- if (ctx->chip_class >= EVERGREEN) {
+ if (rscreen->chip_class >= EVERGREEN) {
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(buffer, 0, 22, false);
result->pipeline_statistics.c_primitives +=
@@ -1087,6 +1199,7 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
struct r600_query *rquery,
bool wait, union pipe_query_result *result)
{
+ struct r600_common_screen *rscreen = rctx->screen;
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *qbuf;
@@ -1103,7 +1216,7 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
return false;
while (results_base != qbuf->results_end) {
- query->ops->add_result(rctx, query, map + results_base,
+ query->ops->add_result(rscreen, query, map + results_base,
result);
results_base += query->result_size;
}
@@ -1112,7 +1225,7 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
- result->u64 = (1000000 * result->u64) / rctx->screen->info.clock_crystal_freq;
+ result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq;
}
return true;
}
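The conversion assumes clock_crystal_freq is reported in kHz: ns = ticks * 10^6 / freq_kHz, since ticks / (freq_kHz * 1000) is seconds and scaling to nanoseconds contributes the factor of 10^9. For a 27000 kHz reference clock:

    /* 27,000,000 ticks at 27 MHz -> exactly one second */
    uint64_t ns = (1000000 * 27000000ull) / 27000; /* == 1000000000 */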
@@ -1170,6 +1283,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
"IMM[1] UINT32 {1, 2, 4, 8}\n"
"IMM[2] UINT32 {16, 32, 64, 128}\n"
"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
+ "IMM[4] UINT32 {0, 0, 0, 0}\n"
"AND TEMP[5], CONST[0].wwww, IMM[2].xxxx\n"
"UIF TEMP[5]\n"
@@ -1269,7 +1383,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
/* Convert to boolean */
"AND TEMP[4], CONST[0].wwww, IMM[1].wwww\n"
"UIF TEMP[4]\n"
- "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[0].xxxx\n"
+ "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
"MOV TEMP[0].y, IMM[0].xxxx\n"
"ENDIF\n"
@@ -1479,7 +1593,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
static void r600_render_condition(struct pipe_context *ctx,
struct pipe_query *query,
boolean condition,
- uint mode)
+ enum pipe_render_cond_flag mode)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query_hw *rquery = (struct r600_query_hw *)query;
@@ -1550,19 +1664,23 @@ void r600_resume_queries(struct r600_common_context *ctx)
}
}
-/* Get backends mask */
-void r600_query_init_backend_mask(struct r600_common_context *ctx)
+/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
{
+ struct r600_common_context *ctx =
+ (struct r600_common_context*)rscreen->aux_context;
struct radeon_winsys_cs *cs = ctx->gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
- unsigned num_backends = ctx->screen->info.num_render_backends;
unsigned i, mask = 0;
+ unsigned max_rbs = ctx->screen->info.num_render_backends;
+
+ assert(rscreen->chip_class <= CAYMAN);
/* if backend_map query is supported by the kernel */
- if (ctx->screen->info.r600_gb_backend_map_valid) {
- unsigned num_tile_pipes = ctx->screen->info.num_tile_pipes;
- unsigned backend_map = ctx->screen->info.r600_gb_backend_map;
+ if (rscreen->info.r600_gb_backend_map_valid) {
+ unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
+ unsigned backend_map = rscreen->info.r600_gb_backend_map;
unsigned item_width, item_mask;
if (ctx->chip_class >= EVERGREEN) {
@@ -1579,7 +1697,7 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
backend_map >>= item_width;
}
if (mask != 0) {
- ctx->backend_mask = mask;
+ rscreen->info.enabled_rb_mask = mask;
return;
}
}
@@ -1588,15 +1706,15 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
/* create buffer for event data */
buffer = (struct r600_resource*)
- pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
- PIPE_USAGE_STAGING, ctx->max_db*16);
+ pipe_buffer_create(ctx->b.screen, 0,
+ PIPE_USAGE_STAGING, max_rbs * 16);
if (!buffer)
- goto err;
+ return;
/* initialize buffer with zeroes */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
if (results) {
- memset(results, 0, ctx->max_db * 4 * 4);
+ memset(results, 0, max_rbs * 4 * 4);
/* emit EVENT_WRITE for ZPASS_DONE */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -1610,7 +1728,7 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
/* analyze results */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
if (results) {
- for(i = 0; i < ctx->max_db; i++) {
+ for(i = 0; i < max_rbs; i++) {
/* at least the highest bit will be set if the backend is used */
if (results[i*4 + 1])
mask |= (1<<i);
@@ -1620,15 +1738,8 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
r600_resource_reference(&buffer, NULL);
- if (mask != 0) {
- ctx->backend_mask = mask;
- return;
- }
-
-err:
- /* fallback to old method - set num_backends lower bits to 1 */
- ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
- return;
+ if (mask)
+ rscreen->info.enabled_rb_mask = mask;
}
#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
@@ -1649,23 +1760,32 @@ err:
static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
+ X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
+ X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
+ X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
+ X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
+ X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
- X("num-ctx-flushes", NUM_CTX_FLUSHES, UINT64, AVERAGE),
+ X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
+ X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
+ X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+ X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
@@ -1680,12 +1800,34 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
- /* The following queries must be at the end of the list because their
- * availability is adjusted dynamically based on the DRM version. */
- X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
+
+ /* The following queries must be at the end of the list because their
+ * availability is adjusted dynamically based on the DRM version. */
+ X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
+ X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
+ X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
+ X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
+ X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
+ X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
+ X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
+ X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
+ X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
+ X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
+ X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
+ X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
+ X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
+ X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
+ X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
+ X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
+ X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
+ X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
+ X("GPU-dma-busy", GPU_DMA_BUSY, UINT64, AVERAGE),
+ X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
+ X("GPU-ce-busy", GPU_CE_BUSY, UINT64, AVERAGE),
};
#undef X
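Each X()/XG()/XFULL() entry above expands to one pipe_driver_query_info initializer; the XFULL definition is cut off at the hunk boundary, so the expansion below is an assumption based on gallium's pipe_driver_query_info fields, with X() presumably passing ~0 as the group id (the group_id != ~(unsigned)0 test in r600_get_driver_query_info suggests as much):

    #define XFULL(name_, query_type_, type_, result_type_, group_id_) \
        { \
            .name = name_, \
            .query_type = R600_QUERY_##query_type_, \
            .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
            .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
            .group_id = group_id_ \
        }

so X("draw-calls", DRAW_CALLS, UINT64, AVERAGE) becomes a query named "draw-calls" backed by R600_QUERY_DRAW_CALLS.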
@@ -1696,10 +1838,14 @@ static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
{
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
return ARRAY_SIZE(r600_driver_query_list);
- else if (rscreen->info.drm_major == 3)
- return ARRAY_SIZE(r600_driver_query_list) - 3;
+ else if (rscreen->info.drm_major == 3) {
+ if (rscreen->chip_class >= VI)
+ return ARRAY_SIZE(r600_driver_query_list);
+ else
+ return ARRAY_SIZE(r600_driver_query_list) - 7;
+ }
else
- return ARRAY_SIZE(r600_driver_query_list) - 4;
+ return ARRAY_SIZE(r600_driver_query_list) - 25;
}
static int r600_get_driver_query_info(struct pipe_screen *screen,
@@ -1735,6 +1881,9 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
case R600_QUERY_GPU_TEMPERATURE:
info->max_value.u64 = 125;
break;
+ case R600_QUERY_VRAM_VIS_USAGE:
+ info->max_value.u64 = rscreen->info.vram_vis_size;
+ break;
}
if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_query.h b/lib/mesa/src/gallium/drivers/radeon/r600_query.h
index 14c433d91..b9ab44ca3 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_query.h
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_query.h
@@ -48,26 +48,56 @@ enum {
R600_QUERY_COMPUTE_CALLS,
R600_QUERY_SPILL_COMPUTE_CALLS,
R600_QUERY_DMA_CALLS,
+ R600_QUERY_CP_DMA_CALLS,
R600_QUERY_NUM_VS_FLUSHES,
R600_QUERY_NUM_PS_FLUSHES,
R600_QUERY_NUM_CS_FLUSHES,
+ R600_QUERY_NUM_FB_CACHE_FLUSHES,
+ R600_QUERY_NUM_L2_INVALIDATES,
+ R600_QUERY_NUM_L2_WRITEBACKS,
+ R600_QUERY_CS_THREAD_BUSY,
R600_QUERY_REQUESTED_VRAM,
R600_QUERY_REQUESTED_GTT,
R600_QUERY_MAPPED_VRAM,
R600_QUERY_MAPPED_GTT,
R600_QUERY_BUFFER_WAIT_TIME,
- R600_QUERY_NUM_CTX_FLUSHES,
+ R600_QUERY_NUM_MAPPED_BUFFERS,
+ R600_QUERY_NUM_GFX_IBS,
+ R600_QUERY_NUM_SDMA_IBS,
R600_QUERY_NUM_BYTES_MOVED,
R600_QUERY_NUM_EVICTIONS,
R600_QUERY_VRAM_USAGE,
+ R600_QUERY_VRAM_VIS_USAGE,
R600_QUERY_GTT_USAGE,
R600_QUERY_GPU_TEMPERATURE,
R600_QUERY_CURRENT_GPU_SCLK,
R600_QUERY_CURRENT_GPU_MCLK,
R600_QUERY_GPU_LOAD,
+ R600_QUERY_GPU_SHADERS_BUSY,
+ R600_QUERY_GPU_TA_BUSY,
+ R600_QUERY_GPU_GDS_BUSY,
+ R600_QUERY_GPU_VGT_BUSY,
+ R600_QUERY_GPU_IA_BUSY,
+ R600_QUERY_GPU_SX_BUSY,
+ R600_QUERY_GPU_WD_BUSY,
+ R600_QUERY_GPU_BCI_BUSY,
+ R600_QUERY_GPU_SC_BUSY,
+ R600_QUERY_GPU_PA_BUSY,
+ R600_QUERY_GPU_DB_BUSY,
+ R600_QUERY_GPU_CP_BUSY,
+ R600_QUERY_GPU_CB_BUSY,
+ R600_QUERY_GPU_SDMA_BUSY,
+ R600_QUERY_GPU_PFP_BUSY,
+ R600_QUERY_GPU_MEQ_BUSY,
+ R600_QUERY_GPU_ME_BUSY,
+ R600_QUERY_GPU_SURF_SYNC_BUSY,
+ R600_QUERY_GPU_DMA_BUSY,
+ R600_QUERY_GPU_SCRATCH_RAM_BUSY,
+ R600_QUERY_GPU_CE_BUSY,
R600_QUERY_NUM_COMPILATIONS,
R600_QUERY_NUM_SHADERS_CREATED,
R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
+ R600_QUERY_NUM_SHADER_CACHE_HITS,
R600_QUERY_GPIN_ASIC_ID,
R600_QUERY_GPIN_NUM_SIMD,
R600_QUERY_GPIN_NUM_RB,
@@ -83,7 +113,7 @@ enum {
};
struct r600_query_ops {
- void (*destroy)(struct r600_common_context *, struct r600_query *);
+ void (*destroy)(struct r600_common_screen *, struct r600_query *);
bool (*begin)(struct r600_common_context *, struct r600_query *);
bool (*end)(struct r600_common_context *, struct r600_query *);
bool (*get_result)(struct r600_common_context *,
@@ -112,7 +142,7 @@ enum {
};
struct r600_query_hw_ops {
- bool (*prepare_buffer)(struct r600_common_context *,
+ bool (*prepare_buffer)(struct r600_common_screen *,
struct r600_query_hw *,
struct r600_resource *);
void (*emit_start)(struct r600_common_context *,
@@ -122,7 +152,7 @@ struct r600_query_hw_ops {
struct r600_query_hw *,
struct r600_resource *buffer, uint64_t va);
void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
- void (*add_result)(struct r600_common_context *ctx,
+ void (*add_result)(struct r600_common_screen *screen,
struct r600_query_hw *, void *buffer,
union pipe_query_result *result);
};
@@ -157,9 +187,9 @@ struct r600_query_hw {
unsigned stream;
};
-bool r600_query_hw_init(struct r600_common_context *rctx,
+bool r600_query_hw_init(struct r600_common_screen *rscreen,
struct r600_query_hw *query);
-void r600_query_hw_destroy(struct r600_common_context *rctx,
+void r600_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery);
bool r600_query_hw_begin(struct r600_common_context *rctx,
struct r600_query *rquery);
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_streamout.c b/lib/mesa/src/gallium/drivers/radeon/r600_streamout.c
index b5296aa56..a18089a3b 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_streamout.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_streamout.c
@@ -187,7 +187,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
- unsigned *stride_in_dw = rctx->streamout.stride_in_dw;
+ uint16_t *stride_in_dw = rctx->streamout.stride_in_dw;
unsigned i, update_flags = 0;
r600_flush_vgt_streamout(rctx);
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c b/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c
index 1e60f6aff..9e1ff9e5f 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_test_dma.c
@@ -26,26 +26,10 @@
#include "r600_pipe_common.h"
#include "util/u_surface.h"
+#include "util/rand_xor.h"
static uint64_t seed_xorshift128plus[2];
-/* Super fast random number generator.
- *
- * This rand_xorshift128plus function by Sebastiano Vigna belongs
- * to the public domain.
- */
-static uint64_t rand_xorshift128plus(void)
-{
- uint64_t *s = seed_xorshift128plus;
-
- uint64_t s1 = s[0];
- const uint64_t s0 = s[1];
- s[0] = s0;
- s1 ^= s1 << 23;
- s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5);
- return s[1] + s0;
-}
-
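The xorshift128+ generator removed here moved verbatim into util/rand_xor so other code can share it; this file keeps only the 128-bit seed. Typical use of the new helpers, matching the calls in r600_test_dma() below (false selects the fixed, reproducible seed rather than a randomised one):

    uint64_t seed[2];

    s_rand_xorshift128plus(seed, false); /* deterministic seed */
    uint64_t r = rand_xorshift128plus(seed); /* next 64 random bits */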
#define RAND_NUM_SIZE 8
/* The GPU blits are emulated on the CPU using these CPU textures. */
@@ -91,8 +75,10 @@ static void set_random_pixels(struct pipe_context *ctx,
assert(t->stride % RAND_NUM_SIZE == 0);
assert(cpu->stride % RAND_NUM_SIZE == 0);
- for (x = 0; x < size; x++)
- *ptr++ = *ptr_cpu++ = rand_xorshift128plus();
+ for (x = 0; x < size; x++) {
+ *ptr++ = *ptr_cpu++ =
+ rand_xorshift128plus(seed_xorshift128plus);
+ }
}
}
@@ -149,18 +135,24 @@ static enum pipe_format get_format_from_bpp(int bpp)
}
}
-static const char *array_mode_to_string(unsigned mode)
+static const char *array_mode_to_string(struct r600_common_screen *rscreen,
+ struct radeon_surf *surf)
{
- switch (mode) {
- case RADEON_SURF_MODE_LINEAR_ALIGNED:
- return "LINEAR_ALIGNED";
- case RADEON_SURF_MODE_1D:
- return "1D_TILED_THIN1";
- case RADEON_SURF_MODE_2D:
- return "2D_TILED_THIN1";
- default:
- assert(0);
+ if (rscreen->chip_class >= GFX9) {
+ /* TODO */
return " UNKNOWN";
+ } else {
+ switch (surf->u.legacy.level[0].mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ return "LINEAR_ALIGNED";
+ case RADEON_SURF_MODE_1D:
+ return "1D_TILED_THIN1";
+ case RADEON_SURF_MODE_2D:
+ return "2D_TILED_THIN1";
+ default:
+ assert(0);
+ return " UNKNOWN";
+ }
}
}
@@ -197,8 +189,7 @@ void r600_test_dma(struct r600_common_screen *rscreen)
/* the seed for random test parameters */
srand(0x9b47d95b);
/* the seed for random pixel data */
- seed_xorshift128plus[0] = 0x3bffb83978e24f88;
- seed_xorshift128plus[1] = 0x9238d5d56c71cd35;
+ s_rand_xorshift128plus(seed_xorshift128plus, false);
iterations = 1000000000; /* just kill it when you are bored */
num_partial_copies = 30;
@@ -292,16 +283,16 @@ void r600_test_dma(struct r600_common_screen *rscreen)
printf("%4u: dst = (%5u x %5u x %u, %s), "
" src = (%5u x %5u x %u, %s), bpp = %2u, ",
i, tdst.width0, tdst.height0, tdst.array_size,
- array_mode_to_string(rdst->surface.level[0].mode),
+ array_mode_to_string(rscreen, &rdst->surface),
tsrc.width0, tsrc.height0, tsrc.array_size,
- array_mode_to_string(rsrc->surface.level[0].mode), bpp);
+ array_mode_to_string(rscreen, &rsrc->surface), bpp);
fflush(stdout);
/* set src pixels */
set_random_pixels(ctx, src, &src_cpu);
/* clear dst pixels */
- rctx->clear_buffer(ctx, dst, 0, rdst->surface.bo_size, 0, true);
+ rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
/* preparation */
@@ -331,8 +322,8 @@ void r600_test_dma(struct r600_common_screen *rscreen)
dstz = rand() % (tdst.array_size - depth + 1);
/* special code path to hit the tiled partial copies */
- if (rsrc->surface.level[0].mode >= RADEON_SURF_MODE_1D &&
- rdst->surface.level[0].mode >= RADEON_SURF_MODE_1D &&
+ if (!rsrc->surface.is_linear &&
+ !rdst->surface.is_linear &&
rand() & 1) {
if (max_width < 8 || max_height < 8)
continue;
@@ -359,8 +350,8 @@ void r600_test_dma(struct r600_common_screen *rscreen)
}
/* special code path to hit out-of-bounds reads in L2T */
- if (rsrc->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
- rdst->surface.level[0].mode >= RADEON_SURF_MODE_1D &&
+ if (rsrc->surface.is_linear &&
+ !rdst->surface.is_linear &&
rand() % 4 == 0) {
srcx = 0;
srcy = 0;
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_texture.c b/lib/mesa/src/gallium/drivers/radeon/r600_texture.c
index 27035c0fa..4b2082523 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_texture.c
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_texture.c
@@ -37,8 +37,9 @@
static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex);
-static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
- const struct pipe_resource *templ);
+static enum radeon_surf_mode
+r600_choose_tiling(struct r600_common_screen *rscreen,
+ const struct pipe_resource *templ);
bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
@@ -52,8 +53,7 @@ bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
if (!rctx->dma.cs)
return false;
- if (util_format_get_blocksizebits(rdst->resource.b.b.format) !=
- util_format_get_blocksizebits(rsrc->resource.b.b.format))
+ if (rdst->surface.bpe != rsrc->surface.bpe)
return false;
/* MSAA: Blits don't exist in the real world. */
@@ -72,8 +72,8 @@ bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
* src: Use the 3D path. DCC decompression is expensive.
* dst: Use the 3D path to compress the pixels with DCC.
*/
- if ((rsrc->dcc_offset && rsrc->surface.level[src_level].dcc_enabled) ||
- (rdst->dcc_offset && rdst->surface.level[dst_level].dcc_enabled))
+ if (vi_dcc_enabled(rsrc, src_level) ||
+ vi_dcc_enabled(rdst, dst_level))
return false;
/* CMASK as:
@@ -177,179 +177,170 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
src, 0, &sbox);
}
-static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
- const struct pipe_box *box)
+static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, unsigned level,
+ const struct pipe_box *box,
+ unsigned *stride,
+ unsigned *layer_stride)
{
- enum pipe_format format = rtex->resource.b.b.format;
+ if (rscreen->chip_class >= GFX9) {
+ *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
+ *layer_stride = rtex->surface.u.gfx9.surf_slice_size;
+
+ if (!box)
+ return 0;
+
+ /* Each texture is an array of slices. Each slice is an array
+ * of mipmap levels. */
+ return box->z * rtex->surface.u.gfx9.surf_slice_size +
+ rtex->surface.u.gfx9.offset[level] +
+ (box->y / rtex->surface.blk_h *
+ rtex->surface.u.gfx9.surf_pitch +
+ box->x / rtex->surface.blk_w) * rtex->surface.bpe;
+ } else {
+ *stride = rtex->surface.u.legacy.level[level].nblk_x *
+ rtex->surface.bpe;
+ *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
- return rtex->surface.level[level].offset +
- box->z * rtex->surface.level[level].slice_size +
- box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes +
- box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ if (!box)
+ return rtex->surface.u.legacy.level[level].offset;
+
+ /* Each texture is an array of mipmap levels. Each level is
+ * an array of slices. */
+ return rtex->surface.u.legacy.level[level].offset +
+ box->z * rtex->surface.u.legacy.level[level].slice_size +
+ (box->y / rtex->surface.blk_h *
+ rtex->surface.u.legacy.level[level].nblk_x +
+ box->x / rtex->surface.blk_w) * rtex->surface.bpe;
+ }
}
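The two branches differ because GFX9 stores a texture slice-major (all mip levels of one slice are contiguous, hence z * surf_slice_size is added before the per-level offset), while the legacy layout is level-major (all slices of one level are contiguous). A worked legacy example, assuming bpe = 4, blk_w = blk_h = 1, nblk_x = 256 and a level offset of 0:

    /* texel (x=16, y=8, z=0) at that level:
     * 0 + 0 * slice_size + (8 * 256 + 16) * 4 = 8256 bytes in */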
static int r600_init_surface(struct r600_common_screen *rscreen,
struct radeon_surf *surface,
const struct pipe_resource *ptex,
- unsigned array_mode,
+ enum radeon_surf_mode array_mode,
+ unsigned pitch_in_bytes_override,
+ unsigned offset,
+ bool is_imported,
+ bool is_scanout,
bool is_flushed_depth,
bool tc_compatible_htile)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
bool is_depth, is_stencil;
+ int r;
+ unsigned i, bpe, flags = 0;
is_depth = util_format_has_depth(desc);
is_stencil = util_format_has_stencil(desc);
- surface->npix_x = ptex->width0;
- surface->npix_y = ptex->height0;
- surface->npix_z = ptex->depth0;
- surface->blk_w = util_format_get_blockwidth(ptex->format);
- surface->blk_h = util_format_get_blockheight(ptex->format);
- surface->blk_d = 1;
- surface->array_size = 1;
- surface->last_level = ptex->last_level;
-
if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
- surface->bpe = 4; /* stencil is allocated separately on evergreen */
+ bpe = 4; /* stencil is allocated separately on evergreen */
} else {
- surface->bpe = util_format_get_blocksize(ptex->format);
+ bpe = util_format_get_blocksize(ptex->format);
/* align byte per element on dword */
- if (surface->bpe == 3) {
- surface->bpe = 4;
+ if (bpe == 3) {
+ bpe = 4;
}
}
- surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1;
- surface->flags = RADEON_SURF_SET(array_mode, MODE);
-
- switch (ptex->target) {
- case PIPE_TEXTURE_1D:
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
- break;
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D:
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
- break;
- case PIPE_TEXTURE_3D:
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
- break;
- case PIPE_TEXTURE_1D_ARRAY:
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
- surface->array_size = ptex->array_size;
- break;
- case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
- assert(ptex->array_size % 6 == 0);
- case PIPE_TEXTURE_2D_ARRAY:
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
- surface->array_size = ptex->array_size;
- break;
- case PIPE_TEXTURE_CUBE:
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
- break;
- case PIPE_BUFFER:
- default:
- return -EINVAL;
- }
-
if (!is_flushed_depth && is_depth) {
- surface->flags |= RADEON_SURF_ZBUFFER;
+ flags |= RADEON_SURF_ZBUFFER;
if (tc_compatible_htile &&
- array_mode == RADEON_SURF_MODE_2D) {
+ (rscreen->chip_class >= GFX9 ||
+ array_mode == RADEON_SURF_MODE_2D)) {
/* TC-compatible HTILE only supports Z32_FLOAT.
- * Promote Z16 to Z32. DB->CB copies will convert
+ * GFX9 also supports Z16_UNORM.
+ * On VI, promote Z16 to Z32. DB->CB copies will convert
* the format for transfers.
*/
- surface->bpe = 4;
- surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
- }
+ if (rscreen->chip_class == VI)
+ bpe = 4;
- if (is_stencil) {
- surface->flags |= RADEON_SURF_SBUFFER |
- RADEON_SURF_HAS_SBUFFER_MIPTREE;
+ flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
}
- }
- if (rscreen->chip_class >= SI) {
- surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
+ if (is_stencil)
+ flags |= RADEON_SURF_SBUFFER;
}
if (rscreen->chip_class >= VI &&
(ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
- surface->flags |= RADEON_SURF_DISABLE_DCC;
+ flags |= RADEON_SURF_DISABLE_DCC;
- if (ptex->bind & PIPE_BIND_SCANOUT) {
+ if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
/* This should catch bugs in gallium users setting incorrect flags. */
- assert(surface->nsamples == 1 &&
- surface->array_size == 1 &&
- surface->npix_z == 1 &&
- surface->last_level == 0 &&
- !(surface->flags & RADEON_SURF_Z_OR_SBUFFER));
+ assert(ptex->nr_samples <= 1 &&
+ ptex->array_size == 1 &&
+ ptex->depth0 == 1 &&
+ ptex->last_level == 0 &&
+ !(flags & RADEON_SURF_Z_OR_SBUFFER));
- surface->flags |= RADEON_SURF_SCANOUT;
+ flags |= RADEON_SURF_SCANOUT;
}
- return 0;
-}
-static int r600_setup_surface(struct pipe_screen *screen,
- struct r600_texture *rtex,
- unsigned pitch_in_bytes_override,
- unsigned offset)
-{
- struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
- unsigned i;
- int r;
+ if (is_imported)
+ flags |= RADEON_SURF_IMPORTED;
+ if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
+ flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
- r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
+ r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe,
+ array_mode, surface);
if (r) {
return r;
}
- rtex->size = rtex->surface.bo_size;
-
- if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
- /* old ddx on evergreen over estimate alignment for 1d, only 1 level
- * for those
- */
- rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
- rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
- rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
- }
+ if (rscreen->chip_class >= GFX9) {
+ assert(!pitch_in_bytes_override ||
+ pitch_in_bytes_override == surface->u.gfx9.surf_pitch * bpe);
+ surface->u.gfx9.surf_offset = offset;
+ } else {
+ if (pitch_in_bytes_override &&
+ pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) {
+ /* The old DDX on Evergreen overestimates alignment for 1D;
+ * only one level exists for those surfaces.
+ */
+ surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe;
+ surface->u.legacy.level[0].slice_size = pitch_in_bytes_override *
+ surface->u.legacy.level[0].nblk_y;
+ }
- if (offset) {
- for (i = 0; i < ARRAY_SIZE(rtex->surface.level); ++i)
- rtex->surface.level[i].offset += offset;
+ if (offset) {
+ for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
+ surface->u.legacy.level[i].offset += offset;
+ }
}
return 0;
}
-static void r600_texture_init_metadata(struct r600_texture *rtex,
+static void r600_texture_init_metadata(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
struct radeon_bo_metadata *metadata)
{
struct radeon_surf *surface = &rtex->surface;
memset(metadata, 0, sizeof(*metadata));
- metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->pipe_config = surface->pipe_config;
- metadata->bankw = surface->bankw;
- metadata->bankh = surface->bankh;
- metadata->tile_split = surface->tile_split;
- metadata->mtilea = surface->mtilea;
- metadata->num_banks = surface->num_banks;
- metadata->stride = surface->level[0].pitch_bytes;
- metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
-}
-static void r600_dirty_all_framebuffer_states(struct r600_common_screen *rscreen)
-{
- p_atomic_inc(&rscreen->dirty_fb_counter);
+ if (rscreen->chip_class >= GFX9) {
+ metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
+ } else {
+ metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
+ RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
+ RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
+ metadata->u.legacy.bankw = surface->u.legacy.bankw;
+ metadata->u.legacy.bankh = surface->u.legacy.bankh;
+ metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
+ metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
+ metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
+ metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
+ metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+ }
}
static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
@@ -359,13 +350,13 @@ static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
struct pipe_context *ctx = &rctx->b;
if (ctx == rscreen->aux_context)
- pipe_mutex_lock(rscreen->aux_context_lock);
+ mtx_lock(&rscreen->aux_context_lock);
ctx->flush_resource(ctx, &rtex->resource.b.b);
ctx->flush(ctx, NULL, 0);
if (ctx == rscreen->aux_context)
- pipe_mutex_unlock(rscreen->aux_context_lock);
+ mtx_unlock(&rscreen->aux_context_lock);
}
static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
@@ -390,7 +381,7 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
r600_resource_reference(&rtex->cmask_buffer, NULL);
/* Notify all contexts about the change. */
- r600_dirty_all_framebuffer_states(rscreen);
+ p_atomic_inc(&rscreen->dirty_tex_counter);
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
@@ -414,7 +405,7 @@ static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
rtex->dcc_offset = 0;
/* Notify all contexts about the change. */
- r600_dirty_all_framebuffer_states(rscreen);
+ p_atomic_inc(&rscreen->dirty_tex_counter);
return true;
}
@@ -448,14 +439,14 @@ bool r600_texture_disable_dcc(struct r600_common_context *rctx,
return false;
if (&rctx->b == rscreen->aux_context)
- pipe_mutex_lock(rscreen->aux_context_lock);
+ mtx_lock(&rscreen->aux_context_lock);
/* Decompress DCC. */
rctx->decompress_dcc(&rctx->b, rtex);
rctx->b.flush(&rctx->b, NULL, 0);
if (&rctx->b == rscreen->aux_context)
- pipe_mutex_unlock(rscreen->aux_context_lock);
+ mtx_unlock(&rscreen->aux_context_lock);
return r600_texture_discard_dcc(rscreen, rtex);
}
@@ -476,7 +467,7 @@ static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
return;
if (rtex->resource.is_shared ||
- rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
+ rtex->surface.is_linear)
return;
/* This fails with MSAA, depth, and compressed textures. */
@@ -529,8 +520,7 @@ static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
r600_texture_reference(&new_tex, NULL);
- r600_dirty_all_framebuffer_states(rctx->screen);
- p_atomic_inc(&rctx->screen->dirty_tex_descriptor_counter);
+ p_atomic_inc(&rctx->screen->dirty_tex_counter);
}
static boolean r600_texture_get_handle(struct pipe_screen* screen,
@@ -546,6 +536,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
struct r600_texture *rtex = (struct r600_texture*)resource;
struct radeon_bo_metadata metadata;
bool update_metadata = false;
+ unsigned stride, offset, slice_size;
/* This is not supported now, but it might be required for OpenCL
* interop in the future.
@@ -578,7 +569,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
/* Set metadata. */
if (!res->is_shared || update_metadata) {
- r600_texture_init_metadata(rtex, &metadata);
+ r600_texture_init_metadata(rscreen, rtex, &metadata);
if (rscreen->query_opaque_metadata)
rscreen->query_opaque_metadata(rscreen, rtex,
&metadata);
@@ -599,11 +590,25 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
res->external_usage = usage;
}
- return rscreen->ws->buffer_get_handle(res->buf,
- rtex->surface.level[0].pitch_bytes,
- rtex->surface.level[0].offset,
- rtex->surface.level[0].slice_size,
- whandle);
+ if (res->b.b.target == PIPE_BUFFER) {
+ offset = 0;
+ stride = 0;
+ slice_size = 0;
+ } else {
+ if (rscreen->chip_class >= GFX9) {
+ offset = rtex->surface.u.gfx9.surf_offset;
+ stride = rtex->surface.u.gfx9.surf_pitch *
+ rtex->surface.bpe;
+ slice_size = rtex->surface.u.gfx9.surf_slice_size;
+ } else {
+ offset = rtex->surface.u.legacy.level[0].offset;
+ stride = rtex->surface.u.legacy.level[0].nblk_x *
+ rtex->surface.bpe;
+ slice_size = rtex->surface.u.legacy.level[0].slice_size;
+ }
+ }
+ return rscreen->ws->buffer_get_handle(res->buf, stride, offset,
+ slice_size, whandle);
}
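
Everything the winsys needs from the export above is three numbers. A simplified sketch of the selection, assuming an abridged surface layout (struct and names hypothetical):

#include <stdint.h>

/* Hypothetical, abridged model of the handle-export parameters above:
 * buffers export zeros, GFX9 uses the unified surface fields, older
 * chips use level[0] of the legacy layout. */
struct fake_surf {
	unsigned bpe;
	struct { uint64_t surf_offset, surf_slice_size; unsigned surf_pitch; } gfx9;
	struct { uint64_t offset, slice_size; unsigned nblk_x; } legacy;
};

static void get_export_params(int is_buffer, int is_gfx9,
                              const struct fake_surf *s, unsigned *stride,
                              uint64_t *offset, uint64_t *slice_size)
{
	if (is_buffer) {
		*stride = 0;
		*offset = 0;
		*slice_size = 0;
	} else if (is_gfx9) {
		*stride = s->gfx9.surf_pitch * s->bpe; /* pitch is in elements */
		*offset = s->gfx9.surf_offset;
		*slice_size = s->gfx9.surf_slice_size;
	} else {
		*stride = s->legacy.nblk_x * s->bpe;   /* blocks * bytes per block */
		*offset = s->legacy.offset;
		*slice_size = s->legacy.slice_size;
	}
}
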
static void r600_texture_destroy(struct pipe_screen *screen,
@@ -633,35 +638,39 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
- struct radeon_surf fmask = rtex->surface;
+ struct pipe_resource templ = rtex->resource.b.b;
+ struct radeon_surf fmask = {};
+ unsigned flags, bpe;
memset(out, 0, sizeof(*out));
- fmask.bo_alignment = 0;
- fmask.bo_size = 0;
- fmask.nsamples = 1;
- fmask.flags |= RADEON_SURF_FMASK;
+ if (rscreen->chip_class >= GFX9) {
+ out->alignment = rtex->surface.u.gfx9.fmask_alignment;
+ out->size = rtex->surface.u.gfx9.fmask_size;
+ return;
+ }
- /* Force 2D tiling if it wasn't set. This may occur when creating
- * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
- * destination buffer must have an FMASK too. */
- fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
- fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+ templ.nr_samples = 1;
+ flags = rtex->surface.flags | RADEON_SURF_FMASK;
- if (rscreen->chip_class >= SI) {
- fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
+ if (rscreen->chip_class <= CAYMAN) {
+ /* Use the same parameters and tile mode. */
+ fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
+ fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
+ fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
+ fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
+
+ if (nr_samples <= 4)
+ fmask.u.legacy.bankh = 4;
}
switch (nr_samples) {
case 2:
case 4:
- fmask.bpe = 1;
- if (rscreen->chip_class <= CAYMAN) {
- fmask.bankh = 4;
- }
+ bpe = 1;
break;
case 8:
- fmask.bpe = 4;
+ bpe = 4;
break;
default:
R600_ERR("Invalid sample count for FMASK allocation.\n");
@@ -672,25 +681,26 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
* This can be fixed by writing a separate FMASK allocator specifically
* for R600-R700 asics. */
if (rscreen->chip_class <= R700) {
- fmask.bpe *= 2;
+ bpe *= 2;
}
- if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
+ if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
+ RADEON_SURF_MODE_2D, &fmask)) {
R600_ERR("Got error in surface_init while allocating FMASK.\n");
return;
}
- assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
+ assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
- out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
+ out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
- out->tile_mode_index = fmask.tiling_index[0];
- out->pitch_in_pixels = fmask.level[0].nblk_x;
- out->bank_height = fmask.bankh;
- out->alignment = MAX2(256, fmask.bo_alignment);
- out->size = fmask.bo_size;
+ out->tile_mode_index = fmask.u.legacy.tiling_index[0];
+ out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
+ out->bank_height = fmask.u.legacy.bankh;
+ out->alignment = MAX2(256, fmask.surf_alignment);
+ out->size = fmask.surf_size;
}
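
The sample-count switch above fixes the FMASK element size at 1 byte for 2x/4x and 4 bytes for 8x, doubled on R600-R700 by the workaround noted above. A compact restatement (hypothetical helper, not the driver's API):

/* Hypothetical helper mirroring the FMASK bpe selection above;
 * returns 0 for unsupported sample counts. */
static unsigned fmask_bpe(unsigned nr_samples, int is_r600_r700)
{
	unsigned bpe;

	switch (nr_samples) {
	case 2:
	case 4: bpe = 1; break;
	case 8: bpe = 4; break;
	default: return 0;
	}
	if (is_r600_r700)
		bpe *= 2; /* the R600-R700 doubling workaround above */
	return bpe;       /* e.g. 4x MSAA on an R700 -> 2 bytes */
}
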
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
@@ -721,8 +731,8 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
- unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
- unsigned height = align(rtex->surface.npix_y, macro_tile_height);
+ unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
+ unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);
unsigned base_align = num_pipes * pipe_interleave_bytes;
unsigned slice_bytes =
@@ -731,10 +741,6 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
assert(macro_tile_width % 128 == 0);
assert(macro_tile_height % 128 == 0);
- out->pitch = pitch_elements;
- out->height = height;
- out->xalign = macro_tile_width;
- out->yalign = macro_tile_height;
out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
out->alignment = MAX2(256, base_align);
out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
@@ -749,6 +755,12 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
unsigned num_pipes = rscreen->info.num_tile_pipes;
unsigned cl_width, cl_height;
+ if (rscreen->chip_class >= GFX9) {
+ out->alignment = rtex->surface.u.gfx9.cmask_alignment;
+ out->size = rtex->surface.u.gfx9.cmask_size;
+ return;
+ }
+
switch (num_pipes) {
case 2:
cl_width = 32;
@@ -773,17 +785,13 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
unsigned base_align = num_pipes * pipe_interleave_bytes;
- unsigned width = align(rtex->surface.npix_x, cl_width*8);
- unsigned height = align(rtex->surface.npix_y, cl_height*8);
+ unsigned width = align(rtex->resource.b.b.width0, cl_width*8);
+ unsigned height = align(rtex->resource.b.b.height0, cl_height*8);
unsigned slice_elements = (width * height) / (8*8);
/* Each element of CMASK is a nibble. */
unsigned slice_bytes = slice_elements / 2;
- out->pitch = width;
- out->height = height;
- out->xalign = cl_width * 8;
- out->yalign = cl_height * 8;
out->slice_tile_max = (width * height) / (128*128);
if (out->slice_tile_max)
out->slice_tile_max -= 1;
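
Since each CMASK element is a nibble covering an 8x8 pixel tile, the slice size above works out to width*height/128 bytes once the surface is aligned. A worked check of that arithmetic:

#include <stdio.h>

int main(void)
{
	/* assumes width/height already aligned to cl_width*8 x cl_height*8 */
	unsigned width = 1024, height = 1024;

	unsigned slice_elements = (width * height) / (8 * 8); /* one per 8x8 tile */
	unsigned slice_bytes = slice_elements / 2;            /* nibble elements */
	unsigned slice_tile_max = (width * height) / (128 * 128) - 1;

	printf("%u elements, %u bytes, slice_tile_max=%u\n",
	       slice_elements, slice_bytes, slice_tile_max); /* 16384, 8192, 63 */
	return 0;
}
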
@@ -826,7 +834,9 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
}
rtex->cmask_buffer = (struct r600_resource *)
- r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
+ r600_aligned_buffer_create(&rscreen->b,
+ R600_RESOURCE_FLAG_UNMAPPABLE,
+ PIPE_USAGE_DEFAULT,
rtex->cmask.size,
rtex->cmask.alignment);
if (rtex->cmask_buffer == NULL) {
@@ -845,28 +855,32 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
-static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
- struct r600_texture *rtex)
+static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
{
unsigned cl_width, cl_height, width, height;
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = rscreen->info.num_tile_pipes;
+ assert(rscreen->chip_class <= VI);
+
+ rtex->surface.htile_size = 0;
+
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
- return 0;
+ return;
/* HW bug on R6xx. */
if (rscreen->chip_class == R600 &&
- (rtex->surface.level[0].npix_x > 7680 ||
- rtex->surface.level[0].npix_y > 7680))
- return 0;
+ (rtex->resource.b.b.width0 > 7680 ||
+ rtex->resource.b.b.height0 > 7680))
+ return;
/* HTILE is broken with 1D tiling on old kernels and CIK. */
if (rscreen->chip_class >= CIK &&
- rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
+ rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
- return 0;
+ return;
/* Overalign HTILE on P2 configs to work around GPU hangs in
* piglit/depthstencil-render-miplevels 585.
@@ -901,11 +915,11 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
break;
default:
assert(0);
- return 0;
+ return;
}
- width = align(rtex->surface.npix_x, cl_width * 8);
- height = align(rtex->surface.npix_y, cl_height * 8);
+ width = align(rtex->resource.b.b.width0, cl_width * 8);
+ height = align(rtex->resource.b.b.height0, cl_height * 8);
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
@@ -913,69 +927,122 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
- rtex->htile.pitch = width;
- rtex->htile.height = height;
- rtex->htile.xalign = cl_width * 8;
- rtex->htile.yalign = cl_height * 8;
- rtex->htile.alignment = base_align;
-
- return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
+ rtex->surface.htile_alignment = base_align;
+ rtex->surface.htile_size =
+ (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
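
HTILE stores 4 bytes per 8x8 pixel tile, and each slice is padded to num_pipes * pipe_interleave_bytes. A worked example of the sizing above; the cl_width/cl_height pair here is a hypothetical stand-in for the per-pipe-config constants chosen by the switch:

#include <stdio.h>

static unsigned align_u(unsigned v, unsigned a) { return (v + a - 1) / a * a; }

int main(void)
{
	unsigned cl_width = 32, cl_height = 32;        /* assumed pipe config */
	unsigned num_pipes = 4, pipe_interleave = 256;
	unsigned layers = 1;

	unsigned width  = align_u(1920, cl_width * 8);  /* -> 2048 */
	unsigned height = align_u(1080, cl_height * 8); /* -> 1280 */
	unsigned slice_bytes = (width * height) / (8 * 8) * 4;
	unsigned base_align = num_pipes * pipe_interleave;
	unsigned htile_size = layers * align_u(slice_bytes, base_align);

	printf("htile_size=%u\n", htile_size);          /* 163840 bytes */
	return 0;
}
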
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- uint64_t htile_size, alignment;
uint32_t clear_value;
- if (rtex->tc_compatible_htile) {
- htile_size = rtex->surface.htile_size;
- alignment = rtex->surface.htile_alignment;
+ if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) {
clear_value = 0x0000030F;
} else {
- htile_size = r600_texture_get_htile_size(rscreen, rtex);
- alignment = rtex->htile.alignment;
+ r600_texture_get_htile_size(rscreen, rtex);
clear_value = 0;
}
- if (!htile_size)
+ if (!rtex->surface.htile_size)
return;
rtex->htile_buffer = (struct r600_resource*)
- r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT,
- htile_size, alignment);
+ r600_aligned_buffer_create(&rscreen->b,
+ R600_RESOURCE_FLAG_UNMAPPABLE,
+ PIPE_USAGE_DEFAULT,
+ rtex->surface.htile_size,
+ rtex->surface.htile_alignment);
if (rtex->htile_buffer == NULL) {
/* This is not a fatal error: we can keep rendering
* without the HTILE buffer. */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
- 0, htile_size, clear_value,
- R600_COHERENCY_NONE);
+ 0, rtex->surface.htile_size,
+ clear_value);
}
}
-void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, FILE *f)
{
int i;
+ /* Common parameters. */
fprintf(f, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
- "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
+ "blk_h=%u, array_size=%u, last_level=%u, "
"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
- rtex->surface.npix_x, rtex->surface.npix_y,
- rtex->surface.npix_z, rtex->surface.blk_w,
- rtex->surface.blk_h, rtex->surface.blk_d,
- rtex->surface.array_size, rtex->surface.last_level,
- rtex->surface.bpe, rtex->surface.nsamples,
+ rtex->resource.b.b.width0, rtex->resource.b.b.height0,
+ rtex->resource.b.b.depth0, rtex->surface.blk_w,
+ rtex->surface.blk_h,
+ rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
+ rtex->surface.bpe, rtex->resource.b.b.nr_samples,
rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
- fprintf(f, " Layout: size=%"PRIu64", alignment=%"PRIu64", bankw=%u, "
+ if (rscreen->chip_class >= GFX9) {
+ fprintf(f, " Surf: size=%"PRIu64", slice_size=%"PRIu64", "
+ "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n",
+ rtex->surface.surf_size,
+ rtex->surface.u.gfx9.surf_slice_size,
+ rtex->surface.surf_alignment,
+ rtex->surface.u.gfx9.surf.swizzle_mode,
+ rtex->surface.u.gfx9.surf.epitch,
+ rtex->surface.u.gfx9.surf_pitch);
+
+ if (rtex->fmask.size) {
+ fprintf(f, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
+ "alignment=%u, swmode=%u, epitch=%u\n",
+ rtex->fmask.offset,
+ rtex->surface.u.gfx9.fmask_size,
+ rtex->surface.u.gfx9.fmask_alignment,
+ rtex->surface.u.gfx9.fmask.swizzle_mode,
+ rtex->surface.u.gfx9.fmask.epitch);
+ }
+
+ if (rtex->cmask.size) {
+ fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", "
+ "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
+ rtex->cmask.offset,
+ rtex->surface.u.gfx9.cmask_size,
+ rtex->surface.u.gfx9.cmask_alignment,
+ rtex->surface.u.gfx9.cmask.rb_aligned,
+ rtex->surface.u.gfx9.cmask.pipe_aligned);
+ }
+
+ if (rtex->htile_buffer) {
+ fprintf(f, " HTile: size=%u, alignment=%u, "
+ "rb_aligned=%u, pipe_aligned=%u\n",
+ rtex->htile_buffer->b.b.width0,
+ rtex->htile_buffer->buf->alignment,
+ rtex->surface.u.gfx9.htile.rb_aligned,
+ rtex->surface.u.gfx9.htile.pipe_aligned);
+ }
+
+ if (rtex->dcc_offset) {
+ fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", "
+ "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
+ rtex->dcc_offset, rtex->surface.dcc_size,
+ rtex->surface.dcc_alignment,
+ rtex->surface.u.gfx9.dcc_pitch_max,
+ rtex->surface.num_dcc_levels);
+ }
+
+ if (rtex->surface.u.gfx9.stencil_offset) {
+ fprintf(f, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n",
+ rtex->surface.u.gfx9.stencil_offset,
+ rtex->surface.u.gfx9.stencil.swizzle_mode,
+ rtex->surface.u.gfx9.stencil.epitch);
+ }
+ return;
+ }
+
+ fprintf(f, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
"bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
- rtex->surface.bo_size, rtex->surface.bo_alignment, rtex->surface.bankw,
- rtex->surface.bankh, rtex->surface.num_banks, rtex->surface.mtilea,
- rtex->surface.tile_split, rtex->surface.pipe_config,
+ rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
+ rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
+ rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
(rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
if (rtex->fmask.size)
@@ -986,65 +1053,60 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
if (rtex->cmask.size)
- fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch=%u, "
- "height=%u, xalign=%u, yalign=%u, slice_tile_max=%u\n",
+ fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
+ "slice_tile_max=%u\n",
rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
- rtex->cmask.pitch, rtex->cmask.height, rtex->cmask.xalign,
- rtex->cmask.yalign, rtex->cmask.slice_tile_max);
+ rtex->cmask.slice_tile_max);
if (rtex->htile_buffer)
- fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
- "xalign=%u, yalign=%u, TC_compatible = %u\n",
+ fprintf(f, " HTile: size=%u, alignment=%u, TC_compatible = %u\n",
rtex->htile_buffer->b.b.width0,
- rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
- rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
+ rtex->htile_buffer->buf->alignment,
rtex->tc_compatible_htile);
if (rtex->dcc_offset) {
- fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
+ fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment);
- for (i = 0; i <= rtex->surface.last_level; i++)
+ for (i = 0; i <= rtex->resource.b.b.last_level; i++)
fprintf(f, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
"fast_clear_size=%"PRIu64"\n",
- i, rtex->surface.level[i].dcc_enabled,
- rtex->surface.level[i].dcc_offset,
- rtex->surface.level[i].dcc_fast_clear_size);
+ i, i < rtex->surface.num_dcc_levels,
+ rtex->surface.u.legacy.level[i].dcc_offset,
+ rtex->surface.u.legacy.level[i].dcc_fast_clear_size);
}
- for (i = 0; i <= rtex->surface.last_level; i++)
+ for (i = 0; i <= rtex->resource.b.b.last_level; i++)
fprintf(f, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
"npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
- "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
- i, rtex->surface.level[i].offset,
- rtex->surface.level[i].slice_size,
+ "mode=%u, tiling_index = %u\n",
+ i, rtex->surface.u.legacy.level[i].offset,
+ rtex->surface.u.legacy.level[i].slice_size,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
- rtex->surface.level[i].nblk_x,
- rtex->surface.level[i].nblk_y,
- rtex->surface.level[i].nblk_z,
- rtex->surface.level[i].pitch_bytes,
- rtex->surface.level[i].mode);
+ rtex->surface.u.legacy.level[i].nblk_x,
+ rtex->surface.u.legacy.level[i].nblk_y,
+ rtex->surface.u.legacy.level[i].mode,
+ rtex->surface.u.legacy.tiling_index[i]);
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
fprintf(f, " StencilLayout: tilesplit=%u\n",
- rtex->surface.stencil_tile_split);
- for (i = 0; i <= rtex->surface.last_level; i++) {
+ rtex->surface.u.legacy.stencil_tile_split);
+ for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
fprintf(f, " StencilLevel[%i]: offset=%"PRIu64", "
"slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
- "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
- i, rtex->surface.stencil_level[i].offset,
- rtex->surface.stencil_level[i].slice_size,
+ "mode=%u, tiling_index = %u\n",
+ i, rtex->surface.u.legacy.stencil_level[i].offset,
+ rtex->surface.u.legacy.stencil_level[i].slice_size,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
- rtex->surface.stencil_level[i].nblk_x,
- rtex->surface.stencil_level[i].nblk_y,
- rtex->surface.stencil_level[i].nblk_z,
- rtex->surface.stencil_level[i].pitch_bytes,
- rtex->surface.stencil_level[i].mode);
+ rtex->surface.u.legacy.stencil_level[i].nblk_x,
+ rtex->surface.u.legacy.stencil_level[i].nblk_y,
+ rtex->surface.u.legacy.stencil_level[i].mode,
+ rtex->surface.u.legacy.stencil_tiling_index[i]);
}
}
}
@@ -1053,8 +1115,6 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
- unsigned pitch_in_bytes_override,
- unsigned offset,
struct pb_buffer *buf,
struct radeon_surf *surface)
{
@@ -1077,25 +1137,29 @@ r600_texture_create_object(struct pipe_screen *screen,
rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
rtex->surface = *surface;
- if (r600_setup_surface(screen, rtex, pitch_in_bytes_override, offset)) {
- FREE(rtex);
- return NULL;
- }
+ rtex->size = rtex->surface.surf_size;
- rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
- assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
- rtex->tc_compatible_htile);
+ rtex->tc_compatible_htile = rtex->surface.htile_size != 0 &&
+ (rtex->surface.flags &
+ RADEON_SURF_TC_COMPATIBLE_HTILE);
- /* TC-compatible HTILE only supports Z32_FLOAT. */
- if (rtex->tc_compatible_htile)
- rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
- else
+ /* TC-compatible HTILE:
+ * - VI only supports Z32_FLOAT.
+ * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
+ if (rtex->tc_compatible_htile) {
+ if (rscreen->chip_class >= GFX9 &&
+ base->format == PIPE_FORMAT_Z16_UNORM)
+ rtex->db_render_format = base->format;
+ else
+ rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+ } else {
rtex->db_render_format = base->format;
+ }
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
- rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+ rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
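
The TC-compatible HTILE format choice above condenses to a small decision table; a minimal sketch with a stand-in enum (PIPE_FORMAT values reduced to what the branch needs, names hypothetical):

/* Hypothetical condensed form of the DB render format choice above:
 * TC-compatible HTILE restricts the depth format to Z32_FLOAT on VI,
 * while GFX9 additionally allows Z16_UNORM. */
enum fmt { FMT_Z16_UNORM, FMT_Z32_FLOAT, FMT_OTHER };

static enum fmt choose_db_render_format(int tc_compatible_htile,
                                        int is_gfx9, enum fmt base)
{
	if (!tc_compatible_htile)
		return base;              /* no restriction without TC HTILE */
	if (is_gfx9 && base == FMT_Z16_UNORM)
		return base;              /* GFX9 also supports Z16 */
	return FMT_Z32_FLOAT;             /* the only VI-compatible choice */
}
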
@@ -1109,8 +1173,13 @@ r600_texture_create_object(struct pipe_screen *screen,
if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
rscreen->chip_class >= EVERGREEN) {
- rtex->can_sample_z = !rtex->surface.depth_adjusted;
- rtex->can_sample_s = !rtex->surface.stencil_adjusted;
+ if (rscreen->chip_class >= GFX9) {
+ rtex->can_sample_z = true;
+ rtex->can_sample_s = true;
+ } else {
+ rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
+ rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
+ }
} else {
if (rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
@@ -1154,7 +1223,7 @@ r600_texture_create_object(struct pipe_screen *screen,
/* Now create the backing buffer. */
if (!buf) {
r600_init_resource_fields(rscreen, resource, rtex->size,
- rtex->surface.bo_alignment);
+ rtex->surface.surf_alignment);
resource->flags |= RADEON_FLAG_HANDLE;
@@ -1178,7 +1247,7 @@ r600_texture_create_object(struct pipe_screen *screen,
/* Initialize the cmask to 0xCC (= compressed state). */
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
rtex->cmask.offset, rtex->cmask.size,
- 0xCCCCCCCC, R600_COHERENCY_NONE);
+ 0xCCCCCCCC);
}
/* Initialize DCC only if the texture is not being imported. */
@@ -1186,7 +1255,7 @@ r600_texture_create_object(struct pipe_screen *screen,
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->dcc_offset,
rtex->surface.dcc_size,
- 0xFFFFFFFF, R600_COHERENCY_NONE);
+ 0xFFFFFFFF);
}
/* Initialize the CMASK base register value. */
@@ -1203,15 +1272,16 @@ r600_texture_create_object(struct pipe_screen *screen,
if (rscreen->debug_flags & DBG_TEX) {
puts("Texture:");
- r600_print_texture_info(rtex, stdout);
+ r600_print_texture_info(rscreen, rtex, stdout);
fflush(stdout);
}
return rtex;
}
-static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
- const struct pipe_resource *templ)
+static enum radeon_surf_mode
+r600_choose_tiling(struct r600_common_screen *rscreen,
+ const struct pipe_resource *templ)
{
const struct util_format_description *desc = util_format_description(templ->format);
bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
@@ -1256,7 +1326,9 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
/* Textures with a very small height are recommended to be linear. */
if (templ->target == PIPE_TEXTURE_1D ||
templ->target == PIPE_TEXTURE_1D_ARRAY ||
- templ->height0 <= 4)
+ /* Only very thin and long 2D textures should benefit from
+ * linear_aligned. */
+ (templ->width0 > 8 && templ->height0 <= 2))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Textures likely to be mapped often. */
@@ -1291,17 +1363,15 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
int r;
r = r600_init_surface(rscreen, &surface, templ,
- r600_choose_tiling(rscreen, templ),
- is_flushed_depth, tc_compatible_htile);
+ r600_choose_tiling(rscreen, templ), 0, 0,
+ false, false, is_flushed_depth,
+ tc_compatible_htile);
if (r) {
return NULL;
}
- r = rscreen->ws->surface_best(rscreen->ws, &surface);
- if (r) {
- return NULL;
- }
- return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0,
- 0, NULL, &surface);
+
+ return (struct pipe_resource *)
+ r600_texture_create_object(screen, templ, NULL, &surface);
}
static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
@@ -1317,6 +1387,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
int r;
struct radeon_bo_metadata metadata = {};
struct r600_texture *rtex;
+ bool is_scanout;
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
@@ -1329,31 +1400,39 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
rscreen->ws->buffer_get_metadata(buf, &metadata);
- surface.pipe_config = metadata.pipe_config;
- surface.bankw = metadata.bankw;
- surface.bankh = metadata.bankh;
- surface.tile_split = metadata.tile_split;
- surface.mtilea = metadata.mtilea;
- surface.num_banks = metadata.num_banks;
-
- if (metadata.macrotile == RADEON_LAYOUT_TILED)
- array_mode = RADEON_SURF_MODE_2D;
- else if (metadata.microtile == RADEON_LAYOUT_TILED)
- array_mode = RADEON_SURF_MODE_1D;
- else
- array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+ if (rscreen->chip_class >= GFX9) {
+ if (metadata.u.gfx9.swizzle_mode > 0)
+ array_mode = RADEON_SURF_MODE_2D;
+ else
+ array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ is_scanout = metadata.u.gfx9.swizzle_mode == 0 ||
+ metadata.u.gfx9.swizzle_mode % 4 == 2;
+ } else {
+ surface.u.legacy.pipe_config = metadata.u.legacy.pipe_config;
+ surface.u.legacy.bankw = metadata.u.legacy.bankw;
+ surface.u.legacy.bankh = metadata.u.legacy.bankh;
+ surface.u.legacy.tile_split = metadata.u.legacy.tile_split;
+ surface.u.legacy.mtilea = metadata.u.legacy.mtilea;
+ surface.u.legacy.num_banks = metadata.u.legacy.num_banks;
+
+ if (metadata.u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ array_mode = RADEON_SURF_MODE_2D;
+ else if (metadata.u.legacy.microtile == RADEON_LAYOUT_TILED)
+ array_mode = RADEON_SURF_MODE_1D;
+ else
+ array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ is_scanout = metadata.u.legacy.scanout;
+ }
- r = r600_init_surface(rscreen, &surface, templ, array_mode,
- false, false);
+ r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
+ offset, true, is_scanout, false, false);
if (r) {
return NULL;
}
- if (metadata.scanout)
- surface.flags |= RADEON_SURF_SCANOUT;
-
- rtex = r600_texture_create_object(screen, templ, stride,
- offset, buf, &surface);
+ rtex = r600_texture_create_object(screen, templ, buf, &surface);
if (!rtex)
return NULL;
@@ -1363,6 +1442,11 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
if (rscreen->apply_opaque_metadata)
rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
+ /* Validate that addrlib arrived at the same surface parameters. */
+ if (rscreen->chip_class >= GFX9) {
+ assert(metadata.u.gfx9.swizzle_mode == surface.u.gfx9.surf.swizzle_mode);
+ }
+
return &rtex->resource.b.b;
}
@@ -1486,7 +1570,7 @@ static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
/* There is no point in discarding depth and tiled buffers. */
assert(!rtex->is_depth);
- assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED);
+ assert(rtex->surface.is_linear);
/* Reallocate the buffer in the same pipe_resource. */
r600_alloc_resource(rscreen, &rtex->resource);
@@ -1495,8 +1579,7 @@ static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
rtex->cmask.base_address_reg =
(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
- r600_dirty_all_framebuffer_states(rscreen);
- p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
+ p_atomic_inc(&rscreen->dirty_tex_counter);
rctx->num_alloc_tex_transfer_bytes += rtex->size;
}
@@ -1517,6 +1600,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
bool use_staging_texture = false;
assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
+ assert(box->width && box->height && box->depth);
/* Depth textures use staging unconditionally. */
if (!rtex->is_depth) {
@@ -1539,17 +1623,18 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
/* Tiled textures need to be converted into a linear texture for CPU
* access. The staging texture is always linear and is placed in GART.
*
- * Reading from VRAM is slow, always use the staging texture in
- * this case.
+ * Reading from VRAM or GTT WC is slow; always use the staging
+ * texture in this case.
*
* Use the staging texture for uploads if the underlying BO
* is busy.
*/
- if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D)
+ if (!rtex->surface.is_linear)
use_staging_texture = true;
else if (usage & PIPE_TRANSFER_READ)
- use_staging_texture = (rtex->resource.domains &
- RADEON_DOMAIN_VRAM) != 0;
+ use_staging_texture =
+ rtex->resource.domains & RADEON_DOMAIN_VRAM ||
+ rtex->resource.flags & RADEON_FLAG_GTT_WC;
/* Write & linear only: */
else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
RADEON_USAGE_READWRITE) ||
@@ -1567,7 +1652,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
trans = CALLOC_STRUCT(r600_transfer);
if (!trans)
return NULL;
- trans->transfer.resource = texture;
+ pipe_resource_reference(&trans->transfer.resource, texture);
trans->transfer.level = level;
trans->transfer.usage = usage;
trans->transfer.box = *box;
@@ -1609,8 +1694,12 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
0, 0, 0, box->depth, 0, 0);
pipe_resource_reference(&temp, NULL);
}
- }
- else {
+
+ /* Just get the strides. */
+ r600_texture_get_offset(rctx->screen, staging_depth, level, NULL,
+ &trans->transfer.stride,
+ &trans->transfer.layer_stride);
+ } else {
/* XXX: only readback the rectangle which is being mapped? */
/* XXX: when discard is true, no need to read back from depth texture */
if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
@@ -1624,11 +1713,12 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
box->z, box->z + box->depth - 1,
0, 0);
- offset = r600_texture_get_offset(staging_depth, level, box);
+ offset = r600_texture_get_offset(rctx->screen, staging_depth,
+ level, box,
+ &trans->transfer.stride,
+ &trans->transfer.layer_stride);
}
- trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
- trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
trans->staging = (struct r600_resource*)staging_depth;
buf = trans->staging;
} else if (use_staging_texture) {
@@ -1648,8 +1738,11 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
return NULL;
}
trans->staging = &staging->resource;
- trans->transfer.stride = staging->surface.level[0].pitch_bytes;
- trans->transfer.layer_stride = staging->surface.level[0].slice_size;
+
+ /* Just get the strides. */
+ r600_texture_get_offset(rctx->screen, staging, 0, NULL,
+ &trans->transfer.stride,
+ &trans->transfer.layer_stride);
if (usage & PIPE_TRANSFER_READ)
r600_copy_to_staging_texture(ctx, trans);
@@ -1659,9 +1752,9 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
buf = trans->staging;
} else {
/* the resource is mapped directly */
- trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
- trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
- offset = r600_texture_get_offset(rtex, level, box);
+ offset = r600_texture_get_offset(rctx->screen, rtex, level, box,
+ &trans->transfer.stride,
+ &trans->transfer.layer_stride);
buf = &rtex->resource;
}
@@ -1717,6 +1810,7 @@ static void r600_texture_transfer_unmap(struct pipe_context *ctx,
rctx->num_alloc_tex_transfer_bytes = 0;
}
+ pipe_resource_reference(&transfer->resource, NULL);
FREE(transfer);
}
@@ -1813,15 +1907,26 @@ bool vi_dcc_formats_compatible(enum pipe_format format1,
type1 == type2;
}
-void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx,
+bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
+ unsigned level,
+ enum pipe_format view_format)
+{
+ struct r600_texture *rtex = (struct r600_texture *)tex;
+
+ return vi_dcc_enabled(rtex, level) &&
+ !vi_dcc_formats_compatible(tex->format, view_format);
+}
+
+/* This can't be merged with the above function, because
+ * vi_dcc_formats_compatible should be called only when DCC is enabled. */
+void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
struct pipe_resource *tex,
unsigned level,
enum pipe_format view_format)
{
struct r600_texture *rtex = (struct r600_texture *)tex;
- if (rtex->dcc_offset &&
- rtex->surface.level[level].dcc_enabled &&
+ if (vi_dcc_enabled(rtex, level) &&
!vi_dcc_formats_compatible(tex->format, view_format))
if (!r600_texture_disable_dcc(rctx, (struct r600_texture*)tex))
rctx->decompress_dcc(&rctx->b, rtex);
@@ -1830,10 +1935,9 @@ void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx,
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
+ unsigned width0, unsigned height0,
unsigned width, unsigned height)
{
- struct r600_common_context *rctx = (struct r600_common_context*)pipe;
- struct r600_texture *rtex = (struct r600_texture*)texture;
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
if (!surface)
@@ -1849,13 +1953,14 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
surface->base.width = width;
surface->base.height = height;
surface->base.u = templ->u;
- surface->level_info = &rtex->surface.level[templ->u.tex.level];
- if (texture->target != PIPE_BUFFER)
- vi_dcc_disable_if_incompatible_format(rctx, texture,
- templ->u.tex.level,
- templ->format);
+ surface->width0 = width0;
+ surface->height0 = height0;
+ surface->dcc_incompatible =
+ texture->target != PIPE_BUFFER &&
+ vi_dcc_formats_are_incompatible(texture, templ->u.tex.level,
+ templ->format);
return &surface->base;
}
@@ -1866,6 +1971,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
unsigned level = templ->u.tex.level;
unsigned width = u_minify(tex->width0, level);
unsigned height = u_minify(tex->height0, level);
+ unsigned width0 = tex->width0;
+ unsigned height0 = tex->height0;
if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
const struct util_format_description *tex_desc
@@ -1884,10 +1991,15 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
width = nblks_x * templ_desc->block.width;
height = nblks_y * templ_desc->block.height;
+
+ width0 = util_format_get_nblocksx(tex->format, width0);
+ height0 = util_format_get_nblocksy(tex->format, height0);
}
}
- return r600_create_surface_custom(pipe, tex, templ, width, height);
+ return r600_create_surface_custom(pipe, tex, templ,
+ width0, height0,
+ width, height);
}
static void r600_surface_destroy(struct pipe_context *pipe,
@@ -2157,7 +2269,7 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
if (!tex->resource.is_shared ||
!(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) ||
tex->resource.b.b.target != PIPE_TEXTURE_2D ||
- tex->surface.last_level > 0 ||
+ tex->resource.b.b.last_level > 0 ||
!tex->surface.dcc_size)
return;
@@ -2173,7 +2285,7 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
if (!vi_should_enable_separate_dcc(tex))
return; /* stats show that DCC decompression is too expensive */
- assert(tex->surface.level[0].dcc_enabled);
+ assert(tex->surface.num_dcc_levels);
assert(!tex->dcc_separate_buffer);
r600_texture_discard_cmask(rctx->screen, tex);
@@ -2186,7 +2298,8 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
tex->last_dcc_separate_buffer = NULL;
} else {
tex->dcc_separate_buffer = (struct r600_resource*)
- r600_aligned_buffer_create(rctx->b.screen, 0,
+ r600_aligned_buffer_create(rctx->b.screen,
+ R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
tex->surface.dcc_size,
tex->surface.dcc_alignment);
@@ -2272,7 +2385,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
memset(&uc, 0, sizeof(uc));
- if (util_format_get_blocksizebits(surface_format) == 128) {
+ if (rtex->surface.bpe == 16) {
/* DCC fast clear only:
* CLEAR_WORD0 = R = G = B
* CLEAR_WORD1 = A
@@ -2386,9 +2499,9 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
unsigned level, unsigned clear_value)
{
struct pipe_resource *dcc_buffer;
- uint64_t dcc_offset;
+ uint64_t dcc_offset, clear_size;
- assert(rtex->dcc_offset && rtex->surface.level[level].dcc_enabled);
+ assert(vi_dcc_enabled(rtex, level));
if (rtex->dcc_separate_buffer) {
dcc_buffer = &rtex->dcc_separate_buffer->b.b;
@@ -2398,10 +2511,18 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
dcc_offset = rtex->dcc_offset;
}
- dcc_offset += rtex->surface.level[level].dcc_offset;
+ if (rctx->chip_class >= GFX9) {
+ /* Mipmap level clears aren't implemented. */
+ assert(rtex->resource.b.b.last_level == 0);
+ /* MSAA needs a different clear size. */
+ assert(rtex->resource.b.b.nr_samples <= 1);
+ clear_size = rtex->surface.dcc_size;
+ } else {
+ dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
+ clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size;
+ }
- rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset,
- rtex->surface.level[level].dcc_fast_clear_size,
+ rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size,
clear_value, R600_COHERENCY_CB_META);
}
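
Condensing the branch above: GFX9 clears the entire DCC surface (per-level clears are unimplemented there), while older chips clear one level's fast-clear window. A hedged sketch with abridged struct fields (names hypothetical):

#include <stdint.h>

/* Hypothetical sketch of the DCC clear-range selection above. */
struct dcc_level_info { uint64_t dcc_offset, dcc_fast_clear_size; };

static void dcc_clear_range(int is_gfx9, uint64_t base_dcc_offset,
                            uint64_t dcc_size,
                            const struct dcc_level_info *level,
                            uint64_t *offset, uint64_t *size)
{
	if (is_gfx9) {
		*offset = base_dcc_offset; /* whole-surface clear */
		*size = dcc_size;
	} else {
		*offset = base_dcc_offset + level->dcc_offset;
		*size = level->dcc_fast_clear_size;
	}
}
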
@@ -2413,27 +2534,59 @@ static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
if (rtex->resource.is_shared ||
- rtex->surface.nsamples <= 1 ||
+ rtex->resource.b.b.nr_samples <= 1 ||
rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
return;
- assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
- assert(rtex->surface.last_level == 0);
+ assert(rscreen->chip_class >= GFX9 ||
+ rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
+ assert(rtex->resource.b.b.last_level == 0);
+
+ if (rscreen->chip_class >= GFX9) {
+ /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
+ assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4);
+
+ /* If you do swizzle_mode % 4, you'll get:
+ * 0 = Depth
+ * 1 = Standard
+ * 2 = Displayable
+ * 3 = Rotated
+ *
+ * Depth-sample order isn't allowed here.
+ */
+ assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
- /* These magic numbers were copied from addrlib. It doesn't use any
- * definitions for them either. They are all 2D_TILED_THIN1 modes with
- * different bpp and micro tile mode.
- */
- if (rscreen->chip_class >= CIK) {
switch (rtex->last_msaa_resolve_target_micro_mode) {
- case 0: /* displayable */
- rtex->surface.tiling_index[0] = 10;
+ case RADEON_MICRO_MODE_DISPLAY:
+ rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
+ rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
break;
- case 1: /* thin */
- rtex->surface.tiling_index[0] = 14;
+ case RADEON_MICRO_MODE_THIN:
+ rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
+ rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
break;
- case 3: /* rotated */
- rtex->surface.tiling_index[0] = 28;
+ case RADEON_MICRO_MODE_ROTATED:
+ rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
+ rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
+ break;
+ default: /* depth */
+ assert(!"unexpected micro mode");
+ return;
+ }
+ } else if (rscreen->chip_class >= CIK) {
+ /* These magic numbers were copied from addrlib. It doesn't use
+ * any definitions for them either. They are all 2D_TILED_THIN1
+ * modes with different bpp and micro tile mode.
+ */
+ switch (rtex->last_msaa_resolve_target_micro_mode) {
+ case RADEON_MICRO_MODE_DISPLAY:
+ rtex->surface.u.legacy.tiling_index[0] = 10;
+ break;
+ case RADEON_MICRO_MODE_THIN:
+ rtex->surface.u.legacy.tiling_index[0] = 14;
+ break;
+ case RADEON_MICRO_MODE_ROTATED:
+ rtex->surface.u.legacy.tiling_index[0] = 28;
break;
default: /* depth, thick */
assert(!"unexpected micro mode");
@@ -2441,32 +2594,32 @@ static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
}
} else { /* SI */
switch (rtex->last_msaa_resolve_target_micro_mode) {
- case 0: /* displayable */
+ case RADEON_MICRO_MODE_DISPLAY:
switch (rtex->surface.bpe) {
case 1:
- rtex->surface.tiling_index[0] = 10;
+ rtex->surface.u.legacy.tiling_index[0] = 10;
break;
case 2:
- rtex->surface.tiling_index[0] = 11;
+ rtex->surface.u.legacy.tiling_index[0] = 11;
break;
default: /* 4, 8 */
- rtex->surface.tiling_index[0] = 12;
+ rtex->surface.u.legacy.tiling_index[0] = 12;
break;
}
break;
- case 1: /* thin */
+ case RADEON_MICRO_MODE_THIN:
switch (rtex->surface.bpe) {
case 1:
- rtex->surface.tiling_index[0] = 14;
+ rtex->surface.u.legacy.tiling_index[0] = 14;
break;
case 2:
- rtex->surface.tiling_index[0] = 15;
+ rtex->surface.u.legacy.tiling_index[0] = 15;
break;
case 4:
- rtex->surface.tiling_index[0] = 16;
+ rtex->surface.u.legacy.tiling_index[0] = 16;
break;
default: /* 8, 16 */
- rtex->surface.tiling_index[0] = 17;
+ rtex->surface.u.legacy.tiling_index[0] = 17;
break;
}
break;
@@ -2478,8 +2631,7 @@ static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
- p_atomic_inc(&rscreen->dirty_fb_counter);
- p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
+ p_atomic_inc(&rscreen->dirty_tex_counter);
}
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
@@ -2523,7 +2675,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
}
/* only supported on tiled surfaces */
- if (tex->surface.level[0].mode < RADEON_SURF_MODE_1D) {
+ if (tex->surface.is_linear) {
continue;
}
@@ -2536,8 +2688,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
continue;
/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
- if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
- rctx->chip_class >= CIK &&
+ if (rctx->chip_class == CIK &&
+ tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
rctx->screen->info.drm_major == 2 &&
rctx->screen->info.drm_minor < 38) {
continue;
@@ -2550,9 +2702,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
!(rctx->screen->debug_flags & DBG_NO_DCC_FB)) {
vi_separate_dcc_try_enable(rctx, tex);
- /* Stoney can't do a CMASK-based clear, so all clears are
- * considered to be hypothetically slow clears, which
- * is weighed when determining to enable separate DCC.
+ /* A CMASK-only clear isn't supported with RB+ on Stoney,
+ * so all clears are considered hypothetically slow
+ * clears, which is weighed when determining whether to
+ * enable separate DCC.
*/
if (tex->dcc_gather_statistics &&
rctx->family == CHIP_STONEY)
@@ -2560,10 +2713,14 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
}
/* Try to clear DCC first, otherwise try CMASK. */
- if (tex->dcc_offset && tex->surface.level[0].dcc_enabled) {
+ if (vi_dcc_enabled(tex, 0)) {
uint32_t reset_value;
bool clear_words_needed;
+ /* TODO: fix DCC clear */
+ if (rctx->chip_class >= GFX9)
+ continue;
+
if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
continue;
@@ -2574,16 +2731,23 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
vi_dcc_clear_level(rctx, tex, 0, reset_value);
- if (clear_words_needed)
- tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+ unsigned level_bit = 1 << fb->cbufs[i]->u.tex.level;
+ if (clear_words_needed) {
+ bool need_compressed_update = !tex->dirty_level_mask;
+
+ tex->dirty_level_mask |= level_bit;
+
+ if (need_compressed_update)
+ p_atomic_inc(&rctx->screen->compressed_colortex_counter);
+ }
tex->separate_dcc_dirty = true;
} else {
/* 128-bit formats are unsupported */
- if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) {
+ if (tex->surface.bpe > 8) {
continue;
}
- /* Stoney/RB+ doesn't work with CMASK fast clear. */
+ /* RB+ doesn't work with CMASK fast clear on Stoney. */
if (rctx->family == CHIP_STONEY)
continue;
@@ -2598,7 +2762,12 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
tex->cmask.offset, tex->cmask.size, 0,
R600_COHERENCY_CB_META);
+ bool need_compressed_update = !tex->dirty_level_mask;
+
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+
+ if (need_compressed_update)
+ p_atomic_inc(&rctx->screen->compressed_colortex_counter);
}
/* We can change the micro tile mode before a full clear. */
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
index fb1491a28..d5352d9de 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c
@@ -91,6 +91,12 @@ struct ruvd_decoder {
bool use_legacy;
struct rvid_buffer ctx;
struct rvid_buffer sessionctx;
+ struct {
+ unsigned data0;
+ unsigned data1;
+ unsigned cmd;
+ unsigned cntl;
+ } reg;
};
/* flush IB to the hardware */
@@ -120,14 +126,14 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
uint64_t addr;
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;
- set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
- set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
+ set_reg(dec, dec->reg.data0, addr);
+ set_reg(dec, dec->reg.data1, addr >> 32);
} else {
off += dec->ws->buffer_get_reloc_offset(buf);
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
}
- set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
+ set_reg(dec, dec->reg.cmd, cmd << 1);
}
/* does the codec need an IT buffer? */
@@ -151,6 +157,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
+ memset(dec->msg, 0, sizeof(*dec->msg));
+
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
if (have_it(dec))
dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);
@@ -322,6 +330,14 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_
return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
}
+static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
+{
+ if (((struct r600_common_screen*)dec->screen)->family < CHIP_VEGA10)
+ return 16;
+ else
+ return 32;
+}
+
/* calculate size of reference picture buffer */
static unsigned calc_dpb_size(struct ruvd_decoder *dec)
{
@@ -335,7 +351,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
unsigned max_references = dec->base.max_references + 1;
// aligned size of a single frame
- image_size = width * height;
+ image_size = align(width, get_db_pitch_alignment(dec)) * height;
image_size += image_size / 2;
image_size = align(image_size, 1024);
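
The image size above is plain NV12 math, a full-resolution luma plane plus a half-size interleaved chroma plane, with the row width now rounded up to the DB pitch alignment (16 before Vega10, 32 from Vega10 on). A worked example:

#include <stdio.h>

static unsigned align_u(unsigned v, unsigned a) { return (v + a - 1) / a * a; }

int main(void)
{
	unsigned width = 1920, height = 1088;
	unsigned db_pitch_align = 32;                  /* Vega10+; 16 before */

	unsigned image_size = align_u(width, db_pitch_align) * height; /* luma */
	image_size += image_size / 2;                  /* NV12 chroma plane */
	image_size = align_u(image_size, 1024);

	printf("image_size=%u\n", image_size);         /* 3133440 bytes */
	return 0;
}
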
@@ -410,9 +426,9 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
width = align (width, 16);
height = align (height, 16);
if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
- dpb_size = align((width * height * 9) / 4, 256) * max_references;
+ dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references;
else
- dpb_size = align((width * height * 3) / 2, 256) * max_references;
+ dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references;
break;
case PIPE_VIDEO_FORMAT_VC1:
@@ -478,6 +494,7 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
memset(&result, 0, sizeof(result));
switch (pic->base.profile) {
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
result.profile = RUVD_H264_PROFILE_BASELINE;
break;
@@ -703,13 +720,16 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video
result.direct_reflist[i][j] = pic->RefPicList[i][j];
}
- if ((pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) &&
- (target->buffer_format == PIPE_FORMAT_NV12)) {
- result.p010_mode = 0;
- result.luma_10to8 = 5;
- result.chroma_10to8 = 5;
- result.sclr_luma10to8 = 4;
- result.sclr_chroma10to8 = 4;
+ if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
+ if (target->buffer_format == PIPE_FORMAT_P016) {
+ result.p010_mode = 1;
+ result.msb_mode = 1;
+ } else {
+ result.luma_10to8 = 5;
+ result.chroma_10to8 = 5;
+ result.sclr_luma10to8 = 4;
+ result.sclr_chroma10to8 = 4;
+ }
}
/* TODO
@@ -931,7 +951,6 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
assert(decoder);
map_msg_fb_it_buf(dec);
- memset(dec->msg, 0, sizeof(*dec->msg));
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DESTROY;
dec->msg->stream_handle = dec->stream_handle;
@@ -1074,7 +1093,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
- dec->msg->body.decode.db_pitch = align(dec->base.width, 16);
+ dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
if (dec->stream_type == RUVD_CODEC_H264_PERF &&
((struct r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10)
@@ -1146,7 +1165,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
if (have_it(dec))
send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
- set_reg(dec, RUVD_ENGINE_CNTL, 1);
+ set_reg(dec, dec->reg.cntl, 1);
flush(dec, RADEON_FLUSH_ASYNC);
next_buffer(dec);
@@ -1280,6 +1299,18 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
rvid_clear_buffer(context, &dec->sessionctx);
}
+ if (info.family >= CHIP_VEGA10) {
+ dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
+ dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
+ dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
+ dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15;
+ } else {
+ dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
+ dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
+ dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
+ dec->reg.cntl = RUVD_ENGINE_CNTL;
+ }
+
map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_CREATE;
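
With the register block chosen once at creation, every later command write goes through the stored offsets instead of hard-coded pre-Vega values. A minimal standalone sketch of the pattern; set_reg() is stubbed out here, and only the SOC15 offsets are taken from the radeon_uvd.h defines:

#include <stdio.h>

/* Hypothetical standalone model of the register indirection above:
 * MMIO offsets are picked once per ASIC family and every write goes
 * through the stored block. */
struct reg_block { unsigned data0, data1, cmd, cntl; };

static void set_reg(unsigned reg, unsigned val)
{
	printf("write 0x%05x = 0x%08x\n", reg, val); /* stands in for the IB write */
}

int main(void)
{
	/* SOC15 (Vega10+) offsets from radeon_uvd.h; pre-Vega parts use
	 * the legacy 0xEFxx block instead. */
	struct reg_block reg = { 0x20710, 0x20714, 0x2070c, 0x20718 };
	unsigned long long addr = 0x1234567890ULL;   /* example GPU address */

	set_reg(reg.data0, (unsigned)addr);          /* low 32 address bits */
	set_reg(reg.data1, (unsigned)(addr >> 32));  /* high 32 address bits */
	set_reg(reg.cmd, 1 << 1);                    /* command, shifted as in send_cmd */
	return 0;
}
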
@@ -1315,10 +1346,20 @@ error:
}
/* calculate top/bottom offset */
-static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
+static unsigned texture_offset(struct radeon_surf *surface, unsigned layer,
+ enum ruvd_surface_type type)
{
- return surface->level[0].offset +
- layer * surface->level[0].slice_size;
+ switch (type) {
+ default:
+ case RUVD_SURFACE_TYPE_LEGACY:
+ return surface->u.legacy.level[0].offset +
+ layer * surface->u.legacy.level[0].slice_size;
+ break;
+ case RUVD_SURFACE_TYPE_GFX9:
+ return surface->u.gfx9.surf_offset +
+ layer * surface->u.gfx9.surf_slice_size;
+ break;
+ }
}
/* hw encode the aspect of macro tiles */
@@ -1351,42 +1392,63 @@ static unsigned bank_wh(unsigned bankwh)
* fill decoding target field from the luma and chroma surfaces
*/
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
- struct radeon_surf *chroma)
+ struct radeon_surf *chroma, enum ruvd_surface_type type)
{
- msg->body.decode.dt_pitch = luma->level[0].pitch_bytes;
- switch (luma->level[0].mode) {
- case RADEON_SURF_MODE_LINEAR_ALIGNED:
- msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
- msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
- break;
- case RADEON_SURF_MODE_1D:
- msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
- msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
- break;
- case RADEON_SURF_MODE_2D:
- msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
- msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
- break;
+ switch (type) {
default:
- assert(0);
- break;
- }
+ case RUVD_SURFACE_TYPE_LEGACY:
+ msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x;
+ switch (luma->u.legacy.level[0].mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
+ break;
+ case RADEON_SURF_MODE_1D:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
+ break;
+ case RADEON_SURF_MODE_2D:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
+ break;
+ default:
+ assert(0);
+ break;
+ }
- msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
- msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
- if (msg->body.decode.dt_field_mode) {
- msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
- msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
- } else {
- msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
- msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
- }
+ msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);
+ msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
+ if (msg->body.decode.dt_field_mode) {
+ msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);
+ msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
+ } else {
+ msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
+ msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
+ }
- assert(luma->bankw == chroma->bankw);
- assert(luma->bankh == chroma->bankh);
- assert(luma->mtilea == chroma->mtilea);
+ assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
+ assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
+ assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
- msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->bankw));
- msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh));
- msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea));
+ msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));
+ msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
+ msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
+ break;
+ case RUVD_SURFACE_TYPE_GFX9:
+ msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->bpe;
+ /* SWIZZLE LINEAR MODE */
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
+ msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);
+ msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
+ if (msg->body.decode.dt_field_mode) {
+ msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);
+ msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
+ } else {
+ msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
+ msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
+ }
+ msg->body.decode.dt_surf_tile_config = 0;
+ break;
+ }
}
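
For reference, the two branches above derive the decode-target pitch from different layout descriptions. A minimal standalone sketch, using stand-in structs rather than the real Mesa types (field names match the patch):

    #include <stdint.h>

    /* stand-in types; in Mesa these live in radeon_winsys.h */
    struct legacy_level { uint16_t nblk_x; };     /* blocks per row */
    struct gfx9_layout  { uint16_t surf_pitch; }; /* blocks per row */

    /* pre-GFX9 path: the pitch is taken straight from level 0 blocks */
    static uint32_t dt_pitch_legacy(struct legacy_level lvl0)
    {
        return lvl0.nblk_x;
    }

    /* GFX9 path: surf_pitch is in blocks, scaled to bytes by bpe */
    static uint32_t dt_pitch_gfx9(struct gfx9_layout g, uint32_t bpe)
    {
        return (uint32_t)g.surf_pitch * bpe;
    }
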
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
index e3f8504d8..0c3797e22 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h
@@ -56,6 +56,11 @@
#define RUVD_GPCOM_VCPU_DATA1 0xEF14
#define RUVD_ENGINE_CNTL 0xEF18
+#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c
+#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710
+#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714
+#define RUVD_ENGINE_CNTL_SOC15 0x20718
+
/* UVD commands to VCPU */
#define RUVD_CMD_MSG_BUFFER 0x00000000
#define RUVD_CMD_DPB_BUFFER 0x00000001
@@ -111,6 +116,11 @@
#define RUVD_VC1_PROFILE_MAIN 0x00000001
#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
+enum ruvd_surface_type {
+ RUVD_SURFACE_TYPE_LEGACY = 0,
+ RUVD_SURFACE_TYPE_GFX9
+};
+
struct ruvd_mvc_element {
uint16_t viewOrderIndex;
uint16_t viewId;
@@ -432,5 +442,5 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
/* fill decoding target field from the luma and chroma surfaces */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
- struct radeon_surf *chroma);
+ struct radeon_surf *chroma, enum ruvd_surface_type type);
#endif
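
The *_SOC15 defines added above mirror the legacy UVD mailbox registers at the MMIO offsets used by SOC15 (GFX9-era) parts. A hedged selector sketch; the boolean predicate is an assumption for illustration, not part of this patch:

    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t ruvd_engine_cntl_reg(bool is_soc15)
    {
        /* same mailbox register, relocated on SOC15 register maps */
        return is_soc15 ? 0x20718  /* RUVD_ENGINE_CNTL_SOC15 */
                        : 0xEF18;  /* RUVD_ENGINE_CNTL       */
    }
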
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
index ef93e46c1..70c1e60f5 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c
@@ -52,6 +52,7 @@
#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
+#define FW_53_19_4 ((53 << 24) | (19 << 16) | (4 << 8))
/**
* flush commands to the hardware
@@ -178,14 +179,15 @@ static unsigned get_cpb_num(struct rvce_encoder *enc)
case 41:
dpb = 32768;
break;
- default:
case 42:
dpb = 34816;
break;
case 50:
dpb = 110400;
break;
+ default:
case 51:
+ case 52:
dpb = 184320;
break;
}
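
Two things worth unpacking in this hunk: the FW_x_y_z macros pack (major, minor, revision) into one comparable word, and the dpb values correspond to the H.264 MaxDpbMbs limits per level. A worked check:

    #include <assert.h>

    #define FW_VER(maj, min, rev) (((maj) << 24) | ((min) << 16) | ((rev) << 8))

    int main(void)
    {
        /* FW_53_19_4 as defined above */
        assert(FW_VER(53, 19, 4) == 0x35130400);

        /* level 5.1: dpb = 184320 macroblocks; a 1920x1088-aligned frame
         * is 120 * 68 = 8160 MBs, so at most 22 frames fit before any
         * driver-side clamp on the CPB count. */
        assert(184320 / (120 * 68) == 22);
        return 0;
    }
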
@@ -223,9 +225,17 @@ struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset)
{
- unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
- unsigned vpitch = align(enc->luma->npix_y, 16);
- unsigned fsize = pitch * (vpitch + vpitch / 2);
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
+ unsigned pitch, vpitch, fsize;
+
+ if (rscreen->chip_class < GFX9) {
+ pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
+ vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
+ } else {
+ pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
+ vpitch = align(enc->luma->u.gfx9.surf_height, 16);
+ }
+ fsize = pitch * (vpitch + vpitch / 2);
*luma_offset = slot->index * fsize;
*chroma_offset = *luma_offset + pitch * vpitch;
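
Worked numbers for the pre-GFX9 branch above, assuming an NV12 1080p surface (luma bpe = 1; values illustrative):

    #include <assert.h>

    #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        unsigned pitch  = ALIGN(1920 * 1, 128);        /* 1920 */
        unsigned vpitch = ALIGN(1080, 16);             /* 1088 */
        unsigned fsize  = pitch * (vpitch + vpitch / 2);

        assert(fsize == 3133440);  /* ~2.99 MiB per CPB slot */
        /* slot N: luma at N * fsize, chroma at N * fsize + pitch * vpitch */
        return 0;
    }
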
@@ -412,7 +422,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->use_vui = true;
if (rscreen->info.family >= CHIP_TONGA &&
rscreen->info.family != CHIP_STONEY &&
- rscreen->info.family != CHIP_POLARIS11)
+ rscreen->info.family != CHIP_POLARIS11 &&
+ rscreen->info.family != CHIP_POLARIS12)
enc->dual_pipe = true;
/* TODO enable B frame with dual instance */
if ((rscreen->info.family >= CHIP_TONGA) &&
@@ -454,8 +465,14 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
goto error;
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
- cpb_size = align(tmp_surf->level[0].pitch_bytes, 128);
- cpb_size = cpb_size * align(tmp_surf->npix_y, 32);
+
+ cpb_size = (rscreen->chip_class < GFX9) ?
+ align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+ align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
+
+ align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
+ align(tmp_surf->u.gfx9.surf_height, 32);
+
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
if (enc->dual_pipe)
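
Continuing the same 1080p numbers through the cpb_size computation above (pre-GFX9 branch; cpb_num = 16 is an assumed example value):

    /* align(1920 * 1, 128) * align(1080, 32) = 1920 * 1088 = 2088960
     * * 3 / 2 (NV12: luma + half-size chroma) = 3133440 bytes per slot
     * * cpb_num (e.g. 16)                     = 50135040 bytes (~47.8 MiB),
     * before the dual-pipe allowance added by the branch just above
     * (its body lies outside this hunk). */
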
@@ -493,6 +510,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
break;
+ case FW_53_19_4:
+ radeon_vce_52_init(enc);
+ get_pic_param = radeon_vce_52_get_param;
+ break;
default:
goto error;
@@ -525,6 +546,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
+ case FW_53_19_4:
return true;
default:
return false;
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
index fe15ded39..b9afd089a 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -94,9 +94,9 @@ static void create(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // encPicStructRestriction
RVCE_CS(enc->base.width); // encImageWidth
RVCE_CS(enc->base.height); // encImageHeight
- RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
- RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
- RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
+ RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch
+ RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch
+ RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw
RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO
RVCE_END();
}
@@ -320,12 +320,12 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
- enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
- enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
- RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
- RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
- RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+ enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo
+ RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch
+ RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch
+ RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch
RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode
RVCE_CS(0x00000000); // encInputPicTileConfig
RVCE_CS(enc->pic.picture_type); // encPicType
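
The substitutions in this hunk (and the matching one in radeon_vce_50.c below) are mechanical: the removed per-level pitch_bytes and surface-level npix_y are recomputed from the new legacy layout fields. In short:

    /* old field                 new expression
     * level[0].pitch_bytes  ->  u.legacy.level[0].nblk_x * surf->bpe
     * npix_y                ->  u.legacy.level[0].nblk_y
     * (for linear 1x1-block formats the row counts are numerically equal) */
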
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c
index 262e13ba9..0d1181451 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c
@@ -127,12 +127,12 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
- enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
- enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
- RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
- RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
- RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+ enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo
+ RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch
+ RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch
+ RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch
if (enc->dual_pipe)
RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
else
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c
index 5db01fe52..36cf48047 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -167,6 +167,7 @@ void radeon_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_pict
static void create(struct rvce_encoder *enc)
{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
enc->task_info(enc, 0x00000000, 0, 0, 0);
RVCE_BEGIN(0x01000001); // create cmd
@@ -177,9 +178,17 @@ static void create(struct rvce_encoder *enc)
RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction);
RVCE_CS(enc->base.width); // encImageWidth
RVCE_CS(enc->base.height); // encImageHeight
- RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
- RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
- RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
+
+ if (rscreen->chip_class < GFX9) {
+ RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch
+ RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch
+ RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw
+ } else {
+ RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encRefPicLumaPitch
+ RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encRefPicChromaPitch
+ RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16) / 8); // encRefYHeightInQw
+ }
+
RVCE_CS(enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants);
RVCE_CS(enc->enc_pic.ec.enc_pre_encode_context_buffer_offset);
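
VCE 5.2 is the first of these interface files that must serve both generations, hence the chip_class branch. A hedged helper that would centralize the choice; the name and the bool parameter are illustrative, assuming the radeon_surf layout shown in radeon_winsys.h below:

    static unsigned rvce_pic_pitch(const struct radeon_surf *s, bool gfx9)
    {
        /* both paths yield a byte pitch: blocks per row times bpe */
        return gfx9 ? s->u.gfx9.surf_pitch * s->bpe
                    : s->u.legacy.level[0].nblk_x * s->bpe;
    }
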
@@ -191,6 +200,7 @@ static void create(struct rvce_encoder *enc)
static void encode(struct rvce_encoder *enc)
{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
signed luma_offset, chroma_offset, bs_offset;
unsigned dep, bs_idx = enc->bs_idx++;
int i;
@@ -239,13 +249,25 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(enc->enc_pic.eo.insert_aud);
RVCE_CS(enc->enc_pic.eo.end_of_sequence);
RVCE_CS(enc->enc_pic.eo.end_of_stream);
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
- enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
- enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
- RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
- RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
- RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+
+ if (rscreen->chip_class < GFX9) {
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo
+ RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch
+ RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch
+ RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch
+ } else {
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->u.gfx9.surf_offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->u.gfx9.surf_offset); // inputPictureChromaAddressHi/Lo
+ RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16)); // encInputFrameYPitch
+ RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encInputPicLumaPitch
+ RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encInputPicChromaPitch
+ }
+
if (enc->dual_pipe)
enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00000000;
else
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c
index de8e11cd8..c7ad7f7a3 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c
@@ -72,7 +72,7 @@ bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
* non-sub-allocated buffer.
*/
buffer->res = (struct r600_resource *)
- pipe_buffer_create(screen, PIPE_BIND_CUSTOM | PIPE_BIND_SHARED,
+ pipe_buffer_create(screen, PIPE_BIND_SHARED,
usage, size);
return buffer->res != NULL;
@@ -129,8 +129,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
- rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
- 0, R600_COHERENCY_NONE);
+ rctx->dma_clear_buffer(context, &buffer->res->b.b, 0,
+ buffer->res->buf->size, 0);
context->flush(context, NULL, 0);
}
@@ -138,26 +138,31 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
* join surfaces into the same buffer with identical tiling params
 * sum up their sizes and replace the backend buffers with a single bo
*/
-void rvid_join_surfaces(struct radeon_winsys* ws,
+void rvid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
+ struct radeon_winsys* ws;
unsigned best_tiling, best_wh, off;
unsigned size, alignment;
struct pb_buffer *pb;
unsigned i, j;
+ ws = rctx->ws;
+
for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
unsigned wh;
if (!surfaces[i])
continue;
- /* choose the smallest bank w/h for now */
- wh = surfaces[i]->bankw * surfaces[i]->bankh;
- if (wh < best_wh) {
- best_wh = wh;
- best_tiling = i;
+ if (rctx->chip_class < GFX9) {
+ /* choose the smallest bank w/h for now */
+ wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
+ if (wh < best_wh) {
+ best_wh = wh;
+ best_tiling = i;
+ }
}
}
@@ -165,17 +170,22 @@ void rvid_join_surfaces(struct radeon_winsys* ws,
if (!surfaces[i])
continue;
- /* copy the tiling parameters */
- surfaces[i]->bankw = surfaces[best_tiling]->bankw;
- surfaces[i]->bankh = surfaces[best_tiling]->bankh;
- surfaces[i]->mtilea = surfaces[best_tiling]->mtilea;
- surfaces[i]->tile_split = surfaces[best_tiling]->tile_split;
-
/* adjust the texture layer offsets */
- off = align(off, surfaces[i]->bo_alignment);
- for (j = 0; j < ARRAY_SIZE(surfaces[i]->level); ++j)
- surfaces[i]->level[j].offset += off;
- off += surfaces[i]->bo_size;
+ off = align(off, surfaces[i]->surf_alignment);
+
+ if (rctx->chip_class < GFX9) {
+ /* copy the tiling parameters */
+ surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
+ surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
+ surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea;
+ surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
+
+ for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
+ surfaces[i]->u.legacy.level[j].offset += off;
+ } else
+ surfaces[i]->u.gfx9.surf_offset += off;
+
+ off += surfaces[i]->surf_size;
}
for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
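
The placement loop above packs each component at its next surf_alignment boundary and rebases the per-level (legacy) or whole-surface (GFX9) offsets by the running total; the truncated loop that follows sums the final size. A small worked packing, with illustrative sizes:

    /* two components, 4 KiB alignment (values are examples only) */
    unsigned off = 0;
    off  = ALIGN(off, 4096);   /* luma lands at 0x000000   */
    off += 0x301000;           /* luma surf_size           */
    off  = ALIGN(off, 4096);   /* chroma lands at 0x301000 */
    off += 0x180000;           /* shared bo covers 0x481000 bytes */
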
@@ -279,7 +289,11 @@ int rvid_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return (rscreen->family < CHIP_TONGA) ? 1152 : 4096;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
- return PIPE_FORMAT_NV12;
+ if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+ return PIPE_FORMAT_P016;
+ else
+ return PIPE_FORMAT_NV12;
+
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
if (rscreen->family < CHIP_PALM) {
@@ -331,6 +345,11 @@ boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint)
{
+ /* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
+ if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+ return (format == PIPE_FORMAT_NV12) ||
+ (format == PIPE_FORMAT_P016);
+
/* we can only handle this one with UVD */
if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
return format == PIPE_FORMAT_NV12;
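
P016 is the two-plane 4:2:0 layout with 16 bits per component (10-bit HEVC samples sit in the high bits), so for HEVC Main 10 both it and NV12 remain acceptable. The new check restated as a predicate, name illustrative:

    static bool hevc10_format_ok(enum pipe_format f)
    {
        return f == PIPE_FORMAT_NV12 || f == PIPE_FORMAT_P016;
    }
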
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
index 39305b4fd..3347c4ebc 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h
@@ -66,7 +66,7 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
/* join surfaces into the same buffer with identical tiling params
   sum up their sizes and replace the backend buffers with a single bo */
-void rvid_join_surfaces(struct radeon_winsys* ws,
+void rvid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
index 8946209d3..2e287c67e 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h
@@ -52,7 +52,8 @@ enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
- RADEON_FLAG_HANDLE = (1 << 3), /* the buffer most not be suballocated */
+ RADEON_FLAG_HANDLE = (1 << 3), /* the buffer must not be suballocated */
+ RADEON_FLAG_SPARSE = (1 << 4),
};
enum radeon_bo_usage { /* bitfield */
@@ -66,6 +67,8 @@ enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_SYNCHRONIZED = 8
};
+#define RADEON_SPARSE_PAGE_SIZE (64 * 1024)
+
enum ring_type {
RING_GFX = 0,
RING_COMPUTE,
@@ -81,16 +84,20 @@ enum radeon_value_id {
RADEON_MAPPED_VRAM,
RADEON_MAPPED_GTT,
RADEON_BUFFER_WAIT_TIME_NS,
+ RADEON_NUM_MAPPED_BUFFERS,
RADEON_TIMESTAMP,
- RADEON_NUM_CS_FLUSHES,
+ RADEON_NUM_GFX_IBS,
+ RADEON_NUM_SDMA_IBS,
RADEON_NUM_BYTES_MOVED,
RADEON_NUM_EVICTIONS,
RADEON_VRAM_USAGE,
+ RADEON_VRAM_VIS_USAGE,
RADEON_GTT_USAGE,
RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
RADEON_CURRENT_SCLK,
RADEON_CURRENT_MCLK,
RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
+ RADEON_CS_THREAD_TIME,
};
/* Each group of four has the same priority. */
@@ -182,6 +189,7 @@ struct radeon_info {
uint32_t gart_page_size;
uint64_t gart_size;
uint64_t vram_size;
+ uint64_t vram_vis_size;
uint64_t max_alloc_size;
uint32_t min_alloc_size;
bool has_dedicated_vram;
@@ -196,6 +204,7 @@ struct radeon_info {
uint32_t ce_fw_version;
uint32_t vce_harvest_config;
uint32_t clock_crystal_freq;
+ uint32_t tcc_cache_line_size;
/* Kernel info. */
uint32_t drm_major; /* version */
@@ -231,16 +240,25 @@ struct radeon_bo_metadata {
/* Tiling flags describing the texture layout for display code
* and DRI sharing.
*/
- enum radeon_bo_layout microtile;
- enum radeon_bo_layout macrotile;
- unsigned pipe_config;
- unsigned bankw;
- unsigned bankh;
- unsigned tile_split;
- unsigned mtilea;
- unsigned num_banks;
- unsigned stride;
- bool scanout;
+ union {
+ struct {
+ enum radeon_bo_layout microtile;
+ enum radeon_bo_layout macrotile;
+ unsigned pipe_config;
+ unsigned bankw;
+ unsigned bankh;
+ unsigned tile_split;
+ unsigned mtilea;
+ unsigned num_banks;
+ unsigned stride;
+ bool scanout;
+ } legacy;
+
+ struct {
+ /* surface flags */
+ unsigned swizzle_mode:5;
+ } gfx9;
+ } u;
/* Additional metadata associated with the buffer, in bytes.
* The maximum size is 64 * 4. This is opaque for the winsys & kernel.
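
Because the tiling words now live in a per-generation union, code that fills the metadata has to branch on the target generation; a hedged sketch (the bool parameter is assumed, not part of this header):

    static void fill_md(struct radeon_bo_metadata *md, bool gfx9,
                        unsigned swizzle_mode, unsigned stride)
    {
        if (gfx9)
            md->u.gfx9.swizzle_mode = swizzle_mode;
        else
            md->u.legacy.stride = stride; /* plus the other legacy words */
    }
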
@@ -255,99 +273,151 @@ enum radeon_feature_id {
RADEON_FID_R300_CMASK_ACCESS,
};
-#define RADEON_SURF_MAX_LEVEL 32
-
-#define RADEON_SURF_TYPE_MASK 0xFF
-#define RADEON_SURF_TYPE_SHIFT 0
-#define RADEON_SURF_TYPE_1D 0
-#define RADEON_SURF_TYPE_2D 1
-#define RADEON_SURF_TYPE_3D 2
-#define RADEON_SURF_TYPE_CUBEMAP 3
-#define RADEON_SURF_TYPE_1D_ARRAY 4
-#define RADEON_SURF_TYPE_2D_ARRAY 5
-#define RADEON_SURF_MODE_MASK 0xFF
-#define RADEON_SURF_MODE_SHIFT 8
-#define RADEON_SURF_MODE_LINEAR_ALIGNED 1
-#define RADEON_SURF_MODE_1D 2
-#define RADEON_SURF_MODE_2D 3
+#define RADEON_SURF_MAX_LEVELS 15
+
+enum radeon_surf_mode {
+ RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
+ RADEON_SURF_MODE_1D = 2,
+ RADEON_SURF_MODE_2D = 3,
+};
+
+/* These are defined exactly like GB_TILE_MODEn.MICRO_TILE_MODE_NEW. */
+enum radeon_micro_mode {
+ RADEON_MICRO_MODE_DISPLAY = 0,
+ RADEON_MICRO_MODE_THIN = 1,
+ RADEON_MICRO_MODE_DEPTH = 2,
+ RADEON_MICRO_MODE_ROTATED = 3,
+};
+
+/* the first 16 bits are reserved for libdrm_radeon, don't use them */
#define RADEON_SURF_SCANOUT (1 << 16)
#define RADEON_SURF_ZBUFFER (1 << 17)
#define RADEON_SURF_SBUFFER (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
-#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19)
-#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
+/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
+#define RADEON_SURF_IMPORTED (1 << 24)
+#define RADEON_SURF_OPTIMIZE_FOR_SPACE (1 << 25)
-#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
-#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
-#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
-
-struct radeon_surf_level {
+struct legacy_surf_level {
uint64_t offset;
uint64_t slice_size;
- uint32_t npix_x;
- uint32_t npix_y;
- uint32_t npix_z;
- uint32_t nblk_x;
- uint32_t nblk_y;
- uint32_t nblk_z;
- uint32_t pitch_bytes;
- uint32_t mode;
uint64_t dcc_offset;
uint64_t dcc_fast_clear_size;
- bool dcc_enabled;
+ uint16_t nblk_x;
+ uint16_t nblk_y;
+ enum radeon_surf_mode mode;
};
-struct radeon_surf {
- /* These are inputs to the calculator. */
- uint32_t npix_x;
- uint32_t npix_y;
- uint32_t npix_z;
- uint32_t blk_w;
- uint32_t blk_h;
- uint32_t blk_d;
- uint32_t array_size;
- uint32_t last_level;
- uint32_t bpe;
- uint32_t nsamples;
- uint32_t flags;
-
- /* These are return values. Some of them can be set by the caller, but
- * they will be treated as hints (e.g. bankw, bankh) and might be
- * changed by the calculator.
- */
- uint64_t bo_size;
- uint64_t bo_alignment;
- /* This applies to EG and later. */
- uint32_t bankw;
- uint32_t bankh;
- uint32_t mtilea;
- uint32_t tile_split;
- uint32_t stencil_tile_split;
- struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL];
- struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL];
- uint32_t tiling_index[RADEON_SURF_MAX_LEVEL];
- uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
- uint32_t pipe_config;
- uint32_t num_banks;
- uint32_t macro_tile_index;
- uint32_t micro_tile_mode; /* displayable, thin, depth, rotated */
+struct legacy_surf_layout {
+ unsigned bankw:4; /* max 8 */
+ unsigned bankh:4; /* max 8 */
+ unsigned mtilea:4; /* max 8 */
+ unsigned tile_split:13; /* max 4K */
+ unsigned stencil_tile_split:13; /* max 4K */
+ unsigned pipe_config:5; /* max 17 */
+ unsigned num_banks:5; /* max 16 */
+ unsigned macro_tile_index:4; /* max 15 */
/* Whether the depth miptree or stencil miptree as used by the DB is
 * adjusted from its TC compatible form to ensure depth/stencil
* compatibility. If either is true, the corresponding plane cannot be
* sampled from.
*/
- bool depth_adjusted;
- bool stencil_adjusted;
+ unsigned depth_adjusted:1;
+ unsigned stencil_adjusted:1;
+
+ struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
+ struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
+ uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
+ uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
+};
+
+/* Same as addrlib - AddrResourceType. */
+enum gfx9_resource_type {
+ RADEON_RESOURCE_1D = 0,
+ RADEON_RESOURCE_2D,
+ RADEON_RESOURCE_3D,
+};
+
+struct gfx9_surf_flags {
+ uint16_t swizzle_mode; /* tile mode */
+ uint16_t epitch; /* (pitch - 1) or (height - 1) */
+};
+
+struct gfx9_surf_meta_flags {
+ unsigned rb_aligned:1; /* optimal for RBs */
+ unsigned pipe_aligned:1; /* optimal for TC */
+};
+
+struct gfx9_surf_layout {
+ struct gfx9_surf_flags surf; /* color or depth surface */
+ struct gfx9_surf_flags fmask; /* not added to surf_size */
+ struct gfx9_surf_flags stencil; /* added to surf_size, use stencil_offset */
+
+ struct gfx9_surf_meta_flags dcc; /* metadata of color */
+ struct gfx9_surf_meta_flags htile; /* metadata of depth and stencil */
+ struct gfx9_surf_meta_flags cmask; /* metadata of fmask */
+
+ enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
+ uint64_t surf_offset; /* 0 unless imported with an offset */
+ /* The size of the 2D plane containing all mipmap levels. */
+ uint64_t surf_slice_size;
+ uint16_t surf_pitch; /* in blocks */
+ uint16_t surf_height;
+ /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
+ uint32_t offset[RADEON_SURF_MAX_LEVELS];
+ uint16_t dcc_pitch_max; /* (mip chain pitch - 1) */
+
+ uint64_t stencil_offset; /* separate stencil */
+ uint64_t fmask_size;
+ uint64_t cmask_size;
+
+ uint32_t fmask_alignment;
+ uint32_t cmask_alignment;
+};
+
+struct radeon_surf {
+ /* Format properties. */
+ unsigned blk_w:4;
+ unsigned blk_h:4;
+ unsigned bpe:5;
+ /* Number of mipmap levels where DCC is enabled starting from level 0.
+ * Non-zero levels may be disabled due to alignment constraints, but not
+ * the first level.
+ */
+ unsigned num_dcc_levels:4;
+ unsigned is_linear:1;
+ /* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
+ unsigned micro_tile_mode:3;
+ uint32_t flags;
+
+ /* These are return values. Some of them can be set by the caller, but
+ * they will be treated as hints (e.g. bankw, bankh) and might be
+ * changed by the calculator.
+ */
+ uint64_t surf_size;
uint64_t dcc_size;
- uint64_t dcc_alignment;
- /* TC-compatible HTILE only. */
uint64_t htile_size;
- uint64_t htile_alignment;
+
+ uint32_t surf_alignment;
+ uint32_t dcc_alignment;
+ uint32_t htile_alignment;
+
+ union {
+ /* R600-VI return values.
+ *
+ * Some of them can be set by the caller if certain parameters are
+ * desirable. The allocator will try to obey them.
+ */
+ struct legacy_surf_layout legacy;
+
+ /* GFX9+ return values. */
+ struct gfx9_surf_layout gfx9;
+ } u;
};
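
Note that the union carries no discriminator: whether u.legacy or u.gfx9 is live is implied by the GPU generation the surface was computed for, so readers key off externally tracked chip info. An illustrative accessor:

    /* 'gfx9' must come from the screen/winsys info, not from the surface */
    static uint64_t surf_level0_offset(const struct radeon_surf *s, bool gfx9)
    {
        return gfx9 ? s->u.gfx9.surf_offset
                    : s->u.legacy.level[0].offset;
    }
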
struct radeon_bo_list_item {
@@ -508,6 +578,20 @@ struct radeon_winsys {
struct winsys_handle *whandle);
/**
+ * Change the commitment of a (64KB-page aligned) region of the given
+ * sparse buffer.
+ *
+ * \warning There is no automatic synchronization with command submission.
+ *
+ * \note Only implemented by the amdgpu winsys.
+ *
+ * \return false on out of memory or other failure, true on success.
+ */
+ bool (*buffer_commit)(struct pb_buffer *buf,
+ uint64_t offset, uint64_t size,
+ bool commit);
+
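
A hedged usage fragment for the new hook: commit ranges should be aligned to RADEON_SPARSE_PAGE_SIZE (64 KiB, defined earlier in this header), and per the \warning the caller synchronizes against in-flight submission itself. The surrounding variables (ws, buf, off, len) are assumptions:

    /* back [off, off + len) of a sparse buffer with real pages */
    uint64_t page  = RADEON_SPARSE_PAGE_SIZE;
    uint64_t start = off & ~(page - 1);                     /* round down */
    uint64_t end   = (off + len + page - 1) & ~(page - 1);  /* round up  */

    if (!ws->buffer_commit(buf, start, end - start, true)) {
        /* false means out of memory or failure; flush and retry, or bail */
    }
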
+ /**
* Return the virtual address of a buffer.
*
* When virtual memory is not in use, this is the offset relative to the
@@ -739,18 +823,16 @@ struct radeon_winsys {
* Initialize surface
*
* \param ws The winsys this function is called from.
- * \param surf Surface structure ptr
+ * \param tex Input texture description
+ * \param flags Bitmask of RADEON_SURF_* flags
+ * \param bpe Bytes per pixel; it can be different for Z buffers.
+ * \param mode Preferred tile mode (linear, 1D, or 2D).
+ * \param surf Output structure
*/
int (*surface_init)(struct radeon_winsys *ws,
- struct radeon_surf *surf);
-
- /**
- * Find best values for a surface
- *
- * \param ws The winsys this function is called from.
- * \param surf Surface structure ptr
- */
- int (*surface_best)(struct radeon_winsys *ws,
+ const struct pipe_resource *tex,
+ unsigned flags, unsigned bpe,
+ enum radeon_surf_mode mode,
struct radeon_surf *surf);
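
A hedged call sketch for the reworked entry point; the resource template and bpe are illustrative, and 0 is assumed to mean success:

    struct radeon_surf surf = {0};

    /* 'templ' describes the texture (width, height, samples, ...) */
    if (ws->surface_init(ws, templ, RADEON_SURF_SCANOUT, 4 /* RGBA8 bpe */,
                         RADEON_SURF_MODE_2D, &surf) == 0) {
        /* surf.surf_size and surf.surf_alignment size the allocation;
         * layout details land in surf.u.legacy or surf.u.gfx9 */
    }
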
uint64_t (*query_value)(struct radeon_winsys *ws,