summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/winsys/amdgpu
diff options
context:
space:
mode:
author Jonathan Gray <jsg@cvs.openbsd.org> 2016-05-29 10:22:51 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org> 2016-05-29 10:22:51 +0000
commit c9223eed3c16cd3e98a8f56dda953d8f299de0e3 (patch)
tree 53e2a1c3f13bcf6b4ed201d7bc135e7213c94ebe /lib/mesa/src/gallium/winsys/amdgpu
parent 6e8f2d062ab9c198239b9283b2b7ed12f4ea17d8 (diff)
Import Mesa 11.2.2
Diffstat (limited to 'lib/mesa/src/gallium/winsys/amdgpu')
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp 45
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h 2
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp 6
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h 3
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 291
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h 18
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 164
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h 11
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 52
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 97
-rw-r--r-- lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 10
11 files changed, 372 insertions, 327 deletions
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp
index 7393953c1..570216241 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp
@@ -896,6 +896,49 @@ BOOL_32 CIAddrLib::HwlOverrideTileMode(
/**
***************************************************************************************************
+* CiAddrLib::GetPrtSwitchP4Threshold
+*
+* @brief
+* Return the threshold of switching to P4_* instead of P16_* for PRT resources
+***************************************************************************************************
+*/
+UINT_32 CIAddrLib::GetPrtSwitchP4Threshold() const
+{
+ UINT_32 threshold;
+
+ switch (m_pipes)
+ {
+ case 8:
+ threshold = 32;
+ break;
+ case 16:
+ if (m_settings.isFiji)
+ {
+ threshold = 16;
+ }
+ else if (m_settings.isHawaii)
+ {
+ threshold = 8;
+ }
+ else
+ {
+ ///@todo add for possible new ASICs.
+ ADDR_ASSERT_ALWAYS();
+ threshold = 16;
+ }
+ break;
+ default:
+ ///@todo add for possible new ASICs.
+ ADDR_ASSERT_ALWAYS();
+ threshold = 32;
+ break;
+ }
+
+ return threshold;
+}
+
+/**
+***************************************************************************************************
* CIAddrLib::HwlSetupTileInfo
*
* @brief
@@ -1123,7 +1166,7 @@ VOID CIAddrLib::HwlSetupTileInfo(
{
UINT_32 bytesXSamples = bpp * numSamples / 8;
UINT_32 bytesXThickness = bpp * thickness / 8;
- UINT_32 switchP4Threshold = (m_pipes == 16) ? 8 : 32;
+ UINT_32 switchP4Threshold = GetPrtSwitchP4Threshold();
if ((bytesXSamples > switchP4Threshold) || (bytesXThickness > switchP4Threshold))
{
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h
index 451508619..4cbe9706b 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h
@@ -167,6 +167,8 @@ private:
VOID ReadGbMacroTileCfg(
UINT_32 regValue, ADDR_TILEINFO* pCfg) const;
+ UINT_32 GetPrtSwitchP4Threshold() const;
+
BOOL_32 InitTileSettingTable(
const UINT_32 *pSetting, UINT_32 noOfEntries);
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp
index b1e008b83..088b64593 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp
@@ -352,6 +352,7 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMicroTiled(
ComputeSurfaceAlignmentsMicroTiled(expTileMode,
pIn->bpp,
pIn->flags,
+ pIn->mipLevel,
numSamples,
&pOut->baseAlign,
&pOut->pitchAlign,
@@ -647,6 +648,7 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled(
AddrTileMode tileMode, ///< [in] tile mode
UINT_32 bpp, ///< [in] bits per pixel
ADDR_SURFACE_FLAGS flags, ///< [in] surface flags
+ UINT_32 mipLevel, ///< [in] mip level
UINT_32 numSamples, ///< [in] number of samples
UINT_32* pBaseAlign, ///< [out] base address alignment in bytes
UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels
@@ -669,10 +671,10 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled(
// ECR#393489
// Workaround 2 for 1D tiling - There is HW bug for Carrizo
// where it requires the following alignments for 1D tiling.
- if (flags.czDispCompatible)
+ if (flags.czDispCompatible && (mipLevel == 0))
{
*pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0
- *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 >> (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0
+ *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0
}
// end Carrizo workaround for 1D tilling
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h
index 84adb66ee..25e38964b 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h
@@ -315,7 +315,8 @@ private:
UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
BOOL_32 ComputeSurfaceAlignmentsMicroTiled(
- AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+ UINT_32 mipLevel, UINT_32 numSamples,
UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
BOOL_32 ComputeSurfaceAlignmentsMacroTiled(
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index fe55dc310..59a801b14 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -37,47 +37,16 @@
#include <xf86drm.h>
#include <stdio.h>
-static const struct pb_vtbl amdgpu_winsys_bo_vtbl;
-
static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
{
- assert(bo->vtbl == &amdgpu_winsys_bo_vtbl);
return (struct amdgpu_winsys_bo *)bo;
}
-struct amdgpu_bomgr {
- struct pb_manager base;
- struct amdgpu_winsys *rws;
-};
-
-static struct amdgpu_winsys *get_winsys(struct pb_manager *mgr)
-{
- return ((struct amdgpu_bomgr*)mgr)->rws;
-}
-
-static struct amdgpu_winsys_bo *get_amdgpu_winsys_bo(struct pb_buffer *_buf)
-{
- struct amdgpu_winsys_bo *bo = NULL;
-
- if (_buf->vtbl == &amdgpu_winsys_bo_vtbl) {
- bo = amdgpu_winsys_bo(_buf);
- } else {
- struct pb_buffer *base_buf;
- pb_size offset;
- pb_get_base_buffer(_buf, &base_buf, &offset);
-
- if (base_buf->vtbl == &amdgpu_winsys_bo_vtbl)
- bo = amdgpu_winsys_bo(base_buf);
- }
-
- return bo;
-}
-
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
enum radeon_bo_usage usage)
{
- struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
- struct amdgpu_winsys *ws = bo->rws;
+ struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+ struct amdgpu_winsys *ws = bo->ws;
int i;
if (bo->is_shared) {
@@ -149,16 +118,21 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
}
static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
- struct radeon_winsys_cs_handle *buf)
+ struct pb_buffer *buf)
{
return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}
-static void amdgpu_bo_destroy(struct pb_buffer *_buf)
+void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
int i;
+ pipe_mutex_lock(bo->ws->global_bo_list_lock);
+ LIST_DEL(&bo->global_list_item);
+ bo->ws->num_buffers--;
+ pipe_mutex_unlock(bo->ws->global_bo_list_lock);
+
amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_va_range_free(bo->va_handle);
amdgpu_bo_free(bo->bo);
@@ -167,13 +141,23 @@ static void amdgpu_bo_destroy(struct pb_buffer *_buf)
amdgpu_fence_reference(&bo->fence[i], NULL);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- bo->rws->allocated_vram -= align(bo->base.size, bo->rws->gart_page_size);
+ bo->ws->allocated_vram -= align(bo->base.size, bo->ws->gart_page_size);
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- bo->rws->allocated_gtt -= align(bo->base.size, bo->rws->gart_page_size);
+ bo->ws->allocated_gtt -= align(bo->base.size, bo->ws->gart_page_size);
FREE(bo);
}
-static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
+static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
+{
+ struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+
+ if (bo->use_reusable_pool)
+ pb_cache_add_buffer(&bo->cache_entry);
+ else
+ amdgpu_bo_destroy(_buf);
+}
+
+static void *amdgpu_bo_map(struct pb_buffer *buf,
struct radeon_winsys_cs *rcs,
enum pipe_transfer_usage usage)
{
@@ -241,7 +225,7 @@ static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
RADEON_USAGE_READWRITE);
}
- bo->rws->buffer_wait_time += os_time_get_nano() - time;
+ bo->ws->buffer_wait_time += os_time_get_nano() - time;
}
}
@@ -250,52 +234,43 @@ static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
return bo->user_ptr;
r = amdgpu_bo_cpu_map(bo->bo, &cpu);
+ if (r) {
+ /* Clear the cache and try again. */
+ pb_cache_release_all_buffers(&bo->ws->bo_cache);
+ r = amdgpu_bo_cpu_map(bo->bo, &cpu);
+ }
return r ? NULL : cpu;
}
-static void amdgpu_bo_unmap(struct radeon_winsys_cs_handle *buf)
+static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
amdgpu_bo_cpu_unmap(bo->bo);
}
-static void amdgpu_bo_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- *base_buf = buf;
- *offset = 0;
-}
+static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
+ amdgpu_bo_destroy_or_cache
+ /* other functions are never called */
+};
-static enum pipe_error amdgpu_bo_validate(struct pb_buffer *_buf,
- struct pb_validate *vl,
- unsigned flags)
+static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
- /* Always pinned */
- return PIPE_OK;
-}
+ struct amdgpu_winsys *ws = bo->ws;
-static void amdgpu_bo_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence)
-{
+ pipe_mutex_lock(ws->global_bo_list_lock);
+ LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
+ ws->num_buffers++;
+ pipe_mutex_unlock(ws->global_bo_list_lock);
}
-static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
- amdgpu_bo_destroy,
- NULL, /* never called */
- NULL, /* never called */
- amdgpu_bo_validate,
- amdgpu_bo_fence,
- amdgpu_bo_get_base_buffer,
-};
-
-static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
- pb_size size,
- const struct pb_desc *desc)
+static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
+ unsigned size,
+ unsigned alignment,
+ unsigned usage,
+ enum radeon_bo_domain initial_domain,
+ unsigned flags)
{
- struct amdgpu_winsys *rws = get_winsys(_mgr);
- struct amdgpu_bo_desc *rdesc = (struct amdgpu_bo_desc*)desc;
struct amdgpu_bo_alloc_request request = {0};
amdgpu_bo_handle buf_handle;
uint64_t va = 0;
@@ -303,37 +278,39 @@ static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
amdgpu_va_handle va_handle;
int r;
- assert(rdesc->initial_domain & RADEON_DOMAIN_VRAM_GTT);
+ assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
bo = CALLOC_STRUCT(amdgpu_winsys_bo);
if (!bo) {
return NULL;
}
+ pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base);
request.alloc_size = size;
- request.phys_alignment = desc->alignment;
+ request.phys_alignment = alignment;
- if (rdesc->initial_domain & RADEON_DOMAIN_VRAM) {
+ if (initial_domain & RADEON_DOMAIN_VRAM)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
- if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
- request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- }
- if (rdesc->initial_domain & RADEON_DOMAIN_GTT) {
+ if (initial_domain & RADEON_DOMAIN_GTT)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- if (rdesc->flags & RADEON_FLAG_GTT_WC)
- request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- }
- r = amdgpu_bo_alloc(rws->dev, &request, &buf_handle);
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ if (flags & RADEON_FLAG_GTT_WC)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
fprintf(stderr, "amdgpu: size : %d bytes\n", size);
- fprintf(stderr, "amdgpu: alignment : %d bytes\n", desc->alignment);
- fprintf(stderr, "amdgpu: domains : %d\n", rdesc->initial_domain);
+ fprintf(stderr, "amdgpu: alignment : %d bytes\n", alignment);
+ fprintf(stderr, "amdgpu: domains : %d\n", initial_domain);
goto error_bo_alloc;
}
- r = amdgpu_va_range_alloc(rws->dev, amdgpu_gpu_va_range_general,
- size, desc->alignment, 0, &va, &va_handle, 0);
+ r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+ size, alignment, 0, &va, &va_handle, 0);
if (r)
goto error_va_alloc;
@@ -342,23 +319,25 @@ static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
goto error_va_map;
pipe_reference_init(&bo->base.reference, 1);
- bo->base.alignment = desc->alignment;
- bo->base.usage = desc->usage;
+ bo->base.alignment = alignment;
+ bo->base.usage = usage;
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
- bo->rws = rws;
+ bo->ws = ws;
bo->bo = buf_handle;
bo->va = va;
bo->va_handle = va_handle;
- bo->initial_domain = rdesc->initial_domain;
- bo->unique_id = __sync_fetch_and_add(&rws->next_bo_unique_id, 1);
+ bo->initial_domain = initial_domain;
+ bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
- if (rdesc->initial_domain & RADEON_DOMAIN_VRAM)
- rws->allocated_vram += align(size, rws->gart_page_size);
- else if (rdesc->initial_domain & RADEON_DOMAIN_GTT)
- rws->allocated_gtt += align(size, rws->gart_page_size);
+ if (initial_domain & RADEON_DOMAIN_VRAM)
+ ws->allocated_vram += align(size, ws->gart_page_size);
+ else if (initial_domain & RADEON_DOMAIN_GTT)
+ ws->allocated_gtt += align(size, ws->gart_page_size);
- return &bo->base;
+ amdgpu_add_buffer_to_global_list(bo);
+
+ return bo;
error_va_map:
amdgpu_va_range_free(va_handle);
@@ -371,48 +350,15 @@ error_bo_alloc:
return NULL;
}
-static void amdgpu_bomgr_flush(struct pb_manager *mgr)
-{
- /* NOP */
-}
-
-/* This is for the cache bufmgr. */
-static boolean amdgpu_bomgr_is_buffer_busy(struct pb_manager *_mgr,
- struct pb_buffer *_buf)
+bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
- return TRUE;
- }
-
- if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) {
- return TRUE;
+ return false;
}
- return FALSE;
-}
-
-static void amdgpu_bomgr_destroy(struct pb_manager *mgr)
-{
- FREE(mgr);
-}
-
-struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws)
-{
- struct amdgpu_bomgr *mgr;
-
- mgr = CALLOC_STRUCT(amdgpu_bomgr);
- if (!mgr)
- return NULL;
-
- mgr->base.destroy = amdgpu_bomgr_destroy;
- mgr->base.create_buffer = amdgpu_bomgr_create_bo;
- mgr->base.flush = amdgpu_bomgr_flush;
- mgr->base.is_buffer_busy = amdgpu_bomgr_is_buffer_busy;
-
- mgr->rws = rws;
- return &mgr->base;
+ return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}
static unsigned eg_tile_split(unsigned tile_split)
@@ -453,7 +399,7 @@ static void amdgpu_bo_get_tiling(struct pb_buffer *_buf,
unsigned *mtilea,
bool *scanout)
{
- struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+ struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_bo_info info = {0};
uint32_t tiling_flags;
int r;
@@ -494,7 +440,7 @@ static void amdgpu_bo_set_tiling(struct pb_buffer *_buf,
uint32_t pitch,
bool scanout)
{
- struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+ struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_bo_metadata metadata = {0};
uint32_t tiling_flags = 0;
@@ -523,12 +469,6 @@ static void amdgpu_bo_set_tiling(struct pb_buffer *_buf,
amdgpu_bo_set_metadata(bo->bo, &metadata);
}
-static struct radeon_winsys_cs_handle *amdgpu_get_cs_handle(struct pb_buffer *_buf)
-{
- /* return a direct pointer to amdgpu_winsys_bo. */
- return (struct radeon_winsys_cs_handle*)get_amdgpu_winsys_bo(_buf);
-}
-
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
unsigned size,
@@ -538,9 +478,8 @@ amdgpu_bo_create(struct radeon_winsys *rws,
enum radeon_bo_flag flags)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
- struct amdgpu_bo_desc desc;
- struct pb_manager *provider;
- struct pb_buffer *buffer;
+ struct amdgpu_winsys_bo *bo;
+ unsigned usage = 0;
/* Don't use VRAM if the GPU doesn't have much. This is only the initial
* domain. The kernel is free to move the buffer if it wants to.
@@ -552,9 +491,6 @@ amdgpu_bo_create(struct radeon_winsys *rws,
flags = RADEON_FLAG_GTT_WC;
}
- memset(&desc, 0, sizeof(desc));
- desc.base.alignment = alignment;
-
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
* like constant/uniform buffers, can benefit from better and more reuse.
@@ -565,26 +501,33 @@ amdgpu_bo_create(struct radeon_winsys *rws,
* might consider different sets of domains / flags compatible
*/
if (domain == RADEON_DOMAIN_VRAM_GTT)
- desc.base.usage = 1 << 2;
+ usage = 1 << 2;
else
- desc.base.usage = domain >> 1;
- assert(flags < sizeof(desc.base.usage) * 8 - 3);
- desc.base.usage |= 1 << (flags + 3);
-
- desc.initial_domain = domain;
- desc.flags = flags;
-
- /* Assign a buffer manager. */
- if (use_reusable_pool)
- provider = ws->cman;
- else
- provider = ws->kman;
+ usage = domain >> 1;
+ assert(flags < sizeof(usage) * 8 - 3);
+ usage |= 1 << (flags + 3);
+
+ /* Get a buffer from the cache. */
+ if (use_reusable_pool) {
+ bo = (struct amdgpu_winsys_bo*)
+ pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
+ usage);
+ if (bo)
+ return &bo->base;
+ }
- buffer = provider->create_buffer(provider, size, &desc.base);
- if (!buffer)
- return NULL;
+ /* Create a new one. */
+ bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
+ if (!bo) {
+ /* Clear the cache and try again. */
+ pb_cache_release_all_buffers(&ws->bo_cache);
+ bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
+ if (!bo)
+ return NULL;
+ }
- return (struct pb_buffer*)buffer;
+ bo->use_reusable_pool = use_reusable_pool;
+ return &bo->base;
}
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
@@ -648,7 +591,7 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
bo->bo = result.buf_handle;
bo->base.size = result.alloc_size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
- bo->rws = ws;
+ bo->ws = ws;
bo->va = va;
bo->va_handle = va_handle;
bo->initial_domain = initial;
@@ -663,6 +606,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ amdgpu_add_buffer_to_global_list(bo);
+
return &bo->base;
error_va_map:
@@ -680,12 +625,11 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
unsigned stride,
struct winsys_handle *whandle)
{
- struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(buffer);
+ struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
enum amdgpu_bo_handle_type type;
int r;
- if ((void*)bo != (void*)buffer)
- pb_cache_manager_remove_buffer(buffer);
+ bo->use_reusable_pool = false;
switch (whandle->type) {
case DRM_API_HANDLE_TYPE_SHARED:
@@ -740,7 +684,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
- bo->rws = ws;
+ bo->ws = ws;
bo->user_ptr = pointer;
bo->va = va;
bo->va_handle = va_handle;
@@ -749,6 +693,8 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ amdgpu_add_buffer_to_global_list(bo);
+
return (struct pb_buffer*)bo;
error_va_map:
@@ -762,14 +708,18 @@ error:
return NULL;
}
-static uint64_t amdgpu_bo_get_va(struct radeon_winsys_cs_handle *buf)
+static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
+{
+ return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
+}
+
+static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
return ((struct amdgpu_winsys_bo*)buf)->va;
}
-void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws)
+void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
- ws->base.buffer_get_cs_handle = amdgpu_get_cs_handle;
ws->base.buffer_set_tiling = amdgpu_bo_set_tiling;
ws->base.buffer_get_tiling = amdgpu_bo_get_tiling;
ws->base.buffer_map = amdgpu_bo_map;
@@ -778,6 +728,7 @@ void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws)
ws->base.buffer_create = amdgpu_bo_create;
ws->base.buffer_from_handle = amdgpu_bo_from_handle;
ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
+ ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
ws->base.buffer_get_handle = amdgpu_bo_get_handle;
ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 3739fd136..54f5dbdc4 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -36,17 +36,11 @@
#include "amdgpu_winsys.h"
#include "pipebuffer/pb_bufmgr.h"
-struct amdgpu_bo_desc {
- struct pb_desc base;
-
- enum radeon_bo_domain initial_domain;
- unsigned flags;
-};
-
struct amdgpu_winsys_bo {
struct pb_buffer base;
+ struct pb_cache_entry cache_entry;
- struct amdgpu_winsys *rws;
+ struct amdgpu_winsys *ws;
void *user_ptr; /* from buffer_from_ptr */
amdgpu_bo_handle bo;
@@ -54,6 +48,7 @@ struct amdgpu_winsys_bo {
amdgpu_va_handle va_handle;
uint64_t va;
enum radeon_bo_domain initial_domain;
+ bool use_reusable_pool;
/* how many command streams is this bo referenced in? */
int num_cs_references;
@@ -65,10 +60,13 @@ struct amdgpu_winsys_bo {
/* Fences for buffer synchronization. */
struct pipe_fence_handle *fence[RING_LAST];
+
+ struct list_head global_list_item;
};
-struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws);
-void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws);
+bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);
+void amdgpu_bo_destroy(struct pb_buffer *_buf);
+void amdgpu_bo_init_functions(struct amdgpu_winsys *ws);
static inline
void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst,
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0f42298c2..83da740f6 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -200,46 +200,46 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
{
- /* The maximum size is 4MB - 1B, which is unaligned.
- * Use aligned size 4MB - 16B. */
- const unsigned max_ib_size = (1024 * 1024 - 16) * 4;
- const unsigned min_ib_size = 24 * 1024 * 4;
+ /* Small IBs are better than big IBs, because the GPU goes idle quicker
+ * and there is less waiting for buffers and fences. Proof:
+ * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
+ */
+ const unsigned buffer_size = 128 * 1024 * 4;
+ const unsigned ib_size = 20 * 1024 * 4;
cs->base.cdw = 0;
cs->base.buf = NULL;
/* Allocate a new buffer for IBs if the current buffer is all used. */
if (!cs->big_ib_buffer ||
- cs->used_ib_space + min_ib_size > cs->big_ib_buffer->size) {
+ cs->used_ib_space + ib_size > cs->big_ib_buffer->size) {
struct radeon_winsys *ws = &cs->ctx->ws->base;
- struct radeon_winsys_cs_handle *winsys_bo;
pb_reference(&cs->big_ib_buffer, NULL);
cs->big_ib_winsys_buffer = NULL;
cs->ib_mapped = NULL;
cs->used_ib_space = 0;
- cs->big_ib_buffer = ws->buffer_create(ws, max_ib_size,
+ cs->big_ib_buffer = ws->buffer_create(ws, buffer_size,
4096, true,
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS);
if (!cs->big_ib_buffer)
return false;
- winsys_bo = ws->buffer_get_cs_handle(cs->big_ib_buffer);
-
- cs->ib_mapped = ws->buffer_map(winsys_bo, NULL, PIPE_TRANSFER_WRITE);
+ cs->ib_mapped = ws->buffer_map(cs->big_ib_buffer, NULL,
+ PIPE_TRANSFER_WRITE);
if (!cs->ib_mapped) {
pb_reference(&cs->big_ib_buffer, NULL);
return false;
}
- cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)winsys_bo;
+ cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)cs->big_ib_buffer;
}
cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space);
- cs->base.max_dw = (cs->big_ib_buffer->size - cs->used_ib_space) / 4;
+ cs->base.max_dw = ib_size / 4;
return true;
}
@@ -336,7 +336,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
void *flush_ctx,
- struct radeon_winsys_cs_handle *trace_buf)
+ struct pb_buffer *trace_buf)
{
struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
struct amdgpu_cs *cs;
@@ -368,7 +368,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
-int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
+int amdgpu_lookup_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
{
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
int i = cs->buffer_indices_hashlist[hash];
@@ -377,15 +377,15 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
if (i == -1 || cs->buffers[i].bo == bo)
return i;
- /* Hash collision, look for the BO in the list of relocs linearly. */
+ /* Hash collision, look for the BO in the list of buffers linearly. */
for (i = cs->num_buffers - 1; i >= 0; i--) {
if (cs->buffers[i].bo == bo) {
- /* Put this reloc in the hash list.
+ /* Put this buffer in the hash list.
* This will prevent additional hash collisions if there are
- * several consecutive get_reloc calls for the same buffer.
+ * several consecutive lookup_buffer calls for the same buffer.
*
* Example: Assuming buffers A,B,C collide in the hash list,
- * the following sequence of relocs:
+ * the following sequence of buffers:
* AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
* will collide here: ^ and here: ^,
* meaning that we should get very few collisions in the end. */
@@ -396,32 +396,33 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
return -1;
}
-static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
+static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
struct amdgpu_winsys_bo *bo,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
unsigned priority,
enum radeon_bo_domain *added_domains)
{
- struct amdgpu_cs_buffer *reloc;
+ struct amdgpu_cs_buffer *buffer;
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
int i = -1;
- priority = MIN2(priority, 15);
+ assert(priority < 64);
*added_domains = 0;
- i = amdgpu_get_reloc(cs, bo);
+ i = amdgpu_lookup_buffer(cs, bo);
if (i >= 0) {
- reloc = &cs->buffers[i];
- reloc->usage |= usage;
- *added_domains = domains & ~reloc->domains;
- reloc->domains |= domains;
- cs->flags[i] = MAX2(cs->flags[i], priority);
+ buffer = &cs->buffers[i];
+ buffer->priority_usage |= 1llu << priority;
+ buffer->usage |= usage;
+ *added_domains = domains & ~buffer->domains;
+ buffer->domains |= domains;
+ cs->flags[i] = MAX2(cs->flags[i], priority / 4);
return i;
}
- /* New relocation, check if the backing array is large enough. */
+ /* New buffer, check if the backing array is large enough. */
if (cs->num_buffers >= cs->max_num_buffers) {
uint32_t size;
cs->max_num_buffers += 10;
@@ -435,16 +436,17 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
cs->flags = realloc(cs->flags, cs->max_num_buffers);
}
- /* Initialize the new relocation. */
+ /* Initialize the new buffer. */
cs->buffers[cs->num_buffers].bo = NULL;
amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
cs->handles[cs->num_buffers] = bo->bo;
- cs->flags[cs->num_buffers] = priority;
+ cs->flags[cs->num_buffers] = priority / 4;
p_atomic_inc(&bo->num_cs_references);
- reloc = &cs->buffers[cs->num_buffers];
- reloc->bo = bo;
- reloc->usage = usage;
- reloc->domains = domains;
+ buffer = &cs->buffers[cs->num_buffers];
+ buffer->bo = bo;
+ buffer->priority_usage = 1llu << priority;
+ buffer->usage = usage;
+ buffer->domains = domains;
cs->buffer_indices_hashlist[hash] = cs->num_buffers;
@@ -452,8 +454,8 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
return cs->num_buffers++;
}
-static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
- struct radeon_winsys_cs_handle *buf,
+static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
+ struct pb_buffer *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
enum radeon_bo_priority priority)
@@ -464,7 +466,7 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
enum radeon_bo_domain added_domains;
- unsigned index = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain,
+ unsigned index = amdgpu_add_buffer(cs, bo, usage, bo->initial_domain,
priority, &added_domains);
if (added_domains & RADEON_DOMAIN_GTT)
@@ -475,12 +477,12 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
return index;
}
-static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
- struct radeon_winsys_cs_handle *buf)
+static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
+ struct pb_buffer *buf)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
- return amdgpu_get_reloc(cs, (struct amdgpu_winsys_bo*)buf);
+ return amdgpu_lookup_buffer(cs, (struct amdgpu_winsys_bo*)buf);
}
static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
@@ -498,6 +500,22 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64
return status;
}
+static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
+ struct radeon_bo_list_item *list)
+{
+ struct amdgpu_cs *cs = amdgpu_cs(rcs);
+ int i;
+
+ if (list) {
+ for (i = 0; i < cs->num_buffers; i++) {
+ pb_reference(&list[i].buf, &cs->buffers[i].bo->base);
+ list[i].vm_address = cs->buffers[i].bo->va;
+ list[i].priority_usage = cs->buffers[i].priority_usage;
+ }
+ }
+ return cs->num_buffers;
+}
+
static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
struct pipe_fence_handle **out_fence)
{
@@ -587,6 +605,7 @@ static void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
}
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", FALSE)
static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
unsigned flags,
@@ -599,25 +618,13 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
switch (cs->base.ring_type) {
case RING_DMA:
/* pad DMA ring to 8 DWs */
- if (ws->info.chip_class <= SI) {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
- } else {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0x00000000); /* NOP packet */
- }
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0x00000000); /* NOP packet */
break;
case RING_GFX:
- /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
- * r6xx, requires at least 4 dw alignment to avoid a hw bug.
- */
- if (ws->info.chip_class <= SI) {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
- } else {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
- }
+ /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
break;
case RING_UVD:
while (rcs->cdw & 15)
@@ -631,16 +638,42 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
fprintf(stderr, "amdgpu: command stream overflowed\n");
}
- amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
- RADEON_USAGE_READ, 0, RADEON_PRIO_MIN);
+ amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer,
+ RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
/* If the CS is not empty or overflowed.... */
if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
int r;
- r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
- cs->handles, cs->flags,
- &cs->request.resources);
+ /* Use a buffer list containing all allocated buffers if requested. */
+ if (debug_get_option_all_bos()) {
+ struct amdgpu_winsys_bo *bo;
+ amdgpu_bo_handle *handles;
+ unsigned num = 0;
+
+ pipe_mutex_lock(ws->global_bo_list_lock);
+
+ handles = malloc(sizeof(handles[0]) * ws->num_buffers);
+ if (!handles) {
+ pipe_mutex_unlock(ws->global_bo_list_lock);
+ goto cleanup;
+ }
+
+ LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
+ assert(num < ws->num_buffers);
+ handles[num++] = bo->bo;
+ }
+
+ r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
+ handles, NULL,
+ &cs->request.resources);
+ free(handles);
+ pipe_mutex_unlock(ws->global_bo_list_lock);
+ } else {
+ r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
+ cs->handles, cs->flags,
+ &cs->request.resources);
+ }
if (r) {
fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r);
@@ -676,7 +709,7 @@ static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
}
static boolean amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs,
- struct radeon_winsys_cs_handle *_buf,
+ struct pb_buffer *_buf,
enum radeon_bo_usage usage)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
@@ -692,10 +725,11 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
ws->base.cs_create = amdgpu_cs_create;
ws->base.cs_destroy = amdgpu_cs_destroy;
- ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
- ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
+ ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
+ ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
ws->base.cs_validate = amdgpu_cs_validate;
ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+ ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
ws->base.cs_flush = amdgpu_cs_flush;
ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 12c6b624b..6ad3cddf7 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -45,6 +45,7 @@ struct amdgpu_ctx {
/* Per-buffer bookkeeping entry; presumably the element type of the
 * command stream's buffers[] array -- confirm against struct amdgpu_cs. */
struct amdgpu_cs_buffer {
struct amdgpu_winsys_bo *bo; /* winsys buffer object this entry refers to */
+ uint64_t priority_usage; /* reported to callers as radeon_bo_list_item::priority_usage by amdgpu_cs_get_buffer_list() */
enum radeon_bo_usage usage; /* NOTE(review): presumably how the CS accesses the buffer (read/write) -- confirm */
enum radeon_bo_domain domains; /* NOTE(review): presumably the memory domains requested for the buffer -- confirm */
};
@@ -68,7 +69,7 @@ struct amdgpu_cs {
struct amdgpu_cs_request request;
struct amdgpu_cs_ib_info ib;
- /* Relocs. */
+ /* Buffers. */
unsigned max_num_buffers;
unsigned num_buffers;
amdgpu_bo_handle *handles;
@@ -115,7 +116,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
*rdst = rsrc;
}
-int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
+int amdgpu_lookup_buffer(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_winsys_cs *base)
@@ -128,8 +129,8 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
struct amdgpu_winsys_bo *bo)
{
int num_refs = bo->num_cs_references;
- return num_refs == bo->rws->num_cs ||
- (num_refs && amdgpu_get_reloc(cs, bo) != -1);
+ return num_refs == bo->ws->num_cs ||
+ (num_refs && amdgpu_lookup_buffer(cs, bo) != -1);
}
static inline boolean
@@ -142,7 +143,7 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
if (!bo->num_cs_references)
return FALSE;
- index = amdgpu_get_reloc(cs, bo);
+ index = amdgpu_lookup_buffer(cs, bo);
if (index == -1)
return FALSE;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 358df3810..4c837a8e2 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -145,11 +145,9 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
regValue.backendDisables = ws->amdinfo.backend_disable[0];
regValue.pTileConfig = ws->amdinfo.gb_tile_mode;
- regValue.noOfEntries = sizeof(ws->amdinfo.gb_tile_mode) /
- sizeof(ws->amdinfo.gb_tile_mode[0]);
+ regValue.noOfEntries = ARRAY_SIZE(ws->amdinfo.gb_tile_mode);
regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
- regValue.noOfMacroEntries = sizeof(ws->amdinfo.gb_macro_tile_mode) /
- sizeof(ws->amdinfo.gb_macro_tile_mode[0]);
+ regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode);
createFlags.value = 0;
createFlags.useTileIndex = 1;
@@ -175,7 +173,9 @@ static int compute_level(struct amdgpu_winsys *ws,
struct radeon_surf *surf, bool is_stencil,
unsigned level, unsigned type, bool compressed,
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut)
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
+ ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
{
struct radeon_surf_level *surf_level;
ADDR_E_RETURNCODE ret;
@@ -248,6 +248,31 @@ static int compute_level(struct amdgpu_winsys *ws,
surf->tiling_index[level] = AddrSurfInfoOut->tileIndex;
surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;
+
+ if (AddrSurfInfoIn->flags.dccCompatible) {
+ AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
+ AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
+ AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
+ AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
+ AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+ ret = AddrComputeDccInfo(ws->addrlib,
+ AddrDccIn,
+ AddrDccOut);
+
+ if (ret == ADDR_OK) {
+ surf_level->dcc_offset = surf->dcc_size;
+ surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
+ surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
+ } else {
+ surf->dcc_size = 0;
+ surf_level->dcc_offset = 0;
+ }
+ } else {
+ surf->dcc_size = 0;
+ surf_level->dcc_offset = 0;
+ }
+
return 0;
}
@@ -259,6 +284,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
bool compressed;
ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
+ ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
+ ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
ADDR_TILEINFO AddrTileInfoIn = {0};
ADDR_TILEINFO AddrTileInfoOut = {0};
int r;
@@ -269,6 +296,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
+ AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
+ AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
type = RADEON_SURF_GET(surf->flags, TYPE);
@@ -318,10 +347,10 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
}
}
else {
- AddrSurfInfoIn.bpp = surf->bpe * 8;
+ AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
}
- AddrSurfInfoIn.numSamples = surf->nsamples;
+ AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples;
AddrSurfInfoIn.tileIndex = -1;
/* Set the micro tile type. */
@@ -339,6 +368,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
AddrSurfInfoIn.flags.degrade4Space = 1;
+ AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
+ !(surf->flags & RADEON_SURF_SCANOUT) &&
+ !compressed && AddrDccIn.numSamples <= 1;
/* This disables incorrect calculations (hacks) in addrlib. */
AddrSurfInfoIn.flags.noStencil = 1;
@@ -375,11 +407,13 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
}
surf->bo_size = 0;
+ surf->dcc_size = 0;
+ surf->dcc_alignment = 1;
/* Calculate texture layout information. */
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, false, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
if (r)
return r;
@@ -406,7 +440,7 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, true, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
if (r)
return r;
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 824f0d380..fc7562d8f 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -68,7 +68,6 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
case CIK__PIPE_CONFIG__ADDR_SURF_P2:
- default:
return 2;
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
@@ -86,23 +85,13 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
return 16;
+ default:
+ fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
+ assert(!"this should never occur");
+ return 2;
}
}
-/* Convert Sea Islands register values GB_ADDR_CFG and MC_ADDR_CFG
- * into GB_TILING_CONFIG register which is only present on R600-R700. */
-static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info)
-{
- unsigned num_pipes = info->gb_addr_cfg & 0x7;
- unsigned num_banks = info->mc_arb_ramcfg & 0x3;
- unsigned pipe_interleave_bytes = (info->gb_addr_cfg >> 4) & 0x7;
- unsigned row_size = (info->gb_addr_cfg >> 28) & 0x3;
-
- return num_pipes | (num_banks << 4) |
- (pipe_interleave_bytes << 8) |
- (row_size << 12);
-}
-
/* Helper function to do the ioctls needed for setup and init. */
static boolean do_winsys_init(struct amdgpu_winsys *ws)
{
@@ -185,10 +174,9 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
goto fail;
}
- /* LLVM 3.6 is required for VI. */
+ /* LLVM 3.6.1 is required for VI. */
if (ws->info.chip_class >= VI &&
- (HAVE_LLVM < 0x0306 ||
- (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1))) {
+ HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1) {
fprintf(stderr, "amdgpu: LLVM 3.6.1 is required, got LLVM %i.%i.%i\n",
HAVE_LLVM >> 8, HAVE_LLVM & 255, MESA_LLVM_VERSION_PATCH);
goto fail;
@@ -251,37 +239,31 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
ws->info.gart_size = gtt.heap_size;
ws->info.vram_size = vram.heap_size;
/* convert the shader clock from KHz to MHz */
- ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
+ ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
ws->info.max_se = ws->amdinfo.num_shader_engines;
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
ws->info.has_uvd = uvd.available_rings != 0;
ws->info.vce_fw_version =
vce.available_rings ? vce_version : 0;
ws->info.has_userptr = TRUE;
- ws->info.r600_num_backends = ws->amdinfo.rb_pipes;
- ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
- ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo);
- ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
- ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */
- ws->info.r600_virtual_address = TRUE;
- ws->info.r600_has_dma = dma.available_rings != 0;
-
- /* Guess what the maximum compute unit number is by looking at the mask
- * of enabled CUs.
- */
+ ws->info.num_render_backends = ws->amdinfo.rb_pipes;
+ ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
+ ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
+ ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
+ ws->info.has_virtual_memory = TRUE;
+ ws->info.has_sdma = dma.available_rings != 0;
+
+ /* Get the number of good compute units. */
+ ws->info.num_good_compute_units = 0;
for (i = 0; i < ws->info.max_se; i++)
- for (j = 0; j < ws->info.max_sh_per_se; j++) {
- unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
-
- if (ws->info.max_compute_units < max)
- ws->info.max_compute_units = max;
- }
- ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+ for (j = 0; j < ws->info.max_sh_per_se; j++)
+ ws->info.num_good_compute_units +=
+ util_bitcount(ws->amdinfo.cu_bitmap[i][j]);
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
sizeof(ws->amdinfo.gb_tile_mode));
ws->info.si_tile_mode_array_valid = TRUE;
- ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask;
+ ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask;
memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
sizeof(ws->amdinfo.gb_macro_tile_mode));
@@ -304,11 +286,9 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
pipe_mutex_destroy(ws->bo_fence_lock);
-
- ws->cman->destroy(ws->cman);
- ws->kman->destroy(ws->kman);
+ pb_cache_deinit(&ws->bo_cache);
+ pipe_mutex_destroy(ws->global_bo_list_lock);
AddrDestroy(ws->addrlib);
-
amdgpu_device_deinitialize(ws->dev);
FREE(rws);
}
@@ -365,14 +345,14 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
return 0;
}
-static void amdgpu_read_registers(struct radeon_winsys *rws,
+static bool amdgpu_read_registers(struct radeon_winsys *rws,
unsigned reg_offset,
unsigned num_registers, uint32_t *out)
{
struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
- amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
- 0xffffffff, 0, out);
+ return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
+ 0xffffffff, 0, out) == 0;
}
static unsigned hash_dev(void *key)
@@ -389,9 +369,9 @@ static int compare_dev(void *key1, void *key2)
return key1 != key2;
}
-static bool amdgpu_winsys_unref(struct radeon_winsys *ws)
+static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
{
- struct amdgpu_winsys *rws = (struct amdgpu_winsys*)ws;
+ struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
bool destroy;
/* When the reference counter drops to zero, remove the device pointer
@@ -401,9 +381,9 @@ static bool amdgpu_winsys_unref(struct radeon_winsys *ws)
* from the table when the counter drops to 0. */
pipe_mutex_lock(dev_tab_mutex);
- destroy = pipe_reference(&rws->reference, NULL);
+ destroy = pipe_reference(&ws->reference, NULL);
if (destroy && dev_tab)
- util_hash_table_remove(dev_tab, rws->dev);
+ util_hash_table_remove(dev_tab, ws->dev);
pipe_mutex_unlock(dev_tab_mutex);
return destroy;
@@ -461,13 +441,9 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
goto fail;
/* Create managers. */
- ws->kman = amdgpu_bomgr_create(ws);
- if (!ws->kman)
- goto fail;
- ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
- (ws->info.vram_size + ws->info.gart_size) / 8);
- if (!ws->cman)
- goto fail;
+ pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0,
+ (ws->info.vram_size + ws->info.gart_size) / 8,
+ amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
/* init reference */
pipe_reference_init(&ws->reference, 1);
@@ -480,10 +456,12 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
ws->base.query_value = amdgpu_query_value;
ws->base.read_registers = amdgpu_read_registers;
- amdgpu_bomgr_init_functions(ws);
+ amdgpu_bo_init_functions(ws);
amdgpu_cs_init_functions(ws);
amdgpu_surface_init_functions(ws);
+ LIST_INITHEAD(&ws->global_bo_list);
+ pipe_mutex_init(ws->global_bo_list_lock);
pipe_mutex_init(ws->bo_fence_lock);
/* Create the screen at the end. The winsys must be initialized
@@ -509,10 +487,7 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
fail:
pipe_mutex_unlock(dev_tab_mutex);
- if (ws->cman)
- ws->cman->destroy(ws->cman);
- if (ws->kman)
- ws->kman->destroy(ws->kman);
+ pb_cache_deinit(&ws->bo_cache);
FREE(ws);
return NULL;
}
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 4d07644c9..91b9be4bb 100644
--- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -32,6 +32,7 @@
#ifndef AMDGPU_WINSYS_H
#define AMDGPU_WINSYS_H
+#include "pipebuffer/pb_cache.h"
#include "gallium/drivers/radeon/radeon_winsys.h"
#include "addrlib/addrinterface.h"
#include "os/os_thread.h"
@@ -42,6 +43,7 @@ struct amdgpu_cs;
struct amdgpu_winsys {
struct radeon_winsys base;
struct pipe_reference reference;
+ struct pb_cache bo_cache;
amdgpu_device_handle dev;
@@ -57,13 +59,15 @@ struct amdgpu_winsys {
struct radeon_info info;
- struct pb_manager *kman;
- struct pb_manager *cman;
-
struct amdgpu_gpu_info amdinfo;
ADDR_HANDLE addrlib;
uint32_t rev_id;
unsigned family;
+
+ /* List of all allocated buffers */
+ pipe_mutex global_bo_list_lock;
+ struct list_head global_bo_list;
+ unsigned num_buffers;
};
static inline struct amdgpu_winsys *