diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:22:51 +0000 |
commit | c9223eed3c16cd3e98a8f56dda953d8f299de0e3 (patch) | |
tree | 53e2a1c3f13bcf6b4ed201d7bc135e7213c94ebe /lib/mesa/src/gallium/winsys/amdgpu/drm | |
parent | 6e8f2d062ab9c198239b9283b2b7ed12f4ea17d8 (diff) |
Import Mesa 11.2.2
Diffstat (limited to 'lib/mesa/src/gallium/winsys/amdgpu/drm')
11 files changed, 372 insertions, 327 deletions
diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp index 7393953c1..570216241 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp @@ -896,6 +896,49 @@ BOOL_32 CIAddrLib::HwlOverrideTileMode( /** *************************************************************************************************** +* CiAddrLib::GetPrtSwitchP4Threshold +* +* @brief +* Return the threshold of switching to P4_* instead of P16_* for PRT resources +*************************************************************************************************** +*/ +UINT_32 CIAddrLib::GetPrtSwitchP4Threshold() const +{ + UINT_32 threshold; + + switch (m_pipes) + { + case 8: + threshold = 32; + break; + case 16: + if (m_settings.isFiji) + { + threshold = 16; + } + else if (m_settings.isHawaii) + { + threshold = 8; + } + else + { + ///@todo add for possible new ASICs. + ADDR_ASSERT_ALWAYS(); + threshold = 16; + } + break; + default: + ///@todo add for possible new ASICs. + ADDR_ASSERT_ALWAYS(); + threshold = 32; + break; + } + + return threshold; +} + +/** +*************************************************************************************************** * CIAddrLib::HwlSetupTileInfo * * @brief @@ -1123,7 +1166,7 @@ VOID CIAddrLib::HwlSetupTileInfo( { UINT_32 bytesXSamples = bpp * numSamples / 8; UINT_32 bytesXThickness = bpp * thickness / 8; - UINT_32 switchP4Threshold = (m_pipes == 16) ? 8 : 32; + UINT_32 switchP4Threshold = GetPrtSwitchP4Threshold(); if ((bytesXSamples > switchP4Threshold) || (bytesXThickness > switchP4Threshold)) { diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h index 451508619..4cbe9706b 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h @@ -167,6 +167,8 @@ private: VOID ReadGbMacroTileCfg( UINT_32 regValue, ADDR_TILEINFO* pCfg) const; + UINT_32 GetPrtSwitchP4Threshold() const; + BOOL_32 InitTileSettingTable( const UINT_32 *pSetting, UINT_32 noOfEntries); diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp index b1e008b83..088b64593 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp @@ -352,6 +352,7 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMicroTiled( ComputeSurfaceAlignmentsMicroTiled(expTileMode, pIn->bpp, pIn->flags, + pIn->mipLevel, numSamples, &pOut->baseAlign, &pOut->pitchAlign, @@ -647,6 +648,7 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled( AddrTileMode tileMode, ///< [in] tile mode UINT_32 bpp, ///< [in] bits per pixel ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level UINT_32 numSamples, ///< [in] number of samples UINT_32* pBaseAlign, ///< [out] base address alignment in bytes UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels @@ -669,10 +671,10 @@ BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled( // ECR#393489 // Workaround 2 for 1D tiling - There is HW bug for Carrizo // where it requires the following alignments for 1D tiling. - if (flags.czDispCompatible) + if (flags.czDispCompatible && (mipLevel == 0)) { *pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0 - *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 >> (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 + *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 } // end Carrizo workaround for 1D tilling diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h index 84adb66ee..25e38964b 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h @@ -315,7 +315,8 @@ private: UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; BOOL_32 ComputeSurfaceAlignmentsMicroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; BOOL_32 ComputeSurfaceAlignmentsMacroTiled( diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index fe55dc310..59a801b14 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -37,47 +37,16 @@ #include <xf86drm.h> #include <stdio.h> -static const struct pb_vtbl amdgpu_winsys_bo_vtbl; - static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo) { - assert(bo->vtbl == &amdgpu_winsys_bo_vtbl); return (struct amdgpu_winsys_bo *)bo; } -struct amdgpu_bomgr { - struct pb_manager base; - struct amdgpu_winsys *rws; -}; - -static struct amdgpu_winsys *get_winsys(struct pb_manager *mgr) -{ - return ((struct amdgpu_bomgr*)mgr)->rws; -} - -static struct amdgpu_winsys_bo *get_amdgpu_winsys_bo(struct pb_buffer *_buf) -{ - struct amdgpu_winsys_bo *bo = NULL; - - if (_buf->vtbl == &amdgpu_winsys_bo_vtbl) { - bo = amdgpu_winsys_bo(_buf); - } else { - struct pb_buffer *base_buf; - pb_size offset; - pb_get_base_buffer(_buf, &base_buf, &offset); - - if (base_buf->vtbl == &amdgpu_winsys_bo_vtbl) - bo = amdgpu_winsys_bo(base_buf); - } - - return bo; -} - static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout, enum radeon_bo_usage usage) { - struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf); - struct amdgpu_winsys *ws = bo->rws; + struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); + struct amdgpu_winsys *ws = bo->ws; int i; if (bo->is_shared) { @@ -149,16 +118,21 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout, } static enum radeon_bo_domain amdgpu_bo_get_initial_domain( - struct radeon_winsys_cs_handle *buf) + struct pb_buffer *buf) { return ((struct amdgpu_winsys_bo*)buf)->initial_domain; } -static void amdgpu_bo_destroy(struct pb_buffer *_buf) +void amdgpu_bo_destroy(struct pb_buffer *_buf) { struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); int i; + pipe_mutex_lock(bo->ws->global_bo_list_lock); + LIST_DEL(&bo->global_list_item); + bo->ws->num_buffers--; + pipe_mutex_unlock(bo->ws->global_bo_list_lock); + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); amdgpu_va_range_free(bo->va_handle); amdgpu_bo_free(bo->bo); @@ -167,13 +141,23 @@ static void amdgpu_bo_destroy(struct pb_buffer *_buf) amdgpu_fence_reference(&bo->fence[i], NULL); if (bo->initial_domain & RADEON_DOMAIN_VRAM) - bo->rws->allocated_vram -= align(bo->base.size, bo->rws->gart_page_size); + bo->ws->allocated_vram -= align(bo->base.size, bo->ws->gart_page_size); else if (bo->initial_domain & RADEON_DOMAIN_GTT) - bo->rws->allocated_gtt -= align(bo->base.size, bo->rws->gart_page_size); + bo->ws->allocated_gtt -= align(bo->base.size, bo->ws->gart_page_size); FREE(bo); } -static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf, +static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf) +{ + struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); + + if (bo->use_reusable_pool) + pb_cache_add_buffer(&bo->cache_entry); + else + amdgpu_bo_destroy(_buf); +} + +static void *amdgpu_bo_map(struct pb_buffer *buf, struct radeon_winsys_cs *rcs, enum pipe_transfer_usage usage) { @@ -241,7 +225,7 @@ static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf, RADEON_USAGE_READWRITE); } - bo->rws->buffer_wait_time += os_time_get_nano() - time; + bo->ws->buffer_wait_time += os_time_get_nano() - time; } } @@ -250,52 +234,43 @@ static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf, return bo->user_ptr; r = amdgpu_bo_cpu_map(bo->bo, &cpu); + if (r) { + /* Clear the cache and try again. */ + pb_cache_release_all_buffers(&bo->ws->bo_cache); + r = amdgpu_bo_cpu_map(bo->bo, &cpu); + } return r ? NULL : cpu; } -static void amdgpu_bo_unmap(struct radeon_winsys_cs_handle *buf) +static void amdgpu_bo_unmap(struct pb_buffer *buf) { struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; amdgpu_bo_cpu_unmap(bo->bo); } -static void amdgpu_bo_get_base_buffer(struct pb_buffer *buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - *base_buf = buf; - *offset = 0; -} +static const struct pb_vtbl amdgpu_winsys_bo_vtbl = { + amdgpu_bo_destroy_or_cache + /* other functions are never called */ +}; -static enum pipe_error amdgpu_bo_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) +static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo) { - /* Always pinned */ - return PIPE_OK; -} + struct amdgpu_winsys *ws = bo->ws; -static void amdgpu_bo_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ + pipe_mutex_lock(ws->global_bo_list_lock); + LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list); + ws->num_buffers++; + pipe_mutex_unlock(ws->global_bo_list_lock); } -static const struct pb_vtbl amdgpu_winsys_bo_vtbl = { - amdgpu_bo_destroy, - NULL, /* never called */ - NULL, /* never called */ - amdgpu_bo_validate, - amdgpu_bo_fence, - amdgpu_bo_get_base_buffer, -}; - -static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr, - pb_size size, - const struct pb_desc *desc) +static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, + unsigned size, + unsigned alignment, + unsigned usage, + enum radeon_bo_domain initial_domain, + unsigned flags) { - struct amdgpu_winsys *rws = get_winsys(_mgr); - struct amdgpu_bo_desc *rdesc = (struct amdgpu_bo_desc*)desc; struct amdgpu_bo_alloc_request request = {0}; amdgpu_bo_handle buf_handle; uint64_t va = 0; @@ -303,37 +278,39 @@ static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr, amdgpu_va_handle va_handle; int r; - assert(rdesc->initial_domain & RADEON_DOMAIN_VRAM_GTT); + assert(initial_domain & RADEON_DOMAIN_VRAM_GTT); bo = CALLOC_STRUCT(amdgpu_winsys_bo); if (!bo) { return NULL; } + pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base); request.alloc_size = size; - request.phys_alignment = desc->alignment; + request.phys_alignment = alignment; - if (rdesc->initial_domain & RADEON_DOMAIN_VRAM) { + if (initial_domain & RADEON_DOMAIN_VRAM) request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; - if (rdesc->flags & RADEON_FLAG_CPU_ACCESS) - request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - } - if (rdesc->initial_domain & RADEON_DOMAIN_GTT) { + if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; - if (rdesc->flags & RADEON_FLAG_GTT_WC) - request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; - } - r = amdgpu_bo_alloc(rws->dev, &request, &buf_handle); + if (flags & RADEON_FLAG_CPU_ACCESS) + request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (flags & RADEON_FLAG_NO_CPU_ACCESS) + request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + if (flags & RADEON_FLAG_GTT_WC) + request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + + r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); if (r) { fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); fprintf(stderr, "amdgpu: size : %d bytes\n", size); - fprintf(stderr, "amdgpu: alignment : %d bytes\n", desc->alignment); - fprintf(stderr, "amdgpu: domains : %d\n", rdesc->initial_domain); + fprintf(stderr, "amdgpu: alignment : %d bytes\n", alignment); + fprintf(stderr, "amdgpu: domains : %d\n", initial_domain); goto error_bo_alloc; } - r = amdgpu_va_range_alloc(rws->dev, amdgpu_gpu_va_range_general, - size, desc->alignment, 0, &va, &va_handle, 0); + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, + size, alignment, 0, &va, &va_handle, 0); if (r) goto error_va_alloc; @@ -342,23 +319,25 @@ static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr, goto error_va_map; pipe_reference_init(&bo->base.reference, 1); - bo->base.alignment = desc->alignment; - bo->base.usage = desc->usage; + bo->base.alignment = alignment; + bo->base.usage = usage; bo->base.size = size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; - bo->rws = rws; + bo->ws = ws; bo->bo = buf_handle; bo->va = va; bo->va_handle = va_handle; - bo->initial_domain = rdesc->initial_domain; - bo->unique_id = __sync_fetch_and_add(&rws->next_bo_unique_id, 1); + bo->initial_domain = initial_domain; + bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); - if (rdesc->initial_domain & RADEON_DOMAIN_VRAM) - rws->allocated_vram += align(size, rws->gart_page_size); - else if (rdesc->initial_domain & RADEON_DOMAIN_GTT) - rws->allocated_gtt += align(size, rws->gart_page_size); + if (initial_domain & RADEON_DOMAIN_VRAM) + ws->allocated_vram += align(size, ws->gart_page_size); + else if (initial_domain & RADEON_DOMAIN_GTT) + ws->allocated_gtt += align(size, ws->gart_page_size); - return &bo->base; + amdgpu_add_buffer_to_global_list(bo); + + return bo; error_va_map: amdgpu_va_range_free(va_handle); @@ -371,48 +350,15 @@ error_bo_alloc: return NULL; } -static void amdgpu_bomgr_flush(struct pb_manager *mgr) -{ - /* NOP */ -} - -/* This is for the cache bufmgr. */ -static boolean amdgpu_bomgr_is_buffer_busy(struct pb_manager *_mgr, - struct pb_buffer *_buf) +bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf) { struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); if (amdgpu_bo_is_referenced_by_any_cs(bo)) { - return TRUE; - } - - if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) { - return TRUE; + return false; } - return FALSE; -} - -static void amdgpu_bomgr_destroy(struct pb_manager *mgr) -{ - FREE(mgr); -} - -struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws) -{ - struct amdgpu_bomgr *mgr; - - mgr = CALLOC_STRUCT(amdgpu_bomgr); - if (!mgr) - return NULL; - - mgr->base.destroy = amdgpu_bomgr_destroy; - mgr->base.create_buffer = amdgpu_bomgr_create_bo; - mgr->base.flush = amdgpu_bomgr_flush; - mgr->base.is_buffer_busy = amdgpu_bomgr_is_buffer_busy; - - mgr->rws = rws; - return &mgr->base; + return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE); } static unsigned eg_tile_split(unsigned tile_split) @@ -453,7 +399,7 @@ static void amdgpu_bo_get_tiling(struct pb_buffer *_buf, unsigned *mtilea, bool *scanout) { - struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf); + struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); struct amdgpu_bo_info info = {0}; uint32_t tiling_flags; int r; @@ -494,7 +440,7 @@ static void amdgpu_bo_set_tiling(struct pb_buffer *_buf, uint32_t pitch, bool scanout) { - struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf); + struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); struct amdgpu_bo_metadata metadata = {0}; uint32_t tiling_flags = 0; @@ -523,12 +469,6 @@ static void amdgpu_bo_set_tiling(struct pb_buffer *_buf, amdgpu_bo_set_metadata(bo->bo, &metadata); } -static struct radeon_winsys_cs_handle *amdgpu_get_cs_handle(struct pb_buffer *_buf) -{ - /* return a direct pointer to amdgpu_winsys_bo. */ - return (struct radeon_winsys_cs_handle*)get_amdgpu_winsys_bo(_buf); -} - static struct pb_buffer * amdgpu_bo_create(struct radeon_winsys *rws, unsigned size, @@ -538,9 +478,8 @@ amdgpu_bo_create(struct radeon_winsys *rws, enum radeon_bo_flag flags) { struct amdgpu_winsys *ws = amdgpu_winsys(rws); - struct amdgpu_bo_desc desc; - struct pb_manager *provider; - struct pb_buffer *buffer; + struct amdgpu_winsys_bo *bo; + unsigned usage = 0; /* Don't use VRAM if the GPU doesn't have much. This is only the initial * domain. The kernel is free to move the buffer if it wants to. @@ -552,9 +491,6 @@ amdgpu_bo_create(struct radeon_winsys *rws, flags = RADEON_FLAG_GTT_WC; } - memset(&desc, 0, sizeof(desc)); - desc.base.alignment = alignment; - /* Align size to page size. This is the minimum alignment for normal * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. @@ -565,26 +501,33 @@ amdgpu_bo_create(struct radeon_winsys *rws, * might consider different sets of domains / flags compatible */ if (domain == RADEON_DOMAIN_VRAM_GTT) - desc.base.usage = 1 << 2; + usage = 1 << 2; else - desc.base.usage = domain >> 1; - assert(flags < sizeof(desc.base.usage) * 8 - 3); - desc.base.usage |= 1 << (flags + 3); - - desc.initial_domain = domain; - desc.flags = flags; - - /* Assign a buffer manager. */ - if (use_reusable_pool) - provider = ws->cman; - else - provider = ws->kman; + usage = domain >> 1; + assert(flags < sizeof(usage) * 8 - 3); + usage |= 1 << (flags + 3); + + /* Get a buffer from the cache. */ + if (use_reusable_pool) { + bo = (struct amdgpu_winsys_bo*) + pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, + usage); + if (bo) + return &bo->base; + } - buffer = provider->create_buffer(provider, size, &desc.base); - if (!buffer) - return NULL; + /* Create a new one. */ + bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags); + if (!bo) { + /* Clear the cache and try again. */ + pb_cache_release_all_buffers(&ws->bo_cache); + bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags); + if (!bo) + return NULL; + } - return (struct pb_buffer*)buffer; + bo->use_reusable_pool = use_reusable_pool; + return &bo->base; } static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, @@ -648,7 +591,7 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, bo->bo = result.buf_handle; bo->base.size = result.alloc_size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; - bo->rws = ws; + bo->ws = ws; bo->va = va; bo->va_handle = va_handle; bo->initial_domain = initial; @@ -663,6 +606,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align(bo->base.size, ws->gart_page_size); + amdgpu_add_buffer_to_global_list(bo); + return &bo->base; error_va_map: @@ -680,12 +625,11 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer, unsigned stride, struct winsys_handle *whandle) { - struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(buffer); + struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer); enum amdgpu_bo_handle_type type; int r; - if ((void*)bo != (void*)buffer) - pb_cache_manager_remove_buffer(buffer); + bo->use_reusable_pool = false; switch (whandle->type) { case DRM_API_HANDLE_TYPE_SHARED: @@ -740,7 +684,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; bo->base.size = size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; - bo->rws = ws; + bo->ws = ws; bo->user_ptr = pointer; bo->va = va; bo->va_handle = va_handle; @@ -749,6 +693,8 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, ws->allocated_gtt += align(bo->base.size, ws->gart_page_size); + amdgpu_add_buffer_to_global_list(bo); + return (struct pb_buffer*)bo; error_va_map: @@ -762,14 +708,18 @@ error: return NULL; } -static uint64_t amdgpu_bo_get_va(struct radeon_winsys_cs_handle *buf) +static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf) +{ + return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL; +} + +static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf) { return ((struct amdgpu_winsys_bo*)buf)->va; } -void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws) +void amdgpu_bo_init_functions(struct amdgpu_winsys *ws) { - ws->base.buffer_get_cs_handle = amdgpu_get_cs_handle; ws->base.buffer_set_tiling = amdgpu_bo_set_tiling; ws->base.buffer_get_tiling = amdgpu_bo_get_tiling; ws->base.buffer_map = amdgpu_bo_map; @@ -778,6 +728,7 @@ void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws) ws->base.buffer_create = amdgpu_bo_create; ws->base.buffer_from_handle = amdgpu_bo_from_handle; ws->base.buffer_from_ptr = amdgpu_bo_from_ptr; + ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr; ws->base.buffer_get_handle = amdgpu_bo_get_handle; ws->base.buffer_get_virtual_address = amdgpu_bo_get_va; ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain; diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h index 3739fd136..54f5dbdc4 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h @@ -36,17 +36,11 @@ #include "amdgpu_winsys.h" #include "pipebuffer/pb_bufmgr.h" -struct amdgpu_bo_desc { - struct pb_desc base; - - enum radeon_bo_domain initial_domain; - unsigned flags; -}; - struct amdgpu_winsys_bo { struct pb_buffer base; + struct pb_cache_entry cache_entry; - struct amdgpu_winsys *rws; + struct amdgpu_winsys *ws; void *user_ptr; /* from buffer_from_ptr */ amdgpu_bo_handle bo; @@ -54,6 +48,7 @@ struct amdgpu_winsys_bo { amdgpu_va_handle va_handle; uint64_t va; enum radeon_bo_domain initial_domain; + bool use_reusable_pool; /* how many command streams is this bo referenced in? */ int num_cs_references; @@ -65,10 +60,13 @@ struct amdgpu_winsys_bo { /* Fences for buffer synchronization. */ struct pipe_fence_handle *fence[RING_LAST]; + + struct list_head global_list_item; }; -struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws); -void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws); +bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf); +void amdgpu_bo_destroy(struct pb_buffer *_buf); +void amdgpu_bo_init_functions(struct amdgpu_winsys *ws); static inline void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst, diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 0f42298c2..83da740f6 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -200,46 +200,46 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx) static bool amdgpu_get_new_ib(struct amdgpu_cs *cs) { - /* The maximum size is 4MB - 1B, which is unaligned. - * Use aligned size 4MB - 16B. */ - const unsigned max_ib_size = (1024 * 1024 - 16) * 4; - const unsigned min_ib_size = 24 * 1024 * 4; + /* Small IBs are better than big IBs, because the GPU goes idle quicker + * and there is less waiting for buffers and fences. Proof: + * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1 + */ + const unsigned buffer_size = 128 * 1024 * 4; + const unsigned ib_size = 20 * 1024 * 4; cs->base.cdw = 0; cs->base.buf = NULL; /* Allocate a new buffer for IBs if the current buffer is all used. */ if (!cs->big_ib_buffer || - cs->used_ib_space + min_ib_size > cs->big_ib_buffer->size) { + cs->used_ib_space + ib_size > cs->big_ib_buffer->size) { struct radeon_winsys *ws = &cs->ctx->ws->base; - struct radeon_winsys_cs_handle *winsys_bo; pb_reference(&cs->big_ib_buffer, NULL); cs->big_ib_winsys_buffer = NULL; cs->ib_mapped = NULL; cs->used_ib_space = 0; - cs->big_ib_buffer = ws->buffer_create(ws, max_ib_size, + cs->big_ib_buffer = ws->buffer_create(ws, buffer_size, 4096, true, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS); if (!cs->big_ib_buffer) return false; - winsys_bo = ws->buffer_get_cs_handle(cs->big_ib_buffer); - - cs->ib_mapped = ws->buffer_map(winsys_bo, NULL, PIPE_TRANSFER_WRITE); + cs->ib_mapped = ws->buffer_map(cs->big_ib_buffer, NULL, + PIPE_TRANSFER_WRITE); if (!cs->ib_mapped) { pb_reference(&cs->big_ib_buffer, NULL); return false; } - cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)winsys_bo; + cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)cs->big_ib_buffer; } cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space; cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space); - cs->base.max_dw = (cs->big_ib_buffer->size - cs->used_ib_space) / 4; + cs->base.max_dw = ib_size / 4; return true; } @@ -336,7 +336,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), void *flush_ctx, - struct radeon_winsys_cs_handle *trace_buf) + struct pb_buffer *trace_buf) { struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; struct amdgpu_cs *cs; @@ -368,7 +368,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) -int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) +int amdgpu_lookup_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) { unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1); int i = cs->buffer_indices_hashlist[hash]; @@ -377,15 +377,15 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) if (i == -1 || cs->buffers[i].bo == bo) return i; - /* Hash collision, look for the BO in the list of relocs linearly. */ + /* Hash collision, look for the BO in the list of buffers linearly. */ for (i = cs->num_buffers - 1; i >= 0; i--) { if (cs->buffers[i].bo == bo) { - /* Put this reloc in the hash list. + /* Put this buffer in the hash list. * This will prevent additional hash collisions if there are - * several consecutive get_reloc calls for the same buffer. + * several consecutive lookup_buffer calls for the same buffer. * * Example: Assuming buffers A,B,C collide in the hash list, - * the following sequence of relocs: + * the following sequence of buffers: * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC * will collide here: ^ and here: ^, * meaning that we should get very few collisions in the end. */ @@ -396,32 +396,33 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) return -1; } -static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, +static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo, enum radeon_bo_usage usage, enum radeon_bo_domain domains, unsigned priority, enum radeon_bo_domain *added_domains) { - struct amdgpu_cs_buffer *reloc; + struct amdgpu_cs_buffer *buffer; unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1); int i = -1; - priority = MIN2(priority, 15); + assert(priority < 64); *added_domains = 0; - i = amdgpu_get_reloc(cs, bo); + i = amdgpu_lookup_buffer(cs, bo); if (i >= 0) { - reloc = &cs->buffers[i]; - reloc->usage |= usage; - *added_domains = domains & ~reloc->domains; - reloc->domains |= domains; - cs->flags[i] = MAX2(cs->flags[i], priority); + buffer = &cs->buffers[i]; + buffer->priority_usage |= 1llu << priority; + buffer->usage |= usage; + *added_domains = domains & ~buffer->domains; + buffer->domains |= domains; + cs->flags[i] = MAX2(cs->flags[i], priority / 4); return i; } - /* New relocation, check if the backing array is large enough. */ + /* New buffer, check if the backing array is large enough. */ if (cs->num_buffers >= cs->max_num_buffers) { uint32_t size; cs->max_num_buffers += 10; @@ -435,16 +436,17 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, cs->flags = realloc(cs->flags, cs->max_num_buffers); } - /* Initialize the new relocation. */ + /* Initialize the new buffer. */ cs->buffers[cs->num_buffers].bo = NULL; amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo); cs->handles[cs->num_buffers] = bo->bo; - cs->flags[cs->num_buffers] = priority; + cs->flags[cs->num_buffers] = priority / 4; p_atomic_inc(&bo->num_cs_references); - reloc = &cs->buffers[cs->num_buffers]; - reloc->bo = bo; - reloc->usage = usage; - reloc->domains = domains; + buffer = &cs->buffers[cs->num_buffers]; + buffer->bo = bo; + buffer->priority_usage = 1llu << priority; + buffer->usage = usage; + buffer->domains = domains; cs->buffer_indices_hashlist[hash] = cs->num_buffers; @@ -452,8 +454,8 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, return cs->num_buffers++; } -static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs, - struct radeon_winsys_cs_handle *buf, +static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs, + struct pb_buffer *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domains, enum radeon_bo_priority priority) @@ -464,7 +466,7 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs, struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; enum radeon_bo_domain added_domains; - unsigned index = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain, + unsigned index = amdgpu_add_buffer(cs, bo, usage, bo->initial_domain, priority, &added_domains); if (added_domains & RADEON_DOMAIN_GTT) @@ -475,12 +477,12 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs, return index; } -static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs, - struct radeon_winsys_cs_handle *buf) +static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs, + struct pb_buffer *buf) { struct amdgpu_cs *cs = amdgpu_cs(rcs); - return amdgpu_get_reloc(cs, (struct amdgpu_winsys_bo*)buf); + return amdgpu_lookup_buffer(cs, (struct amdgpu_winsys_bo*)buf); } static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs) @@ -498,6 +500,22 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64 return status; } +static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs, + struct radeon_bo_list_item *list) +{ + struct amdgpu_cs *cs = amdgpu_cs(rcs); + int i; + + if (list) { + for (i = 0; i < cs->num_buffers; i++) { + pb_reference(&list[i].buf, &cs->buffers[i].bo->base); + list[i].vm_address = cs->buffers[i].bo->va; + list[i].priority_usage = cs->buffers[i].priority_usage; + } + } + return cs->num_buffers; +} + static void amdgpu_cs_do_submission(struct amdgpu_cs *cs, struct pipe_fence_handle **out_fence) { @@ -587,6 +605,7 @@ static void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs) } DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", FALSE) static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, @@ -599,25 +618,13 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, switch (cs->base.ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ - if (ws->info.chip_class <= SI) { - while (rcs->cdw & 7) - OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ - } else { - while (rcs->cdw & 7) - OUT_CS(&cs->base, 0x00000000); /* NOP packet */ - } + while (rcs->cdw & 7) + OUT_CS(&cs->base, 0x00000000); /* NOP packet */ break; case RING_GFX: - /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements - * r6xx, requires at least 4 dw alignment to avoid a hw bug. - */ - if (ws->info.chip_class <= SI) { - while (rcs->cdw & 7) - OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ - } else { - while (rcs->cdw & 7) - OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ - } + /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */ + while (rcs->cdw & 7) + OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ break; case RING_UVD: while (rcs->cdw & 15) @@ -631,16 +638,42 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, fprintf(stderr, "amdgpu: command stream overflowed\n"); } - amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer, - RADEON_USAGE_READ, 0, RADEON_PRIO_MIN); + amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer, + RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); /* If the CS is not empty or overflowed.... */ if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) { int r; - r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, - cs->handles, cs->flags, - &cs->request.resources); + /* Use a buffer list containing all allocated buffers if requested. */ + if (debug_get_option_all_bos()) { + struct amdgpu_winsys_bo *bo; + amdgpu_bo_handle *handles; + unsigned num = 0; + + pipe_mutex_lock(ws->global_bo_list_lock); + + handles = malloc(sizeof(handles[0]) * ws->num_buffers); + if (!handles) { + pipe_mutex_unlock(ws->global_bo_list_lock); + goto cleanup; + } + + LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) { + assert(num < ws->num_buffers); + handles[num++] = bo->bo; + } + + r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, + handles, NULL, + &cs->request.resources); + free(handles); + pipe_mutex_unlock(ws->global_bo_list_lock); + } else { + r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, + cs->handles, cs->flags, + &cs->request.resources); + } if (r) { fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r); @@ -676,7 +709,7 @@ static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs) } static boolean amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs, - struct radeon_winsys_cs_handle *_buf, + struct pb_buffer *_buf, enum radeon_bo_usage usage) { struct amdgpu_cs *cs = amdgpu_cs(rcs); @@ -692,10 +725,11 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws) ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; ws->base.cs_create = amdgpu_cs_create; ws->base.cs_destroy = amdgpu_cs_destroy; - ws->base.cs_add_reloc = amdgpu_cs_add_reloc; - ws->base.cs_get_reloc = amdgpu_cs_get_reloc; + ws->base.cs_add_buffer = amdgpu_cs_add_buffer; + ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer; ws->base.cs_validate = amdgpu_cs_validate; ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit; + ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list; ws->base.cs_flush = amdgpu_cs_flush; ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced; ws->base.cs_sync_flush = amdgpu_cs_sync_flush; diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 12c6b624b..6ad3cddf7 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -45,6 +45,7 @@ struct amdgpu_ctx { struct amdgpu_cs_buffer { struct amdgpu_winsys_bo *bo; + uint64_t priority_usage; enum radeon_bo_usage usage; enum radeon_bo_domain domains; }; @@ -68,7 +69,7 @@ struct amdgpu_cs { struct amdgpu_cs_request request; struct amdgpu_cs_ib_info ib; - /* Relocs. */ + /* Buffers. */ unsigned max_num_buffers; unsigned num_buffers; amdgpu_bo_handle *handles; @@ -115,7 +116,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst, *rdst = rsrc; } -int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo); +int amdgpu_lookup_buffer(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo); static inline struct amdgpu_cs * amdgpu_cs(struct radeon_winsys_cs *base) @@ -128,8 +129,8 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) { int num_refs = bo->num_cs_references; - return num_refs == bo->rws->num_cs || - (num_refs && amdgpu_get_reloc(cs, bo) != -1); + return num_refs == bo->ws->num_cs || + (num_refs && amdgpu_lookup_buffer(cs, bo) != -1); } static inline boolean @@ -142,7 +143,7 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs, if (!bo->num_cs_references) return FALSE; - index = amdgpu_get_reloc(cs, bo); + index = amdgpu_lookup_buffer(cs, bo); if (index == -1) return FALSE; diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c index 358df3810..4c837a8e2 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c @@ -145,11 +145,9 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws) regValue.backendDisables = ws->amdinfo.backend_disable[0]; regValue.pTileConfig = ws->amdinfo.gb_tile_mode; - regValue.noOfEntries = sizeof(ws->amdinfo.gb_tile_mode) / - sizeof(ws->amdinfo.gb_tile_mode[0]); + regValue.noOfEntries = ARRAY_SIZE(ws->amdinfo.gb_tile_mode); regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode; - regValue.noOfMacroEntries = sizeof(ws->amdinfo.gb_macro_tile_mode) / - sizeof(ws->amdinfo.gb_macro_tile_mode[0]); + regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode); createFlags.value = 0; createFlags.useTileIndex = 1; @@ -175,7 +173,9 @@ static int compute_level(struct amdgpu_winsys *ws, struct radeon_surf *surf, bool is_stencil, unsigned level, unsigned type, bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut) + ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, + ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, + ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut) { struct radeon_surf_level *surf_level; ADDR_E_RETURNCODE ret; @@ -248,6 +248,31 @@ static int compute_level(struct amdgpu_winsys *ws, surf->tiling_index[level] = AddrSurfInfoOut->tileIndex; surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize; + + if (AddrSurfInfoIn->flags.dccCompatible) { + AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize; + AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; + AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; + AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; + AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; + + ret = AddrComputeDccInfo(ws->addrlib, + AddrDccIn, + AddrDccOut); + + if (ret == ADDR_OK) { + surf_level->dcc_offset = surf->dcc_size; + surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize; + surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign); + } else { + surf->dcc_size = 0; + surf_level->dcc_offset = 0; + } + } else { + surf->dcc_size = 0; + surf_level->dcc_offset = 0; + } + return 0; } @@ -259,6 +284,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, bool compressed; ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; + ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; + ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; ADDR_TILEINFO AddrTileInfoIn = {0}; ADDR_TILEINFO AddrTileInfoOut = {0}; int r; @@ -269,6 +296,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); + AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); + AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; type = RADEON_SURF_GET(surf->flags, TYPE); @@ -318,10 +347,10 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, } } else { - AddrSurfInfoIn.bpp = surf->bpe * 8; + AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; } - AddrSurfInfoIn.numSamples = surf->nsamples; + AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples; AddrSurfInfoIn.tileIndex = -1; /* Set the micro tile type. */ @@ -339,6 +368,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0; AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0; AddrSurfInfoIn.flags.degrade4Space = 1; + AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + !(surf->flags & RADEON_SURF_SCANOUT) && + !compressed && AddrDccIn.numSamples <= 1; /* This disables incorrect calculations (hacks) in addrlib. */ AddrSurfInfoIn.flags.noStencil = 1; @@ -375,11 +407,13 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, } surf->bo_size = 0; + surf->dcc_size = 0; + surf->dcc_alignment = 1; /* Calculate texture layout information. */ for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, false, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); if (r) return r; @@ -406,7 +440,7 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, true, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); if (r) return r; diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 824f0d380..fc7562d8f 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -68,7 +68,6 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) { case CIK__PIPE_CONFIG__ADDR_SURF_P2: - default: return 2; case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16: case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16: @@ -86,23 +85,13 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16: case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: return 16; + default: + fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n"); + assert(!"this should never occur"); + return 2; } } -/* Convert Sea Islands register values GB_ADDR_CFG and MC_ADDR_CFG - * into GB_TILING_CONFIG register which is only present on R600-R700. */ -static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info) -{ - unsigned num_pipes = info->gb_addr_cfg & 0x7; - unsigned num_banks = info->mc_arb_ramcfg & 0x3; - unsigned pipe_interleave_bytes = (info->gb_addr_cfg >> 4) & 0x7; - unsigned row_size = (info->gb_addr_cfg >> 28) & 0x3; - - return num_pipes | (num_banks << 4) | - (pipe_interleave_bytes << 8) | - (row_size << 12); -} - /* Helper function to do the ioctls needed for setup and init. */ static boolean do_winsys_init(struct amdgpu_winsys *ws) { @@ -185,10 +174,9 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws) goto fail; } - /* LLVM 3.6 is required for VI. */ + /* LLVM 3.6.1 is required for VI. */ if (ws->info.chip_class >= VI && - (HAVE_LLVM < 0x0306 || - (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1))) { + HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1) { fprintf(stderr, "amdgpu: LLVM 3.6.1 is required, got LLVM %i.%i.%i\n", HAVE_LLVM >> 8, HAVE_LLVM & 255, MESA_LLVM_VERSION_PATCH); goto fail; @@ -251,37 +239,31 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws) ws->info.gart_size = gtt.heap_size; ws->info.vram_size = vram.heap_size; /* convert the shader clock from KHz to MHz */ - ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000; + ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000; ws->info.max_se = ws->amdinfo.num_shader_engines; ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine; ws->info.has_uvd = uvd.available_rings != 0; ws->info.vce_fw_version = vce.available_rings ? vce_version : 0; ws->info.has_userptr = TRUE; - ws->info.r600_num_backends = ws->amdinfo.rb_pipes; - ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq; - ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo); - ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo); - ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */ - ws->info.r600_virtual_address = TRUE; - ws->info.r600_has_dma = dma.available_rings != 0; - - /* Guess what the maximum compute unit number is by looking at the mask - * of enabled CUs. - */ + ws->info.num_render_backends = ws->amdinfo.rb_pipes; + ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq; + ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo); + ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7); + ws->info.has_virtual_memory = TRUE; + ws->info.has_sdma = dma.available_rings != 0; + + /* Get the number of good compute units. */ + ws->info.num_good_compute_units = 0; for (i = 0; i < ws->info.max_se; i++) - for (j = 0; j < ws->info.max_sh_per_se; j++) { - unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]); - - if (ws->info.max_compute_units < max) - ws->info.max_compute_units = max; - } - ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se; + for (j = 0; j < ws->info.max_sh_per_se; j++) + ws->info.num_good_compute_units += + util_bitcount(ws->amdinfo.cu_bitmap[i][j]); memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode, sizeof(ws->amdinfo.gb_tile_mode)); ws->info.si_tile_mode_array_valid = TRUE; - ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask; + ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask; memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode, sizeof(ws->amdinfo.gb_macro_tile_mode)); @@ -304,11 +286,9 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws) struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; pipe_mutex_destroy(ws->bo_fence_lock); - - ws->cman->destroy(ws->cman); - ws->kman->destroy(ws->kman); + pb_cache_deinit(&ws->bo_cache); + pipe_mutex_destroy(ws->global_bo_list_lock); AddrDestroy(ws->addrlib); - amdgpu_device_deinitialize(ws->dev); FREE(rws); } @@ -365,14 +345,14 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws, return 0; } -static void amdgpu_read_registers(struct radeon_winsys *rws, +static bool amdgpu_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t *out) { struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; - amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, - 0xffffffff, 0, out); + return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, + 0xffffffff, 0, out) == 0; } static unsigned hash_dev(void *key) @@ -389,9 +369,9 @@ static int compare_dev(void *key1, void *key2) return key1 != key2; } -static bool amdgpu_winsys_unref(struct radeon_winsys *ws) +static bool amdgpu_winsys_unref(struct radeon_winsys *rws) { - struct amdgpu_winsys *rws = (struct amdgpu_winsys*)ws; + struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; bool destroy; /* When the reference counter drops to zero, remove the device pointer @@ -401,9 +381,9 @@ static bool amdgpu_winsys_unref(struct radeon_winsys *ws) * from the table when the counter drops to 0. */ pipe_mutex_lock(dev_tab_mutex); - destroy = pipe_reference(&rws->reference, NULL); + destroy = pipe_reference(&ws->reference, NULL); if (destroy && dev_tab) - util_hash_table_remove(dev_tab, rws->dev); + util_hash_table_remove(dev_tab, ws->dev); pipe_mutex_unlock(dev_tab_mutex); return destroy; @@ -461,13 +441,9 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create) goto fail; /* Create managers. */ - ws->kman = amdgpu_bomgr_create(ws); - if (!ws->kman) - goto fail; - ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0, - (ws->info.vram_size + ws->info.gart_size) / 8); - if (!ws->cman) - goto fail; + pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0, + (ws->info.vram_size + ws->info.gart_size) / 8, + amdgpu_bo_destroy, amdgpu_bo_can_reclaim); /* init reference */ pipe_reference_init(&ws->reference, 1); @@ -480,10 +456,12 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create) ws->base.query_value = amdgpu_query_value; ws->base.read_registers = amdgpu_read_registers; - amdgpu_bomgr_init_functions(ws); + amdgpu_bo_init_functions(ws); amdgpu_cs_init_functions(ws); amdgpu_surface_init_functions(ws); + LIST_INITHEAD(&ws->global_bo_list); + pipe_mutex_init(ws->global_bo_list_lock); pipe_mutex_init(ws->bo_fence_lock); /* Create the screen at the end. The winsys must be initialized @@ -509,10 +487,7 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create) fail: pipe_mutex_unlock(dev_tab_mutex); - if (ws->cman) - ws->cman->destroy(ws->cman); - if (ws->kman) - ws->kman->destroy(ws->kman); + pb_cache_deinit(&ws->bo_cache); FREE(ws); return NULL; } diff --git a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 4d07644c9..91b9be4bb 100644 --- a/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -32,6 +32,7 @@ #ifndef AMDGPU_WINSYS_H #define AMDGPU_WINSYS_H +#include "pipebuffer/pb_cache.h" #include "gallium/drivers/radeon/radeon_winsys.h" #include "addrlib/addrinterface.h" #include "os/os_thread.h" @@ -42,6 +43,7 @@ struct amdgpu_cs; struct amdgpu_winsys { struct radeon_winsys base; struct pipe_reference reference; + struct pb_cache bo_cache; amdgpu_device_handle dev; @@ -57,13 +59,15 @@ struct amdgpu_winsys { struct radeon_info info; - struct pb_manager *kman; - struct pb_manager *cman; - struct amdgpu_gpu_info amdinfo; ADDR_HANDLE addrlib; uint32_t rev_id; unsigned family; + + /* List of all allocated buffers */ + pipe_mutex global_bo_list_lock; + struct list_head global_bo_list; + unsigned num_buffers; }; static inline struct amdgpu_winsys * |