summary | refs | log | tree | commit | diff
path: root/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org> 2024-01-16 23:38:15 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2024-01-16 23:38:15 +0000
commit: 05d4111894ef4fd94ed8efa99fca0c508e89db16 (patch)
tree: e57cb1ce48f5e92fee1882516089e05f72d15426 /sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
parent: 77a5aef715cfd74dd54c674bc228dfc80962248a (diff)
update drm to linux 6.6.12
Thanks to the OpenBSD Foundation for sponsoring this work.
Diffstat (limited to 'sys/dev/pci/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r-- sys/dev/pci/drm/amd/amdkfd/kfd_svm.c 532
1 files changed, 320 insertions, 212 deletions
diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c b/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
index 208812512d8..63ce30ea689 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
@@ -23,10 +23,14 @@
#include <linux/types.h>
#include <linux/sched/task.h>
+#include <linux/dynamic_debug.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
+
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
-#include "amdgpu_mn.h"
+#include "amdgpu_hmm.h"
#include "amdgpu.h"
#include "amdgpu_xgmi.h"
#include "kfd_priv.h"
@@ -45,6 +49,13 @@
* page table is updated.
*/
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
+#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
+#define dynamic_svm_range_dump(svms) \
+ _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
+#else
+#define dynamic_svm_range_dump(svms) \
+ do { if (0) svm_range_debug_dump(svms); } while (0)
+#endif
/* Giant svm range split into smaller ranges based on this, it is decided using
* minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to
@@ -169,12 +180,11 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
page = hmm_pfn_to_page(hmm_pfns[i]);
if (is_zone_device_page(page)) {
- struct amdgpu_device *bo_adev =
- amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
bo_adev->vm_manager.vram_base_offset -
- bo_adev->kfd.dev->pgmap.range.start;
+ bo_adev->kfd.pgmap.range.start;
addr[i] |= SVM_RANGE_VRAM_DOMAIN;
pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
continue;
@@ -239,7 +249,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
}
}
-void svm_range_free_dma_mappings(struct svm_range *prange)
+void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
{
struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
@@ -259,14 +269,15 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
pr_debug("failed to find device idx %d\n", gpuidx);
continue;
}
- dev = &pdd->dev->pdev->dev;
- svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+ dev = &pdd->dev->adev->pdev->dev;
+ if (unmap_dma)
+ svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
kvfree(dma_addr);
prange->dma_addr[gpuidx] = NULL;
}
}
-static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
+static void svm_range_free(struct svm_range *prange, bool do_unmap)
{
uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
@@ -275,12 +286,12 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
prange->start, prange->last);
svm_range_vram_node_free(prange);
- svm_range_free_dma_mappings(prange);
+ svm_range_free_dma_mappings(prange, do_unmap);
- if (update_mem_usage && !p->xnack_enabled) {
+ if (do_unmap && !p->xnack_enabled) {
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
}
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
@@ -313,7 +324,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
p = container_of(svms, struct kfd_process, svms);
if (!p->xnack_enabled && update_mem_usage &&
amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
pr_info("SVM mapping failed, exceeds resident system memory limit\n");
kfree(prange);
return NULL;
@@ -423,10 +434,8 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
}
static bool
-svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
+svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange)
{
- struct amdgpu_device *bo_adev;
-
mutex_lock(&prange->lock);
if (!prange->svm_bo) {
mutex_unlock(&prange->lock);
@@ -439,12 +448,11 @@ svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
}
if (svm_bo_ref_unless_zero(prange->svm_bo)) {
/*
- * Migrate from GPU to GPU, remove range from source bo_adev
- * svm_bo range list, and return false to allocate svm_bo from
- * destination adev.
+ * Migrate from GPU to GPU, remove range from source svm_bo->node
+ * range list, and return false to allocate svm_bo from destination
+ * node.
*/
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- if (bo_adev != adev) {
+ if (prange->svm_bo->node != node) {
mutex_unlock(&prange->lock);
spin_lock(&prange->svm_bo->list_lock);
@@ -512,7 +520,7 @@ static struct svm_range_bo *svm_range_bo_new(void)
}
int
-svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
+svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear)
{
struct amdgpu_bo_param bp;
@@ -527,7 +535,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
prange->start, prange->last);
- if (svm_range_validate_svm_bo(adev, prange))
+ if (svm_range_validate_svm_bo(node, prange))
return 0;
svm_bo = svm_range_bo_new();
@@ -541,6 +549,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
kfree(svm_bo);
return -ESRCH;
}
+ svm_bo->node = node;
svm_bo->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
mm,
@@ -557,13 +566,20 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
+ if (node->xcp)
+ bp.xcp_id_plus1 = node->xcp->id + 1;
- r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ r = amdgpu_bo_create_user(node->adev, &bp, &ubo);
if (r) {
pr_debug("failed %d to create bo\n", r);
goto create_bo_failed;
}
bo = &ubo->bo;
+
+ pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n",
+ bo->tbo.resource->start << PAGE_SHIFT, bp.size,
+ bp.xcp_id_plus1 - 1);
+
r = amdgpu_bo_reserve(bo, true);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
@@ -623,45 +639,30 @@ void svm_range_vram_node_free(struct svm_range *prange)
mutex_unlock(&prange->lock);
}
-struct amdgpu_device *
-svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
+struct kfd_node *
+svm_range_get_node_by_id(struct svm_range *prange, uint32_t gpu_id)
{
- struct kfd_process_device *pdd;
struct kfd_process *p;
- int32_t gpu_idx;
+ struct kfd_process_device *pdd;
p = container_of(prange->svms, struct kfd_process, svms);
-
- gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id);
- if (gpu_idx < 0) {
- pr_debug("failed to get device by id 0x%x\n", gpu_id);
- return NULL;
- }
- pdd = kfd_process_device_from_gpuidx(p, gpu_idx);
+ pdd = kfd_process_device_data_by_id(p, gpu_id);
if (!pdd) {
- pr_debug("failed to get device by idx 0x%x\n", gpu_idx);
+ pr_debug("failed to get kfd process device by id 0x%x\n", gpu_id);
return NULL;
}
- return pdd->dev->adev;
+ return pdd->dev;
}
struct kfd_process_device *
-svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
+svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node)
{
struct kfd_process *p;
- int32_t gpu_idx, gpuid;
- int r;
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx);
- if (r) {
- pr_debug("failed to get device id by adev %p\n", adev);
- return NULL;
- }
-
- return kfd_process_device_from_gpuidx(p, gpu_idx);
+ return kfd_get_process_device_data(node, p);
}
static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
@@ -741,7 +742,9 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
case KFD_IOCTL_SVM_ATTR_ACCESS:
case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
- *update_mapping = true;
+ if (!p->xnack_enabled)
+ *update_mapping = true;
+
gpuidx = kfd_process_gpuidx_from_gpuid(p,
attrs[i].value);
if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
@@ -864,6 +867,37 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
}
}
+static void *
+svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
+ uint64_t offset)
+{
+ unsigned char *dst;
+
+ dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
+ if (!dst)
+ return NULL;
+ memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+ return (void *)dst;
+}
+
+static int
+svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
+{
+ int i;
+
+ for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+ if (!src->dma_addr[i])
+ continue;
+ dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
+ sizeof(*src->dma_addr[i]), src->npages, 0);
+ if (!dst->dma_addr[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
@@ -878,22 +912,16 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
if (!pold)
return 0;
- new = kvmalloc_array(new_n, size, GFP_KERNEL);
+ d = (new_start - old_start) * size;
+ new = svm_range_copy_array(pold, size, new_n, d);
if (!new)
return -ENOMEM;
-
- d = (new_start - old_start) * size;
- memcpy(new, pold + d, new_n * size);
-
- old = kvmalloc_array(old_n, size, GFP_KERNEL);
+ d = (new_start == old_start) ? new_n * size : 0;
+ old = svm_range_copy_array(pold, size, old_n, d);
if (!old) {
kvfree(new);
return -ENOMEM;
}
-
- d = (new_start == old_start) ? new_n * size : 0;
- memcpy(old, pold + d, old_n * size);
-
kvfree(pold);
*(void **)ppold = old;
*(void **)ppnew = new;
@@ -1152,31 +1180,39 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
}
return 0;
}
+static bool
+svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
+{
+ return (node_a->adev == node_b->adev ||
+ amdgpu_xgmi_same_hive(node_a->adev, node_b->adev));
+}
static uint64_t
-svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
- int domain)
+svm_range_get_pte_flags(struct kfd_node *node,
+ struct svm_range *prange, int domain)
{
- struct amdgpu_device *bo_adev;
+ struct kfd_node *bo_node;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+ bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
+ unsigned int mtype_local;
if (domain == SVM_RANGE_VRAM_DOMAIN)
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ bo_node = prange->svm_bo->node;
- switch (KFD_GC_VERSION(adev->kfd.dev)) {
+ switch (node->adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
- if (bo_adev == adev) {
+ if (bo_node == node) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ if (svm_nodes_in_same_hive(node, bo_node))
snoop = true;
}
} else {
@@ -1186,15 +1222,15 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
break;
case IP_VERSION(9, 4, 2):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
- if (bo_adev == adev) {
+ if (bo_node == node) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (node->adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ if (svm_nodes_in_same_hive(node, bo_node))
snoop = true;
}
} else {
@@ -1202,6 +1238,37 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
+ case IP_VERSION(9, 4, 3):
+ mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+ (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW);
+ snoop = true;
+ if (uncached) {
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
+ /* local HBM region close to partition */
+ if (bo_node->adev == node->adev &&
+ (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
+ mapping_flags |= mtype_local;
+ /* local HBM region far from partition or remote XGMI GPU */
+ else if (svm_nodes_in_same_hive(bo_node, node))
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ /* PCIe P2P */
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ /* system memory accessed by the APU */
+ } else if (node->adev->flags & AMD_IS_APU) {
+ /* On NUMA systems, locality is determined per-page
+ * in amdgpu_gmc_override_vm_pte_flags
+ */
+ if (num_possible_nodes() <= 1)
+ mapping_flags |= mtype_local;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ /* system memory accessed by the dGPU */
+ } else {
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ }
+ break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
@@ -1218,7 +1285,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
- pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+ pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
return pte_flags;
}
@@ -1325,7 +1392,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
last_start, prange->start + i, last_domain ? "GPU" : "CPU");
- pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
+ pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
if (readonly)
pte_flags &= ~AMDGPU_PTE_WRITEABLE;
@@ -1334,6 +1401,10 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);
+ /* For dGPU mode, we use same vm_manager to allocate VRAM for
+ * different memory partition based on fpfn/lpfn, we should use
+ * same vm_manager.vram_base_offset regardless memory partition.
+ */
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
last_start, prange->start + i,
pte_flags,
@@ -1371,16 +1442,14 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
unsigned long *bitmap, bool wait, bool flush_tlb)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *bo_adev;
+ struct amdgpu_device *bo_adev = NULL;
struct kfd_process *p;
struct dma_fence *fence = NULL;
uint32_t gpuidx;
int r = 0;
if (prange->svm_bo && prange->ttm_res)
- bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- else
- bo_adev = NULL;
+ bo_adev = prange->svm_bo->node->adev;
p = container_of(prange->svms, struct kfd_process, svms);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
@@ -1429,37 +1498,34 @@ struct svm_validate_context {
struct svm_range *prange;
bool intr;
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
- struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
- struct list_head validate_list;
- struct ww_acquire_ctx ticket;
+ struct drm_exec exec;
};
-static int svm_range_reserve_bos(struct svm_validate_context *ctx)
+static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
{
struct kfd_process_device *pdd;
struct amdgpu_vm *vm;
uint32_t gpuidx;
int r;
- INIT_LIST_HEAD(&ctx->validate_list);
- for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
- pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
- if (!pdd) {
- pr_debug("failed to find device idx %d\n", gpuidx);
- return -EINVAL;
- }
- vm = drm_priv_to_vm(pdd->drm_priv);
-
- ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
- ctx->tv[gpuidx].num_shared = 4;
- list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
- }
+ drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ r = -EINVAL;
+ goto unreserve_out;
+ }
+ vm = drm_priv_to_vm(pdd->drm_priv);
- r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
- ctx->intr, NULL);
- if (r) {
- pr_debug("failed %d to reserve bo\n", r);
- return r;
+ r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(r)) {
+ pr_debug("failed %d to reserve bo\n", r);
+ goto unreserve_out;
+ }
+ }
}
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
@@ -1482,13 +1548,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
return 0;
unreserve_out:
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
return r;
}
static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
}
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
@@ -1496,6 +1562,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
struct kfd_process_device *pdd;
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd)
+ return NULL;
return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
}
@@ -1528,48 +1596,54 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
struct svm_range *prange, int32_t gpuidx,
bool intr, bool wait, bool flush_tlb)
{
- struct svm_validate_context ctx;
+ struct svm_validate_context *ctx;
unsigned long start, end, addr;
struct kfd_process *p;
void *owner;
int32_t idx;
int r = 0;
- ctx.process = container_of(prange->svms, struct kfd_process, svms);
- ctx.prange = prange;
- ctx.intr = intr;
+ ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->process = container_of(prange->svms, struct kfd_process, svms);
+ ctx->prange = prange;
+ ctx->intr = intr;
if (gpuidx < MAX_GPU_INSTANCE) {
- bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
- bitmap_set(ctx.bitmap, gpuidx, 1);
- } else if (ctx.process->xnack_enabled) {
- bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+ bitmap_zero(ctx->bitmap, MAX_GPU_INSTANCE);
+ bitmap_set(ctx->bitmap, gpuidx, 1);
+ } else if (ctx->process->xnack_enabled) {
+ bitmap_copy(ctx->bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
/* If prefetch range to GPU, or GPU retry fault migrate range to
* GPU, which has ACCESS attribute to the range, create mapping
* on that GPU.
*/
if (prange->actual_loc) {
- gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process,
+ gpuidx = kfd_process_gpuidx_from_gpuid(ctx->process,
prange->actual_loc);
if (gpuidx < 0) {
WARN_ONCE(1, "failed get device by id 0x%x\n",
prange->actual_loc);
- return -EINVAL;
+ r = -EINVAL;
+ goto free_ctx;
}
if (test_bit(gpuidx, prange->bitmap_access))
- bitmap_set(ctx.bitmap, gpuidx, 1);
+ bitmap_set(ctx->bitmap, gpuidx, 1);
}
} else {
- bitmap_or(ctx.bitmap, prange->bitmap_access,
+ bitmap_or(ctx->bitmap, prange->bitmap_access,
prange->bitmap_aip, MAX_GPU_INSTANCE);
}
- if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
- if (!prange->mapped_to_gpu)
- return 0;
-
- bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+ bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ if (!prange->mapped_to_gpu ||
+ bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+ r = 0;
+ goto free_ctx;
+ }
}
if (prange->actual_loc && !prange->ttm_res) {
@@ -1577,15 +1651,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
* svm_migrate_ram_to_vram after allocating a BO.
*/
WARN_ONCE(1, "VRAM BO missing during validation\n");
- return -EINVAL;
+ r = -EINVAL;
+ goto free_ctx;
}
- svm_range_reserve_bos(&ctx);
+ svm_range_reserve_bos(ctx, intr);
p = container_of(prange->svms, struct kfd_process, svms);
- owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
+ owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
MAX_GPU_INSTANCE));
- for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
+ for_each_set_bit(idx, ctx->bitmap, MAX_GPU_INSTANCE) {
if (kfd_svm_page_owner(p, idx) != owner) {
owner = NULL;
break;
@@ -1594,73 +1669,72 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
- for (addr = start; addr < end && !r; ) {
+ for (addr = start; !r && addr < end; ) {
struct hmm_range *hmm_range;
struct vm_area_struct *vma;
- unsigned long next;
+ unsigned long next = 0;
unsigned long offset;
unsigned long npages;
bool readonly;
- vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start) {
+ vma = vma_lookup(mm, addr);
+ if (vma) {
+ readonly = !(vma->vm_flags & VM_WRITE);
+
+ next = min(vma->vm_end, end);
+ npages = (next - addr) >> PAGE_SHIFT;
+ WRITE_ONCE(p->svms.faulting_task, current);
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+ readonly, owner, NULL,
+ &hmm_range);
+ WRITE_ONCE(p->svms.faulting_task, NULL);
+ if (r) {
+ pr_debug("failed %d to get svm range pages\n", r);
+ if (r == -EBUSY)
+ r = -EAGAIN;
+ }
+ } else {
r = -EFAULT;
- goto unreserve_out;
- }
- readonly = !(vma->vm_flags & VM_WRITE);
-
- next = min(vma->vm_end, end);
- npages = (next - addr) >> PAGE_SHIFT;
- WRITE_ONCE(p->svms.faulting_task, current);
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- addr, npages, &hmm_range,
- readonly, true, owner);
- WRITE_ONCE(p->svms.faulting_task, NULL);
- if (r) {
- pr_debug("failed %d to get svm range pages\n", r);
- goto unreserve_out;
}
- offset = (addr - start) >> PAGE_SHIFT;
- r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
- hmm_range->hmm_pfns);
- if (r) {
- pr_debug("failed %d to dma map range\n", r);
- goto unreserve_out;
+ if (!r) {
+ offset = (addr - start) >> PAGE_SHIFT;
+ r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
+ hmm_range->hmm_pfns);
+ if (r)
+ pr_debug("failed %d to dma map range\n", r);
}
svm_range_lock(prange);
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+ if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
pr_debug("hmm update the range, need validate again\n");
r = -EAGAIN;
- goto unlock_out;
}
- if (!list_empty(&prange->child_list)) {
+
+ if (!r && !list_empty(&prange->child_list)) {
pr_debug("range split by unmap in parallel, validate again\n");
r = -EAGAIN;
- goto unlock_out;
}
- r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx.bitmap, wait, flush_tlb);
+ if (!r)
+ r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+ ctx->bitmap, wait, flush_tlb);
+
+ if (!r && next == end)
+ prange->mapped_to_gpu = true;
-unlock_out:
svm_range_unlock(prange);
addr = next;
}
- if (addr == end) {
- prange->validated_once = true;
- prange->mapped_to_gpu = true;
- }
-
-unreserve_out:
- svm_range_unreserve_bos(&ctx);
-
+ svm_range_unreserve_bos(ctx);
if (!r)
prange->validate_timestamp = ktime_get_boottime();
+free_ctx:
+ kfree(ctx);
+
return r;
}
@@ -1789,6 +1863,7 @@ out_reschedule:
* @mm: current process mm_struct
* @start: starting process queue number
* @last: last process queue number
+ * @event: mmu notifier event when range is evicted or migrated
*
* Stop all queues of the process to ensure GPU doesn't access the memory, then
* return to let CPU evict the buffer and proceed CPU pagetable update.
@@ -1887,7 +1962,10 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new = svm_range_new(old->svms, old->start, old->last, false);
if (!new)
return NULL;
-
+ if (svm_range_copy_dma_addrs(new, old)) {
+ svm_range_free(new, false);
+ return NULL;
+ }
if (old->svm_bo) {
new->ttm_res = old->ttm_res;
new->offset = old->offset;
@@ -1912,14 +1990,23 @@ void svm_range_set_max_pages(struct amdgpu_device *adev)
{
uint64_t max_pages;
uint64_t pages, _pages;
+ uint64_t min_pages = 0;
+ int i, id;
+
+ for (i = 0; i < adev->kfd.dev->num_nodes; i++) {
+ if (adev->kfd.dev->nodes[i]->xcp)
+ id = adev->kfd.dev->nodes[i]->xcp->id;
+ else
+ id = -1;
+ pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17;
+ pages = clamp(pages, 1ULL << 9, 1ULL << 18);
+ pages = rounddown_pow_of_two(pages);
+ min_pages = min_not_zero(min_pages, pages);
+ }
- /* 1/32 VRAM size in pages */
- pages = adev->gmc.real_vram_size >> 17;
- pages = clamp(pages, 1ULL << 9, 1ULL << 18);
- pages = rounddown_pow_of_two(pages);
do {
max_pages = READ_ONCE(max_svm_range_pages);
- _pages = min_not_zero(max_pages, pages);
+ _pages = min_not_zero(max_pages, min_pages);
} while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
}
@@ -2010,7 +2097,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
next = interval_tree_iter_next(node, start, last);
next_start = min(node->last, last) + 1;
- if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+ if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+ prange->mapped_to_gpu) {
/* nothing to do */
} else if (node->start < start || node->last > last) {
/* node intersects the update range and its attributes
@@ -2178,7 +2266,15 @@ restart:
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
- &pdd->dev->adev->irq.ih1);
+ pdd->dev->adev->irq.retry_cam_enabled ?
+ &pdd->dev->adev->irq.ih :
+ &pdd->dev->adev->irq.ih1);
+
+ if (pdd->dev->adev->irq.retry_cam_enabled)
+ amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
+ &pdd->dev->adev->irq.ih_soft);
+
+
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
@@ -2505,29 +2601,31 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
*/
static int32_t
svm_range_best_restore_location(struct svm_range *prange,
- struct amdgpu_device *adev,
+ struct kfd_node *node,
int32_t *gpuidx)
{
- struct amdgpu_device *bo_adev, *preferred_adev;
+ struct kfd_node *bo_node, *preferred_node;
struct kfd_process *p;
uint32_t gpuid;
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx);
+ r = kfd_process_gpuid_from_node(p, node, &gpuid, gpuidx);
if (r < 0) {
pr_debug("failed to get gpuid from kgd\n");
return -1;
}
+ if (node->adev->gmc.is_app_apu)
+ return 0;
+
if (prange->preferred_loc == gpuid ||
prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
return prange->preferred_loc;
} else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
- preferred_adev = svm_range_get_adev_by_id(prange,
- prange->preferred_loc);
- if (amdgpu_xgmi_same_hive(adev, preferred_adev))
+ preferred_node = svm_range_get_node_by_id(prange, prange->preferred_loc);
+ if (preferred_node && svm_nodes_in_same_hive(node, preferred_node))
return prange->preferred_loc;
/* fall through */
}
@@ -2539,8 +2637,8 @@ svm_range_best_restore_location(struct svm_range *prange,
if (!prange->actual_loc)
return 0;
- bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
- if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ bo_node = svm_range_get_node_by_id(prange, prange->actual_loc);
+ if (bo_node && svm_nodes_in_same_hive(node, bo_node))
return prange->actual_loc;
else
return 0;
@@ -2558,16 +2656,13 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
struct interval_tree_node *node;
unsigned long start_limit, end_limit;
- vma = find_vma(p->mm, addr << PAGE_SHIFT);
- if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
+ if (!vma) {
pr_debug("VMA does not exist in address [0x%llx]\n", addr);
return -EFAULT;
}
- *is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
- vma->vm_end >= vma->vm_mm->start_brk) ||
- (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack);
+ *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
(unsigned long)ALIGN_DOWN(addr, 2UL << 8));
@@ -2657,7 +2752,7 @@ svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
}
static struct
-svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+svm_range *svm_range_create_unregistered_range(struct kfd_node *node,
struct kfd_process *p,
struct mm_struct *mm,
int64_t addr)
@@ -2692,7 +2787,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
pr_debug("Failed to create prange in address [0x%llx]\n", addr);
return NULL;
}
- if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
+ if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
pr_debug("failed to get gpuid from kgd\n");
svm_range_free(prange, true);
return NULL;
@@ -2746,7 +2841,7 @@ static bool svm_range_skip_recover(struct svm_range *prange)
}
static void
-svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
+svm_range_count_fault(struct kfd_node *node, struct kfd_process *p,
int32_t gpuidx)
{
struct kfd_process_device *pdd;
@@ -2759,7 +2854,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
uint32_t gpuid;
int r;
- r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx);
+ r = kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx);
if (r < 0)
return;
}
@@ -2787,6 +2882,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+ uint32_t vmid, uint32_t node_id,
uint64_t addr, bool write_fault)
{
struct mm_struct *mm = NULL;
@@ -2794,6 +2890,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct svm_range *prange;
struct kfd_process *p;
ktime_t timestamp = ktime_get_boottime();
+ struct kfd_node *node;
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
@@ -2801,7 +2898,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
bool migration = false;
int r = 0;
- if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
+ if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
pr_debug("device does not support SVM\n");
return -EFAULT;
}
@@ -2837,6 +2934,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out;
}
+ node = kfd_node_by_irq_ids(adev, node_id, vmid);
+ if (!node) {
+ pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+ vmid);
+ r = -EFAULT;
+ goto out;
+ }
mmap_read_lock(mm);
retry_write_locked:
mutex_lock(&svms->lock);
@@ -2855,7 +2959,7 @@ retry_write_locked:
write_locked = true;
goto retry_write_locked;
}
- prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+ prange = svm_range_create_unregistered_range(node, p, mm, addr);
if (!prange) {
pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
svms, addr);
@@ -2870,7 +2974,7 @@ retry_write_locked:
mutex_lock(&prange->migrate_mutex);
if (svm_range_skip_recover(prange)) {
- amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
r = 0;
goto out_unlock_range;
}
@@ -2887,8 +2991,8 @@ retry_write_locked:
/* __do_munmap removed VMA, return success as we are handling stale
* retry fault.
*/
- vma = find_vma(mm, addr << PAGE_SHIFT);
- if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ vma = vma_lookup(mm, addr << PAGE_SHIFT);
+ if (!vma) {
pr_debug("address 0x%llx VMA is removed\n", addr);
r = 0;
goto out_unlock_range;
@@ -2901,7 +3005,7 @@ retry_write_locked:
goto out_unlock_range;
}
- best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
+ best_loc = svm_range_best_restore_location(prange, node, &gpuidx);
if (best_loc == -1) {
pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
svms, prange->start, prange->last);
@@ -2913,7 +3017,7 @@ retry_write_locked:
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
- kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
+ kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
write_fault, timestamp);
if (prange->actual_loc != best_loc) {
@@ -2951,7 +3055,7 @@ retry_write_locked:
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
- kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
+ kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
migration);
out_unlock_range:
@@ -2960,7 +3064,7 @@ out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
- svm_range_count_fault(adev, p, gpuidx);
+ svm_range_count_fault(node, p, gpuidx);
mmput(mm);
out:
@@ -2968,7 +3072,7 @@ out:
if (r == -EAGAIN) {
pr_debug("recover vm fault later\n");
- amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
r = 0;
}
return r;
@@ -2992,10 +3096,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
if (xnack_enabled) {
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
} else {
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
if (r)
goto out_unlock;
reserved_size += size;
@@ -3005,10 +3109,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
size = (prange->last - prange->start + 1) << PAGE_SHIFT;
if (xnack_enabled) {
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
} else {
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
if (r)
goto out_unlock;
reserved_size += size;
@@ -3021,7 +3125,7 @@ out_unlock:
if (r)
amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
else
/* Change xnack mode must be inside svms lock, to avoid race with
* svm_range_deferred_list_work unreserve memory in parallel.
@@ -3079,7 +3183,7 @@ int svm_range_list_init(struct kfd_process *p)
spin_lock_init(&svms->deferred_list_lock);
for (i = 0; i < p->n_pdds; i++)
- if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev))
+ if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
return 0;
@@ -3168,9 +3272,8 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
start <<= PAGE_SHIFT;
end = start + (size << PAGE_SHIFT);
do {
- vma = find_vma(p->mm, start);
- if (!vma || start < vma->vm_start ||
- (vma->vm_flags & device_vma))
+ vma = vma_lookup(p->mm, start);
+ if (!vma || (vma->vm_flags & device_vma))
return -EFAULT;
start = min(end, vma->vm_end);
} while (start < end);
@@ -3211,7 +3314,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
uint32_t best_loc = prange->prefetch_loc;
struct kfd_process_device *pdd;
- struct amdgpu_device *bo_adev;
+ struct kfd_node *bo_node;
struct kfd_process *p;
uint32_t gpuidx;
@@ -3220,9 +3323,14 @@ svm_range_best_prefetch_location(struct svm_range *prange)
if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
goto out;
- bo_adev = svm_range_get_adev_by_id(prange, best_loc);
- if (!bo_adev) {
- WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc);
+ bo_node = svm_range_get_node_by_id(prange, best_loc);
+ if (!bo_node) {
+ WARN_ONCE(1, "failed to get valid kfd node at id%x\n", best_loc);
+ best_loc = 0;
+ goto out;
+ }
+
+ if (bo_node->adev->gmc.is_app_apu) {
best_loc = 0;
goto out;
}
@@ -3240,10 +3348,10 @@ svm_range_best_prefetch_location(struct svm_range *prange)
continue;
}
- if (pdd->dev->adev == bo_adev)
+ if (pdd->dev->adev == bo_node->adev)
continue;
- if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
+ if (!svm_nodes_in_same_hive(pdd->dev, bo_node)) {
best_loc = 0;
break;
}
@@ -3400,7 +3508,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
struct svm_range *next;
bool update_mapping = false;
bool flush_tlb;
- int r = 0;
+ int r, ret = 0;
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
p->pasid, &p->svms, start, start + size - 1, size);
@@ -3488,10 +3596,10 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
if (r)
- break;
+ ret = r;
}
- svm_range_debug_dump(svms);
+ dynamic_svm_range_dump(svms);
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
@@ -3501,7 +3609,7 @@ out:
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
&p->svms, start, start + size - 1, r);
- return r;
+ return ret ? ret : r;
}
static int