summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2024-11-18 00:22:25 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2024-11-18 00:22:25 +0000
commit41ee54c1c8089202c2f892c12832a0a6071f18cd (patch)
treebde7160a5ee80e38e73f7d73c74d27be91264a6e
parent8fb78b37ed8978d4ab896bafabbb4c0b9892c747 (diff)
drm/amdkfd: Accounting pdd vram_usage for svm
From Philip Yang 4c332037fcbb9bb53c46ba4f156951429acc4d97 in linux-6.6.y/6.6.62 68d26c10ef503175df3142db6fcd75dd94860592 in mainline linux
-rw-r--r--sys/dev/pci/drm/amd/amdkfd/kfd_chardev.c6
-rw-r--r--sys/dev/pci/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--sys/dev/pci/drm/amd/amdkfd/kfd_process.c4
-rw-r--r--sys/dev/pci/drm/amd/amdkfd/kfd_svm.c26
4 files changed, 32 insertions, 6 deletions
diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_chardev.c b/sys/dev/pci/drm/amd/amdkfd/kfd_chardev.c
index 0a103352163..2b64809adbb 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_chardev.c
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_chardev.c
@@ -1164,7 +1164,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
size >>= 1;
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+ atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
}
mutex_unlock(&p->mutex);
@@ -1235,7 +1235,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle));
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+ atomic64_sub(size, &pdd->vram_usage);
err_unlock:
err_pdd:
@@ -2352,7 +2352,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
bo_bucket->restored_offset = offset;
/* Update the VRAM usage count */
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ atomic64_add(bo_bucket->size, &pdd->vram_usage);
}
return 0;
}
diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_priv.h b/sys/dev/pci/drm/amd/amdkfd/kfd_priv.h
index 67204c3dfbb..27c9d5c4376 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_priv.h
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_priv.h
@@ -765,7 +765,7 @@ struct kfd_process_device {
enum kfd_pdd_bound bound;
/* VRAM usage */
- uint64_t vram_usage;
+ atomic64_t vram_usage;
struct attribute attr_vram;
char vram_filename[MAX_SYSFS_FILENAME_LEN];
diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_process.c b/sys/dev/pci/drm/amd/amdkfd/kfd_process.c
index 43f520b3796..6c90231e0ae 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_process.c
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_process.c
@@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
} else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
- return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
} else if (strncmp(attr->name, "sdma_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_sdma);
@@ -1589,7 +1589,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
- pdd->vram_usage = 0;
+ atomic64_set(&pdd->vram_usage, 0);
pdd->sdma_past_activity_counter = 0;
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c b/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
index ce76d455499..6b7c6f45a80 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_svm.c
@@ -391,6 +391,27 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
+
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ struct kfd_process_device *pdd;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+
+ mm = svm_bo->eviction_fence->mm;
+ /*
+ * The forked child process takes svm_bo device pages ref, svm_bo could be
+ * released after parent process is gone.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (p) {
+ pdd = kfd_get_process_device_data(svm_bo->node, p);
+ if (pdd)
+ atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+ kfd_unref_process(p);
+ }
+ mmput(mm);
+ }
+
if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
/* We're not in the eviction worker. Signal the fence. */
dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -518,6 +539,7 @@ int
svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear)
{
+ struct kfd_process_device *pdd;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
@@ -609,6 +631,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
list_add(&prange->svm_bo_list, &svm_bo->range_list);
spin_unlock(&svm_bo->list_lock);
+ pdd = svm_range_get_pdd_by_node(prange, node);
+ if (pdd)
+ atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
return 0;
reserve_bo_failed: