src - OpenBSD base system

diff options


context:
space:
mode:

author	Jonathan Gray <jsg@cvs.openbsd.org>	2023-03-31 02:12:22 +0000
committer	Jonathan Gray <jsg@cvs.openbsd.org>	2023-03-31 02:12:22 +0000
commit	c48220bf40771bf04f3864ab240f977b7f7c464c (patch)
tree	8d1d9b7a049488e07aa6d7fdcac873d6a2d9642a /sys
parent	92c2495a84e22a41c261949753db62bfa3b0ed20 (diff)

drm/amdkfd: Fix the warning of array-index-out-of-bounds

From Ma Jun
0ac954ec7f6c6dd6b7e94340599ba4b788d056d5 in linux-6.1.y/6.1.22
c0cc999f3c32e65a7c88fb323893ddf897b24488 in mainline linux

Diffstat (limited to 'sys')

-rw-r--r--  sys/dev/pci/drm/amd/amdkfd/kfd_crat.c      312
-rw-r--r--  sys/dev/pci/drm/amd/amdkfd/kfd_crat.h       12
-rw-r--r--  sys/dev/pci/drm/amd/amdkfd/kfd_topology.c  245
-rw-r--r--  sys/dev/pci/drm/amd/amdkfd/kfd_topology.h    5

4 files changed, 282 insertions, 292 deletions

diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_crat.c b/sys/dev/pci/drm/amd/amdkfd/kfd_crat.c
index 5e0bad7e0b7..97700cb1bbe 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_crat.c
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_crat.c

@@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_gpu_processor_id(

return current_id;

}

-/* Static table to describe GPU Cache information */

-struct kfd_gpu_cache_info {

- uint32_t cache_size;

- uint32_t cache_level;

- uint32_t flags;

- /* Indicates how many Compute Units share this cache

- * within a SA. Value = 1 indicates the cache is not shared

- */

- uint32_t num_cu_shared;

-};

static struct kfd_gpu_cache_info kaveri_cache_info[] = {

{

@@ -1119,9 +1109,13 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,

props->cachelines_per_tag = cache->lines_per_tag;

props->cache_assoc = cache->associativity;

props->cache_latency = cache->cache_latency;

memcpy(props->sibling_map, cache->sibling_map,

sizeof(props->sibling_map));

+ /* set the sibling_map_size as 32 for CRAT from ACPI */

+ props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;

if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)

props->cache_type |= HSA_CACHE_TYPE_DATA;

if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)

@@ -1339,125 +1333,6 @@ err:

return ret;

}

-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */

-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,

- struct kfd_gpu_cache_info *pcache_info,

- struct kfd_cu_info *cu_info,

- int mem_available,

- int cu_bitmask,

- int cache_type, unsigned int cu_processor_id,

- int cu_block)

- unsigned int cu_sibling_map_mask;

- int first_active_cu;

- /* First check if enough memory is available */

- if (sizeof(struct crat_subtype_cache) > mem_available)

- return -ENOMEM;

- cu_sibling_map_mask = cu_bitmask;

- cu_sibling_map_mask >>= cu_block;

- cu_sibling_map_mask &=

- ((1 << pcache_info[cache_type].num_cu_shared) - 1);

- first_active_cu = ffs(cu_sibling_map_mask);

- /* CU could be inactive. In case of shared cache find the first active

- * CU. and incase of non-shared cache check if the CU is inactive. If

- * inactive active skip it

- */

- if (first_active_cu) {

- memset(pcache, 0, sizeof(struct crat_subtype_cache));

- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;

- pcache->length = sizeof(struct crat_subtype_cache);

- pcache->flags = pcache_info[cache_type].flags;

- pcache->processor_id_low = cu_processor_id

- + (first_active_cu - 1);

- pcache->cache_level = pcache_info[cache_type].cache_level;

- pcache->cache_size = pcache_info[cache_type].cache_size;

- /* Sibling map is w.r.t processor_id_low, so shift out

- * inactive CU

- */

- cu_sibling_map_mask =

- cu_sibling_map_mask >> (first_active_cu - 1);

- pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);

- pcache->sibling_map[1] =

- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);

- pcache->sibling_map[2] =

- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);

- pcache->sibling_map[3] =

- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);

- return 0;

- }

- return 1;

-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */

-static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,

- struct kfd_gpu_cache_info *pcache_info,

- struct kfd_cu_info *cu_info,

- int mem_available,

- int cache_type, unsigned int cu_processor_id)

- unsigned int cu_sibling_map_mask;

- int first_active_cu;

- int i, j, k;

- /* First check if enough memory is available */

- if (sizeof(struct crat_subtype_cache) > mem_available)

- return -ENOMEM;

- cu_sibling_map_mask = cu_info->cu_bitmap[0][0];

- cu_sibling_map_mask &=

- ((1 << pcache_info[cache_type].num_cu_shared) - 1);

- first_active_cu = ffs(cu_sibling_map_mask);

- /* CU could be inactive. In case of shared cache find the first active

- * CU. and incase of non-shared cache check if the CU is inactive. If

- * inactive active skip it

- */

- if (first_active_cu) {

- memset(pcache, 0, sizeof(struct crat_subtype_cache));

- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;

- pcache->length = sizeof(struct crat_subtype_cache);

- pcache->flags = pcache_info[cache_type].flags;

- pcache->processor_id_low = cu_processor_id

- + (first_active_cu - 1);

- pcache->cache_level = pcache_info[cache_type].cache_level;

- pcache->cache_size = pcache_info[cache_type].cache_size;

- /* Sibling map is w.r.t processor_id_low, so shift out

- * inactive CU

- */

- cu_sibling_map_mask =

- cu_sibling_map_mask >> (first_active_cu - 1);

- k = 0;

- for (i = 0; i < cu_info->num_shader_engines; i++) {

- for (j = 0; j < cu_info->num_shader_arrays_per_engine;

- j++) {

- pcache->sibling_map[k] =

- (uint8_t)(cu_sibling_map_mask & 0xFF);

- pcache->sibling_map[k+1] =

- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);

- pcache->sibling_map[k+2] =

- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);

- pcache->sibling_map[k+3] =

- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);

- k += 4;

- cu_sibling_map_mask =

- cu_info->cu_bitmap[i % 4][j + i / 4];

- cu_sibling_map_mask &= (

- (1 << pcache_info[cache_type].num_cu_shared)

- - 1);

- }

- return 0;

- }

- return 1;

-#define KFD_MAX_CACHE_TYPES 6

static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,

struct kfd_gpu_cache_info *pcache_info)

@@ -1531,231 +1406,133 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,

return i;

}

-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info

- * tables

- *

- * @kdev - [IN] GPU device

- * @gpu_processor_id - [IN] GPU processor ID to which these caches

- * associate

- * @available_size - [IN] Amount of memory available in pcache

- * @cu_info - [IN] Compute Unit info obtained from KGD

- * @pcache - [OUT] memory into which cache data is to be filled in.

- * @size_filled - [OUT] amount of data used up in pcache.

- * @num_of_entries - [OUT] number of caches added

- */

-static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,

- int gpu_processor_id,

- int available_size,

- struct kfd_cu_info *cu_info,

- struct crat_subtype_cache *pcache,

- int *size_filled,

- int *num_of_entries)

+int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)

{

- struct kfd_gpu_cache_info *pcache_info;

- struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];

int num_of_cache_types = 0;

- int i, j, k;

- int ct = 0;

- int mem_available = available_size;

- unsigned int cu_processor_id;

- int ret;

- unsigned int num_cu_shared;

switch (kdev->adev->asic_type) {

case CHIP_KAVERI:

- pcache_info = kaveri_cache_info;

+ *pcache_info = kaveri_cache_info;

num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);

break;

case CHIP_HAWAII:

- pcache_info = hawaii_cache_info;

+ *pcache_info = hawaii_cache_info;

num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);

break;

case CHIP_CARRIZO:

- pcache_info = carrizo_cache_info;

+ *pcache_info = carrizo_cache_info;

num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);

break;

case CHIP_TONGA:

- pcache_info = tonga_cache_info;

+ *pcache_info = tonga_cache_info;

num_of_cache_types = ARRAY_SIZE(tonga_cache_info);

break;

case CHIP_FIJI:

- pcache_info = fiji_cache_info;

+ *pcache_info = fiji_cache_info;

num_of_cache_types = ARRAY_SIZE(fiji_cache_info);

break;

case CHIP_POLARIS10:

- pcache_info = polaris10_cache_info;

+ *pcache_info = polaris10_cache_info;

num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);

break;

case CHIP_POLARIS11:

- pcache_info = polaris11_cache_info;

+ *pcache_info = polaris11_cache_info;

num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);

break;

case CHIP_POLARIS12:

- pcache_info = polaris12_cache_info;

+ *pcache_info = polaris12_cache_info;

num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);

break;

case CHIP_VEGAM:

- pcache_info = vegam_cache_info;

+ *pcache_info = vegam_cache_info;

num_of_cache_types = ARRAY_SIZE(vegam_cache_info);

break;

default:

switch (KFD_GC_VERSION(kdev)) {

case IP_VERSION(9, 0, 1):

- pcache_info = vega10_cache_info;

+ *pcache_info = vega10_cache_info;

num_of_cache_types = ARRAY_SIZE(vega10_cache_info);

break;

case IP_VERSION(9, 2, 1):

- pcache_info = vega12_cache_info;

+ *pcache_info = vega12_cache_info;

num_of_cache_types = ARRAY_SIZE(vega12_cache_info);

break;

case IP_VERSION(9, 4, 0):

case IP_VERSION(9, 4, 1):

- pcache_info = vega20_cache_info;

+ *pcache_info = vega20_cache_info;

num_of_cache_types = ARRAY_SIZE(vega20_cache_info);

break;

case IP_VERSION(9, 4, 2):

- pcache_info = aldebaran_cache_info;

+ *pcache_info = aldebaran_cache_info;

num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);

break;

case IP_VERSION(9, 1, 0):

case IP_VERSION(9, 2, 2):

- pcache_info = raven_cache_info;

+ *pcache_info = raven_cache_info;

num_of_cache_types = ARRAY_SIZE(raven_cache_info);

break;

case IP_VERSION(9, 3, 0):

- pcache_info = renoir_cache_info;

+ *pcache_info = renoir_cache_info;

num_of_cache_types = ARRAY_SIZE(renoir_cache_info);

break;

case IP_VERSION(10, 1, 10):

case IP_VERSION(10, 1, 2):

case IP_VERSION(10, 1, 3):

case IP_VERSION(10, 1, 4):

- pcache_info = navi10_cache_info;

+ *pcache_info = navi10_cache_info;

num_of_cache_types = ARRAY_SIZE(navi10_cache_info);

break;

case IP_VERSION(10, 1, 1):

- pcache_info = navi14_cache_info;

+ *pcache_info = navi14_cache_info;

num_of_cache_types = ARRAY_SIZE(navi14_cache_info);

break;

case IP_VERSION(10, 3, 0):

- pcache_info = sienna_cichlid_cache_info;

+ *pcache_info = sienna_cichlid_cache_info;

num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);

break;

case IP_VERSION(10, 3, 2):

- pcache_info = navy_flounder_cache_info;

+ *pcache_info = navy_flounder_cache_info;

num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);

break;

case IP_VERSION(10, 3, 4):

- pcache_info = dimgrey_cavefish_cache_info;

+ *pcache_info = dimgrey_cavefish_cache_info;

num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);

break;

case IP_VERSION(10, 3, 1):

- pcache_info = vangogh_cache_info;

+ *pcache_info = vangogh_cache_info;

num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);

break;

case IP_VERSION(10, 3, 5):

- pcache_info = beige_goby_cache_info;

+ *pcache_info = beige_goby_cache_info;

num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);

break;

case IP_VERSION(10, 3, 3):

- pcache_info = yellow_carp_cache_info;

+ *pcache_info = yellow_carp_cache_info;

num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);

break;

case IP_VERSION(10, 3, 6):

- pcache_info = gc_10_3_6_cache_info;

+ *pcache_info = gc_10_3_6_cache_info;

num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);

break;

case IP_VERSION(10, 3, 7):

- pcache_info = gfx1037_cache_info;

+ *pcache_info = gfx1037_cache_info;

num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);

break;

case IP_VERSION(11, 0, 0):

case IP_VERSION(11, 0, 1):

case IP_VERSION(11, 0, 2):

case IP_VERSION(11, 0, 3):

- pcache_info = cache_info;

num_of_cache_types =

- kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);

+ kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);

break;

default:

- pcache_info = dummy_cache_info;

+ *pcache_info = dummy_cache_info;

num_of_cache_types = ARRAY_SIZE(dummy_cache_info);

pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");

break;

}

- *size_filled = 0;

- *num_of_entries = 0;

- /* For each type of cache listed in the kfd_gpu_cache_info table,

- * go through all available Compute Units.

- * The [i,j,k] loop will

- * if kfd_gpu_cache_info.num_cu_shared = 1

- * will parse through all available CU

- * If (kfd_gpu_cache_info.num_cu_shared != 1)

- * then it will consider only one CU from

- * the shared unit

- */

- for (ct = 0; ct < num_of_cache_types; ct++) {

- cu_processor_id = gpu_processor_id;

- if (pcache_info[ct].cache_level == 1) {

- for (i = 0; i < cu_info->num_shader_engines; i++) {

- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {

- for (k = 0; k < cu_info->num_cu_per_sh;

- k += pcache_info[ct].num_cu_shared) {

- ret = fill_in_l1_pcache(pcache,

- pcache_info,

- cu_info,

- mem_available,

- cu_info->cu_bitmap[i % 4][j + i / 4],

- ct,

- cu_processor_id,

- k);

- if (ret < 0)

- break;

- if (!ret) {

- pcache++;

- (*num_of_entries)++;

- mem_available -= sizeof(*pcache);

- (*size_filled) += sizeof(*pcache);

- }

- /* Move to next CU block */

- num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=

- cu_info->num_cu_per_sh) ?

- pcache_info[ct].num_cu_shared :

- (cu_info->num_cu_per_sh - k);

- cu_processor_id += num_cu_shared;

- }

- } else {

- ret = fill_in_l2_l3_pcache(pcache,

- pcache_info,

- cu_info,

- mem_available,

- ct,

- cu_processor_id);

- if (ret < 0)

- break;

- if (!ret) {

- pcache++;

- (*num_of_entries)++;

- mem_available -= sizeof(*pcache);

- (*size_filled) += sizeof(*pcache);

- }

- pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);

- return 0;

+ return num_of_cache_types;

}

static bool kfd_ignore_crat(void)

@@ -2314,8 +2091,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,

struct kfd_cu_info cu_info;

int avail_size = *size;

uint32_t total_num_of_cu;

- int num_of_cache_entries = 0;

- int cache_mem_filled = 0;

uint32_t nid = 0;

int ret = 0;

@@ -2416,31 +2191,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,

crat_table->length += sizeof(struct crat_subtype_memory);

crat_table->total_entries++;

- /* TODO: Fill in cache information. This information is NOT readily

- * available in KGD

- */

- sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +

- sub_type_hdr->length);

- ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,

- avail_size,

- &cu_info,

- (struct crat_subtype_cache *)sub_type_hdr,

- &cache_mem_filled,

- &num_of_cache_entries);

- if (ret < 0)

- return ret;

- crat_table->length += cache_mem_filled;

- crat_table->total_entries += num_of_cache_entries;

- avail_size -= cache_mem_filled;

/* Fill in Subtype: IO_LINKS

* Only direct links are added here which is Link from GPU to

* its NUMA node. Indirect links are added by userspace.

sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +

- cache_mem_filled);

+ sub_type_hdr->length);

ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,

(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);

diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_crat.h b/sys/dev/pci/drm/amd/amdkfd/kfd_crat.h
index 482ba84a728..a8671061a17 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_crat.h
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_crat.h

@@ -317,6 +317,18 @@ struct cdit_header {

struct kfd_dev;

+/* Static table to describe GPU Cache information */

+struct kfd_gpu_cache_info {

+ uint32_t cache_size;

+ uint32_t cache_level;

+ uint32_t flags;

+ /* Indicates how many Compute Units share this cache

+ * within a SA. Value = 1 indicates the cache is not shared

+ */

+ uint32_t num_cu_shared;

+};

+int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info);

int kfd_create_crat_image_acpi(void **crat_image, size_t *size);

void kfd_destroy_crat_image(void *crat_image);

int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,

diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_topology.c b/sys/dev/pci/drm/amd/amdkfd/kfd_topology.c
index 35a9b702508..713f893d253 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_topology.c
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_topology.c

@@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,

/* Making sure that the buffer is an empty string */

buffer[0] = 0;

cache = container_of(attr, struct kfd_cache_properties, attr);

if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))

return -EPERM;

@@ -379,12 +378,13 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,

sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);

sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);

sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);

offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");

- for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)

+ for (i = 0; i < cache->sibling_map_size; i++)

for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)

/* Check each bit */

offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",

- (cache->sibling_map[i] >> j) & 1);

+ (cache->sibling_map[i] >> j) & 1);

/* Replace the last "," with end of line */

buffer[offs-1] = '\n';

@@ -1198,7 +1198,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)

struct kfd_iolink_properties *iolink;

struct kfd_iolink_properties *p2plink;

- down_write(&topology_lock);

list_for_each_entry(dev, &topology_device_list, list) {

/* Discrete GPUs need their own topology device list

* entries. Don't assign them to CPU/APU nodes.

@@ -1222,7 +1221,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)

break;

}

- up_write(&topology_lock);

return out_dev;

}

@@ -1593,6 +1591,221 @@ out:

return ret;

}

+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */

+static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,

+ struct kfd_gpu_cache_info *pcache_info,

+ struct kfd_cu_info *cu_info,

+ int cu_bitmask,

+ int cache_type, unsigned int cu_processor_id,

+ int cu_block)

+ unsigned int cu_sibling_map_mask;

+ int first_active_cu;

+ struct kfd_cache_properties *pcache = NULL;

+ cu_sibling_map_mask = cu_bitmask;

+ cu_sibling_map_mask >>= cu_block;

+ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);

+ first_active_cu = ffs(cu_sibling_map_mask);

+ /* CU could be inactive. In case of shared cache find the first active

+ * CU. and incase of non-shared cache check if the CU is inactive. If

+ * inactive active skip it

+ */

+ if (first_active_cu) {

+ pcache = kfd_alloc_struct(pcache);

+ if (!pcache)

+ return -ENOMEM;

+ memset(pcache, 0, sizeof(struct kfd_cache_properties));

+ pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);

+ pcache->cache_level = pcache_info[cache_type].cache_level;

+ pcache->cache_size = pcache_info[cache_type].cache_size;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_DATA;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_CPU;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_HSACU;

+ /* Sibling map is w.r.t processor_id_low, so shift out

+ * inactive CU

+ */

+ cu_sibling_map_mask =

+ cu_sibling_map_mask >> (first_active_cu - 1);

+ pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);

+ pcache->sibling_map[1] =

+ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);

+ pcache->sibling_map[2] =

+ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);

+ pcache->sibling_map[3] =

+ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);

+ pcache->sibling_map_size = 4;

+ *props_ext = pcache;

+ return 0;

+ }

+ return 1;

+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */

+static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,

+ struct kfd_gpu_cache_info *pcache_info,

+ struct kfd_cu_info *cu_info,

+ int cache_type, unsigned int cu_processor_id)

+ unsigned int cu_sibling_map_mask;

+ int first_active_cu;

+ int i, j, k;

+ struct kfd_cache_properties *pcache = NULL;

+ cu_sibling_map_mask = cu_info->cu_bitmap[0][0];

+ cu_sibling_map_mask &=

+ ((1 << pcache_info[cache_type].num_cu_shared) - 1);

+ first_active_cu = ffs(cu_sibling_map_mask);

+ /* CU could be inactive. In case of shared cache find the first active

+ * CU. and incase of non-shared cache check if the CU is inactive. If

+ * inactive active skip it

+ */

+ if (first_active_cu) {

+ pcache = kfd_alloc_struct(pcache);

+ if (!pcache)

+ return -ENOMEM;

+ memset(pcache, 0, sizeof(struct kfd_cache_properties));

+ pcache->processor_id_low = cu_processor_id

+ + (first_active_cu - 1);

+ pcache->cache_level = pcache_info[cache_type].cache_level;

+ pcache->cache_size = pcache_info[cache_type].cache_size;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_DATA;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_CPU;

+ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)

+ pcache->cache_type |= HSA_CACHE_TYPE_HSACU;

+ /* Sibling map is w.r.t processor_id_low, so shift out

+ * inactive CU

+ */

+ cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);

+ k = 0;

+ for (i = 0; i < cu_info->num_shader_engines; i++) {

+ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {

+ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);

+ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);

+ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);

+ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);

+ k += 4;

+ cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];

+ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);

+ }

+ pcache->sibling_map_size = k;

+ *props_ext = pcache;

+ return 0;

+ }

+ return 1;

+#define KFD_MAX_CACHE_TYPES 6

+/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info

+ * tables

+ */

+void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev)

+ struct kfd_gpu_cache_info *pcache_info = NULL;

+ int i, j, k;

+ int ct = 0;

+ unsigned int cu_processor_id;

+ int ret;

+ unsigned int num_cu_shared;

+ struct kfd_cu_info cu_info;

+ struct kfd_cu_info *pcu_info;

+ int gpu_processor_id;

+ struct kfd_cache_properties *props_ext;

+ int num_of_entries = 0;

+ int num_of_cache_types = 0;

+ struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];

+ amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);

+ pcu_info = &cu_info;

+ gpu_processor_id = dev->node_props.simd_id_base;

+ pcache_info = cache_info;

+ num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);

+ if (!num_of_cache_types) {

+ pr_warn("no cache info found\n");

+ return;

+ }

+ /* For each type of cache listed in the kfd_gpu_cache_info table,

+ * go through all available Compute Units.

+ * The [i,j,k] loop will

+ * if kfd_gpu_cache_info.num_cu_shared = 1

+ * will parse through all available CU

+ * If (kfd_gpu_cache_info.num_cu_shared != 1)

+ * then it will consider only one CU from

+ * the shared unit

+ */

+ for (ct = 0; ct < num_of_cache_types; ct++) {

+ cu_processor_id = gpu_processor_id;

+ if (pcache_info[ct].cache_level == 1) {

+ for (i = 0; i < pcu_info->num_shader_engines; i++) {

+ for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {

+ for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {

+ ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,

+ pcu_info->cu_bitmap[i % 4][j + i / 4], ct,

+ cu_processor_id, k);

+ if (ret < 0)

+ break;

+ if (!ret) {

+ num_of_entries++;

+ list_add_tail(&props_ext->list, &dev->cache_props);

+ }

+ /* Move to next CU block */

+ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=

+ pcu_info->num_cu_per_sh) ?

+ pcache_info[ct].num_cu_shared :

+ (pcu_info->num_cu_per_sh - k);

+ cu_processor_id += num_cu_shared;

+ }

+ } else {

+ ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,

+ pcu_info, ct, cu_processor_id);

+ if (ret < 0)

+ break;

+ if (!ret) {

+ num_of_entries++;

+ list_add_tail(&props_ext->list, &dev->cache_props);

+ }

+ dev->node_props.caches_count += num_of_entries;

+ pr_debug("Added [%d] GPU cache entries\n", num_of_entries);

int kfd_topology_add_device(struct kfd_dev *gpu)

{

uint32_t gpu_id;

@@ -1617,9 +1830,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)

* CRAT to create a new topology device. Once created assign the gpu to

* that topology device

+ down_write(&topology_lock);

dev = kfd_assign_gpu(gpu);

if (!dev) {

- down_write(&topology_lock);

proximity_domain = ++topology_crat_proximity_domain;

res = kfd_create_crat_image_virtual(&crat_image, &image_size,

@@ -1631,6 +1844,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)

topology_crat_proximity_domain--;

return res;

}

res = kfd_parse_crat_table(crat_image,

&temp_topology_device_list,

proximity_domain);

@@ -1644,23 +1858,28 @@ int kfd_topology_add_device(struct kfd_dev *gpu)

kfd_topology_update_device_list(&temp_topology_device_list,

&topology_device_list);

+ dev = kfd_assign_gpu(gpu);

+ if (WARN_ON(!dev)) {

+ res = -ENODEV;

+ goto err;

+ }

+ /* Fill the cache affinity information here for the GPUs

+ * using VCRAT

+ */

+ kfd_fill_cache_non_crat_info(dev, gpu);

/* Update the SYSFS tree, since we added another topology

* device

res = kfd_topology_update_sysfs();

- up_write(&topology_lock);

if (!res)

sys_props.generation_count++;

else

pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",

gpu_id, res);

- dev = kfd_assign_gpu(gpu);

- if (WARN_ON(!dev)) {

- res = -ENODEV;

- goto err;

- }

}

+ up_write(&topology_lock);

dev->gpu_id = gpu_id;

gpu->id = gpu_id;

diff --git a/sys/dev/pci/drm/amd/amdkfd/kfd_topology.h b/sys/dev/pci/drm/amd/amdkfd/kfd_topology.h
index 9f6c949186c..19283b8b168 100644
--- a/sys/dev/pci/drm/amd/amdkfd/kfd_topology.h
+++ b/sys/dev/pci/drm/amd/amdkfd/kfd_topology.h

@@ -80,6 +80,8 @@ struct kfd_mem_properties {

struct attribute attr;

};

+#define CACHE_SIBLINGMAP_SIZE 64

struct kfd_cache_properties {

struct list_head list;

uint32_t processor_id_low;

@@ -90,10 +92,11 @@ struct kfd_cache_properties {

uint32_t cache_assoc;

uint32_t cache_latency;

uint32_t cache_type;

- uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE];

+ uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE];

struct kfd_dev *gpu;

struct kobject *kobj;

struct attribute attr;

+ uint32_t sibling_map_size;

};

struct kfd_iolink_properties {