diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-11-14 08:39:58 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-11-14 08:39:58 +0000 |
commit | eb7b2da24a0f4fba243716cdf7336e718d6095b4 (patch) | |
tree | d8712ac93623fc4b6b7217d762d24ed2750911f9 | |
parent | 13d78d7be401f9ecff0bfee0c6779369816d1755 (diff) |
Import libdrm 2.4.114
31 files changed, 5100 insertions, 2217 deletions
diff --git a/lib/libdrm/.gitlab-ci.yml b/lib/libdrm/.gitlab-ci.yml index 3c79867ad..4a3ed2141 100644 --- a/lib/libdrm/.gitlab-ci.yml +++ b/lib/libdrm/.gitlab-ci.yml @@ -44,7 +44,7 @@ stages: FDO_DISTRIBUTION_EXEC: 'pip3 install meson==0.53.0' # bump this tag every time you change something which requires rebuilding the # base image - FDO_DISTRIBUTION_TAG: "2022-01-19.0" + FDO_DISTRIBUTION_TAG: "2022-08-22.0" .debian-x86_64: extends: @@ -68,10 +68,10 @@ stages: variables: BUILD_OS: freebsd FDO_DISTRIBUTION_VERSION: "13.0" - FDO_DISTRIBUTION_PACKAGES: 'meson ninja pkgconf libpciaccess libpthread-stubs py38-docutils cairo' + FDO_DISTRIBUTION_PACKAGES: 'meson ninja pkgconf libpciaccess libpthread-stubs py39-docutils cairo' # bump this tag every time you change something which requires rebuilding the # base image - FDO_DISTRIBUTION_TAG: "2021-11-10.1" + FDO_DISTRIBUTION_TAG: "2022-08-22.0" .freebsd-x86_64: extends: @@ -191,22 +191,8 @@ x86_64-freebsd-container_prep: GIT_DEPTH: 10 script: - meson build - -D amdgpu=true - -D cairo-tests=true - -D etnaviv=true - -D exynos=true - -D freedreno=true - -D freedreno-kgsl=true - -D intel=true - -D man-pages=true - -D nouveau=true - -D omap=true - -D radeon=true - -D tegra=true + --auto-features=enabled -D udev=true - -D valgrind=auto - -D vc4=true - -D vmwgfx=true - ninja -C build - ninja -C build test - DESTDIR=$PWD/install ninja -C build install @@ -227,7 +213,7 @@ x86_64-freebsd-container_prep: # the workspace to see details about the failed tests. - | set +e - /app/vmctl exec "pkg info; cd $CI_PROJECT_NAME ; meson build -D amdgpu=true -D cairo-tests=true -D intel=true -D man-pages=true -D nouveau=false -D radeon=true -D valgrind=auto && ninja -C build" + /app/vmctl exec "pkg info; cd $CI_PROJECT_NAME ; meson build --auto-features=enabled -D etnaviv=disabled -D nouveau=disabled -D valgrind=disabled && ninja -C build" set -ex scp -r vm:$CI_PROJECT_NAME/build/meson-logs . 
/app/vmctl exec "ninja -C $CI_PROJECT_NAME/build install" diff --git a/lib/libdrm/amdgpu/amdgpu_device.c b/lib/libdrm/amdgpu/amdgpu_device.c index 73fd27f60..aeb5e3c54 100644 --- a/lib/libdrm/amdgpu/amdgpu_device.c +++ b/lib/libdrm/amdgpu/amdgpu_device.c @@ -292,7 +292,10 @@ drm_public int amdgpu_device_get_fd(amdgpu_device_handle device_handle) drm_public const char *amdgpu_get_marketing_name(amdgpu_device_handle dev) { - return dev->marketing_name; + if (dev->marketing_name) + return dev->marketing_name; + else + return "AMD Radeon Graphics"; } drm_public int amdgpu_query_sw_info(amdgpu_device_handle dev, diff --git a/lib/libdrm/core-symbols.txt b/lib/libdrm/core-symbols.txt index 31bbcf8fd..9f8a323bb 100644 --- a/lib/libdrm/core-symbols.txt +++ b/lib/libdrm/core-symbols.txt @@ -103,11 +103,14 @@ drmModeAtomicGetCursor drmModeAtomicMerge drmModeAtomicSetCursor drmModeAttachMode +drmModeConnectorGetPossibleCrtcs drmModeConnectorSetProperty +drmModeCreateDumbBuffer drmModeCreateLease drmModeCreatePropertyBlob drmModeCrtcGetGamma drmModeCrtcSetGamma +drmModeDestroyDumbBuffer drmModeDestroyPropertyBlob drmModeDetachMode drmModeDirtyFB @@ -126,6 +129,7 @@ drmModeFreePropertyBlob drmModeFreeResources drmModeGetConnector drmModeGetConnectorCurrent +drmModeGetConnectorTypeName drmModeGetCrtc drmModeGetEncoder drmModeGetFB @@ -137,6 +141,7 @@ drmModeGetProperty drmModeGetPropertyBlob drmModeGetResources drmModeListLessees +drmModeMapDumbBuffer drmModeMoveCursor drmModeObjectGetProperties drmModeObjectSetProperty @@ -201,3 +206,4 @@ drmUpdateDrawableInfo drmWaitVBlank drmGetFormatModifierName drmGetFormatModifierVendor +drmGetFormatName diff --git a/lib/libdrm/data/amdgpu.ids b/lib/libdrm/data/amdgpu.ids index 0d2b8847e..abf3d6941 100644 --- a/lib/libdrm/data/amdgpu.ids +++ b/lib/libdrm/data/amdgpu.ids @@ -4,120 +4,177 @@ # device_id, revision_id, product_name <-- single tab after comma 1.0.0 -15DD, C3, AMD Radeon Vega 3 Graphics -15DD, CB, AMD Radeon Vega 3 Graphics 
-15DD, CE, AMD Radeon Vega 3 Graphics -15DD, D8, AMD Radeon Vega 3 Graphics -15DD, CC, AMD Radeon Vega 6 Graphics -15DD, D9, AMD Radeon Vega 6 Graphics -15DD, C2, AMD Radeon Vega 8 Graphics -15DD, C4, AMD Radeon Vega 8 Graphics -15DD, C8, AMD Radeon Vega 8 Graphics -15DD, CA, AMD Radeon Vega 8 Graphics -15DD, D1, AMD Radeon Vega 8 Graphics -15DD, D5, AMD Radeon Vega 8 Graphics -15DD, D7, AMD Radeon Vega 8 Graphics -15DD, C3, AMD Radeon Vega 10 Graphics -15DD, D0, AMD Radeon Vega 10 Graphics -15DD, C1, AMD Radeon Vega 11 Graphics -15DD, C6, AMD Radeon Vega 11 Graphics -15DD, C9, AMD Radeon Vega 11 Graphics -15DD, D3, AMD Radeon Vega 11 Graphics -15DD, D6, AMD Radeon Vega 11 Graphics -15DD, 81, AMD Ryzen Embedded V1807B with Radeon Vega Gfx -15DD, 82, AMD Ryzen Embedded V1756B with Radeon Vega Gfx -15DD, 83, AMD Ryzen Embedded V1605B with Radeon Vega Gfx -15DD, 85, AMD Ryzen Embedded V1202B with Radeon Vega Gfx +1309, 00, AMD Radeon R7 Graphics +130A, 00, AMD Radeon R6 Graphics +130B, 00, AMD Radeon R4 Graphics +130C, 00, AMD Radeon R7 Graphics +130D, 00, AMD Radeon R6 Graphics +130E, 00, AMD Radeon R5 Graphics +130F, 00, AMD Radeon R7 Graphics +130F, D4, AMD Radeon R7 Graphics +130F, D5, AMD Radeon R7 Graphics +130F, D6, AMD Radeon R7 Graphics +130F, D7, AMD Radeon R7 Graphics +1313, 00, AMD Radeon R7 Graphics +1313, D4, AMD Radeon R7 Graphics +1313, D5, AMD Radeon R7 Graphics +1313, D6, AMD Radeon R7 Graphics +1315, 00, AMD Radeon R5 Graphics +1315, D4, AMD Radeon R5 Graphics +1315, D5, AMD Radeon R5 Graphics +1315, D6, AMD Radeon R5 Graphics +1315, D7, AMD Radeon R5 Graphics +1316, 00, AMD Radeon R5 Graphics +1318, 00, AMD Radeon R5 Graphics +131B, 00, AMD Radeon R4 Graphics +131C, 00, AMD Radeon R7 Graphics +131D, 00, AMD Radeon R6 Graphics +15D8, 00, AMD Radeon RX Vega 8 Graphics WS +15D8, 91, AMD Radeon Vega 3 Graphics +15D8, 91, AMD Ryzen Embedded R1606G with Radeon Vega Gfx +15D8, 92, AMD Radeon Vega 3 Graphics +15D8, 92, AMD Ryzen Embedded R1505G with Radeon 
Vega Gfx 15D8, 93, AMD Radeon Vega 1 Graphics +15D8, A1, AMD Radeon Vega 10 Graphics +15D8, A2, AMD Radeon Vega 8 Graphics +15D8, A3, AMD Radeon Vega 6 Graphics +15D8, A4, AMD Radeon Vega 3 Graphics +15D8, B1, AMD Radeon Vega 10 Graphics +15D8, B2, AMD Radeon Vega 8 Graphics +15D8, B3, AMD Radeon Vega 6 Graphics +15D8, B4, AMD Radeon Vega 3 Graphics +15D8, C1, AMD Radeon Vega 10 Graphics +15D8, C2, AMD Radeon Vega 8 Graphics +15D8, C3, AMD Radeon Vega 6 Graphics 15D8, C4, AMD Radeon Vega 3 Graphics 15D8, C5, AMD Radeon Vega 3 Graphics +15D8, C8, AMD Radeon Vega 11 Graphics +15D8, C9, AMD Radeon Vega 8 Graphics +15D8, CA, AMD Radeon Vega 11 Graphics +15D8, CB, AMD Radeon Vega 8 Graphics 15D8, CC, AMD Radeon Vega 3 Graphics 15D8, CE, AMD Radeon Vega 3 Graphics -15D8, CF, AMD Radeon Vega 3 Graphics +15D8, CF, AMD Ryzen Embedded R1305G with Radeon Vega Gfx +15D8, D1, AMD Radeon Vega 10 Graphics +15D8, D2, AMD Radeon Vega 8 Graphics +15D8, D3, AMD Radeon Vega 6 Graphics 15D8, D4, AMD Radeon Vega 3 Graphics +15D8, D8, AMD Radeon Vega 11 Graphics +15D8, D9, AMD Radeon Vega 8 Graphics +15D8, DA, AMD Radeon Vega 11 Graphics +15D8, DB, AMD Radeon Vega 3 Graphics +15D8, DB, AMD Radeon Vega 8 Graphics 15D8, DC, AMD Radeon Vega 3 Graphics 15D8, DD, AMD Radeon Vega 3 Graphics 15D8, DE, AMD Radeon Vega 3 Graphics 15D8, DF, AMD Radeon Vega 3 Graphics 15D8, E3, AMD Radeon Vega 3 Graphics -15D8, E4, AMD Radeon Vega 3 Graphics -15D8, A3, AMD Radeon Vega 6 Graphics -15D8, B3, AMD Radeon Vega 6 Graphics -15D8, C3, AMD Radeon Vega 6 Graphics -15D8, D3, AMD Radeon Vega 6 Graphics -15D8, A2, AMD Radeon Vega 8 Graphics -15D8, B2, AMD Radeon Vega 8 Graphics -15D8, C2, AMD Radeon Vega 8 Graphics -15D8, C9, AMD Radeon Vega 8 Graphics -15D8, CB, AMD Radeon Vega 8 Graphics -15D8, D2, AMD Radeon Vega 8 Graphics -15D8, D9, AMD Radeon Vega 8 Graphics -15D8, DB, AMD Radeon Vega 8 Graphics -15D8, A1, AMD Radeon Vega 10 Graphics -15D8, B1, AMD Radeon Vega 10 Graphics -15D8, C1, AMD Radeon Vega 10 
Graphics -15D8, D1, AMD Radeon Vega 10 Graphics -15D8, C8, AMD Radeon Vega 11 Graphics -15D8, CA, AMD Radeon Vega 11 Graphics -15D8, D8, AMD Radeon Vega 11 Graphics -15D8, DA, AMD Radeon Vega 11 Graphics -15D8, 91, AMD Ryzen Embedded R1606G with Radeon Vega Gfx -15D8, 92, AMD Ryzen Embedded R1505G with Radeon Vega Gfx -15D8, CF, AMD Ryzen Embedded R1305G with Radeon Vega Gfx 15D8, E4, AMD Ryzen Embedded R1102G with Radeon Vega Gfx +15DD, 81, AMD Ryzen Embedded V1807B with Radeon Vega Gfx +15DD, 82, AMD Ryzen Embedded V1756B with Radeon Vega Gfx +15DD, 83, AMD Ryzen Embedded V1605B with Radeon Vega Gfx +15DD, 84, AMD Radeon Vega 6 Graphics +15DD, 85, AMD Ryzen Embedded V1202B with Radeon Vega Gfx +15DD, 86, AMD Radeon Vega 11 Graphics +15DD, 88, AMD Radeon Vega 8 Graphics +15DD, C1, AMD Radeon Vega 11 Graphics +15DD, C2, AMD Radeon Vega 8 Graphics +15DD, C3, AMD Radeon Vega 3 / 10 Graphics +15DD, C4, AMD Radeon Vega 8 Graphics +15DD, C5, AMD Radeon Vega 3 Graphics +15DD, C6, AMD Radeon Vega 11 Graphics +15DD, C8, AMD Radeon Vega 8 Graphics +15DD, C9, AMD Radeon Vega 11 Graphics +15DD, CA, AMD Radeon Vega 8 Graphics +15DD, CB, AMD Radeon Vega 3 Graphics +15DD, CC, AMD Radeon Vega 6 Graphics +15DD, CE, AMD Radeon Vega 3 Graphics +15DD, CF, AMD Radeon Vega 3 Graphics +15DD, D0, AMD Radeon Vega 10 Graphics +15DD, D1, AMD Radeon Vega 8 Graphics +15DD, D3, AMD Radeon Vega 11 Graphics +15DD, D5, AMD Radeon Vega 8 Graphics +15DD, D6, AMD Radeon Vega 11 Graphics +15DD, D7, AMD Radeon Vega 8 Graphics +15DD, D8, AMD Radeon Vega 3 Graphics +15DD, D9, AMD Radeon Vega 6 Graphics +15DD, E1, AMD Radeon Vega 3 Graphics +15DD, E2, AMD Radeon Vega 3 Graphics 163F, AE, AMD Custom GPU 0405 -6600, 0, AMD Radeon HD 8600 / 8700M +6600, 00, AMD Radeon HD 8600 / 8700M 6600, 81, AMD Radeon R7 M370 -6601, 0, AMD Radeon HD 8500M / 8700M -6604, 0, AMD Radeon R7 M265 Series +6601, 00, AMD Radeon HD 8500M / 8700M +6604, 00, AMD Radeon R7 M265 Series 6604, 81, AMD Radeon R7 M350 -6605, 0, AMD 
Radeon R7 M260 Series +6605, 00, AMD Radeon R7 M260 Series 6605, 81, AMD Radeon R7 M340 -6606, 0, AMD Radeon HD 8790M -6607, 0, AMD Radeon HD 8530M -6608, 0, AMD FirePro W2100 -6610, 0, AMD Radeon HD 8600 Series +6606, 00, AMD Radeon HD 8790M +6607, 00, AMD Radeon R5 M240 +6608, 00, AMD FirePro W2100 +6610, 00, AMD Radeon R7 200 Series 6610, 81, AMD Radeon R7 350 6610, 83, AMD Radeon R5 340 -6611, 0, AMD Radeon HD 8500 Series -6613, 0, AMD Radeon HD 8500 series +6610, 87, AMD Radeon R7 200 Series +6611, 00, AMD Radeon R7 200 Series +6611, 87, AMD Radeon R7 200 Series +6613, 00, AMD Radeon R7 200 Series +6617, 00, AMD Radeon R7 240 Series +6617, 87, AMD Radeon R7 200 Series 6617, C7, AMD Radeon R7 240 Series -6640, 0, AMD Radeon HD 8950 +6640, 00, AMD Radeon HD 8950 6640, 80, AMD Radeon R9 M380 -6646, 0, AMD Radeon R9 M280X +6646, 00, AMD Radeon R9 M280X +6646, 80, AMD Radeon R9 M385 6646, 80, AMD Radeon R9 M470X -6647, 0, AMD Radeon R9 M270X +6647, 00, AMD Radeon R9 M200X Series 6647, 80, AMD Radeon R9 M380 -6649, 0, AMD FirePro W5100 -6658, 0, AMD Radeon R7 200 Series -665C, 0, AMD Radeon HD 7700 Series -665D, 0, AMD Radeon R7 200 Series -665F, 81, AMD Radeon R7 300 Series -6660, 0, AMD Radeon HD 8600M Series +6649, 00, AMD FirePro W5100 +6658, 00, AMD Radeon R7 200 Series +665C, 00, AMD Radeon HD 7700 Series +665D, 00, AMD Radeon R7 200 Series +665F, 81, AMD Radeon R7 360 Series +6660, 00, AMD Radeon HD 8600M Series 6660, 81, AMD Radeon R5 M335 6660, 83, AMD Radeon R5 M330 -6663, 0, AMD Radeon HD 8500M Series +6663, 00, AMD Radeon HD 8500M Series 6663, 83, AMD Radeon R5 M320 -6664, 0, AMD Radeon R5 M200 Series -6665, 0, AMD Radeon R5 M200 Series +6664, 00, AMD Radeon R5 M200 Series +6665, 00, AMD Radeon R5 M230 Series 6665, 83, AMD Radeon R5 M320 -6667, 0, AMD Radeon R5 M200 Series -666F, 0, AMD Radeon HD 8500M +6665, C3, AMD Radeon R5 M435 +6666, 00, AMD Radeon R5 M200 Series +6667, 00, AMD Radeon R5 M200 Series +666F, 00, AMD Radeon HD 8500M +66A1, 02, AMD 
Instinct MI60 / MI50 66A1, 06, AMD Radeon Pro VII 66AF, C1, AMD Radeon VII -6780, 0, ATI FirePro V (FireGL V) Graphics Adapter -678A, 0, ATI FirePro V (FireGL V) Graphics Adapter -6798, 0, AMD Radeon HD 7900 Series -679A, 0, AMD Radeon HD 7900 Series -679B, 0, AMD Radeon HD 7900 Series -679E, 0, AMD Radeon HD 7800 Series -67A0, 0, AMD Radeon FirePro W9100 -67A1, 0, AMD Radeon FirePro W8100 -67B0, 0, AMD Radeon R9 200 Series +6780, 00, AMD FirePro W9000 +6784, 00, ATI FirePro V (FireGL V) Graphics Adapter +6788, 00, ATI FirePro V (FireGL V) Graphics Adapter +678A, 00, AMD FirePro W8000 +6798, 00, AMD Radeon R9 200 / HD 7900 Series +6799, 00, AMD Radeon HD 7900 Series +679A, 00, AMD Radeon HD 7900 Series +679B, 00, AMD Radeon HD 7900 Series +679E, 00, AMD Radeon HD 7800 Series +67A0, 00, AMD Radeon FirePro W9100 +67A1, 00, AMD Radeon FirePro W8100 +67B0, 00, AMD Radeon R9 200 Series 67B0, 80, AMD Radeon R9 390 Series -67B1, 0, AMD Radeon R9 200 Series +67B1, 00, AMD Radeon R9 200 Series 67B1, 80, AMD Radeon R9 390 Series -67B9, 0, AMD Radeon R9 200 Series +67B9, 00, AMD Radeon R9 200 Series +67C0, 00, AMD Radeon Pro WX 7100 Graphics +67C0, 80, AMD Radeon E9550 +67C2, 01, AMD Radeon Pro V7350x2 +67C2, 02, AMD Radeon Pro V7300X +67C4, 00, AMD Radeon Pro WX 7100 Graphics +67C4, 80, AMD Radeon E9560 / E9565 Graphics +67C7, 00, AMD Radeon Pro WX 5100 Graphics +67C7, 80, AMD Radeon E9390 Graphics +67D0, 01, AMD Radeon Pro V7350x2 +67D0, 02, AMD Radeon Pro V7300X +67DF, C0, AMD Radeon Pro 580X 67DF, C1, AMD Radeon RX 580 Series 67DF, C2, AMD Radeon RX 570 Series 67DF, C3, AMD Radeon RX 580 Series @@ -131,17 +188,10 @@ 67DF, E1, AMD Radeon RX 590 Series 67DF, E3, AMD Radeon RX Series 67DF, E7, AMD Radeon RX 580 Series +67DF, EB, AMD Radeon Pro 580X 67DF, EF, AMD Radeon RX 570 Series 67DF, F7, AMD Radeon RX P30PH -67C2, 01, AMD Radeon Pro V7350x2 -67C2, 02, AMD Radeon Pro V7300X -67C4, 00, AMD Radeon Pro WX 7100 Graphics -67C4, 80, AMD Radeon E9560 / E9565 Graphics -67C7, 00, 
AMD Radeon Pro WX 5100 Graphics -67C7, 80, AMD Radeon E9390 Graphics -67C0, 00, AMD Radeon Pro WX 7100 Graphics -67D0, 01, AMD Radeon Pro V7350x2 -67D0, 02, AMD Radeon Pro V7300X +67DF, FF, AMD Radeon RX 470 Series 67E0, 00, AMD Radeon Pro WX Series 67E3, 00, AMD Radeon Pro WX 4100 67E8, 00, AMD Radeon Pro WX Series @@ -150,52 +200,60 @@ 67EB, 00, AMD Radeon Pro V5300X 67EF, C0, AMD Radeon RX Graphics 67EF, C1, AMD Radeon RX 460 Graphics +67EF, C2, AMD Radeon Pro Series 67EF, C3, AMD Radeon RX Series 67EF, C5, AMD Radeon RX 460 Graphics 67EF, C7, AMD Radeon RX Graphics 67EF, CF, AMD Radeon RX 460 Graphics -67EF, E2, AMD Radeon RX 560X 67EF, E0, AMD Radeon RX 560 Series 67EF, E1, AMD Radeon RX Series +67EF, E2, AMD Radeon RX 560X 67EF, E3, AMD Radeon RX Series 67EF, E5, AMD Radeon RX 560 Series -67EF, EF, AMD Radeon RX Graphics +67EF, E7, AMD Radeon RX 560 Series +67EF, EF, AMD Radeon 550 Series 67EF, FF, AMD Radeon RX 460 Graphics -67FF, C0, AMD Radeon RX Graphics -67FF, C1, AMD Radeon RX Graphics +67FF, C0, AMD Radeon Pro 465 +67FF, C1, AMD Radeon RX 560 Series 67FF, CF, AMD Radeon RX 560 Series 67FF, EF, AMD Radeon RX 560 Series 67FF, FF, AMD Radeon RX 550 Series -6800, 0, AMD Radeon HD 7970M -6801, 0, AMD Radeon HD 8970M -6808, 0, ATI FirePro V(FireGL V) Graphics Adapter -6809, 0, ATI FirePro V(FireGL V) Graphics Adapter -6810, 0, AMD Radeon HD 8800 Series -6810, 81, AMD Radeon R7 370 Series -6811, 0, AMD Radeon HD 8800 Series -6811, 81, AMD Radeon R7 300 Series -6818, 0, AMD Radeon HD 7800 Series -6819, 0, AMD Radeon HD 7800 Series -6820, 0, AMD Radeon HD 8800M Series +6800, 00, AMD Radeon HD 7970M +6801, 00, AMD Radeon HD 8970M +6806, 00, AMD Radeon R9 M290X +6808, 00, AMD FirePro W7000 +6808, 00, ATI FirePro V (FireGL V) Graphics Adapter +6809, 00, ATI FirePro W5000 +6810, 00, AMD Radeon R9 200 Series +6810, 81, AMD Radeon R9 370 Series +6811, 00, AMD Radeon R9 200 Series +6811, 81, AMD Radeon R7 370 Series +6818, 00, AMD Radeon HD 7800 Series +6819, 00, AMD 
Radeon HD 7800 Series +6820, 00, AMD Radeon R9 M275X 6820, 81, AMD Radeon R9 M375 6820, 83, AMD Radeon R9 M375X -6821, 0, AMD Radeon HD 8800M Series -6821, 87, AMD Radeon R7 M380 +6821, 00, AMD Radeon R9 M200X Series 6821, 83, AMD Radeon R9 M370X -6822, 0, AMD Radeon E8860 -6823, 0, AMD Radeon HD 8800M Series -6825, 0, AMD Radeon HD 7800M Series -6827, 0, AMD Radeon HD 7800M Series -6828, 0, ATI FirePro V(FireGL V) Graphics Adapter -682B, 0, AMD Radeon HD 8800M Series +6821, 87, AMD Radeon R7 M380 +6822, 00, AMD Radeon E8860 +6823, 00, AMD Radeon R9 M200X Series +6825, 00, AMD Radeon HD 7800M Series +6826, 00, AMD Radeon HD 7700M Series +6827, 00, AMD Radeon HD 7800M Series +6828, 00, AMD FirePro W600 +682B, 00, AMD Radeon HD 8800M Series 682B, 87, AMD Radeon R9 M360 -682C, 0, AMD FirePro W4100 -682D, 0, AMD Radeon HD 7700M Series -682F, 0, AMD Radeon HD 7700M Series -6835, 0, AMD Radeon R7 Series / HD 9000 Series -6837, 0, AMD Radeon HD 7700 Series -683D, 0, AMD Radeon HD 7700 Series -683F, 0, AMD Radeon HD 7700 Series +682C, 00, AMD FirePro W4100 +682D, 00, AMD Radeon HD 7700M Series +682F, 00, AMD Radeon HD 7700M Series +6830, 00, AMD Radeon 7800M Series +6831, 00, AMD Radeon 7700M Series +6835, 00, AMD Radeon R7 Series / HD 9000 Series +6837, 00, AMD Radeon HD 7700 Series +683D, 00, AMD Radeon HD 7700 Series +683F, 00, AMD Radeon HD 7700 Series +684C, 00, ATI FirePro V (FireGL V) Graphics Adapter 6860, 00, AMD Radeon Instinct MI25 6860, 01, AMD Radeon Instinct MI25 6860, 02, AMD Radeon Instinct MI25 @@ -207,6 +265,7 @@ 6863, 00, AMD Radeon Vega Frontier Edition 6864, 03, AMD Radeon Pro V340 6864, 04, AMD Radeon Instinct MI25x2 +6864, 05, AMD Radeon Pro V340 6868, 00, AMD Radeon Pro WX 8200 686C, 00, AMD Radeon Instinct MI25 MxGPU 686C, 01, AMD Radeon Instinct MI25 MxGPU @@ -215,35 +274,47 @@ 686C, 04, AMD Radeon Instinct MI25x2 MxGPU 686C, 05, AMD Radeon Pro V340L MxGPU 686C, 06, AMD Radeon Instinct MI25 MxGPU +687F, 01, AMD Radeon RX Vega 687F, C0, AMD Radeon 
RX Vega 687F, C1, AMD Radeon RX Vega 687F, C3, AMD Radeon RX Vega -6900, 0, AMD Radeon R7 M260 +687F, C7, AMD Radeon RX Vega +6900, 00, AMD Radeon R7 M260 6900, 81, AMD Radeon R7 M360 6900, 83, AMD Radeon R7 M340 -6901, 0, AMD Radeon R5 M255 -6907, 0, AMD Radeon R5 M255 +6900, C1, AMD Radeon R5 M465 Series +6900, C3, AMD Radeon R5 M445 Series +6900, D1, AMD Radeon 530 Series +6900, D3, AMD Radeon 530 Series +6901, 00, AMD Radeon R5 M255 +6902, 00, AMD Radeon Series +6907, 00, AMD Radeon R5 M255 6907, 87, AMD Radeon R5 M315 -6920, 0, AMD Radeon R9 M395X -6920, 1, AMD Radeon R9 M390X -6921, 0, AMD Radeon R9 M295X -6929, 0, AMD FirePro S7150 -692B, 0, AMD FirePro W7100 -6938, 0, AMD Radeon R9 200 Series +6920, 00, AMD Radeon R9 M395X +6920, 01, AMD Radeon R9 M390X +6921, 00, AMD Radeon R9 M390X +6929, 00, AMD FirePro S7150 +6929, 01, AMD FirePro S7100X +692B, 00, AMD FirePro W7100 +6938, 00, AMD Radeon R9 200 Series 6938, F0, AMD Radeon R9 200 Series 6938, F1, AMD Radeon R9 380 Series +6939, 00, AMD Radeon R9 200 Series 6939, F0, AMD Radeon R9 200 Series -6939, 0, AMD Radeon R9 200 Series 6939, F1, AMD Radeon R9 380 Series +694C, C0, AMD Radeon RX Vega M GH Graphics +694E, C0, AMD Radeon RX Vega M GL Graphics 6980, 00, AMD Radeon Pro WX 3100 6981, 00, AMD Radeon Pro WX 3200 Series 6981, 01, AMD Radeon Pro WX 3200 Series 6981, 10, AMD Radeon Pro WX 3200 Series 6985, 00, AMD Radeon Pro WX 3100 +6986, 00, AMD Radeon Pro WX 2100 6987, 80, AMD Embedded Radeon E9171 6987, C0, AMD Radeon 550X Series 6987, C1, AMD Radeon RX 640 6987, C3, AMD Radeon 540X Series +6987, C7, AMD Radeon 540 6995, 00, AMD Radeon Pro WX 2100 6997, 00, AMD Radeon Pro WX 2100 699F, 81, AMD Embedded Radeon E9170 Series @@ -251,11 +322,14 @@ 699F, C1, AMD Radeon 540 Series 699F, C3, AMD Radeon 500 Series 699F, C7, AMD Radeon RX 550 / 550 Series +699F, C9, AMD Radeon 540 +6FDF, E7, AMD Radeon RX 590 GME +6FDF, EF, AMD Radeon RX 580 2048SP 7300, C1, AMD FirePro S9300 x2 7300, C8, AMD Radeon R9 Fury Series 
7300, C9, AMD Radeon Pro Duo -7300, CB, AMD Radeon R9 Fury Series 7300, CA, AMD Radeon R9 Fury Series +7300, CB, AMD Radeon R9 Fury Series 7312, 00, AMD Radeon Pro W5700 731E, C6, AMD Radeon RX 5700XTB 731E, C7, AMD Radeon RX 5700B @@ -268,12 +342,16 @@ 731F, CA, AMD Radeon RX 5600 XT 731F, CB, AMD Radeon RX 5600 OEM 7340, C1, AMD Radeon RX 5500M +7340, C3, AMD Radeon RX 5300M 7340, C5, AMD Radeon RX 5500 XT 7340, C7, AMD Radeon RX 5500 7340, C9, AMD Radeon RX 5500XTB 7340, CF, AMD Radeon RX 5300 7341, 00, AMD Radeon Pro W5500 7347, 00, AMD Radeon Pro W5500M +7360, 41, AMD Radeon Pro 5600M +7360, C3, AMD Radeon Pro V520 +738C, 01, AMD Instinct MI100 73A3, 00, AMD Radeon Pro W6800 73A5, C0, AMD Radeon RX 6950 XT 73AF, C0, AMD Radeon RX 6900 XT @@ -282,9 +360,11 @@ 73BF, C3, AMD Radeon RX 6800 73DF, C0, AMD Radeon RX 6750 XT 73DF, C1, AMD Radeon RX 6700 XT +73DF, C2, AMD Radeon RX 6800M 73DF, C3, AMD Radeon RX 6800M 73DF, C5, AMD Radeon RX 6700 XT 73DF, CF, AMD Radeon RX 6700M +73DF, D7, AMD TDC-235 73E1, 00, AMD Radeon Pro W6600M 73E3, 00, AMD Radeon Pro W6600 73EF, C0, AMD Radeon RX 6800S @@ -296,22 +376,112 @@ 73FF, C3, AMD Radeon RX 6600M 73FF, C7, AMD Radeon RX 6600 73FF, CB, AMD Radeon RX 6600S +7408, 00, AMD Instinct MI250X +740C, 01, AMD Instinct MI250X / MI250 +740F, 02, AMD Instinct MI210 7421, 00, AMD Radeon Pro W6500M -7422, 00, AMD Radeon PRO W6400 +7422, 00, AMD Radeon Pro W6400 7423, 00, AMD Radeon Pro W6300M +7423, 01, AMD Radeon Pro W6300 7424, 00, AMD Radeon RX 6300 743F, C1, AMD Radeon RX 6500 XT 743F, C3, AMD Radeon RX 6500 +743F, C3, AMD Radeon RX 6500M 743F, C7, AMD Radeon RX 6400 743F, CF, AMD Radeon RX 6300M +9830, 00, AMD Radeon HD 8400 / R3 Series +9831, 00, AMD Radeon HD 8400E +9832, 00, AMD Radeon HD 8330 +9833, 00, AMD Radeon HD 8330E +9834, 00, AMD Radeon HD 8210 +9835, 00, AMD Radeon HD 8210E +9836, 00, AMD Radeon HD 8200 / R3 Series +9837, 00, AMD Radeon HD 8280E +9838, 00, AMD Radeon HD 8200 / R3 series +9839, 00, AMD Radeon HD 8180 
+983D, 00, AMD Radeon HD 8250 +9850, 00, AMD Radeon R3 Graphics +9850, 03, AMD Radeon R3 Graphics +9850, 40, AMD Radeon R2 Graphics +9850, 45, AMD Radeon R3 Graphics +9851, 00, AMD Radeon R4 Graphics +9851, 01, AMD Radeon R5E Graphics +9851, 05, AMD Radeon R5 Graphics +9851, 06, AMD Radeon R5E Graphics +9851, 40, AMD Radeon R4 Graphics +9851, 45, AMD Radeon R5 Graphics +9852, 00, AMD Radeon R2 Graphics +9852, 40, AMD Radeon E1 Graphics +9853, 00, AMD Radeon R2 Graphics +9853, 01, AMD Radeon R4E Graphics +9853, 03, AMD Radeon R2 Graphics +9853, 05, AMD Radeon R1E Graphics +9853, 06, AMD Radeon R1E Graphics +9853, 07, AMD Radeon R1E Graphics +9853, 08, AMD Radeon R1E Graphics +9853, 40, AMD Radeon R2 Graphics +9854, 00, AMD Radeon R3 Graphics +9854, 01, AMD Radeon R3E Graphics +9854, 02, AMD Radeon R3 Graphics +9854, 05, AMD Radeon R2 Graphics +9854, 06, AMD Radeon R4 Graphics +9854, 07, AMD Radeon R3 Graphics +9855, 02, AMD Radeon R6 Graphics +9855, 05, AMD Radeon R4 Graphics +9856, 00, AMD Radeon R2 Graphics +9856, 01, AMD Radeon R2E Graphics +9856, 02, AMD Radeon R2 Graphics +9856, 05, AMD Radeon R1E Graphics +9856, 06, AMD Radeon R2 Graphics +9856, 07, AMD Radeon R1E Graphics +9856, 08, AMD Radeon R1E Graphics +9856, 13, AMD Radeon R1E Graphics +9874, 81, AMD Radeon R6 Graphics +9874, 84, AMD Radeon R7 Graphics +9874, 85, AMD Radeon R6 Graphics +9874, 87, AMD Radeon R5 Graphics +9874, 88, AMD Radeon R7E Graphics +9874, 89, AMD Radeon R6E Graphics 9874, C4, AMD Radeon R7 Graphics 9874, C5, AMD Radeon R6 Graphics 9874, C6, AMD Radeon R6 Graphics 9874, C7, AMD Radeon R5 Graphics 9874, C8, AMD Radeon R7 Graphics -9874, 81, AMD Radeon R6 Graphics -9874, 87, AMD Radeon R5 Graphics -9874, 85, AMD Radeon R6 Graphics -9874, 84, AMD Radeon R7 Graphics -6FDF, E7, AMD Radeon RX 590 GME -6FDF, EF, AMD Radeon RX 580 2048SP +9874, C9, AMD Radeon R7 Graphics +9874, CA, AMD Radeon R5 Graphics +9874, CB, AMD Radeon R5 Graphics +9874, CC, AMD Radeon R7 Graphics +9874, CD, AMD 
Radeon R7 Graphics +9874, CE, AMD Radeon R5 Graphics +9874, E1, AMD Radeon R7 Graphics +9874, E2, AMD Radeon R7 Graphics +9874, E3, AMD Radeon R7 Graphics +9874, E4, AMD Radeon R7 Graphics +9874, E5, AMD Radeon R5 Graphics +9874, E6, AMD Radeon R5 Graphics +98E4, 80, AMD Radeon R5E Graphics +98E4, 81, AMD Radeon R4E Graphics +98E4, 83, AMD Radeon R2E Graphics +98E4, 84, AMD Radeon R2E Graphics +98E4, 86, AMD Radeon R1E Graphics +98E4, C0, AMD Radeon R4 Graphics +98E4, C1, AMD Radeon R5 Graphics +98E4, C2, AMD Radeon R4 Graphics +98E4, C4, AMD Radeon R5 Graphics +98E4, C6, AMD Radeon R5 Graphics +98E4, C8, AMD Radeon R4 Graphics +98E4, C9, AMD Radeon R4 Graphics +98E4, CA, AMD Radeon R5 Graphics +98E4, D0, AMD Radeon R2 Graphics +98E4, D1, AMD Radeon R2 Graphics +98E4, D2, AMD Radeon R2 Graphics +98E4, D4, AMD Radeon R2 Graphics +98E4, D9, AMD Radeon R5 Graphics +98E4, DA, AMD Radeon R5 Graphics +98E4, DB, AMD Radeon R3 Graphics +98E4, E1, AMD Radeon R3 Graphics +98E4, E2, AMD Radeon R3 Graphics +98E4, E9, AMD Radeon R4 Graphics +98E4, EA, AMD Radeon R4 Graphics +98E4, EB, AMD Radeon R3 Graphics +98E4, EB, AMD Radeon R4 Graphics diff --git a/lib/libdrm/gen_table_fourcc.py b/lib/libdrm/gen_table_fourcc.py index 4236fd793..1473d437a 100644 --- a/lib/libdrm/gen_table_fourcc.py +++ b/lib/libdrm/gen_table_fourcc.py @@ -56,7 +56,7 @@ with open(towrite, "w") as f: that script instead of adding here entries manually! 
*/ static const struct drmFormatModifierInfo drm_format_modifier_table[] = { ''') - f.write(' { DRM_MODIFIER_INVALID(NONE, INVALID_MODIFIER) },\n') + f.write(' { DRM_MODIFIER_INVALID(NONE, INVALID) },\n') f.write(' { DRM_MODIFIER_LINEAR(NONE, LINEAR) },\n') for entry in fm_re['intel']: diff --git a/lib/libdrm/include/drm/amdgpu_drm.h b/lib/libdrm/include/drm/amdgpu_drm.h index 7f01f9830..c0a0ad101 100644 --- a/lib/libdrm/include/drm/amdgpu_drm.h +++ b/lib/libdrm/include/drm/amdgpu_drm.h @@ -553,6 +553,10 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_VCE 4 #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. + */ #define AMDGPU_HW_IP_VCN_ENC 7 #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9 diff --git a/lib/libdrm/include/drm/drm_fourcc.h b/lib/libdrm/include/drm/drm_fourcc.h index 957c7be29..2c9051ffb 100644 --- a/lib/libdrm/include/drm/drm_fourcc.h +++ b/lib/libdrm/include/drm/drm_fourcc.h @@ -104,6 +104,12 @@ extern "C" { /* 8 bpp Red */ #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ +/* 10 bpp Red */ +#define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ + +/* 12 bpp Red */ +#define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ + /* 16 bpp Red */ #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ @@ -308,6 +314,13 @@ extern "C" { */ #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ +/* 2 plane YCbCr420. + * 3 10 bit components and 2 padding bits packed into 4 bytes. 
+ * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian + * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian + */ +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ + /* 3 plane non-subsampled (444) YCbCr * 16 bits per component, but only 10 bits are used and 6 bits are padded * index 0: Y plane, [15:0] Y:x [10:6] little endian @@ -373,6 +386,12 @@ extern "C" { #define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) +#define fourcc_mod_get_vendor(modifier) \ + (((modifier) >> 56) & 0xff) + +#define fourcc_mod_is_vendor(modifier, vendor) \ + (fourcc_mod_get_vendor(modifier) == DRM_FORMAT_MOD_VENDOR_## vendor) + #define fourcc_mod_code(vendor, val) \ ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL)) @@ -540,7 +559,7 @@ extern "C" { * * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear * and at index 1. The clear color is stored at index 2, and the pitch should - * be ignored. The clear color structure is 256 bits. The first 128 bits + * be 64 bytes aligned. The clear color structure is 256 bits. The first 128 bits * represents Raw Clear Color Red, Green, Blue and Alpha color each represented * by 32 bits. The raw clear color is consumed by the 3d engine and generates * the converted clear color of size 64 bits. The first 32 bits store the Lower @@ -554,6 +573,53 @@ extern "C" { #define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) /* + * Intel Tile 4 layout + * + * This is a tiled layout using 4KB tiles in a row-major layout. It has the same + * shape as Tile Y at two granularities: 4KB (128B x 32) and 64B (16B x 4). It + * only differs from Tile Y at the 256B granularity in between. At this + * granularity, Tile Y has a shape of 16B x 32 rows, but this tiling has a shape + * of 64B x 8 rows. 
+ */ +#define I915_FORMAT_MOD_4_TILED fourcc_mod_code(INTEL, 9) + +/* + * Intel color control surfaces (CCS) for DG2 render compression. + * + * The main surface is Tile 4 and at plane index 0. The CCS data is stored + * outside of the GEM object in a reserved memory area dedicated for the + * storage of the CCS data for all RC/RC_CC/MC compressible GEM objects. The + * main surface pitch is required to be a multiple of four Tile 4 widths. + */ +#define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS fourcc_mod_code(INTEL, 10) + +/* + * Intel color control surfaces (CCS) for DG2 media compression. + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all RC/RC_CC/MC compressible GEM objects. The main surface + * pitch is required to be a multiple of four Tile 4 widths. + */ +#define I915_FORMAT_MOD_4_TILED_DG2_MC_CCS fourcc_mod_code(INTEL, 11) + +/* + * Intel Color Control Surface with Clear Color (CCS) for DG2 render compression. + * + * The main surface is Tile 4 and at plane index 0. The CCS data is stored + * outside of the GEM object in a reserved memory area dedicated for the + * storage of the CCS data for all RC/RC_CC/MC compressible GEM objects. The + * main surface pitch is required to be a multiple of four Tile 4 widths. The + * clear color is stored at plane index 1 and the pitch should be 64 bytes + * aligned. The format of the 256 bits of clear color data matches the one used + * for the I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC modifier, see its description + * for details. 
+ */ +#define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC fourcc_mod_code(INTEL, 12) + +/* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * * Macroblocks are laid in a Z-shape, and each pixel data is following the @@ -590,6 +656,28 @@ extern "C" { */ #define DRM_FORMAT_MOD_QCOM_COMPRESSED fourcc_mod_code(QCOM, 1) +/* + * Qualcomm Tiled Format + * + * Similar to DRM_FORMAT_MOD_QCOM_COMPRESSED but not compressed. + * Implementation may be platform and base-format specific. + * + * Each macrotile consists of m x n (mostly 4 x 4) tiles. + * Pixel data pitch/stride is aligned with macrotile width. + * Pixel data height is aligned with macrotile height. + * Entire pixel data buffer is aligned with 4k(bytes). + */ +#define DRM_FORMAT_MOD_QCOM_TILED3 fourcc_mod_code(QCOM, 3) + +/* + * Qualcomm Alternate Tiled Format + * + * Alternate tiled format typically only used within GMEM. + * Implementation may be platform and base-format specific. + */ +#define DRM_FORMAT_MOD_QCOM_TILED2 fourcc_mod_code(QCOM, 2) + + /* Vivante framebuffer modifiers */ /* @@ -842,6 +930,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * and UV. Some SAND-using hardware stores UV in a separate tiled * image from Y to reduce the column height, which is not supported * with these modifiers. + * + * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also + * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes + * wide, but as this is a 10 bpp format that translates to 96 pixels. 
*/ #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ @@ -1271,6 +1363,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_TILE_VER_GFX9 1 #define AMD_FMT_MOD_TILE_VER_GFX10 2 #define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3 +#define AMD_FMT_MOD_TILE_VER_GFX11 4 /* * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical @@ -1286,6 +1379,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 #define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26 #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 +#define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31 #define AMD_FMT_MOD_DCC_BLOCK_64B 0 #define AMD_FMT_MOD_DCC_BLOCK_128B 1 @@ -1352,11 +1446,11 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_PIPE_MASK 0x7 #define AMD_FMT_MOD_SET(field, value) \ - ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) + ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT) #define AMD_FMT_MOD_GET(field, value) \ (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) #define AMD_FMT_MOD_CLEAR(field) \ - (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) + (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) #if defined(__cplusplus) } diff --git a/lib/libdrm/intel/intel_chipset.c b/lib/libdrm/intel/intel_chipset.c index ee8866c1b..76b194034 100644 --- a/lib/libdrm/intel/intel_chipset.c +++ b/lib/libdrm/intel/intel_chipset.c @@ -35,6 +35,9 @@ static const struct pci_device { uint16_t gen; } pciids[] = { /* Keep ids sorted by gen; latest gen first */ + INTEL_MTL_IDS(12), + INTEL_ATS_M_IDS(12), + INTEL_DG2_IDS(12), INTEL_ADLN_IDS(12), INTEL_RPLP_IDS(12), INTEL_ADLP_IDS(12), diff --git a/lib/libdrm/intel/test_decode.c b/lib/libdrm/intel/test_decode.c index b9f5b9279..c47752c98 100644 --- a/lib/libdrm/intel/test_decode.c +++ b/lib/libdrm/intel/test_decode.c @@ -86,7 +86,8 @@ static void compare_batch(struct drm_intel_decode *ctx, const char 
*batch_filename) { FILE *out = NULL; - void *ptr, *ref_ptr, *batch_ptr; + char *ptr; + void *ref_ptr, *batch_ptr; #if HAVE_OPEN_MEMSTREAM size_t size; #endif @@ -106,7 +107,7 @@ compare_batch(struct drm_intel_decode *ctx, const char *batch_filename) * inside of an automake project's test infrastructure. */ #if HAVE_OPEN_MEMSTREAM - out = open_memstream((char **)&ptr, &size); + out = open_memstream(&ptr, &size); #else fprintf(stderr, "platform lacks open_memstream, skipping.\n"); exit(77); diff --git a/lib/libdrm/meson.build b/lib/libdrm/meson.build index 06f48dd97..3ff6bfaa2 100644 --- a/lib/libdrm/meson.build +++ b/lib/libdrm/meson.build @@ -21,12 +21,16 @@ project( 'libdrm', ['c'], - version : '2.4.111', + version : '2.4.114', license : 'MIT', meson_version : '>= 0.53', - default_options : ['buildtype=debugoptimized', 'c_std=c99'], + default_options : ['buildtype=debugoptimized', 'c_std=c11'], ) +if ['windows', 'darwin'].contains(host_machine.system()) + error('unsupported OS: @0@'.format(host_machine.system())) +endif + pkg = import('pkgconfig') config = configuration_data() @@ -34,6 +38,7 @@ config = configuration_data() config.set10('UDEV', get_option('udev')) with_freedreno_kgsl = get_option('freedreno-kgsl') with_install_tests = get_option('install-test-programs') +with_tests = get_option('tests') if ['freebsd', 'dragonfly', 'netbsd'].contains(host_machine.system()) dep_pthread_stubs = dependency('pthread-stubs', version : '>= 0.4') @@ -82,57 +87,59 @@ endif config.set10('HAVE_LIBDRM_ATOMIC_PRIMITIVES', intel_atomics) config.set10('HAVE_LIB_ATOMIC_OPS', lib_atomics) +dep_pciaccess = dependency('pciaccess', version : '>= 0.10', required : get_option('intel')) + with_intel = false _intel = get_option('intel') -if _intel != 'false' - if _intel == 'true' and not with_atomics +if not _intel.disabled() + if _intel.enabled() and not with_atomics error('libdrm_intel requires atomics.') else - with_intel = _intel == 'true' or 
host_machine.cpu_family().startswith('x86') + with_intel = (_intel.enabled() or host_machine.cpu_family().startswith('x86')) and with_atomics and dep_pciaccess.found() endif endif summary('Intel', with_intel) with_radeon = false _radeon = get_option('radeon') -if _radeon != 'false' - if _radeon == 'true' and not with_atomics +if not _radeon.disabled() + if _radeon.enabled() and not with_atomics error('libdrm_radeon requires atomics.') endif - with_radeon = true + with_radeon = with_atomics endif summary('Radeon', with_radeon) with_amdgpu = false _amdgpu = get_option('amdgpu') -if _amdgpu != 'false' - if _amdgpu == 'true' and not with_atomics +if not _amdgpu.disabled() + if _amdgpu.enabled() and not with_atomics error('libdrm_amdgpu requires atomics.') endif - with_amdgpu = true + with_amdgpu = with_atomics endif summary('AMDGPU', with_amdgpu) with_nouveau = false _nouveau = get_option('nouveau') -if _nouveau != 'false' - if _nouveau == 'true' and not with_atomics +if not _nouveau.disabled() + if _nouveau.enabled() and not with_atomics error('libdrm_nouveau requires atomics.') endif - with_nouveau = true + with_nouveau = with_atomics endif summary('Nouveau', with_nouveau) with_vmwgfx = false _vmwgfx = get_option('vmwgfx') -if _vmwgfx != 'false' +if not _vmwgfx.disabled() with_vmwgfx = true endif summary('vmwgfx', with_vmwgfx) with_omap = false _omap = get_option('omap') -if _omap == 'true' +if _omap.enabled() if not with_atomics error('libdrm_omap requires atomics.') endif @@ -142,11 +149,11 @@ summary('OMAP', with_omap) with_freedreno = false _freedreno = get_option('freedreno') -if _freedreno != 'false' - if _freedreno == 'true' and not with_atomics +if not _freedreno.disabled() + if _freedreno.enabled() and not with_atomics error('libdrm_freedreno requires atomics.') else - with_freedreno = _freedreno == 'true' or ['arm', 'aarch64'].contains(host_machine.cpu_family()) + with_freedreno = (_freedreno.enabled() or ['arm', 
'aarch64'].contains(host_machine.cpu_family())) and with_atomics endif endif summary('Freedreno', with_freedreno) @@ -154,7 +161,7 @@ summary('Freedreon-kgsl', with_freedreno_kgsl) with_tegra = false _tegra = get_option('tegra') -if _tegra == 'true' +if _tegra.enabled() if not with_atomics error('libdrm_tegra requires atomics.') endif @@ -164,21 +171,26 @@ summary('Tegra', with_tegra) with_etnaviv = false _etnaviv = get_option('etnaviv') -if _etnaviv == 'true' - if not with_atomics +if not _etnaviv.disabled() + if _etnaviv.enabled() and not with_atomics error('libdrm_etnaviv requires atomics.') endif - with_etnaviv = true + with_etnaviv = _etnaviv.enabled() or ( + with_atomics and [ + 'loongarch64', 'mips', 'mips64', + 'arm', 'aarch64', 'arc', + ].contains(host_machine.cpu_family()) + ) endif summary('Etnaviv', with_etnaviv) -with_exynos = get_option('exynos') == 'true' +with_exynos = get_option('exynos').enabled() summary('EXYNOS', with_exynos) with_vc4 = false _vc4 = get_option('vc4') -if _vc4 != 'false' - with_vc4 = _vc4 == 'true' or ['arm', 'aarch64'].contains(host_machine.cpu_family()) +if not _vc4.disabled() + with_vc4 = _vc4.enabled() or ['arm', 'aarch64'].contains(host_machine.cpu_family()) endif summary('VC4', with_vc4) @@ -234,32 +246,19 @@ libdrm_c_args = cc.get_supported_arguments([ '-Wno-unused-parameter', '-Wno-attributes', '-Wno-long-long', '-Wno-missing-field-initializers']) -dep_pciaccess = dependency('pciaccess', version : '>= 0.10', required : with_intel) dep_cunit = dependency('cunit', version : '>= 2.1', required : false) -_cairo_tests = get_option('cairo-tests') -if _cairo_tests != 'false' - dep_cairo = dependency('cairo', required : _cairo_tests == 'true') - with_cairo_tests = dep_cairo.found() -else - dep_cairo = [] - with_cairo_tests = false -endif -_valgrind = get_option('valgrind') -if _valgrind != 'false' - if with_freedreno - dep_valgrind = dependency('valgrind', required : _valgrind == 'true', version : '>=3.10.0') - else - 
dep_valgrind = dependency('valgrind', required : _valgrind == 'true') - endif - with_valgrind = dep_valgrind.found() -else - dep_valgrind = [] - with_valgrind = false +dep_cairo = dependency('cairo', required : get_option('cairo-tests')) +with_cairo_tests = dep_cairo.found() + +valgrind_version = [] +if with_freedreno + valgrind_version = '>=3.10.0' endif +dep_valgrind = dependency('valgrind', required : get_option('valgrind'), version : valgrind_version) +with_valgrind = dep_valgrind.found() -with_man_pages = get_option('man-pages') -prog_rst2man = find_program('rst2man', 'rst2man.py', required: with_man_pages == 'true') -with_man_pages = with_man_pages != 'false' and prog_rst2man.found() +prog_rst2man = find_program('rst2man', 'rst2man.py', required: get_option('man-pages')) +with_man_pages = prog_rst2man.found() config.set10('HAVE_VISIBILITY', cc.has_function_attribute('visibility:hidden')) @@ -391,4 +390,6 @@ if with_man_pages subdir('man') endif subdir('data') -subdir('tests') +if with_tests + subdir('tests') +endif diff --git a/lib/libdrm/meson_options.txt b/lib/libdrm/meson_options.txt index f5d066f08..e80d79e2c 100644 --- a/lib/libdrm/meson_options.txt +++ b/lib/libdrm/meson_options.txt @@ -20,100 +20,75 @@ option( 'intel', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for Intel's KMS API.''', ) option( 'radeon', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for radeons's KMS API.''', ) option( 'amdgpu', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for amdgpu's KMS API.''', ) option( 'nouveau', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for nouveau's KMS API.''', ) option( 'vmwgfx', - type : 'combo', - value : 'true', - choices : 
['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for vmgfx's KMS API.''', ) option( 'omap', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], + type : 'feature', + value : 'disabled', description : '''Enable support for OMAP's experimental KMS API.''', ) option( 'exynos', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], + type : 'feature', + value : 'disabled', description : '''Enable support for EXYNOS's experimental KMS API.''', ) option( 'freedreno', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for freedreno's KMS API.''', ) option( 'tegra', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], + type : 'feature', + value : 'disabled', description : '''Enable support for Tegra's experimental KMS API.''', ) option( 'vc4', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for vc4's KMS API.''', ) option( 'etnaviv', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], - description : '''Enable support for etnaviv's experimental KMS API.''', + type : 'feature', + description : '''Enable support for etnaviv's KMS API.''', ) option( 'cairo-tests', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : 'Enable support for Cairo rendering in tests.', ) option( 'man-pages', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : 'Enable manpage generation and installation.', ) option( 'valgrind', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : 'Build libdrm with valgrind support.', ) option( @@ -134,3 +109,9 @@ option( value : false, description : 'Enable support for using udev instead of mknod.', ) +option( + 'tests', + 
type : 'boolean', + value : true, + description : 'Build test programs.', +) diff --git a/lib/libdrm/tests/amdgpu/amdgpu_test.c b/lib/libdrm/tests/amdgpu/amdgpu_test.c index 7f3aee40b..9abe57309 100644 --- a/lib/libdrm/tests/amdgpu/amdgpu_test.c +++ b/lib/libdrm/tests/amdgpu/amdgpu_test.c @@ -549,6 +549,14 @@ static void amdgpu_disable_suites() "gfx ring slow bad draw test (set amdgpu.lockup_timeout=50)", CU_FALSE)) fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + if (amdgpu_set_test_active(DEADLOCK_TESTS_STR, + "sdma ring corrupted header test (set amdgpu.lockup_timeout=50)", CU_FALSE)) + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + + if (amdgpu_set_test_active(DEADLOCK_TESTS_STR, + "sdma ring slow linear copy test (set amdgpu.lockup_timeout=50)", CU_FALSE)) + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + if (amdgpu_set_test_active(BASIC_TESTS_STR, "bo eviction Test", CU_FALSE)) fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); diff --git a/lib/libdrm/tests/amdgpu/amdgpu_test.h b/lib/libdrm/tests/amdgpu/amdgpu_test.h index 9f4453db6..e2ba043bb 100644 --- a/lib/libdrm/tests/amdgpu/amdgpu_test.h +++ b/lib/libdrm/tests/amdgpu/amdgpu_test.h @@ -282,12 +282,6 @@ CU_BOOL suite_cp_dma_tests_enable(void); */ extern CU_TestInfo cp_dma_tests[]; -void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type); -void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type); -void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, - int version, int hang); -void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version); - /** * Initialize security test suite */ @@ -314,7 +308,12 @@ amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle unsigned ip_type, bool secure); - +extern void amdgpu_test_dispatch_helper(amdgpu_device_handle 
device_handle, unsigned ip); +extern void amdgpu_test_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip); +extern void amdgpu_test_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip); +extern void amdgpu_test_draw_helper(amdgpu_device_handle device_handle); +extern void amdgpu_test_draw_hang_helper(amdgpu_device_handle device_handle); +extern void amdgpu_test_draw_hang_slow_helper(amdgpu_device_handle device_handle); /** * Initialize hotunplug test suite diff --git a/lib/libdrm/tests/amdgpu/basic_tests.c b/lib/libdrm/tests/amdgpu/basic_tests.c index 688260d96..42176c007 100644 --- a/lib/libdrm/tests/amdgpu/basic_tests.c +++ b/lib/libdrm/tests/amdgpu/basic_tests.c @@ -857,6 +857,13 @@ static void amdgpu_command_submission_gfx_separate_ibs(void) amdgpu_bo_list_handle bo_list; amdgpu_va_handle va_handle, va_handle_ce; int r, i = 0; + struct drm_amdgpu_info_hw_ip info; + + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); + CU_ASSERT_EQUAL(r, 0); + + if (info.hw_ip_version_major >= 11) + return; r = amdgpu_cs_ctx_create(device_handle, &context_handle); CU_ASSERT_EQUAL(r, 0); @@ -948,6 +955,13 @@ static void amdgpu_command_submission_gfx_shared_ib(void) amdgpu_bo_list_handle bo_list; amdgpu_va_handle va_handle; int r, i = 0; + struct drm_amdgpu_info_hw_ip info; + + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); + CU_ASSERT_EQUAL(r, 0); + + if (info.hw_ip_version_major >= 11) + return; r = amdgpu_cs_ctx_create(device_handle, &context_handle); CU_ASSERT_EQUAL(r, 0); @@ -2070,6 +2084,13 @@ static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) amdgpu_va_handle va_handle, va_handle_ce; int r; int i = 0, ib_cs_num = 2; + struct drm_amdgpu_info_hw_ip info; + + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); + CU_ASSERT_EQUAL(r, 0); + + if (info.hw_ip_version_major >= 11) + return; r = amdgpu_cs_ctx_create(device_handle, &context_handle); 
CU_ASSERT_EQUAL(r, 0); @@ -2457,1813 +2478,19 @@ static void amdgpu_sync_dependency_test(void) free(ibs_request.dependencies); } -static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) -{ - struct amdgpu_test_shader *shader; - int i, loop = 0x10000; - - switch (family) { - case AMDGPU_FAMILY_AI: - shader = &memcpy_cs_hang_slow_ai; - break; - case AMDGPU_FAMILY_RV: - shader = &memcpy_cs_hang_slow_rv; - break; - case AMDGPU_FAMILY_NV: - shader = &memcpy_cs_hang_slow_nv; - break; - default: - return -1; - break; - } - - memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); - - for (i = 0; i < loop; i++) - memcpy(ptr + shader->header_length + shader->body_length * i, - shader->shader + shader->header_length, - shader->body_length * sizeof(uint32_t)); - - memcpy(ptr + shader->header_length + shader->body_length * loop, - shader->shader + shader->header_length + shader->body_length, - shader->foot_length * sizeof(uint32_t)); - - return 0; -} - -static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, - int cs_type, - uint32_t version) -{ - uint32_t shader_size; - const uint32_t *shader; - - switch (cs_type) { - case CS_BUFFERCLEAR: - if (version == 9) { - shader = bufferclear_cs_shader_gfx9; - shader_size = sizeof(bufferclear_cs_shader_gfx9); - } else if (version == 10) { - shader = bufferclear_cs_shader_gfx10; - shader_size = sizeof(bufferclear_cs_shader_gfx10); - } - break; - case CS_BUFFERCOPY: - if (version == 9) { - shader = buffercopy_cs_shader_gfx9; - shader_size = sizeof(buffercopy_cs_shader_gfx9); - } else if (version == 10) { - shader = buffercopy_cs_shader_gfx10; - shader_size = sizeof(buffercopy_cs_shader_gfx10); - } - break; - case CS_HANG: - shader = memcpy_ps_hang; - shader_size = sizeof(memcpy_ps_hang); - break; - default: - return -1; - break; - } - - memcpy(ptr, shader, shader_size); - return 0; -} - -static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version) -{ - int i = 0; - - /* Write 
context control and load shadowing register if necessary */ - if (ip_type == AMDGPU_HW_IP_GFX) { - ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); - ptr[i++] = 0x80000000; - ptr[i++] = 0x80000000; - } - - /* Issue commands to set default compute state. */ - /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); - ptr[i++] = 0x204; - i += 3; - - /* clear mmCOMPUTE_TMPRING_SIZE */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); - ptr[i++] = 0x218; - ptr[i++] = 0; - - /* Set new sh registers in GFX10 to 0 */ - if (version == 10) { - /* mmCOMPUTE_SHADER_CHKSUM */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); - ptr[i++] = 0x22a; - ptr[i++] = 0; - /* mmCOMPUTE_REQ_CTRL */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6); - ptr[i++] = 0x222; - i += 6; - /* mmCP_COHER_START_DELAY */ - ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ptr[i++] = 0x7b; - ptr[i++] = 0x20; - } - return i; -} - -static int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version) -{ - int i = 0; - - /* Issue commands to set cu mask used in current dispatch */ - if (version == 9) { - /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); - ptr[i++] = 0x216; - ptr[i++] = 0xffffffff; - ptr[i++] = 0xffffffff; - /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); - ptr[i++] = 0x219; - ptr[i++] = 0xffffffff; - ptr[i++] = 0xffffffff; - } else if (version == 10) { - /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); - ptr[i++] = 0x30000216; - ptr[i++] = 0xffffffff; - ptr[i++] = 0xffffffff; - /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); - ptr[i++] = 0x30000219; - ptr[i++] = 0xffffffff; - ptr[i++] = 0xffffffff; - } - - return i; -} - 
-static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version) -{ - int i, j; - - i = 0; - - /* Writes shader state to HW */ - /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); - ptr[i++] = 0x20c; - ptr[i++] = (shader_addr >> 8); - ptr[i++] = (shader_addr >> 40); - /* write sh regs*/ - for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); - /* - Gfx9ShRegBase */ - ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; - ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; - } - - if (version == 10) { - /* mmCOMPUTE_PGM_RSRC3 */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); - ptr[i++] = 0x228; - ptr[i++] = 0; - } - - return i; -} - -static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, - uint32_t ip_type, - uint32_t ring, - uint32_t version) -{ - amdgpu_context_handle context_handle; - amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; - volatile unsigned char *ptr_dst; - void *ptr_shader; - uint32_t *ptr_cmd; - uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; - amdgpu_va_handle va_dst, va_shader, va_cmd; - int i, r; - int bo_dst_size = 16384; - int bo_shader_size = 4096; - int bo_cmd_size = 4096; - struct amdgpu_cs_request ibs_request = {0}; - struct amdgpu_cs_ib_info ib_info= {0}; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_fence fence_status = {0}; - uint32_t expired; - - r = amdgpu_cs_ctx_create(device_handle, &context_handle); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, - AMDGPU_GEM_DOMAIN_GTT, 0, - &bo_cmd, (void **)&ptr_cmd, - &mc_address_cmd, &va_cmd); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_cmd, 0, bo_cmd_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader, &ptr_shader, - &mc_address_shader, &va_shader); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader, 0, 
bo_shader_size); - - r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_dst, (void **)&ptr_dst, - &mc_address_dst, &va_dst); - CU_ASSERT_EQUAL(r, 0); - - i = 0; - i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); - - /* Issue commands to set cu mask used in current dispatch */ - i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); - - /* Writes shader state to HW */ - i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); - - /* Write constant data */ - /* Writes the UAV constant data to the SGPRs. */ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x240; - ptr_cmd[i++] = mc_address_dst; - ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; - ptr_cmd[i++] = 0x400; - if (version == 9) - ptr_cmd[i++] = 0x74fac; - else if (version == 10) - ptr_cmd[i++] = 0x1104bfac; - - /* Sets a range of pixel shader constants */ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x244; - ptr_cmd[i++] = 0x22222222; - ptr_cmd[i++] = 0x22222222; - ptr_cmd[i++] = 0x22222222; - ptr_cmd[i++] = 0x22222222; - - /* clear mmCOMPUTE_RESOURCE_LIMITS */ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); - ptr_cmd[i++] = 0x215; - ptr_cmd[i++] = 0; - - /* dispatch direct command */ - ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); - ptr_cmd[i++] = 0x10; - ptr_cmd[i++] = 1; - ptr_cmd[i++] = 1; - ptr_cmd[i++] = 1; - - while (i & 7) - ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ - - resources[0] = bo_dst; - resources[1] = bo_shader; - resources[2] = bo_cmd; - r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); - CU_ASSERT_EQUAL(r, 0); - - ib_info.ib_mc_address = mc_address_cmd; - ib_info.size = i; - ibs_request.ip_type = ip_type; - ibs_request.ring = ring; - ibs_request.resources = bo_list; - ibs_request.number_of_ibs = 1; - ibs_request.ibs = 
&ib_info; - ibs_request.fence_info.handle = NULL; - - /* submit CS */ - r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_list_destroy(bo_list); - CU_ASSERT_EQUAL(r, 0); - - fence_status.ip_type = ip_type; - fence_status.ip_instance = 0; - fence_status.ring = ring; - fence_status.context = context_handle; - fence_status.fence = ibs_request.seq_no; - - /* wait for IB accomplished */ - r = amdgpu_cs_query_fence_status(&fence_status, - AMDGPU_TIMEOUT_INFINITE, - 0, &expired); - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(expired, true); - - /* verify if memset test result meets with expected */ - i = 0; - while(i < bo_dst_size) { - CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); - } - - r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_free(context_handle); - CU_ASSERT_EQUAL(r, 0); -} - -static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, - uint32_t ip_type, - uint32_t ring, - uint32_t version, - int hang) -{ - amdgpu_context_handle context_handle; - amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; - volatile unsigned char *ptr_dst; - void *ptr_shader; - unsigned char *ptr_src; - uint32_t *ptr_cmd; - uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; - amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; - int i, r; - int bo_dst_size = 16384; - int bo_shader_size = 4096; - int bo_cmd_size = 4096; - struct amdgpu_cs_request ibs_request = {0}; - struct amdgpu_cs_ib_info ib_info= {0}; - uint32_t expired, hang_state, hangs; - enum cs_type cs_type; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_fence fence_status = {0}; - - r = amdgpu_cs_ctx_create(device_handle, &context_handle); 
- CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, - AMDGPU_GEM_DOMAIN_GTT, 0, - &bo_cmd, (void **)&ptr_cmd, - &mc_address_cmd, &va_cmd); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_cmd, 0, bo_cmd_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader, &ptr_shader, - &mc_address_shader, &va_shader); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader, 0, bo_shader_size); - - cs_type = hang ? CS_HANG : CS_BUFFERCOPY; - r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_src, (void **)&ptr_src, - &mc_address_src, &va_src); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_dst, (void **)&ptr_dst, - &mc_address_dst, &va_dst); - CU_ASSERT_EQUAL(r, 0); - - memset(ptr_src, 0x55, bo_dst_size); - - i = 0; - i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); - - /* Issue commands to set cu mask used in current dispatch */ - i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); - - /* Writes shader state to HW */ - i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); - - /* Write constant data */ - /* Writes the texture resource constants data to the SGPRs */ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x240; - ptr_cmd[i++] = mc_address_src; - ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; - ptr_cmd[i++] = 0x400; - if (version == 9) - ptr_cmd[i++] = 0x74fac; - else if (version == 10) - ptr_cmd[i++] = 0x1104bfac; - - /* Writes the UAV constant data to the SGPRs. 
*/ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x244; - ptr_cmd[i++] = mc_address_dst; - ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; - ptr_cmd[i++] = 0x400; - if (version == 9) - ptr_cmd[i++] = 0x74fac; - else if (version == 10) - ptr_cmd[i++] = 0x1104bfac; - - /* clear mmCOMPUTE_RESOURCE_LIMITS */ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); - ptr_cmd[i++] = 0x215; - ptr_cmd[i++] = 0; - - /* dispatch direct command */ - ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); - ptr_cmd[i++] = 0x10; - ptr_cmd[i++] = 1; - ptr_cmd[i++] = 1; - ptr_cmd[i++] = 1; - - while (i & 7) - ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ - - resources[0] = bo_shader; - resources[1] = bo_src; - resources[2] = bo_dst; - resources[3] = bo_cmd; - r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); - CU_ASSERT_EQUAL(r, 0); - - ib_info.ib_mc_address = mc_address_cmd; - ib_info.size = i; - ibs_request.ip_type = ip_type; - ibs_request.ring = ring; - ibs_request.resources = bo_list; - ibs_request.number_of_ibs = 1; - ibs_request.ibs = &ib_info; - ibs_request.fence_info.handle = NULL; - r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); - CU_ASSERT_EQUAL(r, 0); - - fence_status.ip_type = ip_type; - fence_status.ip_instance = 0; - fence_status.ring = ring; - fence_status.context = context_handle; - fence_status.fence = ibs_request.seq_no; - - /* wait for IB accomplished */ - r = amdgpu_cs_query_fence_status(&fence_status, - AMDGPU_TIMEOUT_INFINITE, - 0, &expired); - - if (!hang) { - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(expired, true); - - /* verify if memcpy test result meets with expected */ - i = 0; - while(i < bo_dst_size) { - CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); - i++; - } - } else { - r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); - } - - r = amdgpu_bo_list_destroy(bo_list); - CU_ASSERT_EQUAL(r, 0); 
- - r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); - CU_ASSERT_EQUAL(r, 0); - r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_free(context_handle); - CU_ASSERT_EQUAL(r, 0); -} - static void amdgpu_compute_dispatch_test(void) { - int r; - struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id, version; - - r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); - CU_ASSERT_EQUAL(r, 0); - if (!info.available_rings) - printf("SKIP ... as there's no compute ring\n"); - - version = info.hw_ip_version_major; - if (version != 9 && version != 10) { - printf("SKIP ... unsupported gfx version %d\n", version); - return; - } - - for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version); - amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0); - } + amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_COMPUTE); } - static void amdgpu_gfx_dispatch_test(void) { - int r; - struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id, version; - - r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); - CU_ASSERT_EQUAL(r, 0); - if (!info.available_rings) - printf("SKIP ... as there's no graphics ring\n"); - - version = info.hw_ip_version_major; - if (version != 9 && version != 10) { - printf("SKIP ... 
unsupported gfx version %d\n", version); - return; - } - - for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version); - amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0); - } -} - -void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) -{ - int r; - struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id, version; - - r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); - CU_ASSERT_EQUAL(r, 0); - if (!info.available_rings) - printf("SKIP ... as there's no ring for ip %d\n", ip_type); - - version = info.hw_ip_version_major; - if (version != 9 && version != 10) { - printf("SKIP ... unsupported gfx version %d\n", version); - return; - } - - for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1); - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); - } -} - -static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, - uint32_t ip_type, uint32_t ring, int version) -{ - amdgpu_context_handle context_handle; - amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; - volatile unsigned char *ptr_dst; - void *ptr_shader; - unsigned char *ptr_src; - uint32_t *ptr_cmd; - uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; - amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; - int i, r; - int bo_dst_size = 0x4000000; - int bo_shader_size = 0x400000; - int bo_cmd_size = 4096; - struct amdgpu_cs_request ibs_request = {0}; - struct amdgpu_cs_ib_info ib_info= {0}; - uint32_t hang_state, hangs, expired; - struct amdgpu_gpu_info gpu_info = {0}; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_fence fence_status = {0}; - - r = amdgpu_query_gpu_info(device_handle, 
&gpu_info); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_create(device_handle, &context_handle); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, - AMDGPU_GEM_DOMAIN_GTT, 0, - &bo_cmd, (void **)&ptr_cmd, - &mc_address_cmd, &va_cmd); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_cmd, 0, bo_cmd_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader, &ptr_shader, - &mc_address_shader, &va_shader); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader, 0, bo_shader_size); - - r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_src, (void **)&ptr_src, - &mc_address_src, &va_src); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_dst, (void **)&ptr_dst, - &mc_address_dst, &va_dst); - CU_ASSERT_EQUAL(r, 0); - - memset(ptr_src, 0x55, bo_dst_size); - - i = 0; - i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); - - /* Issue commands to set cu mask used in current dispatch */ - i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); - - /* Writes shader state to HW */ - i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); - - /* Write constant data */ - /* Writes the texture resource constants data to the SGPRs */ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x240; - ptr_cmd[i++] = mc_address_src; - ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; - ptr_cmd[i++] = 0x400000; - if (version == 9) - ptr_cmd[i++] = 0x74fac; - else if (version == 10) - ptr_cmd[i++] = 0x1104bfac; - - /* Writes the UAV constant data to the SGPRs. 
*/ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x244; - ptr_cmd[i++] = mc_address_dst; - ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; - ptr_cmd[i++] = 0x400000; - if (version == 9) - ptr_cmd[i++] = 0x74fac; - else if (version == 10) - ptr_cmd[i++] = 0x1104bfac; - - /* clear mmCOMPUTE_RESOURCE_LIMITS */ - ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); - ptr_cmd[i++] = 0x215; - ptr_cmd[i++] = 0; - - /* dispatch direct command */ - ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); - ptr_cmd[i++] = 0x10000; - ptr_cmd[i++] = 1; - ptr_cmd[i++] = 1; - ptr_cmd[i++] = 1; - - while (i & 7) - ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ - - resources[0] = bo_shader; - resources[1] = bo_src; - resources[2] = bo_dst; - resources[3] = bo_cmd; - r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); - CU_ASSERT_EQUAL(r, 0); - - ib_info.ib_mc_address = mc_address_cmd; - ib_info.size = i; - ibs_request.ip_type = ip_type; - ibs_request.ring = ring; - ibs_request.resources = bo_list; - ibs_request.number_of_ibs = 1; - ibs_request.ibs = &ib_info; - ibs_request.fence_info.handle = NULL; - r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); - CU_ASSERT_EQUAL(r, 0); - - fence_status.ip_type = ip_type; - fence_status.ip_instance = 0; - fence_status.ring = ring; - fence_status.context = context_handle; - fence_status.fence = ibs_request.seq_no; - - /* wait for IB accomplished */ - r = amdgpu_cs_query_fence_status(&fence_status, - AMDGPU_TIMEOUT_INFINITE, - 0, &expired); - - r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); - - r = amdgpu_bo_list_destroy(bo_list); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); - CU_ASSERT_EQUAL(r, 0); - r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); - CU_ASSERT_EQUAL(r, 0); - - r = 
amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_free(context_handle); - CU_ASSERT_EQUAL(r, 0); -} - -void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) -{ - int r; - struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id, version; - - r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); - CU_ASSERT_EQUAL(r, 0); - if (!info.available_rings) - printf("SKIP ... as there's no ring for ip %d\n", ip_type); - - version = info.hw_ip_version_major; - if (version != 9 && version != 10) { - printf("SKIP ... unsupported gfx version %d\n", version); - return; - } - - for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); - amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version); - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); - } -} - -static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) -{ - struct amdgpu_test_shader *shader; - int i, loop = 0x40000; - - switch (family) { - case AMDGPU_FAMILY_AI: - case AMDGPU_FAMILY_RV: - shader = &memcpy_ps_hang_slow_ai; - break; - default: - return -1; - break; - } - - memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); - - for (i = 0; i < loop; i++) - memcpy(ptr + shader->header_length + shader->body_length * i, - shader->shader + shader->header_length, - shader->body_length * sizeof(uint32_t)); - - memcpy(ptr + shader->header_length + shader->body_length * loop, - shader->shader + shader->header_length + shader->body_length, - shader->foot_length * sizeof(uint32_t)); - - return 0; -} - -static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version) -{ - int i; - uint32_t shader_offset= 256; - uint32_t 
mem_offset, patch_code_offset; - uint32_t shader_size, patchinfo_code_size; - const uint32_t *shader; - const uint32_t *patchinfo_code; - const uint32_t *patchcode_offset; - - switch (ps_type) { - case PS_CONST: - if (version == 9) { - shader = ps_const_shader_gfx9; - shader_size = sizeof(ps_const_shader_gfx9); - patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; - patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; - patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; - } else if (version == 10){ - shader = ps_const_shader_gfx10; - shader_size = sizeof(ps_const_shader_gfx10); - patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10; - patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10; - patchcode_offset = ps_const_shader_patchinfo_offset_gfx10; - } - break; - case PS_TEX: - if (version == 9) { - shader = ps_tex_shader_gfx9; - shader_size = sizeof(ps_tex_shader_gfx9); - patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; - patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; - patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; - } else if (version == 10) { - shader = ps_tex_shader_gfx10; - shader_size = sizeof(ps_tex_shader_gfx10); - patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10; - patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10; - patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10; - } - break; - case PS_HANG: - shader = memcpy_ps_hang; - shader_size = sizeof(memcpy_ps_hang); - - memcpy(ptr, shader, shader_size); - return 0; - default: - return -1; - break; - } - - /* write main shader program */ - for (i = 0 ; i < 10; i++) { - mem_offset = i * shader_offset; - memcpy(ptr + mem_offset, shader, shader_size); - } - - /* overwrite patch codes */ - for (i = 0 ; i < 10; i++) { - mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); - patch_code_offset = i * patchinfo_code_size; - memcpy(ptr + mem_offset, - 
patchinfo_code + patch_code_offset, - patchinfo_code_size * sizeof(uint32_t)); - } - - return 0; -} - -/* load RectPosTexFast_VS */ -static int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version) -{ - const uint32_t *shader; - uint32_t shader_size; - - if (version == 9) { - shader = vs_RectPosTexFast_shader_gfx9; - shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); - } else if (version == 10) { - shader = vs_RectPosTexFast_shader_gfx10; - shader_size = sizeof(vs_RectPosTexFast_shader_gfx10); - } - - memcpy(ptr, shader, shader_size); - - return 0; -} - -static int amdgpu_draw_init(uint32_t *ptr, uint32_t version) -{ - int i = 0; - const uint32_t *preamblecache_ptr; - uint32_t preamblecache_size; - - /* Write context control and load shadowing register if necessary */ - ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); - ptr[i++] = 0x80000000; - ptr[i++] = 0x80000000; - - if (version == 9) { - preamblecache_ptr = preamblecache_gfx9; - preamblecache_size = sizeof(preamblecache_gfx9); - } else if (version == 10) { - preamblecache_ptr = preamblecache_gfx10; - preamblecache_size = sizeof(preamblecache_gfx10); - } - - memcpy(ptr + i, preamblecache_ptr, preamblecache_size); - return i + preamblecache_size/sizeof(uint32_t); -} - -static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, - uint64_t dst_addr, - uint32_t version, - int hang_slow) -{ - int i = 0; - - /* setup color buffer */ - if (version == 9) { - /* offset reg - 0xA318 CB_COLOR0_BASE - 0xA319 CB_COLOR0_BASE_EXT - 0xA31A CB_COLOR0_ATTRIB2 - 0xA31B CB_COLOR0_VIEW - 0xA31C CB_COLOR0_INFO - 0xA31D CB_COLOR0_ATTRIB - 0xA31E CB_COLOR0_DCC_CONTROL - 0xA31F CB_COLOR0_CMASK - 0xA320 CB_COLOR0_CMASK_BASE_EXT - 0xA321 CB_COLOR0_FMASK - 0xA322 CB_COLOR0_FMASK_BASE_EXT - 0xA323 CB_COLOR0_CLEAR_WORD0 - 0xA324 CB_COLOR0_CLEAR_WORD1 - 0xA325 CB_COLOR0_DCC_BASE - 0xA326 CB_COLOR0_DCC_BASE_EXT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); - ptr[i++] = 0x318; - ptr[i++] = dst_addr >> 8; - ptr[i++] = 
dst_addr >> 40; - ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f; - ptr[i++] = 0; - ptr[i++] = 0x50438; - ptr[i++] = 0x10140000; - i += 9; - - /* mmCB_MRT0_EPITCH */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x1e8; - ptr[i++] = hang_slow ? 0xfff : 0x1f; - } else if (version == 10) { - /* 0xA318 CB_COLOR0_BASE - 0xA319 CB_COLOR0_PITCH - 0xA31A CB_COLOR0_SLICE - 0xA31B CB_COLOR0_VIEW - 0xA31C CB_COLOR0_INFO - 0xA31D CB_COLOR0_ATTRIB - 0xA31E CB_COLOR0_DCC_CONTROL - 0xA31F CB_COLOR0_CMASK - 0xA320 CB_COLOR0_CMASK_SLICE - 0xA321 CB_COLOR0_FMASK - 0xA322 CB_COLOR0_FMASK_SLICE - 0xA323 CB_COLOR0_CLEAR_WORD0 - 0xA324 CB_COLOR0_CLEAR_WORD1 - 0xA325 CB_COLOR0_DCC_BASE */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14); - ptr[i++] = 0x318; - ptr[i++] = dst_addr >> 8; - i += 3; - ptr[i++] = 0x50438; - i += 9; - - /* 0xA390 CB_COLOR0_BASE_EXT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x390; - ptr[i++] = dst_addr >> 40; - - /* 0xA398 CB_COLOR0_CMASK_BASE_EXT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x398; - ptr[i++] = 0; - - /* 0xA3A0 CB_COLOR0_FMASK_BASE_EXT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x3a0; - ptr[i++] = 0; - - /* 0xA3A8 CB_COLOR0_DCC_BASE_EXT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x3a8; - ptr[i++] = 0; - - /* 0xA3B0 CB_COLOR0_ATTRIB2 */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x3b0; - ptr[i++] = hang_slow ? 
0x3ffc7ff : 0x7c01f; - - /* 0xA3B8 CB_COLOR0_ATTRIB3 */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x3b8; - ptr[i++] = 0x9014000; - } - - /* 0xA32B CB_COLOR1_BASE */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x32b; - ptr[i++] = 0; - - /* 0xA33A CB_COLOR1_BASE */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x33a; - ptr[i++] = 0; - - /* SPI_SHADER_COL_FORMAT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x1c5; - ptr[i++] = 9; - - /* Setup depth buffer */ - if (version == 9) { - /* mmDB_Z_INFO */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); - ptr[i++] = 0xe; - i += 2; - } else if (version == 10) { - /* mmDB_Z_INFO */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); - ptr[i++] = 0x10; - i += 2; - } - - return i; -} - -static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, - uint32_t version, - int hang_slow) -{ - int i = 0; - const uint32_t *cached_cmd_ptr; - uint32_t cached_cmd_size; - - /* mmPA_SC_TILE_STEERING_OVERRIDE */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0xd7; - ptr[i++] = 0; - - ptr[i++] = 0xffff1000; - ptr[i++] = 0xc0021000; - - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0xd7; - if (version == 9) - ptr[i++] = 1; - else if (version == 10) - ptr[i++] = 0; - - /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); - ptr[i++] = 0x2fe; - i += 16; - - /* mmPA_SC_CENTROID_PRIORITY_0 */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); - ptr[i++] = 0x2f5; - i += 2; - - if (version == 9) { - cached_cmd_ptr = cached_cmd_gfx9; - cached_cmd_size = sizeof(cached_cmd_gfx9); - } else if (version == 10) { - cached_cmd_ptr = cached_cmd_gfx10; - cached_cmd_size = sizeof(cached_cmd_gfx10); - } - - memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); - if (hang_slow) - *(ptr + i + 12) = 0x8000800; - i += cached_cmd_size/sizeof(uint32_t); - - if (version == 10) { - /* mmCB_RMI_GL2_CACHE_CONTROL */ - 
ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x104; - ptr[i++] = 0x40aa0055; - /* mmDB_RMI_L2_CACHE_CONTROL */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x1f; - ptr[i++] = 0x2a0055; - } - - return i; -} - -static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, - int ps_type, - uint64_t shader_addr, - uint32_t version, - int hang_slow) -{ - int i = 0; - - /* mmPA_CL_VS_OUT_CNTL */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x207; - ptr[i++] = 0; - - if (version == 9) { - /* mmSPI_SHADER_PGM_RSRC3_VS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); - ptr[i++] = 0x46; - ptr[i++] = 0xffff; - } else if (version == 10) { - /* mmSPI_SHADER_PGM_RSRC3_VS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); - ptr[i++] = 0x30000046; - ptr[i++] = 0xffff; - /* mmSPI_SHADER_PGM_RSRC4_VS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); - ptr[i++] = 0x30000041; - ptr[i++] = 0xffff; - } - - /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); - ptr[i++] = 0x48; - ptr[i++] = shader_addr >> 8; - ptr[i++] = shader_addr >> 40; - - /* mmSPI_SHADER_PGM_RSRC1_VS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); - ptr[i++] = 0x4a; - if (version == 9) - ptr[i++] = 0xc0081; - else if (version == 10) - ptr[i++] = 0xc0041; - /* mmSPI_SHADER_PGM_RSRC2_VS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); - ptr[i++] = 0x4b; - ptr[i++] = 0x18; - - /* mmSPI_VS_OUT_CONFIG */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x1b1; - ptr[i++] = 2; - - /* mmSPI_SHADER_POS_FORMAT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x1c3; - ptr[i++] = 4; - - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); - ptr[i++] = 0x4c; - i += 2; - ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; - ptr[i++] = hang_slow ? 
0x45000000 : 0x42000000; - - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); - ptr[i++] = 0x50; - i += 2; - if (ps_type == PS_CONST) { - i += 2; - } else if (ps_type == PS_TEX) { - ptr[i++] = 0x3f800000; - ptr[i++] = 0x3f800000; - } - - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); - ptr[i++] = 0x54; - i += 4; - - return i; -} - -static int amdgpu_draw_ps_write2hw(uint32_t *ptr, - int ps_type, - uint64_t shader_addr, - uint32_t version) -{ - int i, j; - const uint32_t *sh_registers; - const uint32_t *context_registers; - uint32_t num_sh_reg, num_context_reg; - - if (ps_type == PS_CONST) { - if (version == 9) { - sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; - num_sh_reg = ps_num_sh_registers_gfx9; - } else if (version == 10) { - sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10; - num_sh_reg = ps_num_sh_registers_gfx10; - } - context_registers = (const uint32_t *)ps_const_context_reg_gfx9; - num_context_reg = ps_num_context_registers_gfx9; - } else if (ps_type == PS_TEX) { - sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; - context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; - num_sh_reg = ps_num_sh_registers_gfx9; - num_context_reg = ps_num_context_registers_gfx9; - } - - i = 0; - - if (version == 9) { - /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS - 0x2c08 SPI_SHADER_PGM_LO_PS - 0x2c09 SPI_SHADER_PGM_HI_PS */ - /* multiplicator 9 is from SPI_SHADER_COL_FORMAT */ - shader_addr += 256 * 9; - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); - ptr[i++] = 0x7; - ptr[i++] = 0xffff; - ptr[i++] = shader_addr >> 8; - ptr[i++] = shader_addr >> 40; - } else if (version == 10) { - shader_addr += 256 * 9; - /* 0x2c08 SPI_SHADER_PGM_LO_PS - 0x2c09 SPI_SHADER_PGM_HI_PS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); - ptr[i++] = 0x8; - ptr[i++] = shader_addr >> 8; - ptr[i++] = shader_addr >> 40; - - /* mmSPI_SHADER_PGM_RSRC3_PS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); - ptr[i++] = 0x30000007; - ptr[i++] = 0xffff; - /* mmSPI_SHADER_PGM_RSRC4_PS */ - ptr[i++] 
= PACKET3(PKT3_SET_SH_REG_INDEX, 1); - ptr[i++] = 0x30000001; - ptr[i++] = 0xffff; - } - - for (j = 0; j < num_sh_reg; j++) { - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); - ptr[i++] = sh_registers[j * 2] - 0x2c00; - ptr[i++] = sh_registers[j * 2 + 1]; - } - - for (j = 0; j < num_context_reg; j++) { - if (context_registers[j * 2] != 0xA1C5) { - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = context_registers[j * 2] - 0xa000; - ptr[i++] = context_registers[j * 2 + 1]; - } - - if (context_registers[j * 2] == 0xA1B4) { - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x1b3; - ptr[i++] = 2; - } - } - - return i; -} - -static int amdgpu_draw_draw(uint32_t *ptr, uint32_t version) -{ - int i = 0; - - if (version == 9) { - /* mmIA_MULTI_VGT_PARAM */ - ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ptr[i++] = 0x40000258; - ptr[i++] = 0xd00ff; - /* mmVGT_PRIMITIVE_TYPE */ - ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ptr[i++] = 0x10000242; - ptr[i++] = 0x11; - } else if (version == 10) { - /* mmGE_CNTL */ - ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ptr[i++] = 0x25b; - ptr[i++] = 0xff; - /* mmVGT_PRIMITIVE_TYPE */ - ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ptr[i++] = 0x242; - ptr[i++] = 0x11; - } - - ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); - ptr[i++] = 3; - ptr[i++] = 2; - - return i; -} - -void amdgpu_memset_draw(amdgpu_device_handle device_handle, - amdgpu_bo_handle bo_shader_ps, - amdgpu_bo_handle bo_shader_vs, - uint64_t mc_address_shader_ps, - uint64_t mc_address_shader_vs, - uint32_t ring_id, uint32_t version) -{ - amdgpu_context_handle context_handle; - amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; - volatile unsigned char *ptr_dst; - uint32_t *ptr_cmd; - uint64_t mc_address_dst, mc_address_cmd; - amdgpu_va_handle va_dst, va_cmd; - int i, r; - int bo_dst_size = 16384; - int bo_cmd_size = 4096; - struct amdgpu_cs_request ibs_request = {0}; - struct amdgpu_cs_ib_info ib_info = {0}; - struct amdgpu_cs_fence 
fence_status = {0}; - uint32_t expired; - amdgpu_bo_list_handle bo_list; - - r = amdgpu_cs_ctx_create(device_handle, &context_handle); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, - AMDGPU_GEM_DOMAIN_GTT, 0, - &bo_cmd, (void **)&ptr_cmd, - &mc_address_cmd, &va_cmd); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_cmd, 0, bo_cmd_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_dst, (void **)&ptr_dst, - &mc_address_dst, &va_dst); - CU_ASSERT_EQUAL(r, 0); - - i = 0; - i += amdgpu_draw_init(ptr_cmd + i, version); - - i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0); - - i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0); - - i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, - version, 0); - - i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, version); - - ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0xc; - ptr_cmd[i++] = 0x33333333; - ptr_cmd[i++] = 0x33333333; - ptr_cmd[i++] = 0x33333333; - ptr_cmd[i++] = 0x33333333; - - i += amdgpu_draw_draw(ptr_cmd + i, version); - - while (i & 7) - ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ - - resources[0] = bo_dst; - resources[1] = bo_shader_ps; - resources[2] = bo_shader_vs; - resources[3] = bo_cmd; - r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); - CU_ASSERT_EQUAL(r, 0); - - ib_info.ib_mc_address = mc_address_cmd; - ib_info.size = i; - ibs_request.ip_type = AMDGPU_HW_IP_GFX; - ibs_request.ring = ring_id; - ibs_request.resources = bo_list; - ibs_request.number_of_ibs = 1; - ibs_request.ibs = &ib_info; - ibs_request.fence_info.handle = NULL; - - /* submit CS */ - r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_list_destroy(bo_list); - CU_ASSERT_EQUAL(r, 0); - - fence_status.ip_type = AMDGPU_HW_IP_GFX; - 
fence_status.ip_instance = 0; - fence_status.ring = ring_id; - fence_status.context = context_handle; - fence_status.fence = ibs_request.seq_no; - - /* wait for IB accomplished */ - r = amdgpu_cs_query_fence_status(&fence_status, - AMDGPU_TIMEOUT_INFINITE, - 0, &expired); - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(expired, true); - - /* verify if memset test result meets with expected */ - i = 0; - while(i < bo_dst_size) { - CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); - } - - r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_free(context_handle); - CU_ASSERT_EQUAL(r, 0); -} - -static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, - uint32_t ring, int version) -{ - amdgpu_bo_handle bo_shader_ps, bo_shader_vs; - void *ptr_shader_ps; - void *ptr_shader_vs; - uint64_t mc_address_shader_ps, mc_address_shader_vs; - amdgpu_va_handle va_shader_ps, va_shader_vs; - int r; - int bo_shader_size = 4096; - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader_ps, &ptr_shader_ps, - &mc_address_shader_ps, &va_shader_ps); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader_ps, 0, bo_shader_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader_vs, &ptr_shader_vs, - &mc_address_shader_vs, &va_shader_vs); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader_vs, 0, bo_shader_size); - - r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); - CU_ASSERT_EQUAL(r, 0); - - amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, - mc_address_shader_ps, mc_address_shader_vs, - ring, version); - - r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); - 
CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); - CU_ASSERT_EQUAL(r, 0); -} - -static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, - amdgpu_bo_handle bo_shader_ps, - amdgpu_bo_handle bo_shader_vs, - uint64_t mc_address_shader_ps, - uint64_t mc_address_shader_vs, - uint32_t ring, int version, int hang) -{ - amdgpu_context_handle context_handle; - amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; - volatile unsigned char *ptr_dst; - unsigned char *ptr_src; - uint32_t *ptr_cmd; - uint64_t mc_address_dst, mc_address_src, mc_address_cmd; - amdgpu_va_handle va_dst, va_src, va_cmd; - int i, r; - int bo_size = 16384; - int bo_cmd_size = 4096; - struct amdgpu_cs_request ibs_request = {0}; - struct amdgpu_cs_ib_info ib_info= {0}; - uint32_t hang_state, hangs; - uint32_t expired; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_fence fence_status = {0}; - - r = amdgpu_cs_ctx_create(device_handle, &context_handle); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, - AMDGPU_GEM_DOMAIN_GTT, 0, - &bo_cmd, (void **)&ptr_cmd, - &mc_address_cmd, &va_cmd); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_cmd, 0, bo_cmd_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_src, (void **)&ptr_src, - &mc_address_src, &va_src); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_dst, (void **)&ptr_dst, - &mc_address_dst, &va_dst); - CU_ASSERT_EQUAL(r, 0); - - memset(ptr_src, 0x55, bo_size); - - i = 0; - i += amdgpu_draw_init(ptr_cmd + i, version); - - i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0); - - i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0); - - i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, - version, 0); - - i += 
amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version); - - ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); - if (version == 9) { - ptr_cmd[i++] = 0xc; - ptr_cmd[i++] = mc_address_src >> 8; - ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; - ptr_cmd[i++] = 0x7c01f; - ptr_cmd[i++] = 0x90500fac; - ptr_cmd[i++] = 0x3e000; - i += 3; - } else if (version == 10) { - ptr_cmd[i++] = 0xc; - ptr_cmd[i++] = mc_address_src >> 8; - ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000; - ptr_cmd[i++] = 0x8007c007; - ptr_cmd[i++] = 0x90500fac; - i += 2; - ptr_cmd[i++] = 0x400; - i++; - } - - ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x14; - ptr_cmd[i++] = 0x92; - i += 3; - - ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr_cmd[i++] = 0x191; - ptr_cmd[i++] = 0; - - i += amdgpu_draw_draw(ptr_cmd + i, version); - - while (i & 7) - ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ - - resources[0] = bo_dst; - resources[1] = bo_src; - resources[2] = bo_shader_ps; - resources[3] = bo_shader_vs; - resources[4] = bo_cmd; - r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); - CU_ASSERT_EQUAL(r, 0); - - ib_info.ib_mc_address = mc_address_cmd; - ib_info.size = i; - ibs_request.ip_type = AMDGPU_HW_IP_GFX; - ibs_request.ring = ring; - ibs_request.resources = bo_list; - ibs_request.number_of_ibs = 1; - ibs_request.ibs = &ib_info; - ibs_request.fence_info.handle = NULL; - r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); - CU_ASSERT_EQUAL(r, 0); - - fence_status.ip_type = AMDGPU_HW_IP_GFX; - fence_status.ip_instance = 0; - fence_status.ring = ring; - fence_status.context = context_handle; - fence_status.fence = ibs_request.seq_no; - - /* wait for IB accomplished */ - r = amdgpu_cs_query_fence_status(&fence_status, - AMDGPU_TIMEOUT_INFINITE, - 0, &expired); - if (!hang) { - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(expired, true); - - /* verify if memcpy test result meets with expected */ - i = 0; - while(i < bo_size) { - 
CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); - i++; - } - } else { - r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); - } - - r = amdgpu_bo_list_destroy(bo_list); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); - CU_ASSERT_EQUAL(r, 0); - r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_free(context_handle); - CU_ASSERT_EQUAL(r, 0); -} - -void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, - int version, int hang) -{ - amdgpu_bo_handle bo_shader_ps, bo_shader_vs; - void *ptr_shader_ps; - void *ptr_shader_vs; - uint64_t mc_address_shader_ps, mc_address_shader_vs; - amdgpu_va_handle va_shader_ps, va_shader_vs; - int bo_shader_size = 4096; - enum ps_type ps_type = hang ? 
PS_HANG : PS_TEX; - int r; - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader_ps, &ptr_shader_ps, - &mc_address_shader_ps, &va_shader_ps); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader_ps, 0, bo_shader_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader_vs, &ptr_shader_vs, - &mc_address_shader_vs, &va_shader_vs); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader_vs, 0, bo_shader_size); - - r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); - CU_ASSERT_EQUAL(r, 0); - - amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, - mc_address_shader_ps, mc_address_shader_vs, - ring, version, hang); - - r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); - CU_ASSERT_EQUAL(r, 0); + amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_GFX); } static void amdgpu_draw_test(void) { - int r; - struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id, version; - - r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); - CU_ASSERT_EQUAL(r, 0); - if (!info.available_rings) - printf("SKIP ... as there's no graphics ring\n"); - - version = info.hw_ip_version_major; - if (version != 9 && version != 10) { - printf("SKIP ... 
unsupported gfx version %d\n", version); - return; - } - - for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memset_draw_test(device_handle, ring_id, version); - amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); - } -} - -void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version) -{ - amdgpu_context_handle context_handle; - amdgpu_bo_handle bo_shader_ps, bo_shader_vs; - amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; - void *ptr_shader_ps; - void *ptr_shader_vs; - volatile unsigned char *ptr_dst; - unsigned char *ptr_src; - uint32_t *ptr_cmd; - uint64_t mc_address_dst, mc_address_src, mc_address_cmd; - uint64_t mc_address_shader_ps, mc_address_shader_vs; - amdgpu_va_handle va_shader_ps, va_shader_vs; - amdgpu_va_handle va_dst, va_src, va_cmd; - struct amdgpu_gpu_info gpu_info = {0}; - int i, r; - int bo_size = 0x4000000; - int bo_shader_ps_size = 0x400000; - int bo_shader_vs_size = 4096; - int bo_cmd_size = 4096; - struct amdgpu_cs_request ibs_request = {0}; - struct amdgpu_cs_ib_info ib_info= {0}; - uint32_t hang_state, hangs, expired; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_fence fence_status = {0}; - - r = amdgpu_query_gpu_info(device_handle, &gpu_info); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_create(device_handle, &context_handle); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, - AMDGPU_GEM_DOMAIN_GTT, 0, - &bo_cmd, (void **)&ptr_cmd, - &mc_address_cmd, &va_cmd); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_cmd, 0, bo_cmd_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader_ps, &ptr_shader_ps, - &mc_address_shader_ps, &va_shader_ps); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader_ps, 0, bo_shader_ps_size); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_shader_vs, &ptr_shader_vs, - 
&mc_address_shader_vs, &va_shader_vs); - CU_ASSERT_EQUAL(r, 0); - memset(ptr_shader_vs, 0, bo_shader_vs_size); - - r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_src, (void **)&ptr_src, - &mc_address_src, &va_src); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, - AMDGPU_GEM_DOMAIN_VRAM, 0, - &bo_dst, (void **)&ptr_dst, - &mc_address_dst, &va_dst); - CU_ASSERT_EQUAL(r, 0); - - memset(ptr_src, 0x55, bo_size); - - i = 0; - i += amdgpu_draw_init(ptr_cmd + i, version); - - i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1); - - i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1); - - i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, - mc_address_shader_vs, version, 1); - - i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version); - - ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); - - if (version == 9) { - ptr_cmd[i++] = 0xc; - ptr_cmd[i++] = mc_address_src >> 8; - ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; - ptr_cmd[i++] = 0x1ffcfff; - ptr_cmd[i++] = 0x90500fac; - ptr_cmd[i++] = 0x1ffe000; - i += 3; - } else if (version == 10) { - ptr_cmd[i++] = 0xc; - ptr_cmd[i++] = mc_address_src >> 8; - ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000; - ptr_cmd[i++] = 0x81ffc1ff; - ptr_cmd[i++] = 0x90500fac; - i += 4; - } - - ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); - ptr_cmd[i++] = 0x14; - ptr_cmd[i++] = 0x92; - i += 3; - - ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr_cmd[i++] = 0x191; - ptr_cmd[i++] = 0; - - i += amdgpu_draw_draw(ptr_cmd + i, version); - - while (i & 7) - ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ - - resources[0] = bo_dst; - resources[1] = bo_src; - resources[2] = 
bo_shader_ps; - resources[3] = bo_shader_vs; - resources[4] = bo_cmd; - r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); - CU_ASSERT_EQUAL(r, 0); - - ib_info.ib_mc_address = mc_address_cmd; - ib_info.size = i; - ibs_request.ip_type = AMDGPU_HW_IP_GFX; - ibs_request.ring = ring; - ibs_request.resources = bo_list; - ibs_request.number_of_ibs = 1; - ibs_request.ibs = &ib_info; - ibs_request.fence_info.handle = NULL; - r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); - CU_ASSERT_EQUAL(r, 0); - - fence_status.ip_type = AMDGPU_HW_IP_GFX; - fence_status.ip_instance = 0; - fence_status.ring = ring; - fence_status.context = context_handle; - fence_status.fence = ibs_request.seq_no; - - /* wait for IB accomplished */ - r = amdgpu_cs_query_fence_status(&fence_status, - AMDGPU_TIMEOUT_INFINITE, - 0, &expired); - - r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); - - r = amdgpu_bo_list_destroy(bo_list); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); - CU_ASSERT_EQUAL(r, 0); - r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size); - CU_ASSERT_EQUAL(r, 0); - r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size); - CU_ASSERT_EQUAL(r, 0); - - r = amdgpu_cs_ctx_free(context_handle); - CU_ASSERT_EQUAL(r, 0); + amdgpu_test_draw_helper(device_handle); } - static void amdgpu_gpu_reset_test(void) { int r; diff --git a/lib/libdrm/tests/amdgpu/deadlock_tests.c b/lib/libdrm/tests/amdgpu/deadlock_tests.c index f29a83ab5..2928233df 100644 --- a/lib/libdrm/tests/amdgpu/deadlock_tests.c +++ 
b/lib/libdrm/tests/amdgpu/deadlock_tests.c @@ -124,6 +124,8 @@ static void amdgpu_dispatch_hang_slow_gfx(void); static void amdgpu_dispatch_hang_slow_compute(void); static void amdgpu_draw_hang_gfx(void); static void amdgpu_draw_hang_slow_gfx(void); +static void amdgpu_hang_sdma(void); +static void amdgpu_hang_slow_sdma(void); CU_BOOL suite_deadlock_tests_enable(void) { @@ -208,6 +210,8 @@ CU_TestInfo deadlock_tests[] = { { "compute ring bad slow dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_slow_compute }, { "gfx ring bad draw test (set amdgpu.lockup_timeout=50)", amdgpu_draw_hang_gfx }, { "gfx ring slow bad draw test (set amdgpu.lockup_timeout=50)", amdgpu_draw_hang_slow_gfx }, + { "sdma ring corrupted header test (set amdgpu.lockup_timeout=50)", amdgpu_hang_sdma }, + { "sdma ring slow linear copy test (set amdgpu.lockup_timeout=50)", amdgpu_hang_slow_sdma }, CU_TEST_INFO_NULL, }; @@ -511,66 +515,182 @@ static void amdgpu_illegal_mem_access() static void amdgpu_dispatch_hang_gfx(void) { - amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_GFX); + amdgpu_test_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_GFX); } - static void amdgpu_dispatch_hang_compute(void) { - amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_COMPUTE); + amdgpu_test_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_COMPUTE); } - static void amdgpu_dispatch_hang_slow_gfx(void) { - amdgpu_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_GFX); + amdgpu_test_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_GFX); } - static void amdgpu_dispatch_hang_slow_compute(void) { - amdgpu_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_COMPUTE); + amdgpu_test_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_COMPUTE); } - static void amdgpu_draw_hang_gfx(void) { - int r; - struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id, version; + amdgpu_test_draw_hang_helper(device_handle); +} +static void amdgpu_draw_hang_slow_gfx(void) +{ + 
amdgpu_test_draw_hang_slow_helper(device_handle); +} + +#define DMA_CORRUPTED_HEADER_HANG 1 +#define DMA_SLOW_LINEARCOPY_HANG 2 + +static void amdgpu_hang_sdma_helper(unsigned hang_type) +{ + const int sdma_write_length = 1024; + amdgpu_context_handle context_handle; + amdgpu_bo_handle ib_result_handle; + amdgpu_bo_handle bo1, bo2; + amdgpu_bo_handle resources[3]; + amdgpu_bo_list_handle bo_list; + void *ib_result_cpu; + struct amdgpu_cs_ib_info ib_info; + struct amdgpu_cs_request ibs_request; + struct amdgpu_cs_fence fence_status; + uint64_t bo1_mc, bo2_mc; + uint64_t ib_result_mc_address; + volatile unsigned char *bo1_cpu, *bo2_cpu; + amdgpu_va_handle bo1_va_handle, bo2_va_handle; + amdgpu_va_handle va_handle; + struct drm_amdgpu_info_hw_ip hw_ip_info; + int i, j, r; + uint32_t expired, ib_size; - r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_DMA, 0, &hw_ip_info); CU_ASSERT_EQUAL(r, 0); - if (!info.available_rings) - printf("SKIP ... as there's no graphic ring\n"); - version = info.hw_ip_version_major; - if (version != 9 && version != 10) { - printf("SKIP ... 
unsupported gfx version %d\n", version); - return; - } + r = amdgpu_cs_ctx_create(device_handle, &context_handle); + CU_ASSERT_EQUAL(r, 0); - for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); - amdgpu_memcpy_draw_test(device_handle, ring_id, version, 1); - amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); - } -} + if (hang_type == DMA_CORRUPTED_HEADER_HANG) + ib_size = 4096; + else + ib_size = 4096 * 0x20000; -static void amdgpu_draw_hang_slow_gfx(void) -{ - struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id, version; - int r; + r = amdgpu_bo_alloc_and_map(device_handle, ib_size, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address, &va_handle); + CU_ASSERT_EQUAL(r, 0); - r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); + r = amdgpu_bo_alloc_and_map(device_handle, + sdma_write_length, 4096, + AMDGPU_GEM_DOMAIN_GTT, + 0, &bo1, + (void**)&bo1_cpu, &bo1_mc, + &bo1_va_handle); CU_ASSERT_EQUAL(r, 0); - version = info.hw_ip_version_major; - if (version != 9 && version != 10) { - printf("SKIP ... 
unsupported gfx version %d\n", version); - return; - } + /* set bo1 */ + memset((void*)bo1_cpu, 0xaa, sdma_write_length); - for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); - amdgpu_memcpy_draw_hang_slow_test(device_handle, ring_id, version); - amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); + /* allocate UC bo2 for sDMA use */ + r = amdgpu_bo_alloc_and_map(device_handle, + sdma_write_length, 4096, + AMDGPU_GEM_DOMAIN_GTT, + 0, &bo2, + (void**)&bo2_cpu, &bo2_mc, + &bo2_va_handle); + CU_ASSERT_EQUAL(r, 0); + + /* clear bo2 */ + memset((void*)bo2_cpu, 0, sdma_write_length); + + resources[0] = bo1; + resources[1] = bo2; + resources[2] = ib_result_handle; + r = amdgpu_bo_list_create(device_handle, 3, + resources, NULL, &bo_list); + + /* fulfill PM4: with bad copy linear header */ + ptr = ib_result_cpu; + i = 0; + if (hang_type == DMA_CORRUPTED_HEADER_HANG) { + ptr[i++] = 0x23decd3d; + ptr[i++] = sdma_write_length - 1; + ptr[i++] = 0; + ptr[i++] = 0xffffffff & bo1_mc; + ptr[i++] = (0xffffffff00000000 & bo1_mc) >> 32; + ptr[i++] = 0xffffffff & bo2_mc; + ptr[i++] = (0xffffffff00000000 & bo2_mc) >> 32; + } else { + for (j = 1; j < 0x20000; j++) { + ptr[i++] = 0x1; + ptr[i++] = sdma_write_length - 1; + ptr[i++] = 0; + ptr[i++] = 0xffffffff & bo1_mc; + ptr[i++] = (0xffffffff00000000 & bo1_mc) >> 32; + ptr[i++] = 0xffffffff & bo2_mc; + ptr[i++] = (0xffffffff00000000 & bo2_mc) >> 32; + ptr[i++] = 0x1; + ptr[i++] = sdma_write_length - 1; + ptr[i++] = 0; + ptr[i++] = 0xffffffff & bo2_mc; + ptr[i++] = (0xffffffff00000000 & bo2_mc) >> 32; + ptr[i++] = 0xffffffff & bo1_mc; + ptr[i++] = (0xffffffff00000000 & bo1_mc) >> 32; + } } + + /* exec command */ + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); + ib_info.ib_mc_address = ib_result_mc_address; + ib_info.size = i; + + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); + ibs_request.ip_type = AMDGPU_HW_IP_DMA; + 
ibs_request.ring = 0; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + ibs_request.resources = bo_list; + ibs_request.fence_info.handle = NULL; + + r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + + memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); + fence_status.context = context_handle; + fence_status.ip_type = AMDGPU_HW_IP_DMA; + fence_status.ip_instance = 0; + fence_status.ring = 0; + fence_status.fence = ibs_request.seq_no; + + r = amdgpu_cs_query_fence_status(&fence_status, + AMDGPU_TIMEOUT_INFINITE, + 0, &expired); + CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1); + + r = amdgpu_bo_list_destroy(bo_list); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, + ib_result_mc_address, 4096); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, + sdma_write_length); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, + sdma_write_length); + CU_ASSERT_EQUAL(r, 0); + + /* end of test */ + r = amdgpu_cs_ctx_free(context_handle); + CU_ASSERT_EQUAL(r, 0); +} + +static void amdgpu_hang_sdma(void) +{ + amdgpu_hang_sdma_helper(DMA_CORRUPTED_HEADER_HANG); +} +static void amdgpu_hang_slow_sdma(void) +{ + amdgpu_hang_sdma_helper(DMA_SLOW_LINEARCOPY_HANG); } diff --git a/lib/libdrm/tests/amdgpu/meson.build b/lib/libdrm/tests/amdgpu/meson.build index 53f2010bf..8618f6a19 100644 --- a/lib/libdrm/tests/amdgpu/meson.build +++ b/lib/libdrm/tests/amdgpu/meson.build @@ -25,7 +25,7 @@ if dep_cunit.found() 'amdgpu_test.c', 'basic_tests.c', 'bo_tests.c', 'cs_tests.c', 'vce_tests.c', 'uvd_enc_tests.c', 'vcn_tests.c', 'deadlock_tests.c', 'vm_tests.c', 'ras_tests.c', 'syncobj_tests.c', 'security_tests.c', - 'hotunplug_tests.c', 'jpeg_tests.c', 'cp_dma_tests.c' + 'hotunplug_tests.c', 'jpeg_tests.c', 'cp_dma_tests.c', 'shader_test_util.c' ), dependencies : [dep_cunit, dep_threads, dep_atomic_ops], include_directories : [inc_root, 
inc_drm, include_directories('../../amdgpu')], diff --git a/lib/libdrm/tests/amdgpu/shader_code.h b/lib/libdrm/tests/amdgpu/shader_code.h new file mode 100644 index 000000000..74d32bb58 --- /dev/null +++ b/lib/libdrm/tests/amdgpu/shader_code.h @@ -0,0 +1,153 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * +*/ + +#ifndef _shader_code_h_ +#define _shader_code_h_ + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + +enum amdgpu_test_gfx_version { + AMDGPU_TEST_GFX_V9 = 0, + AMDGPU_TEST_GFX_V10, + AMDGPU_TEST_GFX_V11, + AMDGPU_TEST_GFX_MAX, +}; + +enum cs_type { + CS_BUFFERCLEAR = 0, + CS_BUFFERCOPY, + CS_HANG, + CS_HANG_SLOW, +}; + +enum ps_type { + PS_CONST, + PS_TEX, + PS_HANG, + PS_HANG_SLOW +}; + +enum vs_type { + VS_RECTPOSTEXFAST, +}; + +struct reg_info { + uint32_t reg_offset; ///< Memory mapped register offset + uint32_t reg_value; ///< register value +}; + +#include "shader_code_hang.h" +#include "shader_code_gfx9.h" +#include "shader_code_gfx10.h" +#include "shader_code_gfx11.h" + +struct shader_test_cs_shader { + const uint32_t *shader; + uint32_t shader_size; + const struct reg_info *sh_reg; + uint32_t num_sh_reg; + const struct reg_info *context_reg; + uint32_t num_context_reg; +}; + +struct shader_test_ps_shader { + const uint32_t *shader; + unsigned shader_size; + const uint32_t patchinfo_code_size; + const uint32_t *patchinfo_code; + const uint32_t *patchinfo_code_offset; + const struct reg_info *sh_reg; + const uint32_t num_sh_reg; + const struct reg_info *context_reg; + const uint32_t num_context_reg; +}; + +struct shader_test_vs_shader { + const uint32_t *shader; + uint32_t shader_size; + const struct reg_info *sh_reg; + uint32_t num_sh_reg; + const struct reg_info *context_reg; + uint32_t num_context_reg; +}; + +static const struct shader_test_cs_shader shader_test_cs[AMDGPU_TEST_GFX_MAX][2] = { + // gfx9, cs_bufferclear + {{bufferclear_cs_shader_gfx9, sizeof(bufferclear_cs_shader_gfx9), bufferclear_cs_shader_registers_gfx9, ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9)}, + // gfx9, cs_buffercopy + {buffercopy_cs_shader_gfx9, sizeof(buffercopy_cs_shader_gfx9), bufferclear_cs_shader_registers_gfx9, ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9)}}, + // gfx10, cs_bufferclear + {{bufferclear_cs_shader_gfx10, 
sizeof(bufferclear_cs_shader_gfx10), bufferclear_cs_shader_registers_gfx9, ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9)}, + // gfx10, cs_buffercopy + {buffercopy_cs_shader_gfx10, sizeof(bufferclear_cs_shader_gfx10), bufferclear_cs_shader_registers_gfx9, ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9)}}, + // gfx11, cs_bufferclear + {{bufferclear_cs_shader_gfx11, sizeof(bufferclear_cs_shader_gfx11), bufferclear_cs_shader_registers_gfx11, ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11)}, + // gfx11, cs_buffercopy + {buffercopy_cs_shader_gfx11, sizeof(bufferclear_cs_shader_gfx11), bufferclear_cs_shader_registers_gfx11, ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11)}}, +}; + +#define SHADER_PS_INFO(_ps, _n) \ + {ps_##_ps##_shader_gfx##_n, sizeof(ps_##_ps##_shader_gfx##_n), \ + ps_##_ps##_shader_patchinfo_code_size_gfx##_n, \ + ps_##_ps##_shader_patchinfo_code_gfx##_n, \ + ps_##_ps##_shader_patchinfo_offset_gfx##_n, \ + ps_##_ps##_sh_registers_gfx##_n, ps_##_ps##_num_sh_registers_gfx##_n, \ + ps_##_ps##_context_registers_gfx##_n, ps_##_ps##_num_context_registers_gfx##_n} +static const struct shader_test_ps_shader shader_test_ps[AMDGPU_TEST_GFX_MAX][2] = { + {SHADER_PS_INFO(const, 9), SHADER_PS_INFO(tex, 9)}, + {SHADER_PS_INFO(const, 10), SHADER_PS_INFO(tex, 10)}, + {SHADER_PS_INFO(const, 11), SHADER_PS_INFO(tex, 11)}, +}; + +#define SHADER_VS_INFO(_vs, _n) \ + {vs_##_vs##_shader_gfx##_n, sizeof(vs_##_vs##_shader_gfx##_n), \ + vs_##_vs##_sh_registers_gfx##_n, vs_##_vs##_num_sh_registers_gfx##_n, \ + vs_##_vs##_context_registers_gfx##_n, vs_##_vs##_num_context_registers_gfx##_n} +static const struct shader_test_vs_shader shader_test_vs[AMDGPU_TEST_GFX_MAX][1] = { + {SHADER_VS_INFO(RectPosTexFast, 9)}, + {SHADER_VS_INFO(RectPosTexFast, 10)}, + {SHADER_VS_INFO(RectPosTexFast, 11)}, +}; + +struct shader_test_gfx_info { + const uint32_t *preamble_cache; + uint32_t size_preamble_cache; + const uint32_t *cached_cmd; + uint32_t size_cached_cmd; + uint32_t sh_reg_base; 
+ uint32_t context_reg_base; +}; + +#define SHADER_TEST_GFX_INFO(_n) \ + preamblecache_gfx##_n, sizeof(preamblecache_gfx##_n), \ + cached_cmd_gfx##_n, sizeof(cached_cmd_gfx##_n), \ + sh_reg_base_gfx##_n, context_reg_base_gfx##_n + +static struct shader_test_gfx_info shader_test_gfx_info[AMDGPU_TEST_GFX_MAX] = { + {SHADER_TEST_GFX_INFO(9),}, + {SHADER_TEST_GFX_INFO(10),}, + {SHADER_TEST_GFX_INFO(11),}, +}; +#endif diff --git a/lib/libdrm/tests/amdgpu/shader_code_gfx10.h b/lib/libdrm/tests/amdgpu/shader_code_gfx10.h new file mode 100644 index 000000000..4849bbc9b --- /dev/null +++ b/lib/libdrm/tests/amdgpu/shader_code_gfx10.h @@ -0,0 +1,202 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * +*/ + +#ifndef _shader_code_gfx10_h_ +#define _shader_code_gfx10_h_ + +static const uint32_t bufferclear_cs_shader_gfx10[] = { + 0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205, + 0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004, + 0xBF810000 +}; + +static const uint32_t buffercopy_cs_shader_gfx10[] = { + 0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201, + 0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000 +}; + +static const uint32_t ps_const_shader_gfx10[] = { + 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, + 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, + 0xF8001C0F, 0x00000100, 0xBF810000 +}; + +static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6; + +static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = { + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 }, + { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, + { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 } + } +}; + +static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = { + 0x00000004 +}; + +static const uint32_t ps_const_num_sh_registers_gfx10 = 2; + +static const struct reg_info ps_const_sh_registers_gfx10[] = { + {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 }, + {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } +}; + +static const struct reg_info ps_const_context_registers_gfx10[] = +{ + {0xA1B4, 0x00000002}, //{ 
mmSPI_PS_INPUT_ADDR, 0x00000002 }, + {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, + {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, + {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, + {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, + {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, + {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 /* SI_EXPORT_FMT_FP16_ABGR */ } +}; + +static const uint32_t ps_const_num_context_registers_gfx10 = 7; + +static const uint32_t ps_tex_shader_gfx10[] = { + 0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000, + 0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A, + 0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70, + 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, + 0xF8001C0F, 0x00000100, 0xBF810000 +}; + +static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = { + 0x0000000C +}; + +static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6; + +static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = { + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 }, + { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, + { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 } + } +}; + +static const struct reg_info ps_tex_sh_registers_gfx10[] = +{ + {0x2C0A, 0xc0081}, //0x020C0080 }, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x020C0080 }, + 
{0x2C0B, 0x00000018 }, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } +}; + +static const uint32_t ps_tex_num_sh_registers_gfx10 = 2; + +// Holds Context Register Information +static const struct reg_info ps_tex_context_registers_gfx10[] = +{ + {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, + {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, + {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, + {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, + {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, + {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, + {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 /* SI_EXPORT_FMT_FP16_ABGR */ } +}; + +static const uint32_t ps_tex_num_context_registers_gfx10 = 7; + +static const uint32_t vs_RectPosTexFast_shader_gfx10[] = { + 0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206, + 0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200, + 0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207, + 0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001, + 0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002, + 0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209, + 0xF80008CF, 0x05030100, 0xF800020F, 0x05060402, + 0xBF810000 +}; + +static const struct reg_info vs_RectPosTexFast_sh_registers_gfx10[] = +{ + {0x2C4A, 0x080C0041 }, //{ mmSPI_SHADER_PGM_RSRC1_VS, 0x080C0041 }, + {0x2C4B, 0x00000018 }, //{ mmSPI_SHADER_PGM_RSRC2_VS, 0x00000018 } +}; + +static const uint32_t vs_RectPosTexFast_num_sh_registers_gfx10 = 2; + +// Holds Context Register Information +static const struct reg_info vs_RectPosTexFast_context_registers_gfx10[] = +{ + {0xA1B1, 0x00000000}, //{ mmSPI_VS_OUT_CONFIG, 0x00000000 }, + {0xA1C3, 0x00000000}, //{ mmSPI_SHADER_POS_FORMAT, 0x00000000 /* Always 0 for now */} +}; + +static const uint32_t vs_RectPosTexFast_num_context_registers_gfx10 = 2; + +static const uint32_t preamblecache_gfx10[] = { + 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 
0x0, + 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, + 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, + 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, + 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, + 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, + 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, + 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, + 0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20, + 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, + 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0, + 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, + 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, + 0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, + 0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2, + 0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0, + 0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff, + 0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0, + 0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0, + 0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +}; + +static const uint32_t cached_cmd_gfx10[] = { + 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, + 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, + 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, + 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18, + 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, + 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, + 0xc0026900, 0x292, 0x20, 0x6020000, + 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 +}; + 
+static const uint32_t sh_reg_base_gfx10 = 0x2C00; +static const uint32_t context_reg_base_gfx10 = 0xA000; + +#endif diff --git a/lib/libdrm/tests/amdgpu/shader_code_gfx11.h b/lib/libdrm/tests/amdgpu/shader_code_gfx11.h new file mode 100644 index 000000000..d9ee0a7c2 --- /dev/null +++ b/lib/libdrm/tests/amdgpu/shader_code_gfx11.h @@ -0,0 +1,320 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * +*/ + +#ifndef _shader_code_gfx11_h_ +#define _shader_code_gfx11_h_ + +static const uint32_t bufferclear_cs_shader_gfx11[] = { + 0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF, + 0x7E020205, 0x7E040206, 0x7E060207, 0xBF870004, + 0xD6460004, 0x04010C08, 0x7E000204, 0xE01C0000, + 0x80800004, 0xBFB60003, 0xBFB00000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000 +}; + +static const struct reg_info bufferclear_cs_shader_registers_gfx11[] = { + {0x2e12, 0x600C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x600C0041 }, + {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, + {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, + {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, + {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } +}; + +static const uint32_t buffercopy_cs_shader_gfx11[] = { + 0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF, + 0xBF870001, 0xD6460001, 0x04010C08, 0xE00C0000, + 0x80800201, 0xBF8903F7, 0xE01C0000, 0x80810201, + 0xBFB60003, 0xBFB00000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 
0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000 +}; + +static const uint32_t ps_const_shader_gfx11[] = { + 0xB0802006, 0xBF840003, 0x7E000200, 0x7E020201, + 0x7E040202, 0x7E060203, 0x5E000300, 0x5E020702, + 0xBF800000, 0xBF800000, 0xF8000803, 0x00000100, + 0xBFB00000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000 +}; + +static const uint32_t ps_const_shader_patchinfo_code_size_gfx11 = 6; + +static const uint32_t ps_const_shader_patchinfo_code_gfx11[][10][6] = { + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000890, 0x00000000 }, // SI_EXPORT_FMT_ZERO + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000801, 0x00000000 }, // SI_EXPORT_FMT_32_R + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000803, 
0x00000100 }, // SI_EXPORT_FMT_32_GR + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000803, 0x00000300 }, // SI_EXPORT_FMT_32_AR + { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_FP16_ABGR + { 0xD7220000, 0x00020300, 0xD7220001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_UNORM16_ABGR + { 0xD7210000, 0x00020300, 0xD7210001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_SNORM16_ABGR + { 0xD7230000, 0x00020300, 0xD7230001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_uint32_t16_ABGR + { 0xD7240000, 0x00020300, 0xD7240001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_SINT16_ABGR + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800080F, 0x03020100 } // SI_EXPORT_FMT_32_ABGR + } +}; + +static const uint32_t ps_const_shader_patchinfo_offset_gfx11[] = { + 0x00000006 +}; + +static const uint32_t ps_const_num_sh_registers_gfx11 = 2; + +static const struct reg_info ps_const_sh_registers_gfx11[] = { + {0x2C0A, 0x020C0000}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x020C0000 }, + {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } +}; + +static const struct reg_info ps_const_context_registers_gfx11[] = { + {0xA1B4, 0x00000002 }, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, + {0xA1B6, 0x00000000 }, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, + {0xA08F, 0x0000000F }, //{ mmCB_SHADER_MASK, 0x0000000F }, + {0xA203, 0x00000010 }, //{ mmDB_SHADER_CONTROL, 0x00000010 }, + {0xA1C4, 0x00000000 }, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, + {0xA1B8, 0x00000000 }, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, + {0xA1C5, 0x00000004 }, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 /* SI_EXPORT_FMT_FP16_ABGR */ } +}; + +static const uint32_t ps_const_num_context_registers_gfx11 = 7; + +static const uint32_t ps_tex_shader_gfx11[] = +{ + 0xB0802006, 0xBF840003, 0xBEFD000C, 0xBE8E017E, + 0xBEFE1D7E, 0xCE000003, 0xCE000102, 0xCD000104, + 0x040E0103, 0xCD000000, 0x040A0102, 0xBF870112, + 0xCD010703, 
0x04120303, 0xCD010700, 0x04020302, + 0x8BFE0E7E, 0xF06C0F05, 0x08000003, 0x00000000, + 0xBEFE010E, 0xBF8903F7, 0x5E000300, 0x5E020702, + 0xBF800000, 0xBF800000, 0xF8000803, 0x00000100, + 0xBFB00000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000 +}; + +static const uint32_t ps_tex_shader_patchinfo_offset_gfx11[] = +{ + 0x00000016 +}; + +// Denotes the Patch Info Code Length +static const uint32_t ps_tex_shader_patchinfo_code_size_gfx11 = 6; + +static const uint32_t ps_tex_shader_patchinfo_code_gfx11[][10][6] = +{ + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000890, 0x00000000 }, // SI_EXPORT_FMT_ZERO + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000801, 0x00000000 }, // SI_EXPORT_FMT_32_R + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_32_GR + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8000803, 0x00000300 }, // SI_EXPORT_FMT_32_AR + { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_FP16_ABGR + { 0xD7220000, 0x00020300, 0xD7220001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_UNORM16_ABGR + { 0xD7210000, 0x00020300, 0xD7210001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_SNORM16_ABGR + { 
0xD7230000, 0x00020300, 0xD7230001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_uint32_t16_ABGR + { 0xD7240000, 0x00020300, 0xD7240001, 0x00020702, 0xF8000803, 0x00000100 }, // SI_EXPORT_FMT_SINT16_ABGR + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800080F, 0x03020100 } // SI_EXPORT_FMT_32_ABGR + } +}; +// Holds Sh Register Information +static const struct reg_info ps_tex_sh_registers_gfx11[] = +{ + {0x2C0A, 0x020C0081 }, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x020C0081 }, + {0x2C0B, 0x00000018 } //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } +}; + +static const uint32_t ps_tex_num_sh_registers_gfx11 = 2; + +// Holds Context Register Information +static const struct reg_info ps_tex_context_registers_gfx11[] = +{ + {0xA1B4, 0x00000002 }, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, + {0xA1B6, 0x00000001 }, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, + {0xA08F, 0x0000000F }, //{ mmCB_SHADER_MASK, 0x0000000F }, + {0xA203, 0x00000010 }, //{ mmDB_SHADER_CONTROL, 0x00000010 }, + {0xA1C4, 0x00000000 }, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, + {0xA1B8, 0x00000000 }, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, + {0xA1C5, 0x00000004 } //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 /* SI_EXPORT_FMT_FP16_ABGR */ } +}; + +static const uint32_t ps_tex_num_context_registers_gfx11 = 7; + + +static const uint32_t vs_RectPosTexFast_shader_gfx11[] = +{ + 0xB0802006, 0xBEFE01C1, 0xBF840003, 0xF408050A, + 0xF80000B0, 0xD71F0001, 0x000100C1, 0x9300FF03, + 0x00040018, 0x9301FF02, 0x0009000C, 0xBF870091, + 0xD7200001, 0x000202C1, 0xD60B0001, 0x04058000, + 0xBF870001, 0xD4490000, 0x00000301, 0xBE862100, + 0x7E040B05, 0xBFA5001C, 0x7E06020A, 0x7E08020E, + 0x7E0A020F, 0xBF8701B4, 0x060404F3, 0x7E140211, + 0x7E0E0210, 0x7C240480, 0x060404F3, 0xD5010003, + 0x01AA0608, 0xD5010004, 0x01AA080C, 0xBF870003, + 0xD4120012, 0x00010102, 0x7E04020B, 0xBEEA1F12, + 0xBF870483, 0xD5010008, 0x01AA080C, 0xD5010006, + 0x01AA0608, 0xBF870003, 0xD5010004, 0x004A0409, + 0xD5010009, 0x004A0A0D, 
0xBEFE0106, 0x9302FF02, + 0x00090016, 0xBF870009, 0xD4C9007E, 0x00000501, + 0xBFA50002, 0xF8000941, 0x00000000, 0xBF89FFF0, + 0x8BFE0006, 0xD71F0000, 0x000100C1, 0xBFA50013, + 0x7E1602F2, 0x9300FF03, 0x00040018, 0x8B01FF05, + 0x00007FFF, 0xBF8704B2, 0xD7200000, 0x000200C1, + 0x7E0202F2, 0x84018901, 0x80018001, 0xBF870002, + 0xD60B0000, 0x04018000, 0xF80008CF, 0x01070406, + 0xBF89FC07, 0xE0744000, 0x01850800, 0xBFB00000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000 +}; + +static const struct reg_info vs_RectPosTexFast_sh_registers_gfx11[] = +{ + {0x2C8A, 0x020C00C2}, //{ mmSPI_SHADER_PGM_RSRC1_GS, 0x020C00C2 }, + {0x2C8B, 0x0008001C}, //{ mmSPI_SHADER_PGM_RSRC2_GS, 0x0008001C } +}; + +static const uint32_t vs_RectPosTexFast_num_sh_registers_gfx11 = 2; + +// Holds Context Register Information +static const struct reg_info vs_RectPosTexFast_context_registers_gfx11[] = +{ + {0xA1B1, 0x00000000}, //{ mmSPI_VS_OUT_CONFIG, 0x00000000 }, + {0xA1C2, 0x00000001}, //{ mmSPI_SHADER_IDX_FORMAT, 0x00000001 }, + {0xA1C3, 0x00000000}, //{ mmSPI_SHADER_POS_FORMAT, 0x00000000 /* Always 0 for now */}, + {0xA2E4, 0x00000000}, //{ mmVGT_GS_INSTANCE_CNT, 0x00000000 }, + {0xA2AB, 0x00000004}, //{ mmVGT_ESGS_RING_ITEMSIZE, 0x00000004 }, + {0xA2CE, 
0x00000001}, //{ mmVGT_GS_MAX_VERT_OUT, 0x00000001 } +}; + +static const uint32_t vs_RectPosTexFast_num_context_registers_gfx11 = 6; + +static const uint32_t preamblecache_gfx11[] = { + 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, + 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, + 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 0xc0016900, 0x208, 0x0, + 0xc0016900, 0x2a1, 0x0, 0xc0016900, 0x2ad, 0x0, 0xc0016900, 0x2dc, 0x0, + 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, + 0xc0046900, 0x310, 0x0, 0x3, 0x0, 0x100000, 0xc0016900, 0x349, 0x0, + 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 0xc0016900, 0x376, 0x0, + 0xc0016900, 0x385, 0x0, 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0026900, 0x204, 0x90000, 0x4, 0xc0016900, 0x20c, 0x0, 0xc0026900, 0x20e, 0x0, 0x0, + 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, + 0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0x0, 0xc0016900, 0x2a6, 0x0, + 0xc0016900, 0x210, 0x0, 0xc0016900, 0x2db, 0x0, 0xc0016900, 0x2e4, 0x0, + 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x0, 0xc0016900, 0x206, 0x300, + 0xc0016900, 0x212, 0x200, 0xc0016900, 0xf4, 0x0, 0xc0016900, 0x18, 0x0, + 0xc0016900, 0x1d4, 0xff, 0xc0016900, 0x2ce, 0x1, 0xc0016900, 0x2d3, 0x20001, + 0xc0016900, 0x1ff, 0x80, 0xc0016900, 0x2d5, 0x6012010, 0xc0017a00, 0x20000243, 0x0, + 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 0xc0017900, 0x24b, 0x0, + 0xc0017900, 0x259, 0xffffffff, 0xc0017900, 0x25f, 0x0, 0xc0017900, 0x260, 0x0, + 0xc0017900, 0x262, 0x0, 0xc0017900, 0x444, 0x0, 0xc0017900, 0x445, 0x0, + 0xc0017600, 0x6, 0x0, 0xc0017600, 0x80, 0x0, 0xc0017600, 0xb0, 0x0, + 0xc0047600, 0xb2, 0x0, 0x0, 0x0, 0x0, 0xc0017600, 0x30, 0x0, + 0xc0047600, 0x32, 0x0, 0x0, 0x0, 0x0 +}; + +static const uint32_t cached_cmd_gfx11[] = { + 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, + 0xc0046900, 0xa, 0x0, 
0x0, 0x0, 0x200020, + 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, + 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, + 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, + 0xc0026900, 0x292, 0x20, 0x6020000, + 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0, + 0xc0046900, 0x1d5, 0x0, 0x0, 0x0, 0x0, 0xc0016900, 0x104, 0x4a00005, + 0xc0016900, 0x1f, 0xf2a0055, 0xc0017900, 0x266, 0x4 +}; +static const uint32_t sh_reg_base_gfx11 = 0x2C00; +static const uint32_t context_reg_base_gfx11 = 0xA000; + +#endif diff --git a/lib/libdrm/tests/amdgpu/shader_code_gfx9.h b/lib/libdrm/tests/amdgpu/shader_code_gfx9.h new file mode 100644 index 000000000..3ad1ca8fb --- /dev/null +++ b/lib/libdrm/tests/amdgpu/shader_code_gfx9.h @@ -0,0 +1,204 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * +*/ + +#ifndef _shader_code_gfx9_h_ +#define _shader_code_gfx9_h_ + +static const uint32_t bufferclear_cs_shader_gfx9[] = { + 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, + 0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206, + 0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000, + 0xbf810000 +}; + +static const struct reg_info bufferclear_cs_shader_registers_gfx9[] = { + {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, + {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, + {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, + {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, + {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } +}; + +static const uint32_t buffercopy_cs_shader_gfx9[] = { + 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, + 0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70, + 0xe01c2000, 0x80010200, 0xbf810000 +}; + +static const uint32_t ps_const_shader_gfx9[] = { + 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, + 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, + 0xC4001C0F, 0x00000100, 0xBF810000 +}; + +static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; + +static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, + { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, + { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, + { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, + { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, + { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 
0x03020100 } + } +}; + +static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { + 0x00000004 +}; + +static const uint32_t ps_const_num_sh_registers_gfx9 = 2; + +static const struct reg_info ps_const_sh_registers_gfx9[] = { + {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, + {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } +}; + +static const uint32_t ps_const_num_context_registers_gfx9 = 7; + +static const struct reg_info ps_const_context_registers_gfx9[] = { + {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, + {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, + {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, + {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, + {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, + {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, + {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } +}; + +static const uint32_t ps_tex_shader_gfx9[] = { + 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, + 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, + 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, + 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, + 0x00000100, 0xBF810000 +}; + +static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { + 0x0000000B +}; + +static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; + +static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, + { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, + { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, + { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 
0xC4001C0F, 0x00000100 }, + { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, + { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } + } +}; + +static const uint32_t ps_tex_num_sh_registers_gfx9 = 2; +static const struct reg_info ps_tex_sh_registers_gfx9[] = { + {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, + {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } +}; + +static const uint32_t ps_tex_num_context_registers_gfx9 = 7; + +static const struct reg_info ps_tex_context_registers_gfx9[] = { + {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, + {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, + {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, + {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, + {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, + {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, + {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } +}; + +static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { + 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, + 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, + 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, + 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, + 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, + 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, + 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, + 0xC400020F, 0x05060403, 0xBF810000 +}; + +static const struct reg_info vs_RectPosTexFast_sh_registers_gfx9[] = +{ + {0x2C4A, 0x000C0081}, //{ mmSPI_SHADER_PGM_RSRC1_VS, 0x000C0081 }, + {0x2C4B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_VS, 0x00000018 } +}; + +static const uint32_t vs_RectPosTexFast_num_sh_registers_gfx9 = 2; + +// Holds Context Register Information +static const struct reg_info vs_RectPosTexFast_context_registers_gfx9[] = +{ + {0xA1B1, 
0x00000000}, //{ mmSPI_VS_OUT_CONFIG, 0x00000000 }, + {0xA1C3, 0x00000000}, //{ mmSPI_SHADER_POS_FORMAT, 0x00000000 /* Always 0 for now */} +}; + +static const uint32_t vs_RectPosTexFast_num_context_registers_gfx9 = 2; + +static const uint32_t preamblecache_gfx9[] = { + 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, + 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, + 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, + 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, + 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, + 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, + 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, + 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, + 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, + 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, + 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, + 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, + 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, + 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, + 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, + 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, + 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, + 0xc0017900, 0x24b, 0x0 +}; + +static const uint32_t cached_cmd_gfx9[] = { + 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, + 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, + 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, + 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, + 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, + 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, + 0xc0026900, 0x292, 0x20, 0x60201b8, + 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 
0x2f8, 0x0 +}; + +static const uint32_t sh_reg_base_gfx9 = 0x2C00; +static const uint32_t context_reg_base_gfx9 = 0xA000; + +#endif diff --git a/lib/libdrm/tests/amdgpu/shader_code_hang.h b/lib/libdrm/tests/amdgpu/shader_code_hang.h new file mode 100644 index 000000000..070bd7186 --- /dev/null +++ b/lib/libdrm/tests/amdgpu/shader_code_hang.h @@ -0,0 +1,104 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * +*/ + +#ifndef _shader_code_hang_h_ +#define _shader_code_hang_h_ + +static const unsigned int memcpy_shader_hang[] = { + 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, + 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, + 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, + 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, + 0xF800180F, 0x03020100, 0xBF810000 +}; + +struct shader_test_shader_bin { + const uint32_t *shader; + uint32_t header_length; + uint32_t body_length; + uint32_t foot_length; +}; + +static const unsigned int memcpy_cs_hang_slow_ai_codes[] = { + 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, + 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 +}; + +static struct shader_test_shader_bin memcpy_cs_hang_slow_ai = { + memcpy_cs_hang_slow_ai_codes, 4, 3, 1 +}; + +static const unsigned int memcpy_cs_hang_slow_rv_codes[] = { + 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, + 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 +}; + +static struct shader_test_shader_bin memcpy_cs_hang_slow_rv = { + memcpy_cs_hang_slow_rv_codes, 4, 3, 1 +}; + +static const unsigned int memcpy_cs_hang_slow_nv_codes[] = { + 0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100, + 0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000 +}; + +static struct shader_test_shader_bin memcpy_cs_hang_slow_nv = { + memcpy_cs_hang_slow_nv_codes, 4, 3, 1 +}; + + +static const unsigned int memcpy_ps_hang_slow_ai_codes[] = { + 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, + 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, + 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, + 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, + 0x03020100, 0xbf810000 +}; + +static struct shader_test_shader_bin memcpy_ps_hang_slow_ai = { + memcpy_ps_hang_slow_ai_codes, 7, 2, 9 +}; + +static const unsigned int memcpy_ps_hang_slow_navi10_codes[] = { + 0xBEFC030C,0xBE8E047E,0xBEFE0A7E,0xC8080000, + 0xC80C0100,0xC8090001,0xC80D0101,0xF0800F0A, + 0x00400402,0x00000003,0xBEFE040E,0xBF8C0F70, + 0xBF800000,0xBF800000,0xBF800000,0xBF800000, 
+ 0xF800180F,0x07060504,0xBF810000 +}; + +static struct shader_test_shader_bin memcpy_ps_hang_slow_navi10 = { + memcpy_ps_hang_slow_navi10_codes, 7, 3, 9 +}; + +static const unsigned int memcpy_ps_hang_slow_navi21_codes[] = { + 0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000, 0xC8000100, 0xC8090001, 0xC8010101, 0x87FE0E7E, // header + 0xF0800F0A, 0x00400002, 0x00000000, // body - image_sample instruction + 0xBFA3FFE3, 0xBEFE040E, 0xBF8C3F70, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100, 0xBF810000 // footer +}; + +static struct shader_test_shader_bin memcpy_ps_hang_slow_navi21 = { + memcpy_ps_hang_slow_navi21_codes, 8, 3, 10 +}; + +#endif diff --git a/lib/libdrm/tests/amdgpu/shader_test_util.c b/lib/libdrm/tests/amdgpu/shader_test_util.c new file mode 100644 index 000000000..60148fb8f --- /dev/null +++ b/lib/libdrm/tests/amdgpu/shader_test_util.c @@ -0,0 +1,2156 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + +#include "CUnit/Basic.h" +#include "amdgpu_test.h" +#include "shader_code.h" + +#define PACKET3_DISPATCH_DIRECT 0x15 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_OFFSET 0x77 +#define PACKET3_SET_UCONFIG_REG 0x79 +#define PACKET3_SET_SH_REG_INDEX 0x9B + +/* + * PACKET3(op, n): command header word with PACKET_TYPE3 in bits 31:30, + * n (14 bits) in bits 29:16 and op (8 bits) in bits 15:8. The type field is + * shifted as uint32_t because 3 << 30 is not representable in a signed int. + * PACKET3_COMPUTE(op, n): the same header with bit 1 set (presumably the + * compute shader-type bit - confirm against the packet spec); the expansion is + * fully parenthesized so it stays one operand inside larger expressions. + */ +#define PACKET_TYPE3 3 +#define PACKET3(op, n) (((uint32_t)PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1)) + + +struct shader_test_bo { + amdgpu_bo_handle bo; + unsigned size; + unsigned heap; + void *ptr; + uint64_t mc_address; + amdgpu_va_handle va; +}; + +struct shader_test_draw { + struct shader_test_bo ps_bo; + enum ps_type ps_type; + struct shader_test_bo vs_bo; + enum vs_type vs_type; +}; +struct shader_test_dispatch { + struct shader_test_bo cs_bo; + enum cs_type cs_type; +}; + +struct 
shader_test_info { + amdgpu_device_handle device_handle; + enum amdgpu_test_gfx_version version; + unsigned ip; + unsigned ring; + int hang; + int hang_slow; +}; + +struct shader_test_priv { + const struct shader_test_info *info; + unsigned cmd_curr; + + union { + struct shader_test_draw shader_draw; + struct shader_test_dispatch shader_dispatch; + }; + struct shader_test_bo vtx_attributes_mem; + struct shader_test_bo cmd; + struct shader_test_bo src; + struct shader_test_bo dst; +}; + +static int shader_test_bo_alloc(amdgpu_device_handle device_handle, + struct shader_test_bo *shader_test_bo) +{ + return amdgpu_bo_alloc_and_map(device_handle, shader_test_bo->size, 4096, + shader_test_bo->heap, 0, + &(shader_test_bo->bo), (void **)&(shader_test_bo->ptr), + &(shader_test_bo->mc_address), &(shader_test_bo->va)); +} + +static int shader_test_bo_free(struct shader_test_bo *shader_test_bo) +{ + return amdgpu_bo_unmap_and_free(shader_test_bo->bo, shader_test_bo->va, + shader_test_bo->mc_address, + shader_test_bo->size); +} + +void shader_test_for_each(amdgpu_device_handle device_handle, unsigned ip, + void (*fn)(struct shader_test_info *test_info)) +{ + int r; + uint32_t ring_id; + struct shader_test_info test_info = {0}; + struct drm_amdgpu_info_hw_ip info = {0}; + + r = amdgpu_query_hw_ip_info(device_handle, ip, 0, &info); + CU_ASSERT_EQUAL(r, 0); + if (!info.available_rings) { + printf("SKIP ... as there's no %s ring\n", + (ip == AMDGPU_HW_IP_GFX) ? "graphics": "compute"); + return; + } + + switch (info.hw_ip_version_major) { + case 9: + test_info.version = AMDGPU_TEST_GFX_V9; + break; + case 10: + test_info.version = AMDGPU_TEST_GFX_V10; + break; + case 11: + test_info.version = AMDGPU_TEST_GFX_V11; + break; + default: + printf("SKIP ... 
unsupported gfx version %d\n", info.hw_ip_version_major); + return; + } + + test_info.device_handle = device_handle; + test_info.ip = ip; + + printf("\n"); + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { + printf("%s ring %d\n", (ip == AMDGPU_HW_IP_GFX) ? "graphics": "compute", + ring_id); + test_info.ring = ring_id; + fn(&test_info); + } +} + +static void write_context_control(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + + if (test_priv->info->ip == AMDGPU_HW_IP_GFX) { + ptr[i++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1); + ptr[i++] = 0x80000000; + ptr[i++] = 0x80000000; + } + + test_priv->cmd_curr = i; +} + +/* + * Fill shader_bo with the slow-hang shader described by shader_bin: the header + * dwords once, the body dwords repeated as many times as fit in + * shader_bo->size, then the footer dwords. The *_length fields of shader_bin + * count 32-bit dwords, not bytes. + */ +static void shader_test_load_shader_hang_slow(struct shader_test_bo *shader_bo, + struct shader_test_shader_bin *shader_bin) +{ + /* shader_bo->ptr is void *, so adding j to it directly would step j bytes (GNU extension) instead of j dwords; index through a dword pointer. */ + uint32_t *dst = shader_bo->ptr; + int i, j, loop; + + loop = (shader_bo->size / sizeof(uint32_t) - shader_bin->header_length + - shader_bin->foot_length) / shader_bin->body_length; + + memcpy(dst, shader_bin->shader, shader_bin->header_length * sizeof(uint32_t)); + + j = shader_bin->header_length; + for (i = 0; i < loop; i++) { + memcpy(dst + j, + shader_bin->shader + shader_bin->header_length, + shader_bin->body_length * sizeof(uint32_t)); + j += shader_bin->body_length; + } + + memcpy(dst + j, + shader_bin->shader + shader_bin->header_length + shader_bin->body_length, + shader_bin->foot_length * sizeof(uint32_t)); +} + +static void amdgpu_dispatch_load_cs_shader_hang_slow(struct shader_test_priv *test_priv) +{ + struct amdgpu_gpu_info gpu_info = {0}; + struct shader_test_shader_bin *cs_shader_bin; + int r; + + r = amdgpu_query_gpu_info(test_priv->info->device_handle, &gpu_info); + CU_ASSERT_EQUAL(r, 0); + + switch (gpu_info.family_id) { + case AMDGPU_FAMILY_AI: + cs_shader_bin = &memcpy_cs_hang_slow_ai; + break; + case AMDGPU_FAMILY_RV: + cs_shader_bin = &memcpy_cs_hang_slow_rv; + break; + default: + cs_shader_bin = &memcpy_cs_hang_slow_nv; + 
break; + } + + shader_test_load_shader_hang_slow(&test_priv->shader_dispatch.cs_bo, cs_shader_bin); +} + +static void amdgpu_dispatch_load_cs_shader(struct shader_test_priv *test_priv) +{ + if (test_priv->info->hang) { + if (test_priv->info->hang_slow) + amdgpu_dispatch_load_cs_shader_hang_slow(test_priv); + else + memcpy(test_priv->shader_dispatch.cs_bo.ptr, memcpy_shader_hang, + sizeof(memcpy_shader_hang)); + } else { + memcpy(test_priv->shader_dispatch.cs_bo.ptr, + shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type].shader, + shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type].shader_size); + } +} + +static void amdgpu_dispatch_init_gfx9(struct shader_test_priv *test_priv) +{ + int i; + uint32_t *ptr = test_priv->cmd.ptr; + + /* Write context control and load shadowing register if necessary */ + write_context_control(test_priv); + + i = test_priv->cmd_curr; + + /* Issue commands to set default compute state. */ + /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 3); + ptr[i++] = 0x204; + i += 3; + + /* clear mmCOMPUTE_TMPRING_SIZE */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x218; + ptr[i++] = 0; + + test_priv->cmd_curr = i; +} + +static void amdgpu_dispatch_init_gfx10(struct shader_test_priv *test_priv) +{ + int i; + uint32_t *ptr = test_priv->cmd.ptr; + + amdgpu_dispatch_init_gfx9(test_priv); + + i = test_priv->cmd_curr; + + /* mmCOMPUTE_SHADER_CHKSUM */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x22a; + ptr[i++] = 0; + /* mmCOMPUTE_REQ_CTRL */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 6); + ptr[i++] = 0x222; + i += 6; + /* mmCP_COHER_START_DELAY */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x7b; + ptr[i++] = 0x20; + + test_priv->cmd_curr = i; +} + +static void amdgpu_dispatch_init_gfx11(struct shader_test_priv *test_priv) +{ + int i; + uint32_t *ptr = test_priv->cmd.ptr; + + /* Write 
context control and load shadowing register if necessary */ + write_context_control(test_priv); + + i = test_priv->cmd_curr; + + /* Issue commands to set default compute state. */ + /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 3); + ptr[i++] = 0x204; + i += 3; + + /* clear mmCOMPUTE_TMPRING_SIZE */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x218; + ptr[i++] = 0; + + /* mmCOMPUTE_REQ_CTRL */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x222; + ptr[i++] = 0; + + /* mmCOMPUTE_USER_ACCUM_0 .. 3*/ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x224; + i += 4; + + /* mmCOMPUTE_SHADER_CHKSUM */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x22a; + ptr[i++] = 0; + + test_priv->cmd_curr = i; +} + +static void amdgpu_dispatch_init(struct shader_test_priv *test_priv) +{ + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + amdgpu_dispatch_init_gfx9(test_priv); + break; + case AMDGPU_TEST_GFX_V10: + amdgpu_dispatch_init_gfx10(test_priv); + break; + case AMDGPU_TEST_GFX_V11: + amdgpu_dispatch_init_gfx11(test_priv); + break; + } +} + +static void amdgpu_dispatch_write_cumask(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + + /* Issue commands to set cu mask used in current dispatch */ + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x216; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x219; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + break; + case AMDGPU_TEST_GFX_V10: + case AMDGPU_TEST_GFX_V11: + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - 
mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG_INDEX, 2); + ptr[i++] = 0x30000216; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG_INDEX, 2); + ptr[i++] = 0x30000219; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + break; + } + + test_priv->cmd_curr = i; +} + +static void amdgpu_dispatch_write2hw_gfx9(struct shader_test_priv *test_priv) +{ + const struct shader_test_cs_shader *cs_shader = &shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type]; + int j, i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + uint64_t shader_addr = test_priv->shader_dispatch.cs_bo.mc_address; + + /* Writes shader state to HW */ + /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x20c; + ptr[i++] = (shader_addr >> 8); + ptr[i++] = (shader_addr >> 40); + /* write sh regs*/ + for (j = 0; j < cs_shader->num_sh_reg; j++) { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + /* - Gfx9ShRegBase */ + ptr[i++] = cs_shader->sh_reg[j].reg_offset - shader_test_gfx_info[test_priv->info->version].sh_reg_base; + ptr[i++] = cs_shader->sh_reg[j].reg_value; + } + + /* Write constant data */ + if (CS_BUFFERCLEAR == test_priv->shader_dispatch.cs_type) { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x240; + ptr[i++] = test_priv->dst.mc_address; + ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000; + ptr[i++] = test_priv->dst.size / 16; + ptr[i++] = 0x74fac; + + /* Sets a range of pixel shader constants */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x244; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + } else { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x240; + ptr[i++] = test_priv->src.mc_address; + ptr[i++] = 
(test_priv->src.mc_address >> 32) | 0x100000; + ptr[i++] = test_priv->src.size / 16; + ptr[i++] = 0x74fac; + + /* Writes the UAV constant data to the SGPRs. */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x244; + ptr[i++] = test_priv->dst.mc_address; + ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000; + ptr[i++] = test_priv->dst.size / 16; + ptr[i++] = 0x74fac; + } + + test_priv->cmd_curr = i; +} + +static void amdgpu_dispatch_write2hw_gfx10(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + const struct shader_test_cs_shader *cs_shader = &shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type]; + int j; + uint64_t shader_addr = test_priv->shader_dispatch.cs_bo.mc_address; + + /* Writes shader state to HW */ + /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x20c; + ptr[i++] = (shader_addr >> 8); + ptr[i++] = (shader_addr >> 40); + /* write sh regs*/ + for (j = 0; j < cs_shader->num_sh_reg; j++) { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + /* - Gfx9ShRegBase */ + ptr[i++] = cs_shader->sh_reg[j].reg_offset - shader_test_gfx_info[test_priv->info->version].sh_reg_base; + ptr[i++] = cs_shader->sh_reg[j].reg_value; + } + + /* mmCOMPUTE_PGM_RSRC3 */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x228; + ptr[i++] = 0; + + if (CS_BUFFERCLEAR == test_priv->shader_dispatch.cs_type) { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x240; + ptr[i++] = test_priv->dst.mc_address; + ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000; + ptr[i++] = test_priv->dst.size / 16; + ptr[i++] = 0x1104bfac; + + /* Sets a range of pixel shader constants */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x244; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + } else { + ptr[i++] = 
PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x240; + ptr[i++] = test_priv->src.mc_address; + ptr[i++] = (test_priv->src.mc_address >> 32) | 0x100000; + ptr[i++] = test_priv->src.size / 16; + ptr[i++] = 0x1104bfac; + + /* Writes the UAV constant data to the SGPRs. */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x244; + ptr[i++] = test_priv->dst.mc_address; + ptr[i++] = (test_priv->dst.mc_address>> 32) | 0x100000; + ptr[i++] = test_priv->dst.size / 16; + ptr[i++] = 0x1104bfac; + } + + test_priv->cmd_curr = i; +} + +static void amdgpu_dispatch_write2hw_gfx11(struct shader_test_priv *test_priv) +{ + enum amdgpu_test_gfx_version version = test_priv->info->version; + const struct shader_test_cs_shader *cs_shader = &shader_test_cs[version][test_priv->shader_dispatch.cs_type]; + int j, i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + uint64_t shader_addr = test_priv->shader_dispatch.cs_bo.mc_address; + + /* Writes shader state to HW */ + /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x20c; + ptr[i++] = (shader_addr >> 8); + ptr[i++] = (shader_addr >> 40); + + /* write sh regs*/ + for (j = 0; j < cs_shader->num_sh_reg; j++) { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + /* - Gfx9ShRegBase */ + ptr[i++] = cs_shader->sh_reg[j].reg_offset - shader_test_gfx_info[version].sh_reg_base; + ptr[i++] = cs_shader->sh_reg[j].reg_value; + if (cs_shader->sh_reg[j].reg_offset == 0x2E12) + ptr[i-1] &= ~(1<<29); + } + + /* mmCOMPUTE_PGM_RSRC3 */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x228; + ptr[i++] = 0x3f0; + + /* Write constant data */ + /* Writes the texture resource constants data to the SGPRs */ + if (CS_BUFFERCLEAR == test_priv->shader_dispatch.cs_type) { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x240; + ptr[i++] = test_priv->dst.mc_address; + ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000; + ptr[i++] 
= test_priv->dst.size / 16; + ptr[i++] = 0x1003dfac; + + /* Sets a range of pixel shader constants */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x244; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + ptr[i++] = 0x22222222; + } else { + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x240; + ptr[i++] = test_priv->src.mc_address; + ptr[i++] = (test_priv->src.mc_address >> 32) | 0x100000; + ptr[i++] = test_priv->src.size / 16; + ptr[i++] = 0x1003dfac; + + /* Writes the UAV constant data to the SGPRs. */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x244; + ptr[i++] = test_priv->dst.mc_address; + ptr[i++] = (test_priv->dst.mc_address>> 32) | 0x100000; + ptr[i++] = test_priv->dst.size / 16; + ptr[i++] = 0x1003dfac; + } + + test_priv->cmd_curr = i; +} + +static void amdgpu_dispatch_write2hw(struct shader_test_priv *test_priv) +{ + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + amdgpu_dispatch_write2hw_gfx9(test_priv); + break; + case AMDGPU_TEST_GFX_V10: + amdgpu_dispatch_write2hw_gfx10(test_priv); + break; + case AMDGPU_TEST_GFX_V11: + amdgpu_dispatch_write2hw_gfx11(test_priv); + break; + } +} + +static void amdgpu_dispatch_write_dispatch_cmd(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + + /* clear mmCOMPUTE_RESOURCE_LIMITS */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x215; + ptr[i++] = 0; + + /* dispatch direct command */ + ptr[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); + ptr[i++] = (test_priv->dst.size / 16 + 0x40 - 1 ) / 0x40;//0x10; + ptr[i++] = 1; + ptr[i++] = 1; + ptr[i++] = 1; + + test_priv->cmd_curr = i; +} +static void amdgpu_test_dispatch_memset(struct shader_test_info *test_info) +{ + amdgpu_context_handle context_handle; + amdgpu_bo_handle resources[3]; + struct shader_test_priv test_priv; + struct shader_test_bo *cmd = &(test_priv.cmd); + struct 
shader_test_bo *dst = &(test_priv.dst); + struct shader_test_bo *shader = &(test_priv.shader_dispatch.cs_bo); + uint32_t *ptr_cmd; + uint8_t *ptr_dst; + int i, r; + struct amdgpu_cs_request ibs_request = {0}; + struct amdgpu_cs_ib_info ib_info= {0}; + amdgpu_bo_list_handle bo_list; + struct amdgpu_cs_fence fence_status = {0}; + uint32_t expired; + uint8_t cptr[16]; + + memset(&test_priv, 0, sizeof(test_priv)); + test_priv.info = test_info; + test_priv.shader_dispatch.cs_type = CS_BUFFERCLEAR; + r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle); + CU_ASSERT_EQUAL(r, 0); + + cmd->size = 4096; + cmd->heap = AMDGPU_GEM_DOMAIN_GTT; + r = shader_test_bo_alloc(test_info->device_handle, cmd); + CU_ASSERT_EQUAL(r, 0); + ptr_cmd = cmd->ptr; + memset(ptr_cmd, 0, cmd->size); + + shader->size = 4096; + shader->heap = AMDGPU_GEM_DOMAIN_VRAM; + r = shader_test_bo_alloc(test_info->device_handle, shader); + CU_ASSERT_EQUAL(r, 0); + memset(shader->ptr, 0, shader->size); + amdgpu_dispatch_load_cs_shader(&test_priv); + + dst->size = 0x4000; + dst->heap = AMDGPU_GEM_DOMAIN_VRAM; + r = shader_test_bo_alloc(test_info->device_handle, dst); + CU_ASSERT_EQUAL(r, 0); + + amdgpu_dispatch_init(&test_priv); + + /* Issue commands to set cu mask used in current dispatch */ + amdgpu_dispatch_write_cumask(&test_priv); + + /* Writes shader state to HW */ + amdgpu_dispatch_write2hw(&test_priv); + + amdgpu_dispatch_write_dispatch_cmd(&test_priv); + + i = test_priv.cmd_curr; + while (i & 7) + ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ + test_priv.cmd_curr = i; + + resources[0] = dst->bo; + resources[1] = shader->bo; + resources[2] = cmd->bo; + r = amdgpu_bo_list_create(test_info->device_handle, 3, resources, NULL, &bo_list); + CU_ASSERT_EQUAL(r, 0); + + ib_info.ib_mc_address = cmd->mc_address; + ib_info.size = test_priv.cmd_curr; + ibs_request.ip_type = test_info->ip; + ibs_request.ring = test_info->ring; + ibs_request.resources = bo_list; + ibs_request.number_of_ibs = 1; + 
ibs_request.ibs = &ib_info; + ibs_request.fence_info.handle = NULL; + + /* submit CS */ + r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_list_destroy(bo_list); + CU_ASSERT_EQUAL(r, 0); + + fence_status.ip_type = test_info->ip; + fence_status.ip_instance = 0; + fence_status.ring = test_info->ring; + fence_status.context = context_handle; + fence_status.fence = ibs_request.seq_no; + + /* wait for IB accomplished */ + r = amdgpu_cs_query_fence_status(&fence_status, + AMDGPU_TIMEOUT_INFINITE, + 0, &expired); + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(expired, true); + + /* verify if memset test result meets with expected */ + i = 0; + ptr_dst = (uint8_t *)(dst->ptr); + memset(cptr, 0x22, 16); + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0); + i = dst->size - 16; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0); + i = dst->size / 2; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0); + + r = shader_test_bo_free(dst); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(shader); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(cmd); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_cs_ctx_free(context_handle); + CU_ASSERT_EQUAL(r, 0); +} + +static +void amdgpu_test_dispatch_memcpy(struct shader_test_info *test_info) +{ + struct shader_test_priv test_priv; + amdgpu_context_handle context_handle; + amdgpu_bo_handle resources[4]; + struct shader_test_bo *cmd = &(test_priv.cmd); + struct shader_test_bo *src = &(test_priv.src); + struct shader_test_bo *dst = &(test_priv.dst); + struct shader_test_bo *shader = &(test_priv.shader_dispatch.cs_bo); + uint32_t *ptr_cmd; + uint8_t *ptr_src; + uint8_t *ptr_dst; + int i, r; + struct amdgpu_cs_request ibs_request = {0}; + struct amdgpu_cs_ib_info ib_info= {0}; + uint32_t expired, hang_state, hangs; + amdgpu_bo_list_handle bo_list; + struct amdgpu_cs_fence fence_status = {0}; + + memset(&test_priv, 0, sizeof(test_priv)); + test_priv.info = test_info; + test_priv.cmd.size = 4096; + 
test_priv.cmd.heap = AMDGPU_GEM_DOMAIN_GTT; + + test_priv.shader_dispatch.cs_bo.heap = AMDGPU_GEM_DOMAIN_VRAM; + test_priv.shader_dispatch.cs_type = CS_BUFFERCOPY; + test_priv.src.heap = AMDGPU_GEM_DOMAIN_VRAM; + test_priv.dst.heap = AMDGPU_GEM_DOMAIN_VRAM; + if (test_info->hang_slow) { + test_priv.shader_dispatch.cs_bo.size = 0x4000000; + test_priv.src.size = 0x4000000; + test_priv.dst.size = 0x4000000; + } else { + test_priv.shader_dispatch.cs_bo.size = 4096; + test_priv.src.size = 0x4000; + test_priv.dst.size = 0x4000; + } + + r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_alloc(test_info->device_handle, cmd); + CU_ASSERT_EQUAL(r, 0); + ptr_cmd = cmd->ptr; + memset(ptr_cmd, 0, cmd->size); + + r = shader_test_bo_alloc(test_info->device_handle, shader); + CU_ASSERT_EQUAL(r, 0); + memset(shader->ptr, 0, shader->size); + amdgpu_dispatch_load_cs_shader(&test_priv); + + r = shader_test_bo_alloc(test_info->device_handle, src); + CU_ASSERT_EQUAL(r, 0); + ptr_src = (uint8_t *)(src->ptr); + memset(ptr_src, 0x55, src->size); + + r = shader_test_bo_alloc(test_info->device_handle, dst); + CU_ASSERT_EQUAL(r, 0); + + amdgpu_dispatch_init(&test_priv); + + /* Issue commands to set cu mask used in current dispatch */ + amdgpu_dispatch_write_cumask(&test_priv); + + /* Writes shader state to HW */ + amdgpu_dispatch_write2hw(&test_priv); + + amdgpu_dispatch_write_dispatch_cmd(&test_priv); + + i = test_priv.cmd_curr; + while (i & 7) + ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ + test_priv.cmd_curr = i; + + resources[0] = shader->bo; + resources[1] = src->bo; + resources[2] = dst->bo; + resources[3] = cmd->bo; + r = amdgpu_bo_list_create(test_info->device_handle, 4, resources, NULL, &bo_list); + CU_ASSERT_EQUAL(r, 0); + + ib_info.ib_mc_address = cmd->mc_address; + ib_info.size = test_priv.cmd_curr; + ibs_request.ip_type = test_info->ip; + ibs_request.ring = test_info->ring; + ibs_request.resources = bo_list; 
+ ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + ibs_request.fence_info.handle = NULL; + r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + + fence_status.ip_type = test_info->ip; + fence_status.ip_instance = 0; + fence_status.ring = test_info->ring; + fence_status.context = context_handle; + fence_status.fence = ibs_request.seq_no; + + /* wait for IB accomplished */ + r = amdgpu_cs_query_fence_status(&fence_status, + AMDGPU_TIMEOUT_INFINITE, + 0, &expired); + + if (!test_info->hang) { + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(expired, true); + + /* verify if memcpy test result meets with expected */ + i = 0; + ptr_dst = (uint8_t *)dst->ptr; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0); + i = dst->size - 16; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0); + i = dst->size / 2; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0); + } else { + r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); + } + + r = amdgpu_bo_list_destroy(bo_list); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(src); + CU_ASSERT_EQUAL(r, 0); + r = shader_test_bo_free(dst); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(shader); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(cmd); + + r = amdgpu_cs_ctx_free(context_handle); + CU_ASSERT_EQUAL(r, 0); +} + +static void shader_test_dispatch_cb(struct shader_test_info *test_info) +{ + amdgpu_test_dispatch_memset(test_info); + amdgpu_test_dispatch_memcpy(test_info); +} +static void shader_test_dispatch_hang_cb(struct shader_test_info *test_info) +{ + test_info->hang = 0; + amdgpu_test_dispatch_memcpy(test_info); + + test_info->hang = 1; + amdgpu_test_dispatch_memcpy(test_info); + + test_info->hang = 0; + amdgpu_test_dispatch_memcpy(test_info); +} + +static void shader_test_dispatch_hang_slow_cb(struct shader_test_info *test_info) +{ + test_info->hang = 
0; + test_info->hang_slow = 0; + amdgpu_test_dispatch_memcpy(test_info); + + test_info->hang = 1; + test_info->hang_slow = 1; + amdgpu_test_dispatch_memcpy(test_info); + + test_info->hang = 0; + test_info->hang_slow = 0; + amdgpu_test_dispatch_memcpy(test_info); +} + +void amdgpu_test_dispatch_helper(amdgpu_device_handle device_handle, unsigned ip) +{ + shader_test_for_each(device_handle, ip, shader_test_dispatch_cb); +} + +void amdgpu_test_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip) +{ + shader_test_for_each(device_handle, ip, shader_test_dispatch_hang_cb); +} + +void amdgpu_test_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip) +{ + shader_test_for_each(device_handle, ip, shader_test_dispatch_hang_slow_cb); +} + +static void amdgpu_draw_load_ps_shader_hang_slow(struct shader_test_priv *test_priv) +{ + struct amdgpu_gpu_info gpu_info = {0}; + struct shader_test_shader_bin *ps_shader_bin = &memcpy_ps_hang_slow_navi21; + int r; + + r = amdgpu_query_gpu_info(test_priv->info->device_handle, &gpu_info); + CU_ASSERT_EQUAL(r, 0); + + switch (gpu_info.family_id) { + case AMDGPU_FAMILY_AI: + case AMDGPU_FAMILY_RV: + ps_shader_bin = &memcpy_ps_hang_slow_ai; + break; + case AMDGPU_FAMILY_NV: + if (gpu_info.chip_external_rev < 40) + ps_shader_bin = &memcpy_ps_hang_slow_navi10; + break; + } + + shader_test_load_shader_hang_slow(&test_priv->shader_draw.ps_bo, ps_shader_bin); +} + +static uint32_t round_up_size(uint32_t size) +{ + return (size + 255) & ~255; +} +static void amdgpu_draw_load_ps_shader(struct shader_test_priv *test_priv) +{ + uint8_t *ptr_shader = test_priv->shader_draw.ps_bo.ptr; + const struct shader_test_ps_shader *shader; + uint32_t shader_offset, num_export_fmt; + uint32_t mem_offset, patch_code_offset; + int i; + + if (test_priv->info->hang) { + if (test_priv->info->hang_slow) + amdgpu_draw_load_ps_shader_hang_slow(test_priv); + else + memcpy(ptr_shader, memcpy_shader_hang, sizeof(memcpy_shader_hang)); + + 
return; + } + + shader = &shader_test_ps[test_priv->info->version][test_priv->shader_draw.ps_type]; + num_export_fmt = 10; + shader_offset = round_up_size(shader->shader_size); + /* write main shader program */ + for (i = 0 ; i < num_export_fmt; i++) { + mem_offset = i * shader_offset; + memcpy(ptr_shader + mem_offset, shader->shader, shader->shader_size); + } + + /* overwrite patch codes */ + for (i = 0 ; i < num_export_fmt; i++) { + mem_offset = i * shader_offset + shader->patchinfo_code_offset[0] * sizeof(uint32_t); + patch_code_offset = i * shader->patchinfo_code_size; + memcpy(ptr_shader + mem_offset, + shader->patchinfo_code + patch_code_offset, + shader->patchinfo_code_size * sizeof(uint32_t)); + } +} + +/* load RectPosTexFast_VS */ +static void amdgpu_draw_load_vs_shader(struct shader_test_priv *test_priv) +{ + uint8_t *ptr_shader = test_priv->shader_draw.vs_bo.ptr; + const struct shader_test_vs_shader *shader = &shader_test_vs[test_priv->info->version][test_priv->shader_draw.vs_type]; + + memcpy(ptr_shader, shader->shader, shader->shader_size); +} + +static void amdgpu_draw_init(struct shader_test_priv *test_priv) +{ + int i; + uint32_t *ptr = test_priv->cmd.ptr; + const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version]; + + /* Write context control and load shadowing register if necessary */ + write_context_control(test_priv); + i = test_priv->cmd_curr; + + if (test_priv->info->version == AMDGPU_TEST_GFX_V11) { + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x446; + ptr[i++] = (test_priv->vtx_attributes_mem.mc_address >> 16); + // mmSPI_ATTRIBUTE_RING_SIZE + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x447; + ptr[i++] = 0x20001; + } + memcpy(ptr + i, gfx_info->preamble_cache, gfx_info->size_preamble_cache); + + test_priv->cmd_curr = i + gfx_info->size_preamble_cache/sizeof(uint32_t); +} + +static void amdgpu_draw_setup_and_write_drawblt_surf_info_gfx9(struct shader_test_priv 
*test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + + /* setup color buffer */ + /* offset reg + 0xA318 CB_COLOR0_BASE + 0xA319 CB_COLOR0_BASE_EXT + 0xA31A CB_COLOR0_ATTRIB2 + 0xA31B CB_COLOR0_VIEW + 0xA31C CB_COLOR0_INFO + 0xA31D CB_COLOR0_ATTRIB + 0xA31E CB_COLOR0_DCC_CONTROL + 0xA31F CB_COLOR0_CMASK + 0xA320 CB_COLOR0_CMASK_BASE_EXT + 0xA321 CB_COLOR0_FMASK + 0xA322 CB_COLOR0_FMASK_BASE_EXT + 0xA323 CB_COLOR0_CLEAR_WORD0 + 0xA324 CB_COLOR0_CLEAR_WORD1 + 0xA325 CB_COLOR0_DCC_BASE + 0xA326 CB_COLOR0_DCC_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); + ptr[i++] = 0x318; + ptr[i++] = test_priv->dst.mc_address >> 8; + ptr[i++] = test_priv->dst.mc_address >> 40; + ptr[i++] = test_priv->info->hang_slow ? 0x3ffc7ff : 0x7c01f; + ptr[i++] = 0; + ptr[i++] = 0x50438; + ptr[i++] = 0x10140000; + i += 9; + + /* mmCB_MRT0_EPITCH */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1e8; + ptr[i++] = test_priv->info->hang_slow ? 0xfff : 0x1f; + + /* 0xA32B CB_COLOR1_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x32b; + ptr[i++] = 0; + + /* 0xA33A CB_COLOR1_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x33a; + ptr[i++] = 0; + + /* SPI_SHADER_COL_FORMAT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1c5; + ptr[i++] = 9; + + /* Setup depth buffer */ + /* mmDB_Z_INFO */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0xe; + i += 2; + + test_priv->cmd_curr = i; +} +static void amdgpu_draw_setup_and_write_drawblt_surf_info_gfx10(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + + /* setup color buffer */ + /* 0xA318 CB_COLOR0_BASE + 0xA319 CB_COLOR0_PITCH + 0xA31A CB_COLOR0_SLICE + 0xA31B CB_COLOR0_VIEW + 0xA31C CB_COLOR0_INFO + 0xA31D CB_COLOR0_ATTRIB + 0xA31E CB_COLOR0_DCC_CONTROL + 0xA31F CB_COLOR0_CMASK + 0xA320 CB_COLOR0_CMASK_SLICE + 0xA321 CB_COLOR0_FMASK + 0xA322 
CB_COLOR0_FMASK_SLICE + 0xA323 CB_COLOR0_CLEAR_WORD0 + 0xA324 CB_COLOR0_CLEAR_WORD1 + 0xA325 CB_COLOR0_DCC_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14); + ptr[i++] = 0x318; + ptr[i++] = test_priv->dst.mc_address >> 8; + i += 3; + ptr[i++] = 0x50438; + i += 9; + + /* 0xA390 CB_COLOR0_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x390; + ptr[i++] = test_priv->dst.mc_address >> 40; + + /* 0xA398 CB_COLOR0_CMASK_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x398; + ptr[i++] = 0; + + /* 0xA3A0 CB_COLOR0_FMASK_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3a0; + ptr[i++] = 0; + + /* 0xA3A8 CB_COLOR0_DCC_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3a8; + ptr[i++] = 0; + + /* 0xA3B0 CB_COLOR0_ATTRIB2 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3b0; + ptr[i++] = test_priv->info->hang_slow ? 0x3ffc7ff : 0x7c01f; + + /* 0xA3B8 CB_COLOR0_ATTRIB3 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3b8; + ptr[i++] = 0x9014000; + + /* 0xA32B CB_COLOR1_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x32b; + ptr[i++] = 0; + + /* 0xA33A CB_COLOR1_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x33a; + ptr[i++] = 0; + + /* SPI_SHADER_COL_FORMAT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1c5; + ptr[i++] = 9; + + /* Setup depth buffer */ + /* mmDB_Z_INFO */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0x10; + i += 2; + + test_priv->cmd_curr = i; +} + +static void amdgpu_draw_setup_and_write_drawblt_surf_info_gfx11(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + + /* mmCB_COLOR0_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x318; + ptr[i++] = test_priv->dst.mc_address >> 8; + /* mmCB_COLOR0_VIEW .. 
mmCB_COLOR0_DCC_CONTROL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 4); + ptr[i++] = 0x31b; + i++; + ptr[i++] = 0x5040e; + i += 2; + /* mmCB_COLOR0_DCC_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x325; + ptr[i++] = 0; + /* mmCB_COLOR0_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x390; + ptr[i++] = (test_priv->dst.mc_address >> 40) & 0xFF; + /* mmCB_COLOR0_DCC_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3a8; + ptr[i++] = 0; + /* mmCB_COLOR0_ATTRIB2 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3b0; + ptr[i++] = test_priv->info->hang_slow ? 0x1ffc7ff : 0x7c01f; + /* mmCB_COLOR0_ATTRIB3 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3b8; + ptr[i++] = test_priv->info->hang_slow ? 0x1028000 : 0x1018000; + /* mmCB_COLOR0_INFO */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x32b; + ptr[i++] = 0; + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x33a; + ptr[i++] = 0; + /* mmSPI_SHADER_COL_FORMAT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1c5; + ptr[i++] = 0x9; + /* mmDB_Z_INFO */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0x10; + i += 2; + + test_priv->cmd_curr = i; +} + +static void amdgpu_draw_setup_and_write_drawblt_surf_info(struct shader_test_priv *test_priv) +{ + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + amdgpu_draw_setup_and_write_drawblt_surf_info_gfx9(test_priv); + break; + case AMDGPU_TEST_GFX_V10: + amdgpu_draw_setup_and_write_drawblt_surf_info_gfx10(test_priv); + break; + case AMDGPU_TEST_GFX_V11: + amdgpu_draw_setup_and_write_drawblt_surf_info_gfx11(test_priv); + break; + } +} + +static void amdgpu_draw_setup_and_write_drawblt_state_gfx9(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + const struct shader_test_gfx_info *gfx_info = 
&shader_test_gfx_info[test_priv->info->version]; + + /* mmPA_SC_TILE_STEERING_OVERRIDE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0xd7; + ptr[i++] = 0; + + ptr[i++] = 0xffff1000; + ptr[i++] = 0xc0021000; + + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0xd7; + ptr[i++] = 1; + + /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); + ptr[i++] = 0x2fe; + i += 16; + + /* mmPA_SC_CENTROID_PRIORITY_0 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0x2f5; + i += 2; + + memcpy(ptr + i, gfx_info->cached_cmd, gfx_info->size_cached_cmd); + if (test_priv->info->hang_slow) + *(ptr + i + 12) = 0x8000800; + + test_priv->cmd_curr = i + gfx_info->size_cached_cmd/sizeof(uint32_t); +} + +static void amdgpu_draw_setup_and_write_drawblt_state_gfx10(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version]; + + /* mmPA_SC_TILE_STEERING_OVERRIDE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0xd7; + ptr[i++] = 0; + + ptr[i++] = 0xffff1000; + ptr[i++] = 0xc0021000; + + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0xd7; + ptr[i++] = 0; + + /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); + ptr[i++] = 0x2fe; + i += 16; + + /* mmPA_SC_CENTROID_PRIORITY_0 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0x2f5; + i += 2; + + memcpy(ptr + i, gfx_info->cached_cmd, gfx_info->size_cached_cmd); + if (test_priv->info->hang_slow) + *(ptr + i + 12) = 0x8000800; + i += gfx_info->size_cached_cmd/sizeof(uint32_t); + + /* mmCB_RMI_GL2_CACHE_CONTROL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x104; + ptr[i++] = 0x40aa0055; + /* mmDB_RMI_L2_CACHE_CONTROL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1f; + ptr[i++] = 0x2a0055; + + 
test_priv->cmd_curr = i; +} + +static void amdgpu_draw_setup_and_write_drawblt_state_gfx11(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version]; + + /* mmPA_SC_TILE_STEERING_OVERRIDE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0xd7; + ptr[i++] = 0; + + ptr[i++] = 0xffff1000; + ptr[i++] = 0xc0021000; + + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0xd7; + i++; + + /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); + ptr[i++] = 0x2fe; + i += 16; + + /* mmPA_SC_CENTROID_PRIORITY_0 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0x2f5; + i += 2; + + memcpy(ptr + i, gfx_info->cached_cmd, gfx_info->size_cached_cmd); + if (test_priv->info->hang_slow) + *(ptr + i + 12) = 0x8000800; + + test_priv->cmd_curr = i + gfx_info->size_cached_cmd/sizeof(uint32_t); +} + +static void amdgpu_draw_setup_and_write_drawblt_state(struct shader_test_priv *test_priv) +{ + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + amdgpu_draw_setup_and_write_drawblt_state_gfx9(test_priv); + break; + case AMDGPU_TEST_GFX_V10: + amdgpu_draw_setup_and_write_drawblt_state_gfx10(test_priv); + break; + case AMDGPU_TEST_GFX_V11: + amdgpu_draw_setup_and_write_drawblt_state_gfx11(test_priv); + break; + } +} + +static void amdgpu_draw_vs_RectPosTexFast_write2hw_gfx9(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + uint64_t shader_addr = test_priv->shader_draw.vs_bo.mc_address; + enum ps_type ps = test_priv->shader_draw.ps_type; + + /* mmPA_CL_VS_OUT_CNTL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x207; + ptr[i++] = 0; + + /* mmSPI_SHADER_PGM_RSRC3_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x46; + ptr[i++] = 0xffff; + + /* 
mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x48; + ptr[i++] = shader_addr >> 8; + ptr[i++] = shader_addr >> 40; + + /* mmSPI_SHADER_PGM_RSRC1_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x4a; + ptr[i++] = 0xc0081; + + /* mmSPI_SHADER_PGM_RSRC2_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x4b; + ptr[i++] = 0x18; + + /* mmSPI_VS_OUT_CONFIG */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1b1; + ptr[i++] = 2; + + /* mmSPI_SHADER_POS_FORMAT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1c3; + ptr[i++] = 4; + + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x4c; + i += 2; + ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000; + ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000; + + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x50; + i += 2; + if (ps == PS_CONST) { + i += 2; + } else if (ps == PS_TEX) { + ptr[i++] = 0x3f800000; + ptr[i++] = 0x3f800000; + } + + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x54; + i += 4; + + test_priv->cmd_curr = i; +} + +static void amdgpu_draw_vs_RectPosTexFast_write2hw_gfx10(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + uint64_t shader_addr = test_priv->shader_draw.vs_bo.mc_address; + enum ps_type ps = test_priv->shader_draw.ps_type; + + /* mmPA_CL_VS_OUT_CNTL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x207; + ptr[i++] = 0; + + /* mmSPI_SHADER_PGM_RSRC3_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000046; + ptr[i++] = 0xffff; + /* mmSPI_SHADER_PGM_RSRC4_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000041; + ptr[i++] = 0xffff; + + /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x48; + ptr[i++] = shader_addr >> 8; + ptr[i++] = 
shader_addr >> 40; + + /* mmSPI_SHADER_PGM_RSRC1_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x4a; + ptr[i++] = 0xc0041; + /* mmSPI_SHADER_PGM_RSRC2_VS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = 0x4b; + ptr[i++] = 0x18; + + /* mmSPI_VS_OUT_CONFIG */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1b1; + ptr[i++] = 2; + + /* mmSPI_SHADER_POS_FORMAT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1c3; + ptr[i++] = 4; + + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x4c; + i += 2; + ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000; + ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000; + + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x50; + i += 2; + if (ps == PS_CONST) { + i += 2; + } else if (ps == PS_TEX) { + ptr[i++] = 0x3f800000; + ptr[i++] = 0x3f800000; + } + + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x54; + i += 4; + + test_priv->cmd_curr = i; +} + + +static void amdgpu_draw_vs_RectPosTexFast_write2hw_gfx11(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version]; + uint64_t shader_addr = test_priv->shader_draw.vs_bo.mc_address; + const struct shader_test_vs_shader *shader = &shader_test_vs[test_priv->info->version][test_priv->shader_draw.vs_type]; + enum ps_type ps = test_priv->shader_draw.ps_type; + int j, offset; + + /* mmPA_CL_VS_OUT_CNTL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x207; + ptr[i++] = 0; + + /* mmSPI_SHADER_PGM_RSRC3_GS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000087; + ptr[i++] = 0xffff; + /* mmSPI_SHADER_PGM_RSRC4_GS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000081; + ptr[i++] = 0x1fff0001; + + /* mmSPI_SHADER_PGM_LO_ES */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2); + 
ptr[i++] = 0xc8; + ptr[i++] = shader_addr >> 8; + ptr[i++] = shader_addr >> 40; + + /* write sh reg */ + for (j = 0; j < shader->num_sh_reg; j++) { + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = shader->sh_reg[j].reg_offset - gfx_info->sh_reg_base; + ptr[i++] = shader->sh_reg[j].reg_value; + } + /* write context reg */ + for (j = 0; j < shader->num_context_reg; j++) { + switch (shader->context_reg[j].reg_offset) { + case 0xA1B1: //mmSPI_VS_OUT_CONFIG + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = shader->context_reg[j].reg_offset - gfx_info->context_reg_base; + ptr[i++] = 2; + break; + case 0xA1C3: //mmSPI_SHADER_POS_FORMAT + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = shader->context_reg[j].reg_offset - gfx_info->context_reg_base; + ptr[i++] = 4; + break; + case 0xA2E4: //mmVGT_GS_INSTANCE_CNT + case 0xA2CE: //mmVGT_GS_MAX_VERT_OUT + break; + default: + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = shader->context_reg[j].reg_offset - gfx_info->context_reg_base; + ptr[i++] = shader->context_reg[j].reg_value; + break; + } + } + + // write constant + // dst rect + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x8c; + i += 2; + ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000; + ptr[i++] = test_priv->info->hang_slow ? 
0x45000000 : 0x42000000; + // src rect + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x90; + i += 2; + if (ps == PS_CONST) { + i += 2; + } else if (ps == PS_TEX) { + ptr[i++] = 0x3f800000; + ptr[i++] = 0x3f800000; + } + + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr[i++] = 0x94; + i += 4; + // vtx_attributes_mem + ptr[i++] = 0xc02f1000; + offset = i * sizeof(uint32_t); + i += 44; + ptr[i++] = test_priv->vtx_attributes_mem.mc_address & 0xffffffff; + ptr[i++] = 0xc0100000 | ((test_priv->vtx_attributes_mem.mc_address >> 32) & 0xffff); + ptr[i++] = test_priv->vtx_attributes_mem.size / 16; + ptr[i++] = 0x2043ffac; + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_OFFSET, 2); + ptr[i++] = 0x98; + ptr[i++] = offset; + i++; + + test_priv->cmd_curr = i; +} + +static void amdgpu_draw_vs_RectPosTexFast_write2hw(struct shader_test_priv *test_priv) +{ + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + amdgpu_draw_vs_RectPosTexFast_write2hw_gfx9(test_priv); + break; + case AMDGPU_TEST_GFX_V10: + amdgpu_draw_vs_RectPosTexFast_write2hw_gfx10(test_priv); + break; + case AMDGPU_TEST_GFX_V11: + amdgpu_draw_vs_RectPosTexFast_write2hw_gfx11(test_priv); + break; + } +} + +static void amdgpu_draw_ps_write2hw_gfx9_10(struct shader_test_priv *test_priv) +{ + int i, j; + uint64_t shader_addr = test_priv->shader_draw.ps_bo.mc_address; + const struct shader_test_ps_shader *ps = &shader_test_ps[test_priv->info->version][test_priv->shader_draw.ps_type]; + uint32_t *ptr = test_priv->cmd.ptr; + + i = test_priv->cmd_curr; + + if (test_priv->info->version == AMDGPU_TEST_GFX_V9) { + /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS + 0x2c08 SPI_SHADER_PGM_LO_PS + 0x2c09 SPI_SHADER_PGM_HI_PS */ + /* multiplicator 9 is from SPI_SHADER_COL_FORMAT */ + if (!test_priv->info->hang) + shader_addr += 256 * 9; + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 3); + ptr[i++] = 0x7; + ptr[i++] = 0xffff; + ptr[i++] = shader_addr >> 8; + ptr[i++] = shader_addr >> 40; + } else { + //if (!test_priv->info->hang) + 
shader_addr += 256 * 9; + /* 0x2c08 SPI_SHADER_PGM_LO_PS + 0x2c09 SPI_SHADER_PGM_HI_PS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x8; + ptr[i++] = shader_addr >> 8; + ptr[i++] = shader_addr >> 40; + + /* mmSPI_SHADER_PGM_RSRC3_PS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000007; + ptr[i++] = 0xffff; + /* mmSPI_SHADER_PGM_RSRC4_PS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000001; + ptr[i++] = 0xffff; + } + + for (j = 0; j < ps->num_sh_reg; j++) { + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = ps->sh_reg[j].reg_offset - 0x2c00; + ptr[i++] = ps->sh_reg[j].reg_value; + } + + for (j = 0; j < ps->num_context_reg; j++) { + if (ps->context_reg[j].reg_offset != 0xA1C5) { + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = ps->context_reg[j].reg_offset - 0xa000; + ptr[i++] = ps->context_reg[j].reg_value; + } + + if (ps->context_reg[j].reg_offset == 0xA1B4) { + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1b3; + ptr[i++] = 2; + } + } + + test_priv->cmd_curr = i; +} + +static void amdgpu_draw_ps_write2hw_gfx11(struct shader_test_priv *test_priv) +{ + int i, j; + uint64_t shader_addr = test_priv->shader_draw.ps_bo.mc_address; + enum amdgpu_test_gfx_version version = test_priv->info->version; + const struct shader_test_ps_shader *ps = &shader_test_ps[version][test_priv->shader_draw.ps_type]; + uint32_t *ptr = test_priv->cmd.ptr; + uint32_t export_shader_offset; + + i = test_priv->cmd_curr; + + /* SPI_SHADER_PGM_LO_PS + SPI_SHADER_PGM_HI_PS */ + shader_addr >>= 8; + if (!test_priv->info->hang) { + export_shader_offset = (round_up_size(ps->shader_size) * 9) >> 8; + shader_addr += export_shader_offset; + } + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2); + ptr[i++] = 0x8; + ptr[i++] = shader_addr & 0xffffffff; + ptr[i++] = (shader_addr >> 32) & 0xffffffff; + /* mmSPI_SHADER_PGM_RSRC3_PS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000007; + 
ptr[i++] = 0xffff; + /* mmSPI_SHADER_PGM_RSRC4_PS */ + ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000001; + ptr[i++] = 0x3fffff; + + for (j = 0; j < ps->num_sh_reg; j++) { + ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1); + ptr[i++] = ps->sh_reg[j].reg_offset - shader_test_gfx_info[version].sh_reg_base; + ptr[i++] = ps->sh_reg[j].reg_value; + } + + for (j = 0; j < ps->num_context_reg; j++) { + /* !mmSPI_SHADER_COL_FORMAT */ + if (ps->context_reg[j].reg_offset != 0xA1C5) { + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = ps->context_reg[j].reg_offset - shader_test_gfx_info[version].context_reg_base; + ptr[i++] = ps->context_reg[j].reg_value; + } + + /* mmSPI_PS_INPUT_ADDR */ + if (ps->context_reg[j].reg_offset == 0xA1B4) { + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1b3; + ptr[i++] = 2; + } + } + + test_priv->cmd_curr = i; +} + +static void amdgpu_draw_ps_write2hw(struct shader_test_priv *test_priv) +{ + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + case AMDGPU_TEST_GFX_V10: + amdgpu_draw_ps_write2hw_gfx9_10(test_priv); + break; + case AMDGPU_TEST_GFX_V11: + amdgpu_draw_ps_write2hw_gfx11(test_priv); + break; + } +} + +static void amdgpu_draw_draw(struct shader_test_priv *test_priv) +{ + int i = test_priv->cmd_curr; + uint32_t *ptr = test_priv->cmd.ptr; + + switch (test_priv->info->version) { + case AMDGPU_TEST_GFX_V9: + /* mmIA_MULTI_VGT_PARAM */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x40000258; + ptr[i++] = 0xd00ff; + /* mmVGT_PRIMITIVE_TYPE */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x10000242; + ptr[i++] = 0x11; + break; + case AMDGPU_TEST_GFX_V10: + /* mmGE_CNTL */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x25b; + ptr[i++] = 0xff; + /* mmVGT_PRIMITIVE_TYPE */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x242; + ptr[i++] = 0x11; + break; + case AMDGPU_TEST_GFX_V11: + /* mmGE_CNTL */ + ptr[i++] = 
PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x25b; + ptr[i++] = 0x80fc80; + /* mmVGT_PRIMITIVE_TYPE */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x242; + ptr[i++] = 0x11; + break; + } + + ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); + ptr[i++] = 3; + ptr[i++] = 2; + + test_priv->cmd_curr = i; +} + +static void amdgpu_memset_draw_test(struct shader_test_info *test_info) +{ + struct shader_test_priv test_priv; + amdgpu_context_handle context_handle; + struct shader_test_bo *ps_bo = &(test_priv.shader_draw.ps_bo); + struct shader_test_bo *vs_bo = &(test_priv.shader_draw.vs_bo); + struct shader_test_bo *dst = &(test_priv.dst); + struct shader_test_bo *cmd = &(test_priv.cmd); + struct shader_test_bo *vtx_attributes_mem = &(test_priv.vtx_attributes_mem); + amdgpu_bo_handle resources[5]; + uint8_t *ptr_dst; + uint32_t *ptr_cmd; + int i, r; + struct amdgpu_cs_request ibs_request = {0}; + struct amdgpu_cs_ib_info ib_info = {0}; + struct amdgpu_cs_fence fence_status = {0}; + uint32_t expired; + amdgpu_bo_list_handle bo_list; + uint8_t cptr[16]; + + memset(&test_priv, 0, sizeof(test_priv)); + test_priv.info = test_info; + + r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle); + CU_ASSERT_EQUAL(r, 0); + + ps_bo->size = 0x2000; + ps_bo->heap = AMDGPU_GEM_DOMAIN_VRAM; + r = shader_test_bo_alloc(test_info->device_handle, ps_bo); + CU_ASSERT_EQUAL(r, 0); + memset(ps_bo->ptr, 0, ps_bo->size); + + vs_bo->size = 4096; + vs_bo->heap = AMDGPU_GEM_DOMAIN_VRAM; + r = shader_test_bo_alloc(test_info->device_handle, vs_bo); + CU_ASSERT_EQUAL(r, 0); + memset(vs_bo->ptr, 0, vs_bo->size); + + test_priv.shader_draw.ps_type = PS_CONST; + amdgpu_draw_load_ps_shader(&test_priv); + + test_priv.shader_draw.vs_type = VS_RECTPOSTEXFAST; + amdgpu_draw_load_vs_shader(&test_priv); + + cmd->size = 4096; + cmd->heap = AMDGPU_GEM_DOMAIN_GTT; + r = shader_test_bo_alloc(test_info->device_handle, cmd); + CU_ASSERT_EQUAL(r, 0); + ptr_cmd = cmd->ptr; + 
memset(ptr_cmd, 0, cmd->size); + + dst->size = 0x4000; + dst->heap = AMDGPU_GEM_DOMAIN_VRAM; + r = shader_test_bo_alloc(test_info->device_handle, dst); + CU_ASSERT_EQUAL(r, 0); + + if (test_info->version == AMDGPU_TEST_GFX_V11) { + vtx_attributes_mem->size = 0x4040000; + vtx_attributes_mem->heap = AMDGPU_GEM_DOMAIN_VRAM; + + r = shader_test_bo_alloc(test_info->device_handle, vtx_attributes_mem); + CU_ASSERT_EQUAL(r, 0); + } + + amdgpu_draw_init(&test_priv); + + amdgpu_draw_setup_and_write_drawblt_surf_info(&test_priv); + + amdgpu_draw_setup_and_write_drawblt_state(&test_priv); + + amdgpu_draw_vs_RectPosTexFast_write2hw(&test_priv); + + amdgpu_draw_ps_write2hw(&test_priv); + + i = test_priv.cmd_curr; + /* ps constant data */ + ptr_cmd[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = 0x33333333; + ptr_cmd[i++] = 0x33333333; + ptr_cmd[i++] = 0x33333333; + ptr_cmd[i++] = 0x33333333; + test_priv.cmd_curr = i; + + amdgpu_draw_draw(&test_priv); + + i = test_priv.cmd_curr; + while (i & 7) + ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ + test_priv.cmd_curr = i; + + i = 0; + resources[i++] = dst->bo; + resources[i++] = ps_bo->bo; + resources[i++] = vs_bo->bo; + resources[i++] = cmd->bo; + if (vtx_attributes_mem->size) + resources[i++] = vtx_attributes_mem->bo; + r = amdgpu_bo_list_create(test_info->device_handle, i, resources, NULL, &bo_list); + CU_ASSERT_EQUAL(r, 0); + + ib_info.ib_mc_address = cmd->mc_address; + ib_info.size = test_priv.cmd_curr; + ibs_request.ip_type = test_info->ip; + ibs_request.ring = test_info->ring; + ibs_request.resources = bo_list; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + ibs_request.fence_info.handle = NULL; + + /* submit CS */ + r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_list_destroy(bo_list); + CU_ASSERT_EQUAL(r, 0); + + fence_status.ip_type = test_info->ip; + fence_status.ip_instance = 0; + fence_status.ring = test_info->ring; + 
fence_status.context = context_handle; + fence_status.fence = ibs_request.seq_no; + + /* wait for IB accomplished */ + r = amdgpu_cs_query_fence_status(&fence_status, + AMDGPU_TIMEOUT_INFINITE, + 0, &expired); + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(expired, true); + + /* verify if memset test result meets with expected */ + i = 0; + ptr_dst = dst->ptr; + memset(cptr, 0x33, 16); + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0); + i = dst->size - 16; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0); + i = dst->size / 2; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0); + + if (vtx_attributes_mem->size) { + r = shader_test_bo_free(vtx_attributes_mem); + CU_ASSERT_EQUAL(r, 0); + } + + r = shader_test_bo_free(dst); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(cmd); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(ps_bo); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(vs_bo); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_cs_ctx_free(context_handle); + CU_ASSERT_EQUAL(r, 0); +} + +static void amdgpu_memcpy_draw_test(struct shader_test_info *test_info) +{ + struct shader_test_priv test_priv; + amdgpu_context_handle context_handle; + struct shader_test_bo *ps_bo = &(test_priv.shader_draw.ps_bo); + struct shader_test_bo *vs_bo = &(test_priv.shader_draw.vs_bo); + struct shader_test_bo *src = &(test_priv.src); + struct shader_test_bo *dst = &(test_priv.dst); + struct shader_test_bo *cmd = &(test_priv.cmd); + struct shader_test_bo *vtx_attributes_mem = &(test_priv.vtx_attributes_mem); + amdgpu_bo_handle resources[6]; + uint8_t *ptr_dst; + uint8_t *ptr_src; + uint32_t *ptr_cmd; + int i, r; + struct amdgpu_cs_request ibs_request = {0}; + struct amdgpu_cs_ib_info ib_info = {0}; + uint32_t hang_state, hangs; + uint32_t expired; + amdgpu_bo_list_handle bo_list; + struct amdgpu_cs_fence fence_status = {0}; + + memset(&test_priv, 0, sizeof(test_priv)); + test_priv.info = test_info; + test_priv.cmd.size = 4096; + test_priv.cmd.heap = AMDGPU_GEM_DOMAIN_GTT; + + 
ps_bo->heap = AMDGPU_GEM_DOMAIN_VRAM; + test_priv.shader_draw.ps_type = PS_TEX; + vs_bo->size = 4096; + vs_bo->heap = AMDGPU_GEM_DOMAIN_VRAM; + test_priv.shader_draw.vs_type = VS_RECTPOSTEXFAST; + test_priv.src.heap = AMDGPU_GEM_DOMAIN_VRAM; + test_priv.dst.heap = AMDGPU_GEM_DOMAIN_VRAM; + if (test_info->hang_slow) { + test_priv.shader_draw.ps_bo.size = 16*1024*1024; + test_priv.src.size = 0x4000000; + test_priv.dst.size = 0x4000000; + } else { + test_priv.shader_draw.ps_bo.size = 0x2000; + test_priv.src.size = 0x4000; + test_priv.dst.size = 0x4000; + } + + r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_alloc(test_info->device_handle, ps_bo); + CU_ASSERT_EQUAL(r, 0); + memset(ps_bo->ptr, 0, ps_bo->size); + + r = shader_test_bo_alloc(test_info->device_handle, vs_bo); + CU_ASSERT_EQUAL(r, 0); + memset(vs_bo->ptr, 0, vs_bo->size); + + amdgpu_draw_load_ps_shader(&test_priv); + amdgpu_draw_load_vs_shader(&test_priv); + + r = shader_test_bo_alloc(test_info->device_handle, cmd); + CU_ASSERT_EQUAL(r, 0); + ptr_cmd = cmd->ptr; + memset(ptr_cmd, 0, cmd->size); + + r = shader_test_bo_alloc(test_info->device_handle, src); + CU_ASSERT_EQUAL(r, 0); + ptr_src = src->ptr; + memset(ptr_src, 0x55, src->size); + + r = shader_test_bo_alloc(test_info->device_handle, dst); + CU_ASSERT_EQUAL(r, 0); + + if (test_info->version == AMDGPU_TEST_GFX_V11) { + vtx_attributes_mem->size = 0x4040000; + vtx_attributes_mem->heap = AMDGPU_GEM_DOMAIN_VRAM; + + r = shader_test_bo_alloc(test_info->device_handle, vtx_attributes_mem); + CU_ASSERT_EQUAL(r, 0); + } + + amdgpu_draw_init(&test_priv); + + amdgpu_draw_setup_and_write_drawblt_surf_info(&test_priv); + + amdgpu_draw_setup_and_write_drawblt_state(&test_priv); + + amdgpu_draw_vs_RectPosTexFast_write2hw(&test_priv); + + amdgpu_draw_ps_write2hw(&test_priv); + + // write ps user constant data + i = test_priv.cmd_curr; + ptr_cmd[i++] = PACKET3(PACKET3_SET_SH_REG, 8); + switch 
(test_info->version) { + case AMDGPU_TEST_GFX_V9: + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = src->mc_address >> 8; + ptr_cmd[i++] = src->mc_address >> 40 | 0x10e00000; + ptr_cmd[i++] = test_info->hang_slow ? 0x1ffcfff : 0x7c01f; + ptr_cmd[i++] = 0x90500fac; + ptr_cmd[i++] = test_info->hang_slow ? 0x1ffe000 : 0x3e000; + i += 3; + break; + case AMDGPU_TEST_GFX_V10: + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = src->mc_address >> 8; + ptr_cmd[i++] = src->mc_address >> 40 | 0xc4b00000; + ptr_cmd[i++] = test_info->hang_slow ? 0x81ffc1ff : 0x8007c007; + ptr_cmd[i++] = 0x90500fac; + i += 2; + ptr_cmd[i++] = test_info->hang_slow ? 0 : 0x400; + i++; + break; + case AMDGPU_TEST_GFX_V11: + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = src->mc_address >> 8; + ptr_cmd[i++] = src->mc_address >> 40 | 0xc4b00000; + ptr_cmd[i++] = test_info->hang_slow ? 0x1ffc1ff : 0x7c007; + ptr_cmd[i++] = test_info->hang_slow ? 0x90a00fac : 0x90600fac; + i += 2; + ptr_cmd[i++] = 0x400; + i++; + break; + } + + ptr_cmd[i++] = PACKET3(PACKET3_SET_SH_REG, 4); + ptr_cmd[i++] = 0x14; + ptr_cmd[i++] = 0x92; + i += 3; + + ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr_cmd[i++] = 0x191; + ptr_cmd[i++] = 0; + test_priv.cmd_curr = i; + + amdgpu_draw_draw(&test_priv); + + i = test_priv.cmd_curr; + while (i & 7) + ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ + test_priv.cmd_curr = i; + + i = 0; + resources[i++] = dst->bo; + resources[i++] = src->bo; + resources[i++] = ps_bo->bo; + resources[i++] = vs_bo->bo; + resources[i++] = cmd->bo; + if (vtx_attributes_mem->size) + resources[i++] = vtx_attributes_mem->bo; + r = amdgpu_bo_list_create(test_info->device_handle, i, resources, NULL, &bo_list); + CU_ASSERT_EQUAL(r, 0); + + ib_info.ib_mc_address = cmd->mc_address; + ib_info.size = test_priv.cmd_curr; + ibs_request.ip_type = test_info->ip; + ibs_request.ring = test_info->ring; + ibs_request.resources = bo_list; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + ibs_request.fence_info.handle = NULL; + r 
= amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + + fence_status.ip_type = test_info->ip; + fence_status.ip_instance = 0; + fence_status.ring = test_info->ring; + fence_status.context = context_handle; + fence_status.fence = ibs_request.seq_no; + + /* wait for IB accomplished */ + r = amdgpu_cs_query_fence_status(&fence_status, + AMDGPU_TIMEOUT_INFINITE, + 0, &expired); + if (!test_info->hang) { + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(expired, true); + + /* verify if memcpy test result meets with expected */ + i = 0; + ptr_dst = dst->ptr; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0); + i = dst->size - 16; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0); + i = dst->size / 2; + CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0); + } else { + r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); + } + + r = amdgpu_bo_list_destroy(bo_list); + CU_ASSERT_EQUAL(r, 0); + + if (vtx_attributes_mem->size) { + r = shader_test_bo_free(vtx_attributes_mem); + CU_ASSERT_EQUAL(r, 0); + } + + r = shader_test_bo_free(src); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(dst); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(cmd); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(ps_bo); + CU_ASSERT_EQUAL(r, 0); + + r = shader_test_bo_free(vs_bo); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_cs_ctx_free(context_handle); + CU_ASSERT_EQUAL(r, 0); +} + +static void shader_test_draw_cb(struct shader_test_info *test_info) +{ + amdgpu_memset_draw_test(test_info); + amdgpu_memcpy_draw_test(test_info); +} + +static void shader_test_draw_hang_cb(struct shader_test_info *test_info) +{ + test_info->hang = 0; + amdgpu_memcpy_draw_test(test_info); + + test_info->hang = 1; + amdgpu_memcpy_draw_test(test_info); + + test_info->hang = 0; + amdgpu_memcpy_draw_test(test_info); +} + +static void shader_test_draw_hang_slow_cb(struct 
shader_test_info *test_info) +{ + test_info->hang = 0; + test_info->hang_slow = 0; + amdgpu_memcpy_draw_test(test_info); + + test_info->hang = 1; + test_info->hang_slow = 1; + amdgpu_memcpy_draw_test(test_info); + + test_info->hang = 0; + test_info->hang_slow = 0; + amdgpu_memcpy_draw_test(test_info); +} + + +void amdgpu_test_draw_helper(amdgpu_device_handle device_handle) +{ + shader_test_for_each(device_handle, AMDGPU_HW_IP_GFX, shader_test_draw_cb); +} + +void amdgpu_test_draw_hang_helper(amdgpu_device_handle device_handle) +{ + shader_test_for_each(device_handle, AMDGPU_HW_IP_GFX, shader_test_draw_hang_cb); +} + +void amdgpu_test_draw_hang_slow_helper(amdgpu_device_handle device_handle) +{ + shader_test_for_each(device_handle, AMDGPU_HW_IP_GFX, shader_test_draw_hang_slow_cb); +} diff --git a/lib/libdrm/tests/amdgpu/vce_tests.c b/lib/libdrm/tests/amdgpu/vce_tests.c index 4e925cae2..9aa0a8eb6 100644 --- a/lib/libdrm/tests/amdgpu/vce_tests.c +++ b/lib/libdrm/tests/amdgpu/vce_tests.c @@ -96,7 +96,7 @@ CU_TestInfo vce_tests[] = { CU_BOOL suite_vce_tests_enable(void) { - uint32_t version, feature, asic_id; + uint32_t version, feature; CU_BOOL ret_mv = CU_FALSE; if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, @@ -107,7 +107,6 @@ CU_BOOL suite_vce_tests_enable(void) chip_rev = device_handle->info.chip_rev; chip_id = device_handle->info.chip_external_rev; ids_flags = device_handle->info.ids_flags; - asic_id = device_handle->info.asic_id; amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 0, &version, &feature); diff --git a/lib/libdrm/tests/amdgpu/vcn_tests.c b/lib/libdrm/tests/amdgpu/vcn_tests.c index ff97f3445..5e20fb65b 100644 --- a/lib/libdrm/tests/amdgpu/vcn_tests.c +++ b/lib/libdrm/tests/amdgpu/vcn_tests.c @@ -22,20 +22,65 @@ */ #include <stdio.h> +#include <string.h> #include <inttypes.h> +#include <unistd.h> #include "CUnit/Basic.h" +#include <unistd.h> #include "util_math.h" #include "amdgpu_test.h" #include "amdgpu_drm.h" #include 
"amdgpu_internal.h" #include "decode_messages.h" +#include "frame.h" #define IB_SIZE 4096 #define MAX_RESOURCES 16 +#define DECODE_CMD_MSG_BUFFER 0x00000000 +#define DECODE_CMD_DPB_BUFFER 0x00000001 +#define DECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 +#define DECODE_CMD_FEEDBACK_BUFFER 0x00000003 +#define DECODE_CMD_PROB_TBL_BUFFER 0x00000004 +#define DECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 +#define DECODE_CMD_BITSTREAM_BUFFER 0x00000100 +#define DECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 +#define DECODE_CMD_CONTEXT_BUFFER 0x00000206 + +#define DECODE_IB_PARAM_DECODE_BUFFER (0x00000001) + +#define DECODE_CMDBUF_FLAGS_MSG_BUFFER (0x00000001) +#define DECODE_CMDBUF_FLAGS_DPB_BUFFER (0x00000002) +#define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER (0x00000004) +#define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER (0x00000008) +#define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER (0x00000010) +#define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER (0x00000200) +#define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER (0x00000800) +#define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER (0x00001000) +#define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER (0x00100000) + +static bool vcn_dec_sw_ring = false; +static bool vcn_unified_ring = false; + +#define H264_NAL_TYPE_NON_IDR_SLICE 1 +#define H264_NAL_TYPE_DP_A_SLICE 2 +#define H264_NAL_TYPE_DP_B_SLICE 3 +#define H264_NAL_TYPE_DP_C_SLICE 0x4 +#define H264_NAL_TYPE_IDR_SLICE 0x5 +#define H264_NAL_TYPE_SEI 0x6 +#define H264_NAL_TYPE_SEQ_PARAM 0x7 +#define H264_NAL_TYPE_PIC_PARAM 0x8 +#define H264_NAL_TYPE_ACCESS_UNIT 0x9 +#define H264_NAL_TYPE_END_OF_SEQ 0xa +#define H264_NAL_TYPE_END_OF_STREAM 0xb +#define H264_NAL_TYPE_FILLER_DATA 0xc +#define H264_NAL_TYPE_SEQ_EXTENSION 0xd + +#define H264_START_CODE 0x000001 + struct amdgpu_vcn_bo { amdgpu_bo_handle handle; amdgpu_va_handle va_handle; @@ -44,6 +89,48 @@ struct amdgpu_vcn_bo { uint8_t *ptr; }; +typedef struct rvcn_decode_buffer_s { + unsigned int valid_buf_flag; + unsigned int msg_buffer_address_hi; + unsigned int 
msg_buffer_address_lo; + unsigned int dpb_buffer_address_hi; + unsigned int dpb_buffer_address_lo; + unsigned int target_buffer_address_hi; + unsigned int target_buffer_address_lo; + unsigned int session_contex_buffer_address_hi; + unsigned int session_contex_buffer_address_lo; + unsigned int bitstream_buffer_address_hi; + unsigned int bitstream_buffer_address_lo; + unsigned int context_buffer_address_hi; + unsigned int context_buffer_address_lo; + unsigned int feedback_buffer_address_hi; + unsigned int feedback_buffer_address_lo; + unsigned int luma_hist_buffer_address_hi; + unsigned int luma_hist_buffer_address_lo; + unsigned int prob_tbl_buffer_address_hi; + unsigned int prob_tbl_buffer_address_lo; + unsigned int sclr_coeff_buffer_address_hi; + unsigned int sclr_coeff_buffer_address_lo; + unsigned int it_sclr_table_buffer_address_hi; + unsigned int it_sclr_table_buffer_address_lo; + unsigned int sclr_target_buffer_address_hi; + unsigned int sclr_target_buffer_address_lo; + unsigned int cenc_size_info_buffer_address_hi; + unsigned int cenc_size_info_buffer_address_lo; + unsigned int mpeg2_pic_param_buffer_address_hi; + unsigned int mpeg2_pic_param_buffer_address_lo; + unsigned int mpeg2_mb_control_buffer_address_hi; + unsigned int mpeg2_mb_control_buffer_address_lo; + unsigned int mpeg2_idct_coeff_buffer_address_hi; + unsigned int mpeg2_idct_coeff_buffer_address_lo; +} rvcn_decode_buffer_t; + +typedef struct rvcn_decode_ib_package_s { + unsigned int package_size; + unsigned int package_type; +} rvcn_decode_ib_package_t; + + struct amdgpu_vcn_reg { uint32_t data0; uint32_t data1; @@ -52,6 +139,23 @@ struct amdgpu_vcn_reg { uint32_t cntl; }; +typedef struct BufferInfo_t { + uint32_t numOfBitsInBuffer; + const uint8_t *decBuffer; + uint8_t decData; + uint32_t decBufferSize; + const uint8_t *end; +} bufferInfo; + +typedef struct h264_decode_t { + uint8_t profile; + uint8_t level_idc; + uint8_t nal_ref_idc; + uint8_t nal_unit_type; + uint32_t pic_width, pic_height; + 
uint32_t slice_type; +} h264_decode; + static amdgpu_device_handle device_handle; static uint32_t major_version; static uint32_t minor_version; @@ -60,13 +164,19 @@ static uint32_t chip_rev; static uint32_t chip_id; static uint32_t asic_id; static uint32_t chip_rev; -static uint32_t chip_id; +static struct amdgpu_vcn_bo enc_buf; +static struct amdgpu_vcn_bo cpb_buf; +static uint32_t enc_task_id; static amdgpu_context_handle context_handle; static amdgpu_bo_handle ib_handle; static amdgpu_va_handle ib_va_handle; static uint64_t ib_mc_address; static uint32_t *ib_cpu; +static uint32_t *ib_checksum; +static uint32_t *ib_size_in_dw; + +static rvcn_decode_buffer_t *decode_buffer; static amdgpu_bo_handle resources[MAX_RESOURCES]; static unsigned num_resources; @@ -78,6 +188,9 @@ static struct amdgpu_vcn_reg reg[] = { {0x10, 0x11, 0xf, 0x29, 0x26d}, }; +uint32_t gWidth, gHeight, gSliceType; +static uint32_t vcn_ip_version_major; +static uint32_t vcn_ip_version_minor; static void amdgpu_cs_vcn_dec_create(void); static void amdgpu_cs_vcn_dec_decode(void); static void amdgpu_cs_vcn_dec_destroy(void); @@ -86,6 +199,22 @@ static void amdgpu_cs_vcn_enc_create(void); static void amdgpu_cs_vcn_enc_encode(void); static void amdgpu_cs_vcn_enc_destroy(void); +static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc); +static void amdgpu_cs_sq_ib_tail(uint32_t *end); +static void h264_check_0s (bufferInfo * bufInfo, int count); +static int32_t h264_se (bufferInfo * bufInfo); +static inline uint32_t bs_read_u1(bufferInfo *bufinfo); +static inline int bs_eof(bufferInfo *bufinfo); +static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n); +static inline uint32_t bs_read_ue(bufferInfo* bufinfo); +static uint32_t remove_03 (uint8_t *bptr, uint32_t len); +static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo); +static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo); +static void h264_slice_header (h264_decode 
*dec, bufferInfo *bufInfo); +static uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo); +static uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen); +static int verify_checksum(uint8_t *buffer, uint32_t buffer_size); + CU_TestInfo vcn_tests[] = { { "VCN DEC create", amdgpu_cs_vcn_dec_create }, @@ -93,7 +222,7 @@ CU_TestInfo vcn_tests[] = { { "VCN DEC destroy", amdgpu_cs_vcn_dec_destroy }, { "VCN ENC create", amdgpu_cs_vcn_enc_create }, - { "VCN ENC decode", amdgpu_cs_vcn_enc_encode }, + { "VCN ENC encode", amdgpu_cs_vcn_enc_encode }, { "VCN ENC destroy", amdgpu_cs_vcn_enc_destroy }, CU_TEST_INFO_NULL, }; @@ -101,6 +230,7 @@ CU_TestInfo vcn_tests[] = { CU_BOOL suite_vcn_tests_enable(void) { struct drm_amdgpu_info_hw_ip info; + bool enc_ring, dec_ring; int r; if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, @@ -112,12 +242,31 @@ CU_BOOL suite_vcn_tests_enable(void) chip_rev = device_handle->info.chip_rev; chip_id = device_handle->info.chip_external_rev; - r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info); + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info); + if (!r) { + vcn_ip_version_major = info.hw_ip_version_major; + vcn_ip_version_minor = info.hw_ip_version_minor; + enc_ring = !!info.available_rings; + /* in vcn 4.0 it re-uses encoding queue as unified queue */ + if (vcn_ip_version_major >= 4) { + vcn_unified_ring = true; + vcn_dec_sw_ring = true; + dec_ring = enc_ring; + } else { + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info); + dec_ring = !!info.available_rings; + } + } if (amdgpu_device_deinitialize(device_handle)) - return CU_FALSE; + return CU_FALSE; + + if (r) { + printf("\n\nASIC query hw info failed\n"); + return CU_FALSE; + } - if (r != 0 || !info.available_rings || + if (!(dec_ring || enc_ring) || (family_id < AMDGPU_FAMILY_RV && (family_id == AMDGPU_FAMILY_AI && (chip_id - chip_rev) < 0x32))) { /* Arcturus */ @@ -125,21 +274,25 @@ 
CU_BOOL suite_vcn_tests_enable(void) return CU_FALSE; } - if (family_id == AMDGPU_FAMILY_AI) { + if (!dec_ring) { + amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE); + amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE); + amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE); + } + + if (family_id == AMDGPU_FAMILY_AI || !enc_ring) { amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE); - amdgpu_set_test_active("VCN Tests", "VCN ENC decode", CU_FALSE); + amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE); amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE); } - if (info.hw_ip_version_major == 1) + if (vcn_ip_version_major == 1) vcn_reg_index = 0; - else if (info.hw_ip_version_major == 2) + else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0) vcn_reg_index = 1; - else if ((info.hw_ip_version_major == 2 && info.hw_ip_version_minor >= 5) || - info.hw_ip_version_major == 3) + else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) || + vcn_ip_version_major == 3) vcn_reg_index = 2; - else - return CU_FALSE; return CU_TRUE; } @@ -189,6 +342,43 @@ int suite_vcn_tests_clean(void) return CUE_SUCCESS; } +static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc) +{ + /* signature */ + *(base + (*offset)++) = 0x00000010; + *(base + (*offset)++) = 0x30000002; + ib_checksum = base + (*offset)++; + ib_size_in_dw = base + (*offset)++; + + /* engine info */ + *(base + (*offset)++) = 0x00000010; + *(base + (*offset)++) = 0x30000001; + *(base + (*offset)++) = enc ? 
2 : 3; + *(base + (*offset)++) = 0x00000000; +} + +static void amdgpu_cs_sq_ib_tail(uint32_t *end) +{ + uint32_t size_in_dw; + uint32_t checksum = 0; + + /* if the pointers are invalid, no need to process */ + if (ib_checksum == NULL || ib_size_in_dw == NULL) + return; + + size_in_dw = end - ib_size_in_dw - 1; + *ib_size_in_dw = size_in_dw; + *(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t); + + for (int i = 0; i < size_in_dw; i++) + checksum += *(ib_checksum + 2 + i); + + *ib_checksum = checksum; + + ib_checksum = NULL; + ib_size_in_dw = NULL; +} + static int submit(unsigned ndw, unsigned ip) { struct amdgpu_cs_request ibs_request = {0}; @@ -282,17 +472,91 @@ static void free_resource(struct amdgpu_vcn_bo *vcn_bo) static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx) { - ib_cpu[(*idx)++] = reg[vcn_reg_index].data0; - ib_cpu[(*idx)++] = addr; - ib_cpu[(*idx)++] = reg[vcn_reg_index].data1; - ib_cpu[(*idx)++] = addr >> 32; - ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd; - ib_cpu[(*idx)++] = cmd << 1; + if (vcn_dec_sw_ring == false) { + ib_cpu[(*idx)++] = reg[vcn_reg_index].data0; + ib_cpu[(*idx)++] = addr; + ib_cpu[(*idx)++] = reg[vcn_reg_index].data1; + ib_cpu[(*idx)++] = addr >> 32; + ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd; + ib_cpu[(*idx)++] = cmd << 1; + return; + } + + /* Support decode software ring message */ + if (!(*idx)) { + rvcn_decode_ib_package_t *ib_header; + + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, idx, false); + + ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx]; + ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + + sizeof(struct rvcn_decode_ib_package_s); + + (*idx)++; + ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER); + (*idx)++; + + decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]); + *idx += sizeof(struct rvcn_decode_buffer_s) / 4; + memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); + } + + switch(cmd) { + case DECODE_CMD_MSG_BUFFER: + decode_buffer->valid_buf_flag |= 
DECODE_CMDBUF_FLAGS_MSG_BUFFER; + decode_buffer->msg_buffer_address_hi = (addr >> 32); + decode_buffer->msg_buffer_address_lo = (addr); + break; + case DECODE_CMD_DPB_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER); + decode_buffer->dpb_buffer_address_hi = (addr >> 32); + decode_buffer->dpb_buffer_address_lo = (addr); + break; + case DECODE_CMD_DECODING_TARGET_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER); + decode_buffer->target_buffer_address_hi = (addr >> 32); + decode_buffer->target_buffer_address_lo = (addr); + break; + case DECODE_CMD_FEEDBACK_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER); + decode_buffer->feedback_buffer_address_hi = (addr >> 32); + decode_buffer->feedback_buffer_address_lo = (addr); + break; + case DECODE_CMD_PROB_TBL_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER); + decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32); + decode_buffer->prob_tbl_buffer_address_lo = (addr); + break; + case DECODE_CMD_SESSION_CONTEXT_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER); + decode_buffer->session_contex_buffer_address_hi = (addr >> 32); + decode_buffer->session_contex_buffer_address_lo = (addr); + break; + case DECODE_CMD_BITSTREAM_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER); + decode_buffer->bitstream_buffer_address_hi = (addr >> 32); + decode_buffer->bitstream_buffer_address_lo = (addr); + break; + case DECODE_CMD_IT_SCALING_TABLE_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER); + decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32); + decode_buffer->it_sclr_table_buffer_address_lo = (addr); + break; + case DECODE_CMD_CONTEXT_BUFFER: + decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER); + decode_buffer->context_buffer_address_hi = (addr >> 32); + 
decode_buffer->context_buffer_address_lo = (addr); + break; + default: + printf("Not Support!\n"); + } } static void amdgpu_cs_vcn_dec_create(void) { struct amdgpu_vcn_bo msg_buf; + unsigned ip; int len, r; num_resources = 0; @@ -307,18 +571,29 @@ static void amdgpu_cs_vcn_dec_create(void) memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg)); len = 0; - ib_cpu[len++] = reg[vcn_reg_index].data0; - ib_cpu[len++] = msg_buf.addr; - ib_cpu[len++] = reg[vcn_reg_index].data1; - ib_cpu[len++] = msg_buf.addr >> 32; - ib_cpu[len++] = reg[vcn_reg_index].cmd; - ib_cpu[len++] = 0; - for (; len % 16; ) { - ib_cpu[len++] = reg[vcn_reg_index].nop; + if (vcn_dec_sw_ring == true) + vcn_dec_cmd(msg_buf.addr, 0, &len); + else { + ib_cpu[len++] = reg[vcn_reg_index].data0; + ib_cpu[len++] = msg_buf.addr; + ib_cpu[len++] = reg[vcn_reg_index].data1; + ib_cpu[len++] = msg_buf.addr >> 32; + ib_cpu[len++] = reg[vcn_reg_index].cmd; ib_cpu[len++] = 0; + for (; len % 16; ) { + ib_cpu[len++] = reg[vcn_reg_index].nop; + ib_cpu[len++] = 0; + } } - r = submit(len, AMDGPU_HW_IP_VCN_DEC); + if (vcn_unified_ring) { + amdgpu_cs_sq_ib_tail(ib_cpu + len); + ip = AMDGPU_HW_IP_VCN_ENC; + } else + ip = AMDGPU_HW_IP_VCN_DEC; + + r = submit(len, ip); + CU_ASSERT_EQUAL(r, 0); free_resource(&msg_buf); @@ -330,6 +605,7 @@ static void amdgpu_cs_vcn_dec_decode(void) uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum; struct amdgpu_vcn_bo dec_buf; int size, len, i, r; + unsigned ip; uint8_t *dec; size = 4*1024; /* msg */ @@ -339,7 +615,7 @@ static void amdgpu_cs_vcn_dec_decode(void) size += ALIGN(dpb_size, 4*1024); size += ALIGN(dt_size, 4*1024); - num_resources = 0; + num_resources = 0; alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = dec_buf.handle; resources[num_resources++] = ib_handle; @@ -382,14 +658,22 @@ static void amdgpu_cs_vcn_dec_decode(void) vcn_dec_cmd(it_addr, 0x204, &len); vcn_dec_cmd(ctx_addr, 0x206, &len); - 
ib_cpu[len++] = reg[vcn_reg_index].cntl; - ib_cpu[len++] = 0x1; - for (; len % 16; ) { - ib_cpu[len++] = reg[vcn_reg_index].nop; - ib_cpu[len++] = 0; + if (vcn_dec_sw_ring == false) { + ib_cpu[len++] = reg[vcn_reg_index].cntl; + ib_cpu[len++] = 0x1; + for (; len % 16; ) { + ib_cpu[len++] = reg[vcn_reg_index].nop; + ib_cpu[len++] = 0; + } } - r = submit(len, AMDGPU_HW_IP_VCN_DEC); + if (vcn_unified_ring) { + amdgpu_cs_sq_ib_tail(ib_cpu + len); + ip = AMDGPU_HW_IP_VCN_ENC; + } else + ip = AMDGPU_HW_IP_VCN_DEC; + + r = submit(len, ip); CU_ASSERT_EQUAL(r, 0); for (i = 0, sum = 0; i < dt_size; ++i) @@ -403,9 +687,10 @@ static void amdgpu_cs_vcn_dec_decode(void) static void amdgpu_cs_vcn_dec_destroy(void) { struct amdgpu_vcn_bo msg_buf; + unsigned ip; int len, r; - num_resources = 0; + num_resources = 0; alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = msg_buf.handle; resources[num_resources++] = ib_handle; @@ -417,18 +702,28 @@ static void amdgpu_cs_vcn_dec_destroy(void) memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg)); len = 0; - ib_cpu[len++] = reg[vcn_reg_index].data0; - ib_cpu[len++] = msg_buf.addr; - ib_cpu[len++] = reg[vcn_reg_index].data1; - ib_cpu[len++] = msg_buf.addr >> 32; - ib_cpu[len++] = reg[vcn_reg_index].cmd; - ib_cpu[len++] = 0; - for (; len % 16; ) { - ib_cpu[len++] = reg[vcn_reg_index].nop; + if (vcn_dec_sw_ring == true) + vcn_dec_cmd(msg_buf.addr, 0, &len); + else { + ib_cpu[len++] = reg[vcn_reg_index].data0; + ib_cpu[len++] = msg_buf.addr; + ib_cpu[len++] = reg[vcn_reg_index].data1; + ib_cpu[len++] = msg_buf.addr >> 32; + ib_cpu[len++] = reg[vcn_reg_index].cmd; ib_cpu[len++] = 0; + for (; len % 16; ) { + ib_cpu[len++] = reg[vcn_reg_index].nop; + ib_cpu[len++] = 0; + } } - r = submit(len, AMDGPU_HW_IP_VCN_DEC); + if (vcn_unified_ring) { + amdgpu_cs_sq_ib_tail(ib_cpu + len); + ip = AMDGPU_HW_IP_VCN_ENC; + } else + ip = AMDGPU_HW_IP_VCN_DEC; + + r = submit(len, ip); CU_ASSERT_EQUAL(r, 0); 
free_resource(&msg_buf); @@ -436,15 +731,905 @@ static void amdgpu_cs_vcn_dec_destroy(void) static void amdgpu_cs_vcn_enc_create(void) { - /* TODO */ + int len, r; + uint32_t *p_task_size = NULL; + uint32_t task_offset = 0, st_offset; + uint32_t *st_size = NULL; + unsigned width = 160, height = 128, buf_size; + uint32_t fw_maj = 1, fw_min = 9; + + if (vcn_ip_version_major == 2) { + fw_maj = 1; + fw_min = 1; + } else if (vcn_ip_version_major == 3) { + fw_maj = 1; + fw_min = 0; + } + + gWidth = width; + gHeight = height; + buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; + enc_task_id = 1; + + num_resources = 0; + alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); + alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT); + resources[num_resources++] = enc_buf.handle; + resources[num_resources++] = cpb_buf.handle; + resources[num_resources++] = ib_handle; + + r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr); + memset(enc_buf.ptr, 0, 128 * 1024); + r = amdgpu_bo_cpu_unmap(enc_buf.handle); + + r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr); + memset(enc_buf.ptr, 0, buf_size * 2); + r = amdgpu_bo_cpu_unmap(cpb_buf.handle); + + len = 0; + + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, &len, true); + + /* session info */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ + ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); + ib_cpu[len++] = enc_buf.addr >> 32; + ib_cpu[len++] = enc_buf.addr; + ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ + *st_size = (len - st_offset) * 4; + + /* task info */ + task_offset = len; + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ + p_task_size = &ib_cpu[len++]; + ib_cpu[len++] = enc_task_id++; /* task_id */ + ib_cpu[len++] = 0; /* feedback */ + *st_size = (len - st_offset) * 4; + + /* op init */ + st_offset = len; + st_size = 
&ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x01000001; /* RENCODE_IB_OP_INITIALIZE */ + *st_size = (len - st_offset) * 4; + + /* session_init */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000003; /* RENCODE_IB_PARAM_SESSION_INIT */ + ib_cpu[len++] = 1; /* RENCODE_ENCODE_STANDARD_H264 */ + ib_cpu[len++] = width; + ib_cpu[len++] = height; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; /* pre encode mode */ + ib_cpu[len++] = 0; /* chroma enabled : false */ + *st_size = (len - st_offset) * 4; + + /* slice control */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200001; /* RENCODE_H264_IB_PARAM_SLICE_CONTROL */ + ib_cpu[len++] = 0; /* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */ + ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16; + *st_size = (len - st_offset) * 4; + + /* enc spec misc */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200002; /* RENCODE_H264_IB_PARAM_SPEC_MISC */ + ib_cpu[len++] = 0; /* constrained intra pred flag */ + ib_cpu[len++] = 0; /* cabac enable */ + ib_cpu[len++] = 0; /* cabac init idc */ + ib_cpu[len++] = 1; /* half pel enabled */ + ib_cpu[len++] = 1; /* quarter pel enabled */ + ib_cpu[len++] = 100; /* BASELINE profile */ + ib_cpu[len++] = 11; /* level */ + if (vcn_ip_version_major == 3) { + ib_cpu[len++] = 0; /* b_picture_enabled */ + ib_cpu[len++] = 0; /* weighted_bipred_idc */ + } + *st_size = (len - st_offset) * 4; + + /* deblocking filter */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200004; /* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */ + ib_cpu[len++] = 0; /* disable deblocking filter idc */ + ib_cpu[len++] = 0; /* alpha c0 offset */ + ib_cpu[len++] = 0; /* tc offset */ + ib_cpu[len++] = 0; /* cb offset */ + ib_cpu[len++] = 0; /* cr offset */ + *st_size = (len - st_offset) * 4; + + /* layer control */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + 
ib_cpu[len++] = 0x00000004; /* RENCODE_IB_PARAM_LAYER_CONTROL */ + ib_cpu[len++] = 1; /* max temporal layer */ + ib_cpu[len++] = 1; /* no of temporal layer */ + *st_size = (len - st_offset) * 4; + + /* rc_session init */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000006; /* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */ + ib_cpu[len++] = 0; /* rate control */ + ib_cpu[len++] = 48; /* vbv buffer level */ + *st_size = (len - st_offset) * 4; + + /* quality params */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000009; /* RENCODE_IB_PARAM_QUALITY_PARAMS */ + ib_cpu[len++] = 0; /* vbaq mode */ + ib_cpu[len++] = 0; /* scene change sensitivity */ + ib_cpu[len++] = 0; /* scene change min idr interval */ + ib_cpu[len++] = 0; + if (vcn_ip_version_major == 3) + ib_cpu[len++] = 0; + *st_size = (len - st_offset) * 4; + + /* layer select */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ + ib_cpu[len++] = 0; /* temporal layer */ + *st_size = (len - st_offset) * 4; + + /* rc layer init */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000007; /* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */ + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + ib_cpu[len++] = 25; + ib_cpu[len++] = 1; + ib_cpu[len++] = 0x01312d00; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + *st_size = (len - st_offset) * 4; + + /* layer select */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ + ib_cpu[len++] = 0; /* temporal layer */ + *st_size = (len - st_offset) * 4; + + /* rc per pic */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000008; /* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */ + ib_cpu[len++] = 20; + ib_cpu[len++] = 0; + ib_cpu[len++] = 51; + ib_cpu[len++] = 0; + ib_cpu[len++] = 1; + ib_cpu[len++] = 0; + 
ib_cpu[len++] = 1; + *st_size = (len - st_offset) * 4; + + /* op init rc */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x01000004; /* RENCODE_IB_OP_INIT_RC */ + *st_size = (len - st_offset) * 4; + + /* op init rc vbv */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x01000005; /* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */ + *st_size = (len - st_offset) * 4; + + *p_task_size = (len - task_offset) * 4; + + if (vcn_unified_ring) + amdgpu_cs_sq_ib_tail(ib_cpu + len); + + r = submit(len, AMDGPU_HW_IP_VCN_ENC); + CU_ASSERT_EQUAL(r, 0); +} + +static int32_t h264_se (bufferInfo * bufInfo) +{ + uint32_t ret; + + ret = bs_read_ue (bufInfo); + if ((ret & 0x1) == 0) { + ret >>= 1; + int32_t temp = 0 - ret; + return temp; + } + + return (ret + 1) >> 1; +} + +static void h264_check_0s (bufferInfo * bufInfo, int count) +{ + uint32_t val; + + val = bs_read_u (bufInfo, count); + if (val != 0) { + printf ("field error - %d bits should be 0 is %x\n", count, val); + } +} + +static inline int bs_eof(bufferInfo * bufinfo) +{ + if (bufinfo->decBuffer >= bufinfo->end) + return 1; + else + return 0; +} + +static inline uint32_t bs_read_u1(bufferInfo *bufinfo) +{ + uint32_t r = 0; + uint32_t temp = 0; + + bufinfo->numOfBitsInBuffer--; + if (! 
bs_eof(bufinfo)) { + temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer); + r = temp & 0x01; + } + + if (bufinfo->numOfBitsInBuffer == 0) { + bufinfo->decBuffer++; + bufinfo->decData = *bufinfo->decBuffer; + bufinfo->numOfBitsInBuffer = 8; + } + + return r; +} + +static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n) +{ + uint32_t r = 0; + int i; + + for (i = 0; i < n; i++) { + r |= ( bs_read_u1(bufinfo) << ( n - i - 1 ) ); + } + + return r; +} + +static inline uint32_t bs_read_ue(bufferInfo* bufinfo) +{ + int32_t r = 0; + int i = 0; + + while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) { + i++; + } + r = bs_read_u(bufinfo, i); + r += (1 << i) - 1; + return r; +} + +static uint32_t remove_03 (uint8_t * bptr, uint32_t len) +{ + uint32_t nal_len = 0; + while (nal_len + 2 < len) { + if (bptr[0] == 0 && bptr[1] == 0 && bptr[2] == 3) { + bptr += 2; + nal_len += 2; + len--; + memmove (bptr, bptr + 1, len - nal_len); + } else { + bptr++; + nal_len++; + } + } + return len; +} + +static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo) +{ + uint32_t lastScale = 8, nextScale = 8; + uint32_t jx; + int deltaScale; + + for (jx = 0; jx < sizeOfScalingList; jx++) { + if (nextScale != 0) { + deltaScale = h264_se (bufInfo); + nextScale = (lastScale + deltaScale + 256) % 256; + } + if (nextScale == 0) { + lastScale = lastScale; + } else { + lastScale = nextScale; + } + } +} + +static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo) +{ + uint32_t temp; + + dec->profile = bs_read_u (bufInfo, 8); + bs_read_u (bufInfo, 1); /* constaint_set0_flag */ + bs_read_u (bufInfo, 1); /* constaint_set1_flag */ + bs_read_u (bufInfo, 1); /* constaint_set2_flag */ + bs_read_u (bufInfo, 1); /* constaint_set3_flag */ + bs_read_u (bufInfo, 1); /* constaint_set4_flag */ + bs_read_u (bufInfo, 1); /* constaint_set5_flag */ + + + h264_check_0s (bufInfo, 2); + dec->level_idc = bs_read_u (bufInfo, 8); + bs_read_ue 
(bufInfo); /* SPS id*/ + + if (dec->profile == 100 || dec->profile == 110 || + dec->profile == 122 || dec->profile == 144) { + uint32_t chroma_format_idc = bs_read_ue (bufInfo); + if (chroma_format_idc == 3) { + bs_read_u (bufInfo, 1); /* residual_colour_transform_flag */ + } + bs_read_ue (bufInfo); /* bit_depth_luma_minus8 */ + bs_read_ue (bufInfo); /* bit_depth_chroma_minus8 */ + bs_read_u (bufInfo, 1); /* qpprime_y_zero_transform_bypass_flag */ + uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1); + + if (seq_scaling_matrix_present_flag) { + for (uint32_t ix = 0; ix < 8; ix++) { + temp = bs_read_u (bufInfo, 1); + if (temp) { + scaling_list (ix, ix < 6 ? 16 : 64, bufInfo); + } + } + } + } + + bs_read_ue (bufInfo); /* log2_max_frame_num_minus4 */ + uint32_t pic_order_cnt_type = bs_read_ue (bufInfo); + + if (pic_order_cnt_type == 0) { + bs_read_ue (bufInfo); /* log2_max_pic_order_cnt_lsb_minus4 */ + } else if (pic_order_cnt_type == 1) { + bs_read_u (bufInfo, 1); /* delta_pic_order_always_zero_flag */ + h264_se (bufInfo); /* offset_for_non_ref_pic */ + h264_se (bufInfo); /* offset_for_top_to_bottom_field */ + temp = bs_read_ue (bufInfo); + for (uint32_t ix = 0; ix < temp; ix++) { + h264_se (bufInfo); /* offset_for_ref_frame[index] */ + } + } + bs_read_ue (bufInfo); /* num_ref_frames */ + bs_read_u (bufInfo, 1); /* gaps_in_frame_num_flag */ + uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1; + + dec->pic_width = PicWidthInMbs * 16; + uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1; + + dec->pic_height = PicHeightInMapUnits * 16; + uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1); + if (!frame_mbs_only_flag) { + bs_read_u (bufInfo, 1); /* mb_adaptive_frame_field_flag */ + } + bs_read_u (bufInfo, 1); /* direct_8x8_inference_flag */ + temp = bs_read_u (bufInfo, 1); + if (temp) { + bs_read_ue (bufInfo); /* frame_crop_left_offset */ + bs_read_ue (bufInfo); /* frame_crop_right_offset */ + bs_read_ue (bufInfo); /* frame_crop_top_offset */ + 
bs_read_ue (bufInfo); /* frame_crop_bottom_offset */ + } + temp = bs_read_u (bufInfo, 1); /* VUI Parameters */ +} + +static void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo) +{ + uint32_t temp; + + bs_read_ue (bufInfo); /* first_mb_in_slice */ + temp = bs_read_ue (bufInfo); + dec->slice_type = ((temp > 5) ? (temp - 5) : temp); +} + +static uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo) +{ + uint8_t type = 0; + + h264_check_0s (bufInfo, 1); + dec->nal_ref_idc = bs_read_u (bufInfo, 2); + dec->nal_unit_type = type = bs_read_u (bufInfo, 5); + switch (type) + { + case H264_NAL_TYPE_NON_IDR_SLICE: + case H264_NAL_TYPE_IDR_SLICE: + h264_slice_header (dec, bufInfo); + break; + case H264_NAL_TYPE_SEQ_PARAM: + h264_parse_sequence_parameter_set (dec, bufInfo); + break; + case H264_NAL_TYPE_PIC_PARAM: + case H264_NAL_TYPE_SEI: + case H264_NAL_TYPE_ACCESS_UNIT: + case H264_NAL_TYPE_SEQ_EXTENSION: + /* NOP */ + break; + default: + printf ("Nal type unknown %d \n ", type); + break; + } + return type; +} + +static uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen) +{ + uint32_t val; + uint32_t offset, startBytes; + + offset = startBytes = 0; + if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) { + pBuf += 4; + offset = 4; + startBytes = 1; + } else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) { + pBuf += 3; + offset = 3; + startBytes = 1; + } + val = 0xffffffff; + while (offset < bufLen - 3) { + val <<= 8; + val |= *pBuf++; + offset++; + if (val == H264_START_CODE) + return offset - 4; + + if ((val & 0x00ffffff) == H264_START_CODE) + return offset - 3; + } + if (bufLen - offset <= 3 && startBytes == 0) { + startBytes = 0; + return 0; + } + + return offset; +} + +static int verify_checksum(uint8_t *buffer, uint32_t buffer_size) +{ + uint32_t buffer_pos = 0; + int done = 0; + h264_decode dec; + + memset(&dec, 0, sizeof(h264_decode)); + do { + uint32_t ret; + + ret = h264_find_next_start_code (buffer + buffer_pos, 
+ buffer_size - buffer_pos); + if (ret == 0) { + done = 1; + if (buffer_pos == 0) { + fprintf (stderr, + "couldn't find start code in buffer from 0\n"); + } + } else { + /* have a complete NAL from buffer_pos to end */ + if (ret > 3) { + uint32_t nal_len; + bufferInfo bufinfo; + + nal_len = remove_03 (buffer + buffer_pos, ret); + bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4); + bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8; + bufinfo.end = buffer + buffer_pos + nal_len; + bufinfo.numOfBitsInBuffer = 8; + bufinfo.decData = *bufinfo.decBuffer; + h264_parse_nal (&dec, &bufinfo); + } + buffer_pos += ret; /* buffer_pos points to next code */ + } + } while (done == 0); + + if ((dec.pic_width == gWidth) && + (dec.pic_height == gHeight) && + (dec.slice_type == gSliceType)) + return 0; + else + return -1; +} + +static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type) +{ + uint32_t *fb_ptr; + uint8_t *bs_ptr; + uint32_t size; + int r; +/* uint64_t s[3] = {0, 1121279001727, 1059312481445}; */ + + r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr); + CU_ASSERT_EQUAL(r, 0); + fb_ptr = (uint32_t*)fb_buf.ptr; + size = fb_ptr[6]; + r = amdgpu_bo_cpu_unmap(fb_buf.handle); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr); + CU_ASSERT_EQUAL(r, 0); + + bs_ptr = (uint8_t*)bs_buf.ptr; + r = verify_checksum(bs_ptr, size); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_bo_cpu_unmap(bs_buf.handle); + + CU_ASSERT_EQUAL(r, 0); +} + +static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) +{ + struct amdgpu_vcn_bo bs_buf, fb_buf, vbv_buf; + int len, r, i; + unsigned width = 160, height = 128, buf_size; + uint32_t *p_task_size = NULL; + uint32_t task_offset = 0, st_offset; + uint32_t *st_size = NULL; + uint32_t fw_maj = 1, fw_min = 9; + + if (vcn_ip_version_major == 2) { + fw_maj = 1; + fw_min = 1; + } else if (vcn_ip_version_major == 3) { + fw_maj 
= 1; + fw_min = 0; + } + gSliceType = frame_type; + buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; + + num_resources = 0; + alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); + alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); + alloc_resource(&vbv_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT); + resources[num_resources++] = enc_buf.handle; + resources[num_resources++] = cpb_buf.handle; + resources[num_resources++] = bs_buf.handle; + resources[num_resources++] = fb_buf.handle; + resources[num_resources++] = vbv_buf.handle; + resources[num_resources++] = ib_handle; + + + r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr); + memset(bs_buf.ptr, 0, 4096); + r = amdgpu_bo_cpu_unmap(bs_buf.handle); + + r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr); + memset(fb_buf.ptr, 0, 4096); + r = amdgpu_bo_cpu_unmap(fb_buf.handle); + + r = amdgpu_bo_cpu_map(vbv_buf.handle, (void **)&vbv_buf.ptr); + CU_ASSERT_EQUAL(r, 0); + + for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++) + memcpy(vbv_buf.ptr + i * ALIGN(width, 256), frame + i * width, width); + + r = amdgpu_bo_cpu_unmap(vbv_buf.handle); + CU_ASSERT_EQUAL(r, 0); + + len = 0; + + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, &len, true); + + /* session info */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ + ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); + ib_cpu[len++] = enc_buf.addr >> 32; + ib_cpu[len++] = enc_buf.addr; + ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */; + *st_size = (len - st_offset) * 4; + + /* task info */ + task_offset = len; + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ + p_task_size = &ib_cpu[len++]; + ib_cpu[len++] = enc_task_id++; /* task_id */ + ib_cpu[len++] = 1; /* feedback */ + *st_size = (len - st_offset) * 4; + + if (frame_type == 2) { + /* sps */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + 
if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */ + else + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3 */ + ib_cpu[len++] = 0x00000002; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */ + ib_cpu[len++] = 0x00000011; /* sps len */ + ib_cpu[len++] = 0x00000001; /* start code */ + ib_cpu[len++] = 0x6764440b; + ib_cpu[len++] = 0xac54c284; + ib_cpu[len++] = 0x68078442; + ib_cpu[len++] = 0x37000000; + *st_size = (len - st_offset) * 4; + + /* pps */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/ + else + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3*/ + ib_cpu[len++] = 0x00000003; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */ + ib_cpu[len++] = 0x00000008; /* pps len */ + ib_cpu[len++] = 0x00000001; /* start code */ + ib_cpu[len++] = 0x68ce3c80; + *st_size = (len - st_offset) * 4; + } + + /* slice header */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */ + else + ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 2,3 */ + if (frame_type == 2) { + ib_cpu[len++] = 0x65000000; + ib_cpu[len++] = 0x11040000; + } else { + ib_cpu[len++] = 0x41000000; + ib_cpu[len++] = 0x34210000; + } + ib_cpu[len++] = 0xe0000000; + for(i = 0; i < 13; i++) + ib_cpu[len++] = 0x00000000; + + ib_cpu[len++] = 0x00000001; + ib_cpu[len++] = 0x00000008; + ib_cpu[len++] = 0x00020000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000001; + ib_cpu[len++] = 0x00000015; + ib_cpu[len++] = 0x00020001; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000001; + ib_cpu[len++] = 0x00000003; + for(i = 0; i < 22; i++) + ib_cpu[len++] = 0x00000000; + + *st_size = (len - st_offset) * 4; + + /* encode params */ + st_offset = len; + st_size = &ib_cpu[len++]; /* 
size */ + if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1*/ + else + ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/ + ib_cpu[len++] = frame_type; + ib_cpu[len++] = 0x0001f000; + ib_cpu[len++] = vbv_buf.addr >> 32; + ib_cpu[len++] = vbv_buf.addr; + ib_cpu[len++] = (vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32; + ib_cpu[len++] = vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32); + ib_cpu[len++] = 0x00000100; + ib_cpu[len++] = 0x00000080; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + /* encode params h264 */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */ + if (vcn_ip_version_major != 3) { + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + } else { + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + } + *st_size = (len - st_offset) * 4; + + /* encode context */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */ + else + ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER vcn 2,3 */ + ib_cpu[len++] = cpb_buf.addr >> 32; + ib_cpu[len++] = cpb_buf.addr; + ib_cpu[len++] = 0x00000000; /* swizzle mode */ + ib_cpu[len++] = 0x00000100; /* luma pitch */ + ib_cpu[len++] = 0x00000100; /* chroma pitch 
*/ + ib_cpu[len++] = 0x00000003; /* no reconstructed picture */ + ib_cpu[len++] = 0x00000000; /* reconstructed pic 1 luma offset */ + ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32); /* pic1 chroma offset */ + ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; /* pic2 luma offset */ + ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2; /* pic2 chroma offset */ + + for (int i = 0; i < 136; i++) + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + /* bitstream buffer */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */ + else + ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER vcn 2,3 */ + ib_cpu[len++] = 0x00000000; /* mode */ + ib_cpu[len++] = bs_buf.addr >> 32; + ib_cpu[len++] = bs_buf.addr; + ib_cpu[len++] = 0x0001f000; + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + /* feedback */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x00000010; /* FEEDBACK_BUFFER vcn 1 */ + else + ib_cpu[len++] = 0x00000015; /* FEEDBACK_BUFFER vcn 2,3 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = fb_buf.addr >> 32; + ib_cpu[len++] = fb_buf.addr; + ib_cpu[len++] = 0x00000010; + ib_cpu[len++] = 0x00000028; + *st_size = (len - st_offset) * 4; + + /* intra refresh */ + st_offset = len; + st_size = &ib_cpu[len++]; + if(vcn_ip_version_major == 1) + ib_cpu[len++] = 0x0000000c; /* INTRA_REFRESH vcn 1 */ + else + ib_cpu[len++] = 0x00000010; /* INTRA_REFRESH vcn 2,3 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + if(vcn_ip_version_major != 1) { + /* Input Format */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x0000000c; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + 
ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_PACKING_FORMAT_NV12 */ + *st_size = (len - st_offset) * 4; + + /* Output Format */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x0000000d; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ + *st_size = (len - st_offset) * 4; + } + /* op_speed */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x01000006; /* SPEED_ENCODING_MODE */ + *st_size = (len - st_offset) * 4; + + /* op_enc */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x01000003; + *st_size = (len - st_offset) * 4; + + *p_task_size = (len - task_offset) * 4; + + if (vcn_unified_ring) + amdgpu_cs_sq_ib_tail(ib_cpu + len); + + r = submit(len, AMDGPU_HW_IP_VCN_ENC); + CU_ASSERT_EQUAL(r, 0); + + /* check result */ + check_result(fb_buf, bs_buf, frame_type); + + free_resource(&fb_buf); + free_resource(&bs_buf); + free_resource(&vbv_buf); } static void amdgpu_cs_vcn_enc_encode(void) { - /* TODO */ + amdgpu_cs_vcn_enc_encode_frame(2); /* IDR frame */ } static void amdgpu_cs_vcn_enc_destroy(void) { - /* TODO */ + int len = 0, r; + uint32_t *p_task_size = NULL; + uint32_t task_offset = 0, st_offset; + uint32_t *st_size = NULL; + uint32_t fw_maj = 1, fw_min = 9; + + if (vcn_ip_version_major == 2) { + fw_maj = 1; + fw_min = 1; + } else if (vcn_ip_version_major == 3) { + fw_maj = 1; + fw_min = 0; + } + + num_resources = 0; +/* alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */ + resources[num_resources++] = enc_buf.handle; + resources[num_resources++] = ib_handle; + + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, &len, true); + + /* session info */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000001; /* 
RENCODE_IB_PARAM_SESSION_INFO */ + ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); + ib_cpu[len++] = enc_buf.addr >> 32; + ib_cpu[len++] = enc_buf.addr; + ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ + *st_size = (len - st_offset) * 4; + + /* task info */ + task_offset = len; + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ + p_task_size = &ib_cpu[len++]; + ib_cpu[len++] = enc_task_id++; /* task_id */ + ib_cpu[len++] = 0; /* feedback */ + *st_size = (len - st_offset) * 4; + + /* op close */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x01000002; /* RENCODE_IB_OP_CLOSE_SESSION */ + *st_size = (len - st_offset) * 4; + + *p_task_size = (len - task_offset) * 4; + + if (vcn_unified_ring) + amdgpu_cs_sq_ib_tail(ib_cpu + len); + + r = submit(len, AMDGPU_HW_IP_VCN_ENC); + CU_ASSERT_EQUAL(r, 0); + + free_resource(&cpb_buf); + free_resource(&enc_buf); } diff --git a/lib/libdrm/tests/drmdevice.c b/lib/libdrm/tests/drmdevice.c index f1c1cd3a0..b4b62d9c5 100644 --- a/lib/libdrm/tests/drmdevice.c +++ b/lib/libdrm/tests/drmdevice.c @@ -142,7 +142,7 @@ main(void) for (int j = 0; j < DRM_NODE_MAX; j++) { if (devices[i]->available_nodes & 1 << j) { printf("--- Opening device node %s ---\n", devices[i]->nodes[j]); - fd = open(devices[i]->nodes[j], O_RDONLY | O_CLOEXEC, 0); + fd = open(devices[i]->nodes[j], O_RDONLY | O_CLOEXEC); if (fd < 0) { printf("Failed - %s (%d)\n", strerror(errno), errno); continue; diff --git a/lib/libdrm/tests/modeprint/modeprint.c b/lib/libdrm/tests/modeprint/modeprint.c index 9372ad92d..0f644c748 100644 --- a/lib/libdrm/tests/modeprint/modeprint.c +++ b/lib/libdrm/tests/modeprint/modeprint.c @@ -135,7 +135,7 @@ static int printConnector(int fd, drmModeResPtr res, drmModeConnectorPtr connect drmModePropertyPtr props; const char *connector_type_name = NULL; - connector_type_name = util_lookup_connector_type_name(connector->connector_type); + 
connector_type_name = drmModeGetConnectorTypeName(connector->connector_type); if (connector_type_name) printf("Connector: %s-%d\n", connector_type_name, diff --git a/lib/libdrm/tests/modetest/buffers.c b/lib/libdrm/tests/modetest/buffers.c index 8a8d9e014..0b55aeddf 100644 --- a/lib/libdrm/tests/modetest/buffers.c +++ b/lib/libdrm/tests/modetest/buffers.c @@ -37,6 +37,7 @@ #include "libdrm_macros.h" #include "xf86drm.h" +#include "xf86drmMode.h" #include "buffers.h" @@ -44,10 +45,9 @@ struct bo { int fd; void *ptr; - size_t size; - size_t offset; - size_t pitch; - unsigned handle; + uint64_t size; + uint32_t pitch; + uint32_t handle; }; /* ----------------------------------------------------------------------------- @@ -57,7 +57,6 @@ struct bo static struct bo * bo_create_dumb(int fd, unsigned int width, unsigned int height, unsigned int bpp) { - struct drm_mode_create_dumb arg; struct bo *bo; int ret; @@ -67,12 +66,8 @@ bo_create_dumb(int fd, unsigned int width, unsigned int height, unsigned int bpp return NULL; } - memset(&arg, 0, sizeof(arg)); - arg.bpp = bpp; - arg.width = width; - arg.height = height; - - ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &arg); + ret = drmModeCreateDumbBuffer(fd, width, height, bpp, 0, &bo->handle, + &bo->pitch, &bo->size); if (ret) { fprintf(stderr, "failed to create dumb buffer: %s\n", strerror(errno)); @@ -81,28 +76,22 @@ bo_create_dumb(int fd, unsigned int width, unsigned int height, unsigned int bpp } bo->fd = fd; - bo->handle = arg.handle; - bo->size = arg.size; - bo->pitch = arg.pitch; return bo; } static int bo_map(struct bo *bo, void **out) { - struct drm_mode_map_dumb arg; void *map; int ret; + uint64_t offset; - memset(&arg, 0, sizeof(arg)); - arg.handle = bo->handle; - - ret = drmIoctl(bo->fd, DRM_IOCTL_MODE_MAP_DUMB, &arg); + ret = drmModeMapDumbBuffer(bo->fd, bo->handle, &offset); if (ret) return ret; map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bo->fd, arg.offset); + bo->fd, offset); if (map == 
MAP_FAILED) return -EINVAL; @@ -340,13 +329,9 @@ bo_create(int fd, unsigned int format, void bo_destroy(struct bo *bo) { - struct drm_mode_destroy_dumb arg; int ret; - memset(&arg, 0, sizeof(arg)); - arg.handle = bo->handle; - - ret = drmIoctl(bo->fd, DRM_IOCTL_MODE_DESTROY_DUMB, &arg); + ret = drmModeDestroyDumbBuffer(bo->fd, bo->handle); if (ret) fprintf(stderr, "failed to destroy dumb buffer: %s\n", strerror(errno)); diff --git a/lib/libdrm/tests/modetest/modetest.c b/lib/libdrm/tests/modetest/modetest.c index d6ab9dc8f..42e2d1f47 100644 --- a/lib/libdrm/tests/modetest/modetest.c +++ b/lib/libdrm/tests/modetest/modetest.c @@ -187,11 +187,9 @@ static bit_name_fn(mode_flag) static void dump_fourcc(uint32_t fourcc) { - printf(" %c%c%c%c", - fourcc, - fourcc >> 8, - fourcc >> 16, - fourcc >> 24); + char *name = drmGetFormatName(fourcc); + printf(" %s", name); + free(name); } static void dump_encoders(struct device *dev) @@ -656,7 +654,7 @@ static struct resources *get_resources(struct device *dev) int num; num = asprintf(&connector->name, "%s-%u", - util_lookup_connector_type_name(conn->connector_type), + drmModeGetConnectorTypeName(conn->connector_type), conn->connector_type_id); if (num < 0) goto error; diff --git a/lib/libdrm/tests/proptest/proptest.c b/lib/libdrm/tests/proptest/proptest.c index 88bed10b2..97cb01fa5 100644 --- a/lib/libdrm/tests/proptest/proptest.c +++ b/lib/libdrm/tests/proptest/proptest.c @@ -192,7 +192,7 @@ static void listConnectorProperties(void) } printf("Connector %u (%s-%u)\n", c->connector_id, - util_lookup_connector_type_name(c->connector_type), + drmModeGetConnectorTypeName(c->connector_type), c->connector_type_id); listObjectProperties(c->connector_id, diff --git a/lib/libdrm/tests/util/kms.c b/lib/libdrm/tests/util/kms.c index 39a93866a..95b7a455d 100644 --- a/lib/libdrm/tests/util/kms.c +++ b/lib/libdrm/tests/util/kms.c @@ -96,33 +96,6 @@ const char *util_lookup_connector_status_name(unsigned int status) 
ARRAY_SIZE(connector_status_names)); } -static const struct type_name connector_type_names[] = { - { DRM_MODE_CONNECTOR_Unknown, "unknown" }, - { DRM_MODE_CONNECTOR_VGA, "VGA" }, - { DRM_MODE_CONNECTOR_DVII, "DVI-I" }, - { DRM_MODE_CONNECTOR_DVID, "DVI-D" }, - { DRM_MODE_CONNECTOR_DVIA, "DVI-A" }, - { DRM_MODE_CONNECTOR_Composite, "composite" }, - { DRM_MODE_CONNECTOR_SVIDEO, "s-video" }, - { DRM_MODE_CONNECTOR_LVDS, "LVDS" }, - { DRM_MODE_CONNECTOR_Component, "component" }, - { DRM_MODE_CONNECTOR_9PinDIN, "9-pin DIN" }, - { DRM_MODE_CONNECTOR_DisplayPort, "DP" }, - { DRM_MODE_CONNECTOR_HDMIA, "HDMI-A" }, - { DRM_MODE_CONNECTOR_HDMIB, "HDMI-B" }, - { DRM_MODE_CONNECTOR_TV, "TV" }, - { DRM_MODE_CONNECTOR_eDP, "eDP" }, - { DRM_MODE_CONNECTOR_VIRTUAL, "Virtual" }, - { DRM_MODE_CONNECTOR_DSI, "DSI" }, - { DRM_MODE_CONNECTOR_DPI, "DPI" }, -}; - -const char *util_lookup_connector_type_name(unsigned int type) -{ - return util_lookup_type_name(type, connector_type_names, - ARRAY_SIZE(connector_type_names)); -} - static const char * const modules[] = { "i915", "amdgpu", @@ -150,6 +123,8 @@ static const char * const modules[] = { "komeda", "imx-dcss", "mxsfb-drm", + "simpledrm", + "imx-lcdif", }; int util_open(const char *device, const char *module) diff --git a/lib/libdrm/tests/util/kms.h b/lib/libdrm/tests/util/kms.h index dde2ed2c5..f79f4617f 100644 --- a/lib/libdrm/tests/util/kms.h +++ b/lib/libdrm/tests/util/kms.h @@ -28,7 +28,6 @@ const char *util_lookup_encoder_type_name(unsigned int type); const char *util_lookup_connector_status_name(unsigned int type); -const char *util_lookup_connector_type_name(unsigned int type); int util_open(const char *device, const char *module); |