summaryrefslogtreecommitdiff
path: root/lib/mesa/.gitlab-ci/lava
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2022-02-24 01:57:18 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2022-02-24 01:57:18 +0000
commitb24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch)
tree658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/.gitlab-ci/lava
parent57768bbb154c2879d34ec20e401b19472e77aaf7 (diff)
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/.gitlab-ci/lava')
-rwxr-xr-xlib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml120
-rwxr-xr-xlib/mesa/.gitlab-ci/lava/lava-submit.sh47
-rwxr-xr-xlib/mesa/.gitlab-ci/lava/lava_job_submitter.py351
3 files changed, 518 insertions, 0 deletions
diff --git a/lib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml b/lib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml
new file mode 100755
index 000000000..7bd368468
--- /dev/null
+++ b/lib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml
@@ -0,0 +1,120 @@
+# Base template for jobs that run tests on hardware through LAVA. The
+# per-architecture .lava-test:* templates in this file extend it.
+.lava-test:
+ extends:
+ - .ci-run-policy
+ # Cancel job if a newer commit is pushed to the same branch
+ interruptible: true
+ variables:
+ GIT_STRATEGY: none # testing doesn't build anything from source
+ FDO_CI_CONCURRENT: 6 # should be replaced by per-machine definitions
+ DEQP_VER: gles2
+ # proxy used to cache data locally
+ FDO_HTTP_CACHE_URI: "http://caching-proxy/cache/?uri="
+ # base system generated by the container build job, shared between many pipelines
+ BASE_SYSTEM_HOST_PREFIX: "${MINIO_HOST}/mesa-lava"
+ BASE_SYSTEM_MAINLINE_HOST_PATH: "${BASE_SYSTEM_HOST_PREFIX}/${FDO_UPSTREAM_REPO}/${DISTRIBUTION_TAG}/${ARCH}"
+ BASE_SYSTEM_FORK_HOST_PATH: "${BASE_SYSTEM_HOST_PREFIX}/${CI_PROJECT_PATH}/${DISTRIBUTION_TAG}/${ARCH}"
+ # per-job build artifacts
+ MESA_BUILD_PATH: "${PIPELINE_ARTIFACTS_BASE}/mesa-${ARCH}.tar.gz"
+ JOB_ROOTFS_OVERLAY_PATH: "${JOB_ARTIFACTS_BASE}/job-rootfs-overlay.tar.gz"
+ JOB_RESULTS_PATH: "${JOB_ARTIFACTS_BASE}/results.tar.gz"
+ PIGLIT_NO_WINDOW: 1
+ VISIBILITY_GROUP: "Collabora+fdo"
+ # The whole job is delegated to lava-submit.sh, which submits the LAVA job
+ # and follows its log.
+ script:
+ - ./artifacts/lava/lava-submit.sh
+ # Keep results/ whether the job passes or fails, minus shader cache files.
+ artifacts:
+ name: "mesa_${CI_JOB_NAME}"
+ when: always
+ paths:
+ - results/
+ exclude:
+ - results/*.shader_cache
+ # Download the results tarball from JOB_RESULTS_PATH and unpack it into the
+ # current directory.
+ after_script:
+ - wget -q "https://${JOB_RESULTS_PATH}" -O- | tar -xz
+
+# LAVA test template for armhf devices: zImage kernel booted through u-boot.
+# KERNEL_IMAGE_NAME, KERNEL_IMAGE_TYPE and BOOT_METHOD are forwarded to the
+# job submitter by lava-submit.sh.
+.lava-test:armhf:
+ variables:
+ ARCH: armhf
+ KERNEL_IMAGE_NAME: zImage
+ KERNEL_IMAGE_TYPE: "zimage"
+ BOOT_METHOD: u-boot
+ HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
+ extends:
+ - .use-debian/arm_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_armhf
+ - .use-debian/x86_build
+ - .lava-test
+ - .use-kernel+rootfs-arm
+ needs:
+ - kernel+rootfs_armhf
+ - debian/x86_build
+ - debian-armhf
+
+# LAVA test template for arm64 devices: Image kernel booted through u-boot.
+# KERNEL_IMAGE_NAME, KERNEL_IMAGE_TYPE and BOOT_METHOD are forwarded to the
+# job submitter by lava-submit.sh.
+.lava-test:arm64:
+ variables:
+ ARCH: arm64
+ KERNEL_IMAGE_NAME: Image
+ KERNEL_IMAGE_TYPE: "image"
+ BOOT_METHOD: u-boot
+ HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
+ extends:
+ - .use-debian/arm_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_arm64
+ - .use-debian/x86_build
+ - .lava-test
+ - .use-kernel+rootfs-arm
+ # NOTE(review): the armhf template has no matching 'dependencies' entry --
+ # confirm whether the difference is intentional.
+ dependencies:
+ - debian-arm64
+ needs:
+ - kernel+rootfs_arm64
+ - debian/x86_build
+ - debian-arm64
+
+# LAVA test template for amd64 devices: bzImage kernel.
+# KERNEL_IMAGE_NAME, KERNEL_IMAGE_TYPE and BOOT_METHOD are forwarded to the
+# job submitter by lava-submit.sh.
+.lava-test:amd64:
+ variables:
+ ARCH: amd64
+ KERNEL_IMAGE_NAME: bzImage
+ KERNEL_IMAGE_TYPE: "zimage"
+ # NOTE(review): u-boot is also the boot method for the ARM templates --
+ # confirm it is the intended value for amd64 devices.
+ BOOT_METHOD: u-boot
+ HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
+ extends:
+ - .use-debian/x86_build-base # for same $MESA_ARTIFACTS_BASE_TAG as in kernel+rootfs_amd64
+ - .use-debian/x86_build
+ - .lava-test
+ - .use-kernel+rootfs-amd64
+ needs:
+ - kernel+rootfs_amd64
+ - debian-testing
+
+# Mixin for trace jobs: selects /install/piglit/run.sh as the test script and
+# publishes results/junit.xml as a JUnit report.
+.lava-traces-base:
+ variables:
+ HWCI_TEST_SCRIPT: "/install/piglit/run.sh"
+ artifacts:
+ reports:
+ junit: results/junit.xml
+
+# Mixin for piglit jobs: selects piglit-runner.sh and derives the replay
+# device name and results name from GPU_VERSION and PIGLIT_PROFILES.
+# Neither variable is set in this file -- presumably the extending job
+# defines them; verify against the job definitions.
+.lava-piglit:
+ variables:
+ PIGLIT_REPLAY_DEVICE_NAME: "gl-${GPU_VERSION}"
+ PIGLIT_RESULTS: "${GPU_VERSION}-${PIGLIT_PROFILES}"
+ HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh"
+
+# Piglit trace replay on amd64. All three parents set HWCI_TEST_SCRIPT; per
+# GitLab's documented 'extends' merge order the last one listed should win,
+# i.e. the .lava-traces-base value -- confirm.
+.lava-piglit-traces:amd64:
+ extends:
+ - .lava-test:amd64
+ - .lava-piglit
+ - .lava-traces-base
+
+# Piglit trace replay on armhf. All three parents set HWCI_TEST_SCRIPT; per
+# GitLab's documented 'extends' merge order the last one listed should win,
+# i.e. the .lava-traces-base value -- confirm.
+.lava-piglit-traces:armhf:
+ extends:
+ - .lava-test:armhf
+ - .lava-piglit
+ - .lava-traces-base
+
+# Piglit trace replay on arm64. All three parents set HWCI_TEST_SCRIPT; per
+# GitLab's documented 'extends' merge order the last one listed should win,
+# i.e. the .lava-traces-base value -- confirm.
+.lava-piglit-traces:arm64:
+ extends:
+ - .lava-test:arm64
+ - .lava-piglit
+ - .lava-traces-base
+
+# Piglit (non-trace) run on amd64: the amd64 LAVA template combined with the
+# piglit mixin.
+.lava-piglit:amd64:
+ extends:
+ - .lava-test:amd64
+ - .lava-piglit
diff --git a/lib/mesa/.gitlab-ci/lava/lava-submit.sh b/lib/mesa/.gitlab-ci/lava/lava-submit.sh
new file mode 100755
index 000000000..1d3a24531
--- /dev/null
+++ b/lib/mesa/.gitlab-ci/lava/lava-submit.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+set -e
+set -x
+
+# Try to use the kernel and rootfs built in mainline first, so we're more
+# likely to hit cache
+if wget -q --method=HEAD "https://${BASE_SYSTEM_MAINLINE_HOST_PATH}/done"; then
+ BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_MAINLINE_HOST_PATH}"
+else
+ BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_FORK_HOST_PATH}"
+fi
+
+rm -rf results
+mkdir -p results/job-rootfs-overlay/
+
+# LAVA always uploads to MinIO when necessary as we don't have direct upload
+# from the DUT
+export PIGLIT_REPLAY_UPLOAD_TO_MINIO=1
+cp artifacts/ci-common/capture-devcoredump.sh results/job-rootfs-overlay/
+cp artifacts/ci-common/init-*.sh results/job-rootfs-overlay/
+artifacts/ci-common/generate-env.sh > results/job-rootfs-overlay/set-job-env-vars.sh
+
+tar zcf job-rootfs-overlay.tar.gz -C results/job-rootfs-overlay/ .
+ci-fairy minio login "${CI_JOB_JWT}"
+ci-fairy minio cp job-rootfs-overlay.tar.gz "minio://${JOB_ROOTFS_OVERLAY_PATH}"
+
+touch results/lava.log
+tail -f results/lava.log &
+artifacts/lava/lava_job_submitter.py \
+ --dump-yaml \
+ --pipeline-info "$CI_JOB_NAME: $CI_PIPELINE_URL on $CI_COMMIT_REF_NAME ${CI_NODE_INDEX}/${CI_NODE_TOTAL}" \
+ --base-system-url-prefix "https://${BASE_SYSTEM_HOST_PATH}" \
+ --mesa-build-url "${FDO_HTTP_CACHE_URI:-}https://${MESA_BUILD_PATH}" \
+ --job-rootfs-overlay-url "${FDO_HTTP_CACHE_URI:-}https://${JOB_ROOTFS_OVERLAY_PATH}" \
+ --job-artifacts-base ${JOB_ARTIFACTS_BASE} \
+ --job-timeout ${JOB_TIMEOUT:-30} \
+ --first-stage-init artifacts/ci-common/init-stage1.sh \
+ --ci-project-dir ${CI_PROJECT_DIR} \
+ --device-type ${DEVICE_TYPE} \
+ --dtb ${DTB} \
+ --jwt "${CI_JOB_JWT}" \
+ --kernel-image-name ${KERNEL_IMAGE_NAME} \
+ --kernel-image-type "${KERNEL_IMAGE_TYPE}" \
+ --boot-method ${BOOT_METHOD} \
+ --visibility-group ${VISIBILITY_GROUP} \
+ --lava-tags "${LAVA_TAGS}" >> results/lava.log
diff --git a/lib/mesa/.gitlab-ci/lava/lava_job_submitter.py b/lib/mesa/.gitlab-ci/lava/lava_job_submitter.py
new file mode 100755
index 000000000..bf2032c4f
--- /dev/null
+++ b/lib/mesa/.gitlab-ci/lava/lava_job_submitter.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2020, 2021 Collabora Limited
+# Author: Gustavo Padovan <gustavo.padovan@collabora.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Send a job to LAVA, track it and collect log back"""
+
+import argparse
+import lavacli
+import os
+import sys
+import time
+import traceback
+import urllib.parse
+import xmlrpc
+import xmlrpc.client
+import yaml
+
+from datetime import datetime, timedelta
+from lavacli.utils import loader
+
+# Timeout in minutes to decide if the device from the dispatched LAVA job has
+# hung or not due to the lack of new log output.
+DEVICE_HANGING_TIMEOUT_MIN = 5
+
+# How many seconds the script should wait before trying a new polling iteration
+# to check if the dispatched LAVA job is running or waiting in the job queue.
+WAIT_FOR_DEVICE_POLLING_TIME_SEC = 10
+
+# How many seconds to wait between log output LAVA RPC calls.
+LOG_POLLING_TIME_SEC = 5
+
+# How many retries should be made when a timeout happens.
+NUMBER_OF_RETRIES_TIMEOUT_DETECTION = 2
+
+
+def print_log(msg):
+ print("{}: {}".format(datetime.now(), msg))
+
+def fatal_err(msg):
+ print_log(msg)
+ sys.exit(1)
+
+def generate_lava_yaml(args):
+ # General metadata and permissions, plus also inexplicably kernel arguments
+ values = {
+ 'job_name': 'mesa: {}'.format(args.pipeline_info),
+ 'device_type': args.device_type,
+ 'visibility': { 'group': [ args.visibility_group ] },
+ 'priority': 75,
+ 'context': {
+ 'extra_nfsroot_args': ' init=/init rootwait minio_results={}'.format(args.job_artifacts_base)
+ },
+ 'timeouts': {
+ 'job': {
+ 'minutes': args.job_timeout
+ }
+ },
+ }
+
+ if args.lava_tags:
+ values['tags'] = args.lava_tags.split(',')
+
+ # URLs to our kernel rootfs to boot from, both generated by the base
+ # container build
+ deploy = {
+ 'timeout': { 'minutes': 10 },
+ 'to': 'tftp',
+ 'os': 'oe',
+ 'kernel': {
+ 'url': '{}/{}'.format(args.base_system_url_prefix, args.kernel_image_name),
+ },
+ 'nfsrootfs': {
+ 'url': '{}/lava-rootfs.tgz'.format(args.base_system_url_prefix),
+ 'compression': 'gz',
+ }
+ }
+ if args.kernel_image_type:
+ deploy['kernel']['type'] = args.kernel_image_type
+ if args.dtb:
+ deploy['dtb'] = {
+ 'url': '{}/{}.dtb'.format(args.base_system_url_prefix, args.dtb)
+ }
+
+ # always boot over NFS
+ boot = {
+ 'timeout': { 'minutes': 25 },
+ 'method': args.boot_method,
+ 'commands': 'nfs',
+ 'prompts': ['lava-shell:'],
+ }
+
+ # skeleton test definition: only declaring each job as a single 'test'
+ # since LAVA's test parsing is not useful to us
+ test = {
+ 'timeout': { 'minutes': args.job_timeout },
+ 'failure_retry': 1,
+ 'definitions': [ {
+ 'name': 'mesa',
+ 'from': 'inline',
+ 'path': 'inline/mesa.yaml',
+ 'repository': {
+ 'metadata': {
+ 'name': 'mesa',
+ 'description': 'Mesa test plan',
+ 'os': [ 'oe' ],
+ 'scope': [ 'functional' ],
+ 'format': 'Lava-Test Test Definition 1.0',
+ },
+ 'parse': {
+ 'pattern': r'hwci: (?P<test_case_id>\S*):\s+(?P<result>(pass|fail))'
+ },
+ 'run': {
+ },
+ },
+ } ],
+ }
+
+ # job execution script:
+ # - inline .gitlab-ci/common/init-stage1.sh
+ # - fetch and unpack per-pipeline build artifacts from build job
+ # - fetch and unpack per-job environment from lava-submit.sh
+ # - exec .gitlab-ci/common/init-stage2.sh
+ init_lines = []
+ with open(args.first_stage_init, 'r') as init_sh:
+ init_lines += [ x.rstrip() for x in init_sh if not x.startswith('#') and x.rstrip() ]
+ init_lines += [
+ 'mkdir -p {}'.format(args.ci_project_dir),
+ 'wget -S --progress=dot:giga -O- {} | tar -xz -C {}'.format(args.mesa_build_url, args.ci_project_dir),
+ 'wget -S --progress=dot:giga -O- {} | tar -xz -C /'.format(args.job_rootfs_overlay_url),
+ 'set +x',
+ 'export CI_JOB_JWT="{}"'.format(args.jwt),
+ 'set -x',
+ 'exec /init-stage2.sh',
+ ]
+ test['definitions'][0]['repository']['run']['steps'] = init_lines
+
+ values['actions'] = [
+ { 'deploy': deploy },
+ { 'boot': boot },
+ { 'test': test },
+ ]
+
+ return yaml.dump(values, width=10000000)
+
+
+def setup_lava_proxy():
+ config = lavacli.load_config("default")
+ uri, usr, tok = (config.get(key) for key in ("uri", "username", "token"))
+ uri_obj = urllib.parse.urlparse(uri)
+ uri_str = "{}://{}:{}@{}{}".format(uri_obj.scheme, usr, tok, uri_obj.netloc, uri_obj.path)
+ transport = lavacli.RequestsTransport(
+ uri_obj.scheme,
+ config.get("proxy"),
+ config.get("timeout", 120.0),
+ config.get("verify_ssl_cert", True),
+ )
+ proxy = xmlrpc.client.ServerProxy(
+ uri_str, allow_none=True, transport=transport)
+
+ print_log("Proxy for {} created.".format(config['uri']))
+
+ return proxy
+
+
+def _call_proxy(fn, *args):
+ retries = 60
+ for n in range(1, retries + 1):
+ try:
+ return fn(*args)
+ except xmlrpc.client.ProtocolError as err:
+ if n == retries:
+ traceback.print_exc()
+ fatal_err("A protocol error occurred (Err {} {})".format(err.errcode, err.errmsg))
+ else:
+ time.sleep(15)
+ pass
+ except xmlrpc.client.Fault as err:
+ traceback.print_exc()
+ fatal_err("FATAL: Fault: {} (code: {})".format(err.faultString, err.faultCode))
+
+
+def get_job_results(proxy, job_id, test_suite, test_case):
+ # Look for infrastructure errors and retry if we see them.
+ results_yaml = _call_proxy(proxy.results.get_testjob_results_yaml, job_id)
+ results = yaml.load(results_yaml, Loader=loader(False))
+ for res in results:
+ metadata = res['metadata']
+ if not 'result' in metadata or metadata['result'] != 'fail':
+ continue
+ if 'error_type' in metadata and metadata['error_type'] == "Infrastructure":
+ print_log("LAVA job {} failed with Infrastructure Error. Retry.".format(job_id))
+ return False
+ if 'case' in metadata and metadata['case'] == "validate":
+ print_log("LAVA job {} failed validation (possible download error). Retry.".format(job_id))
+ return False
+
+ results_yaml = _call_proxy(proxy.results.get_testcase_results_yaml, job_id, test_suite, test_case)
+ results = yaml.load(results_yaml, Loader=loader(False))
+ if not results:
+ fatal_err("LAVA: no result for test_suite '{}', test_case '{}'".format(test_suite, test_case))
+
+ print_log("LAVA: result for test_suite '{}', test_case '{}': {}".format(test_suite, test_case, results[0]['result']))
+ if results[0]['result'] != 'pass':
+ fatal_err("FAIL")
+
+ return True
+
+def wait_until_job_is_started(proxy, job_id):
+ print_log(f"Waiting for job {job_id} to start.")
+ current_state = "Submitted"
+ waiting_states = ["Submitted", "Scheduling", "Scheduled"]
+ while current_state in waiting_states:
+ job_state = _call_proxy(proxy.scheduler.job_state, job_id)
+ current_state = job_state["job_state"]
+
+ time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
+ print_log(f"Job {job_id} started.")
+
+def follow_job_execution(proxy, job_id):
+ line_count = 0
+ finished = False
+ last_time_logs = datetime.now()
+ while not finished:
+ (finished, data) = _call_proxy(proxy.scheduler.jobs.logs, job_id, line_count)
+ logs = yaml.load(str(data), Loader=loader(False))
+ if logs:
+ # Reset the timeout
+ last_time_logs = datetime.now()
+ for line in logs:
+ print("{} {}".format(line["dt"], line["msg"]))
+
+ line_count += len(logs)
+
+ else:
+ time_limit = timedelta(minutes=DEVICE_HANGING_TIMEOUT_MIN)
+ if datetime.now() - last_time_logs > time_limit:
+ print_log("LAVA job {} doesn't advance (machine got hung?). Retry.".format(job_id))
+ return False
+
+ # `proxy.scheduler.jobs.logs` does not block, even when there is no
+ # new log to be fetched. To avoid dosing the LAVA dispatcher
+ # machine, let's add a sleep to save them some stamina.
+ time.sleep(LOG_POLLING_TIME_SEC)
+
+ return True
+
+def show_job_data(proxy, job_id):
+ show = _call_proxy(proxy.scheduler.jobs.show, job_id)
+ for field, value in show.items():
+ print("{}\t: {}".format(field, value))
+
+
+def validate_job(proxy, job_file):
+ try:
+ return _call_proxy(proxy.scheduler.jobs.validate, job_file, True)
+ except:
+ return False
+
+def submit_job(proxy, job_file):
+ return _call_proxy(proxy.scheduler.jobs.submit, job_file)
+
+
+def main(args):
+ proxy = setup_lava_proxy()
+
+ yaml_file = generate_lava_yaml(args)
+
+ if args.dump_yaml:
+ censored_args = args
+ censored_args.jwt = "jwt-hidden"
+ print(generate_lava_yaml(censored_args))
+
+ if args.validate_only:
+ ret = validate_job(proxy, yaml_file)
+ if not ret:
+ fatal_err("Error in LAVA job definition")
+ print("LAVA job definition validated successfully")
+ return
+
+ retry_count = NUMBER_OF_RETRIES_TIMEOUT_DETECTION
+
+ while retry_count >= 0:
+ job_id = submit_job(proxy, yaml_file)
+
+ print_log("LAVA job id: {}".format(job_id))
+
+ wait_until_job_is_started(proxy, job_id)
+
+ if not follow_job_execution(proxy, job_id):
+ print_log(f"Job {job_id} has timed out. Cancelling it.")
+ # Cancel the job as it is considered unreachable by Mesa CI.
+ proxy.scheduler.jobs.cancel(job_id)
+
+ retry_count -= 1
+ continue
+
+ show_job_data(proxy, job_id)
+
+ if get_job_results(proxy, job_id, "0_mesa", "mesa") == True:
+ break
+
+
+if __name__ == '__main__':
+ # given that we proxy from DUT -> LAVA dispatcher -> LAVA primary -> us ->
+ # GitLab runner -> GitLab primary -> user, safe to say we don't need any
+ # more buffering
+ sys.stdout.reconfigure(line_buffering=True)
+ sys.stderr.reconfigure(line_buffering=True)
+ parser = argparse.ArgumentParser("LAVA job submitter")
+
+ parser.add_argument("--pipeline-info")
+ parser.add_argument("--base-system-url-prefix")
+ parser.add_argument("--mesa-build-url")
+ parser.add_argument("--job-rootfs-overlay-url")
+ parser.add_argument("--job-artifacts-base")
+ parser.add_argument("--job-timeout", type=int)
+ parser.add_argument("--first-stage-init")
+ parser.add_argument("--ci-project-dir")
+ parser.add_argument("--device-type")
+ parser.add_argument("--dtb", nargs='?', default="")
+ parser.add_argument("--kernel-image-name")
+ parser.add_argument("--kernel-image-type", nargs='?', default="")
+ parser.add_argument("--boot-method")
+ parser.add_argument("--lava-tags", nargs='?', default="")
+ parser.add_argument("--jwt")
+ parser.add_argument("--validate-only", action='store_true')
+ parser.add_argument("--dump-yaml", action='store_true')
+ parser.add_argument("--visibility-group")
+
+ parser.set_defaults(func=main)
+ args = parser.parse_args()
+ args.func(args)