diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
commit | b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch) | |
tree | 658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/.gitlab-ci/lava | |
parent | 57768bbb154c2879d34ec20e401b19472e77aaf7 (diff) |
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/.gitlab-ci/lava')
-rwxr-xr-x | lib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml | 120 | ||||
-rwxr-xr-x | lib/mesa/.gitlab-ci/lava/lava-submit.sh | 47 | ||||
-rwxr-xr-x | lib/mesa/.gitlab-ci/lava/lava_job_submitter.py | 351 |
3 files changed, 518 insertions, 0 deletions
diff --git a/lib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml b/lib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml new file mode 100755 index 000000000..7bd368468 --- /dev/null +++ b/lib/mesa/.gitlab-ci/lava/lava-gitlab-ci.yml @@ -0,0 +1,120 @@ +.lava-test: + extends: + - .ci-run-policy + # Cancel job if a newer commit is pushed to the same branch + interruptible: true + variables: + GIT_STRATEGY: none # testing doesn't build anything from source + FDO_CI_CONCURRENT: 6 # should be replaced by per-machine definitions + DEQP_VER: gles2 + # proxy used to cache data locally + FDO_HTTP_CACHE_URI: "http://caching-proxy/cache/?uri=" + # base system generated by the container build job, shared between many pipelines + BASE_SYSTEM_HOST_PREFIX: "${MINIO_HOST}/mesa-lava" + BASE_SYSTEM_MAINLINE_HOST_PATH: "${BASE_SYSTEM_HOST_PREFIX}/${FDO_UPSTREAM_REPO}/${DISTRIBUTION_TAG}/${ARCH}" + BASE_SYSTEM_FORK_HOST_PATH: "${BASE_SYSTEM_HOST_PREFIX}/${CI_PROJECT_PATH}/${DISTRIBUTION_TAG}/${ARCH}" + # per-job build artifacts + MESA_BUILD_PATH: "${PIPELINE_ARTIFACTS_BASE}/mesa-${ARCH}.tar.gz" + JOB_ROOTFS_OVERLAY_PATH: "${JOB_ARTIFACTS_BASE}/job-rootfs-overlay.tar.gz" + JOB_RESULTS_PATH: "${JOB_ARTIFACTS_BASE}/results.tar.gz" + PIGLIT_NO_WINDOW: 1 + VISIBILITY_GROUP: "Collabora+fdo" + script: + - ./artifacts/lava/lava-submit.sh + artifacts: + name: "mesa_${CI_JOB_NAME}" + when: always + paths: + - results/ + exclude: + - results/*.shader_cache + after_script: + - wget -q "https://${JOB_RESULTS_PATH}" -O- | tar -xz + +.lava-test:armhf: + variables: + ARCH: armhf + KERNEL_IMAGE_NAME: zImage + KERNEL_IMAGE_TYPE: "zimage" + BOOT_METHOD: u-boot + HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" + extends: + - .use-debian/arm_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_armhf + - .use-debian/x86_build + - .lava-test + - .use-kernel+rootfs-arm + needs: + - kernel+rootfs_armhf + - debian/x86_build + - debian-armhf + +.lava-test:arm64: + variables: + ARCH: arm64 + KERNEL_IMAGE_NAME: Image + 
KERNEL_IMAGE_TYPE: "image" + BOOT_METHOD: u-boot + HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" + extends: + - .use-debian/arm_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_arm64 + - .use-debian/x86_build + - .lava-test + - .use-kernel+rootfs-arm + dependencies: + - debian-arm64 + needs: + - kernel+rootfs_arm64 + - debian/x86_build + - debian-arm64 + +.lava-test:amd64: + variables: + ARCH: amd64 + KERNEL_IMAGE_NAME: bzImage + KERNEL_IMAGE_TYPE: "zimage" + BOOT_METHOD: u-boot + HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" + extends: + - .use-debian/x86_build-base # for same $MESA_ARTIFACTS_BASE_TAG as in kernel+rootfs_amd64 + - .use-debian/x86_build + - .lava-test + - .use-kernel+rootfs-amd64 + needs: + - kernel+rootfs_amd64 + - debian-testing + +.lava-traces-base: + variables: + HWCI_TEST_SCRIPT: "/install/piglit/run.sh" + artifacts: + reports: + junit: results/junit.xml + +.lava-piglit: + variables: + PIGLIT_REPLAY_DEVICE_NAME: "gl-${GPU_VERSION}" + PIGLIT_RESULTS: "${GPU_VERSION}-${PIGLIT_PROFILES}" + HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh" + +.lava-piglit-traces:amd64: + extends: + - .lava-test:amd64 + - .lava-piglit + - .lava-traces-base + +.lava-piglit-traces:armhf: + extends: + - .lava-test:armhf + - .lava-piglit + - .lava-traces-base + +.lava-piglit-traces:arm64: + extends: + - .lava-test:arm64 + - .lava-piglit + - .lava-traces-base + +.lava-piglit:amd64: + extends: + - .lava-test:amd64 + - .lava-piglit diff --git a/lib/mesa/.gitlab-ci/lava/lava-submit.sh b/lib/mesa/.gitlab-ci/lava/lava-submit.sh new file mode 100755 index 000000000..1d3a24531 --- /dev/null +++ b/lib/mesa/.gitlab-ci/lava/lava-submit.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +set -e +set -x + +# Try to use the kernel and rootfs built in mainline first, so we're more +# likely to hit cache +if wget -q --method=HEAD "https://${BASE_SYSTEM_MAINLINE_HOST_PATH}/done"; then + BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_MAINLINE_HOST_PATH}" +else + 
BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_FORK_HOST_PATH}" +fi + +rm -rf results +mkdir -p results/job-rootfs-overlay/ + +# LAVA always uploads to MinIO when necessary as we don't have direct upload +# from the DUT +export PIGLIT_REPLAY_UPLOAD_TO_MINIO=1 +cp artifacts/ci-common/capture-devcoredump.sh results/job-rootfs-overlay/ +cp artifacts/ci-common/init-*.sh results/job-rootfs-overlay/ +artifacts/ci-common/generate-env.sh > results/job-rootfs-overlay/set-job-env-vars.sh + +tar zcf job-rootfs-overlay.tar.gz -C results/job-rootfs-overlay/ . +ci-fairy minio login "${CI_JOB_JWT}" +ci-fairy minio cp job-rootfs-overlay.tar.gz "minio://${JOB_ROOTFS_OVERLAY_PATH}" + +touch results/lava.log +tail -f results/lava.log & +artifacts/lava/lava_job_submitter.py \ + --dump-yaml \ + --pipeline-info "$CI_JOB_NAME: $CI_PIPELINE_URL on $CI_COMMIT_REF_NAME ${CI_NODE_INDEX}/${CI_NODE_TOTAL}" \ + --base-system-url-prefix "https://${BASE_SYSTEM_HOST_PATH}" \ + --mesa-build-url "${FDO_HTTP_CACHE_URI:-}https://${MESA_BUILD_PATH}" \ + --job-rootfs-overlay-url "${FDO_HTTP_CACHE_URI:-}https://${JOB_ROOTFS_OVERLAY_PATH}" \ + --job-artifacts-base ${JOB_ARTIFACTS_BASE} \ + --job-timeout ${JOB_TIMEOUT:-30} \ + --first-stage-init artifacts/ci-common/init-stage1.sh \ + --ci-project-dir ${CI_PROJECT_DIR} \ + --device-type ${DEVICE_TYPE} \ + --dtb ${DTB} \ + --jwt "${CI_JOB_JWT}" \ + --kernel-image-name ${KERNEL_IMAGE_NAME} \ + --kernel-image-type "${KERNEL_IMAGE_TYPE}" \ + --boot-method ${BOOT_METHOD} \ + --visibility-group ${VISIBILITY_GROUP} \ + --lava-tags "${LAVA_TAGS}" >> results/lava.log diff --git a/lib/mesa/.gitlab-ci/lava/lava_job_submitter.py b/lib/mesa/.gitlab-ci/lava/lava_job_submitter.py new file mode 100755 index 000000000..bf2032c4f --- /dev/null +++ b/lib/mesa/.gitlab-ci/lava/lava_job_submitter.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020, 2021 Collabora Limited +# Author: Gustavo Padovan <gustavo.padovan@collabora.com> +# +# Permission is hereby granted, free 
of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Send a job to LAVA, track it and collect log back""" + +import argparse +import lavacli +import os +import sys +import time +import traceback +import urllib.parse +import xmlrpc +import yaml + +from datetime import datetime, timedelta +from lavacli.utils import loader + +# Timeout in minutes to decide if the device from the dispatched LAVA job has +# hung or not due to the lack of new log output. +DEVICE_HANGING_TIMEOUT_MIN = 5 + +# How many seconds the script should wait before try a new polling iteration to +# check if the dispatched LAVA job is running or waiting in the job queue. +WAIT_FOR_DEVICE_POLLING_TIME_SEC = 10 + +# How many seconds to wait between log output LAVA RPC calls. +LOG_POLLING_TIME_SEC = 5 + +# How many retries should be made when a timeout happen. 
NUMBER_OF_RETRIES_TIMEOUT_DETECTION = 2


def print_log(msg):
    """Print *msg* prefixed with the current wall-clock timestamp."""
    print("{}: {}".format(datetime.now(), msg))


def fatal_err(msg):
    """Log *msg* and terminate the submitter with exit status 1."""
    print_log(msg)
    sys.exit(1)


def generate_lava_yaml(args):
    """Build the LAVA job definition for this CI job and return it as YAML.

    The definition glues together three LAVA actions: a TFTP/NFS deploy of
    the kernel and rootfs produced by the container build, a boot action,
    and a single inline 'test' whose steps run the Mesa CI scripts on the
    device under test.
    """
    # General metadata and permissions, plus also inexplicably kernel arguments
    values = {
        'job_name': 'mesa: {}'.format(args.pipeline_info),
        'device_type': args.device_type,
        'visibility': {'group': [args.visibility_group]},
        'priority': 75,
        'context': {
            'extra_nfsroot_args': ' init=/init rootwait minio_results={}'.format(args.job_artifacts_base)
        },
        'timeouts': {
            'job': {
                'minutes': args.job_timeout
            }
        },
    }

    if args.lava_tags:
        values['tags'] = args.lava_tags.split(',')

    # URLs to our kernel rootfs to boot from, both generated by the base
    # container build
    deploy = {
        'timeout': {'minutes': 10},
        'to': 'tftp',
        'os': 'oe',
        'kernel': {
            'url': '{}/{}'.format(args.base_system_url_prefix, args.kernel_image_name),
        },
        'nfsrootfs': {
            'url': '{}/lava-rootfs.tgz'.format(args.base_system_url_prefix),
            'compression': 'gz',
        }
    }
    if args.kernel_image_type:
        deploy['kernel']['type'] = args.kernel_image_type
    if args.dtb:
        deploy['dtb'] = {
            'url': '{}/{}.dtb'.format(args.base_system_url_prefix, args.dtb)
        }

    # always boot over NFS
    boot = {
        'timeout': {'minutes': 25},
        'method': args.boot_method,
        'commands': 'nfs',
        'prompts': ['lava-shell:'],
    }

    # skeleton test definition: only declaring each job as a single 'test'
    # since LAVA's test parsing is not useful to us
    test = {
        'timeout': {'minutes': args.job_timeout},
        'failure_retry': 1,
        'definitions': [{
            'name': 'mesa',
            'from': 'inline',
            'path': 'inline/mesa.yaml',
            'repository': {
                'metadata': {
                    'name': 'mesa',
                    'description': 'Mesa test plan',
                    'os': ['oe'],
                    'scope': ['functional'],
                    'format': 'Lava-Test Test Definition 1.0',
                },
                'parse': {
                    'pattern': r'hwci: (?P<test_case_id>\S*):\s+(?P<result>(pass|fail))'
                },
                'run': {
                },
            },
        }],
    }

    # job execution script:
    #   - inline .gitlab-ci/common/init-stage1.sh
    #   - fetch and unpack per-pipeline build artifacts from build job
    #   - fetch and unpack per-job environment from lava-submit.sh
    #   - exec .gitlab-ci/common/init-stage2.sh
    init_lines = []
    with open(args.first_stage_init, 'r') as init_sh:
        init_lines += [x.rstrip() for x in init_sh if not x.startswith('#') and x.rstrip()]
    init_lines += [
        'mkdir -p {}'.format(args.ci_project_dir),
        'wget -S --progress=dot:giga -O- {} | tar -xz -C {}'.format(args.mesa_build_url, args.ci_project_dir),
        'wget -S --progress=dot:giga -O- {} | tar -xz -C /'.format(args.job_rootfs_overlay_url),
        # 'set +x' keeps the JWT out of the DUT's shell trace while it is
        # being exported.
        'set +x',
        'export CI_JOB_JWT="{}"'.format(args.jwt),
        'set -x',
        'exec /init-stage2.sh',
    ]
    test['definitions'][0]['repository']['run']['steps'] = init_lines

    values['actions'] = [
        {'deploy': deploy},
        {'boot': boot},
        {'test': test},
    ]

    # A huge width keeps each shell command on a single YAML line so LAVA
    # executes every step exactly as written.
    return yaml.dump(values, width=10000000)


def setup_lava_proxy():
    """Create an authenticated XML-RPC proxy from the lavacli 'default' config."""
    # The file-level import is only 'import xmlrpc'; make sure the client
    # submodule is actually loaded instead of relying on lavacli having
    # imported it as a side effect.
    import xmlrpc.client

    config = lavacli.load_config("default")
    uri, usr, tok = (config.get(key) for key in ("uri", "username", "token"))
    uri_obj = urllib.parse.urlparse(uri)
    uri_str = "{}://{}:{}@{}{}".format(uri_obj.scheme, usr, tok, uri_obj.netloc, uri_obj.path)
    transport = lavacli.RequestsTransport(
        uri_obj.scheme,
        config.get("proxy"),
        config.get("timeout", 120.0),
        config.get("verify_ssl_cert", True),
    )
    proxy = xmlrpc.client.ServerProxy(uri_str, allow_none=True, transport=transport)

    print_log("Proxy for {} created.".format(config['uri']))

    return proxy


def _call_proxy(fn, *args):
    """Call the XML-RPC method *fn* with *args*, retrying transient errors.

    Protocol errors (server briefly unreachable) are retried every 15 s for
    up to 60 attempts before aborting; an XML-RPC Fault aborts immediately.
    """
    import xmlrpc.client  # see setup_lava_proxy(): file only does 'import xmlrpc'

    retries = 60
    for n in range(1, retries + 1):
        try:
            return fn(*args)
        except xmlrpc.client.ProtocolError as err:
            if n == retries:
                traceback.print_exc()
                fatal_err("A protocol error occurred (Err {} {})".format(err.errcode, err.errmsg))
            else:
                time.sleep(15)
        except xmlrpc.client.Fault as err:
            traceback.print_exc()
            fatal_err("FATAL: Fault: {} (code: {})".format(err.faultString, err.faultCode))


def get_job_results(proxy, job_id, test_suite, test_case):
    """Inspect the results of a finished LAVA job.

    Returns False when the failure looks like an infrastructure problem so
    the caller can resubmit; exits fatally on a genuine test failure or a
    missing result; returns True on success.
    """
    # Look for infrastructure errors and retry if we see them.
    results_yaml = _call_proxy(proxy.results.get_testjob_results_yaml, job_id)
    results = yaml.load(results_yaml, Loader=loader(False))
    for res in results:
        metadata = res['metadata']
        if metadata.get('result') != 'fail':
            continue
        if metadata.get('error_type') == "Infrastructure":
            print_log("LAVA job {} failed with Infrastructure Error. Retry.".format(job_id))
            return False
        if metadata.get('case') == "validate":
            print_log("LAVA job {} failed validation (possible download error). Retry.".format(job_id))
            return False

    results_yaml = _call_proxy(proxy.results.get_testcase_results_yaml, job_id, test_suite, test_case)
    results = yaml.load(results_yaml, Loader=loader(False))
    if not results:
        fatal_err("LAVA: no result for test_suite '{}', test_case '{}'".format(test_suite, test_case))

    print_log("LAVA: result for test_suite '{}', test_case '{}': {}".format(
        test_suite, test_case, results[0]['result']))
    if results[0]['result'] != 'pass':
        fatal_err("FAIL")

    return True


def wait_until_job_is_started(proxy, job_id):
    """Poll the scheduler until *job_id* leaves the submission/scheduling queue."""
    print_log(f"Waiting for job {job_id} to start.")
    waiting_states = ["Submitted", "Scheduling", "Scheduled"]
    current_state = "Submitted"
    while current_state in waiting_states:
        job_state = _call_proxy(proxy.scheduler.job_state, job_id)
        current_state = job_state["job_state"]

        # Only sleep while still queued; previously we also slept one extra
        # polling interval after the job had already started.
        if current_state in waiting_states:
            time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
    print_log(f"Job {job_id} started.")


def follow_job_execution(proxy, job_id):
    """Stream the job's log until it finishes.

    Returns False if no new log output arrives for DEVICE_HANGING_TIMEOUT_MIN
    minutes (the device is assumed hung); True when the job completes.
    """
    line_count = 0
    finished = False
    last_time_logs = datetime.now()
    while not finished:
        (finished, data) = _call_proxy(proxy.scheduler.jobs.logs, job_id, line_count)
        logs = yaml.load(str(data), Loader=loader(False))
        if logs:
            # Reset the hang-detection timeout
            last_time_logs = datetime.now()
            for line in logs:
                print("{} {}".format(line["dt"], line["msg"]))

            line_count += len(logs)
        else:
            time_limit = timedelta(minutes=DEVICE_HANGING_TIMEOUT_MIN)
            if datetime.now() - last_time_logs > time_limit:
                print_log("LAVA job {} doesn't advance (machine got hung?). Retry.".format(job_id))
                return False

            # `proxy.scheduler.jobs.logs` does not block, even when there is no
            # new log to be fetched. To avoid dosing the LAVA dispatcher
            # machine, let's add a sleep to save them some stamina.
            time.sleep(LOG_POLLING_TIME_SEC)

    return True


def show_job_data(proxy, job_id):
    """Print every field of the job's scheduler record, one per line."""
    show = _call_proxy(proxy.scheduler.jobs.show, job_id)
    for field, value in show.items():
        print("{}\t: {}".format(field, value))


def validate_job(proxy, job_file):
    """Return the server's validation result for *job_file*, or False on any error.

    NOTE: the bare except is deliberate — _call_proxy() exits via
    fatal_err()/SystemExit on a Fault, and validation failure must map to a
    False return (the --validate-only caller produces its own fatal error).
    """
    try:
        return _call_proxy(proxy.scheduler.jobs.validate, job_file, True)
    except:
        return False


def submit_job(proxy, job_file):
    """Submit *job_file* to the scheduler and return the new job id."""
    return _call_proxy(proxy.scheduler.jobs.submit, job_file)


def main(args):
    """Submit the LAVA job, follow it, and retry on hangs or infra errors."""
    proxy = setup_lava_proxy()

    yaml_file = generate_lava_yaml(args)

    if args.dump_yaml:
        # Dump a copy with the JWT redacted so the secret never reaches the
        # GitLab job log. Use a real copy of the namespace: the original code
        # aliased `args` and only avoided corrupting the submitted definition
        # because yaml_file had already been generated above.
        censored_args = argparse.Namespace(**vars(args))
        censored_args.jwt = "jwt-hidden"
        print(generate_lava_yaml(censored_args))

    if args.validate_only:
        ret = validate_job(proxy, yaml_file)
        if not ret:
            fatal_err("Error in LAVA job definition")
        print("LAVA job definition validated successfully")
        return

    retry_count = NUMBER_OF_RETRIES_TIMEOUT_DETECTION

    while retry_count >= 0:
        job_id = submit_job(proxy, yaml_file)

        print_log("LAVA job id: {}".format(job_id))

        wait_until_job_is_started(proxy, job_id)

        if not follow_job_execution(proxy, job_id):
            print_log(f"Job {job_id} has timed out. Cancelling it.")
            # Cancel the job as it is considered unreachable by Mesa CI.
            proxy.scheduler.jobs.cancel(job_id)

            retry_count -= 1
            continue

        show_job_data(proxy, job_id)

        # An infrastructure failure returns False and does not consume a
        # retry: resubmit until the job runs on working hardware.
        if get_job_results(proxy, job_id, "0_mesa", "mesa"):
            break
    else:
        # BUGFIX: previously the script fell off the end of the loop after
        # exhausting its hang-timeout retries and exited 0, reporting CI
        # success for a job that never produced any results.
        fatal_err("Job hung and timed out {} times; giving up.".format(
            NUMBER_OF_RETRIES_TIMEOUT_DETECTION + 1))


if __name__ == '__main__':
    # given that we proxy from DUT -> LAVA dispatcher -> LAVA primary -> us ->
    # GitLab runner -> GitLab primary -> user, safe to say we don't need any
    # more buffering
    sys.stdout.reconfigure(line_buffering=True)
    sys.stderr.reconfigure(line_buffering=True)
    parser = argparse.ArgumentParser("LAVA job submitter")

    parser.add_argument("--pipeline-info")
    parser.add_argument("--base-system-url-prefix")
    parser.add_argument("--mesa-build-url")
    parser.add_argument("--job-rootfs-overlay-url")
    parser.add_argument("--job-artifacts-base")
    parser.add_argument("--job-timeout", type=int)
    parser.add_argument("--first-stage-init")
    parser.add_argument("--ci-project-dir")
    parser.add_argument("--device-type")
    parser.add_argument("--dtb", nargs='?', default="")
    parser.add_argument("--kernel-image-name")
    parser.add_argument("--kernel-image-type", nargs='?', default="")
    parser.add_argument("--boot-method")
    parser.add_argument("--lava-tags", nargs='?', default="")
    parser.add_argument("--jwt")
    parser.add_argument("--validate-only", action='store_true')
    parser.add_argument("--dump-yaml", action='store_true')
    parser.add_argument("--visibility-group")

    parser.set_defaults(func=main)
    args = parser.parse_args()
    args.func(args)