Merge branch 'ci/fix-retry-failed-jobs-stage' into 'master'

ci: add redundant job to ensure 'retry_failed_jobs' job is not skipped

See merge request espressif/esp-idf!32754
This commit is contained in:
Aleksei Apaseev 2024-08-15 18:08:10 +08:00
commit 73ac0bcdff
7 changed files with 82 additions and 21 deletions

View File

@ -168,3 +168,15 @@ pipeline_variables:
- pipeline.env
expire_in: 1 week
when: always
redundant_pass_job:
stage: pre_check
tags: [shiny, fast_run]
image: $ESP_ENV_IMAGE
dependencies: null
before_script: []
cache: []
extends: []
script:
- echo "This job is redundant to ensure the 'retry_failed_jobs' job can exist and not be skipped"
when: always

View File

@ -1,6 +1,7 @@
retry_failed_jobs:
stage: retry_failed_jobs
tags: [shiny, fast_run]
allow_failure: true
image: $ESP_ENV_IMAGE
dependencies: null
before_script: []
@ -11,4 +12,4 @@ retry_failed_jobs:
- python tools/ci/python_packages/gitlab_api.py retry_failed_jobs $CI_MERGE_REQUEST_PROJECT_ID --pipeline_id $CI_PIPELINE_ID
when: manual
needs:
- generate_failed_jobs_report
- redundant_pass_job

View File

@ -17,6 +17,9 @@ DEFAULT_CASES_TEST_PER_JOB = 30
DEFAULT_BUILD_CHILD_PIPELINE_FILEPATH = os.path.join(IDF_PATH, 'build_child_pipeline.yml')
DEFAULT_TARGET_TEST_CHILD_PIPELINE_FILEPATH = os.path.join(IDF_PATH, 'target_test_child_pipeline.yml')
DEFAULT_BUILD_CHILD_PIPELINE_NAME = 'Build Child Pipeline'
DEFAULT_TARGET_TEST_CHILD_PIPELINE_NAME = 'Target Test Child Pipeline'
TEST_RELATED_BUILD_JOB_NAME = 'build_test_related_apps'
NON_TEST_RELATED_BUILD_JOB_NAME = 'build_non_test_related_apps'

View File

@ -9,6 +9,7 @@ import __init__ # noqa: F401 # inject the system path
import yaml
from dynamic_pipelines.constants import DEFAULT_APPS_BUILD_PER_JOB
from dynamic_pipelines.constants import DEFAULT_BUILD_CHILD_PIPELINE_FILEPATH
from dynamic_pipelines.constants import DEFAULT_BUILD_CHILD_PIPELINE_NAME
from dynamic_pipelines.constants import DEFAULT_TEST_PATHS
from dynamic_pipelines.constants import NON_TEST_RELATED_APPS_FILENAME
from dynamic_pipelines.constants import NON_TEST_RELATED_BUILD_JOB_NAME
@ -133,7 +134,7 @@ def main(arguments: argparse.Namespace) -> None:
else:
extra_include_yml = ['tools/ci/dynamic_pipelines/templates/test_child_pipeline.yml']
dump_jobs_to_yaml(build_jobs, arguments.yaml_output, extra_include_yml)
dump_jobs_to_yaml(build_jobs, arguments.yaml_output, DEFAULT_BUILD_CHILD_PIPELINE_NAME, extra_include_yml)
print(f'Generate child pipeline yaml file {arguments.yaml_output} with {sum(j.parallel for j in build_jobs)} jobs')

View File

@ -19,6 +19,7 @@ import yaml
from dynamic_pipelines.constants import BUILD_ONLY_LABEL
from dynamic_pipelines.constants import DEFAULT_CASES_TEST_PER_JOB
from dynamic_pipelines.constants import DEFAULT_TARGET_TEST_CHILD_PIPELINE_FILEPATH
from dynamic_pipelines.constants import DEFAULT_TARGET_TEST_CHILD_PIPELINE_NAME
from dynamic_pipelines.constants import DEFAULT_TEST_PATHS
from dynamic_pipelines.constants import KNOWN_GENERATE_TEST_CHILD_PIPELINE_WARNINGS_FILEPATH
from dynamic_pipelines.models import EmptyJob
@ -170,7 +171,7 @@ def generate_target_test_child_pipeline(
if no_env_marker_test_cases_fail or no_runner_tags_fail:
raise SystemExit('Failed to generate target test child pipeline.')
dump_jobs_to_yaml(target_test_jobs, output_filepath, extra_include_yml)
dump_jobs_to_yaml(target_test_jobs, output_filepath, DEFAULT_TARGET_TEST_CHILD_PIPELINE_NAME, extra_include_yml)
print(f'Generate child pipeline yaml file {output_filepath} with {sum(j.parallel for j in target_test_jobs)} jobs')

View File

@ -21,7 +21,10 @@ from .models import TestCase
def dump_jobs_to_yaml(
jobs: t.List[Job], output_filepath: str, extra_include_yml: t.Optional[t.List[str]] = None
jobs: t.List[Job],
output_filepath: str,
pipeline_name: str,
extra_include_yml: t.Optional[t.List[str]] = None,
) -> None:
yaml_dict = {}
for job in jobs:
@ -35,6 +38,7 @@ def dump_jobs_to_yaml(
'.gitlab/ci/common.yml',
],
'workflow': {
'name': pipeline_name,
'rules': [
# always run the child pipeline, if they are created
{'when': 'always'},
@ -102,7 +106,7 @@ def fetch_failed_jobs(commit_id: str) -> t.List[GitlabJob]:
"""
response = requests.get(
f'{CI_DASHBOARD_API}/commits/{commit_id}/jobs',
headers={'Authorization': f'Bearer {CI_JOB_TOKEN}'}
headers={'CI-Job-Token': CI_JOB_TOKEN},
)
if response.status_code != 200:
print(f'Failed to fetch jobs data: {response.status_code} with error: {response.text}')
@ -117,7 +121,7 @@ def fetch_failed_jobs(commit_id: str) -> t.List[GitlabJob]:
failed_job_names = [job['name'] for job in jobs if job['status'] == 'failed']
response = requests.post(
f'{CI_DASHBOARD_API}/jobs/failure_ratio',
headers={'Authorization': f'Bearer {CI_JOB_TOKEN}'},
headers={'CI-Job-Token': CI_JOB_TOKEN},
json={'job_names': failed_job_names, 'exclude_branches': [os.getenv('CI_MERGE_REQUEST_SOURCE_BRANCH_NAME', '')]},
)
if response.status_code != 200:
@ -145,7 +149,7 @@ def fetch_failed_testcases_failure_ratio(failed_testcases: t.List[TestCase], bra
req_json = {'testcase_names': list(set([testcase.name for testcase in failed_testcases])), **branches_filter}
response = requests.post(
f'{CI_DASHBOARD_API}/testcases/failure_ratio',
headers={'Authorization': f'Bearer {CI_JOB_TOKEN}'},
headers={'CI-Job-Token': CI_JOB_TOKEN},
json=req_json,
)
if response.status_code != 200:

View File

@ -67,6 +67,7 @@ class Gitlab(object):
JOB_NAME_PATTERN = re.compile(r'(\w+)(\s+(\d+)/(\d+))?')
DOWNLOAD_ERROR_MAX_RETRIES = 3
DEFAULT_BUILD_CHILD_PIPELINE_NAME = 'Build Child Pipeline'
def __init__(self, project_id: Union[int, str, None] = None):
config_data_from_env = os.getenv('PYTHON_GITLAB_CONFIG')
@ -279,6 +280,39 @@ class Gitlab(object):
job = self.project.jobs.get(job_id)
return ','.join(job.tag_list)
def get_downstream_pipeline_ids(self, main_pipeline_id: int) -> List[int]:
    """
    Retrieve the IDs of all downstream child pipelines for a given main pipeline.

    Walks the bridge jobs of the main pipeline to collect directly triggered
    child pipelines, then descends one extra level into the build child
    pipeline only (identified by ``DEFAULT_BUILD_CHILD_PIPELINE_NAME``), since
    that is the pipeline that itself triggers further child pipelines.

    :param main_pipeline_id: The ID of the main pipeline to start the search.
    :return: A list of IDs of all downstream child pipelines (no ``None`` entries).
    """
    child_pipeline_ids: List[int] = []

    for bridge in self.project.pipelines.get(main_pipeline_id).bridges.list():
        downstream_pipeline = bridge.attributes.get('downstream_pipeline')
        if not downstream_pipeline:
            # Bridge was created but its child pipeline never started.
            continue
        bridge_pipeline_id = downstream_pipeline['id']
        if bridge_pipeline_id is None:
            continue
        child_pipeline_ids.append(bridge_pipeline_id)

        bridge_pipeline = self.project.pipelines.get(bridge_pipeline_id)
        # Only the build child pipeline spawns a second level of children
        # (e.g. the target-test child pipeline); skip all other bridges.
        if bridge_pipeline.name != self.DEFAULT_BUILD_CHILD_PIPELINE_NAME:
            continue

        for child_bridge in bridge_pipeline.bridges.list():
            downstream_child_pipeline = child_bridge.attributes.get('downstream_pipeline')
            if not downstream_child_pipeline:
                continue
            child_id = downstream_child_pipeline.get('id')
            if child_id is not None:
                child_pipeline_ids.append(child_id)

    return child_pipeline_ids
# NOTE(review): this span is rendered diff output, not runnable source — the
# hunk header below sits mid-docstring, and removed (pre-change) lines are
# interleaved with added (post-change) lines without +/- markers. Comments
# below mark which runs belong to which version; do not execute as-is.
def retry_failed_jobs(self, pipeline_id: int, retry_allowed_failures: bool = False) -> List[int]:
    """
    Retry failed jobs for a specific pipeline. Optionally include jobs marked as 'allowed failures'.
@ -286,20 +320,25 @@ class Gitlab(object):
    :param pipeline_id: ID of the pipeline whose failed jobs are to be retried.
    :param retry_allowed_failures: Whether to retry jobs that are marked as allowed failures.
    """
# -- pre-change lines (removed by this MR): only the given pipeline was
#    inspected, and Job objects (not ids) were collected for retry.
    pipeline = self.project.pipelines.get(pipeline_id)
    jobs_to_retry = [
        job
# -- post-change lines (added by this MR): the main pipeline plus every
#    downstream child pipeline is inspected, and failed job ids are retried.
    jobs_succeeded_retry = []
    pipeline_ids = [pipeline_id] + self.get_downstream_pipeline_ids(pipeline_id)
    logging.info(f'Retrying jobs for pipelines: {pipeline_ids}')
    for pid in pipeline_ids:
    pipeline = self.project.pipelines.get(pid)
    job_ids_to_retry = [
        job.id
# -- shared context lines (present in both versions):
        for job in pipeline.jobs.list(scope='failed')
        if retry_allowed_failures or not job.attributes.get('allow_failure', False)
    ]
# -- pre-change lines (removed):
    jobs_succeeded_retry = []
    for job in jobs_to_retry:
# -- post-change lines (added):
    logging.info(f'Failed jobs for pipeline {pid}: {job_ids_to_retry}')
    for job_id in job_ids_to_retry:
# -- shared context line:
        try:
# -- pre-change lines (removed): retried via Job objects.
            res = self.project.jobs.get(job.id).retry()
            jobs_succeeded_retry.append(job.id)
            logging.info(f'Retried job {job.id} with result {res}')
# -- post-change lines (added): retried via plain job ids.
            res = self.project.jobs.get(job_id).retry()
            jobs_succeeded_retry.append(job_id)
            logging.info(f'Retried job {job_id} with result {res}')
# -- shared context line:
        except Exception as e:
# -- pre-change line (removed):
            logging.error(f'Failed to retry job {job.id}: {str(e)}')
# -- post-change line (added):
            logging.error(f'Failed to retry job {job_id}: {str(e)}')
# -- shared context line: list of successfully retried job ids is returned.
    return jobs_succeeded_retry
@ -334,7 +373,7 @@ def main() -> None:
print('project id: {}'.format(ret))
elif args.action == 'retry_failed_jobs':
res = gitlab_inst.retry_failed_jobs(args.pipeline_id, args.retry_allowed_failures)
print('job retried successfully: {}'.format(res))
print('jobs retried successfully: {}'.format(res))
elif args.action == 'get_job_tags':
ret = gitlab_inst.get_job_tags(args.job_id)
print(ret)