From 6622215f239e5139d1a0c07ad598a5b5f27bae20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 11 Jan 2023 19:17:34 +0100 Subject: [PATCH] GH-14997: [Release] Archery curate release takes into account both GitHub and old Apache Arrow JIRA issues --- dev/archery/archery/release/cli.py | 12 +- dev/archery/archery/release/core.py | 185 +++++++++++++++--- dev/archery/archery/release/reports.py | 5 +- .../archery/templates/release_curation.txt.j2 | 20 +- dev/archery/setup.py | 2 +- 5 files changed, 183 insertions(+), 41 deletions(-) diff --git a/dev/archery/archery/release/cli.py b/dev/archery/archery/release/cli.py index 4fbf93861e6f1..4c7a1d648d835 100644 --- a/dev/archery/archery/release/cli.py +++ b/dev/archery/archery/release/cli.py @@ -40,14 +40,18 @@ def release(obj, src, jira_cache): obj['repo'] = src.path -@release.command('curate', help="Lists release related Jira issues.") +@release.command('curate', help="Lists release related issues.") @click.argument('version') @click.option('--minimal/--full', '-m/-f', - help="Only show actionable Jira issues.", default=False) + help="Only show actionable issues.", default=False) +@click.option('--github-token', '-t', default=None, + envvar="CROSSBOW_GITHUB_TOKEN", + help='OAuth token for GitHub authentication') @click.pass_obj -def release_curate(obj, version, minimal): +def release_curate(obj, version, minimal, github_token): """Release curation.""" - release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo']) + release = Release(version, jira=None, repo=obj['repo'], + github_token=github_token) curation = release.curate(minimal) click.echo(curation.render('console')) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 03eceb80a1034..e73a6e262117e 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -25,6 +25,7 @@ import warnings from git import Repo +from github import Github from jira import JIRA 
from semver import VersionInfo as SemVer @@ -58,13 +59,28 @@ def from_jira(cls, jira_version): release_date=getattr(jira_version, 'releaseDate', None) ) + @classmethod + def from_milestone(cls, milestone): + return cls.parse( + milestone.title, + released=milestone.state == "closed", + release_date=milestone.due_on + ) + + +ORIGINAL_ARROW_REGEX = re.compile( + r"\*This issue was originally created as " + + r"\[(?P<issue>ARROW\-(?P<issue_id>(\d+)))\]" +) + class Issue: - def __init__(self, key, type, summary): + def __init__(self, key, type, summary, github_issue=None): self.key = key self.type = type self.summary = summary + self._github_issue = github_issue @classmethod def from_jira(cls, jira_issue): @@ -74,6 +90,23 @@ def from_jira(cls, jira_issue): summary=jira_issue.fields.summary ) + @classmethod + def from_github(cls, github_issue): + original_jira = cls.original_jira_id(github_issue) + key = original_jira or github_issue.number + return cls( + key=key, + type=next( + iter( + [ + label for label in github_issue.labels + if label.name.startswith("Type:") + ] + ), None), + summary=github_issue.title, + github_issue=github_issue + ) + @property def project(self): return self.key.split('-')[0] @@ -82,6 +115,25 @@ def project(self): def number(self): return int(self.key.split('-')[1]) + @cached_property + def pr(self): + if self.is_pr: + return self._github_issue.as_pull_request() + + @cached_property + def is_pr(self): + return bool(self._github_issue and self._github_issue.pull_request) + + @classmethod + def original_jira_id(cls, github_issue): + # All migrated issues contain body + if not github_issue.body: + return None + matches = ORIGINAL_ARROW_REGEX.search(github_issue.body) + if matches: + values = matches.groupdict() + return values['issue'] + class Jira(JIRA): @@ -112,6 +164,63 @@ def project_issues(self, version, project='ARROW'): return list(map(Issue.from_jira, issues)) +class IssueTracker: + + def __init__(self, jira=None, github_token=None): + if not jira: + 
github = Github(github_token) + self.github_repo = github.get_repo('apache/arrow') + self.jira = jira + + def project_version(self, version_string, *args, **kwargs): + if self.jira: + return self.jira.project_version(version_string, *args, **kwargs) + else: + milestones = self.github_repo.get_milestones(state="all") + for milestone in milestones: + if milestone.title == version_string: + return Version.from_milestone(milestone) + + def project_versions(self, *args, **kwargs): + if self.jira: + return self.jira.project_versions(*args, **kwargs) + else: + versions = [] + milestones = self.github_repo.get_milestones(state="all") + for milestone in milestones: + try: + versions.append(Version.from_milestone(milestone)) + except ValueError: + # ignore invalid semantic versions like JS-0.4.0 + continue + return sorted(versions, reverse=True) + + def _milestone_from_semver(self, semver): + milestones = self.github_repo.get_milestones(state="all") + for milestone in milestones: + try: + if milestone.title == semver: + return milestone + except ValueError: + # ignore invalid semantic versions like JS-0.3.0 + continue + + def project_issues(self, version, *args, **kwargs): + if self.jira: + return self.jira.project_issues(version, *args, **kwargs) + else: + issues = self.github_repo.get_issues( + milestone=self._milestone_from_semver(version), + state="all") + return list(map(Issue.from_github, issues)) + + def issue(self, key): + if self.jira: + return Issue.from_jira(super().issue(key)) + else: + return Issue.from_github(self.github_repo.get_issue(key)) + + class CachedJira: def __init__(self, cache_path, jira=None): @@ -135,7 +244,7 @@ def wrapper(*args, **kwargs): _TITLE_REGEX = re.compile( - r"(?P<issue>(?P<project>(ARROW|PARQUET))\-\d+)?\s*:?\s*" + r"(?P<issue>(?P<project>(ARROW|PARQUET|GH))\-(?P<issue_id>(\d+)))?\s*:?\s*" r"(?P<minor>(MINOR))?\s*:?\s*" r"(?P<components>\[.*\])?\s*(?P<summary>.*)" ) @@ -145,9 +254,10 @@ def wrapper(*args, **kwargs): class CommitTitle: def __init__(self, summary, project=None, issue=None, minor=None, - 
components=None): + components=None, issue_id=None): self.project = project self.issue = issue + self.issue_id = issue_id self.components = components or [] self.summary = summary self.minor = bool(minor) @@ -186,6 +296,7 @@ def parse(cls, headline): values['summary'], project=values.get('project'), issue=values.get('issue'), + issue_id=values.get('issue_id'), minor=values.get('minor'), components=components ) @@ -230,7 +341,7 @@ def title(self): class Release: - def __new__(self, version, jira=None, repo=None): + def __new__(self, version, jira=None, repo=None, github_token=None): if isinstance(version, str): version = Version.parse(version) elif not isinstance(version, Version): @@ -250,15 +361,15 @@ def __new__(self, version, jira=None, repo=None): return super().__new__(klass) - def __init__(self, version, jira, repo): + def __init__(self, version, jira, repo, github_token=None): if jira is None: - jira = Jira() + pass elif isinstance(jira, str): jira = Jira(jira) elif not isinstance(jira, (Jira, CachedJira)): raise TypeError("`jira` argument must be a server url or a valid " "Jira instance") - + issue_tracker = IssueTracker(jira, github_token=github_token) if repo is None: arrow = ArrowSources.find() repo = Repo(arrow.path) @@ -269,13 +380,16 @@ def __init__(self, version, jira, repo): "instance") if isinstance(version, str): - version = jira.project_version(version, project='ARROW') + version = issue_tracker.project_version(version) + elif not isinstance(version, Version): raise TypeError(version) self.version = version self.jira = jira self.repo = repo + self.issue_tracker = issue_tracker + self._github_token = github_token def __repr__(self): if self.version.released: @@ -322,7 +436,8 @@ def previous(self): # first release doesn't have a previous one return None else: - return Release.from_jira(previous, jira=self.jira, repo=self.repo) + return Release(previous, jira=self.jira, repo=self.repo, + github_token=self._github_token) @cached_property def 
next(self): @@ -332,11 +447,14 @@ def next(self): raise ValueError("There is no upcoming release set in JIRA after " f"version {self.version}") upcoming = self.siblings[position - 1] - return Release.from_jira(upcoming, jira=self.jira, repo=self.repo) + return Release(upcoming, jira=self.jira, repo=self.repo, + github_token=self._github_token) @cached_property def issues(self): - issues = self.jira.project_issues(self.version, project='ARROW') + issues = self.issue_tracker.project_issues( + self.version, project='ARROW' + ) return {i.key: i for i in issues} @cached_property @@ -403,29 +521,43 @@ def default_branch(self): return default_branch_name def curate(self, minimal=False): - # handle commits with parquet issue key specially and query them from - # jira and add it to the issues + # handle commits with parquet issue key specially release_issues = self.issues - - within, outside, nojira, parquet = [], [], [], [] + within, outside, noissue, parquet, minor = [], [], [], [], [] for c in self.commits: if c.issue is None: - nojira.append(c) - elif c.issue in release_issues: - within.append((release_issues[c.issue], c)) + if c.title.minor: + minor.append(c) + else: + noissue.append(c) + elif c.project == 'GH': + if int(c.issue_id) in release_issues: + within.append((release_issues[int(c.issue_id)], c)) + else: + outside.append( + (self.issue_tracker.issue(int(c.issue_id)), c)) + elif c.project == 'ARROW': + if c.issue in release_issues: + within.append((release_issues[c.issue], c)) + else: + outside.append((c.issue, c)) elif c.project == 'PARQUET': - parquet.append((self.jira.issue(c.issue), c)) + parquet.append((c.issue, c)) else: - outside.append((self.jira.issue(c.issue), c)) + warnings.warn( + f'Issue {c.issue} is not MINOR nor pertains to GH' + + ', ARROW or PARQUET') + outside.append((c.issue, c)) # remaining jira tickets within_keys = {i.key for i, c in within} + # Take into account that some issues milestoned are prs nopatch = [issue for key, issue in 
release_issues.items() - if key not in within_keys] + if key not in within_keys and issue.is_pr is False] return ReleaseCuration(release=self, within=within, outside=outside, - nojira=nojira, parquet=parquet, nopatch=nopatch, - minimal=minimal) + noissue=noissue, parquet=parquet, + nopatch=nopatch, minimal=minimal, minor=minor) def changelog(self): issue_commit_pairs = [] @@ -525,7 +657,7 @@ def siblings(self): Filter only the major releases. """ # handle minor releases before 1.0 as major releases - return [v for v in self.jira.project_versions('ARROW') + return [v for v in self.issue_tracker.project_versions('ARROW') if v.patch == 0 and (v.major == 0 or v.minor == 0)] @@ -544,7 +676,8 @@ def siblings(self): """ Filter the major and minor releases. """ - return [v for v in self.jira.project_versions('ARROW') if v.patch == 0] + return [v for v in self.issue_tracker.project_versions('ARROW') + if v.patch == 0] class PatchRelease(Release): @@ -562,4 +695,4 @@ def siblings(self): """ No filtering, consider all releases. """ - return self.jira.project_versions('ARROW') + return self.issue_tracker.project_versions('ARROW') diff --git a/dev/archery/archery/release/reports.py b/dev/archery/archery/release/reports.py index 43093487c023f..aa78e9d07fabc 100644 --- a/dev/archery/archery/release/reports.py +++ b/dev/archery/archery/release/reports.py @@ -27,10 +27,11 @@ class ReleaseCuration(JinjaReport): 'release', 'within', 'outside', - 'nojira', + 'noissue', 'parquet', 'nopatch', - 'minimal' + 'minimal', + 'minor' ] diff --git a/dev/archery/archery/templates/release_curation.txt.j2 b/dev/archery/archery/templates/release_curation.txt.j2 index 4f524d001ce3f..0796f451625f1 100644 --- a/dev/archery/archery/templates/release_curation.txt.j2 +++ b/dev/archery/archery/templates/release_curation.txt.j2 @@ -17,26 +17,30 @@ # under the License. 
#} {%- if not minimal -%} -Total number of JIRA tickets assigned to version {{ release.version }}: {{ release.issues|length }} +### Total number of GitHub tickets assigned to version {{ release.version }}: {{ release.issues|length }} -Total number of applied patches since version {{ release.previous.version }}: {{ release.commits|length }} +### Total number of applied patches since version {{ release.previous.version }}: {{ release.commits|length }} -Patches with assigned issue in version {{ release.version }}: +### Patches with assigned issue in version {{ release.version }}: {{ within|length }} {% for issue, commit in within -%} - {{ commit.url }} {{ commit.title }} {% endfor %} {% endif -%} -Patches with assigned issue outside of version {{ release.version }}: +### Patches with assigned issue outside of version {{ release.version }}: {{ outside|length }} {% for issue, commit in outside -%} - {{ commit.url }} {{ commit.title }} {% endfor %} {% if not minimal -%} -Patches in version {{ release.version }} without a linked issue: -{% for commit in nojira -%} +### Minor patches in version {{ release.version }}: {{ minor|length }} +{% for commit in minor -%} - {{ commit.url }} {{ commit.title }} {% endfor %} -JIRA issues in version {{ release.version }} without a linked patch: +### Patches in version {{ release.version }} without a linked issue: +{% for commit in noissue -%} + - {{ commit.url }} {{ commit.title }} +{% endfor %} +### JIRA issues in version {{ release.version }} without a linked patch: {{ nopatch|length }} {% for issue in nopatch -%} - - https://issues.apache.org/jira/browse/{{ issue.key }} + - https://github.com/apache/arrow/issues/{{ issue.key }} {% endfor %} {%- endif -%} \ No newline at end of file diff --git a/dev/archery/setup.py b/dev/archery/setup.py index 4b13608cf833b..51f066c9ede40 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -31,7 +31,7 @@ 'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8', 'cmake_format==0.6.13'], 
'benchmark': ['pandas'], 'docker': ['ruamel.yaml', 'python-dotenv'], - 'release': [jinja_req, 'jira', 'semver', 'gitpython'], + 'release': ['pygithub', jinja_req, 'jira', 'semver', 'gitpython'], 'crossbow': ['github3.py', jinja_req, 'pygit2>=1.6.0', 'requests', 'ruamel.yaml', 'setuptools_scm'], 'crossbow-upload': ['github3.py', jinja_req, 'ruamel.yaml',