From 54ed5846fea9caa899387affde17183190fa69f7 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 13:41:58 +0900 Subject: [PATCH 01/17] fix Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/client.py | 185 +++++++++++++++++++++++++++++++++++++++++++++++++- src/dump.py | 22 +++--- src/models.py | 8 +-- 3 files changed, 199 insertions(+), 16 deletions(-) diff --git a/src/client.py b/src/client.py index 07591d6d..7495e23d 100644 --- a/src/client.py +++ b/src/client.py @@ -37,7 +37,8 @@ def get_paginate(self, end_point, params=None): while True: logger.info(f"{end_point} {page}") res = self.get( - end_point, params={**(params or {}), "page": page, "per_page": self.per_page} + end_point, + params={**(params or {}), "page": page, "per_page": self.per_page}, ) yield from res if len(res) < self.per_page: @@ -135,3 +136,185 @@ def get_discussions(self, owner, repo): after = page_info["endCursor"] if not page_info["hasNextPage"]: break + + def get_issues_graphql(self, owner, repo): + query = """ +query { + repository(owner: "%s", name: "%s") { + issues(first: %d, states: [OPEN, CLOSED], orderBy: {field: CREATED_AT, direction: ASC}) { + totalCount + pageInfo { + endCursor + hasNextPage + } + nodes { + id + number + title + body + state + closedAt + createdAt + updatedAt + url + author { + login + ... on User { id } + } + } + } + } +} +""" % ( + owner, + repo, + self.per_page, + # state, + ) + + query_with_cursor = """ +query { + repository(owner: "%s", name: "%s") { + issues(first: %d, states: [OPEN, CLOSED], after: "AFTER", orderBy: {field: CREATED_AT, direction: ASC}) { + totalCount + pageInfo { + endCursor + hasNextPage + } + nodes { + id + number + title + body + state + closedAt + createdAt + updatedAt + url + author { + login + ... on User { id } + } + } + } + } +} +""" % ( + owner, + repo, + self.per_page, + ) + after = None + while True: + q = query if after is None else query_with_cursor.replace("AFTER", after) + data = self.run_graphql_query(q) + issues = data["data"]["repository"]["issues"] + for node in issues["nodes"]: + # Normalize author and pullRequest for compatibility with models.py + if node["author"] and "id" in node["author"]: + node["user"] = { + "id": node["author"]["id"], + "login": node["author"]["login"], + } + else: + node["user"] = { + "id": 0, + "login": node["author"]["login"] if node["author"] else None, + } + node["pullRequest"] = False + yield node + page_info = issues["pageInfo"] + after = page_info["endCursor"] + if not page_info["hasNextPage"]: + break + + def get_pulls_graphql(self, owner, repo): + query = """ +query { + repository(owner: "%s", name: "%s") { + pullRequests(first: %d, states: [OPEN, CLOSED], orderBy: {field: CREATED_AT, direction: ASC}) { + totalCount + pageInfo { + endCursor + hasNextPage + } + nodes { + id + number + title + body + state + closedAt + createdAt + updatedAt + url + author { + login + ... on User { id } + } + } + } + } +} +""" % ( + owner, + repo, + self.per_page, + # state, + ) + + query_with_cursor = """ +query { + repository(owner: "%s", name: "%s") { + pullRequests(first: %d, states: [OPEN, CLOSED], after: "AFTER", orderBy: {field: CREATED_AT, direction: ASC}) { + totalCount + pageInfo { + endCursor + hasNextPage + } + nodes { + id + number + title + body + state + closedAt + createdAt + updatedAt + url + author { + login + ... on User { id } + } + } + } + } +} +""" % ( + owner, + repo, + self.per_page, + ) + after = None + while True: + q = query if after is None else query_with_cursor.replace("AFTER", after) + data = self.run_graphql_query(q) + pulls = data["data"]["repository"]["pullRequests"] + for node in pulls["nodes"]: + # Normalize author and pullRequest for compatibility with models.py + if node["author"] and "id" in node["author"]: + node["user"] = { + "id": node["author"]["id"], + "login": node["author"]["login"], + } + else: + node["user"] = { + "id": 0, + "login": node["author"]["login"] if node["author"] else None, + } + node["pullRequest"] = True + yield node + page_info = pulls["pageInfo"] + after = page_info["endCursor"] + if not page_info["hasNextPage"]: + break diff --git a/src/dump.py b/src/dump.py index 745121e7..abed4d56 100644 --- a/src/dump.py +++ b/src/dump.py @@ -44,7 +44,7 @@ def main(): g = GitHubApiClient(per_page=100) pprint(g.get_rate_limit()) since = datetime(1970, 1, 1) - # since = datetime(2022, 7, 1) + # since = datetime(2025, 4, 1) logger.info("Collecting commits") commits = g.get_commits( @@ -61,22 +61,22 @@ def main(): logger.info("Collecting mlflow org members") mlflow_org_members = set( - HashableDict(id=m["id"], login=m["login"]) for m in g.get_organization_members("mlflow") + HashableDict(id=m["id"], login=m["login"]) + for m in g.get_organization_members("mlflow") ) collaborators = set( - HashableDict(id=c["id"], login=c["login"]) for c in g.get_collaborators(*repo) + HashableDict(id=c["id"], login=c["login"]) + for c in g.get_collaborators(*repo) + ) + session.add_all( + M.MlflowOrgMember.from_gh_objects(mlflow_org_members.union(collaborators)) ) - session.add_all(M.MlflowOrgMember.from_gh_objects(mlflow_org_members.union(collaborators))) logger.info("Collecting issues") - issues = g.get_issues( - *repo, - params={ - "state": "all", - "since": since, - }, - ) + issues = g.get_issues_graphql(*repo) + pulls = g.get_pulls_graphql(*repo) session.add_all(M.Issue.from_gh_objects(issues)) + session.add_all(M.Issue.from_gh_objects(pulls)) logger.info("Collecting discussions") discussions = g.get_discussions(*repo) diff --git a/src/models.py b/src/models.py index 6a0d3f78..7eb3c716 100644 --- a/src/models.py +++ b/src/models.py @@ -143,10 +143,10 @@ def from_gh_object(cls, issue): body=issue["body"], state=issue["state"], closed_at=closed_at and parse_datetime(closed_at), - created_at=parse_datetime(issue["created_at"]), - updated_at=parse_datetime(issue["updated_at"]), - html_url=issue["html_url"], - is_pr="pull_request" in issue, + created_at=parse_datetime(issue["createdAt"]), + updated_at=parse_datetime(issue["updatedAt"]), + html_url=issue["url"], + is_pr=issue.get("pullRequest", False), ) From 940cc1b7e3eba35ce333b68fafc39b5ead63a331 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 13:44:14 +0900 Subject: [PATCH 02/17] test Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- .github/workflows/build.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0406a743..867b2a53 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -6,6 +6,7 @@ on: push: branches: - master + pull_request: workflow_dispatch: permissions: @@ -26,6 +27,7 @@ jobs: - name: Install dependencies run: pip install -r requirements.txt - name: Run builder + if: github.event_name != 'pull_request' run: | python src/dump.py python src/build.py @@ -33,6 +35,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.HARUPY_GITHUB_TOKEN }} - name: Deploy 🚀 uses: JamesIves/github-pages-deploy-action@v4 + if: github.event_name != 'pull_request' with: branch: gh-pages folder: dist From fa186c5483ab95ed807b97a04c651ce217ecd706 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 13:45:49 +0900 Subject: [PATCH 03/17] fix Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- .github/workflows/build.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 867b2a53..16ffc59d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,7 +27,6 @@ jobs: - name: Install dependencies run: pip install -r requirements.txt - name: Run builder - if: github.event_name != 'pull_request' run: | python src/dump.py python src/build.py From c8febeb4dd49f0cec46c668d82f3bb0ddd1037b0 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 13:48:55 +0900 Subject: [PATCH 04/17] a Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/client.py b/src/client.py index 7495e23d..4d4d68fe 100644 --- a/src/client.py +++ b/src/client.py @@ -205,7 +205,10 @@ def get_issues_graphql(self, owner, repo): self.per_page, ) after = None + page = 0 while True: + page += 1 + logger.info(f"Issues page {page}") q = query if after is None else query_with_cursor.replace("AFTER", after) data = self.run_graphql_query(q) issues = data["data"]["repository"]["issues"] @@ -296,7 +299,10 @@ def get_pulls_graphql(self, owner, repo): self.per_page, ) after = None + page = 0 while True: + page += 1 + logger.info(f"Pulls page {page}") q = query if after is None else query_with_cursor.replace("AFTER", after) data = self.run_graphql_query(q) pulls = data["data"]["repository"]["pullRequests"] From cda5648460bf03aa06ce329485a13c08deeaafa7 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:01:15 +0900 Subject: [PATCH 05/17] fix Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/build.py | 2 +- src/client.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/build.py b/src/build.py index 6024e428..4b675bc4 100644 --- a/src/build.py +++ b/src/build.py @@ -357,7 +357,7 @@ def main(): ) opened_pulls = opened_pulls[(opened_pulls._merge == "left_only")].drop("_merge", axis=1) opened_pulls_by_month = count_by_month(opened_pulls, "created_at") - closed_pulls = opened_pulls[opened_pulls["state"] == "closed"] + closed_pulls = opened_pulls[opened_pulls["state"] == "closed" | opened_pulls["state"] == "merged"] closed_pulls_by_month = count_by_month(closed_pulls, "closed_at") pulls_non_maintainers_plot_path = plots_dir.joinpath("pulls_non_maintainers.html") make_plot( diff --git a/src/client.py b/src/client.py index 4d4d68fe..0edee415 100644 --- a/src/client.py +++ b/src/client.py @@ -225,6 +225,7 @@ def get_issues_graphql(self, owner, repo): "login": node["author"]["login"] if node["author"] else None, } node["pullRequest"] = False + node["state"] = node["state"].lower() yield node page_info = issues["pageInfo"] after = page_info["endCursor"] @@ -235,7 +236,7 @@ def get_pulls_graphql(self, owner, repo): query = """ query { repository(owner: "%s", name: "%s") { - pullRequests(first: %d, states: [OPEN, CLOSED], orderBy: {field: CREATED_AT, direction: ASC}) { + pullRequests(first: %d, states: [OPEN, CLOSED, MERGED], orderBy: {field: CREATED_AT, direction: ASC}) { totalCount pageInfo { endCursor @@ -269,7 +270,7 @@ def get_pulls_graphql(self, owner, repo): query_with_cursor = """ query { repository(owner: "%s", name: "%s") { - pullRequests(first: %d, states: [OPEN, CLOSED], after: "AFTER", orderBy: {field: CREATED_AT, direction: ASC}) { + pullRequests(first: %d, states: [OPEN, CLOSED, MERGED], after: "AFTER", orderBy: {field: CREATED_AT, direction: ASC}) { totalCount pageInfo { endCursor @@ -319,6 +320,7 @@ def get_pulls_graphql(self, owner, repo): "login": node["author"]["login"] if node["author"] else None, } node["pullRequest"] = True + node["state"] = node["state"].lower() yield node page_info = pulls["pageInfo"] after = page_info["endCursor"] From 9141d4687183256c433acb6890fda170491b09e8 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:09:26 +0900 Subject: [PATCH 06/17] fix --- src/build.py | 79 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/src/build.py b/src/build.py index 4b675bc4..fa7374e2 100644 --- a/src/build.py +++ b/src/build.py @@ -24,7 +24,9 @@ def count_by_month(df, datetime_col): .pipe( lambda df_: ( df_.set_index( - df_.index.map(lambda year_month: datetime(year_month[0], year_month[1], 1)) + df_.index.map( + lambda year_month: datetime(year_month[0], year_month[1], 1) + ) ) ) ) @@ -136,13 +138,13 @@ def main(): x_tick_vals=x_tick_vals, x_axis_range=x_axis_range, y_axis_range=get_y_axis_range( - contributors_by_month[contributors_by_month["date"] >= year_ago]["count"] + contributors_by_month[contributors_by_month["date"] >= year_ago][ + "count" + ] ), ).write_html(contributors_plot_path, include_plotlyjs="cdn") - commits_url_template = ( - "https://github.com/mlflow/mlflow/commits?author={author}&since={since}&until={until}" - ) + commits_url_template = "https://github.com/mlflow/mlflow/commits?author={author}&since={since}&until={until}" anchor_template = '{text}' six_month_ago = now - relativedelta(months=6) active_contributors = ( @@ -167,11 +169,15 @@ def main(): ) .assign( user=lambda df: df.apply( - lambda row: anchor_template.format(url=row["user_url"], text=row["user_login"]), + lambda row: anchor_template.format( + url=row["user_url"], text=row["user_login"] + ), axis=1, ), PRs=lambda df: df.apply( - lambda row: anchor_template.format(url=row["commits"], text=row["PRs"]), + lambda row: anchor_template.format( + url=row["commits"], text=row["PRs"] + ), axis=1, ), ) @@ -197,7 +203,9 @@ def main(): first_commits = raw_commits.sort_values("date").groupby("user_name").head(1) total_contributors_by_month = count_by_month(first_commits, "date") - total_contributors_by_month["count"] = total_contributors_by_month["count"].cumsum() + total_contributors_by_month["count"] = total_contributors_by_month[ + "count" + ].cumsum() total_contributors_path = plots_dir.joinpath("total_contributors.html") make_plot( go.Scatter( @@ -209,9 +217,9 @@ def main(): x_tick_vals=x_tick_vals, x_axis_range=x_axis_range, y_axis_range=get_y_axis_range( - total_contributors_by_month[total_contributors_by_month["date"] >= year_ago][ - "count" - ] + total_contributors_by_month[ + total_contributors_by_month["date"] >= year_ago + ]["count"] ), ).write_html(total_contributors_path, include_plotlyjs="cdn") @@ -306,8 +314,12 @@ def main(): x_tick_vals=x_tick_vals, x_axis_range=x_axis_range, y_axis_range=get_y_axis_range( - opened_issues_by_month[opened_issues_by_month["date"] >= year_ago]["count"], - closed_issues_by_month[closed_issues_by_month["date"] >= year_ago]["count"], + opened_issues_by_month[opened_issues_by_month["date"] >= year_ago][ + "count" + ], + closed_issues_by_month[closed_issues_by_month["date"] >= year_ago][ + "count" + ], ), ).write_html(issues_plot_path, include_plotlyjs="cdn") @@ -319,9 +331,14 @@ def main(): how="outer", indicator=True, ) - opened_pulls = opened_pulls[(opened_pulls._merge == "both")].drop("_merge", axis=1) + opened_pulls = opened_pulls[(opened_pulls._merge == "both")].drop( + "_merge", axis=1 + ) opened_pulls_by_month = count_by_month(opened_pulls, "created_at") - closed_pulls = opened_pulls[opened_pulls["state"] == "closed"] + closed_pulls = opened_pulls[ + opened_pulls["state"] == "closed" | opened_pulls["state"] == "merged" + ] + print(closed_pulls) closed_pulls_by_month = count_by_month(closed_pulls, "closed_at") pulls_maintainers_plot_path = plots_dir.joinpath("pulls_all.html") make_plot( @@ -341,8 +358,12 @@ def main(): x_tick_vals=x_tick_vals, x_axis_range=x_axis_range, y_axis_range=get_y_axis_range( - opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago]["count"], - closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago]["count"], + opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago][ + "count" + ], + closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago][ + "count" + ], ), ).write_html(pulls_maintainers_plot_path, include_plotlyjs="cdn") @@ -355,11 +376,17 @@ def main(): how="outer", indicator=True, ) - opened_pulls = opened_pulls[(opened_pulls._merge == "left_only")].drop("_merge", axis=1) + opened_pulls = opened_pulls[(opened_pulls._merge == "left_only")].drop( + "_merge", axis=1 + ) opened_pulls_by_month = count_by_month(opened_pulls, "created_at") - closed_pulls = opened_pulls[opened_pulls["state"] == "closed" | opened_pulls["state"] == "merged"] + closed_pulls = opened_pulls[ + opened_pulls["state"] == "closed" | opened_pulls["state"] == "merged" + ] closed_pulls_by_month = count_by_month(closed_pulls, "closed_at") - pulls_non_maintainers_plot_path = plots_dir.joinpath("pulls_non_maintainers.html") + pulls_non_maintainers_plot_path = plots_dir.joinpath( + "pulls_non_maintainers.html" + ) make_plot( go.Scatter( x=opened_pulls_by_month["date"], @@ -377,8 +404,12 @@ def main(): x_tick_vals=x_tick_vals, x_axis_range=x_axis_range, y_axis_range=get_y_axis_range( - opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago]["count"], - closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago]["count"], + opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago][ + "count" + ], + closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago][ + "count" + ], ), ).write_html(pulls_non_maintainers_plot_path, include_plotlyjs="cdn") @@ -441,7 +472,9 @@ def main(): iframes = [] for plot in plots: iframes.append(iframe_html_template.format(src=plot.relative_to(dist_dir))) - plots_html += '
{plots}
'.format(plots="".join(iframes)) + plots_html += '
{plots}
'.format( + plots="".join(iframes) + ) logo = Path("assets", "MLflow-logo-final-black.png") favicon = Path("assets", "icon.svg") From 1f37a606c1ccc397b3f6df9045e1a16e2727e6ad Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:14:15 +0900 Subject: [PATCH 07/17] test Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/build.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/build.py b/src/build.py index fa7374e2..d3293ec0 100644 --- a/src/build.py +++ b/src/build.py @@ -297,6 +297,10 @@ def main(): closed_issues = opened_issues[opened_issues["state"] == "closed"] closed_issues_by_month = count_by_month(closed_issues, "closed_at") issues_plot_path = plots_dir.joinpath("issues.html") + print( + opened_issues_by_month[opened_issues_by_month["date"] >= year_ago]["count"], + closed_issues_by_month[closed_issues_by_month["date"] >= year_ago]["count"], + ) make_plot( go.Scatter( x=opened_issues_by_month["date"], From e04fdab40e835d8565de4e0f5d45e850a2f5504f Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:17:07 +0900 Subject: [PATCH 08/17] chain Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/build.py | 4 ---- src/dump.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/build.py b/src/build.py index d3293ec0..fa7374e2 100644 --- a/src/build.py +++ b/src/build.py @@ -297,10 +297,6 @@ def main(): closed_issues = opened_issues[opened_issues["state"] == "closed"] closed_issues_by_month = count_by_month(closed_issues, "closed_at") issues_plot_path = plots_dir.joinpath("issues.html") - print( - opened_issues_by_month[opened_issues_by_month["date"] >= year_ago]["count"], - closed_issues_by_month[closed_issues_by_month["date"] >= year_ago]["count"], - ) make_plot( go.Scatter( x=opened_issues_by_month["date"], diff --git a/src/dump.py b/src/dump.py index abed4d56..33940a2e 100644 --- a/src/dump.py +++ b/src/dump.py @@ -1,5 +1,6 @@ import logging import sqlite3 +import itertools from datetime import datetime from pathlib import Path from pprint import pprint @@ -75,8 +76,7 @@ def main(): logger.info("Collecting issues") issues = g.get_issues_graphql(*repo) pulls = g.get_pulls_graphql(*repo) - session.add_all(M.Issue.from_gh_objects(issues)) - session.add_all(M.Issue.from_gh_objects(pulls)) + session.add_all(M.Issue.from_gh_objects(itertools.chain(issues, pulls))) logger.info("Collecting discussions") discussions = g.get_discussions(*repo) From 5c0dadfac45c4f47bf70e9655a827c501990e6ca Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:23:41 +0900 Subject: [PATCH 09/17] isin --- src/build.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/build.py b/src/build.py index fa7374e2..573c50bf 100644 --- a/src/build.py +++ b/src/build.py @@ -335,9 +335,7 @@ def main(): "_merge", axis=1 ) opened_pulls_by_month = count_by_month(opened_pulls, "created_at") - closed_pulls = opened_pulls[ - opened_pulls["state"] == "closed" | opened_pulls["state"] == "merged" - ] + closed_pulls = opened_pulls[opened_pulls["state"].isin(["closed", "merged"])] print(closed_pulls) closed_pulls_by_month = count_by_month(closed_pulls, "closed_at") pulls_maintainers_plot_path = plots_dir.joinpath("pulls_all.html") @@ -380,9 +378,7 @@ def main(): "_merge", axis=1 ) opened_pulls_by_month = count_by_month(opened_pulls, "created_at") - closed_pulls = opened_pulls[ - opened_pulls["state"] == "closed" | opened_pulls["state"] == "merged" - ] + closed_pulls = opened_pulls[opened_pulls["state"].isin(["closed", "merged"])] closed_pulls_by_month = count_by_month(closed_pulls, "closed_at") pulls_non_maintainers_plot_path = plots_dir.joinpath( "pulls_non_maintainers.html" From 72a1c06eed8f06193b2d83a8a001682ac03e0f86 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:25:57 +0900 Subject: [PATCH 10/17] test Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- .github/workflows/build.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 16ffc59d..0fc70637 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -32,6 +32,9 @@ jobs: python src/build.py env: GITHUB_TOKEN: ${{ secrets.HARUPY_GITHUB_TOKEN }} + - uses: actions/upload-artifact@v3 + with: + path: dist - name: Deploy 🚀 uses: JamesIves/github-pages-deploy-action@v4 if: github.event_name != 'pull_request' From 81a5b92c9c320dbe25a82289d73e12e60000cfb1 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:27:12 +0900 Subject: [PATCH 11/17] v2 Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0fc70637..6ecfbbb4 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -32,7 +32,7 @@ jobs: python src/build.py env: GITHUB_TOKEN: ${{ secrets.HARUPY_GITHUB_TOKEN }} - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: dist - name: Deploy 🚀 From ddcb3a51a9b7b9e5585e9df4973f4d0ac0d23815 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:35:22 +0900 Subject: [PATCH 12/17] fix Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/models.py b/src/models.py index 7eb3c716..ca372908 100644 --- a/src/models.py +++ b/src/models.py @@ -134,7 +134,6 @@ class Issue(BaseModel): @classmethod def from_gh_object(cls, issue): - closed_at = issue.get("closed_at") return cls( id=issue["id"], user_id=issue["user"]["id"], @@ -142,7 +141,7 @@ def from_gh_object(cls, issue): title=issue["title"], body=issue["body"], state=issue["state"], - closed_at=closed_at and parse_datetime(closed_at), + closed_at=(ca := issue.get("closedAt")) and parse_datetime(ca), created_at=parse_datetime(issue["createdAt"]), updated_at=parse_datetime(issue["updatedAt"]), html_url=issue["url"], From 1477ddcaa49972b4041e6c156216841ff826e8c6 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 14:42:56 +0900 Subject: [PATCH 13/17] a Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/build.py b/src/build.py index 573c50bf..047beb6f 100644 --- a/src/build.py +++ b/src/build.py @@ -336,7 +336,7 @@ def main(): ) opened_pulls_by_month = count_by_month(opened_pulls, "created_at") closed_pulls = opened_pulls[opened_pulls["state"].isin(["closed", "merged"])] - print(closed_pulls) + print(opened_pulls, closed_pulls) closed_pulls_by_month = count_by_month(closed_pulls, "closed_at") pulls_maintainers_plot_path = plots_dir.joinpath("pulls_all.html") make_plot( From 2cab1705bab94648bb7407d266aba3d510949929 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 15:33:48 +0900 Subject: [PATCH 14/17] node_id --- src/dump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dump.py b/src/dump.py index 33940a2e..0f582995 100644 --- a/src/dump.py +++ b/src/dump.py @@ -62,11 +62,11 @@ def main(): logger.info("Collecting mlflow org members") mlflow_org_members = set( - HashableDict(id=m["id"], login=m["login"]) + HashableDict(id=m["node_id"], login=m["login"]) for m in g.get_organization_members("mlflow") ) collaborators = set( - HashableDict(id=c["id"], login=c["login"]) + HashableDict(id=c["node_id"], login=c["login"]) for c in g.get_collaborators(*repo) ) session.add_all( From 1146ea8cc3310e13e9f943c5c07939aa61524950 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 15:34:16 +0900 Subject: [PATCH 15/17] test Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models.py b/src/models.py index ca372908..c1868e94 100644 --- a/src/models.py +++ b/src/models.py @@ -54,7 +54,7 @@ def from_gh_object(cls, user): class MlflowOrgMember(BaseModel): __tablename__ = "mlflow_org_members" - id = Column(Integer, primary_key=True) + id = Column(String, primary_key=True) login = Column(String, unique=True) @classmethod From cd8da7ae2b2520e3a5a4adcd9d4d100c387964f7 Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 15:40:10 +0900 Subject: [PATCH 16/17] fix Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/models.py b/src/models.py index c1868e94..a22ef32b 100644 --- a/src/models.py +++ b/src/models.py @@ -80,7 +80,7 @@ class Commit(BaseModel): id = Column(String(40), primary_key=True) html_url = Column(String) url = Column(String) - user_id = Column(Integer, ForeignKey("users.id"), nullable=True) + user_id = Column(String, ForeignKey("users.id"), nullable=True) user_name = Column(String, nullable=True) user_login = Column(String, nullable=True) user_email = Column(String, nullable=True) @@ -92,7 +92,7 @@ def from_gh_object(cls, commit): id=commit["sha"], url=commit["url"], html_url=commit["html_url"], - user_id=(commit.get("author") or {}).get("id", 0), + user_id=(commit.get("author") or {}).get("node_id", 0), user_name=(commit["commit"].get("author") or {}).get("name", ""), user_login=(commit.get("author") or {}).get("login", ""), user_email=(commit["commit"].get("author") or {}).get("email", ""), @@ -103,7 +103,7 @@ def from_gh_object(cls, commit): class Stargazer(BaseModel): __tablename__ = "stargazers" - id = Column(Integer, primary_key=True) + id = Column(String, primary_key=True) starred_at = Column(DateTime) user_id = Column(Integer, ForeignKey("users.id")) @@ -113,7 +113,7 @@ def from_gh_object(cls, stargazer): return return cls( starred_at=parse_datetime(stargazer["starred_at"]), - user_id=stargazer["user"]["id"], + user_id=stargazer["user"]["node_id"], ) From 0e535a11f2c4082f94596833d11d7de61cabd5af Mon Sep 17 00:00:00 2001 From: harupy <17039389+harupy@users.noreply.github.com> Date: Fri, 2 May 2025 15:40:36 +0900 Subject: [PATCH 17/17] a Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- src/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models.py b/src/models.py index a22ef32b..73478a60 100644 --- a/src/models.py +++ b/src/models.py @@ -120,7 +120,7 @@ def from_gh_object(cls, stargazer): class Issue(BaseModel): __tablename__ = "issues" - id = Column(Integer, primary_key=True) + id = Column(String, primary_key=True) user_id = Column(Integer, primary_key=True) number = Column(Integer) title = Column(String)