Skip to content

Use GraphQL API to avoid pagination limit #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
push:
branches:
- master
pull_request:
workflow_dispatch:

permissions:
Expand All @@ -31,8 +32,12 @@ jobs:
python src/build.py
env:
GITHUB_TOKEN: ${{ secrets.HARUPY_GITHUB_TOKEN }}
- uses: actions/upload-artifact@v4
with:
path: dist
- name: Deploy 🚀
uses: JamesIves/github-pages-deploy-action@v4
if: github.event_name != 'pull_request'
with:
branch: gh-pages
folder: dist
75 changes: 52 additions & 23 deletions src/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ def count_by_month(df, datetime_col):
.pipe(
lambda df_: (
df_.set_index(
df_.index.map(lambda year_month: datetime(year_month[0], year_month[1], 1))
df_.index.map(
lambda year_month: datetime(year_month[0], year_month[1], 1)
)
)
)
)
Expand Down Expand Up @@ -136,13 +138,13 @@ def main():
x_tick_vals=x_tick_vals,
x_axis_range=x_axis_range,
y_axis_range=get_y_axis_range(
contributors_by_month[contributors_by_month["date"] >= year_ago]["count"]
contributors_by_month[contributors_by_month["date"] >= year_ago][
"count"
]
),
).write_html(contributors_plot_path, include_plotlyjs="cdn")

commits_url_template = (
"https://github.com/mlflow/mlflow/commits?author={author}&since={since}&until={until}"
)
commits_url_template = "https://github.com/mlflow/mlflow/commits?author={author}&since={since}&until={until}"
anchor_template = '<a href="{url}">{text}</a>'
six_month_ago = now - relativedelta(months=6)
active_contributors = (
Expand All @@ -167,11 +169,15 @@ def main():
)
.assign(
user=lambda df: df.apply(
lambda row: anchor_template.format(url=row["user_url"], text=row["user_login"]),
lambda row: anchor_template.format(
url=row["user_url"], text=row["user_login"]
),
axis=1,
),
PRs=lambda df: df.apply(
lambda row: anchor_template.format(url=row["commits"], text=row["PRs"]),
lambda row: anchor_template.format(
url=row["commits"], text=row["PRs"]
),
axis=1,
),
)
Expand All @@ -197,7 +203,9 @@ def main():

first_commits = raw_commits.sort_values("date").groupby("user_name").head(1)
total_contributors_by_month = count_by_month(first_commits, "date")
total_contributors_by_month["count"] = total_contributors_by_month["count"].cumsum()
total_contributors_by_month["count"] = total_contributors_by_month[
"count"
].cumsum()
total_contributors_path = plots_dir.joinpath("total_contributors.html")
make_plot(
go.Scatter(
Expand All @@ -209,9 +217,9 @@ def main():
x_tick_vals=x_tick_vals,
x_axis_range=x_axis_range,
y_axis_range=get_y_axis_range(
total_contributors_by_month[total_contributors_by_month["date"] >= year_ago][
"count"
]
total_contributors_by_month[
total_contributors_by_month["date"] >= year_ago
]["count"]
),
).write_html(total_contributors_path, include_plotlyjs="cdn")

Expand Down Expand Up @@ -306,8 +314,12 @@ def main():
x_tick_vals=x_tick_vals,
x_axis_range=x_axis_range,
y_axis_range=get_y_axis_range(
opened_issues_by_month[opened_issues_by_month["date"] >= year_ago]["count"],
closed_issues_by_month[closed_issues_by_month["date"] >= year_ago]["count"],
opened_issues_by_month[opened_issues_by_month["date"] >= year_ago][
"count"
],
closed_issues_by_month[closed_issues_by_month["date"] >= year_ago][
"count"
],
),
).write_html(issues_plot_path, include_plotlyjs="cdn")

Expand All @@ -319,9 +331,12 @@ def main():
how="outer",
indicator=True,
)
opened_pulls = opened_pulls[(opened_pulls._merge == "both")].drop("_merge", axis=1)
opened_pulls = opened_pulls[(opened_pulls._merge == "both")].drop(
"_merge", axis=1
)
opened_pulls_by_month = count_by_month(opened_pulls, "created_at")
closed_pulls = opened_pulls[opened_pulls["state"] == "closed"]
closed_pulls = opened_pulls[opened_pulls["state"].isin(["closed", "merged"])]
print(opened_pulls, closed_pulls)
closed_pulls_by_month = count_by_month(closed_pulls, "closed_at")
pulls_maintainers_plot_path = plots_dir.joinpath("pulls_all.html")
make_plot(
Expand All @@ -341,8 +356,12 @@ def main():
x_tick_vals=x_tick_vals,
x_axis_range=x_axis_range,
y_axis_range=get_y_axis_range(
opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago]["count"],
closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago]["count"],
opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago][
"count"
],
closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago][
"count"
],
),
).write_html(pulls_maintainers_plot_path, include_plotlyjs="cdn")

Expand All @@ -355,11 +374,15 @@ def main():
how="outer",
indicator=True,
)
opened_pulls = opened_pulls[(opened_pulls._merge == "left_only")].drop("_merge", axis=1)
opened_pulls = opened_pulls[(opened_pulls._merge == "left_only")].drop(
"_merge", axis=1
)
opened_pulls_by_month = count_by_month(opened_pulls, "created_at")
closed_pulls = opened_pulls[opened_pulls["state"] == "closed"]
closed_pulls = opened_pulls[opened_pulls["state"].isin(["closed", "merged"])]
closed_pulls_by_month = count_by_month(closed_pulls, "closed_at")
pulls_non_maintainers_plot_path = plots_dir.joinpath("pulls_non_maintainers.html")
pulls_non_maintainers_plot_path = plots_dir.joinpath(
"pulls_non_maintainers.html"
)
make_plot(
go.Scatter(
x=opened_pulls_by_month["date"],
Expand All @@ -377,8 +400,12 @@ def main():
x_tick_vals=x_tick_vals,
x_axis_range=x_axis_range,
y_axis_range=get_y_axis_range(
opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago]["count"],
closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago]["count"],
opened_pulls_by_month[opened_pulls_by_month["date"] >= year_ago][
"count"
],
closed_pulls_by_month[closed_pulls_by_month["date"] >= year_ago][
"count"
],
),
).write_html(pulls_non_maintainers_plot_path, include_plotlyjs="cdn")

Expand Down Expand Up @@ -441,7 +468,9 @@ def main():
iframes = []
for plot in plots:
iframes.append(iframe_html_template.format(src=plot.relative_to(dist_dir)))
plots_html += '<div style="display: flex">{plots}</div>'.format(plots="".join(iframes))
plots_html += '<div style="display: flex">{plots}</div>'.format(
plots="".join(iframes)
)

logo = Path("assets", "MLflow-logo-final-black.png")
favicon = Path("assets", "icon.svg")
Expand Down
193 changes: 192 additions & 1 deletion src/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def get_paginate(self, end_point, params=None):
while True:
logger.info(f"{end_point} {page}")
res = self.get(
end_point, params={**(params or {}), "page": page, "per_page": self.per_page}
end_point,
params={**(params or {}), "page": page, "per_page": self.per_page},
)
yield from res
if len(res) < self.per_page:
Expand Down Expand Up @@ -135,3 +136,193 @@ def get_discussions(self, owner, repo):
after = page_info["endCursor"]
if not page_info["hasNextPage"]:
break

def get_issues_graphql(self, owner, repo):
    """Yield every issue of ``owner/repo`` via the GraphQL API, oldest first.

    Pages through ``repository.issues`` (states ``OPEN`` and ``CLOSED``,
    ordered by ``CREATED_AT`` ascending), ``self.per_page`` nodes per
    request, following ``pageInfo.endCursor`` until ``hasNextPage`` is false.

    Each yielded node is the raw GraphQL dict, adjusted in place for
    compatibility with models.py:

    * ``user`` -- ``{"id": ..., "login": ...}`` built from ``author``.
      ``id`` falls back to ``0`` when the author carries no ``id`` (the query
      only requests it for ``User`` authors) and ``login`` falls back to
      ``None`` when ``author`` is null.
    * ``pullRequest`` -- always ``False``.
    * ``state`` -- lower-cased.

    Args:
        owner: Repository owner login.
        repo: Repository name.

    Yields:
        dict: One normalized issue node per issue.

    Raises:
        KeyError, TypeError: If the response lacks the
            ``data.repository.issues`` path; no error handling is done here.
    """
    # Function-scope import: the module's import block is not touched.
    import json

    # A single template serves both the first page and the follow-up pages;
    # the optional ``after`` argument is spliced in through the last ``%s``.
    query_template = """
    query {
      repository(owner: %s, name: %s) {
        issues(first: %d, states: [OPEN, CLOSED]%s, orderBy: {field: CREATED_AT, direction: ASC}) {
          totalCount
          pageInfo {
            endCursor
            hasNextPage
          }
          nodes {
            id
            number
            title
            body
            state
            closedAt
            createdAt
            updatedAt
            url
            author {
              login
              ... on User { id }
            }
          }
        }
      }
    }
    """

    def build_query(cursor):
        # json.dumps emits a quoted, escaped literal, so a quote or backslash
        # in owner/repo/cursor cannot break out of the GraphQL string.
        after_clause = "" if cursor is None else ", after: %s" % json.dumps(cursor)
        return query_template % (
            json.dumps(owner),
            json.dumps(repo),
            self.per_page,
            after_clause,
        )

    after = None
    page = 0
    while True:
        page += 1
        logger.info(f"Issues page {page}")
        data = self.run_graphql_query(build_query(after))
        issues = data["data"]["repository"]["issues"]
        for node in issues["nodes"]:
            # Normalize author and pullRequest for compatibility with models.py
            author = node["author"] or {}
            node["user"] = {
                "id": author.get("id", 0),
                "login": author.get("login"),
            }
            node["pullRequest"] = False
            node["state"] = node["state"].lower()
            yield node
        page_info = issues["pageInfo"]
        after = page_info["endCursor"]
        if not page_info["hasNextPage"]:
            break

def get_pulls_graphql(self, owner, repo):
    """Yield every pull request of ``owner/repo`` via the GraphQL API, oldest first.

    Pages through ``repository.pullRequests`` (states ``OPEN``, ``CLOSED`` and
    ``MERGED``, ordered by ``CREATED_AT`` ascending), ``self.per_page`` nodes
    per request, following ``pageInfo.endCursor`` until ``hasNextPage`` is
    false.

    Each yielded node is the raw GraphQL dict, adjusted in place for
    compatibility with models.py:

    * ``user`` -- ``{"id": ..., "login": ...}`` built from ``author``.
      ``id`` falls back to ``0`` when the author carries no ``id`` (the query
      only requests it for ``User`` authors) and ``login`` falls back to
      ``None`` when ``author`` is null.
    * ``pullRequest`` -- always ``True``.
    * ``state`` -- lower-cased, so it is one of ``open``, ``closed`` or
      ``merged``.

    Args:
        owner: Repository owner login.
        repo: Repository name.

    Yields:
        dict: One normalized pull-request node per pull request.

    Raises:
        KeyError, TypeError: If the response lacks the
            ``data.repository.pullRequests`` path; no error handling is done
            here.
    """
    # Function-scope import: the module's import block is not touched.
    import json

    # A single template serves both the first page and the follow-up pages;
    # the optional ``after`` argument is spliced in through the last ``%s``.
    query_template = """
    query {
      repository(owner: %s, name: %s) {
        pullRequests(first: %d, states: [OPEN, CLOSED, MERGED]%s, orderBy: {field: CREATED_AT, direction: ASC}) {
          totalCount
          pageInfo {
            endCursor
            hasNextPage
          }
          nodes {
            id
            number
            title
            body
            state
            closedAt
            createdAt
            updatedAt
            url
            author {
              login
              ... on User { id }
            }
          }
        }
      }
    }
    """

    def build_query(cursor):
        # json.dumps emits a quoted, escaped literal, so a quote or backslash
        # in owner/repo/cursor cannot break out of the GraphQL string.
        after_clause = "" if cursor is None else ", after: %s" % json.dumps(cursor)
        return query_template % (
            json.dumps(owner),
            json.dumps(repo),
            self.per_page,
            after_clause,
        )

    after = None
    page = 0
    while True:
        page += 1
        logger.info(f"Pulls page {page}")
        data = self.run_graphql_query(build_query(after))
        pulls = data["data"]["repository"]["pullRequests"]
        for node in pulls["nodes"]:
            # Normalize author and pullRequest for compatibility with models.py
            author = node["author"] or {}
            node["user"] = {
                "id": author.get("id", 0),
                "login": author.get("login"),
            }
            node["pullRequest"] = True
            node["state"] = node["state"].lower()
            yield node
        page_info = pulls["pageInfo"]
        after = page_info["endCursor"]
        if not page_info["hasNextPage"]:
            break
Loading
Loading