From 9530defba87decf43a7a2f418dad1ec623f5edc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Ram=C3=ADrez?= Date: Mon, 30 Jan 2023 16:15:56 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Compute=20FastAPI=20Experts=20inclu?= =?UTF-8?q?ding=20GitHub=20Discussions=20(#5941)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/actions/people/app/main.py | 206 +++++++++++++++++++++++++++-- 1 file changed, 192 insertions(+), 14 deletions(-) diff --git a/.github/actions/people/app/main.py b/.github/actions/people/app/main.py index 31756a5fc..05cbc71e5 100644 --- a/.github/actions/people/app/main.py +++ b/.github/actions/people/app/main.py @@ -4,7 +4,7 @@ import sys from collections import Counter, defaultdict from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Container, DefaultDict, Dict, List, Set, Union +from typing import Any, Container, DefaultDict, Dict, List, Set, Union import httpx import yaml @@ -12,6 +12,50 @@ from github import Github from pydantic import BaseModel, BaseSettings, SecretStr github_graphql_url = "https://api.github.com/graphql" +questions_category_id = "MDE4OkRpc2N1c3Npb25DYXRlZ29yeTMyMDAxNDM0" + +discussions_query = """ +query Q($after: String, $category_id: ID) { + repository(name: "fastapi", owner: "tiangolo") { + discussions(first: 100, after: $after, categoryId: $category_id) { + edges { + cursor + node { + number + author { + login + avatarUrl + url + } + title + createdAt + comments(first: 100) { + nodes { + createdAt + author { + login + avatarUrl + url + } + isAnswer + replies(first: 10) { + nodes { + createdAt + author { + login + avatarUrl + url + } + } + } + } + } + } + } + } + } +} +""" issues_query = """ query Q($after: String) { @@ -131,15 +175,30 @@ class Author(BaseModel): url: str +# Issues and Discussions + + class CommentsNode(BaseModel): createdAt: datetime author: Union[Author, None] = None +class Replies(BaseModel): + nodes: List[CommentsNode] + + +class DiscussionsCommentsNode(CommentsNode): + replies: Replies + + class Comments(BaseModel): nodes: List[CommentsNode] +class DiscussionsComments(BaseModel): + nodes: List[DiscussionsCommentsNode] + + class IssuesNode(BaseModel): number: int author: Union[Author, None] = None @@ -149,27 +208,59 @@ class IssuesNode(BaseModel): comments: Comments +class DiscussionsNode(BaseModel): + number: int + author: Union[Author, None] = None + title: str + createdAt: datetime + comments: DiscussionsComments + + class IssuesEdge(BaseModel): cursor: str node: IssuesNode +class DiscussionsEdge(BaseModel): + cursor: str + node: DiscussionsNode + + class Issues(BaseModel): edges: List[IssuesEdge] +class Discussions(BaseModel): + edges: List[DiscussionsEdge] + + class IssuesRepository(BaseModel): issues: Issues +class DiscussionsRepository(BaseModel): + discussions: Discussions + + class IssuesResponseData(BaseModel): repository: IssuesRepository +class DiscussionsResponseData(BaseModel): + repository: DiscussionsRepository + + class IssuesResponse(BaseModel): data: IssuesResponseData +class DiscussionsResponse(BaseModel): + data: DiscussionsResponseData + + +# PRs + + class LabelNode(BaseModel): name: str @@ -219,6 +310,9 @@ class PRsResponse(BaseModel): data: PRsResponseData +# Sponsors + + class SponsorEntity(BaseModel): login: str avatarUrl: str @@ -264,10 +358,16 @@ class Settings(BaseSettings): def get_graphql_response( - *, settings: Settings, query: str, after: Union[str, None] = None -): + *, + settings: Settings, + query: str, + after: Union[str, None] = None, + category_id: Union[str, None] = None, +) -> Dict[str, Any]: headers = {"Authorization": f"token {settings.input_token.get_secret_value()}"} - variables = {"after": after} + # category_id is only used by one query, but GraphQL allows unused variables, so + # keep it here for simplicity + variables = {"after": after, "category_id": category_id} response = httpx.post( github_graphql_url, headers=headers, @@ -275,7 +375,9 @@ def get_graphql_response( json={"query": query, "variables": variables, "operationName": "Q"}, ) if response.status_code != 200: - logging.error(f"Response was not 200, after: {after}") + logging.error( + f"Response was not 200, after: {after}, category_id: {category_id}" + ) logging.error(response.text) raise RuntimeError(response.text) data = response.json() @@ -288,6 +390,21 @@ def get_graphql_issue_edges(*, settings: Settings, after: Union[str, None] = Non return graphql_response.data.repository.issues.edges +def get_graphql_question_discussion_edges( + *, + settings: Settings, + after: Union[str, None] = None, +): + data = get_graphql_response( + settings=settings, + query=discussions_query, + after=after, + category_id=questions_category_id, + ) + graphql_response = DiscussionsResponse.parse_obj(data) + return graphql_response.data.repository.discussions.edges + + def get_graphql_pr_edges(*, settings: Settings, after: Union[str, None] = None): data = get_graphql_response(settings=settings, query=prs_query, after=after) graphql_response = PRsResponse.parse_obj(data) @@ -300,7 +417,7 @@ def get_graphql_sponsor_edges(*, settings: Settings, after: Union[str, None] = N return graphql_response.data.user.sponsorshipsAsMaintainer.edges -def get_experts(settings: Settings): +def get_issues_experts(settings: Settings): issue_nodes: List[IssuesNode] = [] issue_edges = get_graphql_issue_edges(settings=settings) @@ -326,13 +443,74 @@ def get_experts(settings: Settings): for comment in issue.comments.nodes: if comment.author: authors[comment.author.login] = comment.author - if comment.author.login == issue_author_name: - continue - issue_commentors.add(comment.author.login) + if comment.author.login != issue_author_name: + issue_commentors.add(comment.author.login) for author_name in issue_commentors: commentors[author_name] += 1 if issue.createdAt > one_month_ago: last_month_commentors[author_name] += 1 + + return commentors, last_month_commentors, authors + + +def get_discussions_experts(settings: Settings): + discussion_nodes: List[DiscussionsNode] = [] + discussion_edges = get_graphql_question_discussion_edges(settings=settings) + + while discussion_edges: + for discussion_edge in discussion_edges: + discussion_nodes.append(discussion_edge.node) + last_edge = discussion_edges[-1] + discussion_edges = get_graphql_question_discussion_edges( + settings=settings, after=last_edge.cursor + ) + + commentors = Counter() + last_month_commentors = Counter() + authors: Dict[str, Author] = {} + + now = datetime.now(tz=timezone.utc) + one_month_ago = now - timedelta(days=30) + + for discussion in discussion_nodes: + discussion_author_name = None + if discussion.author: + authors[discussion.author.login] = discussion.author + discussion_author_name = discussion.author.login + discussion_commentors = set() + for comment in discussion.comments.nodes: + if comment.author: + authors[comment.author.login] = comment.author + if comment.author.login != discussion_author_name: + discussion_commentors.add(comment.author.login) + for reply in comment.replies.nodes: + if reply.author: + authors[reply.author.login] = reply.author + if reply.author.login != discussion_author_name: + discussion_commentors.add(reply.author.login) + for author_name in discussion_commentors: + commentors[author_name] += 1 + if discussion.createdAt > one_month_ago: + last_month_commentors[author_name] += 1 + return commentors, last_month_commentors, authors + + +def get_experts(settings: Settings): + ( + issues_commentors, + issues_last_month_commentors, + issues_authors, + ) = get_issues_experts(settings=settings) + ( + discussions_commentors, + discussions_last_month_commentors, + discussions_authors, + ) = get_discussions_experts(settings=settings) + commentors = issues_commentors + discussions_commentors + last_month_commentors = ( + issues_last_month_commentors + discussions_last_month_commentors + ) + authors = {**issues_authors, **discussions_authors} return commentors, last_month_commentors, authors @@ -425,13 +603,13 @@ if __name__ == "__main__": logging.info(f"Using config: {settings.json()}") g = Github(settings.input_standard_token.get_secret_value()) repo = g.get_repo(settings.github_repository) - issue_commentors, issue_last_month_commentors, issue_authors = get_experts( + question_commentors, question_last_month_commentors, question_authors = get_experts( settings=settings ) contributors, pr_commentors, reviewers, pr_authors = get_contributors( settings=settings ) - authors = {**issue_authors, **pr_authors} + authors = {**question_authors, **pr_authors} maintainers_logins = {"tiangolo"} bot_names = {"codecov", "github-actions", "pre-commit-ci", "dependabot"} maintainers = [] @@ -440,7 +618,7 @@ if __name__ == "__main__": maintainers.append( { "login": login, - "answers": issue_commentors[login], + "answers": question_commentors[login], "prs": contributors[login], "avatarUrl": user.avatarUrl, "url": user.url, @@ -453,13 +631,13 @@ if __name__ == "__main__": min_count_reviewer = 4 skip_users = maintainers_logins | bot_names experts = get_top_users( - counter=issue_commentors, + counter=question_commentors, min_count=min_count_expert, authors=authors, skip_users=skip_users, ) last_month_active = get_top_users( - counter=issue_last_month_commentors, + counter=question_last_month_commentors, min_count=min_count_last_month, authors=authors, skip_users=skip_users,