Skip to content

Add script to fetch PR review comments #1722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
da2efa9
feat: Add script to fetch PR review comments
google-labs-jules[bot] Jun 7, 2025
0678049
feat: Enhance PR comment script with context and filters
google-labs-jules[bot] Jun 7, 2025
e84b02d
fix: Correct IndentationError in get_pr_review_comments.py
google-labs-jules[bot] Jun 7, 2025
5948b96
fix: Correct --context-lines behavior for non-line-specific comments
google-labs-jules[bot] Jun 7, 2025
565eed2
feat: Simplify diff hunk display and add comment filters
google-labs-jules[bot] Jun 7, 2025
24a03ea
refactor: Update script description and format diff hunks
google-labs-jules[bot] Jun 7, 2025
7e182aa
fix: Adjust 'next command' timestamp increment to 2 seconds
google-labs-jules[bot] Jun 7, 2025
599845b
docs: Minor textual cleanups in PR comments script
google-labs-jules[bot] Jun 7, 2025
77d1ed2
feat: Format output as Markdown for improved readability
google-labs-jules[bot] Jun 7, 2025
9cb8d42
style: Adjust Markdown headings for structure and conciseness
google-labs-jules[bot] Jun 7, 2025
203e88f
style: Adjust default context lines and Markdown spacing
google-labs-jules[bot] Jun 7, 2025
b900c7f
feat: Refactor comment filtering with new status terms and flags
google-labs-jules[bot] Jun 7, 2025
5a4010f
feat: Improve context display and suggested command robustness
google-labs-jules[bot] Jun 7, 2025
94417e7
style: Refactor hunk printing to use join for conciseness
google-labs-jules[bot] Jun 7, 2025
9312a0c
fix: Align 'since' filter and next command with observed API behavior…
google-labs-jules[bot] Jun 7, 2025
07d06bb
style: Condense printing of trailing hunk lines
google-labs-jules[bot] Jun 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions scripts/gha/firebase_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,49 @@ def get_reviews(token, pull_number):
return results


def get_pull_request_review_comments(token, pull_number, since=None):
    """List the review comments on a pull request, following pagination.

    https://docs.github.com/en/rest/pulls/comments#list-review-comments-on-a-pull-request

    Args:
      token: GitHub token sent in the Authorization header.
      pull_number: Number of the pull request whose review comments are listed.
      since: Optional value forwarded as the `since` query parameter. It is
        left out of the request when falsy.

    Returns:
      A list holding the decoded JSON entries of every page that was fetched.
      If a request fails, the error is logged and whatever was collected before
      the failure is returned (possibly an empty list); no exception from
      `requests` propagates to the caller.
    """
    url = f'{GITHUB_API_URL}/pulls/{pull_number}/comments'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}

    per_page = 100
    base_params = {'per_page': per_page}
    if since:
        base_params['since'] = since

    results = []
    page = 1
    while True:
        # Build a fresh dict per request so the logged params always match
        # the page that was actually asked for.
        current_page_params = dict(base_params, page=page)

        try:
            with requests_retry_session().get(url, headers=headers, params=current_page_params,
                                              stream=True, timeout=TIMEOUT) as response:
                response.raise_for_status()
                logging.info("get_pull_request_review_comments: %s params %s response: %s",
                             url, current_page_params, response)

                current_page_results = response.json()
                if not current_page_results:
                    # An empty page means everything has been read already.
                    break

                results.extend(current_page_results)

                # A short page is the last one; skip the extra round trip.
                if len(current_page_results) < per_page:
                    break

                page += 1

        except requests.exceptions.RequestException as e:
            # Best effort: report the failure and hand back what we have.
            logging.error("Error fetching review comments (page %s, params: %s) for PR %s: %s",
                          page, current_page_params, pull_number, e)
            break
    return results


def create_workflow_dispatch(token, workflow_id, ref, inputs):
"""https://docs.github.com/en/rest/reference/actions#create-a-workflow-dispatch-event"""
url = f'{GITHUB_API_URL}/actions/workflows/{workflow_id}/dispatches'
Expand Down
244 changes: 244 additions & 0 deletions scripts/gha/get_pr_review_comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
#!/usr/bin/env python3
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Fetches and formats review comments from a GitHub Pull Request."""

import argparse
import datetime
import os
import shlex
import sys
from datetime import timezone, timedelta

import firebase_github


# Attempt to configure logging for firebase_github if absl is available
try:
from absl import logging as absl_logging
# Set verbosity for absl logging if you want to see logs from firebase_github
# absl_logging.set_verbosity(absl_logging.INFO)
except ImportError:
pass # firebase_github.py uses absl.logging.info, so this won't redirect.

# Status labels attached to each comment in the output.
_STATUS_IRRELEVANT = "[IRRELEVANT]"
_STATUS_OLD = "[OLD]"
_STATUS_CURRENT = "[CURRENT]"


def _build_arg_parser():
    """Return the argparse parser describing this script's command line."""
    default_owner = firebase_github.OWNER
    default_repo = firebase_github.REPO

    parser = argparse.ArgumentParser(
        description="Fetch review comments from a GitHub PR and format into simple text output.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "--pull_number",
        type=int,
        required=True,
        help="Pull request number."
    )
    parser.add_argument(
        "--owner",
        type=str,
        default=default_owner,
        help=f"Repository owner. Defaults to '{default_owner}'."
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=default_repo,
        help=f"Repository name. Defaults to '{default_repo}'."
    )
    parser.add_argument(
        "--token",
        type=str,
        default=os.environ.get("GITHUB_TOKEN"),
        help="GitHub token. Can also be set via GITHUB_TOKEN env var."
    )
    parser.add_argument(
        "--context-lines",
        type=int,
        default=10,
        help="Number of context lines from the diff hunk. 0 for full hunk. If > 0, shows header (if any) and last N lines of the remaining hunk. Default: 10."
    )
    parser.add_argument(
        "--since",
        type=str,
        default=None,
        help="Only show comments updated at or after this ISO 8601 timestamp (e.g., YYYY-MM-DDTHH:MM:SSZ)."
    )
    parser.add_argument(
        "--exclude-old",
        action="store_true",
        default=False,
        help="Exclude comments marked [OLD] (where line number has changed due to code updates but position is still valid)."
    )
    parser.add_argument(
        "--include-irrelevant",
        action="store_true",
        default=False,
        help="Include comments marked [IRRELEVANT] (where GitHub can no longer anchor the comment to the diff, i.e., position is null)."
    )
    return parser


def _classify_comment(comment):
    """Return `(status_label, line_to_display)` for one review comment dict.

    A comment whose `position` is missing is [IRRELEVANT] and reports its
    `original_line`; one whose `line` differs from a known `original_line` is
    [OLD]; anything else is [CURRENT]. A missing line is shown as "N/A".
    """
    position = comment.get("position")
    line = comment.get("line")
    original_line = comment.get("original_line")

    if position is None:
        status, shown = _STATUS_IRRELEVANT, original_line
    elif original_line is not None and line != original_line:
        status, shown = _STATUS_OLD, line
    else:
        status, shown = _STATUS_CURRENT, line

    return status, ("N/A" if shown is None else shown)


def _parse_utc_timestamp(value):
    """Parse an ISO 8601 timestamp string into a timezone-aware datetime.

    Raises:
      ValueError: if `value` is not a format `fromisoformat` understands.
    """
    # Older interpreters reject a trailing "Z"; the explicit offset form is
    # accepted everywhere, so no version check is needed.
    if value.endswith("Z"):
        value = value[:-1] + "+00:00"
    parsed = datetime.datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        # Treat an offset-less value as UTC so it can be compared with the
        # aware values produced above instead of raising TypeError.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def _render_hunk(diff_hunk, context_lines):
    """Return the text to show inside the "Context" code fence.

    With `context_lines == 0` the hunk is returned untouched. Otherwise the
    leading "@@ " header (if any) is kept and followed by the last
    `context_lines` lines of the rest of the hunk.
    """
    if not diff_hunk or not diff_hunk.strip():
        return "(No diff hunk available for this comment)"
    if context_lines == 0:
        return diff_hunk

    lines = diff_hunk.split('\n')
    pieces = []
    if lines[0].startswith("@@ "):
        pieces.append(lines[0])
        lines = lines[1:]
    # If only a header was present this appends "", which yields one blank
    # line after the header; that is the intended output.
    pieces.append("\n".join(lines[-context_lines:]))
    return "\n".join(pieces)


def _suggest_next_command(argv, next_since):
    """Rebuild the current command line with `--since` set to `next_since`.

    Any existing `--since VALUE` or `--since=VALUE` is dropped. Every argument
    is shell-quoted so the result can be pasted into a shell as-is.
    """
    # NOTE(review): a token passed via --token is echoed here verbatim.
    parts = [sys.executable, argv[0]]
    skip_next = False
    for arg in argv[1:]:
        if skip_next:
            skip_next = False
            continue
        if arg == "--since":
            skip_next = True  # The following item is the old value.
            continue
        if arg.startswith("--since="):
            continue
        parts.append(arg)

    parts.extend(["--since", next_since])
    return " ".join(shlex.quote(part) for part in parts)


def main():
    """Fetch a pull request's review comments and print them as Markdown.

    The Markdown goes to stdout; progress, warnings and a suggested follow-up
    command (with an updated `--since`) go to stderr. Exits with status 1 when
    no token is available or the owner/repo pair is rejected by
    `firebase_github.set_repo_url`.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    if args.context_lines < 0:
        # A negative value would slice the wrong end of the hunk.
        parser.error("--context-lines must be 0 or a positive integer.")

    if not args.token:
        sys.stderr.write("Error: GitHub token not provided. Set GITHUB_TOKEN or use --token.\n")
        sys.exit(1)

    if args.owner != firebase_github.OWNER or args.repo != firebase_github.REPO:
        repo_url = f"https://github.com/{args.owner}/{args.repo}"
        if not firebase_github.set_repo_url(repo_url):
            sys.stderr.write(f"Error: Invalid repo URL: {args.owner}/{args.repo}. Expected https://github.com/owner/repo\n")
            sys.exit(1)
        sys.stderr.write(f"Targeting repository: {firebase_github.OWNER}/{firebase_github.REPO}\n")

    sys.stderr.write(f"Fetching comments for PR #{args.pull_number} from {firebase_github.OWNER}/{firebase_github.REPO}...\n")
    if args.since:
        sys.stderr.write(f"Filtering comments updated since: {args.since}\n")

    comments = firebase_github.get_pull_request_review_comments(
        args.token,
        args.pull_number,
        since=args.since
    )

    if not comments:
        sys.stderr.write(f"No review comments found for PR #{args.pull_number} (or matching filters), or an error occurred.\n")
        return

    latest_activity = None
    printed_count = 0
    print("# Review Comments\n\n")
    for comment in comments:
        status_text, line_to_display = _classify_comment(comment)

        if status_text == _STATUS_IRRELEVANT and not args.include_irrelevant:
            continue
        if status_text == _STATUS_OLD and args.exclude_old:
            continue

        # Track the newest `updated_at` among comments that passed the status
        # filters; it seeds the suggested next `--since` value.
        updated_at_str = comment.get("updated_at")
        if updated_at_str:
            try:
                activity = _parse_utc_timestamp(updated_at_str)
            except ValueError:
                sys.stderr.write(f"Warning: Could not parse updated_at timestamp: {updated_at_str}\n")
            else:
                if latest_activity is None or activity > latest_activity:
                    latest_activity = activity

        # `user` and `body` can be present but null, so fall back explicitly
        # rather than relying on the .get() default.
        user = (comment.get("user") or {}).get("login", "Unknown user")
        path = comment.get("path", "N/A")
        body = (comment.get("body") or "").strip()

        if not body:
            continue

        printed_count += 1

        created_at_str = comment.get("created_at")
        diff_hunk = comment.get("diff_hunk")
        html_url = comment.get("html_url", "N/A")
        comment_id = comment.get("id")
        in_reply_to_id = comment.get("in_reply_to_id")

        reply_note = f" (In Reply To: `{in_reply_to_id}`)" if in_reply_to_id else ""
        print(f"## Comment by: **{user}** (ID: `{comment_id}`){reply_note}\n")
        if created_at_str:
            print(f"* **Timestamp**: `{created_at_str}`")
        print(f"* **Status**: `{status_text}`")
        print(f"* **File**: `{path}`")
        print(f"* **Line**: `{line_to_display}`")
        print(f"* **URL**: <{html_url}>\n")

        print("\n### Context:")
        print("```")
        print(_render_hunk(diff_hunk, args.context_lines))
        print("```")

        print("\n### Comment:")
        print(body)
        print("\n---")

    sys.stderr.write(f"\nPrinted {printed_count} comments to stdout.\n")

    if latest_activity is not None:
        # The hint is a convenience only; it must never fail a run whose
        # real output has already been written, hence the broad catch.
        try:
            next_since_dt = latest_activity.astimezone(timezone.utc) + timedelta(seconds=2)
            next_since_str = next_since_dt.strftime('%Y-%m-%dT%H:%M:%SZ')
            suggested_cmd = _suggest_next_command(sys.argv, next_since_str)
            sys.stderr.write(f"\nTo get comments created after the last one in this batch, try:\n{suggested_cmd}\n")
        except Exception as e:
            sys.stderr.write(f"\nWarning: Could not generate next command suggestion: {e}\n")


if __name__ == "__main__":
    main()
Loading