firebase · jonsimantov · Jun 9, 2025 · Jun 7, 2025 · Jun 7, 2025 · Jun 7, 2025
diff --git a/scripts/gha/firebase_github.py b/scripts/gha/firebase_github.py
@@ -225,6 +225,49 @@ def get_reviews(token, pull_number):
   return results
 
 
+def get_pull_request_review_comments(token, pull_number, since=None):
+  """https://docs.github.com/en/rest/pulls/comments#list-review-comments-on-a-pull-request"""
+  url = f'{GITHUB_API_URL}/pulls/{pull_number}/comments'
+  headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
+
+  page = 1
+  per_page = 100
+  results = []
+
+  # Base parameters for the API request
+  base_params = {'per_page': per_page}
+  if since:
+    base_params['since'] = since
+
+  while True: # Loop indefinitely until explicitly broken
+    current_page_params = base_params.copy()
+    current_page_params['page'] = page
+
+    try:
+      with requests_retry_session().get(url, headers=headers, params=current_page_params,
+                        stream=True, timeout=TIMEOUT) as response:
+        response.raise_for_status()
+        # Log which page and if 'since' was used for clarity
+        logging.info("get_pull_request_review_comments: %s params %s response: %s", url, current_page_params, response)
+
+        current_page_results = response.json()
+        if not current_page_results: # No more results on this page
+            break # Exit loop, no more comments to fetch
+
+        results.extend(current_page_results)
+
+        # If fewer results than per_page were returned, it's the last page
+        if len(current_page_results) < per_page:
+            break # Exit loop, this was the last page
+
+        page += 1 # Increment page for the next iteration
+
+    except requests.exceptions.RequestException as e:
+      logging.error(f"Error fetching review comments (page {page}, params: {current_page_params}) for PR {pull_number}: {e}")
+      break # Stop trying if there's an error
+  return results
+
+
 def create_workflow_dispatch(token, workflow_id, ref, inputs):
   """https://docs.github.com/en/rest/reference/actions#create-a-workflow-dispatch-event"""
   url = f'{GITHUB_API_URL}/actions/workflows/{workflow_id}/dispatches'

diff --git a/scripts/gha/get_pr_review_comments.py b/scripts/gha/get_pr_review_comments.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python3
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Fetches and formats review comments from a GitHub Pull Request."""
+
+import argparse
+import os
+import sys
+import firebase_github
+import datetime
+from datetime import timezone, timedelta
+
+
+def main():
+    STATUS_IRRELEVANT = "[IRRELEVANT]"
+    STATUS_OLD = "[OLD]"
+    STATUS_CURRENT = "[CURRENT]"
+
+    default_owner = firebase_github.OWNER
+    default_repo = firebase_github.REPO
+
+    parser = argparse.ArgumentParser(
+        description="Fetch review comments from a GitHub PR and format into simple text output.",
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    parser.add_argument(
+        "--pull_number",
+        type=int,
+        required=True,
+        help="Pull request number."
+    )
+    parser.add_argument(
+        "--owner",
+        type=str,
+        default=default_owner,
+        help=f"Repository owner. Defaults to '{default_owner}'."
+    )
+    parser.add_argument(
+        "--repo",
+        type=str,
+        default=default_repo,
+        help=f"Repository name. Defaults to '{default_repo}'."
+    )
+    parser.add_argument(
+        "--token",
+        type=str,
+        default=os.environ.get("GITHUB_TOKEN"),
+        help="GitHub token. Can also be set via GITHUB_TOKEN env var."
+    )
+    parser.add_argument(
+        "--context-lines",
+        type=int,
+        default=10,
+        help="Number of context lines from the diff hunk. 0 for full hunk. If > 0, shows header (if any) and last N lines of the remaining hunk. Default: 10."
+    )
+    parser.add_argument(
+        "--since",
+        type=str,
+        default=None,
+        help="Only show comments updated at or after this ISO 8601 timestamp (e.g., YYYY-MM-DDTHH:MM:SSZ)."
+    )
+    parser.add_argument(
+        "--exclude-old",
+        action="store_true",
+        default=False,
+        help="Exclude comments marked [OLD] (where line number has changed due to code updates but position is still valid)."
+    )
+    parser.add_argument(
+        "--include-irrelevant",
+        action="store_true",
+        default=False,
+        help="Include comments marked [IRRELEVANT] (where GitHub can no longer anchor the comment to the diff, i.e., position is null)."
+    )
+
+    args = parser.parse_args()
+
+    if not args.token:
+        sys.stderr.write("Error: GitHub token not provided. Set GITHUB_TOKEN or use --token.\n")
+        sys.exit(1)
+
+    if args.owner != firebase_github.OWNER or args.repo != firebase_github.REPO:
+        repo_url = f"https://github.com/{args.owner}/{args.repo}"
+        if not firebase_github.set_repo_url(repo_url):
+            sys.stderr.write(f"Error: Invalid repo URL: {args.owner}/{args.repo}. Expected https://github.com/owner/repo\n")
+            sys.exit(1)
+        sys.stderr.write(f"Targeting repository: {firebase_github.OWNER}/{firebase_github.REPO}\n")
+
+    sys.stderr.write(f"Fetching comments for PR #{args.pull_number} from {firebase_github.OWNER}/{firebase_github.REPO}...\n")
+    if args.since:
+        sys.stderr.write(f"Filtering comments updated since: {args.since}\n")
+
+
+    comments = firebase_github.get_pull_request_review_comments(
+        args.token,
+        args.pull_number,
+        since=args.since
+    )
+
+    if not comments:
+        sys.stderr.write(f"No review comments found for PR #{args.pull_number} (or matching filters), or an error occurred.\n")
+        return
+
+    latest_activity_timestamp_obj = None
+    processed_comments_count = 0
+    print("# Review Comments\n\n")
+    for comment in comments:
+        created_at_str = comment.get("created_at")
+
+        current_pos = comment.get("position")
+        current_line = comment.get("line")
+        original_line = comment.get("original_line")
+
+        status_text = ""
+        line_to_display = None
+
+        if current_pos is None:
+            status_text = STATUS_IRRELEVANT
+            line_to_display = original_line
+        elif original_line is not None and current_line != original_line:
+            status_text = STATUS_OLD
+            line_to_display = current_line
+        else:
+            status_text = STATUS_CURRENT
+            line_to_display = current_line
+
+        if line_to_display is None:
+            line_to_display = "N/A"
+
+        if status_text == STATUS_IRRELEVANT and not args.include_irrelevant:
+            continue
+        if status_text == STATUS_OLD and args.exclude_old:
+            continue
+
+        # Track latest 'updated_at' for '--since' suggestion; 'created_at' is for display.
+        updated_at_str = comment.get("updated_at")
+        if updated_at_str: # Check if updated_at_str is not None and not empty
+            try:
+                if sys.version_info < (3, 11):
+                    dt_str_updated = updated_at_str.replace("Z", "+00:00")
+                else:
+                    dt_str_updated = updated_at_str
+                current_comment_activity_dt = datetime.datetime.fromisoformat(dt_str_updated)
+                if latest_activity_timestamp_obj is None or current_comment_activity_dt > latest_activity_timestamp_obj:
+                    latest_activity_timestamp_obj = current_comment_activity_dt
+            except ValueError:
+                sys.stderr.write(f"Warning: Could not parse updated_at timestamp: {updated_at_str}\n")
+
+        # Get other comment details
+        user = comment.get("user", {}).get("login", "Unknown user")
+        path = comment.get("path", "N/A")
+        body = comment.get("body", "").strip()
+
+        if not body:
+            continue
+
+        processed_comments_count += 1
+
+        diff_hunk = comment.get("diff_hunk")
+        html_url = comment.get("html_url", "N/A")
+        comment_id = comment.get("id")
+        in_reply_to_id = comment.get("in_reply_to_id")
+
+        print(f"## Comment by: **{user}** (ID: `{comment_id}`){f' (In Reply To: `{in_reply_to_id}`)' if in_reply_to_id else ''}\n")
+        if created_at_str:
+            print(f"*   **Timestamp**: `{created_at_str}`")
+        print(f"*   **Status**: `{status_text}`")
+        print(f"*   **File**: `{path}`")
+        print(f"*   **Line**: `{line_to_display}`")
+        print(f"*   **URL**: <{html_url}>\n")
+
+        print("\n### Context:")
+        print("```") # Start of Markdown code block
+        if diff_hunk and diff_hunk.strip():
+            if args.context_lines == 0: # User wants the full hunk
+                print(diff_hunk)
+            else: # User wants N lines of context (args.context_lines > 0)
+                hunk_lines = diff_hunk.split('\n')
+                if hunk_lines and hunk_lines[0].startswith("@@ "):
+                    print(hunk_lines[0])
+                    hunk_lines = hunk_lines[1:] # Modify list in place for remaining operations
+
+                # Proceed with the (potentially modified) hunk_lines
+                # If hunk_lines is empty here (e.g. original hunk was only a header that was removed),
+                # hunk_lines[-args.context_lines:] will be [], and "\n".join([]) is "",
+                # so print("") will effectively print a newline. This is acceptable.
+                print("\n".join(hunk_lines[-args.context_lines:]))
+        else: # diff_hunk was None or empty
+            print("(No diff hunk available for this comment)")
+        print("```") # End of Markdown code block
+
+        print("\n### Comment:")
+        print(body)
+        print("\n---")
+
+    sys.stderr.write(f"\nPrinted {processed_comments_count} comments to stdout.\n")
+
+    if latest_activity_timestamp_obj:
+        try:
+            # Ensure it's UTC before adding timedelta, then format
+            next_since_dt = latest_activity_timestamp_obj.astimezone(timezone.utc) + timedelta(seconds=2)
+            next_since_str = next_since_dt.strftime('%Y-%m-%dT%H:%M:%SZ')
+
+            new_cmd_args = [sys.executable, sys.argv[0]] # Start with interpreter and script path
+            i = 1 # Start checking from actual arguments in sys.argv
+            while i < len(sys.argv):
+                if sys.argv[i] == "--since":
+                    i += 2 # Skip --since and its value
+                    continue
+                new_cmd_args.append(sys.argv[i])
+                i += 1
+
+            new_cmd_args.extend(["--since", next_since_str])
+            suggested_cmd = " ".join(new_cmd_args)
+            sys.stderr.write(f"\nTo get comments created after the last one in this batch, try:\n{suggested_cmd}\n")
+        except Exception as e:
+            sys.stderr.write(f"\nWarning: Could not generate next command suggestion: {e}\n")
+
+if __name__ == "__main__":  # Run main() only when executed as a script, not when imported.
+    main()