Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 44 additions & 24 deletions pr_agent/git_providers/codecommit_provider.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import re
from collections import Counter
from typing import List, Optional, Tuple
from urllib.parse import urlparse

Expand Down Expand Up @@ -76,7 +75,7 @@ def is_supported(self, capability: str) -> bool:
"create_inline_comment",
"publish_inline_comments",
"get_labels",
"gfm_markdown"
"gfm_markdown",
]:
return False
return True
Expand All @@ -91,13 +90,19 @@ def get_files(self) -> list[CodeCommitFile]:
return self.git_files

self.git_files = []
differences = self.codecommit_client.get_differences(self.repo_name, self.pr.destination_commit, self.pr.source_commit)
differences = self.codecommit_client.get_differences(
self.repo_name, self.pr.destination_commit, self.pr.source_commit
)
for item in differences:
self.git_files.append(CodeCommitFile(item.before_blob_path,
item.before_blob_id,
item.after_blob_path,
item.after_blob_id,
CodeCommitProvider._get_edit_type(item.change_type)))
self.git_files.append(
CodeCommitFile(
item.before_blob_path,
item.before_blob_id,
item.after_blob_path,
item.after_blob_id,
CodeCommitProvider._get_edit_type(item.change_type),
)
)
return self.git_files

def get_diff_files(self) -> list[FilePatchInfo]:
Expand All @@ -121,15 +126,18 @@ def get_diff_files(self) -> list[FilePatchInfo]:
if diff_item.a_blob_id is not None:
patch_filename = diff_item.a_path
original_file_content_str = self.codecommit_client.get_file(
self.repo_name, diff_item.a_path, self.pr.destination_commit)
self.repo_name, diff_item.a_path, self.pr.destination_commit
)
if isinstance(original_file_content_str, (bytes, bytearray)):
original_file_content_str = original_file_content_str.decode("utf-8")
else:
original_file_content_str = ""

if diff_item.b_blob_id is not None:
patch_filename = diff_item.b_path
new_file_content_str = self.codecommit_client.get_file(self.repo_name, diff_item.b_path, self.pr.source_commit)
new_file_content_str = self.codecommit_client.get_file(
self.repo_name, diff_item.b_path, self.pr.source_commit
)
if isinstance(new_file_content_str, (bytes, bytearray)):
new_file_content_str = new_file_content_str.decode("utf-8")
else:
Expand All @@ -144,9 +152,7 @@ def get_diff_files(self) -> list[FilePatchInfo]:
patch,
diff_item.b_path,
edit_type=diff_item.edit_type,
old_filename=None
if diff_item.a_path == diff_item.b_path
else diff_item.a_path,
old_filename=None if diff_item.a_path == diff_item.b_path else diff_item.a_path,
)
# Only add valid files to the diff list
# "bad extensions" are set in the language_extensions.toml file
Expand Down Expand Up @@ -190,12 +196,16 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
for suggestion in code_suggestions:
# Verify that each suggestion has the required keys
if not all(key in suggestion for key in ["body", "relevant_file", "relevant_lines_start"]):
get_logger().warning(f"Skipping code suggestion #{counter}: Each suggestion must have 'body', 'relevant_file', 'relevant_lines_start' keys")
get_logger().warning(
f"Skipping code suggestion #{counter}: Each suggestion must have 'body', 'relevant_file', 'relevant_lines_start' keys"
)
continue

# Publish the code suggestion to CodeCommit
try:
get_logger().debug(f"Code Suggestion #{counter} in file: {suggestion['relevant_file']}: {suggestion['relevant_lines_start']}")
get_logger().debug(
f"Code Suggestion #{counter} in file: {suggestion['relevant_file']}: {suggestion['relevant_lines_start']}"
)
self.codecommit_client.publish_comment(
repo_name=self.repo_name,
pr_number=self.pr_num,
Expand Down Expand Up @@ -227,7 +237,9 @@ def remove_initial_comment(self):
def remove_comment(self, comment):
return "" # not implemented yet

def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
def publish_inline_comment(
self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None
):
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html
raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet")

Expand Down Expand Up @@ -257,7 +269,7 @@ def get_languages(self):
- dict: A dictionary where each key is a language name and the corresponding value is the percentage of that language in the PR.
"""
commit_files = self.get_files()
filenames = [ item.filename for item in commit_files ]
filenames = [item.filename for item in commit_files]
extensions = CodeCommitProvider._get_file_extensions(filenames)

# Calculate the percentage of each file extension in the PR
Expand Down Expand Up @@ -402,7 +414,7 @@ def _add_additional_newlines(body: str) -> str:
Returns:
- str: the PR body with the double newlines added
"""
return re.sub(r'(?<!\n)\n(?!\n)', '\n\n', body)
return re.sub(r"(?<!\n)\n(?!\n)", "\n\n", body)

@staticmethod
def _remove_markdown_html(comment: str) -> str:
Expand Down Expand Up @@ -488,10 +500,18 @@ def _get_language_percentages(extensions):
if total_files == 0:
return {}

# Identify language by file extension and count
lang_count = Counter(extensions)
# Convert counts to percentages
lang_percentage = {
lang: round(count / total_files * 100) for lang, count in lang_count.items()
}
# Tally how many times each extension occurs in a single pass, using a
# plain dict (dict.get with a default of 0) rather than collections.Counter.
counts = {}
for ext in extensions:
counts[ext] = counts.get(ext, 0) + 1

inv_total = 100 / total_files # hoist division for repeated use

lang_percentage = {}
for lang, count in counts.items():
# count * inv_total is a float product (inv_total comes from true division);
# round() with no ndigits converts it to the nearest integer percentage.
percent = round(count * inv_total)
lang_percentage[lang] = percent

return lang_percentage