From 076914b3bbbbbf3db34b8f2b8f3ab7d3e4ca80fd Mon Sep 17 00:00:00 2001 From: Greg Mondello <72952982+gmondello@users.noreply.github.com> Date: Wed, 15 Oct 2025 19:38:01 +0000 Subject: [PATCH] feat: Major performance optimizations and budget creation support Performance Optimizations: - Add bulk membership optimization: Check cost center membership before batching to avoid unnecessary API calls - Add newly created cost centers optimization: Skip orphaned user checks for cost centers created in current run (100% API call reduction for clean slate scenarios) - Improve batching logic to happen after membership verification Budget Creation Features: - Add budget creation support with --create-budgets flag for unreleased GitHub Budgets API - Implement graceful error handling when Budgets API is unavailable (404 responses) - Add duplicate budget checking with API bug workaround (name-based lookup) - Create budgets for cost centers automatically when requested Code Quality & Maintenance: - Add BudgetsAPIUnavailableError exception for proper error handling - Enhance logging with performance metrics and optimization status - Clean up debugging scripts and temporary files - Update project naming from 'Copilot-Cost-Center-Manager' to 'cost-center-automation' - Update documentation and container names for consistency GitHub Actions Compatibility: - All optimizations work seamlessly in Actions environment - Graceful degradation when budget features aren't available - Reduced API calls improve rate limit handling - Enhanced logging integrates well with Actions logs Breaking Changes: None - all changes are backward compatible Performance Impact: Significant reduction in API calls, especially for clean slate deployments --- .gitignore | 8 +- README.md | 2 +- automation/update_cost_centers.sh | 3 +- docker-compose.yml | 8 +- main.py | 32 +++- scripts/performance_comparison.py | 174 ------------------- src/cost_center_cache.py | 198 --------------------- src/github_api.py | 277 
++++++++++++++++++++++++++---- src/teams_cost_center_manager.py | 143 ++++++++++----- 9 files changed, 389 insertions(+), 456 deletions(-) delete mode 100644 scripts/performance_comparison.py delete mode 100644 src/cost_center_cache.py diff --git a/.gitignore b/.gitignore index ea6400c..3719f2f 100644 --- a/.gitignore +++ b/.gitignore @@ -88,4 +88,10 @@ exports/* # Temporary files tmp/ -temp/ \ No newline at end of file +temp/ + +# Local utility scripts (not for production) +scripts/cleanup_*.py +scripts/test_*.py +scripts/check_*.py +scripts/README_cleanup.md \ No newline at end of file diff --git a/README.md b/README.md index 9d41d9e..d061213 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# GitHub Copilot Cost Center Automation +# GitHub Cost Center Automation Automate GitHub Copilot license cost center assignments for your enterprise with two powerful modes: diff --git a/automation/update_cost_centers.sh b/automation/update_cost_centers.sh index 8e9bc65..dbeb738 100755 --- a/automation/update_cost_centers.sh +++ b/automation/update_cost_centers.sh @@ -1,6 +1,7 @@ #!/bin/bash -# GitHub Copilot Cost Center Update Script +#!/bin/bash +# GitHub Cost Center Update Script # This script runs the cost center assignment and exports data # Usage: ./update_cost_centers.sh [full] # - Default: Incremental processing (only new users since last run) diff --git a/docker-compose.yml b/docker-compose.yml index 3585429..45b8da5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,9 @@ version: '3.8' services: - copilot-manager: + cost-center-automation: build: . - container_name: copilot-cost-center-manager + container_name: cost-center-automation volumes: - ./config:/app/config:ro - ./exports:/app/exports @@ -14,9 +14,9 @@ services: command: python main.py --assign-cost-centers --export both --summary-report # Optional: Scheduled runner using cron-like functionality - copilot-scheduler: + cost-center-scheduler: build: . 
- container_name: copilot-scheduler + container_name: cost-center-scheduler volumes: - ./config:/app/config:ro - ./exports:/app/exports diff --git a/main.py b/main.py index f406f7e..74bb49e 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -GitHub Copilot Cost Center Management Script +GitHub Cost Center Management Script Automates cost center assignments for GitHub Copilot users with two operational modes: @@ -51,7 +51,7 @@ def handle_interrupt(signum, frame): def parse_arguments(): """Parse command line arguments.""" parser = argparse.ArgumentParser( - description="GitHub Copilot Cost Center Management - PRU-based or Teams-based assignment", + description="GitHub Cost Center Management - PRU-based or Teams-based assignment", epilog=""" Examples: # PRU-based mode (default) @@ -217,8 +217,26 @@ def _handle_teams_mode(args, config: ConfigManager, teams_manager, logger) -> No # Exit early if only showing config if args.show_config and not any([args.assign_cost_centers, args.summary_report]): + logger.info("Configuration displayed. Use --mode plan or --mode apply to process teams.") return + # If no action flags specified, default behavior depends on mode + if not any([args.assign_cost_centers, args.summary_report]): + if args.mode == "apply": + # In apply mode, user likely wants to assign cost centers + print("\n" + "="*60) + print("ℹ️ APPLY MODE - Processing cost center assignments") + print("="*60) + print("πŸ’‘ Tip: Use --mode plan to preview changes without applying them.\n") + args.assign_cost_centers = True + else: + # In plan mode, show plan + print("\n" + "="*60) + print("ℹ️ PLAN MODE - Showing what would be done (no changes)") + print("="*60) + print("πŸ’‘ Tip: Use --mode apply --yes to actually sync the assignments.\n") + args.assign_cost_centers = True # Enable preview in plan mode too! 
+ # Generate summary report if requested if args.summary_report: logger.info("Generating teams-based cost center summary...") @@ -247,10 +265,16 @@ def _handle_teams_mode(args, config: ConfigManager, teams_manager, logger) -> No # Assign cost centers if requested if args.assign_cost_centers: - logger.info("Processing team-based cost center assignments...") - + print("\n" + "="*60) if args.mode == "plan": + print("πŸ“‹ PLAN MODE - Preview Only (No Changes)") + print("="*60) logger.info("MODE=plan (no changes will be made)") + else: + print("⚑ APPLY MODE - Will Make Changes") + print("="*60) + + logger.info("Processing team-based cost center assignments...") # Build and optionally sync assignments if args.mode == "plan": diff --git a/scripts/performance_comparison.py b/scripts/performance_comparison.py deleted file mode 100644 index 5b2c4cb..0000000 --- a/scripts/performance_comparison.py +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/env python3 -""" -Performance comparison script to demonstrate cost center caching benefits. - -This script simulates the performance difference between cached and non-cached -cost center operations by creating mock API calls and measuring execution time. 
-""" - -import time -from typing import Set -import json -from src.cost_center_cache import CostCenterCache - - -class MockGitHubManager: - """Mock GitHub manager that simulates API call delays.""" - - def __init__(self, api_delay: float = 0.1): - self.api_delay = api_delay - self.api_calls = 0 - - def create_cost_center(self, name: str) -> str: - """Simulate API call delay and return mock cost center ID.""" - time.sleep(self.api_delay) # Simulate network delay - self.api_calls += 1 - return f"cc_{hash(name) % 100000:05d}" - - -def simulate_without_cache(cost_centers: Set[str], github_manager: MockGitHubManager) -> dict: - """Simulate cost center resolution without caching.""" - start_time = time.time() - - cost_center_map = {} - for cost_center_name in cost_centers: - cost_center_id = github_manager.create_cost_center(cost_center_name) - cost_center_map[cost_center_name] = cost_center_id - - end_time = time.time() - return { - 'duration': end_time - start_time, - 'api_calls': github_manager.api_calls, - 'mappings': cost_center_map - } - - -def simulate_with_cache(cost_centers: Set[str], github_manager: MockGitHubManager, cache: CostCenterCache) -> dict: - """Simulate cost center resolution with caching.""" - start_time = time.time() - - cost_center_map = {} - cache_hits = 0 - api_calls_before = github_manager.api_calls - - for cost_center_name in cost_centers: - # Check cache first - cached_id = cache.get_cost_center_id(cost_center_name) - - if cached_id: - cost_center_map[cost_center_name] = cached_id - cache_hits += 1 - else: - # Cache miss - make API call - cost_center_id = github_manager.create_cost_center(cost_center_name) - cost_center_map[cost_center_name] = cost_center_id - # Cache the result - cache.set_cost_center_id(cost_center_name, cost_center_id) - - end_time = time.time() - api_calls_made = github_manager.api_calls - api_calls_before - - return { - 'duration': end_time - start_time, - 'api_calls': api_calls_made, - 'cache_hits': cache_hits, - 
'mappings': cost_center_map - } - - -def main(): - print("πŸš€ Cost Center Caching Performance Comparison") - print("=" * 50) - - # Test data - simulate 20 cost centers (typical enterprise scenario) - cost_centers = { - "Engineering - Backend", "Engineering - Frontend", "Engineering - Mobile", - "Engineering - DevOps", "Engineering - Security", "Product - Core", - "Product - Growth", "Product - Analytics", "Design - UX", "Design - Visual", - "Marketing - Growth", "Marketing - Content", "Sales - Enterprise", - "Sales - SMB", "Customer Success", "Support", "HR", "Finance", - "Legal", "Operations" - } - - print(f"πŸ“Š Testing with {len(cost_centers)} cost centers") - print(f"🌐 Simulating 100ms API delay per call (typical network latency)") - print() - - # Initialize cache (but don't clear it initially) - cache = CostCenterCache() - cache.clear_cache() # Start with clean slate for fair comparison - - # Test 1: Without cache (cold start) - print("πŸ₯Ά Test 1: Cold start (no cache)") - github_manager1 = MockGitHubManager(api_delay=0.1) - result1 = simulate_without_cache(cost_centers.copy(), github_manager1) - - print(f" Duration: {result1['duration']:.2f} seconds") - print(f" API calls: {result1['api_calls']}") - print(f" Avg per cost center: {result1['duration']/len(cost_centers)*1000:.0f}ms") - print() - - # Pre-populate cache with first run's data - for name, cost_center_id in result1['mappings'].items(): - cache.set_cost_center_id(name, cost_center_id) - - # Test 2: With cache (warm cache - simulate second run) - print("πŸ”₯ Test 2: Warm cache (second run)") - github_manager2 = MockGitHubManager(api_delay=0.1) - result2 = simulate_with_cache(cost_centers.copy(), github_manager2, cache) - - print(f" Duration: {result2['duration']:.2f} seconds") - print(f" API calls: {result2['api_calls']}") - print(f" Cache hits: {result2['cache_hits']}") - print(f" Cache hit rate: {result2['cache_hits']/len(cost_centers)*100:.1f}%") - print(f" Avg per cost center: 
{result2['duration']/len(cost_centers)*1000:.0f}ms") - print() - - # Performance comparison - speedup = result1['duration'] / result2['duration'] if result2['duration'] > 0 else float('inf') - api_reduction = ((result1['api_calls'] - result2['api_calls']) / result1['api_calls'] * 100) if result1['api_calls'] > 0 else 0 - time_saved = result1['duration'] - result2['duration'] - - print("πŸ“ˆ Performance Improvement") - print("-" * 30) - print(f" Speed improvement: {speedup:.1f}x faster") - print(f" Time saved: {time_saved:.2f} seconds ({time_saved*1000:.0f}ms)") - print(f" API calls reduced: {api_reduction:.1f}%") - print(f" Network requests saved: {result1['api_calls'] - result2['api_calls']}") - print() - - # Cache statistics - stats = cache.get_cache_stats() - print("πŸ’Ύ Final Cache Statistics") - print("-" * 30) - print(f" Total entries: {stats['total_entries']}") - print(f" Valid entries: {stats['valid_entries']}") - print(f" Cache efficiency: {stats['valid_entries']/stats['total_entries']*100:.1f}%") - print(f" Storage: {stats['cache_file']}") - print() - - print("πŸ’‘ Key Takeaways:") - print(" β€’ Caching provides significant performance improvements") - print(" β€’ Especially beneficial for large teams with many cost centers") - print(" β€’ Essential for CI/CD pipelines and automated workflows") - print(" β€’ Reduces GitHub API rate limit consumption") - - # Test 3: Mixed scenario (some cached, some new) - print() - print("🎯 Test 3: Mixed scenario (adding 5 new cost centers)") - new_cost_centers = cost_centers | {"New Team 1", "New Team 2", "New Team 3", "New Team 4", "New Team 5"} - - github_manager3 = MockGitHubManager(api_delay=0.1) - result3 = simulate_with_cache(new_cost_centers, github_manager3, cache) - - print(f" Duration: {result3['duration']:.2f} seconds") - print(f" API calls: {result3['api_calls']} (only for new cost centers)") - print(f" Cache hits: {result3['cache_hits']}") - print(f" Cache hit rate: 
{result3['cache_hits']/len(new_cost_centers)*100:.1f}%") - print(f" Avg per cost center: {result3['duration']/len(new_cost_centers)*1000:.0f}ms") - - print("\nβœ… Performance comparison complete!") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/cost_center_cache.py b/src/cost_center_cache.py deleted file mode 100644 index da7081b..0000000 --- a/src/cost_center_cache.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Cost Center Cache Manager - -Provides caching functionality for teamβ†’cost center mappings to improve performance -by avoiding redundant API calls to check existing cost centers. -""" - -import json -import logging -import os -from datetime import datetime, timedelta -from pathlib import Path -from typing import Dict, Optional - - -class CostCenterCache: - """Manages persistent cache of cost center mappings to improve performance.""" - - def __init__(self, cache_file: str = ".cache/cost_centers.json", cache_ttl_hours: int = 24): - """ - Initialize the cost center cache. 
- - Args: - cache_file: Path to the cache file - cache_ttl_hours: Time-to-live for cache entries in hours - """ - self.logger = logging.getLogger(__name__) - self.cache_file = Path(cache_file) - self.cache_ttl = timedelta(hours=cache_ttl_hours) - - # Ensure cache directory exists - self.cache_file.parent.mkdir(parents=True, exist_ok=True) - - # Load existing cache - self.cache = self._load_cache() - - def _load_cache(self) -> Dict: - """Load cache from file.""" - if not self.cache_file.exists(): - self.logger.debug(f"Cache file {self.cache_file} doesn't exist, starting with empty cache") - return {"version": "1.0", "last_updated": None, "cost_centers": {}} - - try: - with open(self.cache_file, 'r', encoding='utf-8') as f: - cache_data = json.load(f) - - # Validate cache structure - if not isinstance(cache_data, dict) or "cost_centers" not in cache_data: - self.logger.warning(f"Invalid cache format in {self.cache_file}, resetting cache") - return {"version": "1.0", "last_updated": None, "cost_centers": {}} - - self.logger.debug(f"Loaded cache with {len(cache_data.get('cost_centers', {}))} entries") - return cache_data - - except (json.JSONDecodeError, IOError) as e: - self.logger.warning(f"Failed to load cache from {self.cache_file}: {e}, starting fresh") - return {"version": "1.0", "last_updated": None, "cost_centers": {}} - - def _save_cache(self) -> None: - """Save cache to file.""" - try: - # Update timestamp - self.cache["last_updated"] = datetime.utcnow().isoformat() - - # Write to temporary file first, then atomic rename - temp_file = self.cache_file.with_suffix('.tmp') - with open(temp_file, 'w', encoding='utf-8') as f: - json.dump(self.cache, f, indent=2, sort_keys=True) - - # Atomic rename - temp_file.replace(self.cache_file) - - self.logger.debug(f"Cache saved to {self.cache_file}") - - except IOError as e: - self.logger.error(f"Failed to save cache to {self.cache_file}: {e}") - - def get_cost_center_id(self, cost_center_name: str) -> Optional[str]: - """ 
- Get cost center ID from cache. - - Args: - cost_center_name: Name of the cost center - - Returns: - Cost center ID if found and not expired, None otherwise - """ - cost_centers = self.cache.get("cost_centers", {}) - - if cost_center_name not in cost_centers: - return None - - entry = cost_centers[cost_center_name] - - # Check if entry has expired - if "timestamp" in entry: - try: - entry_time = datetime.fromisoformat(entry["timestamp"]) - if datetime.utcnow() - entry_time > self.cache_ttl: - self.logger.debug(f"Cache entry for '{cost_center_name}' has expired") - return None - except ValueError: - self.logger.warning(f"Invalid timestamp in cache entry for '{cost_center_name}'") - return None - - cost_center_id = entry.get("id") - if cost_center_id: - self.logger.debug(f"Cache hit: '{cost_center_name}' β†’ {cost_center_id}") - return cost_center_id - - return None - - def set_cost_center_id(self, cost_center_name: str, cost_center_id: str) -> None: - """ - Cache a cost center ID. - - Args: - cost_center_name: Name of the cost center - cost_center_id: ID of the cost center - """ - if "cost_centers" not in self.cache: - self.cache["cost_centers"] = {} - - self.cache["cost_centers"][cost_center_name] = { - "id": cost_center_id, - "timestamp": datetime.utcnow().isoformat() - } - - self.logger.debug(f"Cached: '{cost_center_name}' β†’ {cost_center_id}") - - # Save cache after each update to persist changes - self._save_cache() - - def has_cost_center(self, cost_center_name: str) -> bool: - """Check if cost center exists in cache and is not expired.""" - return self.get_cost_center_id(cost_center_name) is not None - - def clear_cache(self) -> None: - """Clear all cached entries.""" - self.cache = {"version": "1.0", "last_updated": None, "cost_centers": {}} - self._save_cache() - self.logger.info("Cache cleared") - - def get_cache_stats(self) -> Dict: - """Get cache statistics.""" - cost_centers = self.cache.get("cost_centers", {}) - - # Count valid (non-expired) entries 
- valid_entries = 0 - expired_entries = 0 - - for entry in cost_centers.values(): - if "timestamp" in entry: - try: - entry_time = datetime.fromisoformat(entry["timestamp"]) - if datetime.utcnow() - entry_time <= self.cache_ttl: - valid_entries += 1 - else: - expired_entries += 1 - except ValueError: - expired_entries += 1 - else: - expired_entries += 1 - - return { - "total_entries": len(cost_centers), - "valid_entries": valid_entries, - "expired_entries": expired_entries, - "last_updated": self.cache.get("last_updated"), - "cache_file": str(self.cache_file), - "ttl_hours": self.cache_ttl.total_seconds() / 3600 - } - - def cleanup_expired_entries(self) -> int: - """Remove expired entries from cache.""" - cost_centers = self.cache.get("cost_centers", {}) - expired_keys = [] - - for name, entry in cost_centers.items(): - if "timestamp" in entry: - try: - entry_time = datetime.fromisoformat(entry["timestamp"]) - if datetime.utcnow() - entry_time > self.cache_ttl: - expired_keys.append(name) - except ValueError: - expired_keys.append(name) - else: - expired_keys.append(name) - - # Remove expired entries - for key in expired_keys: - del cost_centers[key] - - if expired_keys: - self._save_cache() - self.logger.info(f"Cleaned up {len(expired_keys)} expired cache entries") - - return len(expired_keys) \ No newline at end of file diff --git a/src/github_api.py b/src/github_api.py index e8166bb..703a576 100644 --- a/src/github_api.py +++ b/src/github_api.py @@ -12,6 +12,11 @@ from urllib3.util.retry import Retry +class BudgetsAPIUnavailableError(Exception): + """Raised when the GitHub Budgets API is not available for this enterprise.""" + pass + + class GitHubCopilotManager: """Manages GitHub API operations for Copilot licenses.""" @@ -83,6 +88,9 @@ def _make_request(self, url: str, params: Optional[Dict] = None, method: str = ' response.raise_for_status() return response.json() + except requests.exceptions.HTTPError as e: + self.logger.error(f"API request failed: 
{str(e)}") + raise # Re-raise as HTTPError so specific handlers can catch it except requests.exceptions.RequestException as e: self.logger.error(f"API request failed: {str(e)}") raise @@ -196,28 +204,30 @@ def add_users_to_cost_center(self, cost_center_id: str, usernames: List[str], ig users_already_in_target = [] users_in_other_cost_center = [] + # Check if users are already in the target cost center for safety + # (this may be redundant if bulk check was done at batch level, but ensures correctness) + current_members_in_target = set(self.get_cost_center_members(cost_center_id)) + self.logger.debug(f"get_cost_center_members returned {len(current_members_in_target)} members for {cost_center_id}") + users_in_target = [u for u in usernames if u in current_members_in_target] + users_not_in_target = [u for u in usernames if u not in current_members_in_target] + + # Only log bulk check if there are users already in target (avoid noise) + if users_in_target: + self.logger.info(f"πŸ” Bulk membership check: {len(users_in_target)}/{len(usernames)} already in target cost center {cost_center_id}") + + # Mark users already in target as successful (no need to add) + for username in users_in_target: + users_already_in_target.append(username) + self.logger.debug(f"User {username} already in target cost center {cost_center_id}") + if ignore_current_cost_center: - # Fast path: add all users without checking current membership - users_to_add = usernames.copy() - self.logger.debug(f"ignore_current_cost_center=True: Adding all {len(users_to_add)} users without membership checks") + # Fast path: add all users NOT in target, don't check if they're in other cost centers + users_to_add = users_not_in_target.copy() + self.logger.debug(f"ignore_current_cost_center=True: Adding {len(users_to_add)} users without checking other cost centers") else: - # OPTIMIZED safe path: bulk comparison + targeted individual checks - self.logger.debug(f"ignore_current_cost_center=False: Optimized membership 
checking for {len(usernames)} users") - - # Step 1: Bulk check who's already in the target cost center - current_members_in_target = set(self.get_cost_center_members(cost_center_id)) - users_in_target = [u for u in usernames if u in current_members_in_target] - users_not_in_target = [u for u in usernames if u not in current_members_in_target] + # Safe path: check if users NOT in target are in OTHER cost centers + self.logger.debug(f"ignore_current_cost_center=False: Checking {len(users_not_in_target)} users for membership in other cost centers") - self.logger.debug(f"Bulk optimization: {len(users_in_target)} already in target cost center, " - f"{len(users_not_in_target)} need individual membership check") - - # Step 2: Users already in target cost center - mark as already assigned - for username in users_in_target: - users_already_in_target.append(username) - self.logger.debug(f"User {username} already in target cost center {cost_center_id}") - - # Step 3: Individual checks only for users NOT in target cost center for username in users_not_in_target: existing_membership = self.check_user_cost_center_membership(username) @@ -338,17 +348,35 @@ def bulk_update_cost_center_assignments(self, cost_center_assignments: Dict[str, if not usernames: continue - # Process users in batches of 50 - batch_size = 50 - batches = [usernames[i:i + batch_size] for i in range(0, len(usernames), batch_size)] + # OPTIMIZATION: Check bulk membership BEFORE batching to avoid unnecessary batches + # This is especially important when most/all users are already in the correct cost center + current_members_in_target = set(self.get_cost_center_members(cost_center_id)) + users_already_in_target = [u for u in usernames if u in current_members_in_target] + users_not_in_target = [u for u in usernames if u not in current_members_in_target] - self.logger.info(f"Processing {len(usernames)} users for cost center {cost_center_id} in {len(batches)} batches") + self.logger.info(f"πŸ” Bulk membership 
check: {len(users_already_in_target)}/{len(usernames)} already in target cost center {cost_center_id}") - cost_center_results = {} - for i, batch in enumerate(batches, 1): - self.logger.info(f"Processing batch {i}/{len(batches)} ({len(batch)} users) for cost center {cost_center_id}") - batch_results = self.add_users_to_cost_center(cost_center_id, batch, ignore_current_cost_center) - cost_center_results.update(batch_results) + # If all users are already in target, skip batching entirely + if not users_not_in_target: + self.logger.info(f"All {len(usernames)} users already assigned to cost center {cost_center_id}") + cost_center_results = {username: True for username in usernames} + else: + # Only create batches for users who actually need to be processed + usernames_to_process = users_not_in_target if ignore_current_cost_center else usernames + batch_size = 50 + batches = [usernames_to_process[i:i + batch_size] for i in range(0, len(usernames_to_process), batch_size)] + + self.logger.info(f"Processing {len(usernames_to_process)} users for cost center {cost_center_id} in {len(batches)} batches") + + cost_center_results = {} + # Add users already in target as successful + for username in users_already_in_target: + cost_center_results[username] = True + + for i, batch in enumerate(batches, 1): + self.logger.info(f"Processing batch {i}/{len(batches)} ({len(batch)} users) for cost center {cost_center_id}") + batch_results = self.add_users_to_cost_center(cost_center_id, batch, ignore_current_cost_center) + cost_center_results.update(batch_results) batch_success_count = sum(1 for success in batch_results.values() if success) batch_failure_count = len(batch_results) - batch_success_count @@ -582,6 +610,38 @@ def get_enterprise_team_members(self, team_slug: str) -> List[Dict]: self.logger.info(f"Total members found in enterprise team {team_slug}: {len(all_members)}") return all_members + def get_all_active_cost_centers(self) -> Dict[str, str]: + """ + Get all active cost 
centers from the enterprise for performance optimization. + + Returns: + Dict mapping cost center name -> cost center ID + """ + if not self.use_enterprise or not self.enterprise_name: + self.logger.error("Cost center operations only available for GitHub Enterprise") + return {} + + url = f"{self.base_url}/enterprises/{self.enterprise_name}/settings/billing/cost-centers" + + try: + response_data = self._make_request(url) + cost_centers = response_data.get('costCenters', []) + + active_centers_map = {} + for cc in cost_centers: + if cc.get('state', '').lower() == 'active': + name = cc.get('name', '') + uuid = cc.get('id', '') + if name and uuid: + active_centers_map[name] = uuid + + self.logger.debug(f"Found {len(active_centers_map)} active cost centers out of {len(cost_centers)} total") + return active_centers_map + + except Exception as e: + self.logger.error(f"Error fetching active cost centers: {str(e)}") + return {} + def create_cost_center(self, name: str) -> Optional[str]: """ Create a new cost center in the enterprise. @@ -657,6 +717,98 @@ def create_cost_center(self, name: str) -> Optional[str]: self.logger.error(f"Error creating cost center '{name}': {str(e)}") return None + def create_cost_center_with_preload_fallback(self, name: str, active_centers_map: Dict[str, str]) -> Optional[str]: + """ + Create a cost center with preload optimization and fallback to collision handling. 
+ + Args: + name: The name for the new cost center + active_centers_map: Preloaded map of active cost center names to IDs + + Returns: + The cost center ID if successful, None if failed + """ + if not self.use_enterprise or not self.enterprise_name: + self.logger.error("Cost center creation only available for GitHub Enterprise") + return None + + # Check if it already exists in the preloaded map + if name in active_centers_map: + cost_center_id = active_centers_map[name] + self.logger.debug(f"Found existing cost center in preload map: '{name}' β†’ {cost_center_id}") + return cost_center_id + + url = f"{self.base_url}/enterprises/{self.enterprise_name}/settings/billing/cost-centers" + + payload = { + "name": name + } + + # Set proper headers including API version + headers = { + "accept": "application/vnd.github+json", + "x-github-api-version": "2022-11-28", + "content-type": "application/json" + } + + try: + response = self.session.post(url, json=payload, headers=headers) + + # Handle rate limiting + if response.status_code == 429: + reset_time = int(response.headers.get('X-RateLimit-Reset', time.time() + 60)) + wait_time = reset_time - int(time.time()) + 1 + self.logger.warning(f"Rate limit hit. 
Waiting {wait_time} seconds...") + time.sleep(wait_time) + return self.create_cost_center_with_preload_fallback(name, active_centers_map) + + if response.status_code in [200, 201]: + response_data = response.json() + cost_center_id = response_data.get('id') + self.logger.info(f"Successfully created cost center '{name}' with ID: {cost_center_id}") + # Update the preload map for subsequent calls in the same batch + active_centers_map[name] = cost_center_id + return cost_center_id + elif response.status_code == 409: + # Race condition - someone else created it between preload and now + self.logger.info(f"Cost center '{name}' was created by another process (race condition)") + + try: + response_data = response.json() + error_message = response_data.get('message', '') + + # Try to extract UUID from message first + uuid_pattern = r'existing cost center UUID:\s*([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})' + match = re.search(uuid_pattern, error_message, re.IGNORECASE) + + if match: + cost_center_id = match.group(1) + self.logger.info(f"Extracted cost center ID from race condition response: {cost_center_id}") + # Update the preload map for subsequent calls + active_centers_map[name] = cost_center_id + return cost_center_id + else: + # Fallback to original collision handling + self.logger.warning(f"Could not extract UUID from race condition response, falling back to name search") + cost_center_id = self._find_cost_center_by_name(name) + if cost_center_id: + active_centers_map[name] = cost_center_id + return cost_center_id + + except (ValueError, KeyError) as e: + self.logger.warning(f"Could not parse race condition response: {str(e)}, falling back to name search...") + cost_center_id = self._find_cost_center_by_name(name) + if cost_center_id: + active_centers_map[name] = cost_center_id + return cost_center_id + else: + self.logger.error(f"Failed to create cost center '{name}': {response.status_code} {response.text}") + return None + + except 
requests.exceptions.RequestException as e: + self.logger.error(f"Error creating cost center '{name}': {str(e)}") + return None + def _find_cost_center_by_name(self, name: str) -> Optional[str]: """ Find an ACTIVE cost center by name. @@ -695,12 +847,15 @@ def _find_cost_center_by_name(self, name: str) -> Optional[str]: # Log what we found for debugging if deleted_centers: inactive_list = [f"{cc_id} ({status})" for cc_id, status in deleted_centers] - self.logger.warning(f"Found {len(deleted_centers)} inactive cost centers with name '{name}': {', '.join(inactive_list)}") + self.logger.error(f"❌ DELETED COST CENTER: '{name}' exists but is DELETED") + self.logger.error(f" Found {len(deleted_centers)} deleted cost center(s): {', '.join(inactive_list)}") + self.logger.error(f" ⚠️ Cannot assign users to deleted cost centers!") + self.logger.error(f" πŸ’‘ Solution: Delete and recreate the cost center, or contact GitHub support to reactivate it") if not active_centers and not deleted_centers: self.logger.error(f"No cost center found with name '{name}' (despite 409 conflict)") else: - self.logger.error(f"No ACTIVE cost center found with name '{name}' - only inactive ones exist") + self.logger.error(f"No ACTIVE cost center found with name '{name}' - only deleted ones exist") return None @@ -887,6 +1042,51 @@ def check_user_cost_center_membership(self, username: str) -> Optional[Dict]: self.logger.debug(f"Failed to check cost center membership for {username}: {str(e)}") return None + def check_cost_center_has_budget(self, cost_center_id: str, cost_center_name: str) -> bool: + """ + Check if a cost center already has a budget. + + Due to a known bug in the Budget API, when we create a budget with the cost center UUID + as budget_entity_name, the API stores the cost center NAME instead. So we need to + check against the name, not the ID. 
+ + Args: + cost_center_id: UUID of the cost center to check + cost_center_name: Name of the cost center to check + + Returns: + True if a budget already exists for this cost center, False otherwise + + Raises: + BudgetsAPIUnavailableError: If the Budgets API is not available for this enterprise + """ + if not self.use_enterprise or not self.enterprise_name: + return False + + url = f"{self.base_url}/enterprises/{self.enterprise_name}/settings/billing/budgets" + + try: + response_data = self._make_request(url) + budgets = response_data.get('budgets', []) + + # Check if any budget has this cost center NAME as the entity name + # (API bug: stores name even when we send ID) + for budget in budgets: + if budget.get('budget_scope') == 'cost_center' and budget.get('budget_entity_name') == cost_center_name: + self.logger.debug(f"Budget already exists for cost center '{cost_center_name}' (ID: {cost_center_id})") + return True + + return False + + except requests.exceptions.HTTPError as e: + if hasattr(e, 'response') and e.response is not None and e.response.status_code == 404: + raise BudgetsAPIUnavailableError(f"Budgets API is not available for enterprise '{self.enterprise_name}'. This feature may not be enabled for your enterprise.") + self.logger.warning(f"Failed to check budget for cost center '{cost_center_name}' (ID: {cost_center_id}): {str(e)}") + return False + except requests.exceptions.RequestException as e: + self.logger.warning(f"Failed to check budget for cost center '{cost_center_name}' (ID: {cost_center_id}): {str(e)}") + return False + def create_cost_center_budget(self, cost_center_id: str, cost_center_name: str) -> bool: """ Create a budget for a cost center using the GitHub Enterprise Budgets API. 
@@ -900,11 +1100,23 @@ def create_cost_center_budget(self, cost_center_id: str, cost_center_name: str) Returns: True if budget was created successfully, False otherwise + + Raises: + BudgetsAPIUnavailableError: If the Budgets API is not available for this enterprise """ if not self.use_enterprise or not self.enterprise_name: self.logger.error("Budget creation only available for GitHub Enterprise") return False + # Check if budget already exists (this may raise BudgetsAPIUnavailableError) + try: + if self.check_cost_center_has_budget(cost_center_id, cost_center_name): + self.logger.info(f"Budget already exists for cost center: {cost_center_name} (ID: {cost_center_id})") + return True + except BudgetsAPIUnavailableError: + # Re-raise the exception to be handled by the caller + raise + url = f"{self.base_url}/enterprises/{self.enterprise_name}/settings/billing/budgets" payload = { @@ -931,6 +1143,11 @@ def create_cost_center_budget(self, cost_center_id: str, cost_center_name: str) self.logger.info(f"Successfully created budget for cost center: {cost_center_name} (ID: {cost_center_id})") return True + except requests.exceptions.HTTPError as e: + if hasattr(e, 'response') and e.response is not None and e.response.status_code == 404: + raise BudgetsAPIUnavailableError(f"Budgets API is not available for enterprise '{self.enterprise_name}'. 
This feature may not be enabled for your enterprise.") + self.logger.error(f"Failed to create budget for cost center '{cost_center_name}' (ID: {cost_center_id}): {str(e)}") + return False except requests.exceptions.RequestException as e: self.logger.error(f"Failed to create budget for cost center '{cost_center_name}' (ID: {cost_center_id}): {str(e)}") return False \ No newline at end of file diff --git a/src/teams_cost_center_manager.py b/src/teams_cost_center_manager.py index 07a7f62..9969c2c 100644 --- a/src/teams_cost_center_manager.py +++ b/src/teams_cost_center_manager.py @@ -4,8 +4,9 @@ import logging from typing import Dict, List, Set, Tuple, Optional +from .github_api import BudgetsAPIUnavailableError + -from .cost_center_cache import CostCenterCache class TeamsCostCenterManager: @@ -25,8 +26,7 @@ def __init__(self, config, github_manager, create_budgets: bool = False): self.create_budgets = create_budgets self.logger = logging.getLogger(__name__) - # Initialize cost center cache - self.cost_center_cache = CostCenterCache() + # Teams configuration self.teams_scope = config.teams_scope # "organization" or "enterprise" @@ -270,72 +270,117 @@ def build_team_assignments(self) -> Dict[str, List[Tuple[str, str, str]]]: return assignments - def ensure_cost_centers_exist(self, cost_centers: Set[str]) -> Dict[str, str]: + def _preload_active_cost_centers(self) -> Dict[str, str]: + """ + Preload all active cost centers from the enterprise for performance optimization. 
+ + Returns: + Dict mapping cost center name -> cost center ID + """ + try: + active_centers_map = self.github_manager.get_all_active_cost_centers() + self.logger.info(f"Preloaded {len(active_centers_map)} active cost centers for performance optimization") + return active_centers_map + except Exception as e: + self.logger.warning(f"Failed to preload cost centers: {e}") + self.logger.info("Falling back to individual cost center creation approach") + return {} + + def ensure_cost_centers_exist(self, cost_centers: Set[str]) -> Tuple[Dict[str, str], Set[str]]: """ Ensure all required cost centers exist, creating them if needed. - Uses caching to avoid redundant API calls for performance. + Uses preload optimization with fallback to individual creation for performance. Args: cost_centers: Set of cost center names or IDs Returns: - Dict mapping original name/ID -> actual cost center ID + Tuple of: + - Dict mapping original name/ID -> actual cost center ID + - Set of cost center IDs that were newly created in this run """ if not self.auto_create: self.logger.info("Auto-creation disabled, assuming cost center IDs are valid") - # Return identity mapping (assume they're already IDs) - return {cc: cc for cc in cost_centers} + # Return identity mapping (assume they're already IDs) and empty set for newly created + return {cc: cc for cc in cost_centers}, set() self.logger.info(f"Ensuring {len(cost_centers)} cost centers exist...") - # Check cache stats - cache_stats = self.cost_center_cache.get_cache_stats() - self.logger.debug(f"Cache stats: {cache_stats['valid_entries']}/{cache_stats['total_entries']} valid entries") - cost_center_map = {} - cache_hits = 0 + newly_created_ids = set() + preload_hits = 0 api_calls = 0 + # Step 1: Preload all active cost centers for performance optimization + self.logger.info(f"Preloading active cost centers for {len(cost_centers)} cost centers...") + active_centers_map = self._preload_active_cost_centers() + + # Step 2: Check preloaded map for 
existing cost centers + still_need_creation = set() for cost_center_name in cost_centers: - # Check cache first - cached_id = self.cost_center_cache.get_cost_center_id(cost_center_name) - - if cached_id: - cost_center_map[cost_center_name] = cached_id - cache_hits += 1 - self.logger.debug(f"Cache hit: '{cost_center_name}' → {cached_id}") + if cost_center_name in active_centers_map: + cost_center_id = active_centers_map[cost_center_name] + cost_center_map[cost_center_name] = cost_center_id + preload_hits += 1 + self.logger.debug(f"Preload hit: '{cost_center_name}' → {cost_center_id}") + + # If budget creation is enabled and cost center already exists, check/create budget + if self.create_budgets: + self.logger.debug(f"Checking/creating budget for existing cost center: {cost_center_name}") + try: + budget_success = self.github_manager.create_cost_center_budget(cost_center_id, cost_center_name) + if budget_success: + self.logger.debug(f"Budget ready for: {cost_center_name}") + else: + self.logger.warning(f"Failed to ensure budget for: {cost_center_name}") + except BudgetsAPIUnavailableError as e: + self.logger.warning(f"Budgets API not available - skipping budget creation: {str(e)}") + # Disable budget creation for remaining cost centers to avoid repeated errors + self.create_budgets = False else: - # Cache miss - make API call - api_calls += 1 - cost_center_id = self.github_manager.create_cost_center(cost_center_name) + still_need_creation.add(cost_center_name) + + # Step 3: Create cost centers that don't exist yet + for cost_center_name in still_need_creation: + api_calls += 1 + cost_center_id = self.github_manager.create_cost_center_with_preload_fallback( + cost_center_name, active_centers_map + ) + + if cost_center_id: + cost_center_map[cost_center_name] = cost_center_id + newly_created_ids.add(cost_center_id) # Track newly created cost center + self.logger.debug(f"API call: '{cost_center_name}' → {cost_center_id}") - if cost_center_id: - 
cost_center_map[cost_center_name] = cost_center_id - # Cache the result for future use - self.cost_center_cache.set_cost_center_id(cost_center_name, cost_center_id) - self.logger.debug(f"API call: '{cost_center_name}' → {cost_center_id} (cached)") - - # Create budget if requested (and if this is a newly created cost center) - if self.create_budgets: - self.logger.info(f"Creating budget for cost center: {cost_center_name}") + # Create budget if requested (and if this is a newly created cost center) + if self.create_budgets: + self.logger.info(f"Creating budget for cost center: {cost_center_name}") + try: budget_success = self.github_manager.create_cost_center_budget(cost_center_id, cost_center_name) if budget_success: self.logger.info(f"Successfully created budget for: {cost_center_name}") else: self.logger.warning(f"Failed to create budget for: {cost_center_name}") - else: - self.logger.error(f"Failed to create/find cost center: {cost_center_name}") - # Use the name as fallback (will likely fail assignment but won't crash) - cost_center_map[cost_center_name] = cost_center_name + except BudgetsAPIUnavailableError as e: + self.logger.warning(f"Budgets API not available - skipping budget creation: {str(e)}") + # Disable budget creation for remaining cost centers to avoid repeated errors + self.create_budgets = False + else: + self.logger.error(f"Failed to create/find cost center: {cost_center_name}") + # Use the name as fallback (will likely fail assignment but won't crash) + cost_center_map[cost_center_name] = cost_center_name # Log performance metrics - total_requests = cache_hits + api_calls - cache_hit_rate = (cache_hits / total_requests * 100) if total_requests > 0 else 0 + total_requests = preload_hits + api_calls + preload_hit_rate = (preload_hits / total_requests * 100) if total_requests > 0 else 0 self.logger.info(f"Cost center resolution complete: {len(cost_center_map)} resolved") - self.logger.info(f"Performance: {cache_hits} cache hits, {api_calls} API 
calls ({cache_hit_rate:.1f}% cache hit rate)") + self.logger.info(f"Performance: {preload_hits} preload hits, {api_calls} API calls ({preload_hit_rate:.1f}% preload hit rate)") - return cost_center_map + if newly_created_ids: + self.logger.debug(f"Newly created cost centers in this run: {len(newly_created_ids)} cost centers") + + return cost_center_map, newly_created_ids def sync_team_assignments(self, mode: str = "plan", ignore_current_cost_center: bool = False) -> Dict[str, Dict[str, bool]]: """ @@ -361,9 +406,10 @@ def sync_team_assignments(self, mode: str = "plan", ignore_current_cost_center: if mode == "plan": # In plan mode, just use the names as-is (no actual creation) cost_center_id_map = {cc: cc for cc in cost_centers_needed} + newly_created_cost_center_ids = set() # No creation in plan mode self.logger.info(f"Plan mode: Would ensure {len(cost_centers_needed)} cost centers exist") else: - cost_center_id_map = self.ensure_cost_centers_exist(cost_centers_needed) + cost_center_id_map, newly_created_cost_center_ids = self.ensure_cost_centers_exist(cost_centers_needed) # Convert assignments to use actual cost center IDs id_based_assignments: Dict[str, List[str]] = {} @@ -411,6 +457,7 @@ def sync_team_assignments(self, mode: str = "plan", ignore_current_cost_center: removed_user_results = self._remove_users_no_longer_in_teams( id_based_assignments, cost_center_id_map, + newly_created_cost_center_ids, remove=self.config.teams_remove_users_no_longer_in_teams ) @@ -425,6 +472,7 @@ def sync_team_assignments(self, mode: str = "plan", ignore_current_cost_center: def _remove_users_no_longer_in_teams(self, expected_assignments: Dict[str, List[str]], cost_center_id_map: Dict[str, str], + newly_created_cost_center_ids: Set[str], remove: bool = True) -> Dict[str, Dict[str, bool]]: """ Detect and optionally remove users who are no longer in teams from cost centers. 
@@ -435,6 +483,7 @@ def _remove_users_no_longer_in_teams(self, expected_assignments: Dict[str, List[ Args: expected_assignments: Dict mapping cost_center_id -> list of expected usernames cost_center_id_map: Dict mapping cost_center_name -> cost_center_id + newly_created_cost_center_ids: Set of cost center IDs that were created in this run (skip these) remove: If True, remove users no longer in teams. If False, only detect and log. Returns: @@ -444,9 +493,17 @@ def _remove_users_no_longer_in_teams(self, expected_assignments: Dict[str, List[ total_removed_users = 0 total_successfully_removed = 0 - self.logger.info(f"Checking {len(expected_assignments)} cost centers for users no longer in teams...") + # Filter out newly created cost centers - they can't have users who left teams yet + cost_centers_to_check = {cc_id: users for cc_id, users in expected_assignments.items() + if cc_id not in newly_created_cost_center_ids} + + skipped_newly_created = len(expected_assignments) - len(cost_centers_to_check) + if skipped_newly_created > 0: + self.logger.info(f"⚡ Performance optimization: Skipping {skipped_newly_created} newly created cost centers (no users could have left teams yet)") + + self.logger.info(f"Checking {len(cost_centers_to_check)} cost centers for users no longer in teams...") - for cost_center_id, expected_users in expected_assignments.items(): + for cost_center_id, expected_users in cost_centers_to_check.items(): # Get current members of the cost center current_members = self.github_manager.get_cost_center_members(cost_center_id)