Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
956f388
feat: implement hybrid cache architecture for repository cloning
fabiovincenzi Oct 10, 2025
cc49057
feat: add configurable cache limits via proxy.config.json
fabiovincenzi Oct 10, 2025
d6413ce
test: update clearBareClone tests for hybrid cache structure
fabiovincenzi Oct 10, 2025
cf76665
chore: fix metric logging
fabiovincenzi Oct 10, 2025
e998d07
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 13, 2025
734621c
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 20, 2025
452eb18
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 20, 2025
168d9b0
feat: implement git-operations module with native git commands
fabiovincenzi Oct 22, 2025
08116ba
refactor: update pullRemote to use git-operations module
fabiovincenzi Oct 22, 2025
992c862
test: add comprehensive hybrid cache integration tests
fabiovincenzi Oct 22, 2025
d5e1b5b
chore: remove unused isomorphic-git dependency
fabiovincenzi Oct 22, 2025
6d5f886
Merge branch 'feature/hybrid-cache' of https://github.com/fabiovincen…
fabiovincenzi Oct 22, 2025
6bc0ddc
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 22, 2025
627137b
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 27, 2025
235e152
chore: remove redundant cache config fallback
fabiovincenzi Oct 27, 2025
c762b5e
refactor: use bytes internally in CacheManager for consistency
fabiovincenzi Oct 27, 2025
ab28d78
chore: use toSorted()
fabiovincenzi Oct 27, 2025
88bbce8
refactor: remove test-only cleanup, rely on CacheManager limits
fabiovincenzi Oct 27, 2025
1737bfd
refactor: use spawnSync instead of execSync
fabiovincenzi Oct 27, 2025
d35d109
test: increase timeout for git clone tests in ConfigLoader
fabiovincenzi Oct 27, 2025
0a54773
perf: use performance.now() instead of Date.now()
fabiovincenzi Oct 27, 2025
133e5e6
refactor: use cache paths from configuration
fabiovincenzi Oct 27, 2025
f03d686
feat: add mutex to prevent race conditions in cache operations
fabiovincenzi Oct 29, 2025
bc0be9f
perf: remove unnecessary sort from getCacheStats
fabiovincenzi Oct 29, 2025
d07ed9c
fix: add logging for silent errors in getDirectorySize
fabiovincenzi Oct 29, 2025
2acaee7
refactor: rename cacheDir to repoCacheDir to disambiguate from Config…
fabiovincenzi Oct 29, 2025
49695ff
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 29, 2025
b5bc3d8
refactor: rename cacheDir to repoCacheDir to disambiguate from Config…
fabiovincenzi Oct 29, 2025
d110463
Merge branch 'feature/hybrid-cache' of https://github.com/fabiovincen…
fabiovincenzi Oct 29, 2025
b073eb3
docs: add readme and cache benchmark script
fabiovincenzi Oct 31, 2025
928846d
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 31, 2025
5b94ec9
chore: add results csv to gitignore
fabiovincenzi Nov 5, 2025
7c05bfb
chore: move and edit benchmark script
fabiovincenzi Nov 5, 2025
a42bd2e
refactor: use multiplier for speed improvement
fabiovincenzi Nov 5, 2025
fd23676
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Nov 5, 2025
bd14e59
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Nov 5, 2025
e67ff36
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Nov 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,7 @@ website/.docusaurus

# Generated from testing
/test/fixtures/test-package/package-lock.json

# Benchmark results
benchmark-detailed-*/
results*.csv
20 changes: 20 additions & 0 deletions config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,26 @@
}
}
}
},
"cache": {
"description": "Configuration for bare repository cache (hybrid cache system)",
"type": "object",
"properties": {
"maxSizeGB": {
"type": "number",
"description": "Maximum cache size in gigabytes (default 2GB)"
},
"maxRepositories": {
"type": "number",
"description": "Maximum number of repositories in cache (default 50)"
},
"cacheDir": {
"type": "string",
"description": "Directory path for bare repository cache (default ./.remote/cache)"
}
},
"required": ["maxSizeGB", "maxRepositories", "cacheDir"],
"additionalProperties": false
}
},
"definitions": {
Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@
"express-rate-limit": "^8.1.0",
"express-session": "^1.18.2",
"history": "5.3.0",
"isomorphic-git": "^1.34.0",
"jsonwebtoken": "^9.0.2",
"jwk-to-pem": "^2.0.7",
"load-plugin": "^6.0.3",
Expand Down
5 changes: 5 additions & 0 deletions proxy.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -178,5 +178,10 @@
"loginRequired": true
}
]
},
"cache": {
"maxSizeGB": 2,
"maxRepositories": 50,
"cacheDir": "./.remote/cache"
}
}
173 changes: 173 additions & 0 deletions scripts/cache-benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#!/bin/bash
#
# Git Proxy hybrid cache benchmark - setup phase.
#
# Usage: <script> <owner/repo> [branch] [num_pushes]
#   owner/repo  GitHub repository to clone and push to through the proxy
#               (required; push access is needed)
#   branch      defaults to "main"; it is only echoed in the configuration
#               summary below
#   num_pushes  total number of pushes, defaults to 10 (push #1 is labelled
#               cold, the remaining ones warm)
#
# This section parses the arguments, checks that the proxy answers, reads the
# GitHub credentials from `git credential fill`, empties .remote/cache and
# .remote/work, clones the repository through the proxy and creates the CSV
# file that the push timings are appended to.
#
# External tools used: git, curl, bc and a `date` that supports %N.
# The cache paths are resolved relative to the benchmark directory created
# here, so run the script from the directory that contains .remote/.

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}"
echo -e "${BOLD}${BLUE} Git Proxy Hybrid Cache - Detailed Performance Benchmark${NC}"
echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}"
echo ""

PROXY_URL="http://localhost:8000"
GITHUB_REPO="${1}"
TEST_BRANCH="${2:-main}"
NUM_PUSHES="${3:-10}"

if [ -z "$GITHUB_REPO" ]; then
  echo -e "${RED}ERROR: GitHub repository required${NC}"
  echo ""
  echo "Usage: $0 <owner/repo> [branch] [num_pushes]"
  echo "Example: $0 yourFork/backstage main 10"
  echo ""
  echo -e "${YELLOW}Note: You must have push access to the specified repository${NC}"
  exit 1
fi

# Reject anything that is not a plain positive integer before it reaches the
# arithmetic expansion and the push loop.
case "$NUM_PUSHES" in
  '' | *[!0-9]*)
    echo -e "${RED}ERROR: num_pushes must be a positive integer (got '$NUM_PUSHES')${NC}"
    exit 1
    ;;
esac
# Force base 10 so that values such as "08" are not parsed as octal.
NUM_PUSHES=$((10#$NUM_PUSHES))
if [ "$NUM_PUSHES" -lt 1 ]; then
  echo -e "${RED}ERROR: num_pushes must be at least 1${NC}"
  exit 1
fi

PROXY_REPO_URL="$PROXY_URL/github.com/$GITHUB_REPO.git"

echo -e "${CYAN}Configuration:${NC}"
echo " Proxy URL: $PROXY_URL"
echo " GitHub Repo: $GITHUB_REPO"
echo " Branch: $TEST_BRANCH"
echo " Number of pushes: $NUM_PUSHES (1 cold + $((NUM_PUSHES - 1)) warm)"
echo ""

echo -e "${YELLOW}[1/5] Checking git-proxy status...${NC}"
if ! curl -s "$PROXY_URL" > /dev/null 2>&1; then
  echo -e "${RED}✗ ERROR: git-proxy not running on $PROXY_URL${NC}"
  exit 1
fi
echo -e "${GREEN}✓ Git-proxy is running${NC}\n"

echo -e "${YELLOW}[2/5] Retrieving GitHub credentials...${NC}"
CREDENTIALS=$(printf 'protocol=https\nhost=github.com\n\n' | git credential fill 2>/dev/null)
if [ -z "$CREDENTIALS" ]; then
  echo -e "${RED}✗ ERROR: No GitHub credentials found${NC}"
  exit 1
fi

# `-f2-` keeps everything after the first '=' so values that themselves
# contain '=' are not truncated.
GITHUB_USERNAME=$(echo "$CREDENTIALS" | grep "^username=" | cut -d= -f2-)
GITHUB_TOKEN=$(echo "$CREDENTIALS" | grep "^password=" | cut -d= -f2-)
if [ -z "$GITHUB_USERNAME" ] || [ -z "$GITHUB_TOKEN" ]; then
  echo -e "${RED}✗ ERROR: No GitHub credentials found${NC}"
  exit 1
fi
# Fall back to a placeholder address when no global git e-mail is configured.
GITHUB_EMAIL=$(git config --global user.email || echo "benchmark@example.com")
echo -e "${GREEN}✓ Credentials retrieved for: $GITHUB_USERNAME${NC}\n"

TEST_DIR="./benchmark-detailed-$(date +%s)"
echo -e "${YELLOW}[3/5] Setting up test environment...${NC}"
# Abort if the benchmark directory cannot be entered: the rm -rf below uses
# paths relative to it and must never run from an unexpected location.
if ! mkdir -p "$TEST_DIR" || ! cd "$TEST_DIR"; then
  echo -e "${RED}✗ ERROR: Could not create or enter $TEST_DIR${NC}"
  exit 1
fi
REPO_NAME=$(basename "$GITHUB_REPO")

echo " → Clearing cache..."
rm -rf ../.remote/cache/* ../.remote/work/* 2>/dev/null || true
echo -e "${GREEN}✓ Cache cleared${NC}\n"

echo -e "${YELLOW}[4/5] Performing initial clone (one-time operation)...${NC}"
echo -e "${CYAN}→ Cloning $GITHUB_REPO via proxy...${NC}\n"
START_INITIAL_CLONE=$(date +%s.%N)
git clone "$PROXY_REPO_URL" "$REPO_NAME"
CLONE_EXIT_CODE=$?
END_INITIAL_CLONE=$(date +%s.%N)

# Stop here on a failed clone; otherwise the commits and pushes below would
# run in whatever directory the script happens to be in.
if [ "$CLONE_EXIT_CODE" -ne 0 ]; then
  echo -e "${RED}✗ ERROR: git clone failed (exit code $CLONE_EXIT_CODE)${NC}"
  exit 1
fi

INITIAL_CLONE_TIME=$(echo "$END_INITIAL_CLONE - $START_INITIAL_CLONE" | bc)

cd "$REPO_NAME" || exit 1
git config user.email "$GITHUB_EMAIL"
git config user.name "$GITHUB_USERNAME"
echo -e "${GREEN}✓ Initial clone completed in ${INITIAL_CLONE_TIME}s${NC}\n"

# The CSV lives one level up, i.e. in the benchmark directory, not in the clone.
RESULTS_FILE="../results-detailed.csv"
echo "push_number,is_cold,push_time_s" > "$RESULTS_FILE"

# Creates one benchmark commit, pushes it through the proxy and records how
# long the push took.
#
# Arguments:
#   $1  push number; used in the commit file name, the commit message and the
#       remote branch name (benchmark-test-<n>)
#   $2  "true" labels the push as cold cache, any other value as warm cache
# Globals read: PROXY_REPO_URL, GITHUB_USERNAME, GITHUB_TOKEN, RESULTS_FILE
#               and the colour variables.
# Output: appends "<push_number>,<is_cold>,<seconds>" to $RESULTS_FILE.
# Exits the whole script with status 1 when the commit or the push fails.
perform_push() {
  local push_num=$1
  local is_cold=$2
  local label="WARM CACHE"
  if [ "$is_cold" = "true" ]; then
    label="COLD CACHE"
  fi

  echo -e "${BLUE}═══ Push #$push_num ($label) ═══${NC}"

  local commit_file
  commit_file="benchmark-push-$push_num-$(date +%s).txt"
  echo "Benchmark push $push_num at $(date)" > "$commit_file"
  # Without a fresh commit the timing below would measure a push of the
  # previous HEAD, so a failed add/commit is treated as fatal.
  if ! git add "$commit_file" > /dev/null 2>&1 ||
    ! git commit -m "Benchmark push #$push_num" > /dev/null 2>&1; then
    echo -e "${RED}✗ FAILED to create benchmark commit${NC}"
    exit 1
  fi

  echo -n " Pushing... "
  START_PUSH=$(date +%s.%N)
  # The credentials travel to the inline helper through the environment
  # instead of being spliced into its shell source, so special characters in
  # the token cannot break the helper and the token stays out of git's
  # command-line arguments.
  PUSH_OUTPUT=$(GIT_BENCH_USERNAME="$GITHUB_USERNAME" GIT_BENCH_PASSWORD="$GITHUB_TOKEN" \
    git -c credential.helper='!f() { echo "username=$GIT_BENCH_USERNAME"; echo "password=$GIT_BENCH_PASSWORD"; }; f' \
    push "$PROXY_REPO_URL" "HEAD:refs/heads/benchmark-test-$push_num" 2>&1)
  PUSH_EXIT_CODE=$?
  END_PUSH=$(date +%s.%N)
  PUSH_TIME=$(echo "$END_PUSH - $START_PUSH" | bc)

  if [ $PUSH_EXIT_CODE -ne 0 ]; then
    echo -e "${RED}✗ FAILED${NC}"
    echo "$PUSH_OUTPUT"
    echo ""
    exit 1
  fi

  echo -e "${GREEN}✓ ${PUSH_TIME}s${NC}"
  echo "$push_num,$is_cold,$PUSH_TIME" >> "$RESULTS_FILE"
  echo ""
}

# Benchmark run and report: performs the pushes, then summarises the timings
# written to results-detailed.csv and prints the size of the cache directory.

echo -e "${YELLOW}[5/5] Running push benchmark...${NC}\n"

perform_push 1 true
# Arithmetic loop instead of `seq 2 N`: it simply runs zero times when only
# one push was requested, whereas BSD seq counts down (2, 1) in that case.
for ((i = 2; i <= NUM_PUSHES; i++)); do
  perform_push "$i" false
done

# Leave the clone; the CSV and the relative cache path are resolved from here.
cd ..

echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}"
echo -e "${BOLD}${BLUE} Performance Analysis ${NC}"
echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}\n"

RESULTS_CSV="results-detailed.csv"
# Row 1 is the CSV header, row 2 the cold push, every later row a warm push.
COLD_TIME=$(awk -F, 'NR==2 {print $3}' "$RESULTS_CSV")
WARM_TIMES=$(awk -F, 'NR>2 {print $3}' "$RESULTS_CSV")
# Count rows directly: `echo "" | wc -l` would report 1 for an empty list.
WARM_COUNT=$(awk -F, 'NR>2 {count++} END {print count+0}' "$RESULTS_CSV")

echo -e "${CYAN}${BOLD}Push Performance:${NC}\n"
printf " %-25s %10.2fs\n" "Cold cache (Push #1):" "$COLD_TIME"

if [ "$WARM_COUNT" -gt 0 ]; then
  WARM_MIN=$(echo "$WARM_TIMES" | sort -n | head -1)
  WARM_MAX=$(echo "$WARM_TIMES" | sort -n | tail -1)
  WARM_AVG=$(echo "$WARM_TIMES" | awk '{sum+=$1; count++} END {print sum/count}')
  WARM_STDDEV=$(echo "$WARM_TIMES" | awk -v avg="$WARM_AVG" '{sum+=($1-avg)^2; count++} END {print sqrt(sum/count)}')

  # awk keeps full floating-point precision for the intermediate division.
  # `bc` with scale=1 truncated warm/cold to one decimal before multiplying
  # by 100, which quantised the reported improvement to 10% steps.
  SPEEDUP=$(awk -v cold="$COLD_TIME" -v warm="$WARM_AVG" \
    'BEGIN {printf "%.2f", (warm > 0) ? cold / warm : 0}')
  IMPROVEMENT=$(awk -v cold="$COLD_TIME" -v warm="$WARM_AVG" \
    'BEGIN {printf "%.1f", (cold > 0) ? (1 - warm / cold) * 100 : 0}')

  TOTAL_WARM_TIME=$(echo "$WARM_TIMES" | awk '{sum+=$1} END {print sum}')
  # What the warm pushes would have cost if each had taken the cold time.
  HYPOTHETICAL_NO_CACHE=$(awk -v cold="$COLD_TIME" -v n="$WARM_COUNT" \
    'BEGIN {printf "%.2f", cold * n}')
  TIME_SAVED=$(awk -v hyp="$HYPOTHETICAL_NO_CACHE" -v total="$TOTAL_WARM_TIME" \
    'BEGIN {printf "%.2f", hyp - total}')
  TIME_SAVED_MINUTES=$(awk -v saved="$TIME_SAVED" 'BEGIN {printf "%.1f", saved / 60}')

  printf " %-25s %10.2fs\n" "Warm cache (average):" "$WARM_AVG"
  printf " %-25s %10.2fs\n" "Warm cache (min):" "$WARM_MIN"
  printf " %-25s %10.2fs\n" "Warm cache (max):" "$WARM_MAX"
  printf " %-25s %10.2fs\n" "Warm cache (std dev):" "$WARM_STDDEV"

  echo -e "\n${GREEN}${BOLD}Performance Improvement:${NC}\n"
  printf " %-25s %10.1f%%\n" "Speed improvement:" "$IMPROVEMENT"
  printf " %-25s %10.2fx\n" "Speedup ratio:" "$SPEEDUP"

  echo -e "\n${CYAN}${BOLD}Total Time Saved:${NC}\n"
  printf " %-30s %10.2fs\n" "Total warm pushes time:" "$TOTAL_WARM_TIME"
  printf " %-30s %10.2fs\n" "Hypothetical (no cache):" "$HYPOTHETICAL_NO_CACHE"
  printf " %-30s %10.2fs (%.1fm)\n" "Time saved:" "$TIME_SAVED" "$TIME_SAVED_MINUTES"
else
  # A single push gives nothing to compare the cold timing against.
  echo -e "\n${YELLOW}No warm-cache pushes were recorded; skipping the warm/cold comparison.${NC}"
fi

echo -e "\n${CYAN}${BOLD}Cache Statistics:${NC}\n"
CACHE_DIR="../.remote/cache"
if [ -d "$CACHE_DIR" ]; then
  FINAL_CACHE_SIZE=$(du -sh "$CACHE_DIR" 2>/dev/null | cut -f1)
  FINAL_CACHE_COUNT=$(ls -1 "$CACHE_DIR" 2>/dev/null | wc -l | tr -d ' ')
  printf " %-25s %10s\n" "Cache size:" "$FINAL_CACHE_SIZE"
  printf " %-25s %10s\n" "Cached repositories:" "$FINAL_CACHE_COUNT"
fi

echo -e "\n${GREEN}${BOLD}✓ Benchmark complete!${NC}"
31 changes: 31 additions & 0 deletions src/config/generated/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ export interface GitProxyConfig {
* List of repositories that are authorised to be pushed to through the proxy.
*/
authorisedList?: AuthorisedRepo[];
/**
* Configuration for bare repository cache (hybrid cache system)
*/
cache?: Cache;
/**
* Block commits based on rules defined over author/committer e-mail addresses, commit
* message content and diff content
Expand Down Expand Up @@ -286,6 +290,24 @@ export interface AuthorisedRepo {
[property: string]: any;
}

/**
* Configuration for bare repository cache (hybrid cache system)
*
* The JSON schema for this object lists all three properties as required and
* does not allow additional properties. The defaults quoted on the fields
* match the values shipped in proxy.config.json.
*
* NOTE(review): this file lives under `generated/`, so these comments are
* presumably derived from the schema descriptions - confirm before editing by hand.
*/
export interface Cache {
/**
* Directory path for bare repository cache (default ./.remote/cache)
*/
cacheDir: string;
/**
* Maximum number of repositories in cache (default 50)
*/
maxRepositories: number;
/**
* Maximum cache size in gigabytes (default 2GB)
*/
maxSizeGB: number;
}

/**
* Block commits based on rules defined over author/committer e-mail addresses, commit
* message content and diff content
Expand Down Expand Up @@ -690,6 +712,7 @@ const typeMap: any = {
typ: u(undefined, a(r('AuthenticationElement'))),
},
{ json: 'authorisedList', js: 'authorisedList', typ: u(undefined, a(r('AuthorisedRepo'))) },
{ json: 'cache', js: 'cache', typ: u(undefined, r('Cache')) },
{ json: 'commitConfig', js: 'commitConfig', typ: u(undefined, r('CommitConfig')) },
{ json: 'configurationSources', js: 'configurationSources', typ: u(undefined, 'any') },
{ json: 'contactEmail', js: 'contactEmail', typ: u(undefined, '') },
Expand Down Expand Up @@ -793,6 +816,14 @@ const typeMap: any = {
],
'any',
),
Cache: o(
[
{ json: 'cacheDir', js: 'cacheDir', typ: '' },
{ json: 'maxRepositories', js: 'maxRepositories', typ: 3.14 },
{ json: 'maxSizeGB', js: 'maxSizeGB', typ: 3.14 },
],
false,
),
CommitConfig: o(
[
{ json: 'author', js: 'author', typ: u(undefined, r('Author')) },
Expand Down
9 changes: 9 additions & 0 deletions src/config/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ function mergeConfigurations(
commitConfig: { ...defaultConfig.commitConfig, ...userSettings.commitConfig },
attestationConfig: { ...defaultConfig.attestationConfig, ...userSettings.attestationConfig },
rateLimit: userSettings.rateLimit || defaultConfig.rateLimit,
cache: userSettings.cache
? { ...defaultConfig.cache, ...userSettings.cache }
: defaultConfig.cache,
tls: tlsConfig,
tempPassword: { ...defaultConfig.tempPassword, ...userSettings.tempPassword },
// Preserve legacy SSL fields
Expand Down Expand Up @@ -196,6 +199,7 @@ export const logConfiguration = () => {
console.log(`data sink = ${JSON.stringify(getDatabase())}`);
console.log(`authentication = ${JSON.stringify(getAuthMethods())}`);
console.log(`rateLimit = ${JSON.stringify(getRateLimit())}`);
console.log(`cache = ${JSON.stringify(getCacheConfig())}`);
};

export const getAPIs = () => {
Expand Down Expand Up @@ -285,6 +289,11 @@ export const getRateLimit = () => {
return config.rateLimit;
};

/**
 * Returns the `cache` section of the fully loaded configuration.
 */
export const getCacheConfig = () => loadFullConfiguration().cache;

// Function to handle configuration updates
const handleConfigUpdate = async (newConfig: Configuration) => {
console.log('Configuration updated from external source');
Expand Down
Loading
Loading