Skip to content

Commit 6ff5d29

Browse files
authored
Merge branch 'main' into updated-confidence
2 parents f282d81 + a8158d9 commit 6ff5d29

File tree

77 files changed

+10690
-486
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

77 files changed

+10690
-486
lines changed

.github/workflows/main.yaml

+32-13
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ jobs:
228228
- name: Setup Hugo
229229
uses: peaceiris/actions-hugo@v3
230230
with:
231-
hugo-version: "${{ vars.HUGO_BUILD_VERSION }}"
231+
hugo-version: "${{ vars.HUGO_BUILD_VERSION }}"
232232
extended: true
233233

234234
- name: Build
@@ -296,7 +296,7 @@ jobs:
296296

297297
# Offload Images
298298
OffloadImages:
299-
name: "Ofload Site Images to Blob Storage"
299+
name: "Cleanup and Ofload to Blob Storage"
300300
runs-on: ubuntu-latest
301301
if: ${{ success() }}
302302
needs: [BuildSite, Setup]
@@ -323,6 +323,11 @@ jobs:
323323
shell: pwsh
324324
run: |
325325
. ./.powershell/_includes/ImagesToBlobStorage.ps1
326+
if ($env:ACTIONS_STEP_DEBUG -eq "true") {
327+
$levelSwitch.MinimumLevel = 'Debug'
328+
} else {
329+
$levelSwitch.MinimumLevel = 'Information'
330+
}
326331
Upload-ImageFiles -LocalPath $env:LOCAL_IMAGE_PATH -BlobUrlBase $env:BLOB_STORAGE_URL -AzureSASToken $env:AZURE_BLOB_STORAGE_SAS_TOKEN
327332
env:
328333
LOCAL_IMAGE_PATH: "./_site/"
@@ -333,26 +338,40 @@ jobs:
333338
shell: pwsh
334339
run: |
335340
. ./.powershell/_includes/ImagesToBlobStorage.ps1
341+
if ($env:ACTIONS_STEP_DEBUG -eq "true") {
342+
$levelSwitch.MinimumLevel = 'Debug'
343+
} else {
344+
$levelSwitch.MinimumLevel = 'Information'
345+
}
336346
Rewrite-ImageLinks -LocalPath $env:LOCAL_IMAGE_PATH -BlobUrl $env:BLOB_URL_BIT
337347
env:
338348
LOCAL_IMAGE_PATH: "./_site/"
339349
BLOB_URL_BIT: ${{needs.Setup.outputs.nkdAgility_BLOB_URL_BIT}}
340-
- name: "Delete Local Images"
350+
- name: "Clean Local Images"
341351
shell: pwsh
342352
run: |
343353
. ./.powershell/_includes/ImagesToBlobStorage.ps1
344-
Delete-LocalImageFiles -LocalPath $env:LOCAL_IMAGE_PATH
354+
if ($env:ACTIONS_STEP_DEBUG -eq "true") {
355+
$levelSwitch.MinimumLevel = 'Debug'
356+
} else {
357+
$levelSwitch.MinimumLevel = 'Information'
358+
}
359+
$deletedImagesCount = Delete-LocalImageFiles -LocalPath $env:LOCAL_IMAGE_PATH
360+
345361
env:
346362
LOCAL_IMAGE_PATH: "./_site/"
347-
# - name: "Build NKDAgility Outputs"
348-
# shell: pwsh
349-
# run: ./.powershell/build/Sync-BlobStorageImages.ps1
350-
# env:
351-
# LOCAL_IMAGE_PATH: "./_site/"
352-
# BLOB_URL_BIT: "/blob"
353-
# BLOB_STORAGE_URL: "https://nkdagilityblobs.blob.core.windows.net/`$web"
354-
# AZURE_BLOB_STORAGE_SAS_TOKEN: ${{ secrets.AZURE_BLOB_STORAGE_SAS_TOKEN }}
355-
# AZCOPY_FAIL_ON_ERROR: "true"
363+
- name: "Clean Local Data Files"
364+
shell: pwsh
365+
run: |
366+
. ./.powershell/_includes/CleanupDataFiles.ps1
367+
if ($env:ACTIONS_STEP_DEBUG -eq "true") {
368+
$levelSwitch.MinimumLevel = 'Debug'
369+
} else {
370+
$levelSwitch.MinimumLevel = 'Information'
371+
}
372+
$deletedCount = Delete-LocalDataFiles -LocalPath $env:LOCAL_DATA_PATH
373+
env:
374+
LOCAL_DATA_PATH: "./_site/resources/"
356375
- uses: actions/upload-artifact@v4
357376
with:
358377
name: Site-Blobbed

.powershell/_includes/ClassificationHelpers.ps1

+8-3
Original file line numberDiff line numberDiff line change
@@ -302,9 +302,14 @@ function Get-CategoryConfidenceWithChecksum {
302302
$result = Get-ConfidenceFromAIResponse -AIResponseJson $aiResponseJson -ResourceTitle $ResourceTitle -ResourceContent $ResourceContent
303303
if ($result.reasoning -ne $null -and $result.category -ne "unknown") {
304304
$oldConfidence = $cachedData[$result.category]?.ai_confidence ?? 0
305-
$DaysAgo = [math]::Round(([DateTimeOffset]::Now - [DateTimeOffset]$cachedData[$result.category].calculated_at).TotalDays)
306-
$confidenceDiff = "{0}{1}" -f ($(if (($result.ai_confidence - $oldConfidence) -ge 0) { '+' } else { '-' }), [math]::Abs($result.ai_confidence - $oldConfidence))
307-
Write-InformationLog "Updating {category} confidence {diff}! The old confidence of {old} was calculated {daysago} days ago. The new confidence is {confidence}!" -PropertyValues $result.category, $confidenceDiff, $oldConfidence, $DaysAgo, $result.ai_confidence
305+
$DaysAgo = if ($cachedData[$result.category]?.calculated_at -is [DateTime]) {
306+
[math]::Round(([DateTimeOffset]::Now - [DateTimeOffset]$cachedData[$result.category].calculated_at).TotalDays)
307+
}
308+
else {
309+
0
310+
}
311+
312+
Write-InformationLog "Updating {category} with confidence of {old} calculated {daysago} to new confidence of {confidence} " -PropertyValues $result.category, $oldConfidence, $DaysAgo, $result.ai_confidence
308313
$CatalogFromCache[$result.category] = $result
309314
$cachedData[$result.category] = $result
310315
# Save cache after each API call
+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Helpers
2+
. ./.powershell/_includes/LoggingHelper.ps1
3+
4+
function Delete-LocalDataFiles {
    <#
    .SYNOPSIS
    Deletes generated data files beneath a directory and returns how many were removed.

    .DESCRIPTION
    Recursively searches $LocalPath for files named data.captions.*.srt,
    data.captions.json, data.json or data.index.classifications.json and deletes
    each one. Progress is logged every 10% of files processed. A failure to
    delete one file is logged and does not stop the remaining deletions.

    .PARAMETER LocalPath
    Root directory to search.

    .OUTPUTS
    The number of files that were actually deleted (0 when nothing matched).
    #>
    param (
        [string]$LocalPath
    )

    $count = 0
    try {
        Write-InfoLog "Deleting all data files locally from '$LocalPath'..."

        # -File keeps directories that happen to match a pattern out of the delete list;
        # @() guarantees an array so .Count is reliable for zero or one match.
        $files = @(Get-ChildItem -Path $LocalPath -Recurse -File -Include data.captions.*.srt, data.captions.json, data.json, data.index.classifications.json)
        if ($files.Count -eq 0) {
            Write-InfoLog "No files found."
            return 0;
        }

        $totalFiles = $files.Count
        $size = ($files | Measure-Object -Property Length -Sum).Sum
        $sizeString = "{0:N2} MB" -f ($size / 1MB)
        Write-InfoLog "Found ($totalFiles) files totalling $sizeString."

        $lastPercentage = 0     # Last percentage at which progress was logged
        $progressInterval = 10  # Percentage interval for logging
        $processed = 0          # Files attempted, whether or not the delete succeeded

        foreach ($file in $files) {
            try {
                # -ErrorAction Stop turns Remove-Item's non-terminating errors into
                # exceptions, so failures reach the catch and are not counted as deleted.
                # -LiteralPath stops wildcard characters in a file name being expanded.
                Remove-Item -LiteralPath $file.FullName -Force -ErrorAction Stop
                Write-DebugLog "Deleted: $($file.FullName)"
                $count++
            }
            catch {
                # Inside catch, $_ is the error record, so the path must come from $file.
                Write-ErrorLog "Error deleting file $($file.FullName): $_"
            }

            # Progress follows files processed so logging keeps advancing after a failure.
            $processed++
            $percentage = [math]::Round(($processed / $totalFiles) * 100, 0)
            if ($percentage -ge $lastPercentage + $progressInterval) {
                Write-InfoLog "Progress: $percentage% ($count of $totalFiles files deleted)"
                $lastPercentage = $percentage
            }
        }
    }
    catch {
        Write-ErrorLog "Error during file deletion: $_"
    }

    Write-InfoLog "Completed: Deleted $count files."
    return $count;
}

.powershell/_includes/ImagesToBlobStorage.ps1

+80-32
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ function Upload-ImageFiles {
1212
[string]$AzureSASToken
1313
)
1414
try {
15-
Write-Debug "Uploading image files to Azure Blob Storage using azcopy..."
15+
Write-InfoLog "Uploading image files to Azure Blob Storage using azcopy..."
1616
azcopy sync $LocalPath "$BlobUrlBase`?$AzureSASToken" --recursive=true --include-pattern "*.jpg;*.jpeg;*.png;*.gif;*.webp;*.svg" --compare-hash=MD5
17-
Write-Debug "Upload complete."
17+
Write-InfoLog "Upload complete."
1818
}
1919
catch {
20-
Write-Debug "Error during upload: $_"
20+
# A space is required between the command and its argument; without it PowerShell
# reads the whole token as one (non-existent) command name and the error is never logged.
Write-ErrorLog "Error during upload: $_"
2121
}
2222
}
2323

@@ -26,23 +26,52 @@ function Delete-LocalImageFiles {
2626
param (
2727
[string]$LocalPath
2828
)
29+
$count = 0
2930
try {
30-
Write-Debug "Deleting all image files locally..."
31-
Get-ChildItem -Path $LocalPath -Recurse -Include *.jpg, *.jpeg, *.png, *.gif, *.webp, *.svg | ForEach-Object {
31+
Write-InfoLog "Deleting all image files locally..."
32+
$images = Get-ChildItem -Path $LocalPath -Recurse -Include *.jpg, *.jpeg, *.png, *.gif, *.webp, *.svg
33+
if ($images.Count -eq 0) {
34+
Write-InfoLog "No image files found."
35+
return 0;
36+
}
37+
38+
$totalFiles = $images.Count
39+
$size = ($images | Measure-Object -Property Length -Sum).Sum
40+
$sizeString = "{0:N2} MB" -f ($size / 1MB)
41+
Write-InfoLog "Found ($totalFiles) image files totalling $sizeString."
42+
43+
$lastPercentage = 0 # Tracks when to log progress
44+
$progressInterval = 10 # Percentage interval for logging
45+
46+
$images | ForEach-Object -Begin { $index = 0 } -Process {
3247
try {
3348
Remove-Item -Path $_.FullName -Force
34-
Write-Debug "Deleted: $($_.FullName)"
49+
Write-DebugLog "Deleted: $($_.FullName)"
50+
$count++
51+
$index++
52+
53+
# Calculate percentage progress
54+
$percentage = [math]::Round(($index / $totalFiles) * 100, 0)
55+
56+
# Log progress at defined intervals
57+
if ($percentage -ge $lastPercentage + $progressInterval) {
58+
Write-InfoLog "Progress: $percentage% ($index of $totalFiles image files deleted)"
59+
$lastPercentage = $percentage
60+
}
3561
}
3662
catch {
37-
Write-Debug "Error deleting file $($_.FullName): $_"
63+
Write-ErrorLog "Error deleting file $($_.FullName): $_"
3864
}
3965
}
4066
}
4167
catch {
42-
Write-Debug "Error during file deletion: $_"
68+
Write-ErrorLog "Error during image file deletion: $_"
4369
}
70+
Write-InfoLog "Completed: Deleted $count image files."
71+
return $count;
4472
}
4573

74+
4675
# Method 3: Rewrite image links in .html files using regex
4776
function Rewrite-ImageLinks {
4877
param (
@@ -55,10 +84,18 @@ function Rewrite-ImageLinks {
5584
$HtmlFiles = Get-ChildItem -Path $LocalPath -Recurse -Include *.html
5685

5786
$totalLinks = 0;
87+
$totalFiles = $HtmlFiles.Count
88+
89+
if ($totalFiles -eq 0) {
90+
Write-InfoLog "No .html files found for processing."
91+
return
92+
}
93+
94+
$lastPercentage = 0 # Tracks when to log progress
95+
$progressInterval = 10 # Percentage interval for logging
5896

5997
foreach ($HtmlFile in $HtmlFiles) {
6098

61-
# $FileContent = Get-Content -Path (Resolve-Path $HtmlFile.FullName) -Raw
6299
$FileContent = Get-Content -LiteralPath $HtmlFile.FullName -Raw
63100
# Regex to match all src attributes with image paths
64101
$ImageRegex = "(?i)(src|content|href)\s*=\s*([""']?)(?<url>[^\s""'>]+\.(jpg|jpeg|png|gif|webp|svg))\2"
@@ -91,18 +128,21 @@ function Rewrite-ImageLinks {
91128
try {
92129
# Define the regex pattern
93130
$allowedPattern = '^(?:https?:\/\/)?(?:nkdagility\.com|preview\.nkdagility\.com|yellow-pond-042d21b03.*\.westeurope\.5\.azurestaticapps\.net)(\/.*)?$'
94-
if ($OriginalUrl -match $allowedPattern) {
95-
continue
131+
if ($OriginalPath -match $allowedPattern) {
132+
$pattern = '^(?:https?:\/\/)?[^\/]+(?<path>\/.*)$'
133+
if ($OriginalPath -match $pattern) {
134+
$path = $matches['path']
135+
$UpdatedPath = "$BlobUrl/" + $path -join '/'
136+
}
137+
96138
}
97-
98-
$pattern = '^(?:https?:\/\/)?[^\/]+(?<path>\/.*)$'
99-
if ($OriginalUrl -match $pattern) {
100-
$path = $matches['path']
101-
$UpdatedPath = "$BlobUrl/" + $path -join '/'
102-
}
139+
else {
140+
Write-DebugLog " Skipping : $OriginalPath"
141+
}
142+
103143
}
104144
catch {
105-
Write-Debug " ERROR HTTP: $OriginalPath -> $UpdatedPath : $_"
145+
Write-DebugLog " ERROR HTTP: $OriginalPath -> $UpdatedPath : $_"
106146
}
107147
}
108148
elseif ($OriginalPath.StartsWith("/")) {
@@ -115,52 +155,60 @@ function Rewrite-ImageLinks {
115155
# Relative paths - Ensure consistency by converting to root-relative
116156
# 1. Get the parent directory of the HTML file
117157
$ParentDirectory = Split-Path -Path $HtmlFile.FullName -Parent
118-
Write-Debug "Parent Directory: $ParentDirectory"
158+
Write-DebugLog "Parent Directory: $ParentDirectory"
119159

120160
# 2. Combine the parent directory with the original path
121161
$CombinedPath = Join-Path -Path $ParentDirectory -ChildPath $OriginalPath
122-
Write-Debug "Combined Path: $CombinedPath"
162+
Write-DebugLog "Combined Path: $CombinedPath"
123163

124164
if (-not (Test-Path -Path $CombinedPath)) {
125-
Write-Debug " Path does not exist: $CombinedPath"
165+
Write-DebugLog " Path does not exist: $CombinedPath"
126166
continue;
127167
}
128168
# 3. Resolve the full path
129169
$ResolvedPath = Resolve-Path -Path $CombinedPath
130-
Write-Debug "Resolved Path: $ResolvedPath"
170+
Write-DebugLog "Resolved Path: $ResolvedPath"
131171

132172
# 4. Get the root-relative path
133173
$LocalImagesFullPath = (Get-Item $LocalPath).FullName
134-
Write-Debug "Local Images Full Path: $LocalImagesFullPath"
174+
Write-DebugLog "Local Images Full Path: $LocalImagesFullPath"
135175

136176
$RootRelativePath = $ResolvedPath.Path.Replace($LocalImagesFullPath, "").Replace("\", "/")
137-
Write-Debug "Root Relative Path: $RootRelativePath"
177+
Write-DebugLog "Root Relative Path: $RootRelativePath"
138178

139179
# 5. Construct the updated path
140180
$UpdatedPath = "$BlobUrl/$RootRelativePath"
141-
Write-Debug " Updated Path: $UpdatedPath"
181+
Write-DebugLog " Updated Path: $UpdatedPath"
142182
}
143183
catch {
144-
Write-Debug " Error resolving path: $_"
184+
Write-ErrorLog " Error resolving path: $_"
145185
continue;
146186
}
147187
}
148188

149189
# Replace the original path in the content
150190
if ($OriginalPath -ne $UpdatedPath) {
151191
$FileContent = $FileContent -replace [regex]::Escape($OriginalPath), $UpdatedPath
152-
Write-Debug " Replaced: $OriginalPath -> $UpdatedPath"
192+
Write-DebugLog " Replaced: $OriginalPath -> $UpdatedPath"
153193
$totalLinks += 1;
154194
}
155195

156196
}
157197

158198
# Save updated content back to the file
159199
Set-Content -LiteralPath $HtmlFile.FullName -Value $FileContent
160-
Write-InfoLog "Updated ($($Matches.count)): $($HtmlFile.FullName)"
200+
Write-DebugLog "Updated ($($Matches.count)): $($HtmlFile.FullName)"
201+
202+
# **Progress tracking**
203+
$index++
204+
$percentage = [math]::Round(($index / $totalFiles) * 100, 0)
205+
206+
# Log progress every 10%
207+
if ($percentage -ge $lastPercentage + $progressInterval) {
208+
Write-InfoLog "Progress: $percentage% ($index of $totalFiles HTML files processed with $totalLinks links updated)"
209+
$lastPercentage = $percentage
210+
}
161211

162212
}
163-
Write-InfoLog "HTML link rewriting complete of $totalLinks."
164-
165-
166-
}
213+
Write-InfoLog "HTML link rewriting complete: $totalLinks links updated across $totalFiles files."
214+
}

.powershell/build/Update-YoutubeMarkdownFiles.ps1

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ function Update-YoutubeMarkdownFiles {
7070
}
7171
else {
7272
$frontMatter = [ordered]@{}
73-
$hugoMarkdown = [HugoMarkdown]::new($frontMatter, "")
73+
$hugoMarkdown = [HugoMarkdown]::new($frontMatter, "", $videoDir )
7474
$source = "youtube"
7575
}
7676

.powershell/build/Update-YoutubeTranscriptMarkdown.ps1

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Turn this YouTube srt transcript into readable Markdown using only the original
2020
- Ensure correct capitalisation and punctuation.
2121
- Ensure the text is readable with paragraphs and line breaks.
2222
- Output only the cleaned transcript content.
23+
- correct only what are obvious mistakes in the original transcript.
2324
2425
~~~
2526
$captionsText

0 commit comments

Comments
 (0)