Skip to content

Commit 31519b8

Browse files
authored
Update selection of classifications (#338)
2 parents 8b73991 + 6e463b8 commit 31519b8

File tree

2 files changed

+44
-60
lines changed

2 files changed

+44
-60
lines changed

.powershell/_includes/ClassificationHelpers.ps1

+36-44
Original file line number | Diff line number | Diff line change
@@ -34,19 +34,16 @@ $catalogues["catalog"]["tags"] = Get-CatalogHashtable -Classification "tags"
3434
$catalogues["catalog_full"] = $catalogues["catalog"]["categories"] + $catalogues["catalog"]["tags"]
3535
$catalogues["marketing"] = Get-CatalogHashtable -Classification "marketing"
3636

37-
function Get-CategoryConfidenceWithChecksum {
37+
function Get-ClassificationsForType {
3838
param (
39-
[string]$ResourceContent,
40-
[string]$ResourceTitle,
41-
[string]$CacheFolder,
39+
[Parameter(Mandatory = $true)]
40+
[HugoMarkdown]$hugoMarkdown,
4241
[string]$ClassificationType = "classification",
4342
[switch]$batch,
4443
[switch]$updateMissing
4544
)
4645

47-
if (!(Test-Path $CacheFolder)) {
48-
New-Item -ItemType Directory -Path $CacheFolder -Force | Out-Null
49-
}
46+
$CacheFolder = $hugoMarkdown.FolderPath
5047

5148
$batchFile = Join-Path $CacheFolder "data.index.classifications.$ClassificationType.batch"
5249
$batchJsonlOutout = Join-Path $CacheFolder "data.index.classifications.$ClassificationType-output.jsonl"
@@ -156,7 +153,7 @@ function Get-CategoryConfidenceWithChecksum {
156153
Write-ErrorLog "Error parsing AI response for $CacheFolder. Skipping."
157154
continue
158155
}
159-
$newEntry = Get-ConfidenceFromAIResponse -AIResponseJson $aiResponseJson -ResourceTitle $ResourceTitle -ResourceContent $ResourceContent
156+
$newEntry = Get-ConfidenceFromAIResponse -AIResponseJson $aiResponseJson -ResourceTitle $hugoMarkdown.FrontMatter.title -ResourceContent $hugoMarkdown.BodyContent
160157
if ($cachedData.ContainsKey($newEntry.category)) {
161158
$oldEntry = $cachedData.($newEntry.category)
162159
if ([System.DateTimeOffset]$oldEntry.calculated_at -gt $newEntry.calculated_at) {
@@ -267,8 +264,8 @@ function Get-CategoryConfidenceWithChecksum {
267264
268265
do not wrap the json in anything else, just return the json object.
269266
270-
**Content Title:** "$ResourceTitle"
271-
**Content:** "$ResourceContent"
267+
**Content Title:** "$($hugoMarkdown.FrontMatter.Title)"
268+
**Content:** "$($hugoMarkdown.BodyContent)"
272269
"@
273270
$prompts += $prompt
274271
}
@@ -299,17 +296,12 @@ function Get-CategoryConfidenceWithChecksum {
299296
#Write-Progress -Id 2 -Activity "Classification of $ClassificationType" -Status "Processing prompt [$count/$($prompts.count)]" -PercentComplete (($count / $prompts.count) * 100)
300297
# Calls processing
301298
$aiResponseJson = Get-OpenAIResponse -Prompt $prompt
302-
$result = Get-ConfidenceFromAIResponse -AIResponseJson $aiResponseJson -ResourceTitle $ResourceTitle -ResourceContent $ResourceContent
299+
$result = Get-ConfidenceFromAIResponse -AIResponseJson $aiResponseJson -hugoMarkdown $hugoMarkdown
303300
if ($result.reasoning -ne $null -and $result.category -ne "unknown") {
304301
$oldConfidence = $cachedData[$result.category]?.ai_confidence ?? 0
305-
$DaysAgo = if ($cachedData[$result.category]?.calculated_at -is [DateTime]) {
306-
[math]::Round(([DateTimeOffset]::Now - [DateTimeOffset]$cachedData[$result.category].calculated_at).TotalDays)
307-
}
308-
else {
309-
0
310-
}
311-
312-
Write-InformationLog "Updating {category} with confidence of {old} calculated {daysago} to new confidence of {confidence} " -PropertyValues $result.category, $oldConfidence, $DaysAgo, $result.ai_confidence
302+
$DaysAgo = [math]::Round(([DateTimeOffset]::Now - [DateTimeOffset]$cachedData[$result.category].calculated_at).TotalDays)
303+
$confidenceDiff = "{0}{1}" -f ($(if (($result.ai_confidence - $oldConfidence) -ge 0) { '+' } else { '-' }), [math]::Abs($result.ai_confidence - $oldConfidence))
304+
Write-InformationLog "Updating {category} confidence {diff}! The old confidence of {old} was calculated {daysago} days ago. The new confidence is {confidence}!" -PropertyValues $result.category, $confidenceDiff, $oldConfidence, $DaysAgo, $result.ai_confidence
313305
$CatalogFromCache[$result.category] = $result
314306
$cachedData[$result.category] = $result
315307
# Save cache after each API call
@@ -328,17 +320,13 @@ function Get-CategoryConfidenceWithChecksum {
328320
#==========================================
329321
#=================return===================
330322
#==========================================
331-
$finalSelection = Get-FinalSelection -categoryScores $CatalogFromCache
332-
return $finalSelection | Sort-Object final_score -Descending | ConvertTo-Json -Depth 2
323+
return $CatalogFromCache.Values | Sort-Object final_score -Descending
333324
#==========================================
334325
#================/return===================
335326
#==========================================
336-
337-
338-
339327
}
340328

341-
function Get-ClassificationFromCache {
329+
function Get-Classification {
342330
param (
343331
[string]$CacheFolder,
344332
[string]$ClassificationName
@@ -357,27 +345,31 @@ function Get-ClassificationFromCache {
357345
Return $cachedData.$ClassificationName
358346
}
359347

360-
function Get-FinalSelection {
348+
function Get-ClassificationOrderedList {
361349
param (
362-
[hashtable]$categoryScores,
363-
[string[]]$levels = @("Primary", "Secondary", "Tertiary", "Quaternary", "Quinary")
350+
[array]$Classifications,
351+
[int] $minScore = 30,
352+
[string[]]$levels = @("Primary", "Secondary", "Tertiary", "Quaternary", "Quinary"),
353+
[switch]$byLevel
364354
)
365-
366-
$finalSelection = @()
367355

368-
foreach ($level in $levels) {
369-
$currentSelection = $categoryScores.Values | Where-Object { $_.final_score -gt 30 -and $_.level -eq $level } | Sort-Object final_score -Descending
370-
if ($currentSelection.Count -gt 0) {
371-
$finalSelection += $currentSelection
372-
break
373-
}
356+
$filtered = $Classifications | Where-Object { $_.final_score -gt $minScore }
357+
$selected = @()
358+
if ($byLevel) {
359+
foreach ($level in $levels) {
360+
$currentSelection = $filtered | Where-Object { $_.level -eq $level } | Sort-Object final_score -Descending
361+
if ($currentSelection.Count -gt 0) {
362+
$selected += $currentSelection
363+
break
364+
}
365+
}
374366
}
375-
376-
return $finalSelection | Sort-Object final_score -Descending
367+
else {
368+
$selected = $filtered | Sort-Object final_score -Descending
369+
}
370+
return $selected | Sort-Object -Property @{Expression = "final_score"; Descending = $true }, @{Expression = "ai_alignment"; Descending = $true }, @{Expression = "ai_depth"; Descending = $true }, @{Expression = "category"; Descending = $false }
377371
}
378372

379-
380-
381373
function Get-ComputedConfidence {
382374
param (
383375
[int]$aiConfidence,
@@ -402,9 +394,9 @@ function Get-ComputedLevel {
402394

403395
function Get-ConfidenceFromAIResponse {
404396
param (
405-
[string]$AIResponseJson,
406-
[string]$ResourceTitle,
407-
[string]$ResourceContent
397+
[Parameter(Mandatory = $true)]
398+
[HugoMarkdown]$hugoMarkdown,
399+
[string]$AIResponseJson
408400
)
409401
$responceOK = $true
410402
try {
@@ -448,7 +440,7 @@ function Get-ConfidenceFromAIResponse {
448440
# Non-AI Confidence Calculation
449441
$nonAiConfidence = 0
450442
$categoryWords = $category -split '\s+'
451-
$contentWords = ($ResourceTitle + " " + $ResourceContent) -split '\s+'
443+
$contentWords = ($hugoMarkdown.FrontMatter.title + " " + $hugoMarkdown.BodyContent) -split '\s+'
452444
$escapedCategory = [Regex]::Escape($category)
453445

454446
if ($category -in $contentWords) {

.powershell/single-use/resources/Update-ReourcesFrontMatter.ps1

+8-16
Original file line numberDiff line numberDiff line change
@@ -207,27 +207,19 @@ while ($hugoMarkdownQueue.Count -gt 0 -or $hugoMarkdownBatchQueue.Count -gt 0) {
207207
# $categories = $marketingClassification | ConvertFrom-Json | ForEach-Object { $_.category } #| Sort-Object
208208
# Update-StringList -frontMatter $hugoMarkdown.FrontMatter -fieldName 'marketing' -values @($categories) -Overwrite
209209
#-----------------Categories-------------------
210-
$categoryClassification = Get-CategoryConfidenceWithChecksum -updateMissing `
211-
-ClassificationType "categories" `
212-
-Catalog $categoriesCatalog `
213-
-CacheFolder $hugoMarkdown.FolderPath `
214-
-ResourceContent $BodyContent `
215-
-ResourceTitle $hugoMarkdown.FrontMatter.title
216-
$categories = $categoryClassification | ConvertFrom-Json | Sort-Object -Property @{Expression = "final_score"; Descending = $true }, @{Expression = "category"; Descending = $false } | Select-Object -First 3 | ForEach-Object { $_.category }
210+
$categoryClassification = Get-ClassificationsForType -updateMissing -ClassificationType "categories" -hugoMarkdown $hugoMarkdown
211+
$categoryClassificationOrdered = Get-ClassificationOrderedList -byLevel -classifications $categoryClassification | Select-Object -First 3
212+
$categories = $categoryClassificationOrdered | ForEach-Object { $_.category }
217213

218214
Update-StringList -frontMatter $hugoMarkdown.FrontMatter -fieldName 'categories' -values @($categories) -Overwrite
219215
#-----------------Tags-------------------
220-
$tagClassification = Get-CategoryConfidenceWithChecksum -updateMissing `
221-
-ClassificationType "tags" `
222-
-Catalog $tagsCatalog `
223-
-CacheFolder $hugoMarkdown.FolderPath `
224-
-ResourceContent $BodyContent `
225-
-ResourceTitle $hugoMarkdown.FrontMatter.title
226-
$tags = $tagClassification | ConvertFrom-Json | Sort-Object -Property @{Expression = "final_score"; Descending = $true }, @{Expression = "category"; Descending = $false } | Select-Object -First 10 | ForEach-Object { $_.category }
216+
$tagClassification = Get-ClassificationsForType -updateMissing -ClassificationType "tags" -hugoMarkdown $hugoMarkdown
217+
$tagClassificationOrdered = Get-ClassificationOrderedList -byLevel -classifications $tagClassification | Select-Object -First 10
218+
$tags = $tagClassificationOrdered | ForEach-Object { $_.category }
227219
Update-StringList -frontMatter $hugoMarkdown.FrontMatter -fieldName 'tags' -values @($tags) -Overwrite
228220
# =================COMPLETE===================
229-
$eeResult = Get-ClassificationFromCache -CacheFolder $hugoMarkdown.FolderPath -ClassificationName "Engineering Excellence"
230-
$tlResult = Get-ClassificationFromCache -CacheFolder $hugoMarkdown.FolderPath -ClassificationName "Technical Leadership"
221+
$eeResult = Get-Classification -CacheFolder $hugoMarkdown.FolderPath -ClassificationName "Engineering Excellence"
222+
$tlResult = Get-Classification -CacheFolder $hugoMarkdown.FolderPath -ClassificationName "Technical Leadership"
231223
$weight = [math]::Round(((1000 - ($eeResult.final_score * 10)) + (1000 - ($tlResult.final_score * 10))) / 2)
232224
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'weight' -fieldValue $weight -addAfter 'date' -Overwrite
233225
# =================CONTENT===================

0 commit comments

Comments
 (0)