Skip to content

Commit cbe08d1

Browse files
author
Brad Miller
committed
threshold and column updates
1 parent 61dc69b commit cbe08d1

9 files changed

+198
-36
lines changed

sourcecode/scoring/constants.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@
4444

4545
# Max flip rates
4646
prescoringAllUnlockedNotesMaxCrhChurn = 0.2
47-
prescoringAllNotesCreatedThreeToThirteenDaysAgoMaxChurn = 0.06
48-
finalUnlockedNotesWithNoNewRatingsMaxCrhChurn = 0.05
47+
prescoringAllNotesCreatedThreeToThirteenDaysAgoMaxChurn = 0.09
48+
finalUnlockedNotesWithNoNewRatingsMaxCrhChurn = 0.075
4949
finalNotesWithNewRatingsMaxNewCrhChurn = 0.80
5050
finalNotesWithNewRatingsMaxOldCrhChurn = 0.25
5151
finalNotesThatJustFlippedStatusMaxCrhChurn = 1e8
@@ -669,6 +669,8 @@ def rater_factor_key(i):
669669
(lowDiligenceNoteInterceptKey, np.double),
670670
(lowDiligenceNoteFactor1Key, np.double),
671671
(lowDiligenceNoteInterceptRound2Key, np.double),
672+
(harassmentNoteInterceptKey, np.double),
673+
(harassmentNoteFactor1Key, np.double),
672674
]
673675
prescoringNoteModelOutputTSVColumns = [
674676
col for (col, dtype) in prescoringNoteModelOutputTSVColumnsAndTypes
@@ -767,6 +769,12 @@ def rater_factor_key(i):
767769
(incorrectTagRatingsMadeByRaterKey, pd.Int64Dtype()),
768770
(totalRatingsMadeByRaterKey, pd.Int64Dtype()),
769771
(postSelectionValueKey, pd.Int64Dtype()),
772+
(successfulRatingHelpfulCount, pd.Int64Dtype()),
773+
(successfulRatingNotHelpfulCount, pd.Int64Dtype()),
774+
(unsuccessfulRatingHelpfulCount, pd.Int64Dtype()),
775+
(unsuccessfulRatingNotHelpfulCount, pd.Int64Dtype()),
776+
(totalHelpfulHarassmentRatingsPenaltyKey, np.double),
777+
(raterAgreeRatioWithHarassmentAbusePenaltyKey, np.double),
770778
]
771779
prescoringRaterModelOutputTSVColumns = [
772780
col for (col, dtype) in prescoringRaterModelOutputTSVColumnsAndTypes

sourcecode/scoring/helpfulness_scores.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,16 @@ def _rater_helpfulness(validRatings: pd.DataFrame) -> pd.DataFrame:
6363
"""
6464

6565
raterCounts = validRatings.groupby(c.raterParticipantIdKey).sum()[
66-
[c.ratingAgreesWithNoteStatusKey, c.ratingCountKey]
66+
[
67+
c.ratingAgreesWithNoteStatusKey,
68+
c.successfulRatingNotHelpfulCount,
69+
c.successfulRatingHelpfulCount,
70+
c.unsuccessfulRatingNotHelpfulCount,
71+
c.unsuccessfulRatingHelpfulCount,
72+
c.ratingCountKey,
73+
]
6774
]
75+
6876
raterCounts[c.raterAgreeRatioKey] = (
6977
raterCounts[c.ratingAgreesWithNoteStatusKey] / raterCounts[c.ratingCountKey]
7078
)
@@ -124,6 +132,10 @@ def compute_general_helpfulness_scores(
124132
# reason we didn't see warnings for ratingCountKey before was that the type may have already
125133
# been float64 going into the join.
126134
c.ratingCountKey,
135+
c.successfulRatingNotHelpfulCount,
136+
c.successfulRatingHelpfulCount,
137+
c.unsuccessfulRatingNotHelpfulCount,
138+
c.unsuccessfulRatingHelpfulCount,
127139
},
128140
)
129141
.reset_index()
@@ -135,6 +147,10 @@ def compute_general_helpfulness_scores(
135147
c.raterAgreeRatioKey,
136148
c.ratingAgreesWithNoteStatusKey,
137149
c.ratingCountKey,
150+
c.successfulRatingNotHelpfulCount,
151+
c.successfulRatingHelpfulCount,
152+
c.unsuccessfulRatingNotHelpfulCount,
153+
c.unsuccessfulRatingHelpfulCount,
138154
]
139155
]
140156
)

sourcecode/scoring/mf_base_scorer.py

+46-6
Original file line numberDiff line numberDiff line change
@@ -576,13 +576,21 @@ def _prescore_notes_and_users(
576576
c.meanNoteScoreKey,
577577
c.raterAgreeRatioKey,
578578
c.aboveHelpfulnessThresholdKey,
579+
c.successfulRatingNotHelpfulCount,
580+
c.successfulRatingHelpfulCount,
581+
c.unsuccessfulRatingNotHelpfulCount,
582+
c.unsuccessfulRatingHelpfulCount,
583+
c.totalHelpfulHarassmentRatingsPenaltyKey,
584+
c.raterAgreeRatioWithHarassmentAbusePenaltyKey,
579585
]
580586
] = np.nan
581587
noteParams = noteParamsUnfiltered
582588
raterParams = raterParamsUnfiltered
583589
# TODO: delete after we run prescoring diligence properly
584590
# diligenceGlobalIntercept = None
585591
finalRoundRatings = ratingsForTraining
592+
harassmentAbuseNoteParams = noteParamsUnfiltered[[c.noteIdKey]]
593+
harassmentAbuseNoteParams[[c.harassmentNoteInterceptKey, c.harassmentNoteFactor1Key]] = np.nan
586594
else:
587595
assert "Topic" not in self.get_name(), f"Unexpected scorer: {self.get_name()}"
588596
logger.info(f"Performing rep-filtering for {self.get_name()}")
@@ -668,7 +676,15 @@ def _prescore_notes_and_users(
668676
]
669677
],
670678
validRatings[
671-
[c.raterParticipantIdKey, c.ratingAgreesWithNoteStatusKey, c.ratingCountKey]
679+
[
680+
c.raterParticipantIdKey,
681+
c.ratingAgreesWithNoteStatusKey,
682+
c.ratingCountKey,
683+
c.successfulRatingNotHelpfulCount,
684+
c.successfulRatingHelpfulCount,
685+
c.unsuccessfulRatingNotHelpfulCount,
686+
c.unsuccessfulRatingHelpfulCount,
687+
]
672688
],
673689
self._minMeanNoteScore,
674690
self._minCRHVsCRNHRatio,
@@ -705,10 +721,12 @@ def _prescore_notes_and_users(
705721

706722
with self.time_block("Harassment tag consensus"):
707723
harassmentAbuseNoteParams, _, _ = tag_consensus.train_tag_model(
708-
ratingsHelpfulnessScoreFilteredPreHarassmentFilter,
709-
c.notHelpfulSpamHarassmentOrAbuseTagKey,
710-
noteParamsUnfiltered[[c.noteIdKey, c.internalNoteInterceptKey, c.internalNoteFactor1Key]],
711-
raterParamsUnfiltered[
724+
ratings=ratingsHelpfulnessScoreFilteredPreHarassmentFilter,
725+
tag=c.notHelpfulSpamHarassmentOrAbuseTagKey,
726+
helpfulModelNoteParams=noteParamsUnfiltered[
727+
[c.noteIdKey, c.internalNoteInterceptKey, c.internalNoteFactor1Key]
728+
],
729+
helpfulModelRaterParams=raterParamsUnfiltered[
712730
[c.raterParticipantIdKey, c.internalRaterInterceptKey, c.internalRaterFactor1Key]
713731
],
714732
name="harassment",
@@ -731,7 +749,15 @@ def _prescore_notes_and_users(
731749
]
732750
],
733751
validRatings[
734-
[c.raterParticipantIdKey, c.ratingAgreesWithNoteStatusKey, c.ratingCountKey]
752+
[
753+
c.raterParticipantIdKey,
754+
c.ratingAgreesWithNoteStatusKey,
755+
c.ratingCountKey,
756+
c.successfulRatingNotHelpfulCount,
757+
c.successfulRatingHelpfulCount,
758+
c.unsuccessfulRatingNotHelpfulCount,
759+
c.unsuccessfulRatingHelpfulCount,
760+
]
735761
],
736762
self._minMeanNoteScore,
737763
self._minCRHVsCRNHRatio,
@@ -800,6 +826,14 @@ def _prescore_notes_and_users(
800826
raterInitStateDiligence=raterParamsDiligenceInit,
801827
)
802828
noteParams = noteParams.merge(diligenceNoteParams, on=c.noteIdKey)
829+
830+
noteParams = noteParams.merge(
831+
harassmentAbuseNoteParams[
832+
[c.noteIdKey, c.harassmentNoteInterceptKey, c.harassmentNoteFactor1Key]
833+
],
834+
on=c.noteIdKey,
835+
how="left",
836+
)
803837
raterParams = raterParams.merge(diligenceRaterParams, on=c.raterParticipantIdKey)
804838

805839
# Compute scored notes -- currently not returned; only used for downstream computation.
@@ -892,6 +926,12 @@ def _prescore_notes_and_users(
892926
c.meanNoteScoreKey,
893927
c.raterAgreeRatioKey,
894928
c.aboveHelpfulnessThresholdKey,
929+
c.successfulRatingHelpfulCount,
930+
c.successfulRatingNotHelpfulCount,
931+
c.unsuccessfulRatingHelpfulCount,
932+
c.unsuccessfulRatingNotHelpfulCount,
933+
c.totalHelpfulHarassmentRatingsPenaltyKey,
934+
c.raterAgreeRatioWithHarassmentAbusePenaltyKey,
895935
]
896936
],
897937
on=c.raterParticipantIdKey,

sourcecode/scoring/note_status_history.py

+41-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22
import time
3-
from typing import Optional
3+
from typing import Optional, Tuple
44

55
from . import constants as c
66
from .scoring_rules import RuleID
@@ -193,7 +193,7 @@ def _update_single_note_status_history(mergedNote, currentTimeMillis, newScoredN
193193
return mergedNote
194194

195195

196-
def check_flips(mergedStatuses: pd.DataFrame, noteSubset: c.NoteSubset) -> None:
196+
def check_flips(mergedStatuses: pd.DataFrame, noteSubset: c.NoteSubset) -> Tuple[bool, str]:
197197
"""Validate that number of CRH notes remains within an accepted bound.
198198
199199
Returns a (failed, description) tuple indicating whether maximum allowable churn is exceeded.
@@ -215,15 +215,26 @@ def check_flips(mergedStatuses: pd.DataFrame, noteSubset: c.NoteSubset) -> None:
215215
if noteSubset.noteSet is not None:
216216
mergedStatuses = mergedStatuses[mergedStatuses[c.noteIdKey].isin(noteSubset.noteSet)]
217217

218-
_check_flips(mergedStatuses, noteSubset.maxNewCrhChurnRate, noteSubset.maxOldCrhChurnRate)
218+
return _check_flips(
219+
mergedStatuses,
220+
noteSubset.maxNewCrhChurnRate,
221+
noteSubset.maxOldCrhChurnRate,
222+
description=noteSubset.description,
223+
)
224+
return False, ""
219225

220226

221227
def _check_flips(
222228
mergedStatuses: pd.DataFrame,
223229
maxNewCrhChurn: float,
224230
maxOldCrhChurn: Optional[float] = None,
225231
smoothingCount: int = 100,
226-
) -> None:
232+
description: Optional[c.RescoringRuleID] = None,
233+
sampleSizeToPrintInFailedAssert: int = 30,
234+
) -> Tuple[bool, str]:
235+
desc = ""
236+
failedCheckFlips = False
237+
227238
if maxOldCrhChurn is None:
228239
maxOldCrhChurn = maxNewCrhChurn
229240

@@ -247,13 +258,32 @@ def _check_flips(
247258
f"Raw old note ratio: {rawOldNoteRatio}, smoothed old note ratio: {smoothedOldNoteRatio}. (newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
248259
)
249260

250-
assert (
251-
smoothedNewNoteRatio < maxNewCrhChurn
252-
), f"Too many new CRH notes: newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(newCrhNotes - oldCrhNotes)}"
253-
254-
assert (
255-
smoothedOldNoteRatio < maxOldCrhChurn
256-
), f"Too many notes lost CRH status: oldCrhNotes={len(oldCrhNotes)}, newCrhNotes={len(newCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
261+
pd.set_option("display.max_columns", 50)
262+
pd.set_option("display.max_rows", max(20, sampleSizeToPrintInFailedAssert))
263+
264+
if smoothedNewNoteRatio > maxNewCrhChurn:
265+
failedCheckFlips = True
266+
desc += f"""Too many new CRH notes (rescoringRule: {description}):
267+
smoothedNewNoteRatio={smoothedNewNoteRatio}
268+
maxNewCrhChurn={maxNewCrhChurn}
269+
newCrhNotes={len(newCrhNotes)}
270+
oldCrhNotes={len(oldCrhNotes)}
271+
delta={len(newCrhNotes - oldCrhNotes)}
272+
Sample Notes:
273+
{mergedStatuses[(mergedStatuses[c.noteIdKey].isin(newCrhNotes - oldCrhNotes))].sample(min(len(newCrhNotes - oldCrhNotes), sampleSizeToPrintInFailedAssert))}"""
274+
275+
if smoothedOldNoteRatio > maxOldCrhChurn:
276+
failedCheckFlips = True
277+
desc += f"""Too many notes lost CRH status (rescoringRule: {description}):
278+
smoothedOldNoteRatio={smoothedOldNoteRatio}
279+
maxOldCrhChurn={maxOldCrhChurn}
280+
oldCrhNotes={len(oldCrhNotes)}
281+
newCrhNotes={len(newCrhNotes)}
282+
delta={len(oldCrhNotes - newCrhNotes)}
283+
Sample Notes:
284+
{mergedStatuses[(mergedStatuses[c.noteIdKey].isin(oldCrhNotes - newCrhNotes))].sample(min(len(oldCrhNotes - newCrhNotes), sampleSizeToPrintInFailedAssert))}"""
285+
286+
return failedCheckFlips, desc
257287

258288

259289
def merge_old_and_new_note_statuses(

sourcecode/scoring/pandas_utils.py

+4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from collections import Counter
2525
from dataclasses import dataclass
2626
from enum import Enum
27+
from functools import wraps
2728
from hashlib import sha256
2829
import re
2930
import sys
@@ -647,6 +648,7 @@ def patch_pandas(main: Callable) -> Callable:
647648
main: "main" function for program binary
648649
"""
649650

651+
@wraps(main)
650652
def _inner(*args, **kwargs) -> Any:
651653
"""Determine patching behavior, apply patch and add logging."""
652654
print("Patching pandas")
@@ -662,9 +664,11 @@ def _inner(*args, **kwargs) -> Any:
662664
# birdwatch/scoring/src/main/python/run_final_scoring.py
663665
# birdwatch/scoring/src/main/python/run_contributor_scoring.py
664666
# birdwatch/scoring/src/main/python/run.py
667+
# birdwatch/scoring/src/main/python/public/scoring/run_scoring.py
665668
assert len(args) == 1, f"unexpected 1 positional args, but found {len(args)}"
666669
assert len(kwargs) == 0, f"expected kwargs to be empty, but found {len(kwargs)}"
667670
clArgs = args[0]
671+
668672
# Apply patches, configured based on whether types should be enforced or logged
669673
patcher = PandasPatcher(clArgs.enforce_types)
670674
pd.concat = patcher.safe_concat()

0 commit comments

Comments
 (0)