twitter
diff --git a/‎sourcecode/scoring/constants.py
Lines changed: 10 additions & 2 deletions b/‎sourcecode/scoring/constants.py
Lines changed: 10 additions & 2 deletions
diff --git a/‎sourcecode/scoring/helpfulness_scores.py
Lines changed: 17 additions & 1 deletion b/‎sourcecode/scoring/helpfulness_scores.py
Lines changed: 17 additions & 1 deletion
diff --git a/‎sourcecode/scoring/mf_base_scorer.py
Lines changed: 46 additions & 6 deletions b/‎sourcecode/scoring/mf_base_scorer.py
Lines changed: 46 additions & 6 deletions
diff --git a/‎sourcecode/scoring/note_status_history.py
Lines changed: 41 additions & 11 deletions b/‎sourcecode/scoring/note_status_history.py
Lines changed: 41 additions & 11 deletions
diff --git a/‎sourcecode/scoring/pandas_utils.py
Lines changed: 4 additions & 0 deletions b/‎sourcecode/scoring/pandas_utils.py
Lines changed: 4 additions & 0 deletions
@@ -44,8 +44,8 @@
 
 # Max flip rates
 prescoringAllUnlockedNotesMaxCrhChurn = 0.2
-prescoringAllNotesCreatedThreeToThirteenDaysAgoMaxChurn = 0.06
-finalUnlockedNotesWithNoNewRatingsMaxCrhChurn = 0.05
+prescoringAllNotesCreatedThreeToThirteenDaysAgoMaxChurn = 0.09
+finalUnlockedNotesWithNoNewRatingsMaxCrhChurn = 0.075
 finalNotesWithNewRatingsMaxNewCrhChurn = 0.80
 finalNotesWithNewRatingsMaxOldCrhChurn = 0.25
 finalNotesThatJustFlippedStatusMaxCrhChurn = 1e8
@@ -669,6 +669,8 @@ def rater_factor_key(i):
   (lowDiligenceNoteInterceptKey, np.double),
   (lowDiligenceNoteFactor1Key, np.double),
   (lowDiligenceNoteInterceptRound2Key, np.double),
+  (harassmentNoteInterceptKey, np.double),
+  (harassmentNoteFactor1Key, np.double),
 ]
 prescoringNoteModelOutputTSVColumns = [
   col for (col, dtype) in prescoringNoteModelOutputTSVColumnsAndTypes
@@ -767,6 +769,12 @@ def rater_factor_key(i):
   (incorrectTagRatingsMadeByRaterKey, pd.Int64Dtype()),
   (totalRatingsMadeByRaterKey, pd.Int64Dtype()),
   (postSelectionValueKey, pd.Int64Dtype()),
+  (successfulRatingHelpfulCount, pd.Int64Dtype()),
+  (successfulRatingNotHelpfulCount, pd.Int64Dtype()),
+  (unsuccessfulRatingHelpfulCount, pd.Int64Dtype()),
+  (unsuccessfulRatingNotHelpfulCount, pd.Int64Dtype()),
+  (totalHelpfulHarassmentRatingsPenaltyKey, np.double),
+  (raterAgreeRatioWithHarassmentAbusePenaltyKey, np.double),
 ]
 prescoringRaterModelOutputTSVColumns = [
   col for (col, dtype) in prescoringRaterModelOutputTSVColumnsAndTypes
 
@@ -63,8 +63,16 @@ def _rater_helpfulness(validRatings: pd.DataFrame) -> pd.DataFrame:
   """
 
   raterCounts = validRatings.groupby(c.raterParticipantIdKey).sum()[
-    [c.ratingAgreesWithNoteStatusKey, c.ratingCountKey]
+    [
+      c.ratingAgreesWithNoteStatusKey,
+      c.successfulRatingNotHelpfulCount,
+      c.successfulRatingHelpfulCount,
+      c.unsuccessfulRatingNotHelpfulCount,
+      c.unsuccessfulRatingHelpfulCount,
+      c.ratingCountKey,
+    ]
   ]
+
   raterCounts[c.raterAgreeRatioKey] = (
     raterCounts[c.ratingAgreesWithNoteStatusKey] / raterCounts[c.ratingCountKey]
   )
@@ -124,6 +132,10 @@ def compute_general_helpfulness_scores(
         # reason we didn't see warnings for ratingCountKey before was that the type may have already
         # been float64 going into the join.
         c.ratingCountKey,
+        c.successfulRatingNotHelpfulCount,
+        c.successfulRatingHelpfulCount,
+        c.unsuccessfulRatingNotHelpfulCount,
+        c.unsuccessfulRatingHelpfulCount,
       },
     )
     .reset_index()
@@ -135,6 +147,10 @@ def compute_general_helpfulness_scores(
         c.raterAgreeRatioKey,
         c.ratingAgreesWithNoteStatusKey,
         c.ratingCountKey,
+        c.successfulRatingNotHelpfulCount,
+        c.successfulRatingHelpfulCount,
+        c.unsuccessfulRatingNotHelpfulCount,
+        c.unsuccessfulRatingHelpfulCount,
       ]
     ]
   )
 
@@ -576,13 +576,21 @@ def _prescore_notes_and_users(
           c.meanNoteScoreKey,
           c.raterAgreeRatioKey,
           c.aboveHelpfulnessThresholdKey,
+          c.successfulRatingNotHelpfulCount,
+          c.successfulRatingHelpfulCount,
+          c.unsuccessfulRatingNotHelpfulCount,
+          c.unsuccessfulRatingHelpfulCount,
+          c.totalHelpfulHarassmentRatingsPenaltyKey,
+          c.raterAgreeRatioWithHarassmentAbusePenaltyKey,
         ]
       ] = np.nan
       noteParams = noteParamsUnfiltered
       raterParams = raterParamsUnfiltered
       # TODO: delete after we run prescoring diligence properly
       # diligenceGlobalIntercept = None
       finalRoundRatings = ratingsForTraining
+      harassmentAbuseNoteParams = noteParamsUnfiltered[[c.noteIdKey]]
+      harassmentAbuseNoteParams[[c.harassmentNoteInterceptKey, c.harassmentNoteFactor1Key]] = np.nan
     else:
       assert "Topic" not in self.get_name(), f"Unexpected scorer: {self.get_name()}"
       logger.info(f"Performing rep-filtering for {self.get_name()}")
@@ -668,7 +676,15 @@ def _prescore_notes_and_users(
               ]
             ],
             validRatings[
-              [c.raterParticipantIdKey, c.ratingAgreesWithNoteStatusKey, c.ratingCountKey]
+              [
+                c.raterParticipantIdKey,
+                c.ratingAgreesWithNoteStatusKey,
+                c.ratingCountKey,
+                c.successfulRatingNotHelpfulCount,
+                c.successfulRatingHelpfulCount,
+                c.unsuccessfulRatingNotHelpfulCount,
+                c.unsuccessfulRatingHelpfulCount,
+              ]
             ],
             self._minMeanNoteScore,
             self._minCRHVsCRNHRatio,
@@ -705,10 +721,12 @@ def _prescore_notes_and_users(
 
       with self.time_block("Harassment tag consensus"):
         harassmentAbuseNoteParams, _, _ = tag_consensus.train_tag_model(
-          ratingsHelpfulnessScoreFilteredPreHarassmentFilter,
-          c.notHelpfulSpamHarassmentOrAbuseTagKey,
-          noteParamsUnfiltered[[c.noteIdKey, c.internalNoteInterceptKey, c.internalNoteFactor1Key]],
-          raterParamsUnfiltered[
+          ratings=ratingsHelpfulnessScoreFilteredPreHarassmentFilter,
+          tag=c.notHelpfulSpamHarassmentOrAbuseTagKey,
+          helpfulModelNoteParams=noteParamsUnfiltered[
+            [c.noteIdKey, c.internalNoteInterceptKey, c.internalNoteFactor1Key]
+          ],
+          helpfulModelRaterParams=raterParamsUnfiltered[
             [c.raterParticipantIdKey, c.internalRaterInterceptKey, c.internalRaterFactor1Key]
           ],
           name="harassment",
@@ -731,7 +749,15 @@ def _prescore_notes_and_users(
             ]
           ],
           validRatings[
-            [c.raterParticipantIdKey, c.ratingAgreesWithNoteStatusKey, c.ratingCountKey]
+            [
+              c.raterParticipantIdKey,
+              c.ratingAgreesWithNoteStatusKey,
+              c.ratingCountKey,
+              c.successfulRatingNotHelpfulCount,
+              c.successfulRatingHelpfulCount,
+              c.unsuccessfulRatingNotHelpfulCount,
+              c.unsuccessfulRatingHelpfulCount,
+            ]
           ],
           self._minMeanNoteScore,
           self._minCRHVsCRNHRatio,
@@ -800,6 +826,14 @@ def _prescore_notes_and_users(
         raterInitStateDiligence=raterParamsDiligenceInit,
       )
       noteParams = noteParams.merge(diligenceNoteParams, on=c.noteIdKey)
+
+      noteParams = noteParams.merge(
+        harassmentAbuseNoteParams[
+          [c.noteIdKey, c.harassmentNoteInterceptKey, c.harassmentNoteFactor1Key]
+        ],
+        on=c.noteIdKey,
+        how="left",
+      )
       raterParams = raterParams.merge(diligenceRaterParams, on=c.raterParticipantIdKey)
 
     # Compute scored notes -- currently not returned; only used for downstream computation.
@@ -892,6 +926,12 @@ def _prescore_notes_and_users(
           c.meanNoteScoreKey,
           c.raterAgreeRatioKey,
           c.aboveHelpfulnessThresholdKey,
+          c.successfulRatingHelpfulCount,
+          c.successfulRatingNotHelpfulCount,
+          c.unsuccessfulRatingHelpfulCount,
+          c.unsuccessfulRatingNotHelpfulCount,
+          c.totalHelpfulHarassmentRatingsPenaltyKey,
+          c.raterAgreeRatioWithHarassmentAbusePenaltyKey,
         ]
       ],
       on=c.raterParticipantIdKey,
 
@@ -1,6 +1,6 @@
 import logging
 import time
-from typing import Optional
+from typing import Optional, Tuple
 
 from . import constants as c
 from .scoring_rules import RuleID
@@ -193,7 +193,7 @@ def _update_single_note_status_history(mergedNote, currentTimeMillis, newScoredN
   return mergedNote
 
 
-def check_flips(mergedStatuses: pd.DataFrame, noteSubset: c.NoteSubset) -> None:
+def check_flips(mergedStatuses: pd.DataFrame, noteSubset: c.NoteSubset) -> Tuple[bool, str]:
   """Validate that number of CRH notes remains within an accepted bound.
 
   Returns a (failed, description) tuple reporting whether maximum allowable churn is exceeded.
@@ -215,15 +215,26 @@ def check_flips(mergedStatuses: pd.DataFrame, noteSubset: c.NoteSubset) -> None:
     if noteSubset.noteSet is not None:
       mergedStatuses = mergedStatuses[mergedStatuses[c.noteIdKey].isin(noteSubset.noteSet)]
 
-    _check_flips(mergedStatuses, noteSubset.maxNewCrhChurnRate, noteSubset.maxOldCrhChurnRate)
+    return _check_flips(
+      mergedStatuses,
+      noteSubset.maxNewCrhChurnRate,
+      noteSubset.maxOldCrhChurnRate,
+      description=noteSubset.description,
+    )
+  return False, ""
 
 
 def _check_flips(
   mergedStatuses: pd.DataFrame,
   maxNewCrhChurn: float,
   maxOldCrhChurn: Optional[float] = None,
   smoothingCount: int = 100,
-) -> None:
+  description: Optional[c.RescoringRuleID] = None,
+  sampleSizeToPrintInFailedAssert: int = 30,
+) -> Tuple[bool, str]:
+  desc = ""
+  failedCheckFlips = False
+
   if maxOldCrhChurn is None:
     maxOldCrhChurn = maxNewCrhChurn
 
@@ -247,13 +258,32 @@ def _check_flips(
       f"Raw old note ratio: {rawOldNoteRatio}, smoothed old note ratio: {smoothedOldNoteRatio}. (newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
     )
 
-    assert (
-      smoothedNewNoteRatio < maxNewCrhChurn
-    ), f"Too many new CRH notes: newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(newCrhNotes - oldCrhNotes)}"
-
-    assert (
-      smoothedOldNoteRatio < maxOldCrhChurn
-    ), f"Too many notes lost CRH status: oldCrhNotes={len(oldCrhNotes)}, newCrhNotes={len(newCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
+    pd.set_option("display.max_columns", 50)
+    pd.set_option("display.max_rows", max(20, sampleSizeToPrintInFailedAssert))
+
+    if smoothedNewNoteRatio > maxNewCrhChurn:
+      failedCheckFlips = True
+      desc += f"""Too many new CRH notes (rescoringRule: {description}): 
+      smoothedNewNoteRatio={smoothedNewNoteRatio}
+      maxNewCrhChurn={maxNewCrhChurn}
+      newCrhNotes={len(newCrhNotes)}
+      oldCrhNotes={len(oldCrhNotes)}
+      delta={len(newCrhNotes - oldCrhNotes)}
+      Sample Notes: 
+      {mergedStatuses[(mergedStatuses[c.noteIdKey].isin(newCrhNotes - oldCrhNotes))].sample(min(len(newCrhNotes - oldCrhNotes), sampleSizeToPrintInFailedAssert))}"""
+
+    if smoothedOldNoteRatio > maxOldCrhChurn:
+      failedCheckFlips = True
+      desc += f"""Too many notes lost CRH status (rescoringRule: {description}): 
+      smoothedOldNoteRatio={smoothedOldNoteRatio}
+      maxOldCrhChurn={maxOldCrhChurn}
+      oldCrhNotes={len(oldCrhNotes)}
+      newCrhNotes={len(newCrhNotes)}
+      delta={len(oldCrhNotes - newCrhNotes)}
+      Sample Notes: 
+      {mergedStatuses[(mergedStatuses[c.noteIdKey].isin(oldCrhNotes - newCrhNotes))].sample(min(len(oldCrhNotes - newCrhNotes), sampleSizeToPrintInFailedAssert))}"""
+
+  return failedCheckFlips, desc
 
 
 def merge_old_and_new_note_statuses(
 
@@ -24,6 +24,7 @@
 from collections import Counter
 from dataclasses import dataclass
 from enum import Enum
+from functools import wraps
 from hashlib import sha256
 import re
 import sys
@@ -647,6 +648,7 @@ def patch_pandas(main: Callable) -> Callable:
     main: "main" function for program binary
   """
 
+  @wraps(main)
   def _inner(*args, **kwargs) -> Any:
     """Determine patching behavior, apply patch and add logging."""
     print("Patching pandas")
@@ -662,9 +664,11 @@ def _inner(*args, **kwargs) -> Any:
       # birdwatch/scoring/src/main/python/run_final_scoring.py
       # birdwatch/scoring/src/main/python/run_contributor_scoring.py
       # birdwatch/scoring/src/main/python/run.py
+      # birdwatch/scoring/src/main/python/public/scoring/run_scoring.py
       assert len(args) == 1, f"unexpected 1 positional args, but found {len(args)}"
       assert len(kwargs) == 0, f"expected kwargs to be empty, but found {len(kwargs)}"
       clArgs = args[0]
+
     # Apply patches, configured based on whether types should be enforced or logged
     patcher = PandasPatcher(clArgs.enforce_types)
     pd.concat = patcher.safe_concat()