@@ -105,6 +105,104 @@ describe("sanitizeUnknownInPlace", () => {
105105 expect ( sanitizeUnknownInPlace ( null ) ) . toEqual ( { value : null , fixed : 0 } ) ;
106106 expect ( sanitizeUnknownInPlace ( undefined ) ) . toEqual ( { value : undefined , fixed : 0 } ) ;
107107 } ) ;
108+
// ─── Out-of-range integers (TRI-9755) ──────────────────────────────────────
// ClickHouse's JSON(max_dynamic_paths) column rejects bare integer tokens
// outside [Int64.MIN, UInt64.MAX]. JSON.stringify emits such Numbers in bare
// integer form (no exponent, because |value| < 1e21), so they reach
// ClickHouse as unquoted, oversized integers. The sanitizer replaces them
// with their string form, which ClickHouse's dynamic JSON column accepts as
// a String subtype on that path.
116+
it("replaces an integer-valued Number above UInt64.MAX with its string form", () => {
  // Real production value: a Google Plus ID (117039831458782873093) that had
  // already lost precision upstream when it was held as a JS Number.
  const { value, fixed } = sanitizeUnknownInPlace(117039831458782870000);

  expect(value).toBe("117039831458782870000");
  expect(fixed).toBe(1);
});
124+
it("catches the float64 boundary at exactly 2**64 (UInt64.MAX + 1)", () => {
  // UInt64.MAX (2^64 - 1) has no exact float64 representation: the source
  // literal 18446744073709551615 rounds up to 2^64. JSON.stringify prints
  // that Number as "18446744073709552000", which is above UInt64.MAX and
  // trips ClickHouse. This guards the BigInt-based comparison — a naïve
  // `value > 18446744073709551615` check would let the value through.
  const twoPow64 = 2 ** 64;
  const { value, fixed } = sanitizeUnknownInPlace(twoPow64);

  expect(value).toBe("18446744073709552000");
  expect(fixed).toBe(1);
});
135+
it("replaces an integer-valued Number below Int64.MIN with its string form", () => {
  // The first rejected negative token is -9223372036854775809, but as a
  // float64 that literal rounds to Int64.MIN (-9223372036854775808) itself,
  // so the exact boundary cannot be written as a distinct Number. An
  // unambiguously out-of-range negative is used instead.
  // NOTE(review): -(2 ** 63) stringifies as "-9223372036854776000", which as
  // text is below Int64.MIN — confirm how the sanitizer treats that exact
  // value; it is not covered by this test.
  const outcome = sanitizeUnknownInPlace(-1e20);

  expect(outcome.value).toBe("-100000000000000000000");
  expect(outcome.fixed).toBe(1);
});
144+
it("leaves safe integers and boundary values untouched", () => {
  // Every value here lies inside [Int64.MIN, UInt64.MAX], so the sanitizer
  // must return it unchanged and report zero fixes.

  // 42 — safe integer
  expect(sanitizeUnknownInPlace(42)).toEqual({ value: 42, fixed: 0 });
  // 0 — smallest-magnitude integer; guards against truthiness-style checks
  expect(sanitizeUnknownInPlace(0)).toEqual({ value: 0, fixed: 0 });
  // -42 — negative safe integer, so the "untouched" path is not positive-only
  expect(sanitizeUnknownInPlace(-42)).toEqual({ value: -42, fixed: 0 });
  // Number.MAX_SAFE_INTEGER (2^53 - 1) — JSON.stringify still emits as integer
  expect(sanitizeUnknownInPlace(Number.MAX_SAFE_INTEGER)).toEqual({
    value: Number.MAX_SAFE_INTEGER,
    fixed: 0,
  });
  // Number.MIN_SAFE_INTEGER (-(2^53 - 1)) — well above Int64.MIN
  expect(sanitizeUnknownInPlace(Number.MIN_SAFE_INTEGER)).toEqual({
    value: Number.MIN_SAFE_INTEGER,
    fixed: 0,
  });
  // 2^63 (Int64.MAX + 1) — still fits in UInt64, CH accepts it
  expect(sanitizeUnknownInPlace(2 ** 63)).toEqual({ value: 2 ** 63, fixed: 0 });
});
156+
it("leaves non-integer numbers untouched (floats, NaN, Infinity)", () => {
  // Numbers with a fractional part — emitted with `.` in JSON
  expect(sanitizeUnknownInPlace(3.14)).toEqual({ value: 3.14, fixed: 0 });
  // Very large float-form (>= 1e21) — 1e25 is integer-valued, but
  // JSON.stringify uses exponent form at this magnitude, so CH parses it as
  // Float64 successfully
  expect(sanitizeUnknownInPlace(1e25)).toEqual({ value: 1e25, fixed: 0 });
  // NaN / Infinity — JSON.stringify emits `null`, so harmless on the wire
  expect(sanitizeUnknownInPlace(Number.NaN)).toEqual({ value: Number.NaN, fixed: 0 });
  expect(sanitizeUnknownInPlace(Number.POSITIVE_INFINITY)).toEqual({
    value: Number.POSITIVE_INFINITY,
    fixed: 0,
  });
  // -Infinity also serialises as `null`; checked so the non-finite guard is
  // exercised for both signs
  expect(sanitizeUnknownInPlace(Number.NEGATIVE_INFINITY)).toEqual({
    value: Number.NEGATIVE_INFINITY,
    fixed: 0,
  });
});
170+
it("finds an oversized integer nested deep inside the actual scan-social-profiles shape", () => {
  // Fixture mirrors the nesting of the production payload. It is kept as a
  // single inline literal so TypeScript infers one normalised element type
  // for `profiles`.
  const payload = {
    output: {
      data: {
        profiles: [
          { module: "linktree", query: "x@example.com" },
          {
            module: "poshmark",
            spec_format: [
              {
                platform_variables: [
                  {
                    key: "gp_id",
                    proper_key: "Gp Id",
                    // The actual prod value — bare JSON integer > UInt64.MAX
                    value: 117039831458782870000,
                    type: "int",
                  },
                ],
              },
            ],
          },
        ],
      },
    },
  };

  const { fixed } = sanitizeUnknownInPlace(payload);
  expect(fixed).toBe(1);

  // Resolve the path only after sanitisation so the assertions observe
  // whatever now hangs off the payload itself.
  const [linktreeProfile, poshmarkProfile] = payload.output.data.profiles;
  const gpIdVariable = poshmarkProfile.spec_format![0].platform_variables[0];

  // The oversized number must now be its decimal string form.
  expect((gpIdVariable as any).value).toBe("117039831458782870000");

  // Untouched neighbours
  expect(linktreeProfile.module).toBe("linktree");
  expect(gpIdVariable.type).toBe("int");
});
108206} ) ;
109207
110208describe ( "sanitizeRows" , ( ) => {
@@ -158,4 +256,36 @@ describe("sanitizeRows", () => {
158256 expect ( result . rowsTouched ) . toBe ( 1 ) ;
159257 expect ( result . fieldsSanitized ) . toBe ( 2 ) ;
160258 } ) ;
259+
it("counts surrogate fixes and out-of-range integer fixes together (TRI-9755)", () => {
  // r0 needs two fixes (lone surrogate + oversized integer), r1 needs one
  // (oversized negative integer), r2 needs none.
  const rows = [
    {
      id: "r0",
      attributes: {
        surrogate: `bad ${HIGH_SURROGATE} `,
        bigint: 117039831458782870000,
        clean: "fine",
        safe: 42,
      },
    },
    {
      id: "r1",
      attributes: {
        bigint: -1e20,
        clean: "still fine",
      },
    },
    {
      id: "r2",
      attributes: { clean: "no fixes needed" },
    },
  ];

  const { rowsTouched, fieldsSanitized } = sanitizeRows(rows);

  // Two of the three rows changed, with three fields rewritten in total.
  expect(rowsTouched).toBe(2);
  expect(fieldsSanitized).toBe(3);

  // Read the rows back after the call to observe the sanitised values.
  const [firstRow, secondRow] = rows;
  expect(firstRow.attributes.surrogate).toBe(INVALID_UTF16_SENTINEL);
  expect(firstRow.attributes.bigint).toBe("117039831458782870000");
  expect(firstRow.attributes.safe).toBe(42);
  expect(secondRow.attributes.bigint).toBe("-100000000000000000000");
});
161291} ) ;
0 commit comments