@@ -37,7 +37,7 @@ public class ContentDispositionHeaderValue
3737
3838 // attr-char definition from RFC5987
3939 // Same as token except ( "*" / "'" / "%" )
// Encode5987 copies characters that are in this set to its output unchanged and percent-encodes every other character.
// Diff note: the '-' / '+' pair below only renames the field from AttrChar to Rfc5987AttrChar; the character set itself is a context line.
40- private static readonly SearchValues < char > AttrChar =
40+ private static readonly SearchValues < char > Rfc5987AttrChar =
4141 SearchValues . Create ( "!#$&+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~" ) ;
4242
4343 private static readonly HttpHeaderParser < ContentDispositionHeaderValue > Parser
@@ -618,54 +618,36 @@ private static bool TryDecodeMime(StringSegment input, [NotNullWhen(true)] out s
// Encode5987 returns a string made of the literal prefix given to the StringBuilder constructor below, followed by
// the input in which every character that is an RFC 5987 attr-char is kept as-is and every other character is
// written as the percent-encoded bytes of its UTF-8 form.
//
// Diff note: lines marked '-' are the removed implementation. It transcoded the whole input to UTF-8 into a
// stack-allocated or ArrayPool-rented buffer and then walked that buffer. Lines marked '+' are the replacement,
// which walks the UTF-16 span directly in alternating runs of attr-chars and non-attr-chars and needs no byte buffer.
618618 private static string Encode5987 ( StringSegment input )
619619 {
620620 var builder = new StringBuilder ( "UTF-8\' \' " ) ;
621-
622- var maxInputBytes = Encoding . UTF8 . GetMaxByteCount ( input . Length ) ;
623- byte [ ] ? bufferFromPool = null ;
624- Span < byte > inputBytes = maxInputBytes <= MaxStackAllocSizeBytes
625- ? stackalloc byte [ MaxStackAllocSizeBytes ]
626- : bufferFromPool = ArrayPool < byte > . Shared . Rent ( maxInputBytes ) ;
627-
628- var bytesWritten = Encoding . UTF8 . GetBytes ( input , inputBytes ) ;
629- inputBytes = inputBytes [ ..bytesWritten ] ;
630-
631- int totalBytesConsumed = 0 ;
632- while ( totalBytesConsumed < inputBytes . Length )
621+ var remaining = input . AsSpan ( ) ;
622+ while ( remaining . Length > 0 )
633623 {
634- if ( Ascii . IsValid ( inputBytes [ totalBytesConsumed ] ) )
// New code, step 1: measure the leading run of attr-chars (index of the first character NOT in the set).
624+ var length = remaining . IndexOfAnyExcept ( Rfc5987AttrChar ) ;
// A negative index means no non-attr-char was found, so the whole remainder can be copied verbatim.
625+ if ( length < 0 )
635626 {
636- // This is an ASCII char. Let's handle it ourselves.
637-
638- char c = ( char ) inputBytes [ totalBytesConsumed ] ;
639- if ( ! AttrChar . Contains ( c ) )
640- {
641- HexEscape ( builder , c ) ;
642- }
643- else
644- {
645- builder . Append ( c ) ;
646- }
647-
648- totalBytesConsumed ++ ;
627+ length = remaining . Length ;
649628 }
650- else
651- {
652- // Non-ASCII, let's rely on Rune to decode it.
// Copy the attr-char run to the output unescaped.
629+ builder . Append ( remaining [ ..length ] ) ;
653630
654- Rune . DecodeFromUtf8 ( inputBytes . Slice ( totalBytesConsumed ) , out Rune r , out int bytesConsumedForRune ) ;
655- Contract . Assert ( ! r . IsAscii , "We shouldn't have gotten here if the Rune is ASCII." ) ;
631+ remaining = remaining . Slice ( length ) ;
632+ if ( remaining . Length == 0 )
633+ {
634+ break ;
635+ }
656636
657- for ( int i = 0 ; i < bytesConsumedForRune ; i ++ )
658- {
659- HexEscape ( builder , ( char ) inputBytes [ totalBytesConsumed + i ] ) ;
660- }
// New code, step 2: measure the following run of characters that must be escaped (index of the next attr-char).
637+ length = remaining . IndexOfAny ( Rfc5987AttrChar ) ;
// A negative index means there is no further attr-char, so everything left must be escaped.
638+ if ( length < 0 )
639+ {
640+ length = remaining . Length ;
641+ }
661642
662- totalBytesConsumed += bytesConsumedForRune ;
// Walk the run one Rune at a time; the loop advances by runeLength, the out value reported by DecodeFromUtf16,
// rather than by a fixed step, so a Rune spanning more than one UTF-16 char is escaped as a single scalar value.
643+ for ( var i = 0 ; i < length ; )
644+ {
645+ Rune . DecodeFromUtf16 ( remaining . Slice ( i ) , out Rune rune , out var runeLength ) ;
646+ EncodeToUtf8Hex ( rune , builder ) ;
647+ i += runeLength ;
663648 }
664- }
665649
666- if ( bufferFromPool is not null )
667- {
668- ArrayPool < byte > . Shared . Return ( bufferFromPool ) ;
650+ remaining = remaining . Slice ( length ) ;
669651 }
670652
671653 return builder . ToString ( ) ;
@@ -675,11 +657,45 @@ private static string Encode5987(StringSegment input)
675657 '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' ,
676658 '8' , '9' , 'A' , 'B' , 'C' , 'D' , 'E' , 'F' } ;
677659
// EncodeToUtf8Hex appends the UTF-8 encoding of the given Rune to the builder, writing each byte as '%' followed by
// two uppercase hex digits looked up in HexUpperChars (high nibble first, then low nibble).
// The four branches produce 1-, 2-, 3- and 4-byte sequences, selected by the magnitude of the scalar value.
//
// Diff note: lines marked '-' are the removed HexEscape(StringBuilder, char) helper, which wrote a single '%XX'
// escape built from the low 8 bits of its char argument; lines marked '+' are the replacement method.
678- private static void HexEscape ( StringBuilder builder , char c )
660+ private static void EncodeToUtf8Hex ( Rune rune , StringBuilder builder )
679661 {
680- builder . Append ( '%' ) ;
681- builder . Append ( HexUpperChars [ ( c & 0xf0 ) >> 4 ] ) ;
682- builder . Append ( HexUpperChars [ c & 0xf ] ) ;
662+ // Inspired by https://source.dot.net/#System.Private.CoreLib/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs TryEncodeToUtf8
663+ var value = ( uint ) rune . Value ;
// 1-byte case: an ASCII scalar is its own UTF-8 byte.
664+ if ( rune . IsAscii )
665+ {
666+ var byteValue = ( byte ) value ;
667+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
668+ }
669+ else if ( rune . Value <= 0x7FFu )
670+ {
671+ // Scalar 00000yyy yyxxxxxx -> bytes [ 110yyyyy 10xxxxxx ]
// Lead byte: adding (0b110 << 11) before shifting right by 6 leaves the 110 marker (0xC0) above the top 5 payload bits.
672+ var byteValue = ( byte ) ( ( value + ( 0b110u << 11 ) ) >> 6 ) ;
673+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
// Continuation byte: the low 6 payload bits plus the 10xxxxxx marker (0x80).
674+ byteValue = ( byte ) ( ( value & 0x3Fu ) + 0x80u ) ;
675+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
676+ }
677+ else if ( rune . Value <= 0xFFFFu )
678+ {
679+ // Scalar zzzzyyyy yyxxxxxx -> bytes [ 1110zzzz 10yyyyyy 10xxxxxx ]
// Lead byte: (0b1110 << 16) >> 12 contributes the 1110 marker (0xE0); continuation bytes follow the same mask-and-add-0x80 pattern.
680+ var byteValue = ( byte ) ( ( value + ( 0b1110 << 16 ) ) >> 12 ) ;
681+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
682+ byteValue = ( byte ) ( ( ( value & ( 0x3Fu << 6 ) ) >> 6 ) + 0x80u ) ;
683+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
684+ byteValue = ( byte ) ( ( value & 0x3Fu ) + 0x80u ) ;
685+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
686+ }
687+ else
688+ {
689+ // Scalar 000uuuuu zzzzyyyy yyxxxxxx -> bytes [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]
// Lead byte: (0b11110 << 21) >> 18 contributes the 11110 marker (0xF0); three continuation bytes carry 6 payload bits each.
690+ var byteValue = ( byte ) ( ( value + ( 0b11110 << 21 ) ) >> 18 ) ;
691+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
692+ byteValue = ( byte ) ( ( ( value & ( 0x3Fu << 12 ) ) >> 12 ) + 0x80u ) ;
693+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
694+ byteValue = ( byte ) ( ( ( value & ( 0x3Fu << 6 ) ) >> 6 ) + 0x80u ) ;
695+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
696+ byteValue = ( byte ) ( ( value & 0x3Fu ) + 0x80u ) ;
697+ builder . Append ( CultureInfo . InvariantCulture , $ "%{ HexUpperChars [ ( byteValue & 0xf0 ) >> 4 ] } { HexUpperChars [ byteValue & 0xf ] } ") ;
698+ }
683699 }
684700
685701 // Attempt to decode using RFC 5987 encoding.
0 commit comments