-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathStringParse.hs
88 lines (66 loc) · 3.11 KB
/
StringParse.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
-- | This is an almost straight copy from Text.Parsec.Token, for the purposes of parsing
-- strings that are delimited by other characters besides "
module StringParse (stringLit) where
import Control.Monad
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token
import Data.Char (digitToInt)
stringLit lexer d = Token.lexeme lexer (
do{ str <- between (char d)
(char d <?> "end of string")
(many $ stringChar d)
; return (foldr (maybe id (:)) "" str)
}
<?> "literal string")
stringChar d = do{ c <- stringLetter d; return (Just c) }
<|> stringEscape
<?> "string character"
stringLetter d = satisfy (\c -> (c /= d) && (c /= '\\') && (c > '\026'))
stringEscape = do{ char '\\'
; do{ escapeGap ; return Nothing }
<|> do{ escapeEmpty; return Nothing }
<|> do{ esc <- escapeCode; return (Just esc) }
}
escapeEmpty = char '&'
escapeGap = do{ many1 space
; char '\\' <?> "end of string gap"
}
-- escape codes
escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
<?> "escape code"
charControl = do{ char '^'
; code <- upper
; return (toEnum (fromEnum code - fromEnum 'A'))
}
charNum = do{ code <- decimal
<|> do{ char 'o'; number 8 octDigit }
<|> do{ char 'x'; number 16 hexDigit }
; return (toEnum (fromInteger code))
}
charEsc = choice (map parseEsc escMap)
where
parseEsc (c,code) = do{ char c; return code }
charAscii = choice (map parseAscii asciiMap)
where
parseAscii (asc,code) = try (do{ string asc; return code })
-- escape code tables
escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
"FS","GS","RS","US","SP"]
ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
"DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
"CAN","SUB","ESC","DEL"]
ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI',
'\EM','\FS','\GS','\RS','\US','\SP']
ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK',
'\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK',
'\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
decimal = number 10 digit
number base baseDigit
= do{ digits <- many1 baseDigit
; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
; seq n (return n)
}