-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathTokenizer.hs
336 lines (286 loc) · 11.2 KB
/
Tokenizer.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
module Tokenizer where
import System.IO
import Control.Monad
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token
import StringParse
data PHPValue = PHPString String
| PHPInt Integer
| PHPFloat Double
| PHPBool Bool
| PHPNull
deriving (Show)
data PHPVariable = PHPVariable String | PHPVariableVariable String deriving (Show)
data FunctionCall = FunctionCall String | FunctionCallVar PHPVariable deriving (Show)
data PHPExpr = Literal PHPValue
| Variable PHPVariable
| Assign PHPVariable PHPExpr
| Neg PHPExpr
| Not PHPExpr
| BinaryExpr BinOp PHPExpr PHPExpr
| UnaryExpr UnaryType UnaryOp PHPVariable
| Call FunctionCall [PHPExpr]
| Isset [PHPVariable]
| Print PHPExpr
deriving (Show)
data UnaryType = Before | After deriving (Show)
data UnaryOp = Increment | Decrement deriving (Show)
mkUnaryOp :: String -> UnaryOp
mkUnaryOp "++" = Increment
mkUnaryOp "--" = Decrement
mkUnaryOp _ = error "Invalid unary op"
data BinOp = Add | Subtract | Multiply | Divide | Modulo | And | Or | Greater | Less | Equals | StrictEquals | Concat deriving (Show)
data ElseExpr = Else PHPStmt
| ElseIf PHPExpr PHPStmt (Maybe ElseExpr)
deriving (Show)
data FunctionArgumentDef = FunctionArgumentDef { argName :: String
, argDefault :: Maybe PHPValue
}
deriving (Show)
data ParseResult = PlainText String | PHPCode PHPStmt deriving (Show)
data StaticVar = StaticVar String (Maybe PHPValue) deriving (Show)
data PHPStmt = Seq [PHPStmt]
| Expression PHPExpr
| If PHPExpr PHPStmt (Maybe ElseExpr)
| Function String [FunctionArgumentDef] PHPStmt
| Return PHPExpr
| While PHPExpr PHPStmt
| For [PHPExpr] [PHPExpr] [PHPExpr] PHPStmt
| Echo [PHPExpr]
| Global PHPVariable
| Static [StaticVar]
deriving (Show)
langDef = emptyDef { Token.commentStart = "/*"
, Token.commentEnd = "*/"
, Token.commentLine = "//"
, Token.identStart = letter
, Token.identLetter = alphaNum <|> char '_'
, Token.reservedNames = [ "if", "else", "elseif", "while", "break", "do", "for", "continue"
, "true", "false", "null", "and", "or", "class", "function", "return"
, "<?php", "?>", "echo", "print"
]
, Token.reservedOpNames = [ "=", "==", "===", "->", ".", "+", "-", "*", "/", "%", "<", ">"
, "and", "or", "||", "&&", "!", "++", "--"
]
}
lexer = Token.makeTokenParser langDef
phpString = stringLit lexer '"' <|> stringLit lexer '\''
identifier = Token.identifier lexer
reserved = Token.reserved lexer
float = Token.float lexer
stringTok = phpString
reservedOp = (Token.lexeme lexer) . string
parens = Token.parens lexer
braces = Token.braces lexer
integer = Token.integer lexer
semi = Token.semi lexer
whiteSpace = Token.whiteSpace lexer
whileParser :: Parser [ParseResult]
whileParser = many (parsePHPCode <|> parsePlainText)
phpEof = try $ do
optional $ char '\n'
eof
parsePlainText :: Parser ParseResult
parsePlainText = liftM PlainText $ do
c <- anyChar
har <- manyTill anyChar ((lookAhead $ reserved "<?php") <|> phpEof)
return (c : har)
parsePHPCode :: Parser ParseResult
parsePHPCode = do
reserved "<?php"
seq <- sequenceOfStmt
(optional $ string "?>") <|> phpEof
return $ PHPCode seq
sequenceOfStmt = do
list <- many1 oneStatement
return $ Seq list
statementZeroOrMore = liftM Seq $ many oneStatement
statementZeroOrOne = liftM Seq $ option [] (liftM (:[]) oneStatement)
-- Match a valid PHP end of statement.
-- Must have ; after expression, unless closing tag
-- ?> comes immediately after
phpEnd = semi <|> try (string "?>")
-- Parse a single PHP statement
oneStatement :: Parser PHPStmt
oneStatement = ifStmt
<|> functionStmt
<|> returnStmt
<|> whileStmt
<|> forStmt
<|> echoStmt
<|> globalStmt
<|> staticStmt
<|> stmtExpr
-- Special case for an expression that's a statement
-- Expressions can be used without a semicolon in the end in ifs or whatever,
-- but a valid statement expression needs a semi in the end
where stmtExpr = do
expr <- phpExpression
phpEnd
return $ Expression expr
staticStmt :: Parser PHPStmt
staticStmt = do
stmt <- reserved "static" >> (liftM Static $ sepBy staticArg (Token.symbol lexer ","))
semi
return stmt
where staticArg = do
char '$'
name <- identifier
defValue <- optionMaybe $ do
Token.symbol lexer "="
phpValue
return $ StaticVar name defValue
globalStmt :: Parser PHPStmt
globalStmt = do
global <- reserved "global" >> liftM Global plainVariableExpr
semi
return global
echoStmt :: Parser PHPStmt
echoStmt = do
reserved "echo"
-- echo take one arg only if parens are used, otherwise 1 or more
args <- (liftM (:[]) $ parens phpExpression) <|> argList
phpEnd
return $ Echo args
where argList = sepBy phpExpression (Token.symbol lexer ",")
returnStmt :: Parser PHPStmt
returnStmt = do
reserved "return"
ret <- liftM Return phpExpression
phpEnd
return $ ret
functionStmt :: Parser PHPStmt
functionStmt = do
reserved "function"
name <- identifier
argDefs <- parens $ sepBy argDefExpr (optional (Token.symbol lexer ","))
body <- braces statementZeroOrMore
return $ Function name argDefs body
where
argDefExpr = do
char '$'
name <- identifier
defValue <- optionMaybe $ do
Token.symbol lexer "="
phpValue
return $ FunctionArgumentDef name defValue
whileStmt :: Parser PHPStmt
whileStmt = do
reserved "while"
cond <- parens phpExpression
stmt <- (braces statementZeroOrMore) <|> oneStatement
return $ While cond stmt
forStmt :: Parser PHPStmt
forStmt = do
reserved "for"
(init, cond, iter) <- parens $ do
minit <- sepBy phpExpression (Token.symbol lexer ",")
semi
mcond <- sepBy phpExpression (Token.symbol lexer ",")
semi
miter <- sepBy phpExpression (Token.symbol lexer ",")
return (minit, mcond, miter)
body <- (braces statementZeroOrMore) <|> do { s <- statementZeroOrOne; semi; return s }
return $ For init cond iter body
ifStmt :: Parser PHPStmt
ifStmt = do
reserved "if"
cond <- parens phpExpression
stmt1 <- (braces statementZeroOrMore) <|> oneStatement
cont <- optionMaybe (elseIfStmt <|> elseStmt)
return $ If cond stmt1 cont
elseStmt :: Parser ElseExpr
elseStmt = do
reserved "else"
stmt <- (braces statementZeroOrMore) <|> oneStatement
return $ Else stmt
elseIfStmt :: Parser ElseExpr
elseIfStmt = do
reserved "elseif"
cond <- parens phpExpression
stmt <- (braces statementZeroOrMore) <|> oneStatement
cont <- optionMaybe (elseIfStmt <|> elseStmt)
return $ ElseIf cond stmt cont
assignExpr :: Parser PHPExpr
assignExpr = do
var <- plainVariableExpr
reservedOp "="
expr <- phpExpression
return $ Assign var expr
plainVariableExpr :: Parser PHPVariable
plainVariableExpr = try varVarExpr <|> normalVariableExpr
where
varVarExpr = char '$' >> char '$' >> fmap PHPVariableVariable identifier
normalVariableExpr :: Parser PHPVariable
normalVariableExpr = char '$' >> fmap PHPVariable identifier
phpExpression :: Parser PHPExpr
phpExpression = buildExpressionParser phpOperators phpTerm
phpOperators = [ [Infix (reservedOp "*" >> return (BinaryExpr Multiply)) AssocLeft]
, [Infix (reservedOp "/" >> return (BinaryExpr Divide)) AssocLeft]
, [Infix (reservedOp "+" >> return (BinaryExpr Add)) AssocLeft]
, [Infix (reservedOp "-" >> return (BinaryExpr Subtract)) AssocLeft]
, [Infix (reservedOp "." >> return (BinaryExpr Concat)) AssocLeft]
, [Infix (reservedOp "==" >> return (BinaryExpr Equals)) AssocLeft]
, [Infix (reservedOp "===" >> return (BinaryExpr StrictEquals)) AssocLeft]
, [Prefix (reservedOp "!" >> return (Not))]
, [Infix (reservedOp "&&" >> return (BinaryExpr And)) AssocLeft]
, [Infix (reservedOp "||" >> return (BinaryExpr Or)) AssocLeft]
, [Infix (reservedOp "<" >> return (BinaryExpr Less)) AssocLeft]
, [Infix (reservedOp ">" >> return (BinaryExpr Greater)) AssocLeft]
]
phpTerm = parens phpExpression
<|> try issetExpr
<|> try printExpr
<|> try functionCallExpr
<|> try assignExpr
<|> variableExpr
<|> liftM Literal phpValue
issetExpr :: Parser PHPExpr
issetExpr = do
reserved "isset"
vars <- parens $ sepBy1 plainVariableExpr (Token.symbol lexer ",")
return $ Isset vars
variableExpr :: Parser PHPExpr
variableExpr = do
prefixOp <- unaryOp
var <- plainVariableExpr
case prefixOp of
Just op -> return $ UnaryExpr Before (mkUnaryOp op) var
Nothing -> do
postOp <- unaryOp
case postOp of
Nothing -> return $ Variable var
Just op -> return $ UnaryExpr After (mkUnaryOp op) var
where
unaryOp = optionMaybe (try (reservedOp "++") <|> try (reservedOp "--"))
functionCallExpr :: Parser PHPExpr
functionCallExpr = try varCall <|> nameCall
where
varCall = do
var <- plainVariableExpr
args <- parens argList
return $ Call (FunctionCallVar var) args
nameCall = do
name <- identifier
args <- parens argList
return $ Call (FunctionCall name) args
argList = sepBy phpExpression (Token.symbol lexer ",")
printExpr :: Parser PHPExpr
printExpr = do
reserved "print"
arg <- parens phpExpression <|> phpExpression
return $ Print arg
phpValue :: Parser PHPValue
phpValue = (reserved "true" >> return (PHPBool True))
<|> (reserved "false" >> return (PHPBool False))
<|> (reserved "null" >> return PHPNull)
<|> (Token.naturalOrFloat lexer >>= return . either PHPInt PHPFloat)
<|> (stringTok >>= return . PHPString)
parseString :: String -> [ParseResult]
parseString str = case parse whileParser "" str of
Left e -> error $ show e
Right r -> case last r of
PlainText "\n" -> init r
_ -> r