-
Notifications
You must be signed in to change notification settings - Fork 172
/
Copy pathPattern.carp
207 lines (170 loc) · 7.79 KB
/
Pattern.carp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
(system-include "carp_pattern.h")
(doc Pattern "is a data type for pattern matching, similar to, but not the same
as, Regular Expressions. [See the docs for more
information](../LanguageGuide.html#patterns).")
(defmodule Pattern
(register-type MatchResult [start Int, end Int])
(defmodule MatchResult
(defn ref-str [ref-matchres]
(fmt "(MatchResult start=%d end=%d)"
(MatchResult.start ref-matchres)
(MatchResult.end ref-matchres) ))
(implements str Pattern.MatchResult.ref-str)
(implements prn Pattern.MatchResult.ref-str)
(defn str [matchres]
(Pattern.MatchResult.ref-str &matchres) )
(implements str Pattern.MatchResult.str)
(implements prn Pattern.MatchResult.str)
)
(defn non-match? [match-res]
(or (Int.< @(MatchResult.start match-res) 0)
(Int.< @(MatchResult.end match-res) 0) ))
(defn extract [match-res data]
(if (non-match? match-res)
(Maybe.Nothing)
(Maybe.Just (String.slice data @(MatchResult.start match-res)
@(MatchResult.end match-res) ))))
(doc match-from "returns start and end indizes of the first match after start-pos. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
(register match-from (Fn [&Pattern &String Int] Pattern.MatchResult))
(doc match "returns start and end indizes of the first match after the start of the string. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
(defn match [pattern data]
(match-from pattern data 0) )
(doc find "finds the index of a pattern in a string.
Returns `-1` if it doesn’t find a matching pattern.")
(defn find [pattern data]
@(Pattern.MatchResult.start &(Pattern.match pattern data)) )
(doc find-all "finds all indices of a pattern in a string. The patterns may _not_ overlap.
Returns `[]` if it doesn’t find a matching pattern.")
(defn find-all-matches [pattern data]
(let-do [result []
stop (String.length data)
found (match-from pattern data 0)
start @(MatchResult.end &found) ]
(while-do (and (Int.<= start stop)
(not (non-match? &found)) )
(set! result (Array.push-back result found))
(set! found (match-from pattern data start))
(set! start @(MatchResult.end &found)) )
result ))
(defn find-all [pattern data]
(Array.copy-map
&(fn [m] @(MatchResult.start m))
&(find-all-matches pattern data) ))
(doc match-groups "finds the match groups of the first match of a pattern in
a string.
Returns `[]` if it doesn’t find a matching pattern.")
(register match-groups (Fn [&Pattern &String] (Array String)))
(doc match-str "finds the first match of a pattern in a string.
Returns an empty string if it doesn’t find a matching pattern.")
(defn match-str [pattern data]
(Maybe.from (Pattern.extract &(Pattern.match pattern data) data) @"") )
(doc match-all-groups "finds all match groups of a pattern in a string as a nested
array.
Returns `[]` if it doesn’t find a matching pattern.")
(register match-all-groups (Fn [&Pattern &String] (Array (Array String))))
(doc substitute "finds all matches of a pattern in a string and replaces it
by another pattern `n` times.
The substitute pattern can reference the original pattern by match group
indices, such as `\1`. This means that backslashes need to be double escaped.
If you want to replace all occurrences of the pattern, use `-1`.")
(register substitute (Fn [&Pattern &String &String Int] String))
(doc matches? "checks whether a pattern matches a string.")
(defn matches? [pat s] (/= (find pat s) -1))
(register str (Fn [&Pattern] String))
(implements str Pattern.str)
(register prn (Fn [&Pattern] String))
(implements prn Pattern.prn)
(register init (Fn [&String] Pattern))
(register = (Fn [&Pattern &Pattern] Bool))
(implements = Pattern.=)
(register delete (Fn [Pattern] ()))
(implements delete Pattern.delete)
(register copy (Fn [&Pattern] Pattern))
(implements copy Pattern.copy)
(doc from-chars "creates a pattern that matches a group of characters from a
list of those characters.")
(defn from-chars [chars]
(Pattern.init &(str* @"[" (String.from-chars chars) @"]")))
(defn global-match-str [pattern data]
(Array.copy-map
&(fn [m] (Maybe.unsafe-from (extract m data)))
&(find-all-matches pattern data)))
(doc split "splits a string by a pattern.")
(defn split [p s]
(let-do [idx (find-all p s)
strs (global-match-str p s)
lidx (Array.length &idx)
result (Array.allocate (Int.inc lidx))]
(Array.aset-uninitialized! &result 0
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
(for [i 0 (Int.dec (Array.length &idx))]
(let [plen (length (Array.unsafe-nth &strs i))]
(Array.aset-uninitialized! &result (Int.inc i)
(slice s (+ @(Array.unsafe-nth &idx i) plen)
@(Array.unsafe-nth &idx (Int.inc i))))))
(when (> lidx 0)
(let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))]
(Array.aset-uninitialized! &result lidx
(suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx))
plen)))))
result))
)
(defmodule String
(doc in? "checks whether a string contains another string.")
(defn in? [s sub]
(Pattern.matches? &(Pattern.init sub) s))
(doc upper? "checks whether a string is all uppercase.")
(defn upper? [s]
(Pattern.matches? #"^[\u\s\p]*$" s))
(doc lower? "checks whether a string is all lowercase.")
(defn lower? [s]
(Pattern.matches? #"^[\l\s\p]*$" s))
(doc num? "checks whether a string is numerical.")
(defn num? [s]
(Pattern.matches? #"^[0-9]*$" s))
(doc alpha? "checks whether a string contains only alphabetical characters (a-Z).")
(defn alpha? [s]
(Pattern.matches? #"^[\u\l]*$" s))
(doc alphanum? "checks whether a string is alphanumerical.")
(defn alphanum? [s]
(Pattern.matches? #"^[\w]*$" s))
(doc hex? "checks whether a string is hexadecimal.")
(defn hex? [s]
(Pattern.matches? #"^[\x]*$" s))
(doc trim-left "trims whitespace from the left of a string.")
(defn trim-left [s]
(Pattern.substitute #"^\s+" s "" 1))
(doc trim-right "trims whitespace from the right of a string.")
(defn trim-right [s]
(Pattern.substitute #"\s+$" s "" 1))
(doc trim "trims whitespace from both sides of a string.")
(defn trim [s]
(trim-left &(trim-right s)))
(doc chomp "trims a newline from the end of a string.")
(defn chomp [s]
(Pattern.substitute #"\r$" &(Pattern.substitute #"\n$" s "" 1) "" 1))
(doc collapse-whitespace "collapses groups of whitespace into single spaces.")
(defn collapse-whitespace [s]
(Pattern.substitute #"\s+" s " " -1))
(doc split-by "splits a string by separators.")
(defn split-by [s separators]
(let-do [pat (Pattern.from-chars separators)
idx (Pattern.find-all &pat s)
lidx (Array.length &idx)
result (Array.allocate (Int.inc lidx))]
(Array.aset-uninitialized! &result 0
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
(for [i 0 (Int.dec (Array.length &idx))]
(Array.aset-uninitialized! &result (Int.inc i)
(slice s (Int.inc @(Array.unsafe-nth &idx i)) @(Array.unsafe-nth &idx (Int.inc i)))))
(when (> lidx 0)
(Array.aset-uninitialized! &result lidx
(suffix s (Int.inc @(Array.unsafe-nth &idx (Int.dec lidx))))))
result))
(doc words "splits a string into words.")
(defn words [s]
(Array.endo-filter &(fn [s] (not (empty? s))) (split-by s &[\tab \space \newline])))
(doc lines "splits a string into lines.")
(defn lines [s]
(split-by s &[\newline]))
)