Skip to content

Commit 1eb64a8

Browse files
committed
Rewrite the matching code to support typed multi-wildcards
Fixes #2.
1 parent cde1e73 commit 1eb64a8

File tree

8 files changed

+259
-152
lines changed

8 files changed

+259
-152
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Grape Changelog
22

3+
## Unreleased
4+
* Add support for typed multi-expressions wildcards such as `$keyword&` and `$number$` (#2)
5+
36
## 0.5.1 (2020/10/27)
47
### CLI
58
* Fix `--line-numbers none`

doc/Patterns.md

+11-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ They are matched recursively: `[$]` matches `[[1]]` twice; once on `[[1]]` and o
77
## Literals
88

99
literals are matched, well, literally. Like in a regular expression, they match their own representation:
10-
the pattern `42` matches the code `42`.
10+
the pattern `42` matches the code `42` and the pattern `:foo` matches the code `:foo`.
1111

1212
## Wildcards
1313
### `$`
@@ -60,7 +60,15 @@ This list is based on [Parcera’s grammar][pg], except that underscores are rep
6060

6161
[pg]: https://github.com/carocad/parcera/blob/83cd988e69116b67c620c099f78b693ac5e37233/src/Clojure.g4
6262

63-
Note: when matching collections you can have a better control over their content by using literals:
63+
Note: when matching collections you can have a better control over their content by using single wildcards:
6464
`$vector` matches any vector, while `[]` matches empty vectors and `[$ $ $]` matches vectors of exactly
6565
3 elements. These can of course be combined: `[$ $number $ $regex]` matches all 4-elements vectors where
66-
the second one is a number and the fourth and last one a regular expression.
66+
the second one is a number and the fourth one a regular expression.
67+
68+
### `$type&`
69+
70+
`$type&` is the child of `$&` and `$type&`. `[$keyword $number&]` matches a vector that contains one keyword
71+
followed by zero or more numbers.
72+
73+
Note consecutive typed multiple-expression wildcards are not supported for now: `[$keyword& $symbols&]` doesn’t work
74+
(yet).

src/grape/core.clj

+7-1
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,16 @@
2626
[tree]
2727
(with-meta (list :code tree) (meta tree)))
2828

29+
(defn- subtrees
30+
[tree]
31+
(->> tree
32+
(tree-seq tree-node? node-children)
33+
(filter tree-node?)))
34+
2935
(defn- find-raw-subtrees
3036
[tree pattern]
3137
(->> tree
32-
(tree-seq tree-node? node-children)
38+
subtrees
3339
(filter #(match? % pattern))))
3440

3541
(defn find-subtrees

src/grape/impl/match.clj

+45-34
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
(ns grape.impl.match
2+
"Internal match utilities."
23
(:require [clojure.string :as str]
3-
[grape.impl.models :refer [tree-leave? pattern? node-type node-children node-child
4-
wildcard-expression? wildcard-expressions? typed-wildcard-expression?
5-
->typed-wildcard]]
4+
[grape.impl.models :as m]
65
[grape.impl.parsing :as p]))
76

87
(defn pattern
98
[code]
10-
{:post [(pattern? %)]}
9+
{:post [(m/pattern? %)]}
1110
(-> code
1211
str/trim
1312
p/parse-code
14-
node-children
13+
m/node-children
1514
first))
1615

1716
;; -------------------
@@ -30,72 +29,84 @@
3029
trees
3130
patterns))))
3231

32+
(defn- match-multi-wildcards?
33+
[trees wildcard-nodes]
34+
(let [node-types (map :node-type (distinct (map m/node-wildcard wildcard-nodes)))]
35+
(or
36+
;; Empty trees always match any number of multiple-expressions wildcards
37+
(empty? trees)
38+
39+
;; $& matches everything
40+
(= [:_all] node-types)
41+
42+
;; $<type>& matches a sequence of nodes of type <type>
43+
(and
44+
(= 1 (count node-types))
45+
(let [node-type (first node-types)]
46+
(every? #(= node-type (m/node-type %)) trees)))
47+
48+
;; We could add some heuristics at this point.
49+
;; - [$t1& $t2& $t3&]: drop trees while type==t1, then while type==t2, etc.
50+
;; - [$t1& $& $t2&]: idem, but start from the end once we reach $&
51+
)))
52+
3353
(defn- match-seq?
3454
"Test if a sequence of subtrees match a sequence of patterns."
3555
[trees patterns]
3656
(let [;; Assume a pattern sequence have the following format:
3757
;; (expression* expressions-wildcard expression*)
38-
;; For convenience, we allow multiple expressions-wildcards to occur
58+
;; For convenience, we allow multiple equal expressions-wildcards to occur
3959
;; as if they were all one wildcard.
4060
;;
4161
;; We first split the pattern to extract that (expression* part from
4262
;; the rest.
43-
[start end-with-wildcard] (split-with (complement wildcard-expressions?)
63+
[start end-with-wildcard] (split-with (complement m/multi-wildcard?)
4464
patterns)
4565
;; Then split the rest into wildcards and the end.
46-
[wildcards end] (split-with wildcard-expressions? end-with-wildcard)]
66+
[wildcard-nodes end] (split-with m/multi-wildcard? end-with-wildcard)]
4767

4868
;; If we have no wildcard, fallback on the default matching.
49-
(if (empty? wildcards)
69+
(if (empty? wildcard-nodes)
5070
(exact-match-seq? trees patterns)
5171
;; Otherwise, extract the start and end of the subtrees.
5272
(let [
53-
;; Take the right number of subtrees at the beginning so they match
54-
;; 'start' patterns.
55-
start-trees (take (count start) trees)
73+
;; Take the right number of subtrees at the beginning so they match 'start' patterns.
74+
[start-trees rest-trees] (split-at (count start) trees)
5675
;; Do the same with the end subtrees.
57-
end-trees (drop (- (count trees) (count end)) trees)]
76+
[wildcarded-trees end-trees] (split-at (- (count rest-trees) (count end)) rest-trees)]
5877
(and
5978
(exact-match-seq? start-trees start)
60-
(exact-match-seq? end-trees end))))))
79+
(exact-match-seq? end-trees end)
80+
(match-multi-wildcards? wildcarded-trees wildcard-nodes))))))
6181

62-
(defn- match-typed-wildcard-expression?
63-
"Test if 'node' matches typed-wildcard 'pattern'."
82+
(defn- match-wildcard?
6483
[node pattern]
65-
;; pattern == (:symbol wildcard-name), with wildcard-name = $something
66-
;; node == (:something_else ...)
67-
;; we're checking if $something-else == $something
68-
(let [wildcard-name (node-child pattern)
69-
node-type (str/replace (name (node-type node)) #"_" "-")
70-
node-type-as-a-wildcard-name (->typed-wildcard node-type)]
71-
(= wildcard-name
72-
node-type-as-a-wildcard-name)))
84+
(let [{:keys [node-type]} (m/node-wildcard pattern)]
85+
(or (= :_all node-type)
86+
(= node-type (m/node-type node)))))
7387

7488
(defn match?
7589
"Test if a subtree matches a pattern. Always return false on the root tree."
7690
[tree pattern]
7791
(cond
7892
;; root tree
79-
(= :code (node-type tree))
93+
(= :code (m/node-type tree))
8094
false
8195

8296
;; one of them is a leave:
8397
;; - if both are leaves and equal, return true
8498
;; - otherwise false (a leave and a non-leave are never equal)
85-
(or (tree-leave? tree) (tree-leave? pattern))
99+
(or (m/tree-leave? tree) (m/tree-leave? pattern))
86100
(= tree pattern)
87101

88-
(wildcard-expression? pattern)
89-
true
90-
91-
(typed-wildcard-expression? pattern)
92-
(match-typed-wildcard-expression? tree pattern)
102+
(m/wildcard? pattern)
103+
(match-wildcard? tree pattern)
93104

94105
;; [:symbol "foo"] ≠ [:keyword "foo"]
95-
(not= (node-type tree) (node-type pattern))
106+
(not= (m/node-type tree) (m/node-type pattern))
96107
false
97108

98109
:else
99-
(let [tree-children (node-children tree)
100-
pattern-children (node-children pattern)]
110+
(let [tree-children (m/node-children tree)
111+
pattern-children (m/node-children pattern)]
101112
(match-seq? tree-children pattern-children))))

src/grape/impl/models.clj

+81-82
Original file line numberDiff line numberDiff line change
@@ -16,58 +16,11 @@
1616
(def node-child second)
1717
(def ^:private raw-node-children rest)
1818

19-
;; -------------------
20-
;; Transformations
21-
;; -------------------
22-
2319
(defn- remove-whitespaces
2420
"Drop whitespaces, comments and discarded forms from a sequence of nodes."
2521
[xs]
2622
(remove #(#{:whitespace :comment :discard} (node-type %)) xs))
2723

28-
(defn- remove-whitespace*
29-
"Remove a specific node from a tree if it's a whitespace. get-fn takes the tree's children and return the node.
30-
remove-fn takes the children and return them without that node."
31-
[tree get-fn remove-fn]
32-
(let [children (raw-node-children tree)
33-
node (get-fn children)]
34-
(if (= :whitespace (node-type node))
35-
(cons (first tree) (remove-fn children))
36-
tree)))
37-
38-
(defn- remove-trailing-whitespace
39-
"Remove the trailing whitespace node of a tree, if any."
40-
[tree]
41-
(remove-whitespace* tree last butlast))
42-
43-
(defn- remove-leading-whitespace
44-
"Remove the leading whitespace node of a tree, if any."
45-
[tree]
46-
(remove-whitespace* tree first rest))
47-
48-
(defn compact-whitespaces
49-
"Transform a tree by 'compacting' its whitespaces: all newlines and sequences of whitespaces are replaced
50-
by a single whitespace. Comments are removed as well."
51-
[tree]
52-
(->> tree
53-
(postwalk
54-
(fn [node]
55-
(if (tree-node? node)
56-
(case (node-type node)
57-
:comment nil
58-
:whitespace (let [s (node-child node)]
59-
(if (or (= \newline (first s))
60-
(< 1 (count s)))
61-
'(:whitespace " ")
62-
node))
63-
(->> node
64-
(remove nil?)
65-
remove-leading-whitespace
66-
remove-trailing-whitespace))
67-
node)))
68-
remove-leading-whitespace
69-
remove-trailing-whitespace))
70-
7124
(defn node-children
7225
"Return non-whitespace node children."
7326
[node]
@@ -77,33 +30,6 @@
7730
;; Wildcards
7831
;; -------------------
7932

80-
(def ^{:dynamic true
81-
:doc "Wildcard symbol used to represent any single expression in a pattern.
82-
This must be a valid Clojure symbol.
83-
It is also used as a prefix for typed wildcards. For example, if this is set to
84-
$ (the default), $string represents any single string expression; $list any
85-
single list expression; etc."}
86-
*wildcard-expression*
87-
"$")
88-
89-
(def ^{:dynamic true
90-
:doc "Wildcard symbol used to represent any number of expressions
91-
in a pattern, including zero. This must be a valid Clojure symbol."}
92-
*wildcard-expressions*
93-
"$&")
94-
95-
(defn wildcard-expression?
96-
"Test if a node is an expression wildcard symbol."
97-
[node]
98-
(= (list :symbol *wildcard-expression*)
99-
node))
100-
101-
(defn wildcard-expressions?
102-
"Test if a node is an expressions wildcard symbol."
103-
[node]
104-
(= (list :symbol *wildcard-expressions*)
105-
node))
106-
10733
(def ^:private
10834
types
10935
;; https://github.com/carocad/parcera/blob/d6b28b1058ef2af447a9452f96c7b6053e59f613/src/parcera/core.cljc#L26
@@ -130,15 +56,88 @@ in a pattern, including zero. This must be a valid Clojure symbol."}
13056
"vector"
13157
"var-quote"})
13258

133-
(defn typed-wildcard-expression?
134-
"Test if a node is an typed expression wildcard symbol."
59+
(def wildcard-prefix "$")
60+
(def multiple-suffix "&")
61+
62+
(defn wildcard?
63+
"Test if a node is a wildcard. This doesn’t check if it's a valid one."
13564
[node]
13665
(and (pattern? node)
13766
(= :symbol (node-type node))
138-
(let [s (node-child node)]
139-
(str/starts-with? s *wildcard-expression*)
140-
(contains? types (subs s 1)))))
67+
(str/starts-with? (node-child node) wildcard-prefix)))
14168

142-
(defn ->typed-wildcard
143-
[typ]
144-
(str *wildcard-expression* typ))
69+
(defn multi-wildcard?
70+
"Test if a node is a multiple-expressions wildcard. This doesn’t check if its type (if any) is valid."
71+
[node]
72+
(and (wildcard? node)
73+
(str/ends-with? (node-child node) multiple-suffix)))
74+
75+
(defn node-wildcard
76+
"Given a node for which `(wildcard? node)` is true, return a map describing the wildcard if it’s valid."
77+
[node]
78+
(let [s (node-child node)
79+
multiple? (str/ends-with? s multiple-suffix)
80+
wildcard-type (subs s 1 (cond-> (count s) multiple? dec))]
81+
(cond
82+
(= "" wildcard-type)
83+
{:node-type :_all
84+
:multiple? multiple?}
85+
86+
(contains? types wildcard-type)
87+
{:node-type (keyword (str/replace wildcard-type #"-" "_"))
88+
:multiple? multiple?})))
89+
90+
;; -------------------
91+
;; Transformations
92+
;; -------------------
93+
94+
(defn- remove-whitespace*
95+
"Remove a specific node from a tree if it's a whitespace. get-fn takes the tree's children and return the node.
96+
remove-fn takes the children and return them without that node."
97+
[tree get-fn remove-fn]
98+
(let [children (raw-node-children tree)
99+
node (get-fn children)]
100+
(if (= :whitespace (node-type node))
101+
(cons (first tree) (remove-fn children))
102+
tree)))
103+
104+
(defn- remove-trailing-whitespace
105+
"Remove the trailing whitespace node of a tree, if any."
106+
[tree]
107+
(remove-whitespace* tree last butlast))
108+
109+
(defn- remove-leading-whitespace
110+
"Remove the leading whitespace node of a tree, if any."
111+
[tree]
112+
(remove-whitespace* tree first rest))
113+
114+
(defn- postwalk-tree
115+
"Equivalent of postwalk that only calls f on tree nodes."
116+
[f tree]
117+
(postwalk
118+
(fn [node]
119+
(if (tree-node? node)
120+
(f node)
121+
node))
122+
tree))
123+
124+
(defn compact-whitespaces
125+
"Transform a tree by 'compacting' its whitespaces: all newlines and sequences of whitespaces are replaced
126+
by a single whitespace. Comments are removed as well."
127+
[tree]
128+
(->> tree
129+
(postwalk-tree
130+
(fn [node]
131+
(case (node-type node)
132+
:comment nil
133+
:whitespace (let [s (node-child node)]
134+
(if (or (= \newline (first s))
135+
(< 1 (count s)))
136+
'(:whitespace " ")
137+
node))
138+
(->> node
139+
(remove nil?)
140+
remove-leading-whitespace
141+
remove-trailing-whitespace))))
142+
remove-leading-whitespace
143+
remove-trailing-whitespace))

0 commit comments

Comments
 (0)