@@ -19,13 +19,13 @@ impl NFAGraph {
1919 let re = PikeVM :: new ( pattern) . map_err ( |e| NFAError :: RegexCompilation ( e. to_string ( ) ) ) ?;
2020 let thompson_nfa = re. get_nfa ( ) ;
2121
22- let state_len = thompson_nfa. states ( ) . len ( ) - 2 ;
22+ let state_len = thompson_nfa. states ( ) . len ( ) - thompson_nfa . start_anchored ( ) . as_usize ( ) ;
2323
2424 let mut graph = Self :: default ( ) ;
2525 graph. regex = pattern. to_string ( ) ;
2626 graph. initialize_nodes ( state_len) ?;
2727 graph. process_all_states ( & thompson_nfa) ?;
28- graph. set_start_states ( & thompson_nfa ) ;
28+ graph. start_states . insert ( 0 ) ;
2929 graph. remove_epsilon_transitions ( ) ?;
3030
3131 graph. verify ( ) ?;
@@ -50,39 +50,66 @@ impl NFAGraph {
5050 /// Processes all states from the Thompson NFA
5151 fn process_all_states ( & mut self , nfa : & NFA ) -> NFAResult < ( ) > {
5252 for state_idx in 0 ..self . nodes . len ( ) {
53- let state_id =
54- StateID :: new ( state_idx + 2 ) . map_err ( |e| NFAError :: InvalidStateId ( e. to_string ( ) ) ) ?;
53+ let state_id = StateID :: new ( state_idx + nfa . start_anchored ( ) . as_usize ( ) )
54+ . map_err ( |e| NFAError :: InvalidStateId ( e. to_string ( ) ) ) ?;
5555
5656 match nfa. state ( state_id) {
5757 State :: Match { .. } => {
5858 self . accept_states . insert ( state_idx) ;
5959 }
6060 State :: ByteRange { trans } => {
61- self . add_byte_range_transition ( state_idx, trans) ?;
61+ self . add_byte_range_transition (
62+ nfa. start_anchored ( ) . as_usize ( ) ,
63+ state_idx,
64+ trans,
65+ ) ?;
6266 }
6367 State :: Sparse ( sparse) => {
64- self . add_sparse_transitions ( state_idx, & sparse. transitions ) ?;
68+ self . add_sparse_transitions (
69+ nfa. start_anchored ( ) . as_usize ( ) ,
70+ state_idx,
71+ & sparse. transitions ,
72+ ) ?;
6573 }
6674 State :: Dense ( dense) => {
67- self . add_dense_transitions ( state_idx, & dense. transitions ) ?;
75+ self . add_dense_transitions (
76+ nfa. start_anchored ( ) . as_usize ( ) ,
77+ state_idx,
78+ & dense. transitions ,
79+ ) ?;
6880 }
6981 State :: Union { alternates } => {
70- self . add_union_transitions ( state_idx, alternates) ?;
82+ self . add_union_transitions (
83+ nfa. start_anchored ( ) . as_usize ( ) ,
84+ state_idx,
85+ alternates,
86+ ) ?;
7187 }
7288 State :: BinaryUnion { alt1, alt2 } => {
73- self . add_binary_union_transitions ( state_idx, alt1, alt2) ?;
89+ self . add_binary_union_transitions (
90+ nfa. start_anchored ( ) . as_usize ( ) ,
91+ state_idx,
92+ alt1,
93+ alt2,
94+ ) ?;
7495 }
7596 State :: Capture {
7697 next,
7798 group_index,
7899 slot,
79100 ..
80101 } => {
81- self . add_capture_transition ( state_idx, next, group_index, slot) ?;
102+ self . add_capture_transition (
103+ nfa. start_anchored ( ) . as_usize ( ) ,
104+ state_idx,
105+ next,
106+ group_index,
107+ slot,
108+ ) ?;
82109 self . num_capture_groups = self . num_capture_groups . max ( group_index. as_usize ( ) ) ;
83110 }
84111 State :: Look { next, .. } => {
85- self . add_look_transition ( state_idx, next) ?;
112+ self . add_look_transition ( nfa . start_anchored ( ) . as_usize ( ) , state_idx, next) ?;
86113 }
87114 State :: Fail => { } // No transitions needed
88115 }
@@ -91,100 +118,122 @@ impl NFAGraph {
91118 }
92119
93120 /// Adds a byte range transition to the graph
94- fn add_byte_range_transition ( & mut self , state_id : usize , trans : & Transition ) -> NFAResult < ( ) > {
121+ fn add_byte_range_transition (
122+ & mut self ,
123+ anchored_state_id : usize ,
124+ state_id : usize ,
125+ trans : & Transition ,
126+ ) -> NFAResult < ( ) > {
95127 for byte in trans. start ..=trans. end {
96128 self . nodes [ state_id]
97129 . byte_transitions
98130 . entry ( byte)
99131 . or_insert_with ( BTreeSet :: new)
100- . insert ( trans. next . as_usize ( ) - 2 ) ;
132+ . insert ( trans. next . as_usize ( ) - anchored_state_id ) ;
101133 }
102134 Ok ( ( ) )
103135 }
104136
105137 /// Adds transitions from a sparse transition set
106138 fn add_sparse_transitions (
107139 & mut self ,
140+ anchored_state_id : usize ,
108141 state_id : usize ,
109142 transitions : & [ Transition ] ,
110143 ) -> NFAResult < ( ) > {
111144 for trans in transitions {
112- self . add_byte_range_transition ( state_id, trans) ?;
145+ self . add_byte_range_transition ( anchored_state_id , state_id, trans) ?;
113146 }
114147 Ok ( ( ) )
115148 }
116149
117150 /// Adds transitions from a dense transition table
118- fn add_dense_transitions ( & mut self , state_id : usize , transitions : & [ StateID ] ) -> NFAResult < ( ) > {
151+ fn add_dense_transitions (
152+ & mut self ,
153+ anchored_state_id : usize ,
154+ state_id : usize ,
155+ transitions : & [ StateID ] ,
156+ ) -> NFAResult < ( ) > {
119157 for ( byte, & next) in transitions. iter ( ) . enumerate ( ) {
120158 if next != StateID :: ZERO {
121159 self . nodes [ state_id]
122160 . byte_transitions
123161 . entry ( byte as u8 )
124162 . or_insert_with ( BTreeSet :: new)
125- . insert ( next. as_usize ( ) - 2 ) ;
163+ . insert ( next. as_usize ( ) - anchored_state_id ) ;
126164 }
127165 }
128166 Ok ( ( ) )
129167 }
130168
131169 /// Adds epsilon transitions for a union state
132- fn add_union_transitions ( & mut self , state_id : usize , alternates : & [ StateID ] ) -> NFAResult < ( ) > {
133- self . nodes [ state_id]
134- . epsilon_transitions
135- . extend ( alternates. iter ( ) . map ( |id| id. as_usize ( ) - 2 ) ) ;
170+ fn add_union_transitions (
171+ & mut self ,
172+ anchored_state_id : usize ,
173+ state_id : usize ,
174+ alternates : & [ StateID ] ,
175+ ) -> NFAResult < ( ) > {
176+ self . nodes [ state_id] . epsilon_transitions . extend (
177+ alternates
178+ . iter ( )
179+ . map ( |id| id. as_usize ( ) - anchored_state_id) ,
180+ ) ;
136181 Ok ( ( ) )
137182 }
138183
139184 /// Adds epsilon transitions for a binary union state
140185 fn add_binary_union_transitions (
141186 & mut self ,
187+ anchored_state_id : usize ,
142188 state_id : usize ,
143189 alt1 : & StateID ,
144190 alt2 : & StateID ,
145191 ) -> NFAResult < ( ) > {
146192 let node = & mut self . nodes [ state_id] ;
147- node. epsilon_transitions . insert ( alt1. as_usize ( ) - 2 ) ;
148- node. epsilon_transitions . insert ( alt2. as_usize ( ) - 2 ) ;
193+ node. epsilon_transitions
194+ . insert ( alt1. as_usize ( ) - anchored_state_id) ;
195+ node. epsilon_transitions
196+ . insert ( alt2. as_usize ( ) - anchored_state_id) ;
149197 Ok ( ( ) )
150198 }
151199
152200 /// Adds an epsilon transition with capture group information
153201 fn add_capture_transition (
154202 & mut self ,
203+ anchored_state_id : usize ,
155204 state_id : usize ,
156205 next : & StateID ,
157206 group_index : & SmallIndex ,
158207 slot : & SmallIndex ,
159208 ) -> NFAResult < ( ) > {
160209 let node = & mut self . nodes [ state_id] ;
161- node. epsilon_transitions . insert ( next. as_usize ( ) - 2 ) ;
210+ node. epsilon_transitions
211+ . insert ( next. as_usize ( ) - anchored_state_id) ;
162212
163213 let group_idx = group_index. as_usize ( ) ;
164214 if group_idx > 0 {
165215 let is_start = slot. as_usize ( ) % 2 == 0 ;
166216 node. capture_groups
167- . entry ( next. as_usize ( ) - 2 )
217+ . entry ( next. as_usize ( ) - anchored_state_id )
168218 . or_insert_with ( BTreeSet :: new)
169219 . insert ( ( group_idx, is_start) ) ;
170220 }
171221 Ok ( ( ) )
172222 }
173223
174224 /// Adds an epsilon transition for a look-around state
175- fn add_look_transition ( & mut self , state_id : usize , next : & StateID ) -> NFAResult < ( ) > {
225+ fn add_look_transition (
226+ & mut self ,
227+ anchored_state_id : usize ,
228+ state_id : usize ,
229+ next : & StateID ,
230+ ) -> NFAResult < ( ) > {
176231 self . nodes [ state_id]
177232 . epsilon_transitions
178- . insert ( next. as_usize ( ) - 2 ) ;
233+ . insert ( next. as_usize ( ) - anchored_state_id ) ;
179234 Ok ( ( ) )
180235 }
181236
182- /// Sets the start states for the NFA
183- fn set_start_states ( & mut self , nfa : & NFA ) {
184- self . start_states
185- . insert ( nfa. start_anchored ( ) . as_usize ( ) - 2 ) ;
186- }
187-
188237 pub fn pretty_print ( & self ) {
189238 println ! ( "\n === NFA Graph ===" ) ;
190239 println ! ( "Regex: {}" , self . regex) ;
0 commit comments